Ansible playbook voor het opzetten van een MetaVox loadtest omgeving: - 50 teamfolders met 10.000 bestanden elk (500K totaal) - 100 metadata velddefinities (10 teamfolder + 90 file-level) - 3-niveau mappenstructuur (10 hoofdmappen x 3 submappen) - ~43M metadata records via directe MySQL inserts - Geoptimaliseerde database indexes (7 redundante indexes gedropt) Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API voor maximale performance. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
258 lines · 9.9 KiB · Django/Jinja template
#!/usr/bin/env python3
|
|
"""
|
|
Create Folder Structure - Mappenstructuur aanmaken en bestanden verplaatsen
|
|
===========================================================================
|
|
Maakt een 3-niveau mappenstructuur aan binnen elke teamfolder en verplaatst
|
|
de bestanden gelijkmatig over de leaf folders.
|
|
|
|
10 hoofdmappen x 3 submappen = 30 leaf folders per teamfolder.
|
|
Bestanden worden verplaatst met os.rename (geen extra diskruimte nodig).
|
|
Database (oc_filecache) wordt bijgewerkt met nieuwe paden en parent IDs.
|
|
"""
|
|
import os
|
|
import subprocess
|
|
import hashlib
|
|
import time
|
|
|
|
# --- Deploy-time configuration (values rendered by Ansible/Jinja2) ---
DB_NAME = "{{ nextcloud_db_name }}"
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
NUM_TEAMFOLDERS = {{ num_teamfolders }}
FILES_PER_FOLDER = {{ files_per_teamfolder }}

# Mapping of main folder name -> list of sub folder names.
FOLDER_STRUCTURE = {{ folder_structure | to_json }}

# Flattened "main/sub" leaf paths; files are distributed evenly over these.
LEAF_FOLDERS = [
    f"{top}/{leaf}"
    for top, leaves in FOLDER_STRUCTURE.items()
    for leaf in leaves
]

print(f"Folder structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen")
|
|
|
|
|
|
def mysql_exec(sql):
    """Run one SQL statement through the mysql CLI and return stripped stdout.

    Failures are reported on stdout but never raised; callers treat an
    empty return value as "no result".
    """
    cmd = ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql]
    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    # Only report when the client both failed and produced an error message.
    if proc.returncode != 0 and proc.stderr.strip():
        print(f" [SQL ERROR] {proc.stderr[:200]}")
    return proc.stdout.strip()
|
|
|
|
|
|
def mysql_exec_file(filepath):
    """Pipe a SQL file into the mysql CLI; return the CompletedProcess.

    Uses a raised --max-allowed-packet so large batched INSERT/UPDATE
    files go through in one statement.
    """
    with open(filepath) as sql_stream:
        proc = subprocess.run(
            ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
            stdin=sql_stream,
            capture_output=True,
            text=True,
            timeout=300,
        )
    if proc.returncode != 0:
        print(f" [SQL ERROR] {proc.stderr[:200]}")
    return proc
|
|
|
|
|
|
def escape_sql(s):
    """Backslash-escape *s* for embedding in a single-quoted SQL literal.

    One translate pass: every backslash is doubled and every single
    quote gains a leading backslash.
    """
    return s.translate(str.maketrans({"\\": "\\\\", "'": "\\'"}))
|
|
|
|
|
|
def main() -> None:
    """Restructure every teamfolder into the 3-level layout.

    Per teamfolder: create main/sub directories on disk and in
    oc_filecache, move the flat dummy files evenly into the leaf
    folders with os.rename, then batch-update their oc_filecache rows
    (path, path_hash, parent) via generated SQL files.
    """
    start_time = time.time()
    # Single timestamp reused for mtime/storage_mtime/etag of all new dirs.
    now = int(time.time())

    # Get storage mapping: groupfolder number -> oc_storages.numeric_id.
    print("Ophalen storage mapping...")
    rows = mysql_exec("""
        SELECT s.numeric_id,
        REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
        FROM oc_storages s
        WHERE s.id LIKE 'local::%/__groupfolders/%'
    """)
    storage_map = {}
    for line in rows.split('\n'):
        if line.strip():
            # mysql -N output is tab-separated: numeric_id <TAB> folder_num
            parts = line.split('\t')
            storage_map[int(parts[1])] = int(parts[0])

    # Get groupfolder mapping: teamfolder index -> oc_group_folders.folder_id.
    rows = mysql_exec("""
        SELECT folder_id, mount_point FROM oc_group_folders
        WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
        ORDER BY folder_id
    """)
    gf_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')
            gf_id = int(parts[0])
            # NOTE(review): assumes mount points end in '-<index>'; a mount
            # point with a non-numeric suffix would raise ValueError here.
            tf_idx = int(parts[1].split('-')[-1])
            if tf_idx <= NUM_TEAMFOLDERS:
                gf_map[tf_idx] = gf_id

    print(f"{len(gf_map)} teamfolders gevonden")

    # Get mimetype ID for directories. Raises ValueError if the mimetype
    # row is missing (query returns empty string) — deliberate fail-fast.
    dir_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='httpd/unix-directory'"))

    total_moved = 0

    for tf_idx in sorted(gf_map.keys()):
        gf_id = gf_map[tf_idx]
        storage_id = storage_map.get(gf_id)
        if not storage_id:
            print(f" [SKIP] Geen storage voor gf_id {gf_id}")
            continue

        # Groupfolder data lives under <root>/<folder_id>/files on disk.
        folder_num = gf_id
        files_dir = f"{GROUPFOLDERS_DIR}/{folder_num}/files"

        # Check if already restructured: presence of the first main folder
        # on disk is used as the idempotency marker.
        first_sub = list(FOLDER_STRUCTURE.keys())[0]
        if os.path.isdir(f"{files_dir}/{first_sub}"):
            print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): al gestructureerd")
            # Assume the configured file count was moved on a previous run.
            total_moved += FILES_PER_FOLDER
            continue

        # Get parent_id for the 'files' directory entry of this storage.
        files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
        if not files_parent:
            print(f" [SKIP] Geen files entry voor storage {storage_id}")
            continue
        files_parent_id = int(files_parent)

        # Step 1: Create directories on disk (idempotent via exist_ok).
        for main_folder, subs in FOLDER_STRUCTURE.items():
            os.makedirs(f"{files_dir}/{main_folder}", exist_ok=True)
            for sub in subs:
                os.makedirs(f"{files_dir}/{main_folder}/{sub}", exist_ok=True)

        # Step 2: Insert main directory entries into oc_filecache.
        # path_hash must be md5(path) — Nextcloud looks rows up by it.
        dir_values = []
        for main_folder in FOLDER_STRUCTURE.keys():
            path = f"files/{main_folder}"
            path_hash = hashlib.md5(path.encode()).hexdigest()
            etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
            # Columns: storage, path, path_hash, parent, name, mimetype,
            # mimepart, size, mtime, storage_mtime, etag, permissions(31=all),
            # unencrypted_size.
            dir_values.append(
                f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {files_parent_id}, "
                f"'{escape_sql(main_folder)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
            )

        sql_file = "/tmp/dir_insert.sql"
        with open(sql_file, 'w') as f:
            f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
            f.write(",\n".join(dir_values))
            f.write(";\n")
        mysql_exec_file(sql_file)

        # Get main folder IDs (fileid) back — needed as parents for subs.
        main_ids = {}
        for main_folder in FOLDER_STRUCTURE.keys():
            path = f"files/{main_folder}"
            fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
            if fid:
                main_ids[main_folder] = int(fid)

        # Insert sub folder entries; fall back to the 'files' dir as parent
        # if a main folder id could not be resolved.
        sub_values = []
        for main_folder, subs in FOLDER_STRUCTURE.items():
            parent_id = main_ids.get(main_folder, files_parent_id)
            for sub in subs:
                path = f"files/{main_folder}/{sub}"
                path_hash = hashlib.md5(path.encode()).hexdigest()
                etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
                sub_values.append(
                    f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
                    f"'{escape_sql(sub)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
                )

        with open(sql_file, 'w') as f:
            f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
            f.write(",\n".join(sub_values))
            f.write(";\n")
        mysql_exec_file(sql_file)

        # Get sub folder IDs, keyed by "main/sub" leaf path.
        sub_ids = {}
        for main_folder, subs in FOLDER_STRUCTURE.items():
            for sub in subs:
                path = f"files/{main_folder}/{sub}"
                fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
                if fid:
                    sub_ids[f"{main_folder}/{sub}"] = int(fid)

        # Step 3: Move files on disk and update DB.
        # ORDER BY fileid makes the distribution deterministic, so the DB
        # update pass below can recompute each file's leaf from its index.
        file_rows = mysql_exec(f"""
            SELECT fileid, name FROM oc_filecache
            WHERE storage = {storage_id} AND path LIKE 'files/{{ dummy_file_prefix }}%'
            ORDER BY fileid
        """)
        files = []
        for line in file_rows.split('\n'):
            if line.strip():
                parts = line.split('\t')
                files.append((int(parts[0]), parts[1]))

        if not files:
            print(f" [SKIP] Folder {tf_idx}: geen bestanden")
            continue

        # Distribute files across leaf folders: the first `remainder`
        # leaves get one extra file each.
        files_per_leaf = len(files) // len(LEAF_FOLDERS)
        remainder = len(files) % len(LEAF_FOLDERS)

        file_idx = 0
        for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
            leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
            count = files_per_leaf + (1 if leaf_idx < remainder else 0)

            for _ in range(count):
                if file_idx >= len(files):
                    break
                fid, fname = files[file_idx]
                old_path = f"{files_dir}/{fname}"
                new_path_disk = f"{files_dir}/{leaf_path}/{fname}"

                # rename = move within the same filesystem, no extra space.
                if os.path.exists(old_path):
                    os.rename(old_path, new_path_disk)
                # Always advance, even if the file was already moved/missing,
                # to keep index-based leaf assignment in sync with the DB pass.
                file_idx += 1

        # Batch update DB: UPDATE statements written to a file in chunks of
        # BATCH rows. For each batch, recompute which contiguous index range
        # [leaf_start, leaf_end) belongs to each leaf and intersect it with
        # the batch window.
        BATCH = 2000
        for batch_start in range(0, len(files), BATCH):
            updates = []
            for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
                leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
                count = files_per_leaf + (1 if leaf_idx < remainder else 0)
                leaf_start = sum(files_per_leaf + (1 if i < remainder else 0) for i in range(leaf_idx))
                leaf_end = leaf_start + count

                for i in range(max(leaf_start, batch_start), min(leaf_end, batch_start + BATCH)):
                    if i >= len(files):
                        break
                    fid, fname = files[i]
                    new_path_db = f"files/{leaf_path}/{fname}"
                    new_path_hash = hashlib.md5(new_path_db.encode()).hexdigest()
                    updates.append(f"UPDATE oc_filecache SET path='{escape_sql(new_path_db)}', path_hash='{new_path_hash}', parent={leaf_parent_id} WHERE fileid={fid};")

            if updates:
                with open(sql_file, 'w') as f:
                    f.write("\n".join(updates))
                mysql_exec_file(sql_file)

        # Fix ownership so the web server can serve the moved files.
        # NOTE(review): files_dir is interpolated into a shell command;
        # safe only while groupfolders_dir contains no shell metacharacters.
        os.system(f"chown -R www-data:www-data {files_dir}")

        total_moved += len(files)
        elapsed = time.time() - start_time
        print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {len(files)} bestanden verplaatst naar {len(LEAF_FOLDERS)} submappen ({elapsed:.0f}s, totaal: {total_moved})")

    # Final summary.
    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"Voltooid in {elapsed:.0f} seconden")
    print(f"Totaal verplaatst: {total_moved}")
    print(f"Structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen per teamfolder")
    print(f"{'='*60}")
|
|
|
|
|
|
# Script entry point: run the restructuring when executed directly.
if __name__ == "__main__":
    main()
|