Files
metavox-loadtest/templates/create_folder_structure.py.j2
Sam428-png a1eda430c8 Initial commit: MetaVox loadtest playbook
Ansible playbook voor het opzetten van een MetaVox loadtest omgeving:
- 50 teamfolders met 10.000 bestanden elk (500K totaal)
- 100 metadata velddefinities (10 teamfolder + 90 file-level)
- 3-niveau mappenstructuur (10 hoofdmappen x 3 submappen)
- ~43M metadata records via directe MySQL inserts
- Geoptimaliseerde database indexes (7 redundante indexes gedropt)

Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API
voor maximale performance.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 16:55:30 +01:00

258 lines
9.9 KiB
Django/Jinja

#!/usr/bin/env python3
"""
Create Folder Structure - Mappenstructuur aanmaken en bestanden verplaatsen
===========================================================================
Maakt een 3-niveau mappenstructuur aan binnen elke teamfolder en verplaatst
de bestanden gelijkmatig over de leaf folders.
10 hoofdmappen x 3 submappen = 30 leaf folders per teamfolder.
Bestanden worden verplaatst met os.rename (geen extra diskruimte nodig).
Database (oc_filecache) wordt bijgewerkt met nieuwe paden en parent IDs.
"""
import os
import subprocess
import hashlib
import time
DB_NAME = "{{ nextcloud_db_name }}"
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
NUM_TEAMFOLDERS = {{ num_teamfolders }}
FILES_PER_FOLDER = {{ files_per_teamfolder }}
FOLDER_STRUCTURE = {{ folder_structure | to_json }}
# Build flat list of leaf paths
LEAF_FOLDERS = []
for main, subs in FOLDER_STRUCTURE.items():
for sub in subs:
LEAF_FOLDERS.append(f"{main}/{sub}")
print(f"Folder structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen")
def mysql_exec(sql):
result = subprocess.run(
["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
capture_output=True, text=True, timeout=120
)
if result.returncode != 0 and result.stderr.strip():
print(f" [SQL ERROR] {result.stderr[:200]}")
return result.stdout.strip()
def mysql_exec_file(filepath):
with open(filepath) as f:
result = subprocess.run(
["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
stdin=f, capture_output=True, text=True, timeout=300
)
if result.returncode != 0:
print(f" [SQL ERROR] {result.stderr[:200]}")
return result
def escape_sql(s):
return s.replace("\\", "\\\\").replace("'", "\\'")
def main():
start_time = time.time()
now = int(time.time())
# Get storage mapping
print("Ophalen storage mapping...")
rows = mysql_exec("""
SELECT s.numeric_id,
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
FROM oc_storages s
WHERE s.id LIKE 'local::%/__groupfolders/%'
""")
storage_map = {}
for line in rows.split('\n'):
if line.strip():
parts = line.split('\t')
storage_map[int(parts[1])] = int(parts[0])
# Get groupfolder mapping
rows = mysql_exec("""
SELECT folder_id, mount_point FROM oc_group_folders
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
ORDER BY folder_id
""")
gf_map = {}
for line in rows.split('\n'):
if line.strip():
parts = line.split('\t')
gf_id = int(parts[0])
tf_idx = int(parts[1].split('-')[-1])
if tf_idx <= NUM_TEAMFOLDERS:
gf_map[tf_idx] = gf_id
print(f"{len(gf_map)} teamfolders gevonden")
# Get mimetype ID for directories
dir_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='httpd/unix-directory'"))
total_moved = 0
for tf_idx in sorted(gf_map.keys()):
gf_id = gf_map[tf_idx]
storage_id = storage_map.get(gf_id)
if not storage_id:
print(f" [SKIP] Geen storage voor gf_id {gf_id}")
continue
folder_num = gf_id
files_dir = f"{GROUPFOLDERS_DIR}/{folder_num}/files"
# Check if already restructured
first_sub = list(FOLDER_STRUCTURE.keys())[0]
if os.path.isdir(f"{files_dir}/{first_sub}"):
print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): al gestructureerd")
total_moved += FILES_PER_FOLDER
continue
# Get parent_id for 'files' directory
files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
if not files_parent:
print(f" [SKIP] Geen files entry voor storage {storage_id}")
continue
files_parent_id = int(files_parent)
# Step 1: Create directories on disk
for main_folder, subs in FOLDER_STRUCTURE.items():
os.makedirs(f"{files_dir}/{main_folder}", exist_ok=True)
for sub in subs:
os.makedirs(f"{files_dir}/{main_folder}/{sub}", exist_ok=True)
# Step 2: Insert main directory entries into oc_filecache
dir_values = []
for main_folder in FOLDER_STRUCTURE.keys():
path = f"files/{main_folder}"
path_hash = hashlib.md5(path.encode()).hexdigest()
etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
dir_values.append(
f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {files_parent_id}, "
f"'{escape_sql(main_folder)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
)
sql_file = "/tmp/dir_insert.sql"
with open(sql_file, 'w') as f:
f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
f.write(",\n".join(dir_values))
f.write(";\n")
mysql_exec_file(sql_file)
# Get main folder IDs
main_ids = {}
for main_folder in FOLDER_STRUCTURE.keys():
path = f"files/{main_folder}"
fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
if fid:
main_ids[main_folder] = int(fid)
# Insert sub folder entries
sub_values = []
for main_folder, subs in FOLDER_STRUCTURE.items():
parent_id = main_ids.get(main_folder, files_parent_id)
for sub in subs:
path = f"files/{main_folder}/{sub}"
path_hash = hashlib.md5(path.encode()).hexdigest()
etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
sub_values.append(
f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
f"'{escape_sql(sub)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
)
with open(sql_file, 'w') as f:
f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
f.write(",\n".join(sub_values))
f.write(";\n")
mysql_exec_file(sql_file)
# Get sub folder IDs
sub_ids = {}
for main_folder, subs in FOLDER_STRUCTURE.items():
for sub in subs:
path = f"files/{main_folder}/{sub}"
fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
if fid:
sub_ids[f"{main_folder}/{sub}"] = int(fid)
# Step 3: Move files on disk and update DB
file_rows = mysql_exec(f"""
SELECT fileid, name FROM oc_filecache
WHERE storage = {storage_id} AND path LIKE 'files/{{ dummy_file_prefix }}%'
ORDER BY fileid
""")
files = []
for line in file_rows.split('\n'):
if line.strip():
parts = line.split('\t')
files.append((int(parts[0]), parts[1]))
if not files:
print(f" [SKIP] Folder {tf_idx}: geen bestanden")
continue
# Distribute files across leaf folders
files_per_leaf = len(files) // len(LEAF_FOLDERS)
remainder = len(files) % len(LEAF_FOLDERS)
file_idx = 0
for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
count = files_per_leaf + (1 if leaf_idx < remainder else 0)
for _ in range(count):
if file_idx >= len(files):
break
fid, fname = files[file_idx]
old_path = f"{files_dir}/{fname}"
new_path_disk = f"{files_dir}/{leaf_path}/{fname}"
if os.path.exists(old_path):
os.rename(old_path, new_path_disk)
file_idx += 1
# Batch update DB
BATCH = 2000
for batch_start in range(0, len(files), BATCH):
updates = []
for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
count = files_per_leaf + (1 if leaf_idx < remainder else 0)
leaf_start = sum(files_per_leaf + (1 if i < remainder else 0) for i in range(leaf_idx))
leaf_end = leaf_start + count
for i in range(max(leaf_start, batch_start), min(leaf_end, batch_start + BATCH)):
if i >= len(files):
break
fid, fname = files[i]
new_path_db = f"files/{leaf_path}/{fname}"
new_path_hash = hashlib.md5(new_path_db.encode()).hexdigest()
updates.append(f"UPDATE oc_filecache SET path='{escape_sql(new_path_db)}', path_hash='{new_path_hash}', parent={leaf_parent_id} WHERE fileid={fid};")
if updates:
with open(sql_file, 'w') as f:
f.write("\n".join(updates))
mysql_exec_file(sql_file)
# Fix ownership
os.system(f"chown -R www-data:www-data {files_dir}")
total_moved += len(files)
elapsed = time.time() - start_time
print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {len(files)} bestanden verplaatst naar {len(LEAF_FOLDERS)} submappen ({elapsed:.0f}s, totaal: {total_moved})")
elapsed = time.time() - start_time
print(f"\n{'='*60}")
print(f"Voltooid in {elapsed:.0f} seconden")
print(f"Totaal verplaatst: {total_moved}")
print(f"Structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen per teamfolder")
print(f"{'='*60}")
if __name__ == "__main__":
main()