Initial commit: MetaVox loadtest playbook
Ansible playbook voor het opzetten van een MetaVox loadtest omgeving: - 50 teamfolders met 10.000 bestanden elk (500K totaal) - 100 metadata velddefinities (10 teamfolder + 90 file-level) - 3-niveau mappenstructuur (10 hoofdmappen x 3 submappen) - ~43M metadata records via directe MySQL inserts - Geoptimaliseerde database indexes (7 redundante indexes gedropt) Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API voor maximale performance. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
257
templates/create_folder_structure.py.j2
Normal file
257
templates/create_folder_structure.py.j2
Normal file
@@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env python3
"""
Create Folder Structure - create folder tree and move files into it
===================================================================
Creates a 3-level folder structure inside each teamfolder and moves the
files evenly across the leaf folders.

10 main folders x 3 subfolders = 30 leaf folders per teamfolder.
Files are moved with os.rename (no extra disk space needed).
The database (oc_filecache) is updated with the new paths and parent IDs.
"""
import os
import subprocess
import hashlib
import time

# Values below are rendered by Ansible (Jinja2) before this script runs.
DB_NAME = "{{ nextcloud_db_name }}"
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
NUM_TEAMFOLDERS = {{ num_teamfolders }}
FILES_PER_FOLDER = {{ files_per_teamfolder }}

# Rendered into a dict: {main_folder_name: [subfolder_name, ...]}
FOLDER_STRUCTURE = {{ folder_structure | to_json }}

# Build flat list of "main/sub" leaf paths. The loop variable is named
# main_folder (not "main") so it does not shadow the main() entry point
# defined later in this module.
LEAF_FOLDERS = [
    f"{main_folder}/{sub}"
    for main_folder, subs in FOLDER_STRUCTURE.items()
    for sub in subs
]

print(f"Folder structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen")
def mysql_exec(sql):
    """Run a single SQL statement against DB_NAME through the mysql CLI.

    Uses -N (skip column headers) so output is tab-separated data rows only.
    Errors are printed (truncated to 200 chars) but never raised; the
    stripped stdout is returned either way.
    """
    proc = subprocess.run(
        ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
        capture_output=True,
        text=True,
        timeout=120,
    )
    failed = proc.returncode != 0
    if failed and proc.stderr.strip():
        print(f" [SQL ERROR] {proc.stderr[:200]}")
    return proc.stdout.strip()
def mysql_exec_file(filepath):
    """Pipe an SQL file into the mysql CLI on stdin (bulk inserts/updates).

    --max-allowed-packet=64M accommodates the large multi-row statements
    this playbook generates. Errors are printed (truncated) but not raised;
    the CompletedProcess is returned for inspection by the caller.
    """
    with open(filepath) as sql_stream:
        proc = subprocess.run(
            ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
            stdin=sql_stream,
            capture_output=True,
            text=True,
            timeout=300,
        )
        if proc.returncode != 0:
            print(f" [SQL ERROR] {proc.stderr[:200]}")
        return proc
def escape_sql(s):
    """Backslash-escape characters that would break an inline MySQL
    single-quoted string literal: backslash and single quote."""
    escaped = []
    for ch in s:
        if ch == "\\" or ch == "'":
            escaped.append("\\")
        escaped.append(ch)
    return "".join(escaped)
def main():
    """Restructure every teamfolder: create the folder tree on disk, move the
    dummy files evenly into the leaf folders with os.rename, and mirror the
    result in oc_filecache via direct SQL (new paths, path hashes, parents).

    Idempotent per teamfolder: a folder whose first main directory already
    exists on disk is skipped.
    """
    start_time = time.time()
    # One timestamp reused for every mtime/etag written in this run.
    now = int(time.time())

    # Get storage mapping: groupfolder number -> oc_storages.numeric_id.
    # The REPLACE chain strips the local:: path prefix and slashes, leaving
    # only the numeric groupfolder directory name.
    print("Ophalen storage mapping...")
    rows = mysql_exec("""
        SELECT s.numeric_id,
               REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
        FROM oc_storages s
        WHERE s.id LIKE 'local::%/__groupfolders/%'
    """)
    storage_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')
            storage_map[int(parts[1])] = int(parts[0])

    # Get groupfolder mapping: teamfolder index (numeric suffix of the
    # mount point) -> oc_group_folders.folder_id.
    rows = mysql_exec("""
        SELECT folder_id, mount_point FROM oc_group_folders
        WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
        ORDER BY folder_id
    """)
    gf_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')
            gf_id = int(parts[0])
            tf_idx = int(parts[1].split('-')[-1])
            if tf_idx <= NUM_TEAMFOLDERS:
                gf_map[tf_idx] = gf_id

    print(f"{len(gf_map)} teamfolders gevonden")

    # Get mimetype ID for directories (used for both mimetype and mimepart).
    dir_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='httpd/unix-directory'"))

    total_moved = 0

    for tf_idx in sorted(gf_map.keys()):
        gf_id = gf_map[tf_idx]
        storage_id = storage_map.get(gf_id)
        if not storage_id:
            print(f" [SKIP] Geen storage voor gf_id {gf_id}")
            continue

        # On disk, groupfolders live under their folder_id.
        folder_num = gf_id
        files_dir = f"{GROUPFOLDERS_DIR}/{folder_num}/files"

        # Check if already restructured: presence of the first main folder
        # on disk means a previous run handled this teamfolder.
        first_sub = list(FOLDER_STRUCTURE.keys())[0]
        if os.path.isdir(f"{files_dir}/{first_sub}"):
            print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): al gestructureerd")
            total_moved += FILES_PER_FOLDER
            continue

        # Get parent_id for the 'files' directory — parent of the new
        # top-level folders in oc_filecache.
        files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
        if not files_parent:
            print(f" [SKIP] Geen files entry voor storage {storage_id}")
            continue
        files_parent_id = int(files_parent)

        # Step 1: Create directories on disk (idempotent via exist_ok).
        for main_folder, subs in FOLDER_STRUCTURE.items():
            os.makedirs(f"{files_dir}/{main_folder}", exist_ok=True)
            for sub in subs:
                os.makedirs(f"{files_dir}/{main_folder}/{sub}", exist_ok=True)

        # Step 2: Insert main directory entries into oc_filecache.
        # path_hash is MD5 of the path (Nextcloud convention); permissions
        # 31 = full CRUD+share; size 0 for directories.
        dir_values = []
        for main_folder in FOLDER_STRUCTURE.keys():
            path = f"files/{main_folder}"
            path_hash = hashlib.md5(path.encode()).hexdigest()
            etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
            dir_values.append(
                f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {files_parent_id}, "
                f"'{escape_sql(main_folder)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
            )

        # Bulk insert through a temp file — faster than many -e invocations.
        sql_file = "/tmp/dir_insert.sql"
        with open(sql_file, 'w') as f:
            f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
            f.write(",\n".join(dir_values))
            f.write(";\n")
        mysql_exec_file(sql_file)

        # Get main folder IDs (needed as parents for the subfolders).
        main_ids = {}
        for main_folder in FOLDER_STRUCTURE.keys():
            path = f"files/{main_folder}"
            fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
            if fid:
                main_ids[main_folder] = int(fid)

        # Insert sub folder entries (parent falls back to 'files' if a main
        # folder id could not be resolved).
        sub_values = []
        for main_folder, subs in FOLDER_STRUCTURE.items():
            parent_id = main_ids.get(main_folder, files_parent_id)
            for sub in subs:
                path = f"files/{main_folder}/{sub}"
                path_hash = hashlib.md5(path.encode()).hexdigest()
                etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
                sub_values.append(
                    f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
                    f"'{escape_sql(sub)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
                )

        with open(sql_file, 'w') as f:
            f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
            f.write(",\n".join(sub_values))
            f.write(";\n")
        mysql_exec_file(sql_file)

        # Get sub folder IDs, keyed by "main/sub" leaf path.
        sub_ids = {}
        for main_folder, subs in FOLDER_STRUCTURE.items():
            for sub in subs:
                path = f"files/{main_folder}/{sub}"
                fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
                if fid:
                    sub_ids[f"{main_folder}/{sub}"] = int(fid)

        # Step 3: Move files on disk and update DB.
        # ORDER BY fileid makes the on-disk move order and the later batched
        # DB update agree on which file lands in which leaf folder.
        file_rows = mysql_exec(f"""
            SELECT fileid, name FROM oc_filecache
            WHERE storage = {storage_id} AND path LIKE 'files/{{ dummy_file_prefix }}%'
            ORDER BY fileid
        """)
        files = []
        for line in file_rows.split('\n'):
            if line.strip():
                parts = line.split('\t')
                files.append((int(parts[0]), parts[1]))

        if not files:
            print(f" [SKIP] Folder {tf_idx}: geen bestanden")
            continue

        # Distribute files across leaf folders: the first `remainder`
        # leaves get one extra file each.
        files_per_leaf = len(files) // len(LEAF_FOLDERS)
        remainder = len(files) % len(LEAF_FOLDERS)

        file_idx = 0
        for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
            leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
            count = files_per_leaf + (1 if leaf_idx < remainder else 0)

            for _ in range(count):
                if file_idx >= len(files):
                    break
                fid, fname = files[file_idx]
                old_path = f"{files_dir}/{fname}"
                new_path_disk = f"{files_dir}/{leaf_path}/{fname}"

                # rename is a metadata-only move on the same filesystem —
                # no extra disk space, near-instant.
                if os.path.exists(old_path):
                    os.rename(old_path, new_path_disk)
                file_idx += 1

        # Batch update DB: statements are written to a file in chunks of
        # BATCH to bound file size and mysql packet usage. leaf_start/leaf_end
        # recompute the same contiguous slice each leaf received above.
        BATCH = 2000
        for batch_start in range(0, len(files), BATCH):
            updates = []
            for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
                leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
                count = files_per_leaf + (1 if leaf_idx < remainder else 0)
                leaf_start = sum(files_per_leaf + (1 if i < remainder else 0) for i in range(leaf_idx))
                leaf_end = leaf_start + count

                # Only the part of this leaf's slice that overlaps the
                # current batch window is emitted here.
                for i in range(max(leaf_start, batch_start), min(leaf_end, batch_start + BATCH)):
                    if i >= len(files):
                        break
                    fid, fname = files[i]
                    new_path_db = f"files/{leaf_path}/{fname}"
                    new_path_hash = hashlib.md5(new_path_db.encode()).hexdigest()
                    updates.append(f"UPDATE oc_filecache SET path='{escape_sql(new_path_db)}', path_hash='{new_path_hash}', parent={leaf_parent_id} WHERE fileid={fid};")

            if updates:
                with open(sql_file, 'w') as f:
                    f.write("\n".join(updates))
                mysql_exec_file(sql_file)

        # Fix ownership so the web server can serve the moved files.
        os.system(f"chown -R www-data:www-data {files_dir}")

        total_moved += len(files)
        elapsed = time.time() - start_time
        print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {len(files)} bestanden verplaatst naar {len(LEAF_FOLDERS)} submappen ({elapsed:.0f}s, totaal: {total_moved})")

    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"Voltooid in {elapsed:.0f} seconden")
    print(f"Totaal verplaatst: {total_moved}")
    print(f"Structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen per teamfolder")
    print(f"{'='*60}")
# Script entry point: run the restructuring when executed directly.
if __name__ == "__main__":
    main()
Reference in New Issue
Block a user