Initial commit: MetaVox loadtest playbook
Ansible playbook voor het opzetten van een MetaVox loadtest omgeving:

- 50 teamfolders met 10.000 bestanden elk (500K totaal)
- 100 metadata velddefinities (10 teamfolder + 90 file-level)
- 3-niveau mappenstructuur (10 hoofdmappen x 3 submappen)
- ~43M metadata records via directe MySQL inserts
- Geoptimaliseerde database indexes (7 redundante indexes gedropt)

Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API voor maximale performance.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
174
templates/fast_db_register.py.j2
Normal file
174
templates/fast_db_register.py.j2
Normal file
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fast DB Register - Direct MySQL insert into oc_filecache
|
||||
=========================================================
|
||||
Registreert bestanden die op het filesystem zijn aangemaakt in de Nextcloud
|
||||
database (oc_filecache). Veel sneller dan occ files:scan (~500K in ~2 min).
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import hashlib
|
||||
import time
|
||||
|
||||
DB_NAME = "{{ nextcloud_db_name }}"
|
||||
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
|
||||
NUM_TEAMFOLDERS = {{ num_teamfolders }}
|
||||
FILES_PER_FOLDER = {{ files_per_teamfolder }}
|
||||
FILE_PREFIX = "{{ dummy_file_prefix }}"
|
||||
FILE_EXT = "{{ dummy_file_extension }}"
|
||||
SQL_BATCH_SIZE = {{ sql_batch_size }}
|
||||
|
||||
|
||||
def mysql_exec(sql):
|
||||
result = subprocess.run(
|
||||
["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
|
||||
capture_output=True, text=True, timeout=120
|
||||
)
|
||||
if result.returncode != 0 and result.stderr.strip():
|
||||
print(f" [SQL ERROR] {result.stderr[:200]}")
|
||||
return result.stdout.strip()
|
||||
|
||||
|
||||
def mysql_exec_file(filepath):
|
||||
with open(filepath) as f:
|
||||
result = subprocess.run(
|
||||
["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
|
||||
stdin=f, capture_output=True, text=True, timeout=300
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f" [SQL ERROR] {result.stderr[:200]}")
|
||||
return result
|
||||
|
||||
|
||||
def escape_sql(s):
|
||||
return s.replace("\\", "\\\\").replace("'", "\\'")
|
||||
|
||||
|
||||
def main():
|
||||
start_time = time.time()
|
||||
now = int(time.time())
|
||||
|
||||
print("=" * 60)
|
||||
print("Fast DB Register - oc_filecache insert")
|
||||
print(f"Doel: {NUM_TEAMFOLDERS} folders x {FILES_PER_FOLDER} bestanden")
|
||||
print("=" * 60)
|
||||
|
||||
# Get storage mapping (gf_id -> storage numeric_id)
|
||||
print("\nStap 1: Ophalen storage mapping...")
|
||||
rows = mysql_exec("""
|
||||
SELECT s.numeric_id,
|
||||
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
|
||||
FROM oc_storages s
|
||||
WHERE s.id LIKE 'local::%/__groupfolders/%'
|
||||
""")
|
||||
storage_map = {}
|
||||
for line in rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
storage_map[int(parts[1])] = int(parts[0])
|
||||
|
||||
# Get groupfolder mapping
|
||||
print("Stap 2: Ophalen groupfolder mapping...")
|
||||
rows = mysql_exec("""
|
||||
SELECT folder_id, mount_point FROM oc_group_folders
|
||||
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
|
||||
ORDER BY folder_id
|
||||
""")
|
||||
gf_map = {}
|
||||
for line in rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
gf_id = int(parts[0])
|
||||
tf_idx = int(parts[1].split('-')[-1])
|
||||
if tf_idx <= NUM_TEAMFOLDERS:
|
||||
gf_map[tf_idx] = gf_id
|
||||
|
||||
print(f" {len(gf_map)} teamfolders gevonden")
|
||||
|
||||
# Get mimetype IDs
|
||||
txt_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='text/plain'"))
|
||||
txt_part = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='text'"))
|
||||
|
||||
total_inserted = 0
|
||||
|
||||
for tf_idx in sorted(gf_map.keys()):
|
||||
gf_id = gf_map[tf_idx]
|
||||
storage_id = storage_map.get(gf_id)
|
||||
if not storage_id:
|
||||
print(f" [SKIP] Geen storage voor gf_id {gf_id}")
|
||||
continue
|
||||
|
||||
# Check if files already registered
|
||||
existing = mysql_exec(f"""
|
||||
SELECT COUNT(*) FROM oc_filecache
|
||||
WHERE storage = {storage_id} AND path LIKE 'files/{FILE_PREFIX}%'
|
||||
""")
|
||||
if existing and int(existing) >= FILES_PER_FOLDER:
|
||||
print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): {existing} bestanden al geregistreerd")
|
||||
total_inserted += int(existing)
|
||||
continue
|
||||
|
||||
# Get parent fileid for 'files' directory
|
||||
files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
|
||||
if not files_parent:
|
||||
print(f" [SKIP] Geen files entry voor storage {storage_id}")
|
||||
continue
|
||||
parent_id = int(files_parent)
|
||||
|
||||
# Get file size from disk
|
||||
sample_file = f"{GROUPFOLDERS_DIR}/{gf_id}/files/{FILE_PREFIX}-00001.{FILE_EXT}"
|
||||
try:
|
||||
file_size = os.path.getsize(sample_file)
|
||||
except OSError:
|
||||
file_size = 80 # fallback
|
||||
|
||||
# Generate INSERT statements in batches
|
||||
values = []
|
||||
for i in range(1, FILES_PER_FOLDER + 1):
|
||||
fname = f"{FILE_PREFIX}-{i:05d}.{FILE_EXT}"
|
||||
path = f"files/{fname}"
|
||||
path_hash = hashlib.md5(path.encode()).hexdigest()
|
||||
etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
|
||||
|
||||
values.append(
|
||||
f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
|
||||
f"'{escape_sql(fname)}', {txt_mime}, {txt_part}, {file_size}, "
|
||||
f"{now}, {now}, '{etag}', 27, 0)"
|
||||
)
|
||||
|
||||
if len(values) >= SQL_BATCH_SIZE:
|
||||
sql_file = "/tmp/filecache_batch.sql"
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("INSERT IGNORE INTO oc_filecache "
|
||||
"(storage, path, path_hash, parent, name, mimetype, mimepart, "
|
||||
"size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
|
||||
f.write(",\n".join(values))
|
||||
f.write(";\n")
|
||||
mysql_exec_file(sql_file)
|
||||
values = []
|
||||
|
||||
# Flush remaining
|
||||
if values:
|
||||
sql_file = "/tmp/filecache_batch.sql"
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("INSERT IGNORE INTO oc_filecache "
|
||||
"(storage, path, path_hash, parent, name, mimetype, mimepart, "
|
||||
"size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
|
||||
f.write(",\n".join(values))
|
||||
f.write(";\n")
|
||||
mysql_exec_file(sql_file)
|
||||
|
||||
total_inserted += FILES_PER_FOLDER
|
||||
elapsed = time.time() - start_time
|
||||
rate = total_inserted / elapsed if elapsed > 0 else 0
|
||||
print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {FILES_PER_FOLDER} records ({elapsed:.0f}s, totaal: {total_inserted}, {rate:.0f}/s)")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Voltooid in {elapsed:.0f} seconden")
|
||||
print(f"Totaal geregistreerd: {total_inserted}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user