Ansible playbook voor het opzetten van een MetaVox loadtest omgeving: - 50 teamfolders met 10.000 bestanden elk (500K totaal) - 100 metadata velddefinities (10 teamfolder + 90 file-level) - 3-niveau mappenstructuur (10 hoofdmappen x 3 submappen) - ~43M metadata records via directe MySQL inserts - Geoptimaliseerde database indexes (7 redundante indexes gedropt) Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API voor maximale performance. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
175 lines
6.3 KiB
Django/Jinja
175 lines
6.3 KiB
Django/Jinja
#!/usr/bin/env python3
|
|
"""
|
|
Fast DB Register - Direct MySQL insert into oc_filecache
|
|
=========================================================
|
|
Registreert bestanden die op het filesystem zijn aangemaakt in de Nextcloud
|
|
database (oc_filecache). Veel sneller dan occ files:scan (~500K in ~2 min).
|
|
"""
|
|
import os
|
|
import subprocess
|
|
import hashlib
|
|
import time
|
|
|
|
# Deployment settings. Every value below is a template placeholder that has to
# be substituted before this file is runnable Python (presumably rendered by
# Ansible's Jinja templating -- confirm against the playbook).

# Database name handed to the mysql command-line client.
DB_NAME = "{{ nextcloud_db_name }}"
# Directory that contains one "<folder id>/files/" subdirectory per teamfolder.
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
# Highest teamfolder index (numeric mount-point suffix) that is processed.
NUM_TEAMFOLDERS = {{ num_teamfolders }}
# Number of file rows generated for each teamfolder.
FILES_PER_FOLDER = {{ files_per_teamfolder }}
# File names are built as "<prefix>-<5-digit index>.<extension>".
FILE_PREFIX = "{{ dummy_file_prefix }}"
FILE_EXT = "{{ dummy_file_extension }}"
# Maximum number of rows placed in a single INSERT statement.
SQL_BATCH_SIZE = {{ sql_batch_size }}
|
|
|
|
|
|
def mysql_exec(sql):
    """Run a single SQL statement through the ``mysql`` client and return its output.

    The client is started as user ``root`` against ``DB_NAME`` with ``-N`` and
    ``-e``, so the statement is passed on the command line and the result
    comes back as plain text on stdout.

    Args:
        sql: The SQL text to execute.

    Returns:
        The client's stdout with surrounding whitespace stripped. A failed
        statement is only reported on the console; the (usually empty) stdout
        is still returned.

    Raises:
        subprocess.TimeoutExpired: If the client runs longer than 120 seconds.
    """
    command = ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql]
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=120,
    )
    error_text = completed.stderr
    # Only report when the client failed AND actually said something.
    if completed.returncode != 0 and error_text.strip():
        print(f" [SQL ERROR] {error_text[:200]}")
    return completed.stdout.strip()
|
|
|
|
|
|
def mysql_exec_file(filepath):
    """Feed an SQL script file to the ``mysql`` client on stdin.

    The client is started as user ``root`` against ``DB_NAME`` with
    ``--max-allowed-packet=64M``.

    Args:
        filepath: Path of the SQL file to execute.

    Returns:
        The ``subprocess.CompletedProcess`` of the client run, so callers can
        inspect ``returncode``, ``stdout`` and ``stderr``.

    Raises:
        OSError: If ``filepath`` cannot be opened.
        subprocess.TimeoutExpired: If the client runs longer than 300 seconds.
    """
    command = ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"]
    with open(filepath) as sql_stream:
        completed = subprocess.run(
            command,
            stdin=sql_stream,
            capture_output=True,
            text=True,
            timeout=300,
        )
    failed = completed.returncode != 0
    if failed:
        # Truncate so one huge error does not flood the console.
        print(f" [SQL ERROR] {completed.stderr[:200]}")
    return completed
|
|
|
|
|
|
def escape_sql(s):
    """Backslash-escape backslashes and single quotes in *s*.

    Intended for text that is placed inside a single-quoted SQL string
    literal. Every ``\\`` becomes ``\\\\`` and every ``'`` becomes ``\\'``;
    all other characters are returned unchanged.
    """
    # One pass over the string: each source character is mapped independently,
    # so a freshly inserted backslash is never escaped a second time.
    replacements = {
        ord("\\"): "\\\\",
        ord("'"): "\\'",
    }
    return s.translate(replacements)
|
|
|
|
|
|
def _storage_folder_id(storage_identifier):
    """Return the groupfolder id at the end of an ``oc_storages.id`` value, or None."""
    # The ids selected below match 'local::<data dir>/__groupfolders/<n>/'.
    # Taking the last path component avoids assuming a particular data dir.
    tail = storage_identifier.strip().rstrip("/").rsplit("/", 1)[-1]
    return int(tail) if tail.isdecimal() else None


def _mimetype_id(mimetype):
    """Return the ``oc_mimetypes.id`` of *mimetype*; raise ValueError if it is absent."""
    raw = mysql_exec(f"SELECT id FROM oc_mimetypes WHERE mimetype='{mimetype}'")
    if not raw.isdecimal():
        raise ValueError(
            f"mimetype {mimetype!r} not found in oc_mimetypes (query returned {raw!r})"
        )
    return int(raw)


def _insert_filecache_batch(values):
    """Run one multi-row INSERT for *values*; return True when mysql succeeded."""
    # Local import: the file-level import block is left untouched.
    import tempfile

    # A private, uniquely named temp file instead of a fixed /tmp path, so
    # parallel runs cannot overwrite each other's batch and nothing is left
    # behind afterwards.
    handle = tempfile.NamedTemporaryFile(
        "w", prefix="filecache_batch_", suffix=".sql", delete=False
    )
    try:
        with handle:
            handle.write("INSERT IGNORE INTO oc_filecache "
                         "(storage, path, path_hash, parent, name, mimetype, mimepart, "
                         "size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
            handle.write(",\n".join(values))
            handle.write(";\n")
        return mysql_exec_file(handle.name).returncode == 0
    finally:
        os.unlink(handle.name)


def main():
    """Register the generated teamfolder files in ``oc_filecache``.

    Steps:
      1. Map groupfolder ids to storage ``numeric_id`` values (``oc_storages``).
      2. Map teamfolder indexes (numeric mount-point suffix) to groupfolder
         ids (``oc_group_folders``).
      3. For every teamfolder that is not fully registered yet, insert one
         row per expected file in batches of ``SQL_BATCH_SIZE`` rows.

    Progress and a final summary are printed to stdout. Rows are only counted
    as registered when the batch that contained them was executed
    successfully.

    Raises:
        ValueError: If the ``text/plain`` or ``text`` mimetype is missing from
            ``oc_mimetypes``.
    """
    start_time = time.time()
    now = int(time.time())

    print("=" * 60)
    print("Fast DB Register - oc_filecache insert")
    print(f"Doel: {NUM_TEAMFOLDERS} folders x {FILES_PER_FOLDER} bestanden")
    print("=" * 60)

    # Get storage mapping (gf_id -> storage numeric_id). The folder number is
    # parsed from the storage id in Python so any data directory works.
    print("\nStap 1: Ophalen storage mapping...")
    rows = mysql_exec("""
        SELECT s.numeric_id, s.id
        FROM oc_storages s
        WHERE s.id LIKE 'local::%/__groupfolders/%'
    """)
    storage_map = {}
    for line in rows.split('\n'):
        numeric_id, _, storage_identifier = line.partition('\t')
        folder_id = _storage_folder_id(storage_identifier)
        if folder_id is not None and numeric_id.strip().isdecimal():
            storage_map[folder_id] = int(numeric_id)

    # Get groupfolder mapping (teamfolder index -> gf_id)
    print("Stap 2: Ophalen groupfolder mapping...")
    rows = mysql_exec("""
        SELECT folder_id, mount_point FROM oc_group_folders
        WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
        ORDER BY folder_id
    """)
    gf_map = {}
    for line in rows.split('\n'):
        if not line.strip():
            continue
        parts = line.split('\t')
        try:
            gf_id = int(parts[0])
            tf_idx = int(parts[1].split('-')[-1])
        except (IndexError, ValueError):
            # Mount point matches the prefix but has no numeric suffix, so it
            # is not one of the numbered teamfolders.
            continue
        if tf_idx <= NUM_TEAMFOLDERS:
            gf_map[tf_idx] = gf_id

    print(f" {len(gf_map)} teamfolders gevonden")

    # Get mimetype IDs
    txt_mime = _mimetype_id("text/plain")
    txt_part = _mimetype_id("text")

    total_inserted = 0

    for tf_idx in sorted(gf_map):
        gf_id = gf_map[tf_idx]
        storage_id = storage_map.get(gf_id)
        if not storage_id:
            print(f" [SKIP] Geen storage voor gf_id {gf_id}")
            continue

        # Check if files already registered
        existing = mysql_exec(f"""
            SELECT COUNT(*) FROM oc_filecache
            WHERE storage = {storage_id} AND path LIKE 'files/{FILE_PREFIX}%'
        """)
        if existing and int(existing) >= FILES_PER_FOLDER:
            print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): {existing} bestanden al geregistreerd")
            total_inserted += int(existing)
            continue

        # Get parent fileid for 'files' directory
        files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
        if not files_parent:
            print(f" [SKIP] Geen files entry voor storage {storage_id}")
            continue
        parent_id = int(files_parent)

        # Get file size from disk; the first file's size is used for every row.
        sample_file = f"{GROUPFOLDERS_DIR}/{gf_id}/files/{FILE_PREFIX}-00001.{FILE_EXT}"
        try:
            file_size = os.path.getsize(sample_file)
        except OSError:
            file_size = 80  # fallback

        # Generate INSERT statements in batches
        inserted = 0
        failed_batches = 0
        values = []
        for i in range(1, FILES_PER_FOLDER + 1):
            fname = f"{FILE_PREFIX}-{i:05d}.{FILE_EXT}"
            path = f"files/{fname}"
            path_hash = hashlib.md5(path.encode()).hexdigest()
            etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()

            # NOTE(review): permissions 27 presumably is Nextcloud's
            # read|update|delete|share bitmask -- confirm.
            values.append(
                f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
                f"'{escape_sql(fname)}', {txt_mime}, {txt_part}, {file_size}, "
                f"{now}, {now}, '{etag}', 27, 0)"
            )

            if len(values) >= SQL_BATCH_SIZE:
                if _insert_filecache_batch(values):
                    inserted += len(values)
                else:
                    failed_batches += 1
                values = []

        # Flush remaining
        if values:
            if _insert_filecache_batch(values):
                inserted += len(values)
            else:
                failed_batches += 1

        total_inserted += inserted
        elapsed = time.time() - start_time
        rate = total_inserted / elapsed if elapsed > 0 else 0
        if failed_batches:
            print(f" [WARN] Folder {tf_idx} (gf_id={gf_id}): {failed_batches} batch(es) mislukt, {inserted} records geregistreerd")
        else:
            print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {inserted} records ({elapsed:.0f}s, totaal: {total_inserted}, {rate:.0f}/s)")

    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"Voltooid in {elapsed:.0f} seconden")
    print(f"Totaal geregistreerd: {total_inserted}")
    print(f"{'='*60}")
|
|
|
|
|
|
# Run the registration only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|