Files
metavox-loadtest/templates/fast_db_register.py.j2
Sam428-png a1eda430c8 Initial commit: MetaVox loadtest playbook
Ansible playbook voor het opzetten van een MetaVox loadtest omgeving:
- 50 teamfolders met 10.000 bestanden elk (500K totaal)
- 100 metadata velddefinities (10 teamfolder + 90 file-level)
- 3-niveau mappenstructuur (10 hoofdmappen x 3 submappen)
- ~43M metadata records via directe MySQL inserts
- Geoptimaliseerde database indexes (7 redundante indexes gedropt)

Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API
voor maximale performance.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 16:55:30 +01:00

175 lines
6.3 KiB
Django/Jinja

#!/usr/bin/env python3
"""
Fast DB Register - Direct MySQL insert into oc_filecache
=========================================================
Registreert bestanden die op het filesystem zijn aangemaakt in de Nextcloud
database (oc_filecache). Veel sneller dan occ files:scan (~500K in ~2 min).
"""
import os
import subprocess
import hashlib
import time
# --- Configuration constants ---
# The {{ ... }} placeholders are substituted by Ansible/Jinja2 when this
# template is rendered; the rendered file is plain Python.
DB_NAME = "{{ nextcloud_db_name }}"        # Nextcloud MySQL database name
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"  # filesystem root of __groupfolders storage
NUM_TEAMFOLDERS = {{ num_teamfolders }}      # number of teamfolders to register
FILES_PER_FOLDER = {{ files_per_teamfolder }}  # dummy files per teamfolder
FILE_PREFIX = "{{ dummy_file_prefix }}"      # dummy file name prefix
FILE_EXT = "{{ dummy_file_extension }}"      # dummy file extension (no leading dot)
SQL_BATCH_SIZE = {{ sql_batch_size }}        # rows per multi-row INSERT statement
def mysql_exec(sql):
    """Execute one SQL statement through the mysql CLI as root.

    Runs ``mysql -N -e <sql>`` against DB_NAME and returns the trimmed
    stdout (tab-separated rows, no column headers). Failures are only
    reported to stdout — never raised — so callers must cope with an
    empty result string.
    """
    proc = subprocess.run(
        ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
        capture_output=True,
        text=True,
        timeout=120,
    )
    # Only surface real errors; mysql sometimes writes warnings to stderr
    # while still succeeding.
    if proc.returncode != 0 and proc.stderr.strip():
        print(f" [SQL ERROR] {proc.stderr[:200]}")
    return proc.stdout.strip()
def mysql_exec_file(filepath):
    """Pipe a SQL file into the mysql CLI and return the CompletedProcess.

    Used for the large multi-row INSERT batches; --max-allowed-packet is
    raised so a whole batch fits in a single packet. Errors are printed
    (truncated) but not raised — the caller receives the process object
    and may inspect ``returncode`` itself.
    """
    with open(filepath) as sql_stream:
        proc = subprocess.run(
            ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
            stdin=sql_stream,
            capture_output=True,
            text=True,
            timeout=300,
        )
    if proc.returncode != 0:
        print(f" [SQL ERROR] {proc.stderr[:200]}")
    return proc
def escape_sql(s):
    """Escape backslashes and single quotes for use inside a MySQL
    single-quoted string literal.

    A single-pass character translation; equivalent to replacing each
    backslash with ``\\\\`` and each quote with ``\\'``.
    """
    return s.translate(str.maketrans({"\\": "\\\\", "'": "\\'"}))
def main():
    """Register all dummy files directly in oc_filecache via batched SQL.

    For every teamfolder: resolve its storage id, skip folders that are
    already fully registered, then insert FILES_PER_FOLDER rows into
    oc_filecache in batches of SQL_BATCH_SIZE using INSERT IGNORE
    (re-runs are idempotent). Progress and totals are printed to stdout.
    """
    start_time = time.time()
    now = int(time.time())  # used for mtime/storage_mtime and etag seed
    print("=" * 60)
    print("Fast DB Register - oc_filecache insert")
    print(f"Doel: {NUM_TEAMFOLDERS} folders x {FILES_PER_FOLDER} bestanden")
    print("=" * 60)
    # Get storage mapping (gf_id -> storage numeric_id).
    # The storage id string ends in the groupfolder number, so stripping the
    # fixed prefix and slashes leaves that number.
    print("\nStap 1: Ophalen storage mapping...")
    rows = mysql_exec("""
SELECT s.numeric_id,
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
FROM oc_storages s
WHERE s.id LIKE 'local::%/__groupfolders/%'
""")
    storage_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')  # mysql -N output is tab-separated
            # folder_num -> storage numeric_id
            storage_map[int(parts[1])] = int(parts[0])
    # Get groupfolder mapping (teamfolder index -> groupfolder id).
    print("Stap 2: Ophalen groupfolder mapping...")
    rows = mysql_exec("""
SELECT folder_id, mount_point FROM oc_group_folders
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
ORDER BY folder_id
""")
    gf_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')
            gf_id = int(parts[0])
            # Mount points look like '<prefix>-<n>'; the trailing number is
            # the teamfolder index.
            tf_idx = int(parts[1].split('-')[-1])
            if tf_idx <= NUM_TEAMFOLDERS:
                gf_map[tf_idx] = gf_id
    print(f" {len(gf_map)} teamfolders gevonden")
    # Get mimetype IDs; every dummy file is registered as text/plain.
    txt_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='text/plain'"))
    txt_part = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='text'"))
    total_inserted = 0
    for tf_idx in sorted(gf_map.keys()):
        gf_id = gf_map[tf_idx]
        storage_id = storage_map.get(gf_id)
        if not storage_id:
            print(f" [SKIP] Geen storage voor gf_id {gf_id}")
            continue
        # Check if files already registered (makes re-runs idempotent).
        existing = mysql_exec(f"""
SELECT COUNT(*) FROM oc_filecache
WHERE storage = {storage_id} AND path LIKE 'files/{FILE_PREFIX}%'
""")
        if existing and int(existing) >= FILES_PER_FOLDER:
            print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): {existing} bestanden al geregistreerd")
            total_inserted += int(existing)
            continue
        # Get parent fileid for 'files' directory — all rows point at it.
        files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
        if not files_parent:
            print(f" [SKIP] Geen files entry voor storage {storage_id}")
            continue
        parent_id = int(files_parent)
        # Get file size from disk; all dummy files are assumed identical in
        # size, so one sample suffices.
        sample_file = f"{GROUPFOLDERS_DIR}/{gf_id}/files/{FILE_PREFIX}-00001.{FILE_EXT}"
        try:
            file_size = os.path.getsize(sample_file)
        except OSError:
            file_size = 80  # fallback
        # Generate INSERT statements in batches of SQL_BATCH_SIZE rows.
        values = []
        for i in range(1, FILES_PER_FOLDER + 1):
            fname = f"{FILE_PREFIX}-{i:05d}.{FILE_EXT}"
            path = f"files/{fname}"
            # Nextcloud stores an md5 of the path for fast lookups.
            path_hash = hashlib.md5(path.encode()).hexdigest()
            etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
            # Column order must match the INSERT column list below;
            # 27 = full permissions, 0 = unencrypted_size.
            values.append(
                f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
                f"'{escape_sql(fname)}', {txt_mime}, {txt_part}, {file_size}, "
                f"{now}, {now}, '{etag}', 27, 0)"
            )
            if len(values) >= SQL_BATCH_SIZE:
                sql_file = "/tmp/filecache_batch.sql"
                with open(sql_file, 'w') as f:
                    # INSERT IGNORE so re-runs don't fail on duplicates.
                    f.write("INSERT IGNORE INTO oc_filecache "
                            "(storage, path, path_hash, parent, name, mimetype, mimepart, "
                            "size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
                    f.write(",\n".join(values))
                    f.write(";\n")
                mysql_exec_file(sql_file)
                values = []
        # Flush remaining rows that didn't fill a whole batch.
        if values:
            sql_file = "/tmp/filecache_batch.sql"
            with open(sql_file, 'w') as f:
                f.write("INSERT IGNORE INTO oc_filecache "
                        "(storage, path, path_hash, parent, name, mimetype, mimepart, "
                        "size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
                f.write(",\n".join(values))
                f.write(";\n")
            mysql_exec_file(sql_file)
        # NOTE(review): counts attempted rows, not rows actually inserted —
        # INSERT IGNORE may silently skip duplicates.
        total_inserted += FILES_PER_FOLDER
        elapsed = time.time() - start_time
        rate = total_inserted / elapsed if elapsed > 0 else 0
        print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {FILES_PER_FOLDER} records ({elapsed:.0f}s, totaal: {total_inserted}, {rate:.0f}/s)")
    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"Voltooid in {elapsed:.0f} seconden")
    print(f"Totaal geregistreerd: {total_inserted}")
    print(f"{'='*60}")
if __name__ == "__main__":
main()