#!/usr/bin/env python3
"""
Fast DB Register - Direct MySQL insert into oc_filecache
=========================================================
Registers files that were created on the filesystem in the Nextcloud
database (oc_filecache).
Much faster than occ files:scan (~500K in ~2 min).

NOTE: this file is a template. The ``{{ ... }}`` placeholders (Jinja2-style)
must be rendered by the deployment tooling before the script can run.
"""

import os
import subprocess
import hashlib
import tempfile
import time

DB_NAME = "{{ nextcloud_db_name }}"
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
NUM_TEAMFOLDERS = {{ num_teamfolders }}
FILES_PER_FOLDER = {{ files_per_teamfolder }}
FILE_PREFIX = "{{ dummy_file_prefix }}"
FILE_EXT = "{{ dummy_file_extension }}"
SQL_BATCH_SIZE = {{ sql_batch_size }}

# Column list shared by every batched INSERT. IGNORE makes re-runs skip rows
# that already exist instead of aborting the whole statement.
_INSERT_HEADER = (
    "INSERT IGNORE INTO oc_filecache "
    "(storage, path, path_hash, parent, name, mimetype, mimepart, "
    "size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n"
)


def mysql_exec(sql):
    """Run a single SQL statement through the ``mysql`` CLI as root.

    Args:
        sql: The statement to execute against ``DB_NAME``.

    Returns:
        The stripped stdout of the client (``-N`` suppresses column headers;
        columns are tab-separated, rows newline-separated). Failures are
        printed, not raised, so the result may be an empty string.
    """
    result = subprocess.run(
        ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
        capture_output=True, text=True, timeout=120
    )
    if result.returncode != 0 and result.stderr.strip():
        print(f" [SQL ERROR] {result.stderr[:200]}")
    return result.stdout.strip()


def mysql_exec_file(filepath):
    """Feed the SQL file at ``filepath`` to the ``mysql`` CLI via stdin.

    Args:
        filepath: Path of a file containing SQL statements.

    Returns:
        The ``subprocess.CompletedProcess``; callers should inspect
        ``returncode`` because failures are only printed, not raised.
    """
    with open(filepath) as f:
        result = subprocess.run(
            ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
            stdin=f, capture_output=True, text=True, timeout=300
        )
    if result.returncode != 0:
        print(f" [SQL ERROR] {result.stderr[:200]}")
    return result


def escape_sql(s):
    """Escape backslashes and single quotes for use inside a '...' SQL literal."""
    return s.replace("\\", "\\\\").replace("'", "\\'")


def _split_rows(output):
    """Yield the tab-separated fields of every non-blank line of ``output``."""
    for line in output.split('\n'):
        if line.strip():
            yield line.split('\t')


def _query_int(sql):
    """Run ``sql`` and return its single integer result, or None if absent."""
    try:
        return int(mysql_exec(sql))
    except ValueError:
        # Empty output (no row / SQL error) or unexpected multi-row output.
        return None


def _load_storage_map():
    """Return {groupfolder id -> storage numeric_id} for groupfolder storages."""
    rows = mysql_exec("""
        SELECT s.numeric_id, s.id
        FROM oc_storages s
        WHERE s.id LIKE 'local::%/__groupfolders/%'
    """)
    storage_map = {}
    for parts in _split_rows(rows):
        if len(parts) < 2:
            continue
        # The folder number is the last path component of the storage id
        # ('local::<datadir>/__groupfolders/<n>/'). Extracting it here rather
        # than with a hard-coded SQL REPLACE keeps this independent of where
        # the Nextcloud data directory lives.
        folder_num = parts[1].rstrip('/').rsplit('/', 1)[-1]
        try:
            storage_map[int(folder_num)] = int(parts[0])
        except ValueError:
            continue  # not a numbered groupfolder storage
    return storage_map


def _load_groupfolder_map():
    """Return {teamfolder index -> groupfolder id}, limited to NUM_TEAMFOLDERS."""
    rows = mysql_exec("""
        SELECT folder_id, mount_point
        FROM oc_group_folders
        WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
        ORDER BY folder_id
    """)
    gf_map = {}
    for parts in _split_rows(rows):
        if len(parts) < 2:
            continue
        try:
            gf_id = int(parts[0])
            # The teamfolder index is the numeric suffix after the last '-'.
            tf_idx = int(parts[1].split('-')[-1])
        except ValueError:
            continue  # mount point matches the prefix but has no numeric suffix
        if tf_idx <= NUM_TEAMFOLDERS:
            gf_map[tf_idx] = gf_id
    return gf_map


def _flush_batch(values):
    """Execute one multi-row INSERT built from ``values``.

    The statement is written to a private temporary file (created with
    ``mkstemp``, so the name is unpredictable and the file is owner-only)
    which is removed afterwards.

    Returns:
        True if there was nothing to insert or mysql exited with status 0.
    """
    if not values:
        return True
    fd, sql_file = tempfile.mkstemp(prefix="filecache_batch_", suffix=".sql")
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(_INSERT_HEADER)
            f.write(",\n".join(values))
            f.write(";\n")
        return mysql_exec_file(sql_file).returncode == 0
    finally:
        os.unlink(sql_file)


def main():
    """Register the generated dummy files of every teamfolder in oc_filecache.

    For each teamfolder that has both a groupfolder row and a storage row,
    rows for FILES_PER_FOLDER files are inserted in batches of SQL_BATCH_SIZE.
    Folders that already have enough matching rows are skipped.

    Raises:
        SystemExit: if the required mimetype ids cannot be looked up.
    """
    start_time = time.time()
    now = int(time.time())

    print("=" * 60)
    print("Fast DB Register - oc_filecache insert")
    print(f"Doel: {NUM_TEAMFOLDERS} folders x {FILES_PER_FOLDER} bestanden")
    print("=" * 60)

    # Get storage mapping (gf_id -> storage numeric_id)
    print("\nStap 1: Ophalen storage mapping...")
    storage_map = _load_storage_map()

    # Get groupfolder mapping (teamfolder index -> gf_id)
    print("Stap 2: Ophalen groupfolder mapping...")
    gf_map = _load_groupfolder_map()
    print(f" {len(gf_map)} teamfolders gevonden")

    # Get mimetype IDs; without them no valid row can be built.
    txt_mime = _query_int("SELECT id FROM oc_mimetypes WHERE mimetype='text/plain'")
    txt_part = _query_int("SELECT id FROM oc_mimetypes WHERE mimetype='text'")
    if txt_mime is None or txt_part is None:
        raise SystemExit(
            " [ERROR] Mimetype 'text/plain' of 'text' niet gevonden in oc_mimetypes"
        )

    total_inserted = 0

    for tf_idx in sorted(gf_map):
        gf_id = gf_map[tf_idx]
        storage_id = storage_map.get(gf_id)
        if storage_id is None:
            print(f" [SKIP] Geen storage voor gf_id {gf_id}")
            continue

        # Check if files already registered
        existing = _query_int(f"""
            SELECT COUNT(*) FROM oc_filecache
            WHERE storage = {storage_id} AND path LIKE 'files/{FILE_PREFIX}%'
        """)
        if existing is not None and existing >= FILES_PER_FOLDER:
            print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): {existing} bestanden al geregistreerd")
            total_inserted += existing
            continue

        # Get parent fileid for 'files' directory
        parent_id = _query_int(
            f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'"
        )
        if parent_id is None:
            print(f" [SKIP] Geen files entry voor storage {storage_id}")
            continue

        # Get file size from disk; the first file's size is used for all rows.
        sample_file = f"{GROUPFOLDERS_DIR}/{gf_id}/files/{FILE_PREFIX}-00001.{FILE_EXT}"
        try:
            file_size = os.path.getsize(sample_file)
        except OSError:
            file_size = 80  # fallback

        # Generate INSERT statements in batches
        values = []
        all_ok = True
        for i in range(1, FILES_PER_FOLDER + 1):
            fname = f"{FILE_PREFIX}-{i:05d}.{FILE_EXT}"
            path = f"files/{fname}"
            path_hash = hashlib.md5(path.encode()).hexdigest()
            etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
            # permissions 27 - presumably read|update|delete|share (no create),
            # per Nextcloud's permission bitmask; TODO confirm.
            values.append(
                f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
                f"'{escape_sql(fname)}', {txt_mime}, {txt_part}, {file_size}, "
                f"{now}, {now}, '{etag}', 27, 0)"
            )

            if len(values) >= SQL_BATCH_SIZE:
                # Call first so later batches still run after a failure.
                all_ok = _flush_batch(values) and all_ok
                values = []

        # Flush remaining
        all_ok = _flush_batch(values) and all_ok

        if not all_ok:
            # Do not count a folder whose inserts (partly) failed as done.
            print(f" [FAIL] Folder {tf_idx} (gf_id={gf_id}): een of meer batches mislukt")
            continue

        total_inserted += FILES_PER_FOLDER
        elapsed = time.time() - start_time
        rate = total_inserted / elapsed if elapsed > 0 else 0
        print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {FILES_PER_FOLDER} records ({elapsed:.0f}s, totaal: {total_inserted}, {rate:.0f}/s)")

    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"Voltooid in {elapsed:.0f} seconden")
    print(f"Totaal geregistreerd: {total_inserted}")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()