#!/usr/bin/env python3 """ Create Folder Structure - Mappenstructuur aanmaken en bestanden verplaatsen =========================================================================== Maakt een 3-niveau mappenstructuur aan binnen elke teamfolder en verplaatst de bestanden gelijkmatig over de leaf folders. 10 hoofdmappen x 3 submappen = 30 leaf folders per teamfolder. Bestanden worden verplaatst met os.rename (geen extra diskruimte nodig). Database (oc_filecache) wordt bijgewerkt met nieuwe paden en parent IDs. """ import os import subprocess import hashlib import time DB_NAME = "{{ nextcloud_db_name }}" GROUPFOLDERS_DIR = "{{ groupfolders_dir }}" NUM_TEAMFOLDERS = {{ num_teamfolders }} FILES_PER_FOLDER = {{ files_per_teamfolder }} FOLDER_STRUCTURE = {{ folder_structure | to_json }} # Build flat list of leaf paths LEAF_FOLDERS = [] for main, subs in FOLDER_STRUCTURE.items(): for sub in subs: LEAF_FOLDERS.append(f"{main}/{sub}") print(f"Folder structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen") def mysql_exec(sql): result = subprocess.run( ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql], capture_output=True, text=True, timeout=120 ) if result.returncode != 0 and result.stderr.strip(): print(f" [SQL ERROR] {result.stderr[:200]}") return result.stdout.strip() def mysql_exec_file(filepath): with open(filepath) as f: result = subprocess.run( ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"], stdin=f, capture_output=True, text=True, timeout=300 ) if result.returncode != 0: print(f" [SQL ERROR] {result.stderr[:200]}") return result def escape_sql(s): return s.replace("\\", "\\\\").replace("'", "\\'") def main(): start_time = time.time() now = int(time.time()) # Get storage mapping print("Ophalen storage mapping...") rows = mysql_exec(""" SELECT s.numeric_id, REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num FROM oc_storages s WHERE s.id LIKE 'local::%/__groupfolders/%' """) storage_map = {} for line in rows.split('\n'): if line.strip(): parts = line.split('\t') storage_map[int(parts[1])] = int(parts[0]) # Get groupfolder mapping rows = mysql_exec(""" SELECT folder_id, mount_point FROM oc_group_folders WHERE mount_point LIKE '{{ teamfolder_prefix }}-%' ORDER BY folder_id """) gf_map = {} for line in rows.split('\n'): if line.strip(): parts = line.split('\t') gf_id = int(parts[0]) tf_idx = int(parts[1].split('-')[-1]) if tf_idx <= NUM_TEAMFOLDERS: gf_map[tf_idx] = gf_id print(f"{len(gf_map)} teamfolders gevonden") # Get mimetype ID for directories dir_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='httpd/unix-directory'")) total_moved = 0 for tf_idx in sorted(gf_map.keys()): gf_id = gf_map[tf_idx] storage_id = storage_map.get(gf_id) if not storage_id: print(f" [SKIP] Geen storage voor gf_id {gf_id}") continue folder_num = gf_id files_dir = f"{GROUPFOLDERS_DIR}/{folder_num}/files" # Check if already restructured first_sub = list(FOLDER_STRUCTURE.keys())[0] if os.path.isdir(f"{files_dir}/{first_sub}"): print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): al gestructureerd") total_moved += FILES_PER_FOLDER continue # Get parent_id for 'files' directory files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'") if not files_parent: print(f" [SKIP] Geen files entry voor storage {storage_id}") continue files_parent_id = int(files_parent) # Step 1: Create directories on disk for main_folder, subs in FOLDER_STRUCTURE.items(): os.makedirs(f"{files_dir}/{main_folder}", exist_ok=True) for sub in subs: os.makedirs(f"{files_dir}/{main_folder}/{sub}", exist_ok=True) # Step 2: Insert main directory entries into oc_filecache dir_values = [] for main_folder in FOLDER_STRUCTURE.keys(): path = f"files/{main_folder}" path_hash = hashlib.md5(path.encode()).hexdigest() etag = hashlib.md5(f"{now}{path}".encode()).hexdigest() dir_values.append( f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {files_parent_id}, " f"'{escape_sql(main_folder)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)" ) sql_file = "/tmp/dir_insert.sql" with open(sql_file, 'w') as f: f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n") f.write(",\n".join(dir_values)) f.write(";\n") mysql_exec_file(sql_file) # Get main folder IDs main_ids = {} for main_folder in FOLDER_STRUCTURE.keys(): path = f"files/{main_folder}" fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'") if fid: main_ids[main_folder] = int(fid) # Insert sub folder entries sub_values = [] for main_folder, subs in FOLDER_STRUCTURE.items(): parent_id = main_ids.get(main_folder, files_parent_id) for sub in subs: path = f"files/{main_folder}/{sub}" path_hash = hashlib.md5(path.encode()).hexdigest() etag = hashlib.md5(f"{now}{path}".encode()).hexdigest() sub_values.append( f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, " f"'{escape_sql(sub)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)" ) with open(sql_file, 'w') as f: f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n") f.write(",\n".join(sub_values)) f.write(";\n") mysql_exec_file(sql_file) # Get sub folder IDs sub_ids = {} for main_folder, subs in FOLDER_STRUCTURE.items(): for sub in subs: path = f"files/{main_folder}/{sub}" fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'") if fid: sub_ids[f"{main_folder}/{sub}"] = int(fid) # Step 3: Move files on disk and update DB file_rows = mysql_exec(f""" SELECT fileid, name FROM oc_filecache WHERE storage = {storage_id} AND path LIKE 'files/{{ dummy_file_prefix }}%' ORDER BY fileid """) files = [] for line in file_rows.split('\n'): if line.strip(): parts = line.split('\t') files.append((int(parts[0]), parts[1])) if not files: print(f" [SKIP] Folder {tf_idx}: geen bestanden") continue # Distribute files across leaf folders files_per_leaf = len(files) // len(LEAF_FOLDERS) remainder = len(files) % len(LEAF_FOLDERS) file_idx = 0 for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS): leaf_parent_id = sub_ids.get(leaf_path, files_parent_id) count = files_per_leaf + (1 if leaf_idx < remainder else 0) for _ in range(count): if file_idx >= len(files): break fid, fname = files[file_idx] old_path = f"{files_dir}/{fname}" new_path_disk = f"{files_dir}/{leaf_path}/{fname}" if os.path.exists(old_path): os.rename(old_path, new_path_disk) file_idx += 1 # Batch update DB BATCH = 2000 for batch_start in range(0, len(files), BATCH): updates = [] for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS): leaf_parent_id = sub_ids.get(leaf_path, files_parent_id) count = files_per_leaf + (1 if leaf_idx < remainder else 0) leaf_start = sum(files_per_leaf + (1 if i < remainder else 0) for i in range(leaf_idx)) leaf_end = leaf_start + count for i in range(max(leaf_start, batch_start), min(leaf_end, batch_start + BATCH)): if i >= len(files): break fid, fname = files[i] new_path_db = f"files/{leaf_path}/{fname}" new_path_hash = hashlib.md5(new_path_db.encode()).hexdigest() updates.append(f"UPDATE oc_filecache SET path='{escape_sql(new_path_db)}', path_hash='{new_path_hash}', parent={leaf_parent_id} WHERE fileid={fid};") if updates: with open(sql_file, 'w') as f: f.write("\n".join(updates)) mysql_exec_file(sql_file) # Fix ownership os.system(f"chown -R www-data:www-data {files_dir}") total_moved += len(files) elapsed = time.time() - start_time print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {len(files)} bestanden verplaatst naar {len(LEAF_FOLDERS)} submappen ({elapsed:.0f}s, totaal: {total_moved})") elapsed = time.time() - start_time print(f"\n{'='*60}") print(f"Voltooid in {elapsed:.0f} seconden") print(f"Totaal verplaatst: {total_moved}") print(f"Structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen per teamfolder") print(f"{'='*60}") if __name__ == "__main__": main()