Initial commit: MetaVox loadtest playbook

Ansible playbook voor het opzetten van een MetaVox loadtest omgeving:
- 50 teamfolders met 10.000 bestanden elk (500K totaal)
- 100 metadata velddefinities (10 teamfolder + 90 file-level)
- 3-niveau mappenstructuur (10 hoofdmappen x 3 submappen)
- ~43M metadata records via directe MySQL inserts
- Geoptimaliseerde database indexes (7 redundante indexes gedropt)

Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API
voor maximale performance.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-16 16:55:30 +01:00
commit a1eda430c8
12 changed files with 1677 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""
MetaVox Metadata Cleanup Script
Verwijdert metadata-velddefinities via de OCS API.
"""
import json
import sys
import time
import requests
from requests.auth import HTTPBasicAuth
# --- Connection settings (rendered by Ansible/Jinja2 at deploy time) ---
NC_URL = "{{ nextcloud_url }}"
NC_USER = "{{ nextcloud_admin_user }}"
NC_PASS = "{{ nextcloud_admin_password }}"
TIMEOUT = {{ http_timeout }}  # per-request timeout in seconds
# Web API base is used for deletes (delete_field); OCS base for listing fields.
WEB_API_BASE = f"{NC_URL}/index.php/apps/metavox/api"
OCS_BASE = f"{NC_URL}/ocs/v2.php/apps/metavox/api/v1"
AUTH = HTTPBasicAuth(NC_USER, NC_PASS)
HEADERS = {
    "OCS-APIRequest": "true",  # required header for Nextcloud OCS endpoints
    "Accept": "application/json",
}
# Field definitions injected as JSON by the playbook; only these names get deleted.
TF_FIELDS = json.loads("""{{ teamfolder_metadata_fields | to_json }}""")
FILE_FIELDS = json.loads("""{{ file_metadata_fields | to_json }}""")
FIELD_NAMES_TO_DELETE = set(f["name"] for f in TF_FIELDS + FILE_FIELDS)
def get_all_fields():
    """Fetch all MetaVox groupfolder field definitions via the OCS API.

    Returns the list found under ``ocs.data`` in the response body, or an
    empty list when the request fails or the payload has another shape.
    """
    endpoint = f"{OCS_BASE}/groupfolder-fields"
    try:
        response = requests.get(endpoint, auth=AUTH, headers=HEADERS, timeout=TIMEOUT)
        if response.status_code == 200:
            payload = response.json()
            if "ocs" in payload and "data" in payload["ocs"]:
                return payload["ocs"]["data"]
    except Exception as e:  # best-effort: report and fall through to []
        print(f"[ERROR] Ophalen velden: {e}")
    return []
def delete_field(field_id):
    """Delete one field definition by id; True on success (404 counts as gone).

    NOTE(review): this uses WEB_API_BASE rather than OCS_BASE — presumably
    the OCS endpoint does not accept DELETE; confirm against the MetaVox app.
    """
    endpoint = f"{WEB_API_BASE}/groupfolder-fields/{field_id}"
    try:
        response = requests.delete(endpoint, auth=AUTH, headers=HEADERS, timeout=TIMEOUT)
    except Exception as e:
        print(f" [ERROR] Verwijderen veld {field_id}: {e}")
        return False
    return response.status_code in [200, 204, 404]
def main():
    """List every field on the server and delete those defined by this playbook."""
    print("MetaVox Metadata Cleanup")
    print("=" * 40)
    all_fields = get_all_fields()
    print(f"Gevonden: {len(all_fields)} velden totaal")
    deleted = 0
    skipped = 0
    for entry in all_fields:
        name = entry.get("field_name", "")
        fid = entry.get("id")
        # Only touch fields this playbook created; leave everything else alone.
        if name not in FIELD_NAMES_TO_DELETE or not fid:
            skipped += 1
            continue
        if delete_field(fid):
            deleted += 1
        else:
            print(f" [FAIL] Kon veld '{name}' (id={fid}) niet verwijderen")
    print(f"\n{deleted} velden deleted, {skipped} overgeslagen")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,257 @@
#!/usr/bin/env python3
"""
Create Folder Structure - Mappenstructuur aanmaken en bestanden verplaatsen
===========================================================================
Maakt een 3-niveau mappenstructuur aan binnen elke teamfolder en verplaatst
de bestanden gelijkmatig over de leaf folders.
10 hoofdmappen x 3 submappen = 30 leaf folders per teamfolder.
Bestanden worden verplaatst met os.rename (geen extra diskruimte nodig).
Database (oc_filecache) wordt bijgewerkt met nieuwe paden en parent IDs.
"""
import os
import subprocess
import hashlib
import time
# --- Ansible/Jinja2-rendered configuration ---
DB_NAME = "{{ nextcloud_db_name }}"
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
NUM_TEAMFOLDERS = {{ num_teamfolders }}
FILES_PER_FOLDER = {{ files_per_teamfolder }}
# Mapping: main folder name -> list of sub folder names.
FOLDER_STRUCTURE = {{ folder_structure | to_json }}
# Build flat list of leaf paths ("main/sub") used to spread files evenly.
LEAF_FOLDERS = []
for main, subs in FOLDER_STRUCTURE.items():
    for sub in subs:
        LEAF_FOLDERS.append(f"{main}/{sub}")
print(f"Folder structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen")
def mysql_exec(sql):
    """Run one SQL statement via the mysql CLI and return stripped stdout.

    Errors are printed (truncated to 200 chars) but never raised — callers
    inspect the returned output instead.
    """
    proc = subprocess.run(
        ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
        capture_output=True, text=True, timeout=120
    )
    failed = proc.returncode != 0
    if failed and proc.stderr.strip():
        print(f" [SQL ERROR] {proc.stderr[:200]}")
    return proc.stdout.strip()
def mysql_exec_file(filepath):
    """Pipe a SQL file into the mysql CLI (used for large batched statements)."""
    with open(filepath) as handle:
        proc = subprocess.run(
            ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
            stdin=handle, capture_output=True, text=True, timeout=300
        )
    if proc.returncode != 0:
        print(f" [SQL ERROR] {proc.stderr[:200]}")
    return proc
# Translation table: backslash and single quote get backslash-escaped.
_SQL_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})


def escape_sql(s):
    """Escape backslashes and single quotes for embedding in a SQL literal."""
    return s.translate(_SQL_ESCAPES)
def main():
    """Create the folder structure in each teamfolder and move the files into it.

    Per teamfolder:
      1. Create the main/sub directories on disk.
      2. Register them in oc_filecache (INSERT IGNORE, so reruns are safe).
      3. Move the flat dummy files into the leaf folders with os.rename and
         rewrite their oc_filecache rows (path, path_hash, parent) in batches.
    """
    start_time = time.time()
    now = int(time.time())  # one timestamp reused for mtime/etag of all new rows
    # Get storage mapping: disk folder number -> oc_storages.numeric_id.
    # NOTE(review): the REPLACE() strips a hard-coded data path; assumes the
    # Nextcloud data directory is /var/www/nextcloud/data — confirm elsewhere.
    print("Ophalen storage mapping...")
    rows = mysql_exec("""
SELECT s.numeric_id,
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
FROM oc_storages s
WHERE s.id LIKE 'local::%/__groupfolders/%'
""")
    storage_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')  # mysql -N output: numeric_id <TAB> folder_num
            storage_map[int(parts[1])] = int(parts[0])
    # Get groupfolder mapping: teamfolder index (mount point suffix) -> folder_id.
    rows = mysql_exec("""
SELECT folder_id, mount_point FROM oc_group_folders
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
ORDER BY folder_id
""")
    gf_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')
            gf_id = int(parts[0])
            tf_idx = int(parts[1].split('-')[-1])  # mount point ends in "-<index>"
            if tf_idx <= NUM_TEAMFOLDERS:
                gf_map[tf_idx] = gf_id
    print(f"{len(gf_map)} teamfolders gevonden")
    # Get mimetype ID for directories
    dir_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='httpd/unix-directory'"))
    total_moved = 0
    for tf_idx in sorted(gf_map.keys()):
        gf_id = gf_map[tf_idx]
        storage_id = storage_map.get(gf_id)
        if not storage_id:
            print(f" [SKIP] Geen storage voor gf_id {gf_id}")
            continue
        # groupfolders stores each folder on disk under __groupfolders/<folder_id>
        folder_num = gf_id
        files_dir = f"{GROUPFOLDERS_DIR}/{folder_num}/files"
        # Check if already restructured (first main folder present on disk)
        first_sub = list(FOLDER_STRUCTURE.keys())[0]
        if os.path.isdir(f"{files_dir}/{first_sub}"):
            print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): al gestructureerd")
            total_moved += FILES_PER_FOLDER
            continue
        # Get parent_id for the 'files' directory row
        files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
        if not files_parent:
            print(f" [SKIP] Geen files entry voor storage {storage_id}")
            continue
        files_parent_id = int(files_parent)
        # Step 1: Create directories on disk
        for main_folder, subs in FOLDER_STRUCTURE.items():
            os.makedirs(f"{files_dir}/{main_folder}", exist_ok=True)
            for sub in subs:
                os.makedirs(f"{files_dir}/{main_folder}/{sub}", exist_ok=True)
        # Step 2: Insert main directory entries into oc_filecache
        dir_values = []
        for main_folder in FOLDER_STRUCTURE.keys():
            path = f"files/{main_folder}"
            # Nextcloud keys filecache rows by md5(path); etag only needs uniqueness
            path_hash = hashlib.md5(path.encode()).hexdigest()
            etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
            dir_values.append(
                f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {files_parent_id}, "
                f"'{escape_sql(main_folder)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
            )
        sql_file = "/tmp/dir_insert.sql"
        with open(sql_file, 'w') as f:
            f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
            f.write(",\n".join(dir_values))
            f.write(";\n")
        mysql_exec_file(sql_file)
        # Get main folder IDs (needed as parents for the sub folder rows)
        main_ids = {}
        for main_folder in FOLDER_STRUCTURE.keys():
            path = f"files/{main_folder}"
            fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
            if fid:
                main_ids[main_folder] = int(fid)
        # Insert sub folder entries
        sub_values = []
        for main_folder, subs in FOLDER_STRUCTURE.items():
            parent_id = main_ids.get(main_folder, files_parent_id)
            for sub in subs:
                path = f"files/{main_folder}/{sub}"
                path_hash = hashlib.md5(path.encode()).hexdigest()
                etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
                sub_values.append(
                    f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
                    f"'{escape_sql(sub)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
                )
        with open(sql_file, 'w') as f:
            f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
            f.write(",\n".join(sub_values))
            f.write(";\n")
        mysql_exec_file(sql_file)
        # Get sub folder IDs ("main/sub" -> fileid) — these are the move targets
        sub_ids = {}
        for main_folder, subs in FOLDER_STRUCTURE.items():
            for sub in subs:
                path = f"files/{main_folder}/{sub}"
                fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
                if fid:
                    sub_ids[f"{main_folder}/{sub}"] = int(fid)
        # Step 3: Move files on disk and update DB
        file_rows = mysql_exec(f"""
SELECT fileid, name FROM oc_filecache
WHERE storage = {storage_id} AND path LIKE 'files/{{ dummy_file_prefix }}%'
ORDER BY fileid
""")
        files = []
        for line in file_rows.split('\n'):
            if line.strip():
                parts = line.split('\t')
                files.append((int(parts[0]), parts[1]))
        if not files:
            print(f" [SKIP] Folder {tf_idx}: geen bestanden")
            continue
        # Distribute files across leaf folders: each leaf gets files_per_leaf,
        # and the first `remainder` leaves get one extra file.
        files_per_leaf = len(files) // len(LEAF_FOLDERS)
        remainder = len(files) % len(LEAF_FOLDERS)
        file_idx = 0
        for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
            leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
            count = files_per_leaf + (1 if leaf_idx < remainder else 0)
            for _ in range(count):
                if file_idx >= len(files):
                    break
                fid, fname = files[file_idx]
                old_path = f"{files_dir}/{fname}"
                new_path_disk = f"{files_dir}/{leaf_path}/{fname}"
                if os.path.exists(old_path):
                    os.rename(old_path, new_path_disk)  # same filesystem: no data copy
                file_idx += 1
        # Batch update DB: the same leaf assignment is recomputed from index
        # ranges and emitted as UPDATE statements in chunks of BATCH rows.
        BATCH = 2000
        for batch_start in range(0, len(files), BATCH):
            updates = []
            for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
                leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
                count = files_per_leaf + (1 if leaf_idx < remainder else 0)
                # leaf_start/leaf_end: slice of `files` owned by this leaf
                leaf_start = sum(files_per_leaf + (1 if i < remainder else 0) for i in range(leaf_idx))
                leaf_end = leaf_start + count
                for i in range(max(leaf_start, batch_start), min(leaf_end, batch_start + BATCH)):
                    if i >= len(files):
                        break
                    fid, fname = files[i]
                    new_path_db = f"files/{leaf_path}/{fname}"
                    new_path_hash = hashlib.md5(new_path_db.encode()).hexdigest()
                    updates.append(f"UPDATE oc_filecache SET path='{escape_sql(new_path_db)}', path_hash='{new_path_hash}', parent={leaf_parent_id} WHERE fileid={fid};")
            if updates:
                with open(sql_file, 'w') as f:
                    f.write("\n".join(updates))
                mysql_exec_file(sql_file)
        # Fix ownership so the web server can serve the moved files.
        # NOTE(review): os.system with an interpolated path — fine for these
        # controlled directories, but subprocess.run([...]) would be safer.
        os.system(f"chown -R www-data:www-data {files_dir}")
        total_moved += len(files)
        elapsed = time.time() - start_time
        print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {len(files)} bestanden verplaatst naar {len(LEAF_FOLDERS)} submappen ({elapsed:.0f}s, totaal: {total_moved})")
    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"Voltooid in {elapsed:.0f} seconden")
    print(f"Totaal verplaatst: {total_moved}")
    print(f"Structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen per teamfolder")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,73 @@
#!/bin/bash
# =============================================================================
# Fast File Creator - Direct filesystem
# =============================================================================
# Creates {{ files_per_teamfolder }} files per teamfolder directly on the
# filesystem. Much faster than WebDAV uploads (~500K files in ~40 minutes).
# =============================================================================
set -euo pipefail

# Ansible/Jinja2-rendered configuration
GROUPFOLDERS_DIR="{{ groupfolders_dir }}"
NUM_TEAMFOLDERS={{ num_teamfolders }}
FILES_PER_FOLDER={{ files_per_teamfolder }}
FILE_PREFIX="{{ dummy_file_prefix }}"
FILE_EXT="{{ dummy_file_extension }}"
FILE_CONTENT="{{ dummy_file_content }}"

echo "============================================================"
echo "Fast File Creator - Direct Filesystem"
echo "Doel: ${NUM_TEAMFOLDERS} folders x ${FILES_PER_FOLDER} bestanden"
echo "============================================================"

# Fetch the groupfolder ids (API folder_id == on-disk folder number)
GF_IDS=$(mysql -u root {{ nextcloud_db_name }} -N -e \
"SELECT folder_id FROM oc_group_folders
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
ORDER BY folder_id
LIMIT ${NUM_TEAMFOLDERS}")

TOTAL_CREATED=0
FOLDER_COUNT=0
START_TIME=$(date +%s)

for GF_ID in ${GF_IDS}; do
    FOLDER_COUNT=$((FOLDER_COUNT + 1))
    FILES_DIR="${GROUPFOLDERS_DIR}/${GF_ID}/files"
    # Create the files directory if it does not exist yet
    mkdir -p "${FILES_DIR}"
    # Count existing files so reruns are idempotent
    EXISTING=$(find "${FILES_DIR}" -maxdepth 1 -name "${FILE_PREFIX}-*.${FILE_EXT}" 2>/dev/null | wc -l)
    if [ "${EXISTING}" -ge "${FILES_PER_FOLDER}" ]; then
        echo "[SKIP] Folder ${FOLDER_COUNT} (gf_id=${GF_ID}): ${EXISTING} bestanden bestaan al"
        TOTAL_CREATED=$((TOTAL_CREATED + EXISTING))
        continue
    fi
    # Create the files. `seq -w` zero-pads to the width of FILES_PER_FOLDER,
    # which matches the 00001-style names the DB-register script expects.
    CREATED=0
    for i in $(seq -w 1 ${FILES_PER_FOLDER}); do
        FNAME="${FILE_PREFIX}-${i}.${FILE_EXT}"
        FPATH="${FILES_DIR}/${FNAME}"
        if [ ! -f "${FPATH}" ]; then
            echo "${FILE_CONTENT}" > "${FPATH}"
            CREATED=$((CREATED + 1))
        fi
    done
    # Fix ownership so the web server owns the new files
    chown -R www-data:www-data "${FILES_DIR}"
    TOTAL_CREATED=$((TOTAL_CREATED + FILES_PER_FOLDER))
    ELAPSED=$(( $(date +%s) - START_TIME ))
    echo "[DONE] Folder ${FOLDER_COUNT} (gf_id=${GF_ID}): ${CREATED} nieuw aangemaakt (${ELAPSED}s, totaal: ${TOTAL_CREATED})"
done

ELAPSED=$(( $(date +%s) - START_TIME ))
echo ""
echo "============================================================"
echo "Voltooid in ${ELAPSED} seconden"
echo "Totaal bestanden: ${TOTAL_CREATED}"
echo "============================================================"

View File

@@ -0,0 +1,174 @@
#!/usr/bin/env python3
"""
Fast DB Register - Direct MySQL insert into oc_filecache
=========================================================
Registreert bestanden die op het filesystem zijn aangemaakt in de Nextcloud
database (oc_filecache). Veel sneller dan occ files:scan (~500K in ~2 min).
"""
import os
import subprocess
import hashlib
import time
# --- Ansible/Jinja2-rendered configuration ---
DB_NAME = "{{ nextcloud_db_name }}"
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
NUM_TEAMFOLDERS = {{ num_teamfolders }}
FILES_PER_FOLDER = {{ files_per_teamfolder }}
FILE_PREFIX = "{{ dummy_file_prefix }}"
FILE_EXT = "{{ dummy_file_extension }}"
SQL_BATCH_SIZE = {{ sql_batch_size }}  # VALUES rows per INSERT statement
def mysql_exec(sql):
    """Execute one SQL statement with the mysql CLI; return stdout, stripped.

    Failures are reported on stdout (first 200 chars of stderr) rather than
    raised; callers check the returned output.
    """
    completed = subprocess.run(
        ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
        capture_output=True, text=True, timeout=120
    )
    if completed.returncode != 0 and completed.stderr.strip():
        print(f" [SQL ERROR] {completed.stderr[:200]}")
    return completed.stdout.strip()
def mysql_exec_file(filepath):
    """Feed a SQL file to the mysql CLI on stdin (large batched inserts)."""
    with open(filepath) as sql_stream:
        completed = subprocess.run(
            ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
            stdin=sql_stream, capture_output=True, text=True, timeout=300
        )
    if completed.returncode != 0:
        print(f" [SQL ERROR] {completed.stderr[:200]}")
    return completed
def escape_sql(s):
    """Escape backslashes and single quotes so *s* can sit in a SQL literal."""
    # Backslashes first, so the escapes added for quotes are not re-escaped.
    without_backslashes = s.replace("\\", "\\\\")
    return without_backslashes.replace("'", "\\'")
def main():
    """Register the on-disk dummy files in oc_filecache via batched INSERTs.

    Folders whose files are already registered are skipped, and INSERT IGNORE
    makes partial reruns safe (duplicate rows are presumably rejected by the
    storage/path_hash unique key — confirm against the oc_filecache schema).
    """
    start_time = time.time()
    now = int(time.time())  # one timestamp for mtime/etag of every new row
    print("=" * 60)
    print("Fast DB Register - oc_filecache insert")
    print(f"Doel: {NUM_TEAMFOLDERS} folders x {FILES_PER_FOLDER} bestanden")
    print("=" * 60)
    # Get storage mapping (gf_id -> storage numeric_id).
    # NOTE(review): assumes the data dir is /var/www/nextcloud/data — confirm.
    print("\nStap 1: Ophalen storage mapping...")
    rows = mysql_exec("""
SELECT s.numeric_id,
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
FROM oc_storages s
WHERE s.id LIKE 'local::%/__groupfolders/%'
""")
    storage_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')  # mysql -N output: numeric_id <TAB> folder_num
            storage_map[int(parts[1])] = int(parts[0])
    # Get groupfolder mapping: teamfolder index (mount point suffix) -> folder_id
    print("Stap 2: Ophalen groupfolder mapping...")
    rows = mysql_exec("""
SELECT folder_id, mount_point FROM oc_group_folders
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
ORDER BY folder_id
""")
    gf_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')
            gf_id = int(parts[0])
            tf_idx = int(parts[1].split('-')[-1])  # mount point ends in "-<index>"
            if tf_idx <= NUM_TEAMFOLDERS:
                gf_map[tf_idx] = gf_id
    print(f" {len(gf_map)} teamfolders gevonden")
    # Get mimetype IDs for the dummy text files
    txt_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='text/plain'"))
    txt_part = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='text'"))
    total_inserted = 0
    for tf_idx in sorted(gf_map.keys()):
        gf_id = gf_map[tf_idx]
        storage_id = storage_map.get(gf_id)
        if not storage_id:
            print(f" [SKIP] Geen storage voor gf_id {gf_id}")
            continue
        # Check if files already registered (idempotent reruns)
        existing = mysql_exec(f"""
SELECT COUNT(*) FROM oc_filecache
WHERE storage = {storage_id} AND path LIKE 'files/{FILE_PREFIX}%'
""")
        if existing and int(existing) >= FILES_PER_FOLDER:
            print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): {existing} bestanden al geregistreerd")
            total_inserted += int(existing)
            continue
        # Get parent fileid for the 'files' directory row
        files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
        if not files_parent:
            print(f" [SKIP] Geen files entry voor storage {storage_id}")
            continue
        parent_id = int(files_parent)
        # Get file size from disk (all dummy files share the same content)
        sample_file = f"{GROUPFOLDERS_DIR}/{gf_id}/files/{FILE_PREFIX}-00001.{FILE_EXT}"
        try:
            file_size = os.path.getsize(sample_file)
        except OSError:
            file_size = 80  # fallback
        # Generate INSERT statements in batches of SQL_BATCH_SIZE rows
        values = []
        for i in range(1, FILES_PER_FOLDER + 1):
            # Name format must match the creator script's `seq -w` zero padding
            fname = f"{FILE_PREFIX}-{i:05d}.{FILE_EXT}"
            path = f"files/{fname}"
            path_hash = hashlib.md5(path.encode()).hexdigest()
            etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
            values.append(
                f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
                f"'{escape_sql(fname)}', {txt_mime}, {txt_part}, {file_size}, "
                f"{now}, {now}, '{etag}', 27, 0)"
            )
            if len(values) >= SQL_BATCH_SIZE:
                sql_file = "/tmp/filecache_batch.sql"
                with open(sql_file, 'w') as f:
                    f.write("INSERT IGNORE INTO oc_filecache "
                            "(storage, path, path_hash, parent, name, mimetype, mimepart, "
                            "size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
                    f.write(",\n".join(values))
                    f.write(";\n")
                mysql_exec_file(sql_file)
                values = []
        # Flush remaining (< SQL_BATCH_SIZE) rows
        if values:
            sql_file = "/tmp/filecache_batch.sql"
            with open(sql_file, 'w') as f:
                f.write("INSERT IGNORE INTO oc_filecache "
                        "(storage, path, path_hash, parent, name, mimetype, mimepart, "
                        "size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
                f.write(",\n".join(values))
                f.write(";\n")
            mysql_exec_file(sql_file)
        total_inserted += FILES_PER_FOLDER
        elapsed = time.time() - start_time
        rate = total_inserted / elapsed if elapsed > 0 else 0
        print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {FILES_PER_FOLDER} records ({elapsed:.0f}s, totaal: {total_inserted}, {rate:.0f}/s)")
    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"Voltooid in {elapsed:.0f} seconden")
    print(f"Totaal geregistreerd: {total_inserted}")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,247 @@
#!/usr/bin/env python3
"""
Fast Metadata Insert - Direct MySQL
=====================================
Voegt metadata records in voor alle bestanden via directe MySQL inserts.
Veel sneller dan de MetaVox API (~43M records in ~2 uur).
Tabellen:
- oc_metavox_file_gf_meta: per-bestand metadata (file_id, groupfolder_id, field_name, field_value)
- oc_metavox_gf_metadata: per-teamfolder metadata (groupfolder_id, field_name, field_value)
"""
import subprocess
import time
import random
import string
from datetime import datetime, timedelta
# --- Ansible/Jinja2-rendered configuration ---
DB_NAME = "{{ nextcloud_db_name }}"
NUM_TEAMFOLDERS = {{ num_teamfolders }}
FILES_PER_FOLDER = {{ files_per_teamfolder }}
SQL_BATCH_SIZE = {{ sql_batch_size }}  # VALUES rows per INSERT statement
# Sample pools for generating plausible Dutch-flavoured test values.
NAMES = ["Jan", "Piet", "Klaas", "Marie", "Anna", "Sophie", "Thomas", "Eva",
         "Lucas", "Emma", "Liam", "Olivia", "Noah", "Mia", "Daan", "Sara"]
ORGS = ["UvA", "HvA", "VU", "TU Delft", "Gemeente Amsterdam", "Ministerie BZK",
        "Rijkswaterstaat", "UWV", "DUO", "KNAW"]
CITIES = ["Amsterdam", "Rotterdam", "Den Haag", "Utrecht", "Eindhoven",
          "Groningen", "Tilburg", "Almere", "Breda", "Nijmegen"]
WORDS = ["beleid", "rapport", "analyse", "voorstel", "evaluatie", "plan",
         "nota", "brief", "contract", "factuur", "verslag", "advies"]
# (field_name, type_spec) tuples rendered by Jinja from the playbook vars;
# dropdown fields carry their options as "dropdown:opt1,opt2,...".
FILE_FIELDS = [
{% for field in file_metadata_fields %}
    ("{{ field.name }}", "{{ field.type }}{% if field.options is defined %}:{{ field.options }}{% endif %}"),
{% endfor %}
]
TF_FIELDS = [
{% for field in teamfolder_metadata_fields %}
    ("{{ field.name }}", "{{ field.type }}{% if field.options is defined %}:{{ field.options }}{% endif %}"),
{% endfor %}
]
def random_date():
    """Return a uniformly random date in [2020-01-01, 2026-12-31] as YYYY-MM-DD."""
    earliest = datetime(2020, 1, 1)
    span_days = (datetime(2026, 12, 31) - earliest).days
    chosen = earliest + timedelta(days=random.randint(0, span_days))
    return chosen.strftime("%Y-%m-%d")
def gen_value(fname, ftype):
    """Generate a plausible random value for field *fname* of type *ftype*.

    *ftype* is "date", "checkbox", "dropdown:<comma-separated options>", or
    anything else for free text; text content is keyed off substrings of the
    field name (email, phone, postal, city, ...), with random words as the
    catch-all.
    """
    if ftype.startswith("dropdown:"):
        choices = ftype.split(":", 1)[1].split(",")
        return random.choice(choices)
    if ftype == "date":
        return random_date()
    if ftype == "checkbox":
        return random.choice(["true", "false"])
    # Free-text fields: dispatch on substrings of the field name.
    if "email" in fname:
        return f"{random.choice(NAMES).lower()}@example.nl"
    if "phone" in fname:
        return f"+31 6 {random.randint(10000000, 99999999)}"
    if "postal" in fname:
        return f"{random.randint(1000, 9999)} {''.join(random.choices(string.ascii_uppercase, k=2))}"
    if "city" in fname:
        return random.choice(CITIES)
    if "country" in fname:
        return "Nederland"
    if "organization" in fname:
        return random.choice(ORGS)
    if any(w in fname for w in ["author", "creator", "contact", "owner"]):
        return f"{random.choice(NAMES)} {random.choice(['de Vries', 'Jansen', 'Bakker', 'Visser'])}"
    if any(w in fname for w in ["number", "code", "identifier", "reference"]):
        return f"{fname[:3].upper()}-{random.randint(10000, 99999)}"
    if "version" in fname:
        return f"{random.randint(1, 10)}.{random.randint(0, 9)}"
    if "language" in fname:
        return random.choice(["Nederlands", "Engels", "Duits"])
    return " ".join(random.choices(WORDS, k=random.randint(1, 3))).capitalize()
def mysql_exec(sql):
    """Run one SQL statement via the mysql CLI and return stripped stdout.

    Errors are printed (first 200 chars of stderr) but never raised —
    callers inspect the returned output instead.
    """
    result = subprocess.run(
        ["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
        capture_output=True, text=True, timeout=120
    )
    # Consistency fix: the other scripts' mysql_exec only reports when stderr
    # actually carries a message; this copy printed an empty [SQL ERROR] line
    # for any non-zero exit. Align the behavior.
    if result.returncode != 0 and result.stderr.strip():
        print(f" [SQL ERROR] {result.stderr[:200]}")
    return result.stdout.strip()
def mysql_exec_file(filepath):
    """Stream a SQL file into the mysql CLI (for the large batched inserts)."""
    with open(filepath) as sql_in:
        outcome = subprocess.run(
            ["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
            stdin=sql_in, capture_output=True, text=True, timeout=300
        )
    if outcome.returncode != 0:
        print(f" [SQL ERROR] {outcome.stderr[:200]}")
    return outcome
def escape_sql(s):
    """Escape backslash, single quote and double quote for SQL literal use."""
    # Backslash must be handled first so later escapes are not doubled up.
    for raw, escaped in (("\\", "\\\\"), ("'", "\\'"), ('"', '\\"')):
        s = s.replace(raw, escaped)
    return s
def main():
    """Insert groupfolder- and file-level metadata rows via batched MySQL inserts.

    Folders that already have at least 10% of their target file-metadata rows
    are treated as done and skipped, so an interrupted run can be resumed
    without inserting duplicates for those folders.
    """
    print("=" * 60)
    print("Fast Metadata Insert - Direct MySQL")
    print(f"File fields: {len(FILE_FIELDS)}, TF fields: {len(TF_FIELDS)}")
    print(f"Doel: {NUM_TEAMFOLDERS} folders x {FILES_PER_FOLDER} files x {len(FILE_FIELDS)} fields")
    print(f" = {NUM_TEAMFOLDERS * FILES_PER_FOLDER * len(FILE_FIELDS):,} file metadata rows")
    print(f" + {NUM_TEAMFOLDERS * len(TF_FIELDS):,} groupfolder metadata rows")
    print("=" * 60)
    start_time = time.time()
    # Step 1: Get groupfolder mapping (mount point suffix -> folder_id)
    print("\nStap 1: Ophalen groupfolder mapping...")
    rows = mysql_exec("""
SELECT folder_id, mount_point FROM oc_group_folders
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
ORDER BY folder_id
""")
    gf_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')
            gf_id = int(parts[0])
            tf_idx = int(parts[1].split('-')[-1])  # mount point ends in "-<index>"
            gf_map[tf_idx] = gf_id
    print(f" {len(gf_map)} groupfolders gevonden")
    # Step 2: Get storage mapping (disk folder number -> storage numeric_id).
    # NOTE(review): assumes data dir /var/www/nextcloud/data — confirm.
    print("\nStap 2: Ophalen storage mapping...")
    rows = mysql_exec("""
SELECT s.numeric_id,
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
FROM oc_storages s
WHERE s.id LIKE 'local::%/__groupfolders/%'
""")
    storage_map = {}
    for line in rows.split('\n'):
        if line.strip():
            parts = line.split('\t')
            storage_map[int(parts[1])] = int(parts[0])
    # Step 3: Check which folders already have (most of) their metadata
    print("\nStap 3: Checken welke folders al klaar zijn...")
    done_rows = mysql_exec("""
SELECT groupfolder_id, COUNT(*) as cnt
FROM oc_metavox_file_gf_meta
GROUP BY groupfolder_id
""")
    done_gf_ids = set()
    target_per_folder = FILES_PER_FOLDER * len(FILE_FIELDS)
    for line in (done_rows or "").split('\n'):
        if line.strip():
            parts = line.split('\t')
            gf_id = int(parts[0])
            cnt = int(parts[1])
            # 10% threshold: any folder with substantial progress is skipped
            # entirely rather than risking duplicate rows on resume.
            if cnt >= target_per_folder * 0.1:
                done_gf_ids.add(gf_id)
                print(f" gf_id={gf_id}: {cnt}/{target_per_folder} ({cnt*100//target_per_folder}%) - SKIP")
    print(f" {len(done_gf_ids)} folders al (grotendeels) compleet, worden overgeslagen")
    # Step 4: Insert groupfolder metadata (one row per folder per TF field)
    print("\nStap 4: Groupfolder metadata invoegen...")
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    values = []
    for tf_idx in range(1, NUM_TEAMFOLDERS + 1):
        gf_id = gf_map.get(tf_idx)
        if not gf_id:
            continue
        for fname, ftype in TF_FIELDS:
            val = escape_sql(gen_value(fname, ftype))
            values.append(f"({gf_id}, '{fname}', '{val}', '{now}', '{now}')")
    sql_file = "/tmp/gf_metadata_insert.sql"
    with open(sql_file, 'w') as f:
        f.write("INSERT INTO oc_metavox_gf_metadata (groupfolder_id, field_name, field_value, created_at, updated_at) VALUES\n")
        f.write(",\n".join(values))
        f.write(";\n")
    mysql_exec_file(sql_file)
    print(f" {len(values)} groupfolder metadata records ingevoegd")
    # Step 5: Insert file metadata per folder
    print("\nStap 5: File metadata invoegen...")
    total_inserted = 0
    for tf_idx in range(1, NUM_TEAMFOLDERS + 1):
        gf_id = gf_map.get(tf_idx)
        if not gf_id:
            continue
        if gf_id in done_gf_ids:
            # Count the skipped folder as fully done in the progress totals
            total_inserted += FILES_PER_FOLDER * len(FILE_FIELDS)
            continue
        storage_id = storage_map.get(gf_id)
        if not storage_id:
            print(f" [SKIP] Geen storage voor gf_id {gf_id}")
            continue
        # Get the fileids of this folder's dummy files
        file_ids_raw = mysql_exec(f"""
SELECT fileid FROM oc_filecache
WHERE storage = {storage_id} AND name LIKE '{{ dummy_file_prefix }}%'
""")
        file_ids = [int(x) for x in file_ids_raw.split('\n') if x.strip()]
        if not file_ids:
            print(f" [SKIP] Folder {tf_idx}: geen bestanden")
            continue
        # Generate all rows for this folder (files x fields)
        values = []
        for fid in file_ids:
            for fname, ftype in FILE_FIELDS:
                val = escape_sql(gen_value(fname, ftype))
                values.append(f"({fid}, {gf_id}, '{fname}', '{val}', '{now}', '{now}')")
        # Write in batches of SQL_BATCH_SIZE rows per INSERT statement
        for batch_start in range(0, len(values), SQL_BATCH_SIZE):
            batch = values[batch_start:batch_start + SQL_BATCH_SIZE]
            sql_file = "/tmp/file_meta_batch.sql"
            with open(sql_file, 'w') as f:
                f.write("INSERT INTO oc_metavox_file_gf_meta (file_id, groupfolder_id, field_name, field_value, created_at, updated_at) VALUES\n")
                f.write(",\n".join(batch))
                f.write(";\n")
            mysql_exec_file(sql_file)
        total_inserted += len(values)
        elapsed = time.time() - start_time
        rate = total_inserted / elapsed if elapsed > 0 else 0
        print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {len(file_ids)} files x {len(FILE_FIELDS)} fields = {len(values)} rows ({elapsed:.0f}s, totaal: {total_inserted:,}, {rate:.0f}/s)")
    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"Voltooid in {elapsed:.0f} seconden ({elapsed/3600:.1f} uur)")
    print(f"File metadata records: {total_inserted:,}")
    print(f"GF metadata records: {len(gf_map) * len(TF_FIELDS)}")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,191 @@
#!/usr/bin/env python3
"""
MetaVox Metadata Velden Setup Script
=====================================
Maakt teamfolder-velden en file-metadata-velden aan via de MetaVox OCS API.
POST /ocs/v2.php/apps/metavox/api/v1/groupfolder-fields
Payload: field_name, field_label, field_type, field_description,
field_options (array), is_required, sort_order,
applies_to_groupfolder (1=teamfolder metadata, 0=file metadata)
POST /ocs/v2.php/apps/metavox/api/v1/groupfolders/{id}/fields
Payload: field_ids (array) - wijst velden toe aan een specifieke groupfolder
"""
import json
import sys
import time
import requests
from requests.auth import HTTPBasicAuth
# --- Connection settings (rendered by Ansible/Jinja2 at deploy time) ---
NC_URL = "{{ nextcloud_url }}"
NC_USER = "{{ nextcloud_admin_user }}"
NC_PASS = "{{ nextcloud_admin_password }}"
TIMEOUT = {{ http_timeout }}  # per-request timeout in seconds
MAX_RETRIES = {{ max_retries }}  # attempts per API call (exponential backoff)
OCS_BASE = f"{NC_URL}/ocs/v2.php/apps/metavox/api/v1"
AUTH = HTTPBasicAuth(NC_USER, NC_PASS)
HEADERS = {
    "OCS-APIRequest": "true",  # required header for Nextcloud OCS endpoints
    "Accept": "application/json",
    "Content-Type": "application/json",
}
def api_request(method, url, data=None, retries=MAX_RETRIES):
    """Perform an HTTP request against the OCS API with retries.

    Retries with exponential backoff (2**attempt seconds) on connection
    errors and on unexpected HTTP status codes. Returns the last response;
    re-raises the final RequestException if the connection never succeeded.
    A 500 containing "already exists" is treated as success so reruns of
    the playbook stay idempotent.
    """
    resp = None
    for attempt in range(retries):
        try:
            resp = requests.request(
                method, url, auth=AUTH, headers=HEADERS,
                json=data, timeout=TIMEOUT,
            )
        except requests.exceptions.RequestException as e:
            print(f" [ERROR] Poging {attempt+1}/{retries}: {e}")
            if attempt < retries - 1:
                time.sleep(2 ** attempt)
                continue
            raise
        if resp.status_code in [200, 201]:
            return resp
        if resp.status_code == 500 and "already exists" in resp.text:
            print(f" [SKIP] Veld bestaat al")
            return resp
        print(f" [WARN] HTTP {resp.status_code}: {resp.text[:200]}")
        # BUG FIX: the original returned immediately after the backoff sleep,
        # so failed HTTP statuses were never actually retried. Sleep, then
        # loop for the next attempt; only the final attempt returns here.
        if attempt < retries - 1:
            time.sleep(2 ** attempt)
    return resp
def create_field(field, applies_to_groupfolder):
    """Create one metadata field definition via the OCS API.

    *field* is a dict from the playbook vars (name/type, optional
    description/options). A comma-separated options string is split into a
    list for the payload; anything that is neither a non-empty string nor a
    list becomes an empty list.
    """
    raw_options = field.get("options", "")
    if isinstance(raw_options, str) and raw_options:
        option_list = [item.strip() for item in raw_options.split(",")]
    elif isinstance(raw_options, list):
        option_list = raw_options
    else:
        option_list = []
    payload = {
        "field_name": field["name"],
        "field_label": field.get("description", field["name"]),
        "field_type": field["type"],
        "field_description": field.get("description", ""),
        "field_options": option_list,
        "is_required": False,
        "sort_order": 0,
        "applies_to_groupfolder": applies_to_groupfolder,
    }
    return api_request("POST", f"{OCS_BASE}/groupfolder-fields", payload)
def assign_fields_to_groupfolder(groupfolder_id, field_ids):
    """Assign the given field ids to one groupfolder via the OCS API."""
    endpoint = f"{OCS_BASE}/groupfolders/{groupfolder_id}/fields"
    payload = {"field_ids": field_ids}
    return api_request("POST", endpoint, payload)
def get_existing_fields():
    """Return all existing field definitions (ocs.data), or [] on any failure."""
    resp = api_request("GET", f"{OCS_BASE}/groupfolder-fields")
    if resp is not None and resp.status_code == 200:
        try:
            return resp.json()["ocs"]["data"]
        except Exception:  # non-JSON body or unexpected shape
            return []
    return []
def get_groupfolders():
    """Return all groupfolders known to MetaVox (ocs.data), or [] on failure."""
    resp = api_request("GET", f"{OCS_BASE}/groupfolders")
    if resp is not None and resp.status_code == 200:
        try:
            return resp.json()["ocs"]["data"]
        except Exception:  # non-JSON body or unexpected shape
            return []
    return []
def main():
    """Create all teamfolder- and file-level fields, then assign them everywhere.

    Three phases: create teamfolder fields (applies_to_groupfolder=1), create
    file fields (applies_to_groupfolder=0), then assign every known field id
    to every groupfolder.
    """
    print("=" * 60)
    print("MetaVox Metadata Velden Setup (OCS API)")
    print(f"Endpoint: {OCS_BASE}/groupfolder-fields")
    print("=" * 60)
    created_field_ids = []
    # ---- Teamfolder metadata-velden (applies_to_groupfolder=1) ----
    tf_fields = json.loads("""{{ teamfolder_metadata_fields | to_json }}""")
    print(f"\n[1/3] {len(tf_fields)} teamfolder-velden (applies_to_groupfolder=1)...")
    tf_created = 0
    for field in tf_fields:
        print(f" Aanmaken: {field['name']} ({field['type']})")
        resp = create_field(field, applies_to_groupfolder=1)
        if resp and resp.status_code in [200, 201]:
            tf_created += 1
            try:
                # Remember the new field id for the assignment phase below
                fid = resp.json().get("ocs", {}).get("data", {}).get("id")
                if fid:
                    created_field_ids.append(fid)
            except Exception:
                pass  # non-JSON body: field counted, id just not recorded
    print(f" -> {tf_created} teamfolder-velden created")
    # ---- File metadata-velden (applies_to_groupfolder=0) ----
    file_fields = json.loads("""{{ file_metadata_fields | to_json }}""")
    print(f"\n[2/3] {len(file_fields)} file-velden (applies_to_groupfolder=0)...")
    f_created = 0
    for field in file_fields:
        print(f" Aanmaken: {field['name']} ({field['type']})")
        resp = create_field(field, applies_to_groupfolder=0)
        if resp and resp.status_code in [200, 201]:
            f_created += 1
            try:
                fid = resp.json().get("ocs", {}).get("data", {}).get("id")
                if fid:
                    created_field_ids.append(fid)
            except Exception:
                pass
    print(f" -> {f_created} file-velden created")
    # ---- Wijs alle velden toe aan alle groupfolders ----
    print(f"\n[3/3] Velden toewijzen aan groupfolders...")
    # Prefer the server's full field list (covers pre-existing fields);
    # fall back to the ids collected during creation above.
    all_fields = get_existing_fields()
    all_field_ids = [f["id"] for f in all_fields if "id" in f]
    if not all_field_ids:
        all_field_ids = created_field_ids
    if all_field_ids:
        groupfolders = get_groupfolders()
        gf_count = 0
        for gf in groupfolders:
            # API variants expose the id under different keys
            gf_id = gf.get("id") or gf.get("group_folder_id")
            if gf_id:
                resp = assign_fields_to_groupfolder(gf_id, all_field_ids)
                if resp and resp.status_code == 200:
                    gf_count += 1
            if gf_count % 10 == 0 and gf_count > 0:
                print(f" {gf_count} groupfolders verwerkt...")
        print(f" -> {gf_count} groupfolders hebben nu alle velden")
    else:
        print(" [WARN] Geen field IDs beschikbaar")
    total = tf_created + f_created
    print(f"\n{'=' * 60}")
    print(f"Totaal: {total} metadata-velden created")
    print(f"{'=' * 60}")


if __name__ == "__main__":
    main()