Initial commit: MetaVox loadtest playbook
Ansible playbook voor het opzetten van een MetaVox loadtest omgeving: - 50 teamfolders met 10.000 bestanden elk (500K totaal) - 100 metadata velddefinities (10 teamfolder + 90 file-level) - 3-niveau mappenstructuur (10 hoofdmappen x 3 submappen) - ~43M metadata records via directe MySQL inserts - Geoptimaliseerde database indexes (7 redundante indexes gedropt) Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API voor maximale performance. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
*.retry
|
||||
*.pyc
|
||||
__pycache__/
|
||||
.vagrant/
|
||||
9
ansible.cfg
Normal file
9
ansible.cfg
Normal file
@@ -0,0 +1,9 @@
|
||||
[defaults]
# Default inventory so -i can be omitted on the command line.
inventory = inventory/hosts.yml
roles_path = roles
# Human-readable task output.
stdout_callback = yaml
bin_ansible_callbacks = true
# No *.retry files on failed runs.
retry_files_enabled = false

[ssh_connection]
# Reduce SSH round-trips; safe because requiretty is not used here.
pipelining = true
|
||||
127
cleanup.yml
Normal file
127
cleanup.yml
Normal file
@@ -0,0 +1,127 @@
|
||||
---
# =============================================================================
# MetaVox Load Test - Cleanup Playbook
# =============================================================================
# Removes all teamfolders, metadata fields, metadata records and files that
# were created by the load test.
#
# NOTE(review): step 1 TRUNCATEs the metadata tables wholesale, so it also
# wipes records that were NOT created by the load test. That is acceptable on
# a dedicated test instance — do not run this against a shared environment.
#
# Usage: ansible-playbook -i inventory/hosts.yml cleanup.yml
# =============================================================================

- name: MetaVox Load Test - Opruimen
  hosts: nextcloud_server
  gather_facts: false
  vars:
    nc_api_base: "{{ nextcloud_url }}/ocs/v2.php"
    nc_index_base: "{{ nextcloud_url }}/index.php"
    nc_auth_header: "Basic {{ (nextcloud_admin_user + ':' + nextcloud_admin_password) | b64encode }}"
    common_headers:
      OCS-APIRequest: "true"
      Accept: "application/json"
      Authorization: "{{ nc_auth_header }}"

  tasks:
    # =========================================================================
    # Step 1: Remove metadata records from the database
    # =========================================================================
    # failed_when: false (instead of ignore_errors: true) keeps the play going
    # when a table does not exist, without red "fatal" noise in the output;
    # changed_when reports change only when the statement actually succeeded.
    - name: "Cleanup: Verwijder file metadata records"
      ansible.builtin.command:
        cmd: mysql -u root {{ nextcloud_db_name }} -e "TRUNCATE TABLE oc_metavox_file_gf_meta;"
      register: truncate_file_meta
      failed_when: false
      changed_when: truncate_file_meta.rc == 0

    - name: "Cleanup: Verwijder groupfolder metadata records"
      ansible.builtin.command:
        cmd: mysql -u root {{ nextcloud_db_name }} -e "TRUNCATE TABLE oc_metavox_gf_metadata;"
      register: truncate_gf_meta
      failed_when: false
      changed_when: truncate_gf_meta.rc == 0

    - name: "Cleanup: Verwijder field assignments"
      ansible.builtin.command:
        cmd: mysql -u root {{ nextcloud_db_name }} -e "DELETE FROM oc_metavox_gf_assigns WHERE groupfolder_id IN (SELECT folder_id FROM oc_group_folders WHERE mount_point LIKE '{{ teamfolder_prefix }}-%');"
      register: delete_assigns
      failed_when: false
      changed_when: delete_assigns.rc == 0

    # =========================================================================
    # Step 2: Remove teamfolders via the groupfolders API
    # =========================================================================
    - name: "Cleanup: Haal lijst van bestaande teamfolders op"
      ansible.builtin.uri:
        url: "{{ nc_index_base }}/apps/groupfolders/folders"
        method: GET
        headers: "{{ common_headers }}"
        return_content: true
        timeout: "{{ http_timeout }}"
      register: existing_folders

    # The API returns a mapping of id -> folder info. Guard with 'is mapping'
    # so dict2items does not blow up if an empty result is serialized as a
    # list instead of an object.
    - name: "Cleanup: Filter load-test teamfolders"
      ansible.builtin.set_fact:
        loadtest_folder_ids: >-
          {{
            existing_folders.json.ocs.data | dict2items
            | selectattr('value.mount_point', 'match', teamfolder_prefix ~ '-.*')
            | map(attribute='key')
            | list
          }}
      when:
        - existing_folders.json.ocs.data is defined
        - existing_folders.json.ocs.data is mapping

    - name: "Cleanup: Verwijder {{ loadtest_folder_ids | default([]) | length }} teamfolders"
      ansible.builtin.uri:
        url: "{{ nc_index_base }}/apps/groupfolders/folders/{{ item }}"
        method: DELETE
        headers: "{{ common_headers }}"
        status_code: [200, 404]  # 404 = already gone, fine for cleanup
        timeout: "{{ http_timeout }}"
      loop: "{{ loadtest_folder_ids | default([]) }}"
      loop_control:
        label: "Folder ID {{ item }}"
        pause: 0.2  # small delay to avoid hammering the API

    # =========================================================================
    # Step 3: Remove metadata field definitions
    # =========================================================================
    - name: "Cleanup: Verwijder metadata-velden via script"
      ansible.builtin.template:
        src: templates/cleanup_metadata.py.j2
        dest: /tmp/metavox_cleanup_metadata.py
        mode: '0755'

    - name: "Cleanup: Draai metadata cleanup"
      ansible.builtin.command:
        cmd: python3 /tmp/metavox_cleanup_metadata.py
      register: cleanup_result
      changed_when: "'deleted' in cleanup_result.stdout"

    # =========================================================================
    # Step 4: Remove the load-test group and temporary files
    # =========================================================================
    - name: "Cleanup: Verwijder loadtest groep"
      ansible.builtin.uri:
        url: "{{ nc_api_base }}/cloud/groups/{{ loadtest_group }}"
        method: DELETE
        headers: "{{ common_headers }}"
        status_code: [200, 404]
        timeout: "{{ http_timeout }}"
      when: create_group | default(true)

    - name: "Cleanup: Verwijder tijdelijke scripts"
      ansible.builtin.file:
        path: "{{ item }}"
        state: absent
      loop:
        - /tmp/metavox_setup_metadata_fields.py
        - /tmp/metavox_fast_create_files.sh
        - /tmp/metavox_fast_db_register.py
        - /tmp/metavox_create_folder_structure.py
        - /tmp/metavox_fast_metadata_insert.py
        - /tmp/metavox_cleanup_metadata.py
        - /tmp/filecache_batch.sql
        - /tmp/dir_insert.sql
        - /tmp/gf_metadata_insert.sql
        - /tmp/file_meta_batch.sql

    - name: "Cleanup: Voltooid"
      ansible.builtin.debug:
        msg: |
          Cleanup voltooid!
          - {{ loadtest_folder_ids | default([]) | length }} teamfolders verwijderd
          - Metadata records opgeruimd (TRUNCATE)
          - Metadata-velden opgeruimd
          - Groep '{{ loadtest_group }}' verwijderd
|
||||
192
group_vars/all.yml
Normal file
192
group_vars/all.yml
Normal file
@@ -0,0 +1,192 @@
|
||||
# =============================================================================
# Nextcloud Server Configuration
# =============================================================================
# SECURITY NOTE(review): the admin password is stored in plaintext in version
# control. Move it to ansible-vault (or an environment lookup) before sharing
# this repository beyond the test team.
nextcloud_url: "https://seedmv.researchdrivede.src.surf-hosted.nl"
nextcloud_admin_user: "admin"
nextcloud_admin_password: "secureadminpass"
nextcloud_db_name: "nextcloud"
nextcloud_data_dir: "/var/www/nextcloud/data"
groupfolders_dir: "{{ nextcloud_data_dir }}/__groupfolders"
|
||||
|
||||
# =============================================================================
|
||||
# Groep die toegang krijgt tot de teamfolders
|
||||
# =============================================================================
|
||||
loadtest_group: "loadtest-group"
|
||||
create_group: true
|
||||
|
||||
# =============================================================================
|
||||
# Team Folders Configuratie
|
||||
# =============================================================================
|
||||
num_teamfolders: 50
|
||||
teamfolder_prefix: "LoadTest-TF"
|
||||
teamfolder_permissions: 31
|
||||
|
||||
# =============================================================================
|
||||
# Mappenstructuur per teamfolder
|
||||
# =============================================================================
|
||||
# 10 hoofdmappen x 3 submappen = 30 leaf folders per teamfolder
|
||||
folder_structure:
|
||||
Financieel: ["Facturen", "Contracten", "Begrotingen"]
|
||||
HR: ["Personeelsdossiers", "Sollicitaties", "Verlof"]
|
||||
IT: ["Projecten", "Infrastructuur", "Security"]
|
||||
Juridisch: ["Contracten", "Compliance", "Dossiers"]
|
||||
Marketing: ["Campagnes", "Materiaal", "Analyses"]
|
||||
Operations: ["Logistiek", "Inkoop", "Planning"]
|
||||
Management: ["Notulen", "Strategie", "Rapportages"]
|
||||
Onderzoek: ["Publicaties", "Data", "Experimenten"]
|
||||
Communicatie: ["Intern", "Extern", "Persberichten"]
|
||||
Archief: ["2023", "2024", "2025"]
|
||||
|
||||
# =============================================================================
|
||||
# MetaVox Metadata Velden Configuratie
|
||||
# =============================================================================
|
||||
|
||||
# 10 Teamfolder-velden (metadata die op teamfolder-niveau wordt ingesteld)
|
||||
teamfolder_metadata_fields:
|
||||
- { name: "tf_department", type: "text", description: "Afdeling" }
|
||||
- { name: "tf_project_code", type: "text", description: "Projectcode" }
|
||||
- { name: "tf_classification", type: "dropdown", description: "Classificatie", options: "Openbaar,Intern,Vertrouwelijk,Geheim" }
|
||||
- { name: "tf_owner", type: "text", description: "Eigenaar" }
|
||||
- { name: "tf_status", type: "dropdown", description: "Status", options: "Actief,Archief,Concept,Afgesloten" }
|
||||
- { name: "tf_start_date", type: "date", description: "Startdatum" }
|
||||
- { name: "tf_end_date", type: "date", description: "Einddatum" }
|
||||
- { name: "tf_budget_code", type: "text", description: "Budgetcode" }
|
||||
- { name: "tf_is_confidential", type: "checkbox", description: "Vertrouwelijk" }
|
||||
- { name: "tf_location", type: "text", description: "Locatie" }
|
||||
|
||||
# 90 File-metadata-velden (metadata die per bestand wordt ingesteld)
|
||||
file_metadata_fields:
|
||||
# Text velden (30 stuks)
|
||||
- { name: "doc_title", type: "text", description: "Documenttitel" }
|
||||
- { name: "doc_author", type: "text", description: "Auteur" }
|
||||
- { name: "doc_subject", type: "text", description: "Onderwerp" }
|
||||
- { name: "doc_keywords", type: "text", description: "Trefwoorden" }
|
||||
- { name: "doc_source", type: "text", description: "Bron" }
|
||||
- { name: "doc_language", type: "text", description: "Taal" }
|
||||
- { name: "doc_version", type: "text", description: "Versie" }
|
||||
- { name: "doc_reference", type: "text", description: "Referentie" }
|
||||
- { name: "doc_creator", type: "text", description: "Maker" }
|
||||
- { name: "doc_contributor", type: "text", description: "Bijdrager" }
|
||||
- { name: "doc_publisher", type: "text", description: "Uitgever" }
|
||||
- { name: "doc_rights", type: "text", description: "Rechten" }
|
||||
- { name: "doc_identifier", type: "text", description: "Identifier" }
|
||||
- { name: "doc_relation", type: "text", description: "Relatie" }
|
||||
- { name: "doc_coverage", type: "text", description: "Dekking" }
|
||||
- { name: "doc_abstract", type: "text", description: "Samenvatting" }
|
||||
- { name: "doc_notes", type: "text", description: "Notities" }
|
||||
- { name: "doc_contact", type: "text", description: "Contactpersoon" }
|
||||
- { name: "doc_email", type: "text", description: "E-mailadres" }
|
||||
- { name: "doc_phone", type: "text", description: "Telefoonnummer" }
|
||||
- { name: "doc_address", type: "text", description: "Adres" }
|
||||
- { name: "doc_city", type: "text", description: "Stad" }
|
||||
- { name: "doc_country", type: "text", description: "Land" }
|
||||
- { name: "doc_postal_code", type: "text", description: "Postcode" }
|
||||
- { name: "doc_organization", type: "text", description: "Organisatie" }
|
||||
- { name: "doc_unit", type: "text", description: "Afdeling" }
|
||||
- { name: "doc_role", type: "text", description: "Rol" }
|
||||
- { name: "doc_case_number", type: "text", description: "Zaaknummer" }
|
||||
- { name: "doc_invoice_number", type: "text", description: "Factuurnummer" }
|
||||
- { name: "doc_contract_number", type: "text", description: "Contractnummer" }
|
||||
|
||||
# Dropdown velden (20 stuks)
|
||||
- { name: "doc_type", type: "dropdown", description: "Documenttype", options: "Brief,Rapport,Notitie,Factuur,Contract,Offerte,Notulen,Beleidsstuk,Memo,Overig" }
|
||||
- { name: "doc_status", type: "dropdown", description: "Documentstatus", options: "Concept,Review,Goedgekeurd,Definitief,Verlopen,Ingetrokken" }
|
||||
- { name: "doc_priority", type: "dropdown", description: "Prioriteit", options: "Laag,Normaal,Hoog,Urgent,Kritiek" }
|
||||
- { name: "doc_category", type: "dropdown", description: "Categorie", options: "Financieel,Juridisch,HR,IT,Marketing,Operations,R&D,Strategie,Compliance" }
|
||||
- { name: "doc_sensitivity", type: "dropdown", description: "Gevoeligheid", options: "Openbaar,Intern,Vertrouwelijk,Strikt vertrouwelijk" }
|
||||
- { name: "doc_retention", type: "dropdown", description: "Bewaartermijn", options: "1 jaar,3 jaar,5 jaar,7 jaar,10 jaar,Permanent" }
|
||||
- { name: "doc_format", type: "dropdown", description: "Formaat", options: "PDF,Word,Excel,PowerPoint,Afbeelding,E-mail,Overig" }
|
||||
- { name: "doc_review_status", type: "dropdown", description: "Reviewstatus", options: "Niet gereviewed,In review,Goedgekeurd,Afgekeurd" }
|
||||
- { name: "doc_approval_level", type: "dropdown", description: "Goedkeuringsniveau", options: "Team,Management,Directie,Bestuur" }
|
||||
- { name: "doc_lifecycle", type: "dropdown", description: "Levenscyclus", options: "Creatie,Gebruik,Archivering,Vernietiging" }
|
||||
- { name: "doc_audience", type: "dropdown", description: "Doelgroep", options: "Intern,Extern,Bestuur,Partners,Klanten,Leveranciers" }
|
||||
- { name: "doc_region", type: "dropdown", description: "Regio", options: "Noord,Oost,Zuid,West,Centraal,Internationaal" }
|
||||
- { name: "doc_quarter", type: "dropdown", description: "Kwartaal", options: "Q1,Q2,Q3,Q4" }
|
||||
- { name: "doc_fiscal_year", type: "dropdown", description: "Boekjaar", options: "2023,2024,2025,2026,2027" }
|
||||
- { name: "doc_department", type: "dropdown", description: "Afdeling", options: "Finance,HR,IT,Legal,Marketing,Operations,R&D,Sales,Support" }
|
||||
- { name: "doc_workflow_state", type: "dropdown", description: "Workflowstatus", options: "Nieuw,In behandeling,Wachtend,Afgerond,Geannuleerd" }
|
||||
- { name: "doc_archive_reason", type: "dropdown", description: "Archiveringsreden", options: "Bewaartermijn,Afgesloten project,Wettelijke verplichting,Verzoek" }
|
||||
- { name: "doc_access_level", type: "dropdown", description: "Toegangsniveau", options: "Iedereen,Team,Management,Beperkt" }
|
||||
- { name: "doc_origin", type: "dropdown", description: "Herkomst", options: "Intern,Extern,Partner,Overheid,Klant" }
|
||||
- { name: "doc_media_type", type: "dropdown", description: "Mediatype", options: "Tekst,Afbeelding,Audio,Video,Mixed" }
|
||||
|
||||
# Date velden (20 stuks)
|
||||
- { name: "doc_created_date", type: "date", description: "Aanmaakdatum" }
|
||||
- { name: "doc_modified_date", type: "date", description: "Wijzigingsdatum" }
|
||||
- { name: "doc_published_date", type: "date", description: "Publicatiedatum" }
|
||||
- { name: "doc_expiry_date", type: "date", description: "Verloopdatum" }
|
||||
- { name: "doc_review_date", type: "date", description: "Reviewdatum" }
|
||||
- { name: "doc_approval_date", type: "date", description: "Goedkeuringsdatum" }
|
||||
- { name: "doc_archive_date", type: "date", description: "Archiveringsdatum" }
|
||||
- { name: "doc_effective_date", type: "date", description: "Ingangsdatum" }
|
||||
- { name: "doc_received_date", type: "date", description: "Ontvangstdatum" }
|
||||
- { name: "doc_sent_date", type: "date", description: "Verzenddatum" }
|
||||
- { name: "doc_signed_date", type: "date", description: "Tekendatum" }
|
||||
- { name: "doc_deadline", type: "date", description: "Deadline" }
|
||||
- { name: "doc_meeting_date", type: "date", description: "Vergaderdatum" }
|
||||
- { name: "doc_start_date", type: "date", description: "Startdatum" }
|
||||
- { name: "doc_end_date", type: "date", description: "Einddatum" }
|
||||
- { name: "doc_invoice_date", type: "date", description: "Factuurdatum" }
|
||||
- { name: "doc_payment_date", type: "date", description: "Betaaldatum" }
|
||||
- { name: "doc_due_date", type: "date", description: "Vervaldatum" }
|
||||
- { name: "doc_birth_date", type: "date", description: "Geboortedatum" }
|
||||
- { name: "doc_registration_date", type: "date", description: "Registratiedatum" }
|
||||
|
||||
# Checkbox velden (20 stuks)
|
||||
- { name: "doc_is_template", type: "checkbox", description: "Is template" }
|
||||
- { name: "doc_is_signed", type: "checkbox", description: "Is getekend" }
|
||||
- { name: "doc_is_approved", type: "checkbox", description: "Is goedgekeurd" }
|
||||
- { name: "doc_is_archived", type: "checkbox", description: "Is gearchiveerd" }
|
||||
- { name: "doc_is_public", type: "checkbox", description: "Is openbaar" }
|
||||
- { name: "doc_is_confidential", type: "checkbox", description: "Is vertrouwelijk" }
|
||||
- { name: "doc_is_final", type: "checkbox", description: "Is definitief" }
|
||||
- { name: "doc_is_draft", type: "checkbox", description: "Is concept" }
|
||||
- { name: "doc_needs_review", type: "checkbox", description: "Review nodig" }
|
||||
- { name: "doc_needs_approval", type: "checkbox", description: "Goedkeuring nodig" }
|
||||
- { name: "doc_has_attachments", type: "checkbox", description: "Heeft bijlagen" }
|
||||
- { name: "doc_is_scanned", type: "checkbox", description: "Is gescand" }
|
||||
- { name: "doc_is_ocr", type: "checkbox", description: "OCR verwerkt" }
|
||||
- { name: "doc_is_encrypted", type: "checkbox", description: "Is versleuteld" }
|
||||
- { name: "doc_is_compressed", type: "checkbox", description: "Is gecomprimeerd" }
|
||||
- { name: "doc_is_original", type: "checkbox", description: "Is origineel" }
|
||||
- { name: "doc_is_copy", type: "checkbox", description: "Is kopie" }
|
||||
- { name: "doc_requires_action", type: "checkbox", description: "Actie vereist" }
|
||||
- { name: "doc_is_billable", type: "checkbox", description: "Is factureerbaar" }
|
||||
- { name: "doc_is_completed", type: "checkbox", description: "Is afgerond" }
|
||||
|
||||
# =============================================================================
|
||||
# Dummy Bestanden Configuratie
|
||||
# =============================================================================
|
||||
files_per_teamfolder: 10000
|
||||
dummy_file_content: "Dit is een dummy bestand voor MetaVox load testing. Aangemaakt door Ansible."
|
||||
dummy_file_extension: "txt"
|
||||
dummy_file_prefix: "loadtest-doc"
|
||||
|
||||
# =============================================================================
|
||||
# Performance Configuratie
|
||||
# =============================================================================
|
||||
sql_batch_size: 2000
|
||||
http_timeout: 300
|
||||
max_retries: 3
|
||||
|
||||
# =============================================================================
|
||||
# Database Index Optimalisatie
|
||||
# =============================================================================
|
||||
# Bij grote hoeveelheden metadata records moeten overbodige indexes
|
||||
# op oc_metavox_file_gf_meta gedropt worden voor performance en diskruimte.
|
||||
#
|
||||
# BEHOUDEN (4 indexes):
|
||||
# - PRIMARY
|
||||
# - mf_file_gf_meta_unique (file_id, groupfolder_id, field_name)
|
||||
# - idx_file_gf_gf_lookup (groupfolder_id, field_name, field_value)
|
||||
# - idx_gf_file_meta_filter (field_name, field_value, groupfolder_id)
|
||||
#
|
||||
# DROPPEN (7 redundante indexes):
|
||||
drop_indexes:
|
||||
- "idx_file_gf_composite"
|
||||
- "mf_file_gf_meta_file"
|
||||
- "mf_file_gf_meta_gf"
|
||||
- "mf_file_gf_meta_field"
|
||||
- "idx_gf_file_meta_file_id"
|
||||
- "idx_gf_file_meta_timestamps"
|
||||
- "idx_file_gf_updated"
|
||||
6
inventory/hosts.yml
Normal file
6
inventory/hosts.yml
Normal file
@@ -0,0 +1,6 @@
|
||||
---
# Single-host inventory: the load test runs locally on the Nextcloud server
# itself (no SSH), hence the local connection plugin.
all:
  hosts:
    nextcloud_server:
      ansible_connection: local
      ansible_python_interpreter: /usr/bin/python3
|
||||
317
site.yml
Normal file
317
site.yml
Normal file
@@ -0,0 +1,317 @@
|
||||
---
|
||||
# =============================================================================
|
||||
# MetaVox Load Test - Hoofd Playbook
|
||||
# =============================================================================
|
||||
# Maakt teamfolders, metadata-velden, bestanden en metadata-records aan op een
|
||||
# Nextcloud-instantie met MetaVox.
|
||||
#
|
||||
# Gebruikt directe filesystem writes en MySQL inserts i.p.v. WebDAV/API
|
||||
# voor maximale snelheid (~500K bestanden + ~43M metadata records in < 2 uur).
|
||||
#
|
||||
# Gebruik: ansible-playbook -i inventory/hosts.yml site.yml
|
||||
# Tags: precheck, teamfolders, metadata-fields, files, db-register,
|
||||
# folder-structure, drop-indexes, metadata-records
|
||||
# =============================================================================
|
||||
|
||||
- name: MetaVox Load Test - Setup en Data Generatie
|
||||
hosts: nextcloud_server
|
||||
gather_facts: true
|
||||
vars:
|
||||
nc_api_base: "{{ nextcloud_url }}/ocs/v2.php"
|
||||
nc_dav_base: "{{ nextcloud_url }}/remote.php/dav"
|
||||
nc_index_base: "{{ nextcloud_url }}/index.php"
|
||||
nc_auth_header: "Basic {{ (nextcloud_admin_user + ':' + nextcloud_admin_password) | b64encode }}"
|
||||
common_headers:
|
||||
OCS-APIRequest: "true"
|
||||
Accept: "application/json"
|
||||
Authorization: "{{ nc_auth_header }}"
|
||||
|
||||
tasks:
|
||||
# =========================================================================
|
||||
# FASE 0: Pre-checks
|
||||
# =========================================================================
|
||||
- name: "Pre-check: Controleer of Nextcloud bereikbaar is"
|
||||
ansible.builtin.uri:
|
||||
url: "{{ nextcloud_url }}/status.php"
|
||||
method: GET
|
||||
return_content: true
|
||||
timeout: 30
|
||||
register: nc_status
|
||||
failed_when: nc_status.status != 200
|
||||
tags: [precheck]
|
||||
|
||||
- name: "Pre-check: Toon Nextcloud versie"
|
||||
ansible.builtin.debug:
|
||||
msg: "Nextcloud is bereikbaar. Versie: {{ (nc_status.content | from_json).versionstring }}"
|
||||
tags: [precheck]
|
||||
|
||||
- name: "Pre-check: Controleer beschikbare schijfruimte"
|
||||
ansible.builtin.command: df -h /var/www/nextcloud/data
|
||||
register: disk_check
|
||||
changed_when: false
|
||||
tags: [precheck]
|
||||
|
||||
- name: "Pre-check: Toon schijfruimte"
|
||||
ansible.builtin.debug:
|
||||
msg: "{{ disk_check.stdout }}"
|
||||
tags: [precheck]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 0.5: Maak groep aan (optioneel)
|
||||
# =========================================================================
|
||||
- name: "Setup: Maak loadtest groep aan"
|
||||
ansible.builtin.uri:
|
||||
url: "{{ nc_api_base }}/cloud/groups"
|
||||
method: POST
|
||||
headers: "{{ common_headers }}"
|
||||
body_format: form-urlencoded
|
||||
body:
|
||||
groupid: "{{ loadtest_group }}"
|
||||
status_code: [200, 400]
|
||||
timeout: "{{ http_timeout }}"
|
||||
when: create_group | default(true)
|
||||
tags: [setup, teamfolders]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 1: Teamfolders aanmaken via API
|
||||
# =========================================================================
|
||||
- name: "Teamfolders: Genereer lijst van teamfolder namen"
|
||||
ansible.builtin.set_fact:
|
||||
teamfolder_names: "{{ teamfolder_names | default([]) + [teamfolder_prefix + '-%03d' | format(item)] }}"
|
||||
loop: "{{ range(1, num_teamfolders + 1) | list }}"
|
||||
tags: [teamfolders]
|
||||
|
||||
- name: "Teamfolders: Maak {{ num_teamfolders }} teamfolders aan"
|
||||
ansible.builtin.uri:
|
||||
url: "{{ nc_index_base }}/apps/groupfolders/folders"
|
||||
method: POST
|
||||
headers: "{{ common_headers }}"
|
||||
body_format: form-urlencoded
|
||||
body:
|
||||
mountpoint: "{{ item }}"
|
||||
return_content: true
|
||||
status_code: [200]
|
||||
timeout: "{{ http_timeout }}"
|
||||
loop: "{{ teamfolder_names }}"
|
||||
register: teamfolder_results
|
||||
loop_control:
|
||||
label: "{{ item }}"
|
||||
pause: 0.2
|
||||
tags: [teamfolders]
|
||||
|
||||
- name: "Teamfolders: Verzamel folder IDs"
|
||||
ansible.builtin.set_fact:
|
||||
teamfolder_ids: "{{ teamfolder_results.results | map(attribute='json') | map(attribute='ocs') | map(attribute='data') | map(attribute='id') | list }}"
|
||||
tags: [teamfolders]
|
||||
when: teamfolder_results is defined
|
||||
|
||||
- name: "Teamfolders: Ken groep '{{ loadtest_group }}' toe aan elke teamfolder"
|
||||
ansible.builtin.uri:
|
||||
url: "{{ nc_index_base }}/apps/groupfolders/folders/{{ item }}/groups"
|
||||
method: POST
|
||||
headers: "{{ common_headers }}"
|
||||
body_format: form-urlencoded
|
||||
body:
|
||||
group: "{{ loadtest_group }}"
|
||||
status_code: [200]
|
||||
timeout: "{{ http_timeout }}"
|
||||
loop: "{{ teamfolder_ids }}"
|
||||
loop_control:
|
||||
label: "Folder ID {{ item }}"
|
||||
pause: 0.1
|
||||
tags: [teamfolders]
|
||||
|
||||
- name: "Teamfolders: Stel permissies in voor de groep"
|
||||
ansible.builtin.uri:
|
||||
url: "{{ nc_index_base }}/apps/groupfolders/folders/{{ item }}/groups/{{ loadtest_group }}"
|
||||
method: POST
|
||||
headers: "{{ common_headers }}"
|
||||
body_format: form-urlencoded
|
||||
body:
|
||||
permissions: "{{ teamfolder_permissions }}"
|
||||
status_code: [200]
|
||||
timeout: "{{ http_timeout }}"
|
||||
loop: "{{ teamfolder_ids }}"
|
||||
loop_control:
|
||||
label: "Folder ID {{ item }}"
|
||||
pause: 0.1
|
||||
tags: [teamfolders]
|
||||
|
||||
- name: "Teamfolders: Resultaat"
|
||||
ansible.builtin.debug:
|
||||
msg: "{{ num_teamfolders }} teamfolders aangemaakt met IDs: {{ teamfolder_ids[:5] }}... (eerste 5 getoond)"
|
||||
tags: [teamfolders]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 2: Metadata velden aanmaken (MetaVox OCS API)
|
||||
# =========================================================================
|
||||
- name: "Metadata: Kopieer metadata-setup script"
|
||||
ansible.builtin.template:
|
||||
src: templates/setup_metadata_fields.py.j2
|
||||
dest: /tmp/metavox_setup_metadata_fields.py
|
||||
mode: '0755'
|
||||
tags: [metadata-fields]
|
||||
|
||||
- name: "Metadata: Maak metadata velddefinities aan via MetaVox API"
|
||||
ansible.builtin.command:
|
||||
cmd: python3 /tmp/metavox_setup_metadata_fields.py
|
||||
environment:
|
||||
PYTHONUNBUFFERED: "1"
|
||||
register: metadata_fields_result
|
||||
changed_when: "'created' in metadata_fields_result.stdout"
|
||||
tags: [metadata-fields]
|
||||
|
||||
- name: "Metadata: Toon resultaat"
|
||||
ansible.builtin.debug:
|
||||
msg: "{{ metadata_fields_result.stdout_lines | default(['Geen output']) }}"
|
||||
tags: [metadata-fields]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 3: Bestanden aanmaken (direct filesystem)
|
||||
# =========================================================================
|
||||
- name: "Bestanden: Kopieer file-creatie script"
|
||||
ansible.builtin.template:
|
||||
src: templates/fast_create_files.sh.j2
|
||||
dest: /tmp/metavox_fast_create_files.sh
|
||||
mode: '0755'
|
||||
tags: [files]
|
||||
|
||||
- name: "Bestanden: Maak {{ files_per_teamfolder * num_teamfolders }} bestanden aan op filesystem"
|
||||
ansible.builtin.command:
|
||||
cmd: bash /tmp/metavox_fast_create_files.sh
|
||||
register: create_files_result
|
||||
changed_when: true
|
||||
async: 7200
|
||||
poll: 30
|
||||
tags: [files]
|
||||
|
||||
- name: "Bestanden: Toon resultaat"
|
||||
ansible.builtin.debug:
|
||||
msg: "{{ create_files_result.stdout_lines[-10:] | default(['Geen output']) }}"
|
||||
tags: [files]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 4: Bestanden registreren in database (direct MySQL)
|
||||
# =========================================================================
|
||||
- name: "DB Register: Kopieer database registratie script"
|
||||
ansible.builtin.template:
|
||||
src: templates/fast_db_register.py.j2
|
||||
dest: /tmp/metavox_fast_db_register.py
|
||||
mode: '0755'
|
||||
tags: [db-register]
|
||||
|
||||
- name: "DB Register: Registreer bestanden in oc_filecache"
|
||||
ansible.builtin.command:
|
||||
cmd: python3 /tmp/metavox_fast_db_register.py
|
||||
environment:
|
||||
PYTHONUNBUFFERED: "1"
|
||||
register: db_register_result
|
||||
changed_when: true
|
||||
async: 7200
|
||||
poll: 30
|
||||
tags: [db-register]
|
||||
|
||||
- name: "DB Register: Toon resultaat"
|
||||
ansible.builtin.debug:
|
||||
msg: "{{ db_register_result.stdout_lines[-10:] | default(['Geen output']) }}"
|
||||
tags: [db-register]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 5: Mappenstructuur aanmaken en bestanden verplaatsen
|
||||
# =========================================================================
|
||||
- name: "Mappen: Kopieer mappenstructuur script"
|
||||
ansible.builtin.template:
|
||||
src: templates/create_folder_structure.py.j2
|
||||
dest: /tmp/metavox_create_folder_structure.py
|
||||
mode: '0755'
|
||||
tags: [folder-structure]
|
||||
|
||||
- name: "Mappen: Maak mappenstructuur aan en verplaats bestanden"
|
||||
ansible.builtin.command:
|
||||
cmd: python3 /tmp/metavox_create_folder_structure.py
|
||||
environment:
|
||||
PYTHONUNBUFFERED: "1"
|
||||
register: folder_structure_result
|
||||
changed_when: true
|
||||
async: 7200
|
||||
poll: 30
|
||||
tags: [folder-structure]
|
||||
|
||||
- name: "Mappen: Toon resultaat"
|
||||
ansible.builtin.debug:
|
||||
msg: "{{ folder_structure_result.stdout_lines[-10:] | default(['Geen output']) }}"
|
||||
tags: [folder-structure]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 6: Drop overbodige indexes voor performance
|
||||
# =========================================================================
|
||||
- name: "Indexes: Drop overbodige indexes op oc_metavox_file_gf_meta"
|
||||
ansible.builtin.command:
|
||||
cmd: >
|
||||
mysql -u root {{ nextcloud_db_name }} -e
|
||||
"DROP INDEX IF EXISTS {{ item }} ON oc_metavox_file_gf_meta;"
|
||||
loop: "{{ drop_indexes }}"
|
||||
loop_control:
|
||||
label: "DROP INDEX {{ item }}"
|
||||
ignore_errors: true
|
||||
tags: [drop-indexes]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 7: Metadata records invoegen (direct MySQL)
|
||||
# =========================================================================
|
||||
- name: "Metadata Records: Kopieer metadata insert script"
|
||||
ansible.builtin.template:
|
||||
src: templates/fast_metadata_insert.py.j2
|
||||
dest: /tmp/metavox_fast_metadata_insert.py
|
||||
mode: '0755'
|
||||
tags: [metadata-records]
|
||||
|
||||
- name: "Metadata Records: Voeg metadata records in via MySQL"
|
||||
ansible.builtin.command:
|
||||
cmd: python3 /tmp/metavox_fast_metadata_insert.py
|
||||
environment:
|
||||
PYTHONUNBUFFERED: "1"
|
||||
register: metadata_insert_result
|
||||
changed_when: true
|
||||
async: 86400
|
||||
poll: 60
|
||||
tags: [metadata-records]
|
||||
|
||||
- name: "Metadata Records: Toon resultaat"
|
||||
ansible.builtin.debug:
|
||||
msg: "{{ metadata_insert_result.stdout_lines[-10:] | default(['Geen output']) }}"
|
||||
tags: [metadata-records]
|
||||
|
||||
# =========================================================================
|
||||
# FASE 8: Fix ownership
|
||||
# =========================================================================
|
||||
- name: "Fix: Zet eigenaar op www-data voor groupfolders"
|
||||
ansible.builtin.file:
|
||||
path: "{{ groupfolders_dir }}"
|
||||
owner: www-data
|
||||
group: www-data
|
||||
recurse: true
|
||||
tags: [fix-ownership]
|
||||
|
||||
# =========================================================================
|
||||
# SAMENVATTING
|
||||
# =========================================================================
|
||||
- name: "Samenvatting"
|
||||
ansible.builtin.debug:
|
||||
msg: |
|
||||
============================================================
|
||||
MetaVox Load Test - Voltooid!
|
||||
============================================================
|
||||
Teamfolders aangemaakt: {{ num_teamfolders }}
|
||||
Teamfolder metadata-velden: {{ teamfolder_metadata_fields | length }}
|
||||
File metadata-velden: {{ file_metadata_fields | length }}
|
||||
Totaal metadata-velden: {{ teamfolder_metadata_fields | length + file_metadata_fields | length }}
|
||||
Bestanden per teamfolder: {{ files_per_teamfolder }}
|
||||
Totaal bestanden: {{ files_per_teamfolder * num_teamfolders }}
|
||||
Mappenstructuur: 10 hoofdmappen x 3 submappen
|
||||
Metadata records: ~{{ files_per_teamfolder * num_teamfolders * (file_metadata_fields | length) }}
|
||||
Gedropte indexes: {{ drop_indexes | length }}
|
||||
============================================================
|
||||
Methode: Direct filesystem + MySQL (geen WebDAV/API)
|
||||
============================================================
|
||||
tags: [always]
|
||||
80
templates/cleanup_metadata.py.j2
Normal file
80
templates/cleanup_metadata.py.j2
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MetaVox Metadata Cleanup Script
|
||||
Verwijdert metadata-velddefinities via de OCS API.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
|
||||
NC_URL = "{{ nextcloud_url }}"
|
||||
NC_USER = "{{ nextcloud_admin_user }}"
|
||||
NC_PASS = "{{ nextcloud_admin_password }}"
|
||||
TIMEOUT = {{ http_timeout }}
|
||||
|
||||
WEB_API_BASE = f"{NC_URL}/index.php/apps/metavox/api"
|
||||
OCS_BASE = f"{NC_URL}/ocs/v2.php/apps/metavox/api/v1"
|
||||
|
||||
AUTH = HTTPBasicAuth(NC_USER, NC_PASS)
|
||||
HEADERS = {
|
||||
"OCS-APIRequest": "true",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
|
||||
TF_FIELDS = json.loads("""{{ teamfolder_metadata_fields | to_json }}""")
|
||||
FILE_FIELDS = json.loads("""{{ file_metadata_fields | to_json }}""")
|
||||
|
||||
FIELD_NAMES_TO_DELETE = set(f["name"] for f in TF_FIELDS + FILE_FIELDS)
|
||||
|
||||
|
||||
def get_all_fields():
|
||||
url = f"{OCS_BASE}/groupfolder-fields"
|
||||
try:
|
||||
resp = requests.get(url, auth=AUTH, headers=HEADERS, timeout=TIMEOUT)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
if "ocs" in data and "data" in data["ocs"]:
|
||||
return data["ocs"]["data"]
|
||||
except Exception as e:
|
||||
print(f"[ERROR] Ophalen velden: {e}")
|
||||
return []
|
||||
|
||||
|
||||
def delete_field(field_id):
|
||||
url = f"{WEB_API_BASE}/groupfolder-fields/{field_id}"
|
||||
try:
|
||||
resp = requests.delete(url, auth=AUTH, headers=HEADERS, timeout=TIMEOUT)
|
||||
return resp.status_code in [200, 204, 404]
|
||||
except Exception as e:
|
||||
print(f" [ERROR] Verwijderen veld {field_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
print("MetaVox Metadata Cleanup")
|
||||
print("=" * 40)
|
||||
|
||||
fields = get_all_fields()
|
||||
print(f"Gevonden: {len(fields)} velden totaal")
|
||||
|
||||
deleted = 0
|
||||
skipped = 0
|
||||
for field in fields:
|
||||
fname = field.get("field_name", "")
|
||||
fid = field.get("id")
|
||||
if fname in FIELD_NAMES_TO_DELETE and fid:
|
||||
if delete_field(fid):
|
||||
deleted += 1
|
||||
else:
|
||||
print(f" [FAIL] Kon veld '{fname}' (id={fid}) niet verwijderen")
|
||||
else:
|
||||
skipped += 1
|
||||
|
||||
print(f"\n{deleted} velden deleted, {skipped} overgeslagen")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
257
templates/create_folder_structure.py.j2
Normal file
257
templates/create_folder_structure.py.j2
Normal file
@@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Create Folder Structure - Mappenstructuur aanmaken en bestanden verplaatsen
|
||||
===========================================================================
|
||||
Maakt een 3-niveau mappenstructuur aan binnen elke teamfolder en verplaatst
|
||||
de bestanden gelijkmatig over de leaf folders.
|
||||
|
||||
10 hoofdmappen x 3 submappen = 30 leaf folders per teamfolder.
|
||||
Bestanden worden verplaatst met os.rename (geen extra diskruimte nodig).
|
||||
Database (oc_filecache) wordt bijgewerkt met nieuwe paden en parent IDs.
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import hashlib
|
||||
import time
|
||||
|
||||
DB_NAME = "{{ nextcloud_db_name }}"
|
||||
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
|
||||
NUM_TEAMFOLDERS = {{ num_teamfolders }}
|
||||
FILES_PER_FOLDER = {{ files_per_teamfolder }}
|
||||
|
||||
FOLDER_STRUCTURE = {{ folder_structure | to_json }}
|
||||
|
||||
# Build flat list of leaf paths
|
||||
LEAF_FOLDERS = []
|
||||
for main, subs in FOLDER_STRUCTURE.items():
|
||||
for sub in subs:
|
||||
LEAF_FOLDERS.append(f"{main}/{sub}")
|
||||
|
||||
print(f"Folder structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen")
|
||||
|
||||
|
||||
def mysql_exec(sql):
|
||||
result = subprocess.run(
|
||||
["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
|
||||
capture_output=True, text=True, timeout=120
|
||||
)
|
||||
if result.returncode != 0 and result.stderr.strip():
|
||||
print(f" [SQL ERROR] {result.stderr[:200]}")
|
||||
return result.stdout.strip()
|
||||
|
||||
|
||||
def mysql_exec_file(filepath):
|
||||
with open(filepath) as f:
|
||||
result = subprocess.run(
|
||||
["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
|
||||
stdin=f, capture_output=True, text=True, timeout=300
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f" [SQL ERROR] {result.stderr[:200]}")
|
||||
return result
|
||||
|
||||
|
||||
def escape_sql(s):
|
||||
return s.replace("\\", "\\\\").replace("'", "\\'")
|
||||
|
||||
|
||||
def main():
|
||||
start_time = time.time()
|
||||
now = int(time.time())
|
||||
|
||||
# Get storage mapping
|
||||
print("Ophalen storage mapping...")
|
||||
rows = mysql_exec("""
|
||||
SELECT s.numeric_id,
|
||||
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
|
||||
FROM oc_storages s
|
||||
WHERE s.id LIKE 'local::%/__groupfolders/%'
|
||||
""")
|
||||
storage_map = {}
|
||||
for line in rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
storage_map[int(parts[1])] = int(parts[0])
|
||||
|
||||
# Get groupfolder mapping
|
||||
rows = mysql_exec("""
|
||||
SELECT folder_id, mount_point FROM oc_group_folders
|
||||
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
|
||||
ORDER BY folder_id
|
||||
""")
|
||||
gf_map = {}
|
||||
for line in rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
gf_id = int(parts[0])
|
||||
tf_idx = int(parts[1].split('-')[-1])
|
||||
if tf_idx <= NUM_TEAMFOLDERS:
|
||||
gf_map[tf_idx] = gf_id
|
||||
|
||||
print(f"{len(gf_map)} teamfolders gevonden")
|
||||
|
||||
# Get mimetype ID for directories
|
||||
dir_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='httpd/unix-directory'"))
|
||||
|
||||
total_moved = 0
|
||||
|
||||
for tf_idx in sorted(gf_map.keys()):
|
||||
gf_id = gf_map[tf_idx]
|
||||
storage_id = storage_map.get(gf_id)
|
||||
if not storage_id:
|
||||
print(f" [SKIP] Geen storage voor gf_id {gf_id}")
|
||||
continue
|
||||
|
||||
folder_num = gf_id
|
||||
files_dir = f"{GROUPFOLDERS_DIR}/{folder_num}/files"
|
||||
|
||||
# Check if already restructured
|
||||
first_sub = list(FOLDER_STRUCTURE.keys())[0]
|
||||
if os.path.isdir(f"{files_dir}/{first_sub}"):
|
||||
print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): al gestructureerd")
|
||||
total_moved += FILES_PER_FOLDER
|
||||
continue
|
||||
|
||||
# Get parent_id for 'files' directory
|
||||
files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
|
||||
if not files_parent:
|
||||
print(f" [SKIP] Geen files entry voor storage {storage_id}")
|
||||
continue
|
||||
files_parent_id = int(files_parent)
|
||||
|
||||
# Step 1: Create directories on disk
|
||||
for main_folder, subs in FOLDER_STRUCTURE.items():
|
||||
os.makedirs(f"{files_dir}/{main_folder}", exist_ok=True)
|
||||
for sub in subs:
|
||||
os.makedirs(f"{files_dir}/{main_folder}/{sub}", exist_ok=True)
|
||||
|
||||
# Step 2: Insert main directory entries into oc_filecache
|
||||
dir_values = []
|
||||
for main_folder in FOLDER_STRUCTURE.keys():
|
||||
path = f"files/{main_folder}"
|
||||
path_hash = hashlib.md5(path.encode()).hexdigest()
|
||||
etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
|
||||
dir_values.append(
|
||||
f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {files_parent_id}, "
|
||||
f"'{escape_sql(main_folder)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
|
||||
)
|
||||
|
||||
sql_file = "/tmp/dir_insert.sql"
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
|
||||
f.write(",\n".join(dir_values))
|
||||
f.write(";\n")
|
||||
mysql_exec_file(sql_file)
|
||||
|
||||
# Get main folder IDs
|
||||
main_ids = {}
|
||||
for main_folder in FOLDER_STRUCTURE.keys():
|
||||
path = f"files/{main_folder}"
|
||||
fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
|
||||
if fid:
|
||||
main_ids[main_folder] = int(fid)
|
||||
|
||||
# Insert sub folder entries
|
||||
sub_values = []
|
||||
for main_folder, subs in FOLDER_STRUCTURE.items():
|
||||
parent_id = main_ids.get(main_folder, files_parent_id)
|
||||
for sub in subs:
|
||||
path = f"files/{main_folder}/{sub}"
|
||||
path_hash = hashlib.md5(path.encode()).hexdigest()
|
||||
etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
|
||||
sub_values.append(
|
||||
f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
|
||||
f"'{escape_sql(sub)}', {dir_mime}, {dir_mime}, 0, {now}, {now}, '{etag}', 31, 0)"
|
||||
)
|
||||
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("INSERT IGNORE INTO oc_filecache (storage, path, path_hash, parent, name, mimetype, mimepart, size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
|
||||
f.write(",\n".join(sub_values))
|
||||
f.write(";\n")
|
||||
mysql_exec_file(sql_file)
|
||||
|
||||
# Get sub folder IDs
|
||||
sub_ids = {}
|
||||
for main_folder, subs in FOLDER_STRUCTURE.items():
|
||||
for sub in subs:
|
||||
path = f"files/{main_folder}/{sub}"
|
||||
fid = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = '{escape_sql(path)}'")
|
||||
if fid:
|
||||
sub_ids[f"{main_folder}/{sub}"] = int(fid)
|
||||
|
||||
# Step 3: Move files on disk and update DB
|
||||
file_rows = mysql_exec(f"""
|
||||
SELECT fileid, name FROM oc_filecache
|
||||
WHERE storage = {storage_id} AND path LIKE 'files/{{ dummy_file_prefix }}%'
|
||||
ORDER BY fileid
|
||||
""")
|
||||
files = []
|
||||
for line in file_rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
files.append((int(parts[0]), parts[1]))
|
||||
|
||||
if not files:
|
||||
print(f" [SKIP] Folder {tf_idx}: geen bestanden")
|
||||
continue
|
||||
|
||||
# Distribute files across leaf folders
|
||||
files_per_leaf = len(files) // len(LEAF_FOLDERS)
|
||||
remainder = len(files) % len(LEAF_FOLDERS)
|
||||
|
||||
file_idx = 0
|
||||
for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
|
||||
leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
|
||||
count = files_per_leaf + (1 if leaf_idx < remainder else 0)
|
||||
|
||||
for _ in range(count):
|
||||
if file_idx >= len(files):
|
||||
break
|
||||
fid, fname = files[file_idx]
|
||||
old_path = f"{files_dir}/{fname}"
|
||||
new_path_disk = f"{files_dir}/{leaf_path}/{fname}"
|
||||
|
||||
if os.path.exists(old_path):
|
||||
os.rename(old_path, new_path_disk)
|
||||
file_idx += 1
|
||||
|
||||
# Batch update DB
|
||||
BATCH = 2000
|
||||
for batch_start in range(0, len(files), BATCH):
|
||||
updates = []
|
||||
for leaf_idx, leaf_path in enumerate(LEAF_FOLDERS):
|
||||
leaf_parent_id = sub_ids.get(leaf_path, files_parent_id)
|
||||
count = files_per_leaf + (1 if leaf_idx < remainder else 0)
|
||||
leaf_start = sum(files_per_leaf + (1 if i < remainder else 0) for i in range(leaf_idx))
|
||||
leaf_end = leaf_start + count
|
||||
|
||||
for i in range(max(leaf_start, batch_start), min(leaf_end, batch_start + BATCH)):
|
||||
if i >= len(files):
|
||||
break
|
||||
fid, fname = files[i]
|
||||
new_path_db = f"files/{leaf_path}/{fname}"
|
||||
new_path_hash = hashlib.md5(new_path_db.encode()).hexdigest()
|
||||
updates.append(f"UPDATE oc_filecache SET path='{escape_sql(new_path_db)}', path_hash='{new_path_hash}', parent={leaf_parent_id} WHERE fileid={fid};")
|
||||
|
||||
if updates:
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("\n".join(updates))
|
||||
mysql_exec_file(sql_file)
|
||||
|
||||
# Fix ownership
|
||||
os.system(f"chown -R www-data:www-data {files_dir}")
|
||||
|
||||
total_moved += len(files)
|
||||
elapsed = time.time() - start_time
|
||||
print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {len(files)} bestanden verplaatst naar {len(LEAF_FOLDERS)} submappen ({elapsed:.0f}s, totaal: {total_moved})")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Voltooid in {elapsed:.0f} seconden")
|
||||
print(f"Totaal verplaatst: {total_moved}")
|
||||
print(f"Structuur: {len(FOLDER_STRUCTURE)} hoofdmappen, {len(LEAF_FOLDERS)} submappen per teamfolder")
|
||||
print(f"{'='*60}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
73
templates/fast_create_files.sh.j2
Normal file
73
templates/fast_create_files.sh.j2
Normal file
@@ -0,0 +1,73 @@
|
||||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# Fast File Creator - Direct filesystem
|
||||
# =============================================================================
|
||||
# Maakt {{ files_per_teamfolder }} bestanden per teamfolder aan op het filesystem.
|
||||
# Veel sneller dan WebDAV uploads (~500K bestanden in ~40 minuten).
|
||||
# =============================================================================
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
GROUPFOLDERS_DIR="{{ groupfolders_dir }}"
|
||||
NUM_TEAMFOLDERS={{ num_teamfolders }}
|
||||
FILES_PER_FOLDER={{ files_per_teamfolder }}
|
||||
FILE_PREFIX="{{ dummy_file_prefix }}"
|
||||
FILE_EXT="{{ dummy_file_extension }}"
|
||||
FILE_CONTENT="{{ dummy_file_content }}"
|
||||
|
||||
echo "============================================================"
|
||||
echo "Fast File Creator - Direct Filesystem"
|
||||
echo "Doel: ${NUM_TEAMFOLDERS} folders x ${FILES_PER_FOLDER} bestanden"
|
||||
echo "============================================================"
|
||||
|
||||
# Haal groupfolder mapping op (API folder_id -> disk folder number)
|
||||
GF_IDS=$(mysql -u root {{ nextcloud_db_name }} -N -e \
|
||||
"SELECT folder_id FROM oc_group_folders
|
||||
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
|
||||
ORDER BY folder_id
|
||||
LIMIT ${NUM_TEAMFOLDERS}")
|
||||
|
||||
TOTAL_CREATED=0
|
||||
FOLDER_COUNT=0
|
||||
START_TIME=$(date +%s)
|
||||
|
||||
for GF_ID in ${GF_IDS}; do
|
||||
FOLDER_COUNT=$((FOLDER_COUNT + 1))
|
||||
FILES_DIR="${GROUPFOLDERS_DIR}/${GF_ID}/files"
|
||||
|
||||
# Maak files directory aan als die niet bestaat
|
||||
mkdir -p "${FILES_DIR}"
|
||||
|
||||
# Tel bestaande bestanden
|
||||
EXISTING=$(find "${FILES_DIR}" -maxdepth 1 -name "${FILE_PREFIX}-*.${FILE_EXT}" 2>/dev/null | wc -l)
|
||||
if [ "${EXISTING}" -ge "${FILES_PER_FOLDER}" ]; then
|
||||
echo "[SKIP] Folder ${FOLDER_COUNT} (gf_id=${GF_ID}): ${EXISTING} bestanden bestaan al"
|
||||
TOTAL_CREATED=$((TOTAL_CREATED + EXISTING))
|
||||
continue
|
||||
fi
|
||||
|
||||
# Maak bestanden aan
|
||||
CREATED=0
|
||||
for i in $(seq -w 1 ${FILES_PER_FOLDER}); do
|
||||
FNAME="${FILE_PREFIX}-${i}.${FILE_EXT}"
|
||||
FPATH="${FILES_DIR}/${FNAME}"
|
||||
if [ ! -f "${FPATH}" ]; then
|
||||
echo "${FILE_CONTENT}" > "${FPATH}"
|
||||
CREATED=$((CREATED + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
# Fix ownership
|
||||
chown -R www-data:www-data "${FILES_DIR}"
|
||||
|
||||
TOTAL_CREATED=$((TOTAL_CREATED + FILES_PER_FOLDER))
|
||||
ELAPSED=$(( $(date +%s) - START_TIME ))
|
||||
echo "[DONE] Folder ${FOLDER_COUNT} (gf_id=${GF_ID}): ${CREATED} nieuw aangemaakt (${ELAPSED}s, totaal: ${TOTAL_CREATED})"
|
||||
done
|
||||
|
||||
ELAPSED=$(( $(date +%s) - START_TIME ))
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo "Voltooid in ${ELAPSED} seconden"
|
||||
echo "Totaal bestanden: ${TOTAL_CREATED}"
|
||||
echo "============================================================"
|
||||
174
templates/fast_db_register.py.j2
Normal file
174
templates/fast_db_register.py.j2
Normal file
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fast DB Register - Direct MySQL insert into oc_filecache
|
||||
=========================================================
|
||||
Registreert bestanden die op het filesystem zijn aangemaakt in de Nextcloud
|
||||
database (oc_filecache). Veel sneller dan occ files:scan (~500K in ~2 min).
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import hashlib
|
||||
import time
|
||||
|
||||
DB_NAME = "{{ nextcloud_db_name }}"
|
||||
GROUPFOLDERS_DIR = "{{ groupfolders_dir }}"
|
||||
NUM_TEAMFOLDERS = {{ num_teamfolders }}
|
||||
FILES_PER_FOLDER = {{ files_per_teamfolder }}
|
||||
FILE_PREFIX = "{{ dummy_file_prefix }}"
|
||||
FILE_EXT = "{{ dummy_file_extension }}"
|
||||
SQL_BATCH_SIZE = {{ sql_batch_size }}
|
||||
|
||||
|
||||
def mysql_exec(sql):
|
||||
result = subprocess.run(
|
||||
["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
|
||||
capture_output=True, text=True, timeout=120
|
||||
)
|
||||
if result.returncode != 0 and result.stderr.strip():
|
||||
print(f" [SQL ERROR] {result.stderr[:200]}")
|
||||
return result.stdout.strip()
|
||||
|
||||
|
||||
def mysql_exec_file(filepath):
|
||||
with open(filepath) as f:
|
||||
result = subprocess.run(
|
||||
["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
|
||||
stdin=f, capture_output=True, text=True, timeout=300
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f" [SQL ERROR] {result.stderr[:200]}")
|
||||
return result
|
||||
|
||||
|
||||
def escape_sql(s):
|
||||
return s.replace("\\", "\\\\").replace("'", "\\'")
|
||||
|
||||
|
||||
def main():
|
||||
start_time = time.time()
|
||||
now = int(time.time())
|
||||
|
||||
print("=" * 60)
|
||||
print("Fast DB Register - oc_filecache insert")
|
||||
print(f"Doel: {NUM_TEAMFOLDERS} folders x {FILES_PER_FOLDER} bestanden")
|
||||
print("=" * 60)
|
||||
|
||||
# Get storage mapping (gf_id -> storage numeric_id)
|
||||
print("\nStap 1: Ophalen storage mapping...")
|
||||
rows = mysql_exec("""
|
||||
SELECT s.numeric_id,
|
||||
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
|
||||
FROM oc_storages s
|
||||
WHERE s.id LIKE 'local::%/__groupfolders/%'
|
||||
""")
|
||||
storage_map = {}
|
||||
for line in rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
storage_map[int(parts[1])] = int(parts[0])
|
||||
|
||||
# Get groupfolder mapping
|
||||
print("Stap 2: Ophalen groupfolder mapping...")
|
||||
rows = mysql_exec("""
|
||||
SELECT folder_id, mount_point FROM oc_group_folders
|
||||
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
|
||||
ORDER BY folder_id
|
||||
""")
|
||||
gf_map = {}
|
||||
for line in rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
gf_id = int(parts[0])
|
||||
tf_idx = int(parts[1].split('-')[-1])
|
||||
if tf_idx <= NUM_TEAMFOLDERS:
|
||||
gf_map[tf_idx] = gf_id
|
||||
|
||||
print(f" {len(gf_map)} teamfolders gevonden")
|
||||
|
||||
# Get mimetype IDs
|
||||
txt_mime = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='text/plain'"))
|
||||
txt_part = int(mysql_exec("SELECT id FROM oc_mimetypes WHERE mimetype='text'"))
|
||||
|
||||
total_inserted = 0
|
||||
|
||||
for tf_idx in sorted(gf_map.keys()):
|
||||
gf_id = gf_map[tf_idx]
|
||||
storage_id = storage_map.get(gf_id)
|
||||
if not storage_id:
|
||||
print(f" [SKIP] Geen storage voor gf_id {gf_id}")
|
||||
continue
|
||||
|
||||
# Check if files already registered
|
||||
existing = mysql_exec(f"""
|
||||
SELECT COUNT(*) FROM oc_filecache
|
||||
WHERE storage = {storage_id} AND path LIKE 'files/{FILE_PREFIX}%'
|
||||
""")
|
||||
if existing and int(existing) >= FILES_PER_FOLDER:
|
||||
print(f" [SKIP] Folder {tf_idx} (gf_id={gf_id}): {existing} bestanden al geregistreerd")
|
||||
total_inserted += int(existing)
|
||||
continue
|
||||
|
||||
# Get parent fileid for 'files' directory
|
||||
files_parent = mysql_exec(f"SELECT fileid FROM oc_filecache WHERE storage = {storage_id} AND path = 'files'")
|
||||
if not files_parent:
|
||||
print(f" [SKIP] Geen files entry voor storage {storage_id}")
|
||||
continue
|
||||
parent_id = int(files_parent)
|
||||
|
||||
# Get file size from disk
|
||||
sample_file = f"{GROUPFOLDERS_DIR}/{gf_id}/files/{FILE_PREFIX}-00001.{FILE_EXT}"
|
||||
try:
|
||||
file_size = os.path.getsize(sample_file)
|
||||
except OSError:
|
||||
file_size = 80 # fallback
|
||||
|
||||
# Generate INSERT statements in batches
|
||||
values = []
|
||||
for i in range(1, FILES_PER_FOLDER + 1):
|
||||
fname = f"{FILE_PREFIX}-{i:05d}.{FILE_EXT}"
|
||||
path = f"files/{fname}"
|
||||
path_hash = hashlib.md5(path.encode()).hexdigest()
|
||||
etag = hashlib.md5(f"{now}{path}".encode()).hexdigest()
|
||||
|
||||
values.append(
|
||||
f"({storage_id}, '{escape_sql(path)}', '{path_hash}', {parent_id}, "
|
||||
f"'{escape_sql(fname)}', {txt_mime}, {txt_part}, {file_size}, "
|
||||
f"{now}, {now}, '{etag}', 27, 0)"
|
||||
)
|
||||
|
||||
if len(values) >= SQL_BATCH_SIZE:
|
||||
sql_file = "/tmp/filecache_batch.sql"
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("INSERT IGNORE INTO oc_filecache "
|
||||
"(storage, path, path_hash, parent, name, mimetype, mimepart, "
|
||||
"size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
|
||||
f.write(",\n".join(values))
|
||||
f.write(";\n")
|
||||
mysql_exec_file(sql_file)
|
||||
values = []
|
||||
|
||||
# Flush remaining
|
||||
if values:
|
||||
sql_file = "/tmp/filecache_batch.sql"
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("INSERT IGNORE INTO oc_filecache "
|
||||
"(storage, path, path_hash, parent, name, mimetype, mimepart, "
|
||||
"size, mtime, storage_mtime, etag, permissions, unencrypted_size) VALUES\n")
|
||||
f.write(",\n".join(values))
|
||||
f.write(";\n")
|
||||
mysql_exec_file(sql_file)
|
||||
|
||||
total_inserted += FILES_PER_FOLDER
|
||||
elapsed = time.time() - start_time
|
||||
rate = total_inserted / elapsed if elapsed > 0 else 0
|
||||
print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {FILES_PER_FOLDER} records ({elapsed:.0f}s, totaal: {total_inserted}, {rate:.0f}/s)")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Voltooid in {elapsed:.0f} seconden")
|
||||
print(f"Totaal geregistreerd: {total_inserted}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
247
templates/fast_metadata_insert.py.j2
Normal file
247
templates/fast_metadata_insert.py.j2
Normal file
@@ -0,0 +1,247 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fast Metadata Insert - Direct MySQL
|
||||
=====================================
|
||||
Voegt metadata records in voor alle bestanden via directe MySQL inserts.
|
||||
Veel sneller dan de MetaVox API (~43M records in ~2 uur).
|
||||
|
||||
Tabellen:
|
||||
- oc_metavox_file_gf_meta: per-bestand metadata (file_id, groupfolder_id, field_name, field_value)
|
||||
- oc_metavox_gf_metadata: per-teamfolder metadata (groupfolder_id, field_name, field_value)
|
||||
"""
|
||||
import subprocess
|
||||
import time
|
||||
import random
|
||||
import string
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
DB_NAME = "{{ nextcloud_db_name }}"
|
||||
NUM_TEAMFOLDERS = {{ num_teamfolders }}
|
||||
FILES_PER_FOLDER = {{ files_per_teamfolder }}
|
||||
SQL_BATCH_SIZE = {{ sql_batch_size }}
|
||||
|
||||
NAMES = ["Jan", "Piet", "Klaas", "Marie", "Anna", "Sophie", "Thomas", "Eva",
|
||||
"Lucas", "Emma", "Liam", "Olivia", "Noah", "Mia", "Daan", "Sara"]
|
||||
ORGS = ["UvA", "HvA", "VU", "TU Delft", "Gemeente Amsterdam", "Ministerie BZK",
|
||||
"Rijkswaterstaat", "UWV", "DUO", "KNAW"]
|
||||
CITIES = ["Amsterdam", "Rotterdam", "Den Haag", "Utrecht", "Eindhoven",
|
||||
"Groningen", "Tilburg", "Almere", "Breda", "Nijmegen"]
|
||||
WORDS = ["beleid", "rapport", "analyse", "voorstel", "evaluatie", "plan",
|
||||
"nota", "brief", "contract", "factuur", "verslag", "advies"]
|
||||
|
||||
FILE_FIELDS = [
|
||||
{% for field in file_metadata_fields %}
|
||||
("{{ field.name }}", "{{ field.type }}{% if field.options is defined %}:{{ field.options }}{% endif %}"),
|
||||
{% endfor %}
|
||||
]
|
||||
|
||||
TF_FIELDS = [
|
||||
{% for field in teamfolder_metadata_fields %}
|
||||
("{{ field.name }}", "{{ field.type }}{% if field.options is defined %}:{{ field.options }}{% endif %}"),
|
||||
{% endfor %}
|
||||
]
|
||||
|
||||
|
||||
def random_date():
|
||||
start = datetime(2020, 1, 1)
|
||||
delta = (datetime(2026, 12, 31) - start).days
|
||||
return (start + timedelta(days=random.randint(0, delta))).strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def gen_value(fname, ftype):
|
||||
if ftype.startswith("dropdown:"):
|
||||
options = ftype.split(":", 1)[1].split(",")
|
||||
return random.choice(options)
|
||||
elif ftype == "date":
|
||||
return random_date()
|
||||
elif ftype == "checkbox":
|
||||
return random.choice(["true", "false"])
|
||||
else: # text
|
||||
if "email" in fname:
|
||||
return f"{random.choice(NAMES).lower()}@example.nl"
|
||||
elif "phone" in fname:
|
||||
return f"+31 6 {random.randint(10000000, 99999999)}"
|
||||
elif "postal" in fname:
|
||||
return f"{random.randint(1000, 9999)} {''.join(random.choices(string.ascii_uppercase, k=2))}"
|
||||
elif "city" in fname:
|
||||
return random.choice(CITIES)
|
||||
elif "country" in fname:
|
||||
return "Nederland"
|
||||
elif "organization" in fname:
|
||||
return random.choice(ORGS)
|
||||
elif any(w in fname for w in ["author", "creator", "contact", "owner"]):
|
||||
return f"{random.choice(NAMES)} {random.choice(['de Vries', 'Jansen', 'Bakker', 'Visser'])}"
|
||||
elif any(w in fname for w in ["number", "code", "identifier", "reference"]):
|
||||
return f"{fname[:3].upper()}-{random.randint(10000, 99999)}"
|
||||
elif "version" in fname:
|
||||
return f"{random.randint(1, 10)}.{random.randint(0, 9)}"
|
||||
elif "language" in fname:
|
||||
return random.choice(["Nederlands", "Engels", "Duits"])
|
||||
else:
|
||||
return " ".join(random.choices(WORDS, k=random.randint(1, 3))).capitalize()
|
||||
|
||||
|
||||
def mysql_exec(sql):
|
||||
result = subprocess.run(
|
||||
["mysql", "-u", "root", DB_NAME, "-N", "-e", sql],
|
||||
capture_output=True, text=True, timeout=120
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f" [SQL ERROR] {result.stderr[:200]}")
|
||||
return result.stdout.strip()
|
||||
|
||||
|
||||
def mysql_exec_file(filepath):
|
||||
with open(filepath) as f:
|
||||
result = subprocess.run(
|
||||
["mysql", "-u", "root", DB_NAME, "--max-allowed-packet=64M"],
|
||||
stdin=f, capture_output=True, text=True, timeout=300
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f" [SQL ERROR] {result.stderr[:200]}")
|
||||
return result
|
||||
|
||||
|
||||
def escape_sql(s):
|
||||
return s.replace("\\", "\\\\").replace("'", "\\'").replace('"', '\\"')
|
||||
|
||||
|
||||
def main():
|
||||
print("=" * 60)
|
||||
print("Fast Metadata Insert - Direct MySQL")
|
||||
print(f"File fields: {len(FILE_FIELDS)}, TF fields: {len(TF_FIELDS)}")
|
||||
print(f"Doel: {NUM_TEAMFOLDERS} folders x {FILES_PER_FOLDER} files x {len(FILE_FIELDS)} fields")
|
||||
print(f" = {NUM_TEAMFOLDERS * FILES_PER_FOLDER * len(FILE_FIELDS):,} file metadata rows")
|
||||
print(f" + {NUM_TEAMFOLDERS * len(TF_FIELDS):,} groupfolder metadata rows")
|
||||
print("=" * 60)
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
# Step 1: Get groupfolder mapping
|
||||
print("\nStap 1: Ophalen groupfolder mapping...")
|
||||
rows = mysql_exec("""
|
||||
SELECT folder_id, mount_point FROM oc_group_folders
|
||||
WHERE mount_point LIKE '{{ teamfolder_prefix }}-%'
|
||||
ORDER BY folder_id
|
||||
""")
|
||||
gf_map = {}
|
||||
for line in rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
gf_id = int(parts[0])
|
||||
tf_idx = int(parts[1].split('-')[-1])
|
||||
gf_map[tf_idx] = gf_id
|
||||
print(f" {len(gf_map)} groupfolders gevonden")
|
||||
|
||||
# Step 2: Get storage mapping
|
||||
print("\nStap 2: Ophalen storage mapping...")
|
||||
rows = mysql_exec("""
|
||||
SELECT s.numeric_id,
|
||||
REPLACE(REPLACE(s.id, 'local::/var/www/nextcloud/data/__groupfolders/', ''), '/', '') as folder_num
|
||||
FROM oc_storages s
|
||||
WHERE s.id LIKE 'local::%/__groupfolders/%'
|
||||
""")
|
||||
storage_map = {}
|
||||
for line in rows.split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
storage_map[int(parts[1])] = int(parts[0])
|
||||
|
||||
# Step 3: Check which folders are already complete
|
||||
print("\nStap 3: Checken welke folders al klaar zijn...")
|
||||
done_rows = mysql_exec("""
|
||||
SELECT groupfolder_id, COUNT(*) as cnt
|
||||
FROM oc_metavox_file_gf_meta
|
||||
GROUP BY groupfolder_id
|
||||
""")
|
||||
done_gf_ids = set()
|
||||
target_per_folder = FILES_PER_FOLDER * len(FILE_FIELDS)
|
||||
for line in (done_rows or "").split('\n'):
|
||||
if line.strip():
|
||||
parts = line.split('\t')
|
||||
gf_id = int(parts[0])
|
||||
cnt = int(parts[1])
|
||||
if cnt >= target_per_folder * 0.1:
|
||||
done_gf_ids.add(gf_id)
|
||||
print(f" gf_id={gf_id}: {cnt}/{target_per_folder} ({cnt*100//target_per_folder}%) - SKIP")
|
||||
print(f" {len(done_gf_ids)} folders al (grotendeels) compleet, worden overgeslagen")
|
||||
|
||||
# Step 4: Insert groupfolder metadata
|
||||
print("\nStap 4: Groupfolder metadata invoegen...")
|
||||
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
values = []
|
||||
for tf_idx in range(1, NUM_TEAMFOLDERS + 1):
|
||||
gf_id = gf_map.get(tf_idx)
|
||||
if not gf_id:
|
||||
continue
|
||||
for fname, ftype in TF_FIELDS:
|
||||
val = escape_sql(gen_value(fname, ftype))
|
||||
values.append(f"({gf_id}, '{fname}', '{val}', '{now}', '{now}')")
|
||||
|
||||
sql_file = "/tmp/gf_metadata_insert.sql"
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("INSERT INTO oc_metavox_gf_metadata (groupfolder_id, field_name, field_value, created_at, updated_at) VALUES\n")
|
||||
f.write(",\n".join(values))
|
||||
f.write(";\n")
|
||||
mysql_exec_file(sql_file)
|
||||
print(f" {len(values)} groupfolder metadata records ingevoegd")
|
||||
|
||||
# Step 5: Insert file metadata per folder
|
||||
print("\nStap 5: File metadata invoegen...")
|
||||
total_inserted = 0
|
||||
|
||||
for tf_idx in range(1, NUM_TEAMFOLDERS + 1):
|
||||
gf_id = gf_map.get(tf_idx)
|
||||
if not gf_id:
|
||||
continue
|
||||
if gf_id in done_gf_ids:
|
||||
total_inserted += FILES_PER_FOLDER * len(FILE_FIELDS)
|
||||
continue
|
||||
storage_id = storage_map.get(gf_id)
|
||||
if not storage_id:
|
||||
print(f" [SKIP] Geen storage voor gf_id {gf_id}")
|
||||
continue
|
||||
|
||||
# Get file IDs for this folder
|
||||
file_ids_raw = mysql_exec(f"""
|
||||
SELECT fileid FROM oc_filecache
|
||||
WHERE storage = {storage_id} AND name LIKE '{{ dummy_file_prefix }}%'
|
||||
""")
|
||||
file_ids = [int(x) for x in file_ids_raw.split('\n') if x.strip()]
|
||||
|
||||
if not file_ids:
|
||||
print(f" [SKIP] Folder {tf_idx}: geen bestanden")
|
||||
continue
|
||||
|
||||
# Generate all rows for this folder
|
||||
values = []
|
||||
for fid in file_ids:
|
||||
for fname, ftype in FILE_FIELDS:
|
||||
val = escape_sql(gen_value(fname, ftype))
|
||||
values.append(f"({fid}, {gf_id}, '{fname}', '{val}', '{now}', '{now}')")
|
||||
|
||||
# Write in batches
|
||||
for batch_start in range(0, len(values), SQL_BATCH_SIZE):
|
||||
batch = values[batch_start:batch_start + SQL_BATCH_SIZE]
|
||||
sql_file = "/tmp/file_meta_batch.sql"
|
||||
with open(sql_file, 'w') as f:
|
||||
f.write("INSERT INTO oc_metavox_file_gf_meta (file_id, groupfolder_id, field_name, field_value, created_at, updated_at) VALUES\n")
|
||||
f.write(",\n".join(batch))
|
||||
f.write(";\n")
|
||||
mysql_exec_file(sql_file)
|
||||
|
||||
total_inserted += len(values)
|
||||
elapsed = time.time() - start_time
|
||||
rate = total_inserted / elapsed if elapsed > 0 else 0
|
||||
print(f" [DONE] Folder {tf_idx} (gf_id={gf_id}): {len(file_ids)} files x {len(FILE_FIELDS)} fields = {len(values)} rows ({elapsed:.0f}s, totaal: {total_inserted:,}, {rate:.0f}/s)")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Voltooid in {elapsed:.0f} seconden ({elapsed/3600:.1f} uur)")
|
||||
print(f"File metadata records: {total_inserted:,}")
|
||||
print(f"GF metadata records: {len(gf_map) * len(TF_FIELDS)}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
191
templates/setup_metadata_fields.py.j2
Normal file
191
templates/setup_metadata_fields.py.j2
Normal file
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MetaVox Metadata Velden Setup Script
|
||||
=====================================
|
||||
Maakt teamfolder-velden en file-metadata-velden aan via de MetaVox OCS API.
|
||||
|
||||
POST /ocs/v2.php/apps/metavox/api/v1/groupfolder-fields
|
||||
Payload: field_name, field_label, field_type, field_description,
|
||||
field_options (array), is_required, sort_order,
|
||||
applies_to_groupfolder (1=teamfolder metadata, 0=file metadata)
|
||||
|
||||
POST /ocs/v2.php/apps/metavox/api/v1/groupfolders/{id}/fields
|
||||
Payload: field_ids (array) - wijst velden toe aan een specifieke groupfolder
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
|
||||
# Connection settings -- rendered by Ansible when this .j2 template is
# deployed; the {{ ... }} placeholders are Jinja2 variables, not Python.
NC_URL = "{{ nextcloud_url }}"
NC_USER = "{{ nextcloud_admin_user }}"
NC_PASS = "{{ nextcloud_admin_password }}"
TIMEOUT = {{ http_timeout }}  # per-request timeout passed to requests (seconds)
MAX_RETRIES = {{ max_retries }}  # attempts per API call (see api_request)

# Base URL of the MetaVox OCS API (v1).
OCS_BASE = f"{NC_URL}/ocs/v2.php/apps/metavox/api/v1"

# Basic-auth credentials and headers shared by every request.
# Nextcloud requires the "OCS-APIRequest: true" header on OCS endpoints.
AUTH = HTTPBasicAuth(NC_USER, NC_PASS)
HEADERS = {
    "OCS-APIRequest": "true",
    "Accept": "application/json",
    "Content-Type": "application/json",
}
|
||||
|
||||
|
||||
def api_request(method, url, data=None, retries=MAX_RETRIES):
    """Perform one HTTP request against the OCS API with retry and backoff.

    Args:
        method: HTTP method name, e.g. "GET" or "POST".
        url: Absolute URL of the OCS endpoint.
        data: Optional JSON-serialisable request body.
        retries: Maximum number of attempts (defaults to MAX_RETRIES).

    Returns:
        The last ``requests.Response`` received (200/201 on success; a
        duplicate-field 500 is treated as success), or ``None`` if no
        response was ever obtained.

    Raises:
        requests.exceptions.RequestException: when the final attempt fails
            at the transport level (earlier transport failures are retried).
    """
    resp = None
    for attempt in range(retries):
        try:
            resp = requests.request(
                method, url, auth=AUTH, headers=HEADERS,
                json=data, timeout=TIMEOUT,
            )
            if resp.status_code in (200, 201):
                return resp
            # MetaVox reports a duplicate field name as HTTP 500 with
            # "already exists" in the body; treat that as idempotent success.
            if resp.status_code == 500 and "already exists" in resp.text:
                print(f" [SKIP] Veld bestaat al")
                return resp
            print(f" [WARN] HTTP {resp.status_code}: {resp.text[:200]}")
            if attempt < retries - 1:
                # BUGFIX: the original slept here and then returned anyway,
                # so HTTP-level failures were never retried. Back off, then
                # loop for another attempt.
                time.sleep(2 ** attempt)
                continue
            return resp
        except requests.exceptions.RequestException as e:
            print(f" [ERROR] Poging {attempt+1}/{retries}: {e}")
            if attempt < retries - 1:
                time.sleep(2 ** attempt)
            else:
                # Out of attempts: surface the transport error to the caller.
                raise
    return resp
|
||||
|
||||
|
||||
def create_field(field, applies_to_groupfolder):
    """Create one metadata field definition via the MetaVox OCS API.

    ``field`` is a dict with at least ``name`` and ``type``; ``options``
    may be a comma-separated string, a list, or absent. Returns the
    ``requests.Response`` from ``api_request`` (or ``None``).
    """
    # Normalise the options value to a list of trimmed strings.
    raw_options = field.get("options", "")
    if isinstance(raw_options, list):
        normalized = raw_options
    elif isinstance(raw_options, str) and raw_options:
        normalized = [part.strip() for part in raw_options.split(",")]
    else:
        normalized = []

    # field_label falls back to the technical name when no description exists.
    payload = {
        "field_name": field["name"],
        "field_label": field.get("description", field["name"]),
        "field_type": field["type"],
        "field_description": field.get("description", ""),
        "field_options": normalized,
        "is_required": False,
        "sort_order": 0,
        "applies_to_groupfolder": applies_to_groupfolder,
    }

    return api_request("POST", f"{OCS_BASE}/groupfolder-fields", payload)
|
||||
|
||||
|
||||
def assign_fields_to_groupfolder(groupfolder_id, field_ids):
    """Attach the given field IDs to one groupfolder via the OCS API."""
    endpoint = f"{OCS_BASE}/groupfolders/{groupfolder_id}/fields"
    payload = {"field_ids": field_ids}
    return api_request("POST", endpoint, payload)
|
||||
|
||||
|
||||
def get_existing_fields():
    """Fetch all field definitions known to MetaVox.

    Returns the ``ocs.data`` payload of the listing endpoint, or an empty
    list when the request fails or the response has an unexpected shape.
    """
    resp = api_request("GET", f"{OCS_BASE}/groupfolder-fields")
    if resp and resp.status_code == 200:
        try:
            body = resp.json()
            # Best-effort parse: any malformed body falls through to [].
            if "ocs" in body and "data" in body["ocs"]:
                return body["ocs"]["data"]
        except Exception:
            pass
    return []
|
||||
|
||||
|
||||
def get_groupfolders():
    """Fetch all groupfolders known to MetaVox.

    Returns the ``ocs.data`` payload of the listing endpoint, or an empty
    list when the request fails or the response has an unexpected shape.
    """
    resp = api_request("GET", f"{OCS_BASE}/groupfolders")
    if resp and resp.status_code == 200:
        try:
            body = resp.json()
            # Best-effort parse: any malformed body falls through to [].
            if "ocs" in body and "data" in body["ocs"]:
                return body["ocs"]["data"]
        except Exception:
            pass
    return []
|
||||
|
||||
|
||||
def _create_fields(fields, applies_to_groupfolder):
    """Create each field definition; return (created_count, new_field_ids).

    A field counts as created on HTTP 200/201; its ID is harvested from the
    OCS response body when present (missing IDs are silently skipped).
    """
    created = 0
    ids = []
    for field in fields:
        print(f" Aanmaken: {field['name']} ({field['type']})")
        resp = create_field(field, applies_to_groupfolder=applies_to_groupfolder)
        if resp and resp.status_code in [200, 201]:
            created += 1
            try:
                fid = resp.json().get("ocs", {}).get("data", {}).get("id")
                if fid:
                    ids.append(fid)
            except Exception:
                pass
    return created, ids


def main():
    """Create all loadtest metadata fields and assign them to every groupfolder.

    Three phases: (1) teamfolder-level fields, (2) file-level fields,
    (3) assign every known field to every groupfolder.
    """
    print("=" * 60)
    print("MetaVox Metadata Velden Setup (OCS API)")
    print(f"Endpoint: {OCS_BASE}/groupfolder-fields")
    print("=" * 60)

    created_field_ids = []

    # ---- Teamfolder metadata fields (applies_to_groupfolder=1) ----
    # The {{ ... | to_json }} expression is rendered by Ansible before this
    # script runs; json.loads parses the rendered literal.
    tf_fields = json.loads("""{{ teamfolder_metadata_fields | to_json }}""")
    print(f"\n[1/3] {len(tf_fields)} teamfolder-velden (applies_to_groupfolder=1)...")
    tf_created, tf_ids = _create_fields(tf_fields, 1)
    created_field_ids.extend(tf_ids)
    print(f" -> {tf_created} teamfolder-velden created")

    # ---- File metadata fields (applies_to_groupfolder=0) ----
    file_fields = json.loads("""{{ file_metadata_fields | to_json }}""")
    print(f"\n[2/3] {len(file_fields)} file-velden (applies_to_groupfolder=0)...")
    f_created, f_ids = _create_fields(file_fields, 0)
    created_field_ids.extend(f_ids)
    print(f" -> {f_created} file-velden created")

    # ---- Assign all fields to all groupfolders ----
    print(f"\n[3/3] Velden toewijzen aan groupfolders...")

    all_fields = get_existing_fields()
    all_field_ids = [f["id"] for f in all_fields if "id" in f]
    if not all_field_ids:
        # Listing endpoint returned nothing usable; fall back to the IDs
        # harvested while creating the fields above.
        all_field_ids = created_field_ids

    if all_field_ids:
        groupfolders = get_groupfolders()
        gf_count = 0
        for gf in groupfolders:
            # Groupfolder payloads expose the ID under either key.
            gf_id = gf.get("id") or gf.get("group_folder_id")
            if gf_id:
                resp = assign_fields_to_groupfolder(gf_id, all_field_ids)
                if resp and resp.status_code == 200:
                    gf_count += 1
                if gf_count % 10 == 0 and gf_count > 0:
                    print(f" {gf_count} groupfolders verwerkt...")
        print(f" -> {gf_count} groupfolders hebben nu alle velden")
    else:
        print(" [WARN] Geen field IDs beschikbaar")

    total = tf_created + f_created
    print(f"\n{'=' * 60}")
    print(f"Totaal: {total} metadata-velden created")
    print(f"{'=' * 60}")
|
||||
|
||||
|
||||
# Entry point: run the metadata-field setup when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user