commit 9c7df76fbdc86bc0fc8348773eeefa8080709f78
Author: Henrik Højmark <henrik.hojmark@fejl40.nu>
Date:   Sat Apr 11 04:38:11 2026 +0200

    Initial commit

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..8cfce70
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,168 @@
+name: Build — Windows & Linux
+
+# Runs for pushes to main, for pushed tags starting with "v", and on manual dispatch
+on:
+  workflow_dispatch:
+  push:
+    tags: ["v*"]
+    branches: ["main"]
+
+# One run per ref (branch or tag): a newer run in the same group cancels the one in progress
+concurrency:
+  cancel-in-progress: true
+  group: build-${{ github.ref }}
+
+jobs:
+
+  # ── Document Scanner ──────────────────────────────────────────────────────
+  build-document-scanner:
+    name: Document Scanner / ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: windows
+            os: windows-latest
+            artifact_glob: 'dist/*.exe'
+          - name: linux
+            os: ubuntu-22.04
+            artifact_glob: 'dist/Document Scanner'
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          cache: pip
+          python-version: '3.12'
+
+      # Linux legs only: system packages (Tesseract + language data, Poppler, GTK/WebKit headers)
+      - name: Install Linux system dependencies
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y --no-install-recommends \
+            tesseract-ocr tesseract-ocr-dan tesseract-ocr-deu \
+            poppler-utils \
+            libgtk-3-dev libwebkit2gtk-4.0-dev \
+            libglib2.0-dev libcairo2-dev pkg-config \
+            python3-dev
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      # Fetch the Danish spaCy model (da_core_news_sm) used for NER/anonymisation
+      - name: Download spaCy model
+        run: python -m spacy download da_core_news_sm
+
+      - name: Build Document Scanner
+        run: python build.py
+
+      # Linux ships as a zip of the built binary (there is no installer on Linux)
+      - name: Package Linux binary
+        if: runner.os == 'Linux'
+        run: |
+          cd dist
+          zip -r "Document_Scanner_linux_x86_64.zip" "Document Scanner"
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: DocumentScanner-${{ matrix.name }}
+          path: |
+            dist/*.exe
+            dist/Document_Scanner_linux_x86_64.zip
+          retention-days: 30
+
+  # ── GDPRScanner ──────────────────────────────────────────────────────────
+  build-m365-scanner:  # builds GDPRScanner; the release job's `needs` refers to this job id
+    strategy:
+      fail-fast: false  # let the other OS leg finish even if one leg fails
+      matrix:
+        include:
+          - os: windows-latest
+            name: windows
+            artifact_glob: "dist/*.exe"  # NOTE(review): artifact_glob is not referenced by any step in this job
+          - os: ubuntu-22.04
+            name: linux
+            artifact_glob: "dist/GDPRScanner"
+
+    runs-on: ${{ matrix.os }}
+    name: GDPRScanner / ${{ matrix.name }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+
+      - name: Install Linux system dependencies  # GTK/WebKit headers only; no OCR packages in this job
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y --no-install-recommends \
+            libgtk-3-dev libwebkit2gtk-4.0-dev \
+            libglib2.0-dev libcairo2-dev pkg-config \
+            python3-dev
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # GDPRScanner only needs a subset — skip OCR/CV heavy deps
+          pip install flask msal requests openpyxl pillow \
+                      python-docx \
+                      pywebview pystray \
+                      pyinstaller pyinstaller-hooks-contrib
+
+      - name: Build GDPRScanner
+        run: python build_gdpr.py
+
+      - name: Package Linux binary  # the zip name created here must match the upload path below
+        if: runner.os == 'Linux'
+        run: |
+          cd dist
+          zip -r "GDPRScanner_linux_x86_64.zip" "GDPRScanner"
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4  # Linux path must name the zip produced by the Package step
+        with:
+          name: M365Scanner-${{ matrix.name }}
+          retention-days: 30
+          path: |
+            dist/*.exe
+            dist/GDPRScanner_linux_x86_64.zip
+
+  # ── Release (only on version tags v*) ────────────────────────────────────
+  release:
+    name: Create GitHub Release
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/v')
+    needs: [build-document-scanner, build-m365-scanner]
+    permissions:
+      contents: write  # the release step creates a release and attaches files
+
+    steps:
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          merge-multiple: true
+          path: artifacts
+
+      - name: Create release
+        uses: softprops/action-gh-release@v2
+        with:
+          name: ${{ github.ref_name }}
+          files: artifacts/**
+          generate_release_notes: true
+          draft: false
+          prerelease: ${{ contains(github.ref_name, '-beta') || contains(github.ref_name, '-rc') }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aa89886
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,91 @@
+# VERSION, CHANGELOG.md, LICENSE, README.md — always commit these
+# (VERSION is plain text, not JSON, so the *.json rule does not catch it)
+
+# ── Credentials and config (NEVER commit these) ───────────────────────────────
+*.json
+!lang/*.json
+!keywords/*.json
+!skus/*.json
+!package*.json
+
+# Be explicit about the most sensitive files
+.m365_scanner_config.json
+.m365_scanner_smtp.json
+.m365_scanner_settings.json
+.m365_scanner_delta.json
+.m365_scanner_checkpoint.json
+.m365_scanner_lang
+.document_scanner_lang
+
+# ── Databases (contain personal data) ────────────────────────────────────────
+*.db
+*.sqlite
+*.sqlite3
+
+# ── Audit logs (contain personal data) ───────────────────────────────────────
+*.jsonl
+scanner_audit.jsonl
+
+# ── Python ────────────────────────────────────────────────────────────────────
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+venv/
+.venv/
+env/
+ENV/
+*.egg-info/
+dist/
+build/
+.eggs/
+pip-wheel-metadata/
+*.egg
+
+# ── PyInstaller output ────────────────────────────────────────────────────────
+dist/
+build/
+*.spec
+*.exe
+*.app
+
+# ── Node (docx generation) ────────────────────────────────────────────────────
+node_modules/
+npm-debug.log*
+
+# ── macOS ─────────────────────────────────────────────────────────────────────
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V3
+.Trashes
+Icon?
+
+# ── Windows ───────────────────────────────────────────────────────────────────
+Thumbs.db
+ehthumbs.db
+Desktop.ini
+$RECYCLE.BIN/
+
+# ── Editor / IDE ──────────────────────────────────────────────────────────────
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.project
+.settings/
+
+# ── Test artifacts ────────────────────────────────────────────────────────────
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+
+# ── Temporary / local ─────────────────────────────────────────────────────────
+*.tmp
+*.bak
+*.orig
+tools/
+# (tools/ above: the folder is created by the installer — not part of the repo)
diff --git a/ACRONYMS.md b/ACRONYMS.md
new file mode 100644
index 0000000..5541d45
--- /dev/null
+++ b/ACRONYMS.md
@@ -0,0 +1,58 @@
+# Acronyms and Abbreviations
+
+GDPR-related terms and abbreviations used throughout the GDPR Scanner project.
+
+## GDPR / Legal
+
+| Term | Full name | Meaning in context |
+|---|---|---|
+| GDPR | General Data Protection Regulation | The EU regulation (2016/679) — the primary legal framework the scanner addresses |
+| CPR | Centrale Personregister | Danish national personal identification number (DDMMYY-XXXX) |
+| PII | Personally Identifiable Information | Any data that can identify a person — names, addresses, phone numbers, IBANs etc. |
+| NER | Named Entity Recognition | ML technique (via spaCy) used to detect names, addresses, and organisations in text |
+| DPA | Data Protection Authority | Supervisory authority — in Denmark: Datatilsynet |
+| DSR | Data Subject Request | A request from an individual to access, correct, or delete their data (Art. 15/17) |
+| DPIA | Data Protection Impact Assessment | Risk assessment required before high-risk processing (Art. 35) — not yet in scanner |
+| RoPA | Register of Processing Activities | The Article 30 register — what the Art.30 export produces |
+| IBAN | International Bank Account Number | Financial identifier detected as sensitive PII |
+| SKU | Stock Keeping Unit | In context: Microsoft license product code used to classify student vs staff accounts |
+
+## GDPR Articles referenced in this project
+
+| Article | Subject |
+|---|---|
+| Art. 5(1)(a) | Lawfulness, fairness, transparency |
+| Art. 5(1)(b) | Purpose limitation |
+| Art. 5(1)(c) | Data minimisation |
+| Art. 5(1)(e) | Storage limitation — basis for retention enforcement |
+| Art. 5(2) | Accountability — basis for the deletion audit log |
+| Art. 8 | Conditions for child consent — age threshold |
+| Art. 9 | Special categories of personal data (biometric, health, religious belief etc.; criminal-offence data falls under Art. 10) |
+| Art. 15 | Right of access — basis for data subject lookup |
+| Art. 17 | Right to erasure ("right to be forgotten") |
+| Art. 30 | Records of processing activities — basis for Article 30 export |
+| Art. 35 | Data Protection Impact Assessment |
+| Art. 44–46 | Transfers to third countries |
+| Art. 89 | Archiving in the public interest — potential basis for retaining historical data |
+
+## Danish law
+
+| Term | Meaning |
+|---|---|
+| Databeskyttelsesloven | Danish Data Protection Act — supplements GDPR in Denmark |
+| Databeskyttelsesloven §6 | Sets digital consent age at 15 — below this, parental consent required |
+| Bogføringsloven | Danish Bookkeeping Act — requires accounting records to be retained for 5 years from the end of the financial year |
+| Datatilsynet | Danish Data Protection Authority — the national supervisory body |
+
+## Microsoft 365 / Technical
+
+| Term | Full name | Meaning in context |
+|---|---|---|
+| M365 | Microsoft 365 | The cloud productivity suite (Exchange, OneDrive, SharePoint, Teams) |
+| AAD / Entra | Azure Active Directory / Microsoft Entra ID | Microsoft's identity and access management service |
+| MSAL | Microsoft Authentication Library | Library used for OAuth2 authentication against Azure AD |
+| UPN | User Principal Name | Microsoft's unique user identifier — typically the user's email address |
+| SKU | Stock Keeping Unit | Microsoft license product code (e.g. M365EDU_A3_STUDENT) |
+| SPO | SharePoint Online | Microsoft's cloud document management platform |
+| SSE | Server-Sent Events | HTTP streaming used to push scan results to the browser in real time |
+| ORM | Object-Relational Mapping | Not used — the scanner uses raw SQL via sqlite3 |
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..ef189f4
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,2458 @@
+# Changelog
+
+All notable changes to GDPR Scanner are documented here.
+
+Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
+Version numbers follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+---
+
+## [Unreleased]
+
+### Added
+
+- **`EFFORT_ESTIMATE.md`** — build effort estimate document covering component-by-component hour breakdowns and complexity drivers for the project.
+- **Settings → Security tab** — new dedicated pane in the Settings modal. Admin PIN and Viewer PIN groups moved here from the General tab, which now contains only Appearance and About. The Share modal's **Configure** button navigates directly to the Security tab.
+- **Viewer mode layout** — the sidebar, log panel, and progress bar are now hidden in viewer mode so results fill the full window width. The `🔍 GDPRScanner` brand is shown in the top-left of the topbar (replacing the sidebar header) at the same size and weight as the normal sidebar title.
+
+### Fixed
+
+- **Share modal — Revoke / Copy buttons broken** — `JSON.stringify(token)` produced a double-quoted string that terminated the surrounding `onclick="…"` HTML attribute early, so neither button fired its handler. Both now pass the token as a single-quoted JS string literal, which is safe for the hex token format.
+- **Viewer PIN — Clear PIN rejected with "current PIN is incorrect"** — clicking **Clear PIN** without first typing in the Current PIN field sent an empty string to the server, which correctly rejected it. A client-side guard now validates the field is non-empty before sending the request, and focuses the input with an inline error message if it is empty.
+- **Share modal — all UI strings now translated** — the Share results modal and Viewer PIN settings group were fully hardcoded in English. All visible strings are now backed by i18n keys (`share_*`, `viewer_pin_*`) in `en.json`, `da.json`, and `de.json`.
+
+---
+
+## [1.6.14] — 2026-04-10
+
+### Added — read-only viewer mode (#33)
+
+A DPO, school principal, or compliance coordinator can now review scan results and tag dispositions without access to scan controls, credentials, or settings.
+
+**Token links**
+
+- New `🔗` **Share** button in the topbar opens a token management modal.
+- **Create** generates a 64-char hex token (`secrets.token_hex(32)`) with an optional label and expiry (7 d / 30 d / 90 d / 1 yr / never).
+- **Copy** copies the full `http://host:5100/view?token=…` URL to the clipboard.
+- **Revoke** deletes the token immediately; any browser using it is locked out on next navigation.
+- Tokens are stored in `~/.gdprscanner/viewer_tokens.json` with `created_at`, `expires_at`, and `last_used_at` metadata. Expired tokens are cleaned up on each list fetch.
+
+**PIN alternative**
+
+- A 4–8 digit numeric PIN can be set in **Settings → General → Viewer PIN**.
+- Opening `/view` without a token shows a PIN entry form (`templates/viewer_pin.html`).
+- Correct PIN sets a Flask session cookie (`session["viewer_ok"]`) valid for the browser session — no token needed after that.
+- Brute-force guard: 5 failed attempts per 5 minutes per IP returns 429.
+- PIN stored as salted SHA-256 inside `viewer_tokens.json` (no extra dependencies).
+
+**`/view` route**
+
+- Checks `?token=` first (validates + binds session), then existing session cookie, then PIN form (if a PIN is configured), then 403.
+- Serves the same `index.html` with `window.VIEWER_MODE = true` injected.
+- Invalid/expired tokens show `templates/viewer_denied.html`.
+
+**Viewer mode (JS)**
+
+- `auth.js` — bypasses M365 auth check entirely; adds `viewer-mode` class to `<body>`; shows scanner screen immediately.
+- `results.js` — on `DOMContentLoaded` calls `_loadViewerResults()` which fetches `GET /api/db/flagged` (all items from the last completed scan session, joined with dispositions) and renders the grid directly — no SSE required.
+- CSS (`body.viewer-mode`) hides: Sources/Options/Accounts sidebar panels; Scan/Stop buttons; profile bar; config-group buttons; resume banner; bulk-delete button; per-card delete button; data-subject delete button; Share button.
+- Disposition tagging (select + Save) remains fully functional — `/api/db/disposition` has no auth guard.
+- Filter bar, Excel export, Art.30 export, preview panel, and log remain accessible.
+
+**New files:** `routes/viewer.py`, `static/js/viewer.js`, `templates/viewer_pin.html`, `templates/viewer_denied.html`
+
+**Files changed:** `app_config.py`, `gdpr_scanner.py`, `templates/index.html`, `static/style.css`, `static/js/auth.js`, `static/js/results.js`, `static/js/scheduler.js`, `routes/database.py`
+
+---
+
+### Fixed — memory exhaustion during large M365 scans
+
+Addressed root causes of runaway memory growth (reported: up to 90 GB RSS) that could crash the host machine during scans of large Microsoft 365 tenants.
+
+**`scan_engine.py`**
+
+- **Email body HTML stripped at collection time** — Graph API returns the full `body` field (raw HTML, up to ~1 MB per message) for every email fetched. Previously, all message dicts — including the raw HTML — were accumulated in `work_items` before any scanning began. For 1 000 users × 2 000 emails this could mean >100 GB in `work_items` alone. The body is now converted to plain text immediately on collection (`_precomputed_body`), and the raw `body` and `bodyPreview` keys are deleted from the dict before it is queued. The processing loop reads `_precomputed_body` via `pop()` and `del`s it after use.
+- **`work_items` converted to `deque` before processing** — items are now released from memory one by one via `popleft()` as they are processed, rather than keeping the entire list alive for the duration of the scan. `gc.collect()` is called immediately after conversion and after each checkpoint save.
+- **`content` bytes freed as early as possible in the file processing branch** — raw download bytes are now `del`'d immediately after `content.decode()` (before the expensive NER/PII pass), and also in the no-hits `else` branch where they were previously kept alive until the next loop iteration.
+- **`body_text` freed after use in the email branch** — `del body_text` added after `_broadcast_card` so large plain-text bodies do not linger until the next iteration.
+- **Memory guard before file downloads** — uses `psutil.virtual_memory().available` to skip a file download and log a warning if fewer than 300 MB of RAM are available, preventing a single large file from pushing an already-pressured machine into OOM.
+
+**`document_scanner.py`**
+
+- **PDF OCR page images freed page by page** — `convert_from_path()` renders all pages at 300 DPI before scanning begins (~26 MB per A4 page; a 100-page PDF ≈ 2.6 GB). Each rendered `PIL.Image` is now nulled out (`images[page_num-1] = None`) immediately after OCR, so only one page image is live at a time instead of the entire document.
+
+### Changed — Sources panel is now resizable and collapsible
+
+The **KILDER** sidebar panel now behaves consistently with the other sidebar sections.
+
+- **Collapsible** — the `▾` / `▸` toggle was already wired up; collapse state is already persisted in `localStorage`. No change needed here.
+- **Resizable** — a drag handle (`sources-resize-handle`) added at the bottom of the panel body. Dragging up shrinks the panel (scroll appears); dragging down is capped at the panel's natural content height — you cannot expand it beyond what is needed to show all sources. Height preference persisted in `localStorage` under `gdpr_sources_h`.
+- **Auto-fit on render** — `_fitSourcesPanel()` is called at the end of every `renderSourcesPanel()` invocation. On first load and whenever sources are added or removed (e.g. connecting Google), the panel height snaps to exactly fit all visible sources. A previously saved smaller height is honoured only if it is still smaller than the new content height; dragging back to full height clears the saved preference.
+- The old `max-height: calc(5 * 26px)` fixed cap is removed.
+
+**Files changed:** `templates/index.html`, `static/style.css`, `static/js/log.js` (`_fitSourcesPanel`, `_initSourcesResize`), `static/js/sources.js`, `static/js/results.js`.
+
+---
+
+## [1.6.13] — 2026-04-10
+
+### Added — developer tooling
+
+- **`run_tests.sh`** — shell script to activate the venv and run the full test suite. Accepts any `pytest` arguments: `./run_tests.sh`, `./run_tests.sh -q`, `./run_tests.sh tests/test_app_config.py`.
+- **Directory-scoped `CLAUDE.md` rules** — `routes/CLAUDE.md`, `static/js/CLAUDE.md`, `templates/CLAUDE.md`, `lang/CLAUDE.md` replace the previous single-file context document. Each file is loaded automatically by Claude Code only when working in the relevant directory.
+
+### Fixed — documentation
+
+- **`README.md` project files table** — removed four phantom entries (`Dockerfile`, `docker-compose.yml`, `.dockerignore`, `scanner_audit.jsonl`); corrected `static/app.js` description to "archived monolith — no longer loaded"; fixed manual paths (`MANUAL-EN.md` → `docs/manuals/MANUAL-EN.md`); added missing files: `scan_engine.py`, `sse.py`, `checkpoint.py`, `app_config.py`, `cpr_detector.py`, `google_connector.py`, `static/style.css`, `static/js/*.js`, `routes/google_auth.py`, `routes/google_scan.py`, `run_tests.sh`, `docs/setup/` guides.
+- **`docs/manuals/MANUAL-EN.md`**, **`docs/manuals/MANUAL-DA.md`** — version header updated from 1.6.11 → 1.6.13; footer updated from v1.6.8 → v1.6.13.
+
+### Changed — blueprint migration batch 3, 4, 5 (auth, database, export — migration complete)
+
+All remaining direct `@app.route` registrations removed from `gdpr_scanner.py`. Flask now routes every API endpoint exclusively through its blueprint. Only `GET /` and `GET /api/scan/stream` (SSE) remain in `gdpr_scanner.py`.
+
+**`routes/auth.py`** — rewritten with direct imports (batch 3, 6 routes):
+- `MSAL_OK`, `M365Connector`, `M365Error` imported from `m365_connector`
+- `_load_config`, `_save_config` imported from `app_config`
+- Dead module-level globals `_pending_flow` and `_auth_poll_result` removed from `gdpr_scanner.py`
+- Routes removed: `/api/auth/status`, `/api/auth/start`, `/api/auth/poll`, `/api/auth/userinfo`, `/api/auth/signout`, `/api/auth/config`
+
+**`routes/database.py`** — rewritten with direct imports (batch 4, 15 routes):
+- `_get_db`, `DB_OK` from `gdpr_db`; `_set_admin_pin`, `_verify_admin_pin`, `_admin_pin_is_set` from `app_config`; `_clear_checkpoint`, `_DELTA_PATH` from `checkpoint`; `_extract_exif`, `_html_esc`, `_placeholder_svg` from `cpr_detector`
+- `SCANNER_OK` determined by local `import document_scanner` try/except
+- `db_export` improved: uses `NamedTemporaryFile` instead of `mktemp` (safer for frozen apps)
+- Email preview HTML: restored the full CSS ruleset (`*, *::before, *::after`, `img`, `table`, scrollbar) from the `gdpr_scanner.py` version
+- Routes removed: `/api/db/stats`, `/api/db/trend`, `/api/db/scans`, `/api/db/subject`, `/api/db/overdue`, `/api/db/disposition` (×2), `/api/db/deletion_log`, `/api/db/reset`, `/api/admin/pin` (×2), `/api/db/export`, `/api/db/import`, `/api/preview/<item_id>`, `/api/thumb`
+
+**`routes/export.py`** — rewritten with direct imports (batch 5, 3 routes):
+- `_get_db`, `DB_OK` from `gdpr_db`; `_GUID_RE`, `_resolve_display_name` from `app_config`; `M365PermissionError` from `m365_connector`
+- `app.logger` replaced with `logging.getLogger(__name__)`
+- Dead `delete_item()` helper removed from `gdpr_scanner.py` (was unreachable; blueprint has its own copy)
+- Routes removed: `/api/export_excel`, `/api/export_article30`, `/api/delete_bulk`
+
+**`tests/test_routes.py`** — `db_patch` fixture updated: now patches `routes.database._get_db` / `routes.database.DB_OK` and `routes.export._get_db` / `routes.export.DB_OK` (previously patched `gdpr_scanner._get_db`/`gdpr_scanner.DB_OK`, which no longer has any effect). Two `test_without_db_returns_503` tests updated to monkeypatch `routes.database.DB_OK` instead of `gdpr_scanner.DB_OK`.
+
+---
+
+## [1.6.12] — 2026-04-10
+
+### Fixed — profile editor save drops users from non-active role groups
+
+In `_pmgmtSaveFullEdit` (profile management editor), the save function applied the active role filter (`_pmgmtRoleActive`) to the list of checked checkboxes before saving. Since `_pmgmtFilterAccounts` hides rows via `display:none` but does not uncheck them, users from other role groups that remained checked (but hidden) were silently discarded on save. The role filter at save time is removed — all checked checkboxes are now captured regardless of which role tab is visible.
+
+---
+
+## [1.6.11] — 2026-04-10
+
+### Changed — blueprint migration batch 1 (scan + app_routes)
+
+15 direct `@app.route` registrations removed from `gdpr_scanner.py`. Flask now routes all of these exclusively through their blueprint counterparts, which previously existed as dead code shadowed by the direct routes.
+
+**`routes/scan.py`** — rewritten with direct imports (was entirely non-functional as dead code due to bare-name `NameError`s behind the shadow):
+- Added `GET /api/scan/status` (new — was only in gdpr_scanner.py)
+- Added `GET /api/src_toggles`, `POST /api/src_toggles` (new — was only in gdpr_scanner.py)
+- `scan_checkpoint_info` — added missing `check_only` handling present in the gdpr_scanner.py version
+- All state references converted from bare names to `state._scan_lock` / `state._scan_abort`; `run_scan` imported lazily from `scan_engine` inside `_run` to avoid circular imports
+- `_save_settings`, `_load_settings`, `_load_src_toggles`, `_save_src_toggles` imported from `app_config`
+- `_checkpoint_key`, `_load_checkpoint`, `_clear_checkpoint`, `_load_delta_tokens`, `_DELTA_PATH` imported from `checkpoint`
+
+**`routes/app_routes.py`** — cleaned up:
+- `APP_VERSION` now computed locally from `VERSION` file (was a bare-name reference to gdpr_scanner.py global)
+- `_LANG_DIR` computed at module level; fixed `sys` / `_sys` alias mismatch in `get_langs` (bug in blueprint that never manifested while shadowed)
+- `_set_lang_override`, `_load_lang_forced` imported directly from `app_config`
+- `get_langs` — added missing `langs.sort()` present in the gdpr_scanner.py version
+
+**`tests/test_routes.py`** — `mock_connector` fixture simplified: no longer needs to patch `gdpr_scanner._connector` since the direct `scan/start` route is gone; `state.connector` alone is sufficient. `run_scan` stub in `test_authenticated_returns_started` updated to target `scan_engine` directly.
+
+**Routes removed from `gdpr_scanner.py`:** `/api/about`, `/api/langs`, `/api/set_lang`, `/api/lang`, `/api/scan/status`, `/api/scan/start`, `/api/scan/stop`, `/api/scan/checkpoint`, `/api/scan/clear_checkpoint`, `/api/settings/save`, `/api/settings/load`, `/api/src_toggles`, `/api/delta/status`, `/api/delta/clear`
+
+**Still in `gdpr_scanner.py`:** `GET /` (root), `GET /api/scan/stream` (SSE — cannot be in a blueprint), and the `auth`, `users`, `sources`, `database`, `export` route groups (31 routes — next batches).
+
+---
+
+## [1.6.10] — 2026-04-10
+
+### Fixed — Google Drive `exportSizeLimitExceeded` warning
+
+Native Google Workspace files too large for Drive's export API (Google's server-side limit, distinct from the 20 MB local cap) now produce a clean skip message instead of a stray `WARNING googleapiclient.http — Encountered 403 Forbidden with reason "exportSizeLimitExceeded"` in the log. A `logging.Filter` subclass is installed on the `googleapiclient.http` logger at import time to suppress the duplicate external warning; the `except HttpError` block in `_drive_iter` detects the reason and logs `[gdrive] skip '<name>' — file too large for Google export API (exportSizeLimitExceeded)` with the file ID.
+
+### Fixed — peak memory during large file/SMB scans (OOM risk reduction)
+
+Three targeted buffer-lifetime fixes reduce peak RSS during large scans:
+
+- **`cpr_detector.py`** — `del content` after writing the PDF bytes to a temp file in `_scan_bytes_timeout`. The 20 MB buffer was previously held in the main process for the entire duration of `p.join(timeout)` (up to 60 s), overlapping with the spawned subprocess's ~150–300 MB heap. It is now freed before the subprocess starts.
+- **`scan_engine.py`** — `del content` after the thumbnail block in `run_file_scan`. The raw file buffer was kept alive through card dict construction and the start of the next loop iteration; it is now freed as soon as the thumbnail (or placeholder SVG) has been generated.
+- **`file_scanner.py`** — `PREFETCH_WINDOW` reduced from 2 to 1. Halves the maximum number of concurrently-held SMB read buffers (from 2 × 20 MB to 1 × 20 MB).
+
+---
+
+## [1.6.9] — 2026-04-10
+
+### Changed — frontend migrated to ES modules
+
+**Phase 2 complete:** All 10 split JS files converted from `<script defer>` to `<script type="module">`.
+
+- `static/js/state.js` introduced as the shared state module — exports a single `S` object holding all previously-global mutable state (`flaggedData`, `_allUsers`, `_profiles`, `_fileSources`, `_srcPct`, scan-running flags, etc.). All 10 modules import `{ S }` from `state.js` and mutate its properties in place.
+- Every function called from an inline HTML `onclick=` handler is explicitly exported via `window.fnName = fnName` at the bottom of each module (~80 exports across 10 files).
+- `var LANG` retained in the inline `<script>` block (not a module) so it remains a true global accessible from all modules as a bare name.
+- `app.js` retained as archive; no longer loaded by `index.html`.
+
+### Fixed — connector.js SyntaxError caused by duplicate function declarations
+
+`openFileSourcesModal` and `closeFileSourcesModal` were declared **twice** at module top level in `connector.js` — once as redirect stubs pointing to the new unified Sources modal, and once as the old `#fsrcBackdrop` implementations left over from the pre-unification code. In ES module strict mode, duplicate `function` declarations in the same scope are a **SyntaxError**. The engine rejected the entire module at parse time, meaning none of its ~35 `window.*` exports were ever set. Symptoms:
+
+- **"Kilder" (Sources) button did nothing** — `window.openSourcesMgmt` was never set
+- Google status dot, file source loading, and sources panel re-render all silently failed — `window.smGoogleRefreshStatus`, `window._loadFileSources` etc. were undefined
+- Sources panel showed only M365 sources even when Google Workspace was configured
+
+**Fix:** removed the stale `async function openFileSourcesModal` / `function closeFileSourcesModal` bodies (lines 511–518). The redirect stubs at lines 505–506 (`openSourcesMgmt('files')`) are the correct new behaviour. Also removed the duplicate `window.openFileSourcesModal` and `window.closeFileSourcesModal` assignments that appeared twice in the exports block.
+
+### Fixed — Profiler modal did not open when `_renderProfileMgmt` threw
+
+If `_renderProfileMgmt()` threw a runtime error (e.g. due to downstream failures from the connector.js parse error), `openProfileMgmtModal` would abort before reaching `classList.add('open')`, leaving the modal invisibly closed. The function now wraps both `_renderProfileMgmt()` and `_pmgmtOpenEditor()` in individual try-catch blocks. Any error is logged to the console; the modal opens regardless.
+
+### Fixed — blocking alert on every unhandled async error
+
+`ui.js` contained a duplicate `unhandledrejection` listener that called `alert()` for every unhandled Promise rejection. Background API calls (Google status, file sources, src_toggles) could fire these alerts at page load, and browsers that had already suppressed one alert silently blocked all subsequent ones. Removed the `alert()` handler; the `console.error` handler is retained.
+
+---
+
+## [1.6.8] — 2026-04-09
+
+### Fixed — memory pressure during large scans
+
+**SMB prefetch window reduced**
+- `PREFETCH_WINDOW` reduced from 5 to 2 in `file_scanner.py`. Peak in-flight SMB memory drops from ~250 MB to ~40 MB during large network share scans.
+- `MAX_FILE_BYTES` reduced from 50 MB to 20 MB — files larger than 20 MB are skipped rather than buffered in full.
+
+**PDF subprocess concurrency limited**
+- A module-level `threading.Semaphore(1)` in `cpr_detector.py` ensures at most one PDF OCR subprocess runs at a time. Previously, multiple threads could each spawn a ~200 MB subprocess simultaneously, causing OOM under load.
+
+**Google Workspace export buffer reduced**
+- `_MAX_EXPORT_BYTES` in `google_connector.py` reduced from 50 MB to 20 MB.
+- `_drive_iter` now explicitly deletes the `BytesIO` buffer (`del buf`) before yielding each file's bytes, releasing the double-buffer peak immediately rather than waiting for GC.
+
+### Fixed — Excel and Article 30 exports missing sources
+
+**Gmail and Google Drive tabs added to Excel export**
+- `SOURCE_MAP` in `routes/export.py` was missing `gmail`, `gdrive`, `local`, and `smb` entries. Items from these sources were silently dropped — they were grouped internally but never written to a sheet.
+- All eight source types now have dedicated tabs: Outlook, OneDrive, SharePoint, Teams, Gmail, Google Drive, Local, Network.
+- The same fix applies to the inline Excel builder in `gdpr_scanner.py`.
+
+**Concurrent scan results captured in exports**
+- M365, Google Workspace, and file scans each create their own `scan_id`. The previous DB fallback used `get_flagged_items()`, which only returned results for the single most-recently-completed scan — silently dropping the other sources after page reload.
+- New `get_session_items(window_seconds=300)` in `gdpr_db.py` returns items from all scans whose `started_at` falls within a 5-minute session window of the latest completed scan.
+- Both `export_excel()` and `export_article30()` now use `get_session_items()` as their DB fallback. `_build_article30_docx()` also uses it directly.
+
+### Changed — "Email" source renamed to "Outlook"
+
+The `email` source type (Microsoft Exchange mailboxes) is now consistently labelled **Outlook** everywhere:
+- Source badges on result cards (`SOURCE_BADGES.email`)
+- Filter bar dropdown
+- `_sourceLabel()` in JS
+- Excel tab label
+- `m365_src_email`, `m365_filter_email`, `m365_phase_emails` in all three lang files (`en.json`, `da.json`, `de.json`)
+- Article 30 report uses **Exchange (Outlook)** for the formal legal context
+
+Rationale: with Gmail also present, "Email" was ambiguous. "Outlook" ties the source unambiguously to Microsoft 365.
+
+### Changed — progress bar moved above log panel
+
+- `#progressBar` moved from below the topbar to just above `#logWrap` (above the activity log).
+- The bar is now a permanent placeholder — always visible, never hidden. `display: flex` is the permanent state; `display: none` is no longer used.
+- Background changed from `var(--surface)` to `var(--bg)` to match the log area. Border changed from `border-bottom` to `border-top`.
+- New `_clearProgressBar()` helper resets phase, stats, ETA, and file fields on scan end, leaving the bar visually empty at idle. All previous `style.display` assignments removed.
+
+### Fixed — profile manager Cancel closes entire modal
+
+- Clicking **Cancel** in the profile editor previously closed the editor panel but left the profile list modal open behind it. `_pmgmtCloseEditor()` now calls `closeProfileMgmt()` to dismiss the full modal.
+- Dead stub `function _pmgmtCancelEdit(id) {}` removed.
+
+### Changed — exports available without running a new scan
+
+- The filter bar (including Excel and Art.30 export buttons) is always visible on page load.
+- Exports now use `get_session_items()` as the DB fallback, so the buttons produce a complete report from the previous session immediately after page reload — no new scan required.
+
+### Fixed — profile loading clobbered by scan start
+
+- `_save_settings()` is called on every M365 scan start with a payload containing only M365 `sources`, `user_ids`, and `options`. It was writing this back via `_profile_from_settings()`, which has no `google_sources` field — permanently stripping Google and file source selections from the active profile after each scan.
+- `_save_settings()` now preserves `google_sources` and `file_sources` from the existing profile when the payload does not include them, and rebuilds the combined `sources` array as M365 + google + file.
+- `_profile_from_settings()` updated to pass through `google_sources` when present in the payload.
+
+### Fixed — "no results" shown during live scan after hard refresh
+
+- Hard-refreshing the browser mid-scan caused the "Ingen CPR-numre fundet" card to appear immediately, before the SSE watchdog had detected the running scan.
+- `loadLastScanSummary()` is no longer called directly on `DOMContentLoaded`. It is now called inside `_sseWatchdog` on the first status poll, only if no scan is currently running (`_initialStatusChecked` flag).
+
+### Fixed — progress bar source pill showing "Email" instead of "Outlook"
+
+- `_PHASE_SOURCE_MAP` entry for Exchange mail phases still had `label: 'Email'`. Updated to `'Outlook'` to match the rename applied elsewhere.
+
+### Changed — profile manager UI simplified
+
+- Removed the redundant **×** close button from the list panel header — the editor panel's **×** already closes the entire modal.
+- Removed the **Luk** (Close) button from the list panel footer — the footer now contains only **+ Ny profil**.
+- The editor footer **Cancel/Annuller** button replaced with a single **Luk** button that closes the entire modal (consistent with `_pmgmtCloseEditor()` behaviour).
+
+### Changed — log panel collapsible
+
+- A **▾/▸** toggle button added to the left of the log header. Clicking it collapses or expands the log panel (resize handle + log body together, wrapped in `#logSectionBody`).
+- State persists in `localStorage` via the existing `toggleSection` / `restoreSectionStates` mechanism (`sc_logSection` key).
+
+### Changed — log header buttons translated
+
+- **All**, **Errors**, and **Copy** buttons in the log header now use `data-i18n` attributes and are fully translated in all three lang files.
+- Translation keys added: `btn_errors` (da: Fejl, de: Fehler), `log_copy` (da: Kopier, de: Kopieren).
+- Symbol prefix `⎘` removed from the Copy button label.
+
+### Changed — project documentation structure
+
+- User manuals moved from project root to `docs/manuals/` (`MANUAL-DA.md`, `MANUAL-EN.md`).
+- Setup guides moved from project root to `docs/setup/` (`M365_SETUP.md`, `GOOGLE_SETUP.md`).
+- `routes/app_routes.py` and `build_gdpr.py` updated to reference the new manual paths.
+- `README.md` links updated accordingly.
+
+### Fixed — disposition carry-forward across scans
+
+When a previously reviewed file reappears in a new scan it now shows its prior disposition immediately on the result card — no need to open the preview panel first.
+
+- `get_prior_disposition(item_id)` added to `ScanDB` in `gdpr_db.py`. Returns the stored disposition status if it differs from `'unreviewed'`, otherwise `None`.
+- `get_flagged_items()` and `get_session_items()` in `gdpr_db.py` now `LEFT JOIN dispositions` and return `COALESCE(d.status, 'unreviewed')` as `disposition` on every row. Exports and the results grid therefore reflect the latest review decision without an extra round-trip.
+- `_with_disposition(card, db)` helper added to `scan_engine.py`. Injects the prior disposition into a card dict before it is broadcast as `scan_file_flagged`. Used at all four broadcast points:
+  - `scan_engine.py` — file scan (line ~297)
+  - `scan_engine.py` — checkpoint resume re-emit loop (line ~357)
+  - `scan_engine.py` — M365 scan (line ~456)
+  - `routes/google_scan.py` — Google Workspace scan (line ~225)
+- The frontend already reads `f.disposition || 'unreviewed'` for filter matching — no JS changes required.
+
+---
+
+## [1.6.7] — 2026-04-06
+
+### Fixed — emoji/symbol removal from all buttons and indicators
+
+**All UI buttons stripped of emoji and symbol prefixes**
+- Every interactive element in the topbar, filter bar, modals, and settings panels now uses plain text only. Removed: `▶`, `■`, `💾`, `✕`, `⚙`, `🕐`, `⬇`, `⬆`, `🗑`, `📋`, `☰`, `⊞`.
+- Affected buttons: Scan, Stop, Save (profile), Clear (profile), Profiler/Profiles, Kilder/Sources, Indstillinger/Settings, Excel, Art.30, Slet/Delete (bulk), Liste/List, Gitter/Grid, Export (DB), Import (DB), Reset DB, scheduled scan title.
+- Labels updated in `templates/index.html` and all three lang files (`da.json`, `en.json`, `de.json`).
+
+**Filter bar — Clear button standardised**
+- The `×` clear-filter button was an oversized bare symbol (`font-size: 16px`, no border). Replaced with a proper text button (`Ryd`/`Clear`/`Löschen`) matching the 26 px filter bar standard: bordered, `border-radius: 5px`, turns red on hover.
+- Translation key `m365_filter_clear` added to all three lang files.
+
+**Scheduler indicator — "Next:" label translated**
+- The hardcoded `'Next: '` prefix in `schedUpdateSidebarIndicator()` is now `t('m365_sched_next', 'Next')`. Key added to all three lang files (da: `Næste`, de: `Nächste`).
+- Clock emoji `🕐` removed from the indicator and from `m365_sched_title` in all lang files.
+
+### Fixed — result card badges, progress bar on browser refresh
+
+**Result card badges — standardised to 9 px pill style**
+- All result card badges now follow the app-wide badge standard: `font-size: 9px; padding: 1px 5px; border-radius: 10px`.
+- `.source-badge` (OneDrive, Exchange, Gmail, etc.) had no CSS definition at all — it now has the correct size, padding, and border-radius.
+- `.cpr-badge` reduced from `10px / 2px 6px` to `9px / 1px 5px`.
+- `.photo-face-badge`, `.special-cat-badge`, `.overdue-badge`, `.role-pill` reduced from `10px` / `border-radius: 4px` to `9px / 1px 5px / border-radius: 10px`.
+- Removed camera emoji (📷) from the Faces badge.
+- `.card-source` gains `flex-wrap: wrap` so badges wrap on narrow cards instead of overflowing.
+
+**Progress bar — survives browser refresh**
+- Refreshing the browser mid-scan no longer causes the progress bar to appear without coloured segment pills.
+- Four code paths now defensively set the correct running flag and call `_renderProgressSegments()` before the track is needed:
+  - `scan_start` SSE handler (sets `_m365ScanRunning`).
+  - `scan_progress` SSE handler (sets the flag matching the event's `source` field — covers mid-scan reconnects where `scan_start` has scrolled out of the 500-event replay buffer).
+  - `scan_phase` SSE handler (infers source from phase text; fires before `scan_progress` in the replay sequence).
+  - `_sseWatchdog` (sets `_m365ScanRunning` immediately on detecting a running scan via `/api/scan/status`, which checks the M365 lock).
+
+### Improved — scan responsiveness, UI layout, preview panel
+
+**Scan abort responsiveness**
+- Stop now takes effect within one Graph API round-trip across all collection phases. Previously, pressing Stop only checked the abort flag in the *processing* loop — the entire collection phase (email folder enumeration, OneDrive file listing, Teams channel fetching, SharePoint site iteration) ran to completion first, which could take 10+ minutes on large tenants.
+- Abort checks added to: email folder loop (inside `_scan_user_email`), OneDrive items loop (delta and full modes in `_scan_user_onedrive`), Teams team loop and channel loop (inside `_scan_user_teams`), SharePoint site loop, and all outer per-user loops.
+- Side effect: the scheduler no longer fails with "Manual scan already running" when a job fires shortly after the user pressed Stop — the lock is now released promptly.
+
+**Scheduler — graceful skip on lock contention**
+- When a scheduled job fires while a manual (or other scheduled) scan holds the lock, the job now logs `Skipped — a scan is already running` and returns cleanly. Previously it raised `RuntimeError("Manual scan already running")`, which was logged as a hard failure with a full traceback in the UI.
+
+**Filter bar — always visible, full-width, 26 px**
+- Filter bar was hidden until the first result arrived. It is now always visible.
+- Moved from inside the left column to a direct child of `.main`, above `.content-area`. The preview panel's top edge now aligns with the grid's top edge rather than overlapping the filter bar.
+- All filter bar controls standardised to `height: 26px` (`input`, `select`, `button`) to match the topbar control standard. Redundant inline `padding`/`font-size`/`border-radius` stripped from button inline styles.
+
+**Preview panel**
+- Resizable: a 5 px drag handle on the left edge lets the user adjust the panel width. Handle uses pointer capture (`setPointerCapture`) so dragging over the iframe or releasing outside the browser window always terminates the drag cleanly. Width is persisted in `sessionStorage` and restored when the panel is next opened.
+- Min width: 280 px; max width: 70% of window width.
+- Fixed: clicking the close (×) button had no effect. Root cause: `panel.style.width` set by the resize logic is an inline style and overrides the CSS class `.hidden { width: 0 }`. Fix: `closePreview()` now clears `panel.style.width = ''` before adding `.hidden`; `openPreview()` restores the saved width when showing the panel.
+- Email preview iframe: added `* { max-width: 100% }`, `overflow-x: hidden`, `table { table-layout: fixed }`, and `img { height: auto }` to prevent wide HTML emails from creating a horizontal scrollbar inside the 420 px panel.
+- Email preview iframe scrollbar: matches the app's 4 px thin scrollbar style.
+
+**Thin scrollbars everywhere**
+- `.grid-area` (results grid) and `.log-panel` now use the same 4 px thin scrollbar style (`scrollbar-width: thin; width: 4px`) as `#accountsList` and `#sourcesPanel`. Previously they used the system-default wide scrollbar.
+
+**Scheduler next scan indicator**
+- `#schedNextIndicator` was a plain `display: block` div with no height constraint, causing it to sit taller than adjacent topbar controls. Fixed to `height: 26px; display: inline-flex; align-items: center` with a border and border-radius matching the surrounding pill buttons.
+
+**Log and preview resize — pointer capture fix**
+- Both resize handles (`logResizeHandle`, `previewResizeHandle`) switched from `mousedown` + `document.addEventListener('mousemove'/'mouseup')` to `pointerdown` + `setPointerCapture`. The old approach lost the drag when the cursor moved over the iframe (which has its own input context) or left the browser window. Pointer capture routes all pointer events to the handle until `pointerup`/`pointercancel` regardless of cursor position.
+
+**Manuals updated (MANUAL-DA.md, MANUAL-EN.md)**
+- Version 1.6.4 → 1.6.6.
+- Section 2: activity log description now mentions copy button, error filter, and resize handle.
+- Section 4.4: progress bar description updated — source pill labels listed, old "current phase" wording removed.
+- Section 8: profiles section updated for loader model, ✕ clear button, and explicit mention that Google/file sources are saved.
+
+---
+
+## [1.6.6] — 2026-04-06
+
+### Improved — UX polish II (clusters, badges, log panel, progress bar)
+
+**Pill clusters**
+- KONTI section header: Alle / Ingen / ↻ converted from bare text links to a pill cluster (`height: 22px`), matching the pattern used in the Profile editor.
+- Profile list rows (Profiler modal): Brug + Kopier grouped into a pill cluster; Slet kept as a separate standalone danger button.
+
+**Badge sizing**
+- Platform badges (M365, GWS, M365+GWS) and role badges (Ansat, Elev, Anden) standardised to `font-size: 9px; padding: 1px 5px; border-radius: 10px` across the main sidebar account list and the profile modal. Previously the sidebar used larger inline styles (`font-size: 10px; padding: 2px 7px`) that made badges visually heavier than in the modal.
+
+**Account rows**
+- Main sidebar account row padding reduced from `4px 0` to `2px 0`, matching the compact density of the profile modal account list.
+- SKU debug search icon button standardised to `height: 26px` to match the adjacent role filter cluster.
+
+**Log panel — full rebuild**
+- Color-coded log levels: `.log-err` (red `var(--danger)`), `.log-ok` (green `var(--success)`), `.log-warn` (orange `#e0922a`). Level classes were already passed to `log()` but had no CSS — all entries appeared in the same muted colour.
+- Live scanning indicator: a single italic `▶ filename` line at the bottom of the log updates in place via `scan_file` SSE events. Never scrolls; clears automatically when the scan finishes. Avoids flooding the log with per-file entries.
+- Copy button (`⎘ Copy`) in the log header copies all log text to clipboard; flashes `✓ Copied` for 1.5 s.
+- Log level filter (`All` / `Errors`) in log header — hides info lines when Errors mode is active.
+- Resizable: drag handle at the top edge of the panel resizes vertically and **snaps to the nearest full line** (row = 18 px: 16 px line-height + 2 px margin; 2–30 lines range).
+- Default height set to **8 lines exactly** (`height: 154px` = 8 × 18 + 10 px padding).
+- Persistent across page refresh: up to 300 lines saved to `sessionStorage`; restored on `DOMContentLoaded`; cleared on new scan start.
+- Smart scroll: auto-scroll only triggers when already within 24 px of the bottom — scrolling up to read earlier entries stops the follow behaviour.
+
+**Progress bar — segmented multi-source**
+- Replaced the single `progressFill` bar with a dynamically segmented track (`#progressTrack`). One segment per active scan type (M365 / Google / Files), equal width, separated by a 1 px gap. Segments are added at scan start and removed as each source finishes.
+- Color-coded: M365 = blue (`var(--accent)`), Google = dark green (`#3a7d44`), Files = purple-gray (`#7a6a9e`).
+- Each segment fills independently — M365 at 80% and Google at 20% are shown simultaneously with no interference. Eliminates the `_maxPct` hack (bar stuck at 100% after first source finishes).
+- Backend (`scan_engine.py`, `routes/google_scan.py`): all `scan_progress` SSE events now include `"source": "m365"` / `"google"` / `"file"`. Frontend routes each event to the correct segment by `d.source`.
+- Stats (`X / Y`) and ETA only update from M365 events — the only source with meaningful totals and time estimates.
+
+**Progress bar — phase display**
+- `#progressWho` replaces the plain-text phase span. Renders a colour-coded source pill (`[Email]`, `[OneDrive]`, `[Gmail]`, `[GDrive]`, `[Local]`, etc.) followed by the user's full display name.
+- Source pill uses the universal badge standard: `font-size: 9px; padding: 1px 5px; border-radius: 10px; font-weight: 500`.
+- `_setProgressPhase()` identifies the source from the full phase string via `_PHASE_SOURCE_MAP`, then splits on ` — ` to extract the username. Phases without a dash (e.g. `📂 folder: 3 msg(s)`) fall back to the last known user (`_progressCurrentUser`).
+- `_resolveDisplayName()` resolves email addresses in Google phase strings to the user's display name via `_allUsers`. Also strips trailing count suffixes (`: 3 file(s)`).
+- Pill labels standardised: `Email`, `OneDrive`, `SharePoint`, `Teams`, `Gmail`, `GDrive`, `Local` — matching the source names used elsewhere in the UI.
+- All 25 `scan_phase` strings now produce a pill: `📂` emoji maps to `Email`; `Google Workspace — email` phases resolve to display name; file scan startup uses `Files — {label}`; Google per-user phase uses `Google Workspace — {email}`.
+- Source map ordering: `Google Workspace` matched before `Gmail` so the GWS startup phase shows `[Gmail]` only when no broader match applies.
+- Fixed: email regex was missing the `i` flag (`/E-?mail.../u` → `/E-?mail.../iu`), causing Danish `"Indsamler e-mails"` to fall through to plain text.
+
+**Scheduler — Google and file sources**
+- Scheduled scans now run Google Workspace sources. `_build_options` extracts `google_sources` from the profile (with legacy fallback for profiles that stored gmail/gdrive inside `sources`). A separate Google scan block runs after the file scan loop using `_google_scan_lock`.
+
+**Profile dropdown — loader model**
+- Removed the selectable "Standard (sidebar)" / "Default (sidebar)" empty option. Profiles are now **loaders**, not persistent modes — selecting one pushes its settings into the sidebar; the sidebar is always the live state.
+- Replaced with a `disabled` placeholder `"— Vælg profil —"` shown when no profile has been loaded.
+- Added a `✕` clear button (`#profileClearBtn`) that appears next to the dropdown when a profile is active. Clicking it clears `_activeProfileId` and resets the dropdown to the placeholder **without touching the sidebar** — the loaded settings remain.
+- `clearActiveProfile()` function added.
+- Lang keys: `m365_profile_default` removed, `m365_profile_placeholder` added (da/en/de).
+
+**Bug fixes**
+- Profile role filter respected at scan time: `getSelectedUsers()` now filters the returned list by `_activeRoleFilter`, preventing hidden-role users from being silently included in M365 scans and profile saves via the topbar quick-save.
+- Profile editor role filter respected at save time: `_pmgmtSaveFullEdit` now excludes IDs whose role doesn't match `_pmgmtRoleActive`. Prevents "select all → filter by staff → save" from silently saving student accounts that were checked but hidden.
+- Profile editor role filter state reset on open: `_openEditorForProfile` resets `_pmgmtRoleActive = ''` so a stale filter from a previous session doesn't silently hide accounts when the editor is reopened.
+- Google and file sources not saved in profiles: `_pmgmtSaveFullEdit` now checks whether the checkboxes are actually present in `#peSourcesPanel` (DOM query) rather than using `!!window._googleConnected` and `_fileSources.length > 0` as proxies. The async status fetches could complete after the editor opened, leaving the panel without checkboxes while the proxy read `true`, silently discarding the user's selection.
+- Profile editor now re-renders `#peSourcesPanel` when `smGoogleRefreshStatus()` resolves or `_loadFileSources()` completes if the editor is open and the panel has no Google/file checkboxes yet.
+
+---
+
+## [1.6.5] — 2026-04-04
+
+### Improved — UX polish pass (topbar, sidebar, clusters)
+
+**Topbar**
+- All topbar elements normalised to `height: 26px`: Scan/Stop buttons, profile dropdown, save button, config cluster, stats pill, icon buttons (🔍, ?, 🌙). Previously each had independent padding, making the topbar uneven.
+- Config buttons (Profiler, Kilder, Indstillinger) extracted from `#profileBar` into a dedicated `.config-group` pill cluster separated by a `.topbar-sep` divider — visually distinct from the profile selector group.
+- Data subject lookup moved from the sidebar footer into the topbar as a 🔍 icon button (left of `?`). Sidebar strip removed.
+
+**Sidebar**
+- KILDER, INDSTILLINGER, and KONTI sections are now collapsible. Each header gets a `▾`/`▸` chevron (`section-collapse-btn`). Collapse state persists in `localStorage` per section. KONTI releases its `flex:1` when collapsed.
+- Role filter buttons (Alle / Ansat / Elev) converted to a pill cluster (`.role-filter-btn`) matching the topbar cluster pattern. SKU debug button stays separate.
+- Date preset buttons (1 år / 2 år / 5 år / 10 år / Alle) converted to a pill cluster.
+- All pill cluster buttons, input fields, and date picker set to `height: 26px` — the universal control height across the UI.
+- Toggle size reduced from `36×20px` to `32×18px` with knob gap tightened from 3px to 2px. Knob-to-track ratio improved for a sleeker look.
+- Role filter buttons display live counts: "Alle (277)", "Ansat (62)", "Elev (254)". Updated by `updateRoleFilterCounts()`, called from `renderAccountList()`.
+
+**Empty state**
+- On load, fetches `/api/db/stats`. If a previous scan exists, shows a summary card (hits, unique CPR subjects, items scanned, date, sources) instead of the bare placeholder. The placeholder is shown below as a "start new scan" prompt. Summary hidden when a scan starts.
+
+### Added — Single-instance lock
+
+- **`~/.gdprscanner/app.lock`** — an exclusive process lock is acquired at startup to prevent two instances from running simultaneously against the same database and settings files.
+- **Desktop (`build_gdpr.py` launcher)**: lock is checked before Flask starts. If another instance holds the lock the app prints `"GDPRScanner is already running."` to stderr and exits immediately.
+- **Server (`gdpr_scanner.py`)**: same guard in interactive web-UI mode (not headless — batch runs may legitimately coexist with a live server).
+- Uses `fcntl.flock(LOCK_EX | LOCK_NB)` on macOS/Linux and `msvcrt.locking` on Windows. The OS releases the lock automatically on crash or clean exit — no stale lockfiles.
+
+### Added — Port auto-increment + stdout port signal
+
+- **`gdpr_scanner.py`** (server mode): if the requested port (default 5100, or `--port N`) is already in use, the server auto-increments up to 100 ports and logs a warning: `[!] Port 5100 in use — using 5101 instead`.
+- **`build_gdpr.py` launcher** (desktop mode): `find_free_port()` was already present; auto-increment was already the desktop behaviour.
+- Both modes emit `GDPR_PORT=<n>` (flush=True) to stdout before Flask starts — a machine-readable signal parseable by any parent process or wrapper script that needs to know the actual bound port.
+
+### Added — Built-in user manual (#31 ✅)
+
+- **`MANUAL-EN.md`** / **`MANUAL-DA.md`** — standalone end-user manuals in English and Danish. 14 sections covering all major features: Getting started, Sources panel, Running a scan, Understanding results, Reviewing results, Bulk actions, Profiles, Scheduler, Export & email, Article 30 report, Data subject lookup, Settings, Retention policy, and FAQ. Written for school administrators and municipal compliance officers — no technical knowledge assumed.
+- **`GET /manual`** — new Flask route in `routes/app_routes.py`. Reads `?lang=da|en` (falls back to the current UI language). Finds the appropriate `.md` file relative to the project root, converts it to a fully self-contained styled HTML page, and returns it without any external dependencies.
+- **`_md_to_html(md)`** — zero-dependency Markdown-to-HTML converter using only Python's `re` and `html` stdlib modules. Handles headings with anchor IDs, fenced code blocks, tables, ordered/unordered lists, blockquotes, bold, italic, inline code, links, and horizontal rules.
+- **`?` button** in the topbar (right of the theme toggle) — opens the manual in a dedicated window (960×800, resizable) using the current `langSelect` value. In the packaged desktop app the window is a native pywebview window (`pywebview.api.open_manual()`); in the browser it opens via `window.open()`. Repeated clicks reuse the same window rather than spawning new ones. Does not interrupt any in-progress scan.
+- Manual page: 860 px max-width layout, language switcher (DA ↔ EN), 🖨 print button, `@media print` CSS (toolbar hidden, `h2` page breaks, external link URLs appended for paper printing).
+
+### Fixed — Manual not found in packaged app
+
+- `MANUAL-DA.md` and `MANUAL-EN.md` were missing from the PyInstaller bundle — `build_gdpr.py` now includes all `MANUAL-*.md` files as root-level data files (`--add-data MANUAL-*.md:.`). The route already used `sys._MEIPASS` for the frozen path; the files simply weren't being copied in.
+- `build_gdpr.py` `LAUNCHER_CODE` — added `open_manual(lang)` method to the `Api` class. Creates a new pywebview window for the manual URL; reuses the existing window if already open.
+
+### Fixed — Email routing, profile source persistence, SMTP error messages
+
+**`routes/email.py`** — structural rewrite
+- Removed `__getattr__` module-level hook. Bare-name lookups inside function bodies do not go through `__getattr__` (Python resolves them via `LOAD_GLOBAL` directly from `__dict__`), so `_load_smtp_config`, `_save_smtp_config`, `_build_excel_bytes`, and `_send_report_email` all raised `NameError` at runtime when the blueprint route won instead of the app-level duplicate.
+- `_load_smtp_config`, `_save_smtp_config` now imported directly from `app_config`. `_build_excel_bytes` imported from `routes.export`.
+- `_send_report_email(xl_bytes, fname, smtp_cfg, recipients)` was called in three places but never defined anywhere. Now defined as a module-level helper: builds a `MIMEMultipart("mixed")` message with the Excel as a `MIMEBase` attachment and sends via the configured SMTP server.
+- `_send_email_graph` moved into the blueprint (was only used by the duplicate app-level routes).
+
+**`gdpr_scanner.py`**
+- Removed four duplicate app-level routes that were masking the broken blueprint: `GET /api/smtp/config`, `POST /api/smtp/config`, `POST /api/smtp/test`, `POST /api/send_report`.
+- `from routes.email import _send_report_email` added after blueprint imports so `scan_scheduler.py` (`_m._send_report_email`) and the CLI headless path both resolve the function correctly.
+
+**SMTP error messages** (`routes/email.py`)
+- All three auth/connection error handlers (smtp_test, send_report, _send_report_email) now classify errors by host type before choosing a message:
+  - DNS / connection failure (`nodename nor servname`, `getaddrinfo`, `Connection refused`, timeout) → "Could not connect to SMTP server — check hostname and port."
+  - Corporate M365 host (`office365`, `microsoft`) + auth error → M365 admin centre / enable Authenticated SMTP guidance.
+  - Personal Microsoft host (`outlook`, `live`, `hotmail`) + auth error → App Password guidance at `account.microsoft.com/security`.
+  - Gmail host + auth error → App Password guidance at Google Account Security.
+  - Anything else → raw SMTP error, unmodified.
+- Previously `530` (generic "authentication required") unconditionally triggered the M365 admin centre message even when the configured host was Gmail or a personal Outlook account.
+
+**`static/app.js`** — profile source persistence
+- `_pmgmtSaveFullEdit` was overwriting `google_sources` and `file_sources` with `[]` whenever the editor was opened and those checkboxes weren't rendered (Google not connected / file sources not loaded). Now preserves the profile's existing `google_sources` when `_googleConnected` is false, and `file_sources` when `_fileSources` is empty.
+- `_applyProfile` built `_pendingProfileSources` by filtering against `_fileSources` — which is empty at profile-apply time (async load not yet complete), so the pending list was always empty and file source checkboxes defaulted to `checked=true` regardless of the profile. Now stores `profile.file_sources` directly (falling back to non-M365/Google IDs from `profile.sources`).
+- Added `_pendingGoogleSources` (mirrors `_pendingProfileSources` for Google). Set in `_applyProfile` from `profile.google_sources`; consumed in `renderSourcesPanel()` the first time Gmail/Drive checkboxes appear (when Google connects after the profile was applied). Previously they defaulted to `checked=true`.
+
+### Fixed — Progress bar and profile sources
+
+**`static/app.js`**
+- Progress bar fluctuated and ETA flickered when M365, Google, and file scans ran concurrently. Root cause: all three scan types broadcast `scan_progress` on the same SSE stream and their events interleave. Fixed with two changes: (1) `_maxPct` tracks the highest `pct` seen across all concurrent scans — the bar only ever moves forward; (2) ETA and stats counter are only written when the incoming event actually carries those fields (`d.eta !== undefined`, `d.total` present) — a Google/file event without ETA no longer wipes the ETA set by the M365 event a millisecond earlier.
+- `progressPhase` was being overwritten with the current filename by `scan_progress` events, causing it to alternate between phase text ("Google Workspace scan…") and individual filenames. Current filename now correctly updates `progressFile` instead.
+- Profile editor (`_openEditorForProfile`) only passed `profile.sources` (M365 IDs) to `_renderEditorSources` — Google and Local/SMB source checkboxes were always unchecked when reopening a saved profile. Now passes the union of `sources`, `google_sources`, and `file_sources`.
+
+### Added — SMB pre-fetch cache (#22 ✅)
+
+- SMB file scans now decouple directory traversal from file reads. A 5-slot sliding-window `ThreadPoolExecutor` keeps up to 5 reads in flight simultaneously, with a 60-second hard timeout per file. A stalled NAS read produces an error card in the UI and the scan continues — the scan thread is never blocked.
+- **`file_scanner.py`** — `_smb_collect()` new method walks the SMB tree (directory listing only, no reads), yielding file descriptors plus `_COLLECT_SKIP` / `_COLLECT_ERROR` sentinels for over-size files and listing failures. `_iter_smb()` rewritten: phase 1 collects all candidates; phase 2 resolves sentinels immediately then feeds real files through the executor window. `PREFETCH_WINDOW = 5` and `SMB_READ_TIMEOUT = 60` constants added. Local scanner (`_iter_local`) untouched.
+
+### Added — PDF OCR via multiprocessing (#20 ✅)
+
+- PDF files are now scanned in local/SMB file scans. Previously excluded because Tesseract/Poppler subprocesses could hang indefinitely.
+- **`cpr_detector.py`** — new `_worker_scan_pdf()` (module-level, required for `spawn` context) runs `document_scanner.scan_pdf()` in a fresh subprocess and returns results via a `multiprocessing.Queue`. New `_scan_bytes_timeout()` wraps PDF scanning: writes content to a temp file, spawns the worker via `multiprocessing.get_context("spawn")`, joins with a 60-second hard timeout, and terminates the process tree if it exceeds the limit. Non-PDF files delegate straight to `_scan_bytes()`.
+- **`scan_engine.py`** — `run_file_scan()` now calls `_scan_bytes_timeout()` instead of `_scan_bytes()` for all files. Stub added to module-level injected globals.
+- **`gdpr_scanner.py`** — `_scan_bytes_timeout` imported from `cpr_detector` and injected into `scan_engine`.
+- **`file_scanner.py`** — `.pdf` removed from `FILE_SCAN_EXTENSIONS` exclusion; all default extensions now included.
+
+### Fixed — Post-v1.6.4 release bugs (continued)
+
+**`routes/google_scan.py`**
+- `_run_google_scan()` crashed with `UnboundLocalError: cannot access local variable 'data'` when `user_emails` was not passed in the request. The fallback `data.get("user_emails", [])` referenced `data`, a local of the request handler that is not in scope inside the scan function. Since `data` and `options` are the same object, the fallback was redundant and has been removed.
+
+**`routes/export.py`** — Article 30 report
+- `SOURCE_LABELS` was missing `gmail`, `gdrive`, `local`, and `smb` — all four source types rendered as raw keys in every table (inventory, Art. 9, photo, deletion audit log). Now map to "Gmail", "Google Drive", "Local files", "Network / SMB".
+- Per-source breakdown table only iterated M365 sources (`email`, `onedrive`, `sharepoint`, `teams`) — Google and local/SMB findings were completely absent from the summary even when present. Loop now covers all eight source types.
+- Methodology bullet (`a30_method_4`) only mentioned Microsoft Graph sources. Updated in `en.json`, `da.json`, `de.json`, and the hardcoded fallback to also mention Google Workspace (service account + domain-wide delegation) and local/SMB file shares.
+
+**`scheduler.py`**
+- Removed stale file. `scan_scheduler.py` fully supersedes it; `routes/scheduler.py` and `gdpr_scanner.py` both import from `scan_scheduler`. The old file had diverged significantly (missing UUID migration, connector auto-reconnect, file source resolution, debug SSE events).
+
+**`templates/index.html`**
+- Removed 9 unused CSS classes: `.sidebar-sub`, `.btn-secondary`, `.log-ok`, `.log-err`, `.log-warn`, `.user-bar`, `.sign-out-btn`, `.source-badge`, `.srcmgmt-coming-soon`.
+
+### Added — Personal Google account OAuth (#30 ✅)
+
+- Personal Google accounts can now be scanned without a service account or Workspace admin. A device-code OAuth flow (mirrors M365 delegated mode) lets a user sign in interactively with their own Google account.
+- **`google_connector.py`** — new `PersonalGoogleConnector` class: `get_device_code_flow()` / `complete_device_code_flow()` static methods hit Google's device-auth endpoint; `_refresh_if_needed()` handles transparent token refresh via `google.oauth2.credentials.Credentials`; `list_users()` returns a single-item list (the signed-in user) so the scan engine needs no changes. `iter_gmail_messages()` / `iter_drive_files()` share the same iteration logic as `GoogleConnector` via extracted `_gmail_iter()` / `_drive_iter()` module-level helpers.
+- Token persisted to `~/.gdprscanner/google_token.json` (chmod 600). New helpers: `save_personal_token`, `load_personal_token`, `delete_personal_token`.
+- **`routes/google_auth.py`** — four new endpoints: `GET /api/google/personal/status`, `POST /api/google/personal/start`, `POST /api/google/personal/poll`, `POST /api/google/personal/signout`. Background thread blocks on `complete_device_code_flow`; frontend polls — identical pattern to M365 delegated auth.
+- **`routes/state.py`** — `google_pending_flow` and `google_poll_result` added.
+- **`templates/index.html`** — auth-mode toggle (Workspace / Personal account) in the Google pane; personal section with client ID/secret fields and inline device-code box (reuses `.device-code-box` CSS); workspace setup guide hidden in personal mode.
+- **`static/app.js`** — `smGoogleSetMode()` switches visible sections; `smGoogleRefreshStatus()` now checks both `/api/google/auth/status` and `/api/google/personal/status` in parallel; `smGooglePersonalStart()`, `smGooglePersonalPoll()`, `smGooglePersonalSignOut()` added.
+- **`lang/en.json`, `da.json`, `de.json`** — 14 new keys each.
+
+### Fixed — Post-v1.6.4 release bugs
+
+**`checkpoint.py`**
+- Scheduled scans crashed with `string indices must be integers, not 'str'` when `user_ids` in the profile contained plain ID strings rather than dicts. `_checkpoint_key()` now handles both formats: `u["id"] if isinstance(u, dict) else u`.
+
+**`scan_engine.py`**
+- Same root cause as above: `run_scan()` now normalises `user_ids` entries to dicts at the top of the function before any access, so both plain strings and `{id, displayName, userRole}` objects work correctly.
+
+**`scan_scheduler.py`**
+- `file_sources` in profiles are stored as source ID strings by the JS frontend. The scheduler now resolves each ID to its full source dict via `_load_file_sources()` before calling `run_file_scan()`. Plain path strings are also handled as a fallback.
+- Full traceback is now included in the `scheduler_error` SSE event so failures are diagnosable from the UI status panel without needing the CLI.
+
+**`routes/app_routes.py`**
+- `/api/langs` (language selector endpoint) only globbed `*.lang` files — after the v1.6.3 JSON migration the language dropdown was silently empty. Now globs both `*.json` and `*.lang` with deduplication, matching the existing logic in `gdpr_scanner.py`.
+
+**`static/app.js`**
+- Profile editor (`_pmgmtSaveFullEdit`) did not update `file_sources` or `google_sources` when the user changed source checkboxes — both fields were carried forward unchanged via `...profile`. Now splits `#peSourcesPanel` checkboxes by `data-source-type` and writes `file_sources`, `google_sources`, and `sources` explicitly on every save.
+
+**`gdpr_scanner.py`**
+- `/api/langs` only globbed `*.lang` files — after migrating to JSON, the language selector showed nothing. Now globs both `*.json` and `*.lang`, deduplicates by language code, and sorts alphabetically.
+- `SOURCE_LABELS` was missing `gmail`, `gdrive`, `local`, and `smb` entries — these sources now get correct tab names in Excel export and correct labels in the Article 30 report.
+- Excel export filename changed from `m365_scan_*.xlsx` to `gdpr_scan_*.xlsx`.
+- Article 30 methodology paragraph now mentions Google Workspace scanning via service account with domain-wide delegation. DA and DE lang files updated to match.
+
+**`routes/google_scan.py`**
+- Gmail and Google Drive result cards showed the email address as account name instead of the user's display name. Fixed: `_user_display_map` is now built from `list_users()` and applied to each scanned item.
+- Role badge (Elev/Ansat/Anden) was missing on Google results when `user_emails` came from the request rather than `list_users()`. Fixed: role map is now populated in both cases.
+- Google scan now emits `google_scan_done` instead of `scan_done` so the progress bar stays open until both M365 and Google scans finish.
+
+**`scan_engine.py`**
+- File scan now emits `file_scan_done` instead of `scan_done` so the progress bar stays open until all active scan types finish.
+- `pct` in both Google and file scan progress events was hardcoded at 50 — now increments from 10 to a max of 90.
+
+**`static/app.js`**
+- Progress bar now tracks three independent flags (`_m365ScanRunning`, `_googleScanRunning`, `_fileScanRunning`) and only hides when all active scans have completed.
+- `google_scan_done` and `file_scan_done` SSE event handlers added.
+- Source filter dropdown (search results) and bulk delete source dropdown were missing Gmail, Google Drive, Lokal, and Netværk (SMB) options.
+- Profile preset buttons (1 år / 2 år / etc.) were never highlighted when applying a profile — matching used `years × 365.25` but profiles store `years × 365`. Fixed.
+- `_fileScanRunning` flag set correctly at scan start from `fileSources.length`.
+
+**`routes/state.py` / `routes/google_scan.py`**
+- M365 and Google scans shared `_scan_lock` — Google now uses `_google_scan_lock` and `_google_scan_abort` so both platforms scan in parallel.
+
+**`templates/index.html`**
+- Sources, Settings and Schedule indicator moved from sidebar section header / footer into the topbar, to the right of the Profiles button.
+- Source filter dropdown and bulk delete dropdown updated with Google and file source options.
+
+**`README.md`**
+- All emoji removed (role badges, action icons, status indicators). Plain text equivalents used throughout.
+- `lang/da.json` and `lang/de.json` updated with Google Workspace methodology text for the Article 30 report.
+
+---
+
+## [1.6.4] — 2026-04-03
+
+### Added — Full profile editor (#15e ✅)
+
+- Two-panel modal (profile list left, full editor right). Click a profile row to edit it; the active row is highlighted.
+- **+ Ny profil** button in the left panel footer — creates a blank profile and opens the editor immediately, works when no profiles exist.
+- Editor sections match the sidebar exactly:
+  - **Navn** — name + description fields
+  - **Kilder** — same rendering as the main KILDER panel, including M365, Google Workspace, and file/SMB sources
+  - **Konti** — role filter (Alle / Ansat / Elev), text search, Alle / Ingen select buttons, + Tilføj konto manual entry, platform badges (M365 / GWS / M365+GWS), role badges
+  - **Indstillinger** — date picker with year presets (1/2/5/10/Alle), Scan e-mailindhold, Scan vedhæftede filer, Maks. vedhæftet filstørrelse (MB), Maks. e-mails pr. bruger, Delta-scanning, Søg efter ansigter i billeder — all as toggle sliders
+  - **Opbevaringspolitik** — always visible; Opbevaringsår + Regnskabsår slut dropdown
+- Annuller, ×, and Gem all close the full modal. Auto-opens first profile on modal open.
+- Profile editor defaults match the main window: accounts are unchecked by default; only explicitly saved `user_ids` are shown as checked.
+
+### Fixed — Parallel M365 + Google scanning
+
+- M365 and Google scans shared `_scan_lock` — starting both simultaneously caused "Google scan already running" immediately after scan start. Fixed: `routes/state.py` now defines `_google_scan_lock` and `_google_scan_abort` as separate threading primitives; `routes/google_scan.py` uses these instead of the M365 lock. Both platforms now scan in parallel.
+
+### Fixed — User selection defaults
+
+- All users now default to `selected: false` on page load (previously `true`). The profile editor follows the same rule.
+- "Vælg alle" button renamed to "Alle" to match the main sidebar.
+
+---
+
+## [1.6.3] — 2026-04-03
+
+### Fixed — Post-v1.6.3 release bugs
+
+**`static/app.js`**
+- Source toggle state (Email, OneDrive, SharePoint, Teams, Gmail, Google Drev) not persisted across restarts. Fixed: all toggles now save to `~/.gdprscanner/src_toggles.json` via a new `/api/src_toggles` endpoint and are restored on page load.
+- Deselecting M365 sources in Source Management did not update account badges — `M365 + GWS` still shown. Fixed: badge now uses `hasM365Src` and `effectiveGws` computed inside `renderAccountList()`, and M365 source toggles now call `renderAccountList()` on change.
+- Google-only scans reported wrong account count in live log (e.g. "26 konto(er)" when 1 was selected). Root cause: `getSelectedUsers()` returned all selected users including Google-only accounts. Fixed: `getSelectedUsers()` now returns only M365 users; Google users are counted separately for the log message. The "select at least one account" guard no longer blocks Google-only scans.
+- Cross-platform identity matching used email prefix (`anne.hansen` before `@`) — changed to `displayName` matching since both M365 and GWS are maintained from the same AD source.
+- `_onGoogleSourceToggle()` and M365 source toggles did not call `renderAccountList()` — account badges not updated when toggling sources in Source Management.
+
+**`routes/google_auth.py`**
+- Removed `/api/google/auth/sources` endpoint and `src_gmail`/`src_drive` keys from the status response — replaced by unified `/api/src_toggles` endpoint in `gdpr_scanner.py`.
+
+**`app_config.py` / `gdpr_db.py` / `checkpoint.py` / `google_connector.py` / `m365_connector.py` / `scan_scheduler.py` / `scheduler.py` / `gdpr_scanner.py`**
+- All data files moved from `~/` root into `~/.gdprscanner/` subdirectory with cleaner short names (`scanner.db`, `config.json`, `token.json`, etc.). A migration shim runs on first startup and moves existing `~/.gdpr_scanner_*` files automatically. `MAINTAINER.md` updated with new file locations.
+
+**`scan_scheduler.py`**
+- Scheduled scans ignored `file_sources` from the profile — `_build_options()` dropped them. Fixed: `file_sources` now included in opts, and `run_file_scan()` is called for each file source in the profile during a scheduled run (#15f ✅).
+
+**`static/app.js` — profile save**
+- `file_sources` in profile was hardcoded to `[]` — now saves the actual checked file sources from `buildScanPayload()` (#15f).
+
+### Fixed — Post-release (continued)
+
+**`routes/state.py` / `routes/google_scan.py`**
+- M365 and Google scans shared `_scan_lock` — starting both simultaneously caused "Google scan already running" immediately. Fixed: Google scan now uses its own `_google_scan_lock` and `_google_scan_abort` so both platforms can run in parallel.
+
+**`static/app.js`** — profile editor (#15e ✅)
+- Profile editor drawer implemented: two-panel modal (profile list left, full editor right). Click any profile to open its editor.
+- Editor sections: Navn + beskrivelse, Kilder (same rendering as main KILDER panel, including Google and file sources), Konti (with Alle / Ansat / Elev role filter, text search, Alle / Ingen select buttons, + Tilføj konto manual add), Indstillinger (full mirror of sidebar — date picker with year presets, Scan e-mailindhold, Scan vedhæftede filer, Maks. vedhæftet filstørrelse, Maks. e-mails pr. bruger, Delta-scanning, Søg efter ansigter i billeder, all as toggle sliders), Opbevaringspolitik (always visible — Opbevaringsår + Regnskabsår slut).
+- **+ Ny profil** button in left panel footer — creates a blank profile and opens the editor immediately, works even when no profiles exist.
+- Annuller, ×, and Gem all close the full modal (not just the editor panel).
+- Auto-opens first profile's editor when modal opens.
+
+**`static/app.js`** — defaults
+- All users now default to `selected: false` on load (were `true`). Profile editor follows the same rule — only explicitly saved `user_ids` are shown as checked.
+- "Vælg alle" button renamed to "Alle" to match the main sidebar.
+
+**`routes/state.py`**
+- Added `_google_scan_lock` and `_google_scan_abort` as separate threading primitives for Google scans.
+
+---
+
+### Added — Google Workspace full integration
+
+**Accounts panel**
+- Google Workspace users now appear in the Accounts panel alongside M365 users. Each row shows a platform badge: `M365` (blue) or `GWS` (green).
+- Account list filters by checked sources: check only Google sources → only GWS accounts shown; check only M365 → only M365 accounts; check both → all; check none → empty.
+- Role filter (All / Ansat / Elev) works across both platforms.
+- `_mergeGoogleUsers()` — dedicated async function fetches `/api/google/scan/users` and merges results into `_allUsers` independently of M365 auth timing. Called on page load, on Google connect/disconnect, and after M365 `loadUsers()`.
+
+**Scanning**
+- Selected Google user emails are now passed as `user_emails` to `/api/google/scan/start` — only selected accounts are scanned, not all users in the domain.
+- `routes/google_scan.py` — `_scan_lock` and `_scan_abort` now imported directly from `routes.state` (previously relied on `__getattr__`, which does not resolve bare names inside function bodies — caused `NameError` on scan start).
+- `user_emails` now read from the top-level request body in addition to the nested `options` dict.
+- Gmail scan result cards now correctly labelled "Gmail" (source_type was `email` → mapped to "Exchange"). Fixed in `google_connector.py`.
+- Gmail and Google Drive cards now show styled source badges (`badge-gmail` red tint, `badge-gdrive` blue tint). Previously fell back to unstyled.
+
+**Profiles**
+- Google sources (`gmail`, `gdrive`) and selected Google user emails are now saved to scan profiles and correctly restored on load.
+- Fixed `googleSources` `const` temporal dead zone — declaration moved before use in `buildScanPayload()`.
+
+### Added — OU-based role classification for Google Workspace (#23 Phase 1 ✅)
+
+- **`classification/google_ou_roles.json`** — maps Google Workspace Organisational Unit paths to roles. Edit to match your school's OU structure; no code change required. Default: `/Elever` → student, `/Personale` → staff.
+- **`google_connector.py`** — `list_users()` fetches `orgUnitPath` via `projection=full` and classifies each user via `classify_ou_role()`.
+- **`routes/google_scan.py`** — role map built from `list_users()` result; each scan card now carries the correct `user_role`.
+
+### Added — Documentation split
+
+- **`M365_SETUP.md`** — step-by-step Microsoft 365 setup (app registration, permissions, auth modes, headless config, troubleshooting).
+- **`GOOGLE_SETUP.md`** — step-by-step Google Workspace setup (service account, domain-wide delegation, scopes, OU role mapping, troubleshooting).
+- **`README.md`** — trimmed from 774 to 611 lines; setup detail moved to the two new files.
+
+### Changed — i18n migrated from `.lang` to JSON (#27 ✅)
+
+- `lang/en.json`, `da.json`, `de.json` — 709 keys each, standard flat JSON.
+- `app_config.py` — loader now prefers `.json`, falls back to `.lang` for backward compatibility.
+- Old `.lang` files retained as fallback; can be deleted once JSON files are confirmed working.
+
+### Changed — `skus/` renamed to `classification/` (#29 ✅)
+
+- `skus/education.json` → `classification/m365_skus.json`
+- `skus/google_ou_roles.json` → `classification/google_ou_roles.json`
+- All path references updated in `m365_connector.py`, `google_connector.py`, `routes/users.py`, `gdpr_scanner.py`, `build_gdpr.py`, all lang files, and `static/app.js`.
+
+### Changed — UI polish (icons removed, badges added)
+
+- Role filter buttons (Staff / Student), scan option labels (Delta scan, Scan photos, Retention policy), and account list role badges — all emoji removed, plain text only.
+- Role badge on account rows changed from emoji icon button to plain outline pill (`Ansat` / `Elev` / `Anden`).
+- Scan result cards — role icon prefix replaced with small inline badge.
+- All six lang files cleaned of emoji in role, mode, option, and Art.30 inventory keys.
+- Progress bar fixed at 32px height — emoji in filenames no longer push the bar taller.
+- Scrollbars in Sources and Accounts panels thinned to 4px.
+
+### Fixed — Account list / source interaction
+
+- Deselecting all sources now empties the account list.
+- Deselecting M365 sources no longer disables Accounts when Google sources are still checked.
+- `_updateAccountsVisibility()` now checks all source types, not just M365.
+
+### Fixed — Role override cycling
+
+- Role override never cleared for users loaded with a pre-existing override (`roleOverride: true` from a previous session) because `_autoRole` was never populated from the server. Fixed: replaced `_autoRole` comparison with a step counter — after 3 clicks the override clears regardless of the original auto role.
+- Role badge changed from `<span>` to `<button type="button">` inside label rows — prevents label click-forwarding to the checkbox (which caused the first user to receive the override instead of the clicked user).
+
+---
+
+## [1.6.2] — 2026-03-28
+
+### Added — Google Workspace account list and source integration
+
+- **`static/app.js`** — Google Workspace users (292 users in testing) now appear in the Accounts panel with a `GWS` badge (blue = M365, green = GWS). M365 users carry an `M365` badge.
+- Account list filters by checked sources: check only Google sources → only GWS accounts shown; check only M365 → only M365 accounts; check both → all accounts; check none → empty list.
+- Role filter buttons (All / Ansat / Elev) work across both platforms.
+- `_mergeGoogleUsers()` — dedicated function fetches `/api/google/scan/users` and merges results into `_allUsers` independently of M365 auth timing. Called on page load, on Google connect/disconnect, and after M365 `loadUsers()`.
+- `startScan()` — selected Google user emails now passed as `user_emails` to `/api/google/scan/start`, so only the chosen accounts are scanned (previously ignored selection and scanned all users).
+- **`routes/google_scan.py`** — `_scan_lock` and `_scan_abort` now imported directly from `routes.state` (previously relied on `__getattr__` which doesn't resolve bare names inside function bodies — caused `NameError` on scan start).
+- `user_emails` now read from the top-level request body in addition to the nested `options` dict.
+
+### Added — OU-based role classification for Google Workspace (#23 Phase 1)
+
+- **`classification/google_ou_roles.json`** — new file mapping Google Workspace Organisational Unit paths to roles. Edit to match your school's OU structure; no code change required. Default: `/Elever` → student, `/Personale` → staff.
+- **`google_connector.py`** — `list_users()` now fetches `orgUnitPath` via `projection=full` and classifies each user via `classify_ou_role()`. Each user dict now includes `userRole` and `orgUnitPath`.
+
+### Added — Documentation split
+
+- **`M365_SETUP.md`** — step-by-step Microsoft 365 setup guide (app registration, permissions, auth modes, headless config, role classification, troubleshooting).
+- **`GOOGLE_SETUP.md`** — step-by-step Google Workspace setup guide (service account, domain-wide delegation, OAuth scopes, OU role mapping, troubleshooting).
+- **`README.md`** — trimmed from 774 to 611 lines. Auth/permissions/headless detail moved to setup guides. Two new "Microsoft 365" and "Google Workspace" sections link to the respective files.
+
+### Changed — UI polish (icons removed)
+
+- Role filter buttons (Staff / Student) — emoji removed, plain text only.
+- Scan option labels (Delta scan, Scan photos for faces, Retention policy) — emoji removed.
+- Account list role badge — replaced clickable emoji button (`👔`/`🎓`/`👤`) with plain outline pill badge (`Ansat` / `Elev`), matching the platform badge style.
+- Scan result cards — role icon prefix removed from account name; replaced with small inline outline badge.
+- All three lang files (`en.lang`, `da.lang`, `de.lang`) cleaned of emoji in `m365_role_staff`, `m365_role_student`, `m365_opt_delta`, `m365_opt_scan_photos`, `m365_opt_retention`, `m365_mode_delegated`, `m365_bulk_overdue_btn`, `a30_inv_staff`, `a30_inv_students`.
+
+### Fixed — Profile save/load with Google sources
+
+- Google sources (`gmail`, `gdrive`) and selected Google user emails now saved in scan profiles and correctly restored on load.
+- `googleSources` `const` declaration moved before use in `buildScanPayload()` — fixed temporal dead zone `ReferenceError`.
+
+### Fixed — Account list / source interaction
+
+- Deselecting all sources now empties the account list (previously kept showing all users).
+- Selecting only Google sources no longer disables the Accounts section (previously greyed out when no M365 sources were checked).
+- `_updateAccountsVisibility()` now checks all source types, not just M365.
+
+### Added — Google Workspace role classification via OU mapping (#23 Phase 1)
+
+- **`classification/google_ou_roles.json`** — new file mapping Google Workspace Organisational
+  Unit paths to roles (`student` / `staff`). Edit to match your school's OU structure;
+  no code change required. Default prefixes: `/Elever` → student, `/Personale` → staff.
+- **`google_connector.py`** — `list_users()` now requests `orgUnitPath` from the Admin
+  Directory API and classifies each user via `classify_ou_role()`. Each user dict now
+  includes `userRole` and `orgUnitPath`.
+- **`routes/google_scan.py`** — role map built from `list_users()` result; scan cards
+  now carry the correct `user_role` instead of always `"other"`.
+
+### Fixed — Post-split and app runtime bugs (additional)
+
+**`routes/database.py`**
+- Settings panel showed "Scanned: 0, Flagged: 0, Scans: 0" because `get_stats()`
+  returns `{}` when no scan has a `finished_at` timestamp (interrupted or first-run).
+  Fixed: stats endpoint now queries `flagged_items` and `scans` tables directly so
+  counts are always correct regardless of scan completion state. Stats populate on
+  app start from existing DB data — no re-scan required.
+- DB export produced a ZIP but nothing was downloaded in the native app because
+  `URL.createObjectURL()` does not work in pywebview. Fixed: `exportDB()`,
+  `exportExcel()`, and `exportArticle30()` in `static/app.js` now detect pywebview
+  and call `window.pywebview.api.save_db_export()` / `save_excel()` / `save_article30()`
+  which use the native macOS/Windows save dialog. Browser fallback preserved.
+- Added `save_db_export()` and `save_article30()` methods to the pywebview `Api`
+  class in `build_gdpr.py`. Fixed `save_excel` filename from `m365_scan_` to `gdpr_scan_`.
+
+**`scan_engine.py`**
+- `run_file_scan()` called `_db.start_scan()` which does not exist — the correct
+  method is `begin_scan()`. Silent exception meant `_db_scan_id` was always `None`
+  and no file scan results were ever written to the database. Fixed.
+
+### Added — Personal use disposition value (#28)
+
+Private content that staff members store on work equipment will now appear in scan
+results. Added `personal-use` as a disposition value so reviewers can explicitly
+mark such items as outside the organisation's compliance scope.
+
+- New disposition: **Personal use — out of scope** in both UI dropdowns
+- Art. 30 report labels it "Personal use — out of GDPR scope (Art. 2(2)(c))"
+- Translated in EN / DA / DE
+
+**Legal basis:** GDPR Article 2(2)(c) — processing by a natural person in the
+course of a purely personal activity is outside GDPR scope.
+
+### Added — pytest test suite (#26)
+
+112 tests across 4 modules — all passing.
+
+| Test module | Tests | What it covers |
+|---|---|---|
+| `tests/test_document_scanner.py` | 36 | `is_valid_cpr`, `extract_matches`, `scan_docx`, `scan_xlsx`, `_scan_bytes` — CPR detection, false-positive suppression, binary edge cases |
+| `tests/test_app_config.py` | 34 | i18n loading, Article 9 keyword detection, config round-trip, admin PIN, profiles CRUD, Fernet encryption |
+| `tests/test_checkpoint.py` | 18 | `_checkpoint_key` stability, save/load/clear, wrong-key isolation, delta token round-trip |
+| `tests/test_db.py` | 24 | Scan lifecycle, `save_item`, CPR hash-only storage, `lookup_data_subject`, dispositions, export/import cycle |
+
+**Support files:**
+- `tests/conftest.py` — shared fixtures: `docx_with_cpr`, `docx_no_cpr`, `xlsx_with_cpr`, `xlsx_no_cpr`, `txt_with_art9`, `binary_garbage`, `tmp_db`
+- `pytest.ini` — test discovery config
+
+**Run with:** `pytest tests/` from the project root.
+
+### Fixed — Six post-split runtime bugs
+
+All of these bugs were introduced by the #25 module split — the pre-split code had none of them.
+
+**`gdpr_scanner.py`**
+- `_current_scan_id` imported as a string binding (`from sse import _current_scan_id`), so `scan_stream()` always saw `""` — SSE replay filter excluded all events and the progress bar showed nothing. Fixed: reads `sse._current_scan_id` at call time via module reference.
+- `_connector` assignment only updated the local module global, not `_state.connector`. `scan_engine.py` reads `_state.connector`, which stayed `None` after sign-in — every scan reported "Not connected to M365". Fixed: all five `_connector = ...` assignments now dual-assign `_connector = _state.connector = ...`.
+
+**`scan_engine.py`**
+- `_load_role_overrides`, `_resolve_display_name`, `_scan_text_direct` were undefined bare names inside `run_scan()` — raised `NameError` at runtime. Fixed: proper imports from `app_config` and `cpr_detector`.
+- `PHOTO_EXTS` and `SUPPORTED_EXTS` were stub empty sets at import time; injection via `_se.PHOTO_EXTS = ...` replaced the module attribute but function bodies still saw the empty stubs. Fixed: `scan_engine.py` now imports these directly from `cpr_detector` at module level.
+- `scan_progress` SSE event broadcasts `index` and `pct`; the UI handler read `d.completed` — progress bar was always 0%. Fixed in `static/app.js`: handler now reads `d.pct` (pre-calculated server-side) and populates `progressStats` (n / total) and `progressEta` elements that were wired in HTML but never written.
+- Source collection (OneDrive, SharePoint, Teams) completed silently with no count in the live log. Fixed: broadcasts `📁 OneDrive — user: N file(s)`, `🌐 SharePoint: N file(s)`, `💬 Teams — user: N file(s)` after each successful collection.
+
+**`cpr_detector.py`**
+- `_scan_text_direct()` called `ds.scan_text()` which internally calls `extract_cpr_and_dates()` — a function that does not exist in `document_scanner.py` (pre-existing bug in that module). Result: every email body scan returned zero CPRs. Same bug affected `.txt` files and the unknown-extension fallback in `_scan_bytes()`. Fixed: all three replaced with `ds.extract_matches(text, 1, "text")` which works correctly.
+
+**`static/app.js`**
+- `scan_file_flagged` handler called `renderCards()` which is not defined anywhere — silent `ReferenceError` in the browser, cards pushed to `flaggedData` but never displayed. Fixed: replaced with `applyFilters()` which calls `renderGrid()` and shows the filter bar.
+- `scan_done` handler never showed the filter bar (containing Excel and Art.30 export buttons) when results existed — only the stats numbers updated. Fixed: `scan_done` now explicitly shows the filter bar and calls `applyFilters()` when `flaggedData.length > 0`.
+
+---
+
+## [1.6.1] — 2026-03-28
+
+### Changed — Split `gdpr_scanner.py` into focused modules (#25)
+
+`gdpr_scanner.py` was 5554 lines. It is now 3591 lines and delegates to five
+focused modules. No behaviour changes — all existing routes, blueprints, and
+imports continue to work unchanged.
+
+**New files:**
+
+| Module | Lines | Contents |
+|---|---|---|
+| `sse.py` | 52 | `broadcast()`, `_sse_queues`, `_sse_buffer`, `_current_scan_id` |
+| `checkpoint.py` | 79 | `_save_checkpoint()`, `_load_checkpoint()`, `_checkpoint_key()`, delta token load/save |
+| `app_config.py` | 553 | i18n, Article 9 keywords, config, admin PIN, profiles, settings, SMTP, file sources, Fernet encryption |
+| `cpr_detector.py` | 381 | `_scan_bytes()`, `_extract_exif()`, `_detect_photo_faces()`, `_make_thumb()`, `_get_pii_counts()` |
+| `scan_engine.py` | 1006 | `run_scan()`, `run_file_scan()` — M365 and file-system scan orchestration |
+
+**Changed files:**
+
+- `gdpr_scanner.py` — imports and re-exports from all five modules; keeps Flask
+  app init, `@app.route` definitions, blueprint registration, and `__main__` entry point
+- `routes/state.py` — `_scan_lock` and `_scan_abort` moved here from `gdpr_scanner.py`
+  so `scan_engine.py` can reference them without a circular import
+
+**Isolation:** each new module is importable in isolation with fallback stubs,
+enabling unit tests (#26) to import `cpr_detector` or `checkpoint` without
+pulling in Flask, MSAL, or the full application.
+
+---
+
+## [1.6.0] — 2026-03-28
+
+### Changed — Rename: M365 Scanner → GDPRScanner (#24)
+
+The tool now scans M365, Google Workspace, local file systems, and SMB network
+shares. The name "M365 Scanner" was misleading. This release renames everything
+with no behaviour changes.
+
+**Files renamed:**
+
+| Old | New |
+|---|---|
+| `m365_scanner.py` | `gdpr_scanner.py` |
+| `m365_db.py` | `gdpr_db.py` |
+| `build_m365.py` | `build_gdpr.py` |
+| `build_m365.sh` | `build_gdpr.sh` |
+| `start_m365.sh` *(created by install_macos.sh)* | `start_gdpr.sh` |
+| `start_m365.bat` *(created by install_windows.ps1)* | `start_gdpr.bat` |
+
+**Config files renamed on first startup (migration shim):**
+
+| Old `~/` path | New `~/` path |
+|---|---|
+| `.m365_scanner_config.json` | `.gdpr_scanner_config.json` |
+| `.m365_scanner.db` | `.gdpr_scanner.db` |
+| `.m365_scanner_token.json` | `.gdpr_scanner_token.json` |
+| `.m365_scanner_delta.json` | `.gdpr_scanner_delta.json` |
+| `.m365_scanner_settings.json` | `.gdpr_scanner_settings.json` |
+| `.m365_scanner_smtp.json` | `.gdpr_scanner_smtp.json` |
+| `.m365_scanner_role_overrides.json` | `.gdpr_scanner_role_overrides.json` |
+| `.m365_scanner_file_sources.json` | `.gdpr_scanner_file_sources.json` |
+| `.m365_scanner_machine_id` | `.gdpr_scanner_machine_id` |
+| `.m365_scanner_checkpoint.json` | `.gdpr_scanner_checkpoint.json` |
+| `.m365_scanner_schedule.json` | `.gdpr_scanner_schedule.json` |
+| `.m365_scanner_msal_cache.bin` | `.gdpr_scanner_msal_cache.bin` |
+| `.m365_scanner_lang` | `.gdpr_scanner_lang` |
+
+The migration runs silently at startup — existing scan history, credentials,
+settings, and role overrides are preserved automatically.
+
+**Intentionally unchanged:**
+- `m365_connector.py` — kept as-is; it is the Microsoft Graph connector and
+  the `m365_` prefix accurately describes what it connects to
+- i18n keys with the `m365_` prefix that describe M365-specific UI elements
+  (Azure credential fields, device code flow screens) — the prefix is correct
+
+**Run with:**
+```
+python gdpr_scanner.py [--port 5100]
+```
+
+---
+
+## [1.5.9] — 2026-03-28
+
+### Added — Google Workspace scanning (#10)
+
+Organisations running mixed Microsoft/Google environments can now scan Gmail
+and Google Drive alongside M365 in a single tool. The Google Workspace tab in
+Source Management is now fully active (it was previously a "Coming soon" stub).
+
+**New files:**
+- `google_connector.py` — service account OAuth with domain-wide delegation;
+  Gmail message + attachment iterator; Drive file iterator with automatic export
+  of native Docs/Sheets/Slides → DOCX/XLSX/PPTX before scanning
+- `routes/google_auth.py` — `/api/google/auth/status`, `/connect`, `/disconnect`
+- `routes/google_scan.py` — `/api/google/scan/start`, `/cancel`, `/users`
+
+**Changed files:**
+- `routes/state.py` — `google_connector` slot added
+- `m365_scanner.py` — Google blueprints registered; `GOOGLE_CONNECTOR_OK` /
+  `GOOGLE_AUTH_OK` flags; connector auto-restored from saved key on startup
+- `templates/index.html` — Google tab activated; full credentials pane with key
+  file upload, admin email field, Gmail + Drive source toggles, and setup guide
+- `static/app.js` — `smGoogleRefreshStatus()`, `smGoogleConnect()`,
+  `smGoogleDisconnect()`, `getGoogleScanOptions()`, key file reader
+- `requirements.txt`, `install_windows.ps1`, `install_macos.sh` — three new
+  optional Google API dependencies
+- `lang/en.lang`, `da.lang`, `de.lang` — 14 new i18n keys each
+
+**Dependencies (optional — scanner starts without them):**
+```
+pip install google-auth google-auth-httplib2 google-api-python-client
+```
+
+**Setup required in Google Workspace Admin Console:**
+1. Create a Google Cloud project; enable Gmail API, Drive API, Admin SDK
+2. Create a service account; download the JSON key; enable domain-wide delegation
+3. In Workspace Admin → Security → API Controls → Domain-wide delegation add the
+   service account client ID with scopes:
+   `gmail.readonly`, `drive.readonly`, `admin.directory.user.readonly`
+
+**Scan results** write to the same SQLite database with `source_type = "gmail"`
+or `"gdrive"` — Article 30 reports and data subject lookups cover both platforms
+automatically.
+
+---
+
+## [1.5.8] — 2026-03-28
+
+### Fixed — Scheduled scans invisible in the browser (#21)
+
+Scheduled scans now show full live progress in the browser — progress bar,
+phase text, flagged cards, and log entries — exactly like manual scans.
+
+**Root cause (critical):** When run as `python m365_scanner.py`, the module
+loads as `__main__`. The scheduler's `import m365_scanner as _m` loaded a
+**second copy** of the module with its own empty `_sse_queues`. Events from
+`_m.broadcast()` went nowhere — the browser's SSE connection was reading from
+`__main__`'s queues.
+
+**Fix:** `sys.modules["m365_scanner"] = sys.modules[__name__]` at the top of
+the module ensures all imports share the single running instance.
+
+### Fixed — SSE event replay for late-connecting browsers (#21)
+
+Opening the browser mid-scan (manual or scheduled) now replays all buffered
+progress events so the live log and card grid are fully populated.
+
+**Additional root causes and fixes:**
+
+- `_autoConnectSSEIfRunning()` only attached `scheduler_*` listeners on page
+  load — replayed `scan_phase`, `scan_file_flagged`, and `scan_done` events
+  were silently ignored
+- Idle SSE connections died silently (Flask/Werkzeug threading); the browser
+  had no live connection when a scheduled scan fired minutes/hours later
+
+**Changes — Python (`m365_scanner.py`):**
+- Module identity fix: `sys.modules["m365_scanner"] = sys.modules[__name__]`
+- Added `_current_scan_id` global — unique timestamp-based ID set at the start
+  of every scan (M365 and file scans) and cleared after `scan_done`
+- `broadcast()` injects `scan_id` into every SSE event payload
+- `scan_stream()` filters the replay buffer to only include events matching the
+  current `scan_id`, preventing stale replay from previous scans
+- New `sse_replay` / `sse_replay_done` marker events bracket the replayed block
+  so the browser can distinguish replay from live events
+- New `GET /api/scan/status` lightweight endpoint returning `{running, scan_id}`
+
+**Changes — JavaScript (`static/app.js`):**
+- Extracted `_attachScanListeners(es)` and `_attachSchedulerListeners(es)` —
+  shared by both `startScan()` and `_autoConnectSSEIfRunning()`
+- `_attachSchedulerListeners` now shows the progress bar on `scheduler_started`
+  and hides it on `scheduler_done` / `scheduler_error`
+- SSE polling watchdog (`_sseWatchdog`) checks `/api/scan/status` every 4s;
+  reopens the SSE connection via `_ensureSSE()` if it has died
+- `_userStartedScan` flag — `scan_done` only closes the SSE connection for
+  user-initiated scans; scheduled scans keep it alive
+- Fixed `es.onerror` handler — no longer silently nulls `es`
+
+### Fixed — File scan `scan_complete` → `scan_done` event name
+
+`run_file_scan()` was broadcasting `scan_complete` on finish, but the JS only
+listens for `scan_done`. Renamed to `scan_done` with the same `total_scanned` /
+`flagged_count` payload shape as M365 scans.
+
+### Fixed — Resume scan used wrong profile
+
+`startScan()` never told the server which profile was active. Settings were
+always saved to the Default profile. Now `profile_id` is sent in the scan start
+payload and `_save_settings()` accepts a `profile_id` parameter (takes
+precedence over `profile_name`).
+
+### Fixed — `install_macos.sh` launcher scripts
+
+- `start_m365.sh` and `build_m365.sh` templates now use `exec python3` instead
+  of `exec python` — fixes "not found" after removing python.org interpreter
+- spaCy model install: creates a `pip` shim in `venv/bin/` (spaCy's
+  `shutil.which("pip")` couldn't find the venv's pip3), falls back to direct
+  `pip install` if `spacy download` still fails, and prepends `venv/bin` to
+  PATH explicitly
+
+### Added — Diagnostic logging
+
+- `[run_scan]` prints sources, user count, app_mode, and a sample user entry
+  at scan start — helps verify scheduled scans use the correct profile
+- `[SSE]` console.log messages in the browser for `scan_phase`, `scan_done`,
+  `scan_file_flagged`, `scheduler_started`, `scheduler_done`, `scheduler_error`
+  — aids debugging SSE delivery issues
+
+### Added — i18n keys (EN / DA / DE)
+
+- `m365_sse_reconnecting` — shown when page load detects a running scan
+- `m365_sse_replay_note` — logged after replayed events finish
+
+---
+
+## [1.5.7] — 2026-03-28
+
+### Fixed — Missing translations in Settings modal
+
+Several strings in the Settings → General and Settings → Scheduler tabs were
+displaying in English regardless of the active language.
+
+**Missing lang keys added** (EN / DA / DE):
+- `btn_save` — Save / Gem / Speichern (used by scheduler editor Save button and others)
+- `m365_settings_about` — About / Om / Über
+- `m365_settings_save_pin` — Save PIN / Gem PIN / PIN speichern
+- `m365_sched_freq_daily/weekly/monthly` — frequency labels in job list and editor
+- `m365_sched_dow_mon` through `_sun` — day-of-week labels
+
+**Template fixes:**
+- "About" group heading now has `data-i18n="m365_settings_about"`
+- "Save PIN" button uses dedicated key `m365_settings_save_pin` instead of generic `btn_save`
+- Frequency and day-of-week `<option>` elements now have `data-i18n` attributes
+- Scheduler job list (`schedRenderJobs`) and status update now use `t()` for frequency labels
+
+### Changed — Theme toggle replaced with slider
+
+The "Toggle dark / light" text button in Settings → General is replaced with a
+standard toggle slider (consistent with all other toggles in the UI). The slider
+reflects the current theme state when the tab opens and toggles the theme on click.
+
+---
+
+## [1.5.6] — 2026-03-28
+
+### Feature — SSE event replay (#21)
+
+Opening the browser mid-scan (e.g. while a scheduled scan is running) now
+replays all buffered events so the live log and result cards populate
+immediately, rather than showing nothing until the next event fires.
+
+**`m365_scanner.py`:**
+- Added `_sse_buffer: deque = deque(maxlen=500)` — a ring buffer that stores
+  every `broadcast()` event
+- `broadcast()` appends to the buffer before sending to SSE clients
+- `run_scan()` clears the buffer at the start of each scan so stale events
+  from the previous scan are not replayed
+- Removed duplicate `@app.route("/api/scan/stream")` — route is now handled
+  exclusively by the `routes/scan.py` blueprint
+
+**`routes/scan.py`:**
+- `scan_stream()` replays `_m._sse_buffer` immediately when a new client
+  connects, then switches to live events
+- All globals accessed directly via `import m365_scanner as _m` to avoid
+  `__getattr__` resolution failures that caused 500 errors
+- A `: connected` comment line is sent first to confirm the stream is flowing
+
+**`static/app.js`:**
+- `_autoConnectSSEIfRunning()` — new function called on `DOMContentLoaded`
+  that always opens the SSE connection on page load. If a scan is already
+  running, buffered events replay immediately. If the buffer is empty, no
+  events fire and the log stays quiet.
+- Handles `scan_phase`, `scan_progress`, `scan_start`, `scan_file_flagged`,
+  `scan_done`, `scheduler_started`, `scheduler_done`, `scheduler_error` events
+- `startScan()` closes and reopens the SSE connection to get a clean stream
+  for each manual scan
+
+**`m365_scanner.py` — CLI output when no browser connected:**
+- `broadcast()` now prints key events to the terminal when `_sse_queues` is
+  empty (i.e. no browser tab is watching), so scheduled scans are visible in
+  the CLI: scan phases, file progress, errors, and completion summary
+
+---
+
+## [1.5.5] — 2026-03-28
+
+### Fixed — Scheduler: multiple bugs after multi-job implementation
+
+**`scheduler.py` renamed to `scan_scheduler.py`**
+
+Python's stdlib includes a `sched`/`scheduler` module that was being resolved
+instead of the project's own `scheduler.py`, causing `module 'scheduler' has no
+attribute 'load_jobs'`. Renaming the project file to `scan_scheduler.py` eliminates
+the collision entirely. All imports updated in `routes/scheduler.py` and
+`m365_scanner.py`.
+
+**Jobs with missing UUID assigned on load**
+
+Jobs saved before the multi-job refactor had `"id": ""`. `load_jobs()` now detects
+any job with a missing or empty id and assigns a fresh UUID, then rewrites the file.
+This fixed "Delete failed: id required" and silent edit failures.
+
+**Enabled toggle added to each scheduler row**
+
+Each job row now has an inline toggle switch instead of a static ✓/— indicator.
+Toggling saves the change immediately via `/api/scheduler/jobs/save`. The job
+description also shows "Next: [date]" after the status fetch resolves.
+
+**Edit no longer duplicates the job**
+
+`_sched().reload()` inside the save route was not wrapped in its own try/except.
+If APScheduler threw (e.g. not yet started), the exception propagated and caused
+the save to fall through to the "create new" path. Both `reload()` calls (save and
+delete) are now wrapped in `try/except: pass`.
+
+**Delete button now works**
+
+The delete button was passing the HTML-escaped job name through the onclick
+attribute — names with apostrophes or special characters broke the JS string.
+Fixed by passing only `id` and looking up the name from `_schedJobs` inside
+`schedDeleteJob()`. The route and JS both have proper error handling now.
+
+**"Not authenticated" on scheduled run**
+
+`state.connector` is assigned once at startup (`_state.connector = _connector`)
+and never updated when the user authenticates later. The scheduler now reads
+`_m._connector` directly from the live `m365_scanner` module at run time,
+guaranteeing it sees the current authenticated connector.
+`flagged_items` and `scan_meta` reads also updated to use `_m.flagged_items`
+and `_m.scan_meta` directly.
+
+---
+
+## [1.5.4] — 2026-03-28
+
+### Feature — Multiple scheduled scans
+
+The Settings → Scheduler tab now supports multiple independent named scan jobs,
+replacing the previous single-job form.
+
+**`scheduler.py`**
+- Config format changed from a single dict to `{"jobs": [...]}`. Each job has
+  its own `id` (UUID), `name`, and all existing fields (frequency, time, profile,
+  auto-email, auto-retention).
+- Old single-job `~/.m365_scanner_schedule.json` files are automatically migrated
+  to the new format on first load — no manual changes needed.
+- `ScanScheduler` registers one APScheduler job per enabled scan and tracks
+  running state and last-run info per job independently.
+- Backward-compat shims (`load_schedule_config`, `save_schedule_config`) kept
+  for any existing integrations.
+
+**`routes/scheduler.py`** — new CRUD endpoints:
+- `GET  /api/scheduler/jobs` — list all jobs
+- `POST /api/scheduler/jobs/save` — create or update a job (by id)
+- `POST /api/scheduler/jobs/delete` — delete a job by id
+- `POST /api/scheduler/jobs/run_now` — run a specific job immediately by id
+- Old `/api/scheduler/config` and `/api/scheduler/run_now` kept as backward-compat shims
+
+**`templates/index.html`** — scheduler pane replaced with a job list (styled like
+File sources) and an inline editor that slides open when adding or editing. Each
+row shows the job name, frequency summary, enabled/running status pill, and
+▶ Run / ✏ Edit / ✕ Delete buttons. Schedule configuration lives exclusively in
+the editor — nothing schedule-related appears in the sidebar except the existing
+"Next: …" indicator.
+
+**`static/app.js`** — all `sched*` functions rewritten for multi-job:
+`schedLoad`, `schedRenderJobs`, `schedAddJob`, `schedEditJob`, `schedSaveJob`,
+`schedDeleteJob`, `schedRunJob`, `schedCancelEdit`, `schedLoadHistory`,
+`schedUpdateSidebarIndicator`.
+
+**Lang keys added:** `m365_sched_add`, `m365_sched_name`, `m365_sched_editor_new`,
+`m365_sched_editor_edit`, `m365_sched_name_required`, `m365_sched_no_runs`,
+`btn_cancel` (da/en/de).
+
+---
+
+## [1.5.3] — 2026-03-27
+
+### Added — Suggestion #19: Scheduled / automatic scans
+
+In-process scheduler using APScheduler so GDPR scans run automatically on a
+configurable cadence — no cron or Task Scheduler setup required.
+
+**Backend:**
+- New `scheduler.py` module wrapping APScheduler `BackgroundScheduler` with a
+  single coalescing job; misfire grace time 1 hour.
+- Config stored in `~/.m365_scanner_schedule.json` (daily/weekly/monthly,
+  time-of-day, profile selector, auto-email, auto-retention).
+- Run history persisted in new `schedule_runs` DB table (migration #7).
+- `routes/scheduler.py` blueprint — `GET/POST /api/scheduler/config`,
+  `GET /api/scheduler/status`, `POST /api/scheduler/run_now`,
+  `GET /api/scheduler/history`.
+- Scheduler starts automatically on `app.run`; status printed at boot.
+- Scheduled scans reuse the full `run_scan()` pipeline (checkpoints, delta,
+  broadcast, DB) — identical to interactive scans.
+- Auto-email sends the Excel report via Graph or SMTP after each scheduled scan.
+- Auto-retention optionally enforces the retention policy on overdue items.
+
+**UI:**
+- **Settings → Scheduler** tab — enable/disable toggle, frequency picker
+  (daily/weekly/monthly), time-of-day, profile selector, auto-email and
+  auto-retention toggles, status display, run history, "Run now" button.
+- **Sidebar** — 🕐 next-scan indicator near the settings button; click to
+  open scheduler config. Polls every 60 s.
+- **Scan log** — scheduled scans appear with 🕐 prefix via SSE events
+  (`scheduler_started`, `scheduler_done`, `scheduler_error`).
+
+**Build / deps:**
+- `APScheduler>=3.10` added to `requirements.txt`.
+- `scheduler.py` and APScheduler hidden imports added to `build_m365.py`.
+- Schedule config added to `--purge` cleanup list.
+- Lang keys added for DA / EN / DE.
+
+---
+
+## [1.5.2] — 2026-03-27
+
+### Fixed — File/SMB scan: image-only PDFs no longer hang the scanner
+
+`scan_pdf()` in `document_scanner` launches Tesseract OCR and Poppler subprocesses
+when a PDF has no text layer. These subprocesses cannot be killed from a Python thread,
+causing the scanner to hang indefinitely on scanned documents (e.g. ESTA applications,
+invoice scans).
+
+**Fix:** Before calling `scan_pdf()`, `_scan_bytes()` now opens the PDF with `pdfplumber`
+(pure Python, no subprocesses) and checks whether any page has a text layer using the
+existing `is_text_page()` helper. If all pages are image-only, the file is skipped
+immediately with no CPR hits — which is correct, since machine-readable CPR numbers
+cannot exist in an image-only PDF.
+
+Text-layer PDFs (the majority) pass the check and are scanned normally. Only image-only
+PDFs (scanned documents) are skipped.
+
+This replaces multiple failed approaches (`ThreadPoolExecutor` timeouts,
+`shutdown(wait=False)`, extension-based skipping) that either blocked on context manager
+exit or removed legitimate file types from scanning.
+
+### Fixed — SMB scanning: multiple smbprotocol 1.14+ API changes
+
+See v1.5.1 for details. Additional fix in this release:
+
+- `smb_host` is now auto-derived from the path (`//host/share` → `host`) when not
+  explicitly stored in the source JSON, so SMB sources saved without an explicit host
+  field still connect correctly.
+
+### Fixed — Routes blueprint: globals resolved lazily to prevent circular imports
+
+Each route blueprint (`routes/*.py`) now uses Python's module `__getattr__` hook to
+lazily resolve globals from `m365_scanner` at call time, not at import time. This
+prevents the circular import that caused double blueprint registration on startup.
+
+### Added — File source Edit button
+
+See v1.5.1.
+
+---
+
+## [1.5.1] — 2026-03-27
+
+### Fixed — SMB scanning: multiple smbprotocol 1.14+ API incompatibilities
+
+Several functions in `file_scanner.py` used deprecated or renamed smbprotocol APIs:
+
+- **`uuid4_str()` removed** — `Connection()` now requires a `uuid.UUID` object, not a string. Changed to `uuid.uuid4()` directly; added `import uuid` at module level.
+- **`RequestedOpcodes` removed from `smbprotocol.open`** — was imported but never used; removed.
+- **`FilePipePrinterAccessMask.FILE_LIST_DIRECTORY` → `DirectoryAccessMask.FILE_LIST_DIRECTORY`** — directory listing requires `DirectoryAccessMask`, not the file/pipe mask.
+- **`FileDirectoryInformation` moved** — from `smbprotocol.query_info` to `smbprotocol.file_info`; import updated.
+- **`FileInformationClass` enum** — `query_directory()` expects `FileInformationClass.FILE_DIRECTORY_INFORMATION` (int enum), not a class instance.
+- **`query_directory()` kwargs renamed** — `file_name=` → `pattern=`, `output_buffer_length=` → `max_output=`.
+- **Filename bytes** — `file_name` field now returns UTF-16-LE bytes; decoded to str with error handling.
+- **`smb_host` auto-derivation** — if `smb_host` is not explicitly stored in the source JSON, it is now extracted from the path (`//host/share` → `host`). `is_smb` no longer requires `smb_host` to be pre-set.
+
+### Fixed — SMB scanning: junk directories skipped
+
+Added `SKIP_DIRS` constant — a set of folder names silently skipped in both local and SMB walks:
+
+```
+.recycle  .recycler  $recycle.bin  .trash  .trashes
+.sync  .btsync  .syncthing
+.git  .svn  .hg
+__pycache__  node_modules
+.spotlight-v100  .fseventsd  .temporaryitems
+system volume information  lost+found
+```
+
+Local walker prunes these from `_dirs[:]` before `os.walk` descends. SMB walker checks before recursing. Hidden directories (`.` prefix) are also skipped in both.
+
+`STATUS_END_OF_FILE` errors (zero-byte placeholder files from BitTorrent Sync, `.sync/stream_test.txt` etc.) are now silently skipped instead of logged as warnings.
+
+### Fixed — SMB/local file scans: OCR disabled, per-file timeout added
+
+PDF scanning via `document_scanner.scan_pdf()` would trigger Tesseract OCR on image-based PDFs (scanned forms, photos) causing single files to hang for minutes.
+
+**`_scan_bytes_timeout()`** — new wrapper around `_scan_bytes` using `ThreadPoolExecutor` with a 30-second deadline per file. Timed-out files are logged as errors and scanning continues.
+
+**`skip_ocr=True`** — file scan loop now passes `skip_ocr=True` to `_scan_bytes`, disabling OCR and reducing DPI to 150. Only the text layer is extracted from PDFs. This is appropriate for bulk compliance scanning where image-only PDFs rarely contain machine-readable CPR numbers.
+
+### Added — File source Edit button
+
+Each file source row in **⚙ Sources → File sources** now has an **✏ Edit** button between Scan and Delete. Clicking it pre-fills the add form with the existing name, path, SMB host, and username (password shown as placeholder dots). Saving with an existing ID updates the source in-place. The Add button label changes to **Save changes** while editing and reverts on save.
+
+---
+
+## [1.5.0] — 2026-03-27
+
+### Refactor — HTML template and JavaScript extracted from m365_scanner.py
+
+`m365_scanner.py` was a ~9600-line monolith containing HTML, CSS, JavaScript,
+and Python all in one string. This made frontend edits unsafe (no linting,
+no syntax highlighting, string escaping hazards) and diffs unreadable.
+
+**What changed:**
+
+- `templates/index.html` — the full HTML/CSS template (1418 lines), served via
+  Flask's `render_template()` with two Jinja2 variables: `app_version` and
+  `lang_json`
+- `static/app.js` — all JavaScript (2832 lines), served by Flask's built-in
+  static file handler at `/static/app.js`
+- `m365_scanner.py` — reduced from 9586 to 5334 lines (44% smaller);
+  now contains only Python: business logic, API routes, and configuration
+
+**Flask configuration updated:**
+
+```python
+app = Flask(__name__,
+            template_folder=os.path.join(BASE_DIR, "templates"),
+            static_folder=os.path.join(BASE_DIR, "static"))
+```
+
+`BASE_DIR` resolves to `sys._MEIPASS` when running as a PyInstaller bundle,
+or to the directory containing `m365_scanner.py` otherwise — the same pattern
+already used for `lang/`, `keywords/`, and `classification/`.
+
+**Build script updated:**
+
+`build_m365.py` now bundles `templates/` and `static/` alongside the existing
+`lang/`, `keywords/`, and `classification/` directories.
+
+**Zero behaviour change** — the app works identically. Only the file organisation changed.
+
+---
+
+## [1.5.0] — 2026-03-27
+
+### Refactor — HTML template and JavaScript extracted from m365_scanner.py
+
+`m365_scanner.py` was a ~9600-line monolith containing HTML, CSS, JavaScript,
+and Python all in one string. This made frontend edits unsafe (no linting,
+no syntax highlighting, string-escaping hazards) and diffs unreadable.
+
+**New files:**
+
+- `templates/index.html` — full HTML/CSS template (1452 lines) served via
+  Flask `render_template()`. Two Jinja2 variables: `{{ app_version }}` and
+  `{{ lang_json | safe }}`.
+- `static/app.js` — all JavaScript (2832 lines) served by Flask's built-in
+  static file handler at `/static/app.js`.
+
+**Flask app updated:**
+
+```python
+app = Flask(__name__,
+            template_folder=os.path.join(BASE_DIR, "templates"),
+            static_folder=os.path.join(BASE_DIR, "static"))
+```
+
+`BASE_DIR` resolves to `sys._MEIPASS` when running as a PyInstaller bundle,
+or the directory of `m365_scanner.py` otherwise — the same pattern already
+used for `lang/`, `keywords/`, and `classification/`. `build_m365.py` updated to bundle
+both new directories.
+
+**Result:** `m365_scanner.py` reduced from 9586 to ~2100 lines of pure Python.
+Zero behaviour change.
+
+### Refactor — Routes split into Flask Blueprints
+
+All 55 API routes extracted from `m365_scanner.py` into a `routes/` package.
+Shared mutable state lives in `routes/state.py`; blueprints import from there
+to avoid circular imports.
+
+```
+routes/
+  __init__.py       package marker
+  state.py          shared globals: connector, flagged_items, LANG, …
+  auth.py           /api/auth/*                           174 lines
+  users.py          /api/users/* + role overrides         222 lines
+  scan.py           /api/scan/* + /api/settings/*         123 lines
+  sources.py        /api/file_sources/* + /api/file_scan   93 lines
+  profiles.py       /api/profiles/*                        48 lines
+  email.py          /api/smtp/* + /api/send_report        210 lines
+  database.py       /api/db/* + /api/admin/* + preview    536 lines
+  export.py         Excel + Art.30 export + bulk delete  1177 lines
+  app_routes.py     /api/about + /api/langs + /api/lang    67 lines
+```
+
+### Housekeeping — Document Scanner files removed
+
+The following files belonged to the standalone Document Scanner product and
+have been removed from this repository:
+
+- `server.py` — Document Scanner web app
+- `scanner_worker.py` — Document Scanner process-pool worker
+- `build.py` — Document Scanner build script
+- `build_app.sh` — Document Scanner shell build script
+- `Dockerfile` — Document Scanner Docker image
+- `docker-compose.yml` — Document Scanner Docker Compose file
+- `doc_scanner_icon.png` — Document Scanner app icon
+
+`requirements.txt` rewritten for the M365 Scanner only. Removed
+`pdf2image`, `pytesseract`, `pypdf`, `reportlab`, `img2pdf`, and `py7zr`
+(Document Scanner dependencies). Added `cryptography>=42.0` (SMTP password
+encryption, already in use since v1.4.7).
+
+---
+
+## [1.4.8] — 2026-03-27
+
+### Changed — Email: Microsoft Graph API preferred over SMTP
+
+The **Test** and **Send now** actions now both try the Microsoft Graph API first when the
+scanner is authenticated to Microsoft 365. This avoids SMTP AUTH entirely —
+no port 587, no app password, no admin centre changes needed.
+
+**New `_send_email_graph()` helper** — sends via `/me/sendMail` (delegated mode)
+or `/users/{sender}/sendMail` (app mode). Supports optional Excel attachment for
+the full report. Requires the `Mail.Send` Graph permission on the Azure app
+registration (Application or Delegated, depending on auth mode).
+
+**Priority order:**
+1. **Microsoft Graph API** — used when connected to M365
+2. **SMTP** — fallback if not connected or Graph fails
+
+**Error surfacing** — Graph permission errors (403 / Forbidden / Mail.Send /
+insufficient privileges) are now returned directly with a clear actionable
+message: add `Mail.Send` permission to the Azure app registration and grant
+admin consent. Previously the error was silently swallowed and the scanner
+fell through to SMTP, masking the real problem.
+
+**SMTP AUTH error** — if SMTP is used and Microsoft 365 returns error 530 5.7.57
+("Client not authenticated"), the error message now includes a plain-English tip
+explaining how to enable SMTP AUTH in the M365 admin centre, or how to use Graph
+instead.
+
+### Changed — Test button sends a real email to configured recipients
+
+The SMTP **Test** button previously only verified connectivity (EHLO/STARTTLS
+handshake). It now sends an actual HTML test email to the configured recipients,
+making it easy to verify end-to-end delivery including spam filtering.
+
+---
+
+## [1.4.7] — 2026-03-27
+
+### Security — SMTP password encrypted at rest
+
+Previously the SMTP password was stored as plaintext in `~/.m365_scanner_smtp.json`.
+It is now encrypted using **Fernet symmetric encryption** (`cryptography` library,
+already a dependency).
+
+**Implementation:**
+- A random Fernet key is generated on first use and saved to
+  `~/.m365_scanner_machine_id` (chmod 0o600 — readable and writable by the owner only)
+- Passwords are stored as `enc:<ciphertext>` in the JSON file
+- `_encrypt_password()` / `_decrypt_password()` handle the encode/decode cycle
+- `_load_smtp_config()` transparently decrypts on load; `_save_smtp_config()`
+  encrypts on save
+- **Legacy plaintext passwords** (no `enc:` prefix) are read as-is and
+  re-encrypted next time settings are saved — no migration step required
+- Encrypted blobs are **machine-specific** — the ciphertext cannot be decrypted
+  on another machine without the key file
+- Graceful fallback to plaintext if `cryptography` is unavailable (rare)
+- The GET `/api/smtp/config` endpoint never returns the password to the browser;
+  it returns only `has_password: true/false`
+
+### Fixed — EXIF `has_pii` false positives on screenshots
+
+`_EXIF_PII_TAGS` previously included `HostComputer`, `DocumentName`, and `PageName`.
+These are set automatically by macOS/Windows on every screenshot (machine name, app
+name) and contain no personal data about an individual. Removed from the tag set.
+
+Minimum content length of 3 characters added — a field must contain at least 3
+non-whitespace characters to trigger a `has_pii` flag. Prevents empty or
+single-character values from causing false positives.
+
+**Affected fields retained:** `Artist`, `Copyright`, `ImageDescription`,
+`UserComment`, `XPAuthor`, `XPSubject`, `XPComment`, `XPKeywords` — all fields
+a human would deliberately fill with personal information.
+
+### Fixed — Accounts section not greyed out when switching to a file-only profile
+
+`_applyProfile()` restored the source checkboxes but did not call
+`_updateAccountsVisibility()` afterwards. Switching to a profile with no M365
+sources selected left the accounts section fully interactive. Fixed by calling
+`_updateAccountsVisibility()` immediately after the checkbox restore loop.
+
+---
+
+## [1.4.6] — 2026-03-27
+
+### Changed — Excel export updated for EXIF, GPS, and file sources
+
+**New columns in all source sheets:**
+- **GPS** — ✔ tick when GPS coordinates are present in the item's EXIF data
+- **EXIF author** — author/artist name extracted from EXIF metadata
+- Special category column now filters out `gps_location` and `exif_pii` (represented by the dedicated GPS column instead)
+
+**New source types in `SOURCE_MAP`:**
+- `local` — 📁 Local (green tab), for files from local folder scans
+- `smb` — 🌐 Network (blue tab), for files from SMB/CIFS network shares
+- Both get their own sheet when results exist; skipped silently if empty
+
+**Summary sheet:**
+- Row 4: "Items with GPS data" count (shown only when non-zero)
+- Summary table shifted to row 7 to accommodate (was row 6)
+- Source rows now skipped when a source has zero items
+
+**New GPS locations sheet:**
+- Teal tab — created only when GPS items exist
+- Columns: Name, Latitude, Longitude, Maps link (blue hyperlink), Account, Date Modified
+- Auto-filter enabled; alternating row colours
+
+**Bug fix:** dead old function body (164 lines after the `return`) removed — the previous `str_replace` only replaced the docstring, leaving unreachable code in the file.
+
+---
+
+## [1.4.5] — 2026-03-26
+
+### Fixed — `_detect_photo_faces` missing after EXIF insertion
+
+The `str_replace` that added `_extract_exif()` accidentally consumed the
+`def _detect_photo_faces` function definition (it was part of the replaced
+string). All image scans raised `NameError: name '_detect_photo_faces' is not
+defined`. Function restored at its original position before `_scan_bytes()`.
+
+### Fixed — Progress bar shows "undefined / undefined" during file scans
+
+The M365 `scan_progress` SSE event sends `{index, total, pct, file, eta}`.
+The file scanner sent only `{scanned, flagged}`. The JS handler blindly read
+`d.index` and `d.total`, producing `undefined / undefined`.
+
+**Fixes:**
+- `run_file_scan()` now broadcasts `{scanned, flagged, file, pct}` so the
+  current filename and a progress indicator are shown while scanning.
+- The `scan_progress` JS handler now checks which fields are present and
+  renders accordingly: `index / total` for M365 scans, `N · M flagged` for
+  file scans.
+
+### Fixed — Local file preview: PDF, XLSX, DOCX now render content
+
+`/api/preview/<id>` for `source_type=local` previously showed only a metadata
+placeholder for PDF and Office files. Now:
+
+| Type | Preview |
+|---|---|
+| PDF | First 5 pages extracted via `pdfplumber`, CPR numbers highlighted in red |
+| XLSX / XLSM | First 50 rows of up to 3 sheets as a styled table |
+| CSV | First 50 rows as a table |
+| DOCX / DOC | First 80 paragraphs as text, CPR numbers highlighted |
+
+All fall back to a metadata card if the library is unavailable or the file
+cannot be parsed. `document_scanner` (already imported) provides access to
+`pdfplumber` and `openpyxl`.
+
+---
+
+## [1.4.4] — 2026-03-26
+
+### Added — #18 EXIF metadata extraction from images
+
+**New function `_extract_exif(content, filename)`** — extracts structured EXIF data from JPEG, PNG, TIFF, WEBP, and HEIC images using Pillow (already a dependency). No new packages required.
+
+**Extracted fields:**
+- **GPS coordinates** — converted from DMS rational values to decimal degrees; Google Maps link generated
+- **Author / Artist / Copyright / Description / UserComment / Keywords** — checked for PII content
+- **Device** — camera make and model
+- **Datetime** — DateTimeOriginal or DateTime
+
+**Behaviour changes:**
+- EXIF extraction runs on all scanned images regardless of the "Scan photos" toggle — it is lightweight (no CV processing) and always relevant
+- Images with GPS or PII-bearing EXIF fields are flagged even without CPR hits
+- `special_category` gains `"gps_location"` and/or `"exif_pii"` entries as appropriate
+- Face detection (`_detect_photo_faces`) still requires the "🖼 Scan photos for faces" opt-in
+
+**UI:**
+- **🌍 GPS badge** — teal pill on result cards (grid and list view) when GPS coordinates are present
+- **Preview panel** — local image previews now show a collapsible "EXIF data" section beneath the image with GPS (clickable Google Maps link), author, date, device, and any other PII-bearing fields
+
+**Applies to both M365 and file system scans** — OneDrive/SharePoint images and local/SMB files go through the same extraction path.
+
+---
+
+## [1.4.3] — 2026-03-26
+
+### Added — General Settings modal
+
+Three sidebar sections (✉ Email report, 🗄 Database, and the language selector + About link) have been removed from the sidebar and consolidated into a single **⚙ Settings** modal, opened via a button in the sidebar footer.
+
+**General tab** — language selector (mirrors the hidden `langSelect`), theme toggle, and About info (version, Python, MSAL, Requests, openpyxl versions).
+
+**Email report tab** — full SMTP configuration (host, port, username, password, from address, STARTTLS, recipients), Save, and Send now. Pre-fills from saved config. `openSmtpModal()` now redirects to this tab for backward compatibility.
+
+**Database tab** — DB stats (total items, flagged items, scan count), ⬇ Export, ⬆ Import, and 🗑 Reset DB. `exportDB()` and `openImportDBModal()` work unchanged.
+
+**🔍 Data subject lookup** remains as a sidebar shortcut since it is part of the active compliance workflow.
+
+---
+
+## [1.4.2] — 2026-03-26
+
+### Added — Dynamic sources panel in sidebar
+
+The sidebar sources panel is now fully dynamic. Previously the four M365 sources (Email, OneDrive, SharePoint, Teams) were hardcoded checkboxes. Now:
+
+- **`renderSourcesPanel()`** builds the list at runtime from `_M365_SOURCES` (the four fixed M365 entries) and `_fileSources` (saved local/SMB sources). A "File sources" group header appears automatically when any file sources are configured.
+- Per-source visibility toggles in the ⚙ Sources modal (Microsoft 365 tab) control which M365 sources appear in the panel. Toggling one off removes it from the panel immediately.
+- File sources added in the Sources modal appear as checkboxes in the panel alongside the M365 sources, with 📁 (local) or 🌐 (SMB) icons.
+- The panel shows up to 5 rows before scrolling (`max-height: calc(5 * 26px)`).
+- **Profile save/restore** — file source selections are now included when saving a profile. `buildScanPayload()` merges M365 and file source IDs into `allSources`; `_applyProfile()` restores all of them. A `_pendingProfileSources` mechanism handles the async case where file sources load after the profile is applied.
+
+### Added — Hint tooltips on Delta scan, Scan photos, Retention policy toggles
+
+Each of the three advanced option toggles now has a circled **?** icon to the right of the label. Clicking it shows a speech bubble (fixed-positioned, `z-index: 9999`) with the hint text, positioned to the right of the icon and visible above the main content area. Only one bubble can be open at a time; clicking anywhere outside closes it.
+
+### Changed — ⚙ Profiles button moved to topbar
+
+The accent-coloured **⚙ Profiles** button was removed from the Database section in the sidebar. A plain **⚙ Profiles** button (matching the style of **⚙ Sources**) now appears to the right of the 💾 save button in the topbar profile bar.
+
+### Changed — App mode badge (modeBadge) removed
+
+The `modeBadge` button and `userBar` div have been removed from the sidebar. Connection status and mode (App / Delegated) are now shown exclusively in the Sources modal (Microsoft 365 tab) — connection info row with green/grey status dot, display name, email, and mode label.
+
+### Fixed — Sources modal: credentials pre-filled from saved config
+
+`smRefreshStatus()` now calls `/api/auth/status` (correct endpoint) and pre-fills Client ID, Tenant ID, and Client Secret fields from the saved config. Connects via `/api/auth/config` + `/api/auth/start`; disconnects via `/api/auth/signout` + `signOut()`.
+
+### Fixed — File source naming: Name field required; auto-suggest from path
+
+The "Label" field has been renamed to "Name" and marked required (red asterisk). `fsrcAutoName()` suggests a name as the user types the path — last path segment for local paths, `host / share` for SMB paths. The user's own name is never overwritten once typed.
+
+### Fixed — Sources panel fixed height with scroll
+
+`#sourcesPanel` in the sidebar now has `max-height: calc(5 * 26px); overflow-y: auto` so it shows exactly 5 rows before scrolling, regardless of how many sources are configured.
+
+### Fixed — Fiscal year end dropdown alignment
+
+The "Fiscal year end" label and select were previously side-by-side, causing the label to wrap on long translations (e.g. "Regnskabsårs afslutning"). Now stacked vertically (`flex-direction: column`) with `width: 100%` on the select.
+
+### Fixed — ⚙ cog size inconsistency between Sources and Profiles buttons
+
+Both buttons previously used `⚙️` (U+2699 + variation selector U+FE0F), which can render at emoji size rather than text size. Replaced with plain `⚙` (U+2699) in both so they render at identical size.
+
+### Fixed — MB label removed from max attachment size picker
+
+The "MB" text span to the right of the attachment size number input has been removed.
+
+### Fixed — File source selections included in profiles
+
+`buildScanPayload()` now collects both M365 and file source IDs and merges them into `allSources`, which is saved as `profile.sources`. Previously only M365 source IDs were saved.
+
+---
+
+## [1.4.1] — 2026-03-26
+
+### Added — #17 Unified source management modal
+
+Replaced the fragmented sidebar source configuration with a single **⚙️ Sources** button above the sources panel. This opens a tabbed modal:
+
+**Microsoft 365 tab:** Azure credentials (Client ID, Tenant ID, Client Secret) moved from the auth screen into the modal — can be updated or cleared post-connect. Per-source toggles (Email, OneDrive, SharePoint, Teams) control which sources appear in the sidebar panel. Disconnect button signs out without leaving the page.
+
+**Google Workspace tab:** Stub with "Coming soon" — placeholder for Gmail and Google Drive when implemented.
+
+**File sources tab:** Full file source management (list, add, delete, scan) moved from the standalone "📁 File sources" sidebar row into this tab. The separate sidebar row is removed.
+
+**Sidebar change:** The "📁 File sources" sidebar section is removed. The sources panel now has a compact **⚙️ Sources** button in its header row. The panel itself respects the per-source visibility toggles set in the modal — if a user disables OneDrive, it disappears from the panel immediately.
+
+**Backward compatibility:** `openFileSourcesModal()` redirects to `openSourcesMgmt('files')` so any existing call sites continue to work.
+
+---
+
+## [1.4.0] — 2026-03-26
+
+### Added — #8 File system scanning (local folders and SMB/CIFS network shares)
+
+**New file: `file_scanner.py`** — unified local + network file iterator.
+
+`FileScanner.iter_files()` yields `(relative_path, bytes, metadata)` regardless
+of whether the source is a local path or a network share. All CPR scanning, card
+streaming, and DB persistence stay in `m365_scanner.py` — `file_scanner.py` only
+handles how files are accessed.
+
+**Local scanning** uses `os.walk()` on any path (workstation, USB drive, or
+already-mounted network share). **SMB/CIFS scanning** uses `smbprotocol` directly
+without requiring a mount — supports SMB2/3 with NTLM or domain credentials.
+`smbprotocol` is optional: if not installed, the scanner falls back to local-only
+mode with a logged warning.
+
+**Credential storage priority (SMB):**
+1. OS keychain via `keyring` (recommended — password never touches the filesystem)
+2. `NAS_PASSWORD` environment variable
+3. `.env` file (chmod 600) via `python-dotenv`
+
+All three optional dependencies (`smbprotocol`, `keyring`, `python-dotenv`) are
+added to `requirements.txt` as opt-in extras.
+
+**Results** write to the same SQLite DB as M365 items with
+`source_type = "local"` or `"smb"`, so the Article 30 report and data subject
+lookup cover all sources in a single view. File/network cards use 📁 and 🌐
+source badges respectively.
+
+**UI — 📁 File sources sidebar section:**
+
+- **Manage button** → opens the File Sources modal
+- **Add source form** — label, path; SMB fields (host, user, password) appear
+  automatically when the path starts with `//` or `\\`; host is auto-filled from
+  the path
+- **Per-source ▶ Scan button** — starts a scan immediately; results stream into
+  the main grid via SSE exactly like an M365 scan
+- **Delete** — removes a source definition (does not affect scan results already
+  in the DB)
+- Sources persist in `~/.m365_scanner_file_sources.json`
+
+**New API routes:**
+
+| Route | Method | Description |
+|---|---|---|
+| `/api/file_sources` | GET | List all file source definitions |
+| `/api/file_sources/save` | POST | Add or update a source |
+| `/api/file_sources/delete` | POST | Remove a source by id |
+| `/api/file_sources/store_creds` | POST | Store SMB password in OS keychain |
+| `/api/file_scan/start` | POST | Start a file scan (non-blocking) |
+
+**New CLI flags:**
+
+```bash
+# Scan a local folder
+python m365_scanner.py --scan-path ~/Documents
+
+# Scan an SMB share (password from OS keychain)
+python m365_scanner.py --scan-path //nas.school.dk/shares \
+  --smb-user "DOMAIN\\henrik" --smb-keychain-key gdpr-scanner-nas
+
+# One-time credential storage
+python m365_scanner.py --smb-store-creds --smb-host nas.school.dk \
+  --smb-user "DOMAIN\\henrik"
+
+# With photo scanning and file size limit
+python m365_scanner.py --scan-path //nas/staff --scan-photos --max-file-mb 100
+```
+
+**`build_m365.py`** — `file_scanner.py` added to PyInstaller datas bundle.
+
+---
+
+## [1.3.11] — 2026-03-26
+
+### Fixed — Face detection: excessive false positives on background elements
+
+Haar cascade detection with `minNeighbors=5` and `min_size=40px` was triggering
+on background textures, bottle labels, artwork, and out-of-focus persons,
+reporting up to 16 faces for a photo containing 1–2 actual subjects.
+
+**Changes in `_detect_photo_faces()` (`m365_scanner.py`):**
+
+- `min_size` raised **40 → 80 px** — eliminates detections on small background
+  features; out-of-focus background persons and objects are too small in pixels
+  to exceed this threshold
+- `minNeighbors` raised **5 → 8** — each candidate region must be confirmed by
+  8 overlapping scale-pyramid detections instead of 5; random texture patterns
+  rarely survive this many confirmations
+
+If over-detection persists on a specific image, `minNeighbors=10` and
+`min_size=100` are reasonable next steps before genuine faces are missed.
+
+### Fixed — Result cards: replaced 👤 + separate role-pill with unified role icon
+
+The account-pill (showing the owner's display name) previously prepended a
+static `👤` via CSS `::before` and rendered a separate `role-pill` span
+(🎓/👔) alongside it. Both elements have been merged: the account-pill now
+prefixes the display name directly with the role icon — **🎓 name** for
+students, **👔 name** for staff, **👤 name** for unclassified — removing the
+redundant separate badge and saving horizontal space in both grid and list view.
+
+---
+
+## [1.3.10] — 2026-03-26
+
+### Changed — Role classification: fragment-first, ID-second
+
+**Motivation:** Microsoft has reissued new UUIDs for the same licence multiple
+times over the past 5–6 years (EA → A1/A3/A5 → new commerce/CSP → benefit
+variants). `skuPartNumber` strings like `STANDARDWOFFPACK_FACULTY` have been
+stable across all those generations while UUIDs change with every new issuance.
+
+**New `classify_user_role()` order:**
+
+1. **Fragment match on `skuPartNumber`** (runs first when `sku_map` available) — staff fragments checked before student across all licences, so a `STUDENT_BENEFIT` add-on cannot mask a `FACULTY` licence.
+2. **SKU ID lookup from `m365_skus.json`** — fallback when `sku_map` is empty or when a licence has no recognisable fragment (e.g. Power Automate Free assigned to faculty).
+
+Any future Microsoft SKU re-issuance is classified correctly without updating `m365_skus.json`, as long as the part number still contains `FACULTY` or `STUDENT`.
+
+### Fixed — `m365_skus.json`: added two missing faculty SKUs
+
+- `c2273bd0-dff7-4215-9ef5-2c7bcfb06425` — Microsoft 365 Apps for Faculty (primary licence at Gudenåskolen, absent from all previous versions)
+- `f30db892-07e9-47e9-837c-80727f46fd3d` — relabelled Microsoft Power Automate Free (assigned to faculty)
+
+---
+
+## [1.3.9] — 2026-03-26
+
+### Fixed — `m365_skus.json` not deployed; `build_sku_map_from_users` sampled wrong users
+
+**File missing:** `m365_skus.json` was never copied into `classification/` on disk. `_load_sku_data()` fell back to empty sets (`staff_ids_count: 0`). Students were still classified via the `STUDENT` fragment, but staff always resolved to `"other"`. Fix: the file is now shipped; place it in `GDPRScanner/classification/m365_skus.json`.
+
+**Wrong sample:** `build_sku_map_from_users` took the first 20 users in alphabetical order — all students at Gudenåskolen — so it never fetched a staff part number. Fixed to sample evenly across the full list and always include the last 5 users.
+
+---
+
+## [1.3.8] — 2026-03-26
+
+### Fixed — `m365_skus.json` not found in PyInstaller bundle; `🔍` SKU debug modal
+
+`_SKU_FILE = Path(__file__).parent / ...` was evaluated at class-definition time, before `sys._MEIPASS` is set in a frozen build. Replaced with a `_sku_file_path()` classmethod that checks `_MEIPASS` at call time.
+
+Added 🔍 SKU debug button to the accounts panel role-filter row. Opens a modal showing every tenant SKU ID colour-coded as 🎓 student / 👔 staff / ❓ unknown, with selectable text for pasting unknowns into `m365_skus.json`.
+
+`/api/users/license_debug` extended: now returns `student_ids`, `staff_ids`, `runtime` block (set sizes, fragment lists, file path, sku_map entry count), and per-licence `in_staff`/`in_student`/`frag_staff`/`frag_student` trace for every user — sufficient to diagnose any classification failure without reading server logs.
+
+---
+
+## [1.3.7] — 2026-03-26
+
+### Fixed — `license_debug` extended for full runtime diagnostics
+
+`/api/users/license_debug` rewritten to expose all runtime state: `staff_ids_count`, `student_ids_count`, fragment lists, `sku_file_path`, `sku_map_entries`, and a step-by-step per-licence classification trace for every user (`in_staff`, `in_student`, `frag_staff`, `frag_student`, `skuName`).
+
+---
+
+## [1.3.6] — 2026-03-26
+
+### Fixed — Staff misclassified as student: two-pass classify_user_role
+
+**Root cause:** `f30db892-07e9-47e9-837c-80727f46fd3d` is a Microsoft *Student
+Use Benefit* add-on that Microsoft automatically assigns alongside faculty
+licences in Education tenants. Its `skuPartNumber` contains `"STUDENT"`. Because
+the old single-pass loop checked student and staff in per-licence order, the
+fragment match on this add-on fired before the authoritative faculty ID
+(`94763226`) was ever reached, returning `"student"` instead of `"staff"`.
+
+**Fix — `classify_user_role()` now uses a strict two-pass approach:**
+
+**Pass 1 — authoritative ID match (m365_skus.json), staff before student:**
+All licences are scanned for staff IDs first, then student IDs. A single faculty
+SKU ID anywhere in the licence list wins regardless of what other add-on licences
+appear before it.
+
+**Pass 2 — skuPartNumber fragment match, staff before student:**
+Only reached if no ID match was found. Staff fragments are checked across every
+licence before student fragments — preventing a `STUDENT_BENEFIT` add-on from
+masking a `FACULTY` licence later in the list.
+
+**Result:** A staff member holding `[STUDENT_BENEFIT_ADDON, FACULTY_A1, STUDENT_DEVICE]`
+is now correctly classified as `"staff"` in all cases, whether `sku_map` is
+populated or not.
+
+---
+
+## [1.3.5] — 2026-03-26
+
+### Fixed — Staff not recognised: always merge per-user SKU map
+
+**Root cause:** `build_sku_map_from_users()` (which calls `/users/{id}/licenseDetails`
+for up to 20 sampled users) was only called when `sku_map` was completely empty.
+In practice `get_subscribed_skus()` tier 2 (`/me/licenseDetails`) always succeeds
+in delegated mode, returning the signed-in admin's own license — making `sku_map`
+non-empty and silently skipping the per-user sampling.
+
+If the admin's license happened to be a faculty A1 and other staff held A3 or an
+unlisted variant, those A3 users were never added to `sku_map` and fragment
+matching could not fire for them, leaving them as `"other"`.
+
+**Fix:** `build_sku_map_from_users()` is now **always called** and its results
+**merged** into `sku_map`, regardless of whether `get_subscribed_skus()` already
+returned entries. This guarantees that every distinct SKU ID actually in use by
+any of the first 20 users gets a `skuPartNumber` entry, enabling fragment matching
+for all staff variants — including those not yet listed in `m365_skus.json`.
+
+Same merge applied in `license_debug` so the 🔍 modal also sees complete data.
+
+---
+
+## [1.3.4] — 2026-03-26
+
+### Fixed — Role classification: three-tier SKU map fallback
+
+**Root cause:** `get_subscribed_skus()` requires `Directory.Read.All` or
+`Organization.Read.All`. If the Azure app registration does not have that
+permission (typical delegated/device-code setups), it silently returned `{}`
+and the fragment fallback never ran, leaving every user as `"other"`.
+
+**Fix — `get_subscribed_skus()` now tries three endpoints in order:**
+
+| Tier | Endpoint | Permission needed |
+|---|---|---|
+| 1 | `/subscribedSkus` | Directory.Read.All (admin) |
+| 2 | `/me/licenseDetails` | User.Read only |
+| 3 | `build_sku_map_from_users()` via `/users/{id}/licenseDetails` (up to 20 users) | User.Read.All |
+
+Each tier logs how many SKU entries it found. Tier 2 always works in delegated
+mode and covers the signed-in user's licenses. Tier 3 covers all distinct SKUs
+used in the tenant by sampling up to 20 users. If any tier returns results, the
+others are skipped.
+
+**UI warning banner** — when every fetched user resolves to `"other"`, a red
+banner appears above the accounts list: *"No users classified — click 🔍 to
+diagnose."* It disappears automatically once classification succeeds.
+
+---
+
+## [1.3.3] — 2026-03-26
+
+### Fixed — Role classification: SKU debug modal + path resolution
+
+**Problem:** Even with `classification/m365_skus.json` loading correctly, users showed as
+unclassified because the tenant's actual SKU IDs were not in the file. There was
+no easy way to discover which IDs to add.
+
+**Changes:**
+
+- **🔍 SKU debug button** — a small magnifying-glass button added to the role
+  filter row (next to 🎓 Elev). Clicking it opens a modal that calls
+  `GET /api/users/license_debug` and lists every unique SKU ID in the tenant,
+  colour-coded: `🎓 student` / `👔 staff` / `❓ unknown`. Unknown IDs can be
+  selected and copied directly into `classification/m365_skus.json`.
+
+- **`/api/users/license_debug`** extended — now also returns `student_ids` and
+  `staff_ids` arrays from the loaded SKU file so the frontend can mark each
+  tenant SKU as known or unknown without a second round-trip.
+
+- **`_sku_file_path()` classmethod** — replaced the static `_SKU_FILE` class
+  attribute with a method that checks `sys._MEIPASS` first (PyInstaller bundle)
+  then falls back to `Path(__file__).parent / "skus" / "m365_skus.json"`.
+  The static attribute evaluated at class-definition time before `_MEIPASS` was
+  set, causing the frozen app to look in the wrong directory.
+
+- **Server-side warning** — `GET /api/users` now logs a `WARNING` to stdout
+  when 0 out of N users are classified, including a sample of the unrecognised
+  SKU IDs seen in the first 20 users.
+
+- **Translated** — EN / DA / DE (3 new keys)
+
+---
+
+## [1.3.2] — 2026-03-26
+
+### Fixed — Student/Staff misclassification: incomplete SKU lists + no override (#1.3.2)
+
+**Root cause:** The hardcoded SKU lists introduced in v1.0.0 covered only ~8 student
+and 6 staff SKUs. Microsoft publishes 100+ Education SKU IDs; any tenant using a SKU
+not in those lists silently fell through to `"other"`, leaving users unclassified
+or relying solely on the `skuPartNumber` fragment fallback — which itself was too
+specific (`STANDARDWOFFPACK_STUDENT` instead of just `STUDENT`).
+
+#### `m365_connector.py` — Expanded SKU lists and broader fragment matching
+
+**Student set** expanded from 8 → 12 SKUs:
+- Added `46c119d4` (M365 A1 for Students — student use benefit)
+- Added `8fc2205d` (O365 A5 for Students)
+- Added `160d616a` (O365 A3 for Students device)
+- Added `a4e376bd` (M365 A1 for Students new commerce)
+
+**Staff set** expanded from 6 → 9 SKUs:
+- Added `2d61d025` (M365 A1 for Faculty — faculty use benefit)
+- Added `15b1d32e` (O365 A3 for Faculty device)
+- Added `ba04c29e` (M365 A1 for Faculty new commerce)
+
+**Fragment patterns** broadened — `"STUDENT"` and `"FACULTY"` now catch all
+part-number variants (`_STUDENT`, `STUDENT_`, `STUDENT_BENEFIT`, `_FAC`, etc.)
+without needing to enumerate every Microsoft naming permutation.
+
+#### `m365_scanner.py` — Manual role overrides
+
+Because no SKU list can ever be complete, admins can now correct individual users
+directly from the accounts panel:
+
+- **🎓/👔/❓ role badge** on every user row — click to cycle:
+  `auto → student → staff → other → (clear, back to auto)`
+- Overridden rows show the badge in accent colour with a **✎** indicator
+- Overrides persisted to `~/.m365_scanner_role_overrides.json` — survive
+  restarts and re-authentication
+- Applied at both display time (`/api/users`) and scan time (`_user_role_map`)
+  so card badges, filter buttons, Excel Role column, and Article 30 inventory
+  split all reflect the corrected role
+- `GET /api/users/role_override` — returns all current overrides
+- `POST /api/users/role_override` — sets or clears one override
+- Override file added to `--purge` file list
+- Translated — EN / DA / DE (3 new keys)
+
+---
+
+## [1.3.1] — 2026-03-26
+
+### Fixed — Student/Staff role misclassification (`m365_connector.py`)
+
+Two SKU ID collisions in `_STUDENT_SKU_IDS` / `_STAFF_SKU_IDS` caused Faculty
+users to be shown as Students (and vice versa) for any tenant using A5 or A3
+Education licenses:
+
+| SKU ID | Correct role | Bug |
+|---|---|---|
+| `e578b273-6db4-4691-bba0-8d691f4da603` | Staff (M365 Education A5 for Faculty) | Was also in `_STUDENT_SKU_IDS` as "O365 A5 for Students" — Faculty A5 users always showed as 🎓 Student |
+| `78e66a63-337a-4a9a-8959-41c6654dfb56` | Student (Office 365 A3 for Students) | Was also in `_STAFF_SKU_IDS` as "M365 A1 for Faculty (device)" — this had no effect because student is checked first, but the comment was wrong and the duplicate entry was confusing |
+
+`classify_user_role()` checks student first, so any overlap resolves to student,
+silently misclassifying all affected Faculty accounts.
+
+**Fix:** removed `e578b273` from `_STUDENT_SKU_IDS` and `78e66a63` from
+`_STAFF_SKU_IDS`. Also removed a stale duplicate of `e578b273` that appeared
+twice in `_STAFF_SKU_IDS`. Added a `RuntimeWarning` guard inside
+`classify_user_role()` that logs any future collision between the two sets.
+
+**Impact:** Article 30 staff/student inventory split, role filter buttons (👔 / 🎓),
+role badges on cards, and Excel Role column are all now correct for A5 and A3
+Education tenants.
+
+**Workaround until update:** use `GET /api/users/license_debug` to see the raw
+SKU IDs and current classification for each user.
+
+---
+
+## [1.3.0] — 2026-03-26
+
+### Added — Biometric photo scanning (#9)
+
+**GDPR reference:** Article 9 (special categories — biometric data), Article 5(1)(b) and (e), Recital 38, Databeskyttelsesloven §6
+
+- **`PHOTO_EXTS`** — new constant covering `.jpg .jpeg .png .bmp .tiff .tif .webp .heic .heif`
+- **`_detect_photo_faces(content, filename)`** — calls `ds._get_cv2()` + `ds.detect_faces_cv2()` (already in `document_scanner.py`); PIL fallback for HEIC/HEIF; `minNeighbors=5` for conservative detection; returns face count or 0 on any failure; entirely safe — exceptions swallowed silently
+- **`scan_photos` option** — new boolean scan option (default `False` — opt-in); extracted from `scan_opts` alongside `delta` and `email_body`
+- **`🖼 Scan photos for faces` toggle** in the Options panel, with hint: "Slower — opt in"
+- **Photo items flagged even without CPRs** — a file is added to results if `face_count > 0`, even if no CPR number is found; photographs of identifiable people are Art. 9 data regardless of CPR content
+- **`"biometric"` auto-injected** into `special_category` when faces are detected and `"biometric"` is not already present
+- **`face_count`** field added to card payload, DB, Excel, and Article 30 report
+
+**DB (migration #4):**
+- `face_count INTEGER NOT NULL DEFAULT 0` added to `flagged_items` via auto-migration
+- `save_item()` updated to persist `face_count`
+
+**UI:**
+- **`📷 N faces` badge** — teal `photo-face-badge` pill shown on cards in both grid and list view when `face_count > 0`
+- **`📷 Photos / biometric` filter** added to the Special dropdown in the filter bar; `applyFilters()` handles `specialVal === 'photo'`
+- `buildScanPayload()` includes `scan_photos`; `_applyProfile()` restores it when loading a profile
+
+**Excel export:**
+- `Face count` column added as column 3 (between CPR Hits and Special category); URL column index updated from 10 → 11 for hyperlink styling
+
+**Article 30 report:**
+- Summary section: `Photos with detected faces (Art. 9 biometric)` row with item + face count; explanatory note on legal basis and parental consent (Databeskyttelsesloven §6)
+- New dedicated section: *Photographs and Biometric Data (Article 9)* — intro paragraph, 4-bullet retention guidance (purpose limitation, pupil consent, website removal, archiving), item table (name, account, source, faces, modified date), capped at 50 rows
+- Methodology section: bullet added describing OpenCV Haar cascade detection
+
+**Translated** — EN / DA / DE (16 new keys per language)
+
+---
+
+## [1.2.3] — 2026-03-26
+
+### Added — Profile management modal (#15d)
+
+- **⚙ Profiles button** in the sidebar Database row opens a modal listing all saved profiles
+- **Each profile row** shows name (with ● active indicator), sources summary, description, and last run timestamp
+- **Use** — loads the profile into the sidebar and updates the topbar dropdown; closes the modal
+- **Edit** — expands an inline edit form directly in the row; saves name and description via `POST /api/profiles/save`
+- **Duplicate** — creates a copy with a unique `(copy)` / `(copy 2)` suffix; reloads the list
+- **Delete** — confirms, removes via `POST /api/profiles/delete`, clears `_activeProfileId` if the deleted profile was active
+- Empty state shown when no profiles have been saved yet
+- Translated — EN / DA / DE (14 new keys per language)
+
+### Added — Database export/import UI (#11)
+
+- **🗄 Database** sidebar section with **Export** and **Import** buttons (always visible; sits between Email report and User info)
+- **Export button** — calls `GET /api/db/export`; triggers a browser download of a timestamped ZIP (`gdpr_export_YYYYMMDD_HHmmss.zip`) containing 8 JSON files; CPR hashes only, thumbnails stripped
+- **Import modal** — file picker (`.zip` only), mode selector (Merge / Replace), replace warning panel, status line, and Import button; calls `POST /api/db/import` with multipart form data
+- **`GET /api/db/export`** Flask route — generates ZIP in a temp file, streams bytes as `application/zip` attachment
+- **`POST /api/db/import`** Flask route — accepts multipart `file`, `mode`, `confirm`; validates replace confirmation server-side; returns `{ok, mode, imported: {table: count}}`
+- Translated — EN / DA / DE (17 new keys per language)
+
+### Changed — Article 9 keyword matching compiled to regex (#13)
+
+- `_load_keywords()` now compiles one `re.Pattern` per Article 9 category at startup using a longest-first alternation: `(?:keyword_a|keyword_b|…)` with `re.IGNORECASE`
+- Short keywords (≤ 4 chars) retain `(?<!\w)…(?!\w)` word-boundary anchors to prevent substring false positives
+- `_check_special_category()` uses the compiled patterns via `pattern.finditer()` instead of a sequential `str.find()` loop over up to 459 entries
+- Startup log now reports compiled category count: `Loaded 459 keywords (9 categories compiled)`
+- **Performance:** ~10–50× faster for large tenants; negligible difference for typical school tenants (~100 flagged items); meaningful saving at 1 000+ items
+
+---
+
+## [1.2.2] — 2026-03-21
+
+### Added — Profile selector in topbar (15c)
+
+- **Profile dropdown** in the topbar, between the Scan button and the spacer — shows "Default (sidebar)" plus all saved profiles with their last run date
+- **💾 Save button** next to the dropdown — prompts for a name and saves the current sidebar state (sources, options, user selection, retention settings) as a named profile via `POST /api/profiles/save`
+- **`onProfileChange()`** — fires when the dropdown changes; calls `_applyProfile()` to populate the sidebar controls from the selected profile
+- **`_applyProfile(profile)`** — sets all source checkboxes, scan options, retention fields, and queues user selection for when the accounts list is loaded
+- **`_applyPendingProfileUsers()`** — applies a profile's `user_ids` to the accounts list after `loadUsers()` completes; safe to call multiple times
+- **`loadProfiles()`** — fetches `/api/profiles` and populates the dropdown; called on `onAuthenticated()`
+- **`saveCurrentAsProfile()`** — collects the full `buildScanPayload()` state and posts it as a new or updated profile
+- Profiles with a description show it as a tooltip on the dropdown option
+- Selecting "Default (sidebar)" clears `_activeProfileId` so the sidebar is used directly with no profile applied
+- **Translated** — EN / DA / DE (6 new keys)
+
+---
+
+## [1.2.1] — 2026-03-21
+
+### Added — Scan profiles 15a + 15b
+
+**15a — Backend profile storage**
+
+- `_profiles_load()` — reads all profiles from `~/.m365_scanner_settings.json`
+- `_profiles_write()` — atomic write of the full settings dict
+- `_profile_from_settings()` — wraps a flat settings dict as a profile object
+- `_profile_get(name_or_id)` — case-insensitive lookup by name or UUID
+- `_profile_save(profile)` — insert or update a profile
+- `_profile_delete(name_or_id)` — delete by name or UUID
+- `_profile_touch(id, scan_id)` — updates `last_run` and `last_scan_id` after a successful scan
+- **Automatic migration** — on first run, existing flat `~/.m365_scanner_settings.json` is silently wrapped into a profile named "Default"; no user action required
+- **Legacy shim** — `_save_settings()` and `_load_settings()` continue to work unchanged; all existing headless setups are unaffected
+- **Profile API routes** — `GET /api/profiles`, `POST /api/profiles/save`, `POST /api/profiles/delete`, `GET /api/profiles/get` for future UI use (15c/15d)
+
+**15b — CLI profile support**
+
+- `--list-profiles` — tabular listing of all profiles with name, sources, last run, and scan ID
+- `--save-profile NAME` — saves current CLI options as a named profile; updates existing if name matches
+- `--delete-profile NAME` — removes a profile by name
+- `--profile NAME` — loads a named profile for `--headless` runs; populates sources, retention, fiscal year end, and email recipients from the profile; prints profile name, description, and last run before scanning
+- After a successful headless scan, the active profile's `last_run` and `last_scan_id` are updated automatically
+
+---
+
+## [1.2.0] — 2026-03-20
+
+### Added — Article 9 sensitive category detection (#3)
+
+- **`keywords/da.json`** — 459 Danish keywords across 9 Article 9 categories: health, mental health, criminal (Art. 10), trade union, religion, ethnicity, political, biometric, and sexual orientation. Includes `_false_positive_guidance` for ambiguous terms and `_proximity_note` explaining the matching strategy
+- **`keywords/` subfolder** — mirrors the `lang/` pattern; `keywords/en.json` and `keywords/de.json` can be added without code changes
+- **`_load_keywords()`** — loads the keyword file at startup matching the active UI language; falls back to `da.json`
+- **`_check_special_category(text, cprs)`** — returns a sorted list of matched Article 9 category keys; a keyword only triggers when within 150 characters of a CPR number (proximity filter); if no CPRs are present in the text, any keyword occurrence triggers
+- **Card badge** — purple `⚠ Art.9 — health, criminal` pill on flagged cards showing all detected categories
+- **Filter bar dropdown** — "All risk levels / Art. 9 special category" quick filter in the results grid
+- **DB migration #3** — `special_category TEXT NOT NULL DEFAULT '[]'` added to `flagged_items` via auto-migration; stored as JSON array
+- **`finish_scan()`** — counts special category items per scan and writes to `scan_history.special_category` for trend tracking
+- **Excel export** — "Special category" column added as column 3 on all per-source sheets
+- **Article 30 report** — special category item count and DPIA warning added to the summary section; "Art. 9" column added to the per-source breakdown table with purple highlighting on non-zero values
+- **Translated** — EN / DA / DE (6 new keys per language)
+- **Build scripts** — `keywords/` folder bundled into PyInstaller app alongside `lang/`
+- **`.gitignore`** — `!keywords/*.json` added to prevent keyword files being excluded by the `*.json` catch-all
+
+---
+
+## [1.1.3] — 2026-03-20
+
+### Fixed
+
+- **Stray duplicate `_get_bytes` body** — dead code block left after `delete_drive_item_for_user` from a previous edit has been removed
+
+### Changed — `m365_connector.py`
+
+- **Split timeouts** — replaced all hardcoded `timeout=30` / `timeout=60` with two tuned constants:
+  - `_TIMEOUT_API = (10, 45)` — 10s connect, 45s read for JSON API calls
+  - `_TIMEOUT_BYTES = (10, 120)` — 10s connect, 120s read for file/attachment downloads
+  - The 10s connect timeout makes hung connections fail fast; the read timeout allows slow wireless links to complete a transfer without aborting
+
+- **Exponential backoff with retry** — all four core request methods (`_get`, `_post`, `_get_bytes`, `_delete`) now retry up to 4 times on transient network errors:
+  - Retried: `ConnectionError`, `Timeout`, `ChunkedEncodingError`, `ReadTimeout`, HTTP 429, HTTP 503, HTTP 504
+  - Not retried: HTTP 403 (permission), HTTP 410 (delta token expired) — raised immediately
+  - Backoff: 2s → 4s → 8s between attempts (capped at 30s); 429 responses use the `Retry-After` header value
+  - Intermittent wireless dropouts and brief gateway errors are now absorbed transparently without interrupting a scan
+
+- **Streaming file downloads** — `_get_bytes` now uses `stream=True` and `iter_content(65536)` so large attachments are received in 64 KB chunks rather than one blocking read; prevents read timeouts on slow connections for large files
+
+- **`list_users` inline timeout** — the `_fetch` helper inside `list_users` was using its own hardcoded `timeout=30`; updated to use `_TIMEOUT_API`
+
+---
+
+## [1.1.2] — 2026-03-20
+
+### Fixed
+
+- **App does not start after build** — `m365_db.py`, `scanner_worker.py`, and `VERSION` were missing from PyInstaller `datas` in `build_m365.py`; the app crashed immediately on launch because these files could not be found inside the bundle
+- **`_read_app_version()` broken in both build scripts** — still searched for `APP_VERSION = "..."` as a string literal in the scanner source, but both scanners now read from the `VERSION` file; build scripts updated to read `VERSION` directly
+- **`VERSION` not bundled** — `build.py` (Document Scanner) was also missing the `VERSION` file in `datas`
+
+### Added
+
+- **`--purge` CLI flag** — permanently deletes all data files created by the scanner (SQLite database, Azure credentials, SMTP credentials, settings, checkpoint, delta tokens, language preference, OCR cache, MSAL token cache); prompts for `yes` confirmation; `--yes` skips prompt for scripted use
+- **`--export-db FILE`** — exports the database to a structured ZIP archive containing 8 JSON files; thumbnails excluded; CPR stored as hashes only
+- **`--import-db FILE`** — imports a previously exported ZIP; `--import-mode merge` (default) adds dispositions and deletion log only; `--import-mode replace` wipes and restores all tables; `--yes` skips confirmation on replace
+
+---
+
+## [1.1.1] — 2026-03-19
+
+### Fixed
+
+- **Layout collapse in light mode** — `.topbar` CSS rule was broken by an earlier edit; `border-bottom` and `background` properties were orphaned onto a dangling line, causing the topbar to render with no background and the Scan button to be nearly invisible
+- **Sidebar missing** — `.layout` used `height: 100vh` which ignored `body` padding, causing the flex layout to overflow and the sidebar to disappear
+- **macOS pywebview titlebar overlap** — content rendered behind the traffic-light buttons; fixed with `padding-top: 30px` on `body` when running inside pywebview on macOS, combined with `box-sizing: border-box` and `height: 100%` on `.layout`
+- **`<option>` elements not translated** — `applyI18n()` used `el.innerHTML` on `<option>` elements; some browsers do not re-render the select's visible text when `innerHTML` is set on an already-mounted option; switched to `el.textContent` for option elements
+- **Disposition filter dropdown not translated on load** — filter bar is hidden until first scan result arrives so `applyI18n()` on `DOMContentLoaded` missed it; `applyI18n()` is now called when the filter bar is first shown
+- **Card delete button z-index** — added `z-index: 1` to `.card-delete-btn` so it stacks correctly within its card context
+
+### Added
+
+- **`--reset-db` CLI flag** — permanently drops and recreates all database tables; shows a summary of what will be deleted and requires typing `yes` to confirm
+- **`--yes` flag** — skips confirmation prompts; use with `--reset-db` for scripted/automated resets
+- **`ScanDB.reset()`** — new method in `m365_db.py` that drops all tables in correct foreign-key order, resets `user_version` to 0, and reopens the connection with a fresh schema
+
+---
+
+## [1.1.0] — 2026-03-19
+
+### Added — M365 Scanner
+
+- **Student / staff role classification** — O365 license SKU IDs used to classify users as 🎓 Student or 👔 Staff with no extra Azure permissions required. Hardcoded known Microsoft Education SKU IDs cover M365/Office 365 A1/A3/A5 for Students and Faculty. Fragment fallback for future SKUs.
+- **Role filter in accounts panel** — All / 👔 Ansat / 🎓 Elev buttons filter the user list before selecting accounts to scan
+- **Role badge on result cards** — 🎓/👔 pill shown on every card in grid and list view
+- **`user_role` in SQLite DB** — stored in `flagged_items` table; DB migration applied automatically on first run
+- **Licensed users only** — accounts without an assigned O365 license are excluded from the user list
+- **Disposition filter in filter bar** — filter results grid by compliance disposition status
+- **Headless auto-delete of `delete-scheduled` items** — items tagged for deletion are removed automatically after each headless scan
+- **Deletion audit log** — every deletion logged to `deletion_log` table with timestamp, actor, reason, and legal basis
+- **`GET /api/db/deletion_log`** — API endpoint for the deletion log
+- **Deletion log in Article 30 report** — dedicated section with summary-by-reason table and full 7-column log
+- **Article 30 — student/staff split** — Section 3 (Data Inventory) now shows Staff and Student tables separately; parental consent note added for student items (Databeskyttelsesloven §6)
+- **`GET /api/users/license_debug`** — diagnostic endpoint showing raw SKU IDs and classified roles for each user
+- **`_resolve_display_name()`** — resolves GUIDs and "Microsoft Konto" guest account placeholders to email address throughout UI and Article 30 report
+- **Account name in Article 30** — resolved via `user_ids` stored in scan options; GUID no longer shown in any column
+- **All Article 30 strings translated** — deletion log section now uses `L()` throughout; 19 new keys in EN/DA/DE
+- **`VERSION` file** — single source of truth; both scanners read version at startup via `Path(__file__).parent / "VERSION"`
+- **`CHANGELOG.md`** — release history and versioning policy
+- **`SECURITY.md`** — responsible disclosure process
+- **`CONTRIBUTING.md`** — development setup, code style, PR process
+- **`LICENSE`** — AGPL-3.0 with commercial licensing note and GDPR disclaimer
+- **`.gitignore`** — covers credentials, databases, audit logs, venv, build artefacts
+
+### Fixed — M365 Scanner
+
+- Language switching no longer reloads the page — translations applied in-place, scan results preserved
+- Connect screen freeze — duplicate `renderAccountList` function definition caused a JavaScript syntax error that prevented `onAuthenticated()` from firing
+- Account column in Article 30 report showing GUIDs — resolved via `_acct_map` built from stored `user_ids`
+- "Microsoft Konto" / GUID display names on cards and in reports — resolved to email address
+
+### Changed — M365 Scanner
+
+- **Excel export** — 9 columns (was 7): added Account (display name), Role, and Disposition; URL hyperlink column index updated accordingly
+- **Accounts list** — licensed users only; `assignedLicenses` post-filter applied
+
+---
+
+## [1.0.0] — 2026-03-19 — Initial public release
+
+### Document Scanner (`server.py`)
+
+- Scan PDFs, Word, Excel, CSV, and image files for Danish CPR numbers
+- OCR support via Tesseract for scanned/image-based PDFs
+- NER-based detection of names, addresses, phone numbers, emails, IBANs, and bank accounts via spaCy
+- CPR validation: strict Modulus 11 check + century-digit verification
+- Redaction modes: mask CPR only, or full anonymisation of all personal data
+- Face detection and blurring in image files via OpenCV
+- Risk scoring per file based on CPR count, age, and PII density
+- Dry-run mode — scan without writing any output files
+- JSON audit log (`scanner_audit.jsonl`) — append-only, records every action
+- SQLite OCR cache (`~/.document_scanner_ocr_cache.db`) — avoids re-OCR of unchanged pages
+- Web UI on port 5000 with grid and list view, live progress, drag-and-drop upload
+- Standalone macOS `.app` and Windows `.exe` via PyInstaller + pywebview
+
+### M365 Scanner (`m365_scanner.py`)
+
+#### Scanning
+- Exchange mailboxes: all folders and subfolders, recursive, language-independent using `wellKnownName` identifiers
+- OneDrive, SharePoint, Teams file scanning via Microsoft Graph API
+- Attachment scanning: PDF, Word, Excel inside emails
+- CPR detection with the same strict validator as the Document Scanner
+- NER-based PII detection (phone, IBAN, bank account, name, address, org)
+- Progressive streaming — results appear card-by-card via Server-Sent Events
+- Incremental / resumable scans — checkpoint saved on interruption, resume on next run
+- Delta scan — Graph `/delta` endpoints fetch only changed items since last scan
+- Per-item thumbnail generation — image previews and placeholder SVGs
+
+#### Results
+- Results grid with grid and list view, search, source filter, and disposition filter
+- Account name and role (🎓 Student / 👔 Staff) badge on every card
+- 🗓 Overdue badge on items exceeding the retention cutoff
+- Preview panel with iframe preview, metadata strip, and disposition dropdown
+
+#### Compliance features
+- **Retention policy enforcement** (GDPR Art. 5(1)(e)): rolling or fiscal-year cutoff (e.g. Bogføringsloven Dec 31), 🗓 Overdue badge, bulk-delete quick filter, headless auto-delete via `--retention-years` and `--fiscal-year-end`
+- **Data subject lookup** (Art. 15/17): modal, CPR hashed before query, bulk delete with audit logging
+- **Disposition tagging** (Art. 5(1)(a)): Unreviewed / Retain (legal/legitimate/contract) / Delete-scheduled / Deleted — filter bar, preview panel, Excel export, headless auto-delete of scheduled items
+- **Deletion audit log** (Art. 5(2)): every deletion logged with timestamp, actor, reason, legal basis
+- **Article 30 report** (Art. 30): structured `.docx` export — summary, data categories, data inventory (staff and student sections), retention analysis, compliance trend, deletion audit log, methodology
+
+#### User management
+- Application mode (service account) and Delegated mode (device code flow)
+- License-based role classification: 🎓 Student / 👔 Staff detected from O365 SKU IDs — no extra permissions needed
+- Role filter buttons in accounts panel (All / 👔 Ansat / 🎓 Elev)
+- Licensed users only — accounts without an assigned license are excluded
+- Display name resolution: GUIDs and "Microsoft Konto" guest placeholders resolved to email address
+
+#### Database (`m365_db.py`)
+- SQLite persistence layer alongside JSON session cache
+- Tables: `scans`, `flagged_items`, `cpr_index`, `pii_hits`, `dispositions`, `scan_history`, `deletion_log`
+- CPR numbers stored as SHA-256 hashes only — never in plaintext
+- Schema migration support via `_MIGRATIONS` + `user_version` pragma
+
+#### Exports
+- Excel export: 9 columns including Account, Role, Disposition; per-source sheets with auto-filter
+- Article 30 Word document export
+- Email report via SMTP (STARTTLS / SMTPS / plain); headless `--email-to` flag
+
+#### Headless / scheduled mode
+- `--headless --output DIR --settings FILE` for cron / Task Scheduler
+- `--retention-years N --fiscal-year-end MM-DD` for automated retention enforcement
+- `--email-to` for automated report delivery
+- Non-interactive: deletes automatically; interactive (TTY): prompts for confirmation
+
+#### Internationalisation
+- Language files: English (`en`), Danish (`da`), German (`de`)
+- Language switching applies in-place — no page reload, scan results preserved
+
+#### Installation
+- `install_windows.ps1`: Python, Tesseract, Poppler, venv — all local to project folder, no system PATH changes; all downloads via `curl.exe`
+- `install_macos.sh`: Homebrew, Python 3.12, Tesseract, Poppler, spaCy model
+- `Dockerfile` + `docker-compose.yml` for containerised deployment
+- GitHub Actions: 4 parallel build jobs (Document Scanner + M365 × Windows + Linux), auto-release on `v*` tags
+
+---
+
+## Versioning policy
+
+- **PATCH** (`1.0.x`) — bug fixes, translation updates, minor UI tweaks
+- **MINOR** (`1.x.0`) — new feature, new suggestion from SUGGESTIONS.md implemented
+- **MAJOR** (`x.0.0`) — breaking change: DB migration required, config format change, or Azure permission requirement change
+
+To release a new version:
+
+```bash
+# 1. Update VERSION
+echo "1.1.0" > VERSION
+
+# 2. Update CHANGELOG (add new section above [1.0.0])
+
+# 3. Commit and tag
+git commit -am "Release 1.1.0"
+git tag v1.1.0
+git push && git push --tags
+# GitHub Actions builds and publishes automatically
+```
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..971ca70
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,84 @@
+# GDPRScanner — Claude Code Context
+
+A GDPR compliance scanner for Danish educational and municipal organisations. Scans Microsoft 365 (Exchange, OneDrive, SharePoint, Teams), Google Workspace (Gmail, Google Drive), and local/SMB file systems for CPR numbers and PII. Produces Excel reports, GDPR Article 30 Word documents, and supports disposition tagging, bulk deletion, scheduled scans, and multi-language UI.
+
+## How to run
+
+```bash
+source venv/bin/activate
+python gdpr_scanner.py          # http://localhost:5100
+python -m pytest tests/ -q
+```
+
+## Architecture
+
+**Entry point:** `gdpr_scanner.py` — Flask app, scan orchestration globals. SSE route must stay here — blueprints can't stream.
+
+**Split modules:** `scan_engine.py` (M365 + file scan), `sse.py` (SSE broadcast), `checkpoint.py`, `app_config.py` (all persistence), `cpr_detector.py`
+
+**Blueprints** in `routes/` — see `routes/CLAUDE.md` for state/SSE rules.
+
+**Frontend:** `templates/index.html` (SPA), `static/style.css` (all styles), `static/js/*.js` (11 ES modules + `state.js`). `static/app.js` is an archived monolith — no longer loaded.
+
+**Data dir** `~/.gdprscanner/`: `scanner.db`, `config.json`, `settings.json`, `schedule.json`, `token.json`, `delta.json`, `checkpoint.json`, `smtp.json`, `machine_id` (**never delete** — Fernet key), `role_overrides.json`, `google_sa.json`, `google.json`, `src_toggles.json`, `app.lock`, `viewer_tokens.json`
+
+## Non-obvious files
+
+| File | Why it's not obvious |
+|---|---|
+| `app_config.py` | All persistence — profiles, settings, SMTP, lang loading, viewer tokens + PIN |
+| `routes/state.py` | Shared mutable state + scan locks (not a typical Flask state file) |
+| `routes/google_scan.py` | Google scan execution lives here, not in `google_connector.py` |
+| `routes/viewer.py` | Viewer token + PIN API; also owns brute-force rate-limit state |
+| `static/js/viewer.js` | Share modal, token CRUD, viewer PIN settings UI |
+| `lang/da.json` | Danish is the primary language, but the source of truth is `en.json` |
+| `build_gdpr.py` | Desktop app builder; contains embedded `LAUNCHER_CODE` for PyInstaller |
+
+## Tests
+
+128 tests in `tests/`. No integration tests for Flask routes or live M365/Google connections.
+
+## Viewer mode (#33) — routes/viewer.py + static/js/viewer.js
+
+Read-only access for DPOs and reviewers. Key invariants:
+
+- **`/view` auth chain** — token (`?token=`) → session cookie (`session["viewer_ok"]`) → PIN form (if PIN configured) → 403. Never skip this order.
+- **`window.VIEWER_MODE`** — injected by Jinja2 in `index.html`. `auth.js` reads it at startup; adds `viewer-mode` class to `<body>`. All hide rules are CSS (`body.viewer-mode …`), not scattered JS checks — except `delBtn` in the card builder which is also guarded in JS. Hidden in viewer mode: `.sidebar` (entire left panel), `#logWrap`, `#progressBar`, scan/stop/profile/bulk-delete buttons, share button.
+- **`viewer_tokens.json` format** — stored as `{"tokens": [...], "__pin__": {"hash": "…", "salt": "…"}}`. The old bare-list format is migrated transparently on first write. Do not write the file as a bare list.
+- **`app.secret_key`** — derived from `machine_id` bytes so Flask sessions survive restarts. Set once at startup in `gdpr_scanner.py`; do not override it.
+- **`GET /api/db/flagged`** — returns `get_session_items()` (last completed scan session, joined with dispositions). Used exclusively by `_loadViewerResults()` in `results.js`. Do not confuse with `get_flagged_items()` (single scan_id, no disposition join).
+- **Rate-limit state** (`_pin_attempts` dict in `routes/viewer.py`) — in-memory only, resets on server restart. Intentional — a restart clears lockouts without a persistent store.
+- **Token onclick attributes** — Copy/Revoke buttons in `_renderTokenList()` pass the token as a single-quoted JS string literal (`'\'' + tok.token + '\''`), never via `JSON.stringify`. `JSON.stringify` produces double-quoted strings that break the surrounding `onclick="…"` HTML attribute.
+- **Settings Security pane** — Admin PIN and Viewer PIN groups live in `stPaneSecurity`, not `stPaneGeneral`. `switchSettingsTab('security')` in `sources.js` triggers both `stLoadPinStatus()` and `stLoadViewerPinStatus()`. The Share modal Configure button opens `openSettings('security')`.
+- **`stClearViewerPin` guard** — validates that the current-PIN field is non-empty client-side before sending the DELETE request; shows an inline error and focuses the field if empty.
+
+## Sources panel resize — static/js/log.js + sources.js
+
+- **`_fitSourcesPanel()`** — called at the end of every `renderSourcesPanel()` call. Clears the panel's inline height, reads `scrollHeight` (natural content height), then either restores a saved smaller preference from `localStorage` (`gdpr_sources_h`) or pins the height to `scrollHeight`. This keeps the panel exactly as tall as needed to show all sources.
+- **`_initSourcesResize()`** — attaches pointer-drag to `#sourcesResizeHandle`. On `pointerdown` it captures `scrollHeight` as the hard max; drag up shrinks, drag down is capped at that max. Saves to `localStorage` on release; clears the key if the user drags back to full height.
+- **Do not add a fixed `max-height` or `height` to `#sourcesPanel` in HTML** — height is controlled entirely by `_fitSourcesPanel()` at runtime.
+- **Do not call `_fitSourcesPanel()` before the panel has rendered** — `scrollHeight` will be 0. The call in `renderSourcesPanel()` is the correct hook; `_initSourcesResize()` only sets up the drag handler.
+
+## Memory management — scan_engine.py
+
+Large M365 tenants can generate enormous memory pressure. Key rules to preserve:
+
+- **Email body stripped at collection time** — `_scan_user_email` calls `conn.get_message_body_text(msg)`, stores the result as `msg["_precomputed_body"]`, then deletes `msg["body"]` and `msg["bodyPreview"]` before appending to `work_items`. The processing loop reads `meta.pop("_precomputed_body", "")`. Do not re-add `body` to the `$select` query without also stripping it here.
+- **`work_items` → `deque` before processing** — converted with `deque(work_items)` and drained via `popleft()` so each item's memory is released immediately after processing. Do not convert back to a list or iterate with `enumerate()`.
+- **`del content` in file branch** — raw download bytes are deleted as soon as `content.decode()` is done (before NER/PII counting). Both the hit and no-hit paths have explicit `del content`.
+- **`del body_text` in email branch** — deleted after `_broadcast_card` call.
+- **PDF OCR images freed page-by-page** — in `document_scanner.scan_pdf`, `images[page_num-1] = None` immediately after OCR. Do not cache or accumulate page images.
+- **Memory guard** — `psutil.virtual_memory().available` checked before each M365 file download; scan skips the file if < 300 MB free.
+
+## Global gotchas
+
+- **Pattern matching in Python** — when using `str.replace()` to patch JS/HTML, whitespace and quote style must match exactly. Do an `in` check first and print a message if the pattern is not found.
+- **`__getattr__` on modules** — only resolves `module.name` access from outside, not bare name lookups inside function bodies. Always import directly.
+- **`JSON.stringify` inside `onclick="…"` attributes** — produces double-quoted strings that terminate the HTML attribute early. Use single-quoted JS string literals instead, or `data-*` attributes read from the handler.
+
+## Directory-scoped rules
+
+- `routes/CLAUDE.md` — SSE constraints, scan_progress source field, file_sources, Python gotchas
+- `static/js/CLAUDE.md` — profile dropdown, progress bar phase parsing, JS gotchas
+- `templates/CLAUDE.md` — CSS variable names, sizing rules, badge standard, design rules
+- `lang/CLAUDE.md` — i18n conventions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..3ea6908
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,130 @@
+# Contributing to GDPR Scanner
+
+Thank you for considering a contribution. This project helps organisations find
+and manage personal data in Microsoft 365 tenants. Contributions that improve
+compliance coverage, reliability, and usability are very welcome.
+
+---
+
+## Before You Start
+
+- Check the [open issues](../../issues) and [SUGGESTIONS.md](SUGGESTIONS.md) to
+  see if your idea is already tracked
+- For large features, open an issue first to discuss the approach — this avoids
+  wasted effort if the direction doesn't fit
+- Security vulnerabilities: see [SECURITY.md](SECURITY.md) — do not file public issues
+
+---
+
+## Development Setup
+
+```bash
+# Clone and set up a virtual environment
+git clone https://github.com/your-org/gdpr-scanner.git
+cd gdpr-scanner
+python3 -m venv venv
+source venv/bin/activate          # macOS / Linux
+venv\Scripts\activate             # Windows
+
+pip install -r requirements.txt
+
+# Danish NER model (optional — needed for name/address detection)
+python -m spacy download da_core_news_lg
+
+# Run the Document Scanner
+python server.py
+
+# Run the GDPRScanner
+python gdpr_scanner.py
+```
+
+You will need a Microsoft Azure app registration with the permissions described
+in the README to test GDPRScanner against a real tenant. A developer tenant
+is available for free via the [Microsoft 365 Developer Program](https://developer.microsoft.com/microsoft-365/dev-program).
+
+---
+
+## What We Welcome
+
+- Bug fixes
+- Improved CPR false-positive reduction
+- New language files (see `lang/en.lang` for the key list)
+- Items from [SUGGESTIONS.md](SUGGESTIONS.md) — check the status column first
+- Performance improvements for large tenants
+- Docker / deployment improvements
+- Documentation fixes
+
+---
+
+## Code Style
+
+**Python**
+- Follow PEP 8 with a line length of 100
+- Use type hints for function signatures
+- No external formatters are enforced — just keep it consistent with the surrounding code
+- All personal data (CPR numbers) must be SHA-256 hashed before storage — never store or log raw CPR values
+- Wrap Graph API calls in try/except and handle `M365PermissionError` gracefully
+
+**JavaScript (embedded in the Flask templates)**
+- `const` / `let` — no `var`
+- `async/await` over `.then()` chains
+- All user-visible strings must have a `data-i18n` key so translations work
+
+**SQL**
+- Use parameterised queries — never string-format SQL
+- New columns on existing tables must have a corresponding migration in `_MIGRATIONS` in `gdpr_db.py`
+
+---
+
+## Adding a Language
+
+1. Copy `lang/en.lang` to `lang/xx.lang` (ISO 639-1 code)
+2. Translate all values — keys must stay identical
+3. Test by setting `~/.m365_scanner_lang` to `xx` and restarting
+
+---
+
+## Pull Request Process
+
+1. Fork the repository and create a branch: `git checkout -b feature/my-feature`
+2. Make your changes and test them
+3. Run a syntax check: `python -m py_compile gdpr_scanner.py m365_connector.py gdpr_db.py`
+4. Update `README.md` if your change adds or changes user-visible behaviour
+5. Open a pull request with a clear description of what it does and why
+6. Link to the relevant issue or SUGGESTIONS.md item if applicable
+
+We aim to review pull requests within one week.
+
+---
+
+## Personal Data in Tests and Examples
+
+**Do not include real CPR numbers, email addresses, or names in test data,
+example output, or documentation.** Use clearly fictional values:
+
+```python
+# Good
+test_cpr = "010101-1234"   # fictional — fails Modulus 11 check
+
+# Bad
+test_cpr = "150385-1234"   # could be a real person
+```
+
+If you are testing with a real Microsoft 365 tenant, ensure you have appropriate
+authorisation to access that data.
+
+---
+
+## Contributor License Agreement
+
+By submitting a pull request you confirm that:
+
+- You wrote the contribution yourself or have the right to submit it
+- You license your contribution under the same AGPL-3.0 terms as this project
+- You understand the disclaimer in LICENSE — this is a compliance tool, not legal advice
+
+---
+
+## Code of Conduct
+
+Be respectful. Harassment of any kind will not be tolerated.
diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md
new file mode 100644
index 0000000..4aa0fd6
--- /dev/null
+++ b/DEPENDENCIES.md
@@ -0,0 +1,140 @@
+# Python Dependencies
+
+All Python modules used in the GDPR Scanner project, with a short explanation of each.
+
+## Third-party packages (install via `pip install -r requirements.txt`)
+
+### Web server
+| Module | Purpose |
+|---|---|
+| `flask` | Web server and API routing for the GDPRScanner UI |
+
+### Microsoft 365 authentication and API
+| Module | Purpose |
+|---|---|
+| `msal` | Microsoft Authentication Library — handles OAuth2 device code flow (delegated) and client credentials (application) for Microsoft Graph API access |
+| `requests` | HTTP client used for all Microsoft Graph API calls |
+
+### PDF handling
+| Module | Purpose |
+|---|---|
+| `pdfplumber` | Text extraction from PDFs with a selectable text layer — fast and accurate for native PDFs |
+| `pdf2image` | Converts PDF pages to images (via Poppler) for OCR processing of scanned/image-based PDFs |
+| `pytesseract` | Python wrapper for the Tesseract OCR engine — extracts text from rasterised PDF pages and images |
+| `pypdf` | PDF metadata reading and low-level page manipulation |
+| `reportlab` | Fallback PDF redaction via overlay rendering — used when PyMuPDF is unavailable |
+| `pymupdf` (fitz) | Physically removes the text layer from PDFs — preferred GDPR-compliant redaction method |
+
+### Document formats
+| Module | Purpose |
+|---|---|
+| `python-docx` | Read and write `.docx` Word documents; also used to generate the Article 30 Register of Processing Activities report |
+| `openpyxl` | Read and write `.xlsx` Excel files — used for the scan result export workbook |
+| `img2pdf` | Converts images to PDF for archiving redacted output |
+
+### Image processing and face detection
+| Module | Purpose |
+|---|---|
+| `opencv-python` (cv2) | Face detection in images via Haar cascade classifiers; also used for face blurring during anonymisation |
+| `numpy` | Array operations required internally by OpenCV |
+| `Pillow` (PIL) | Image manipulation — thumbnail generation, format conversion, image resizing |
+
+### NLP / Named Entity Recognition
+| Module | Purpose |
+|---|---|
+| `spacy` | NLP engine for Danish Named Entity Recognition — detects person names, addresses, and organisations in text. Requires the `da_core_news_lg` model (~500 MB) |
+
+### Archive scanning
+| Module | Purpose |
+|---|---|
+| `py7zr` | 7-Zip archive support — allows the scanner to inspect `.7z` compressed files |
+
+### Desktop app packaging
+| Module | Purpose |
+|---|---|
+| `pywebview` | Renders the Flask web UI inside a native OS window, so the packaged macOS `.app` or Windows `.exe` runs without requiring a browser |
+| `pystray` | System tray icon integration for the desktop app builds |
+| `pyinstaller` | Packages the Python application and all dependencies into a standalone executable |
+| `pyinstaller-hooks-contrib` | Community-maintained hooks that help PyInstaller correctly bundle complex packages like spaCy and OpenCV |
+
+---
+
+## Standard library modules (no installation needed)
+
+### Data storage
+| Module | Purpose |
+|---|---|
+| `sqlite3` | SQLite database — stores scan results, CPR index (hashed), dispositions, deletion audit log, and scan history in `~/.gdpr_scanner.db` |
+| `json` | Config files, checkpoint files, language files, API request/response serialisation |
+| `zipfile` | Database export/import archive creation and reading; also used in the PyInstaller build process |
+| `csv` | CSV file scanning support in the Document Scanner |
+
+### Security and hashing
+| Module | Purpose |
+|---|---|
+| `hashlib` | SHA-256 hashing of CPR numbers before storage — raw CPR values are never written to the database |
+| `secrets` | Cryptographically secure random values (used in auth state parameters) |
+
+### File system and paths
+| Module | Purpose |
+|---|---|
+| `pathlib` | Cross-platform file and directory path handling throughout the codebase |
+| `tempfile` | Temporary files for PDF and image processing — avoids leaving artefacts on disk |
+| `shutil` | File copy and directory tree operations used in the build scripts |
+
+### Networking and email
+| Module | Purpose |
+|---|---|
+| `smtplib` | SMTP email delivery for the headless report feature — supports STARTTLS and SMTPS/SSL |
+| `email` | Email message construction (MIME) for the SMTP report feature |
+
+### Text and pattern matching
+| Module | Purpose |
+|---|---|
+| `re` | Regular expression engine — CPR pattern matching, phone numbers, IBANs, email addresses, Danish bank account numbers |
+
+### Concurrency
+| Module | Purpose |
+|---|---|
+| `threading` | Background scan thread so the Flask web UI stays responsive during long scans |
+| `queue` | Server-Sent Events message queue — passes scan results from the background thread to the browser |
+| `concurrent.futures` | `ProcessPoolExecutor` for parallel OCR processing of multi-page PDFs |
+
+### I/O and streams
+| Module | Purpose |
+|---|---|
+| `io` | In-memory byte streams for generating Excel and Word documents without writing to disk |
+| `struct` | Binary data unpacking (used in some PDF processing paths) |
+
+### Date and time
+| Module | Purpose |
+|---|---|
+| `time` | Unix timestamps for scan records, audit log entries, and token expiry tracking |
+| `datetime` | Human-readable date/time formatting for reports, filenames, and retention cutoff calculations |
+
+### System and process
+| Module | Purpose |
+|---|---|
+| `platform` | Detects the operating system for macOS/Windows-specific code paths |
+| `subprocess` | Launches Tesseract and Poppler as external processes for OCR and PDF rendering |
+| `argparse` | CLI argument parsing for `--headless`, `--reset-db`, `--export-db`, `--import-db` etc. |
+| `sys` | Python runtime access — `sys.exit()`, `sys.path`, `sys.version` |
+| `os` | Environment variables and low-level file operations |
+
+### Encoding and serialisation
+| Module | Purpose |
+|---|---|
+| `base64` | Encodes thumbnail images as base64 strings for embedding in JSON API responses |
+| `struct` | Binary format parsing used in some document processing paths |
+
+---
+
+## External system dependencies (not Python packages)
+
+These must be installed separately — the installers (`install_windows.ps1`, `install_macos.sh`) handle this automatically.
+
+| Tool | Purpose |
+|---|---|
+| Tesseract OCR | The OCR engine called by `pytesseract` — required for scanning image-based PDFs |
+| Tesseract language packs | `dan` (Danish) and `eng` (English) language data files for Tesseract |
+| Poppler | PDF rendering tools (`pdftoppm`, `pdfinfo`) required by `pdf2image` |
diff --git a/EFFORT_ESTIMATE.md b/EFFORT_ESTIMATE.md
new file mode 100644
index 0000000..7b775f9
--- /dev/null
+++ b/EFFORT_ESTIMATE.md
@@ -0,0 +1,67 @@
+# GDPRScanner — Build Effort Estimate
+
+Estimated man-hours to build this project from scratch, based on static analysis of v1.6.13.
+
+---
+
+## Codebase Stats
+
+| Metric | Count |
+|---|---|
+| Source files (excl. dist / build / venv) | ~70 |
+| Lines of code (Python + JS + HTML + CSS) | ~25,400 |
+| Test lines | ~1,280 (128 tests) |
+| Language files | ~2,300 lines (DA / EN / DE) |
+| Current version | v1.6.13 |
+
+---
+
+## Estimate by Component
+
+| Component | Key Files | LOC | Hours |
+|---|---|---|---|
+| **CPR detector** — regex, modulo-11 validation, context filtering, false-positive suppression | `cpr_detector.py` | 446 | 40–60 |
+| **Document scanner** — PDF text + OCR, Word, Excel, PowerPoint, images; memory-safe page-by-page processing | `document_scanner.py` | 2,659 | 160–240 |
+| **Microsoft 365 connector** — Exchange mail, OneDrive, SharePoint, Teams, delta sync, Microsoft Graph API, MSAL auth | `m365_connector.py`, `scan_engine.py`, `m365_launcher.py` | 2,748 | 240–320 |
+| **Google Workspace connector** — Gmail, Google Drive, service account + OAuth 2.0 flows | `google_connector.py`, `routes/google_scan.py`, `routes/google_auth.py` | 1,300 | 120–160 |
+| **File / SMB scanner** — local filesystem and network share scanning | `file_scanner.py` | 600 | 40–80 |
+| **Database layer** — SQLite schema, migrations, scan sessions, dispositions, delta tracking | `gdpr_db.py` | 954 | 80–120 |
+| **Export system** — formatted Excel reports, GDPR Article 30 Word documents | `routes/export.py` | 1,222 | 120–160 |
+| **Flask app + SSE + orchestration** — server-sent events, scan threading, checkpointing, resume | `gdpr_scanner.py`, `sse.py`, `checkpoint.py` | 2,400 | 120–160 |
+| **Frontend SPA** — 11 ES modules, real-time progress, results viewer, profiles, sources panel, viewer mode | `static/js/*.js`, `templates/index.html`, `static/style.css` | 7,800 | 200–280 |
+| **App config + persistence + encryption** — profiles, settings, SMTP, Fernet key, viewer tokens + PIN | `app_config.py` | 794 | 40–80 |
+| **Desktop app builder** — PyInstaller packaging for macOS and Windows, embedded webview | `build_gdpr.py` | 1,095 | 80–120 |
+| **Scheduler** — cron-like scheduled scans, background thread management | `scan_scheduler.py`, `routes/scheduler.py`, `static/js/scheduler.js` | 1,084 | 40–80 |
+| **Auth + viewer mode + roles** — M365 / Google OAuth, viewer tokens, PIN brute-force protection, SKU role classification | `routes/auth.py`, `routes/viewer.py`, `static/js/auth.js`, `static/js/viewer.js` | 750 | 80–120 |
+| **Multi-language support** — Danish, English, German UI strings | `lang/da.json`, `lang/en.json`, `lang/de.json` | 2,300 | 40–60 |
+| **Test suite** — 128 unit tests | `tests/` | 1,282 | 40–80 |
+| **Documentation + CI/CD + install scripts** — GitHub Actions, macOS / Windows installers, user manuals | `docs/`, `.github/`, `*.sh`, `*.ps1` | — | 40–60 |
+
+---
+
+## Total Estimate
+
+| Scenario | Hours | Calendar time (1 dev, 40 hrs/wk) | Calendar time (2-person team) |
+|---|---|---|---|
+| **Low** | ~1,500 | ~9 months | ~5 months |
+| **Mid** | ~2,000 | ~12 months | ~6 months |
+| **High** | ~2,500 | ~15 months | ~8 months |
+
+The mid estimate (~2,000 hours) is the most realistic for a single senior developer building iteratively toward a v1.6 release.
+
+---
+
+## Complexity Drivers
+
+These factors push the estimate beyond what raw line counts suggest:
+
+- **Microsoft Graph API** — Exchange, SharePoint, and Teams scanning involve underdocumented API behaviour, throttling, delta-token management, and permission edge cases. Research and debugging overhead is substantial.
+- **CPR validation domain knowledge** — Danish modulo-11 rules, context-aware false-positive filtering, and handling of anonymised or test numbers requires specialised understanding.
+- **Memory management at scale** — The `deque`-drain pattern, page-by-page OCR image freeing, and pre-scan memory guards (`psutil`) are non-obvious and emerged through iteration on large tenants.
+- **Cross-platform desktop packaging** — Producing a signed `.app` for macOS and an `.exe` for Windows via PyInstaller, with an embedded webview, is a significant and ongoing maintenance burden.
+- **SSE + Flask threading** — Correct scan locking, SSE fan-out, and safe state sharing across threads is difficult to get right without subtle race conditions.
+- **Version iteration** — v1.6.13 represents at least 13 significant release cycles. The first working prototype likely consumed roughly half the total hours; the accumulated refinement accounts for the rest.
+
+---
+
+*Generated 2026-04-11 based on static analysis of GDPRScanner v1.6.13.*
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e3d582a
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,49 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2024-2026 Henrik Højmark
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+
+AUTHORSHIP AND AI ASSISTANCE
+
+This software was developed by Henrik Højmark. Development was conducted with
+substantial AI assistance (Claude by Anthropic), used as a pair-programming
+tool. All design decisions, architecture, requirements, and validation were made
+by the author. The use of AI tooling does not diminish authorship — it is
+analogous to the use of any other development tool or reference.
+
+ADDITIONAL TERMS — COMMERCIAL USE
+
+If you wish to use this software in a commercial SaaS product or managed
+service without complying with the AGPL-3.0 source disclosure requirements,
+a commercial license is available. Please contact the project maintainers.
+
+--------------------------------------------------------------------------------
+
+DISCLAIMER — NOT LEGAL ADVICE
+
+This software is a technical tool intended to assist with GDPR compliance
+activities. It does not constitute legal advice. The authors make no
+representation that use of this tool satisfies any specific legal obligation.
+You are responsible for ensuring your use of this software complies with
+applicable law, including GDPR, Databeskyttelsesloven, and any other relevant
+regulations in your jurisdiction.
+
+CPR numbers (Danish personal identification numbers) are special category
+personal data. Handle scan results with appropriate care and access controls.
diff --git a/MAINTAINER.md b/MAINTAINER.md
new file mode 100644
index 0000000..37da92d
--- /dev/null
+++ b/MAINTAINER.md
@@ -0,0 +1,205 @@
+# Maintainer Guide
+
+*Written for future Henrik — assuming Python proficiency, returning after time away.*
+
+---
+
+## The short version
+
+When something breaks, the structure tells you where to look.
+When you want to add something, `SUGGESTIONS.md` has the context.
+When you're unsure if a change broke anything, run `pytest tests/`.
+
+---
+
+## Project structure
+
+```
+gdpr_scanner.py        Entry point. Flask app, route definitions, blueprint
+                       registration, CLI argument handling. Thin coordinator —
+                       it imports from the modules below and re-exports them.
+
+sse.py                 Server-Sent Events. broadcast(), the SSE queues, and
+                       the replay buffer. Touch this if live progress breaks.
+
+checkpoint.py          Scan checkpoint and delta token persistence. Touch this
+                       if resume/incremental scanning breaks.
+
+app_config.py          Everything configuration: i18n loading, Article 9
+                       keywords, admin PIN, scan profiles, SMTP config, file
+                       source definitions, Fernet encryption. Touch this if
+                       settings, language, or profiles break.
+
+cpr_detector.py        CPR detection engine. _scan_bytes() dispatches to the
+                       right scanner by file type. Touch this if detection
+                       accuracy changes or file type support is needed.
+
+scan_engine.py         M365 and file-system scan orchestration. run_scan() and
+                       run_file_scan(). The most complex file — ~1000 lines.
+                       Touch this for scan behaviour, collection logic, or
+                       new M365 sources.
+
+gdpr_db.py             SQLite persistence layer. ScanDB class. Touch this for
+                       DB schema changes, new tables, or query logic.
+
+document_scanner.py    CPR regex, NER, OCR, face detection, PDF/DOCX/XLSX
+                       scanning. Pre-existing module — treat as a dependency.
+                       Avoid modifying unless you really need to.
+
+m365_connector.py      Microsoft Graph API client. Auth, token refresh, all
+                       the iter_* fetchers. Touch this for M365 API changes.
+
+google_connector.py    Google Workspace connector. Service account auth, Gmail
+                       and Drive iterators. Touch this for Google API changes.
+
+routes/                Flask blueprints — one file per functional area.
+  auth.py              M365 sign-in / sign-out / device code flow
+  scan.py              /api/scan/start, /api/scan/stop, /api/scan/status
+  export.py            Excel and Article 30 Word export
+  database.py          DB query endpoints (stats, trend, overdue, subject lookup)
+  users.py             User listing, role classification, SKU debug
+  sources.py           File source management (local and SMB)
+  profiles.py          Scan profile CRUD
+  email.py             Email report sending via SMTP / Graph API
+  scheduler.py         APScheduler integration
+  google_auth.py       Google service account connect / disconnect
+  google_scan.py       Google Workspace scan start / cancel / users
+  app_routes.py        Misc: about, language selector, settings, delta status
+
+tests/                 pytest test suite — 128 tests, all should pass.
+  test_document_scanner.py   CPR detection accuracy and false positive checks
+  test_app_config.py         i18n, keywords, config, profiles, encryption
+  test_checkpoint.py         Checkpoint and delta token persistence
+  test_db.py                 Database round-trips, CPR hashing, dispositions
+```
+
+---
+
+## When something breaks
+
+**Scan finds nothing / wrong count**
+→ `cpr_detector.py` → `_scan_bytes()` and `_scan_text_direct()`
+→ `scan_engine.py` → `run_scan()` for M365, `run_file_scan()` for files
+
+**Progress bar / live log not updating**
+→ `sse.py` → `broadcast()`
+→ `gdpr_scanner.py` → `scan_stream()` — check `sse._current_scan_id`
+→ `static/app.js` → `_attachScanListeners()` and `scan_progress` handler
+
+**Cards not appearing after scan**
+→ `static/app.js` → `scan_file_flagged` handler → calls `applyFilters()`
+→ `static/app.js` → `scan_done` handler → shows `filterBar`
+
+**Export (Excel / Art.30) fails**
+→ `routes/export.py` → checks `state.flagged_items`, falls back to DB
+→ If DB is empty, a scan has not been run or results were cleared
+
+**Authentication / sign-in issues**
+→ `routes/auth.py` for M365
+→ `routes/google_auth.py` for Google Workspace
+→ `gdpr_scanner.py` — `_connector = _state.connector = ...` must stay dual-assigned
+
+**Settings stats show 0 (Scanned / Flagged / Scans)**
+→ `routes/database.py` → `db_stats()` — queries `flagged_items` and `scans` directly
+→ Stats populate from existing DB on app start — no re-scan needed
+→ If still 0 after a completed scan: check `~/.gdprscanner/scanner.db` exists and is not empty
+
+**File scan results not persisting to DB**
+→ `scan_engine.py` → `run_file_scan()` — must call `_db.begin_scan()` not `start_scan()`
+→ Check terminal output for `[db] begin_scan failed` to confirm
+
+**Settings / profiles / language not loading**
+→ `app_config.py`
+→ Config files live in `~/.gdprscanner/` — see the migration shim in `gdpr_scanner.py` for the legacy `~/.gdpr_scanner_*` paths
+
+**Scheduled scans not running or not showing in UI**
+→ `scan_scheduler.py` / `scheduler.py`
+→ `routes/scheduler.py`
+→ Schedule config: `~/.gdprscanner/schedule.json`
+
+---
+
+## Running the tests
+
+```bash
+cd GDPRScanner_v1.6.x
+pytest tests/
+```
+
+Run this before every release and after any change to:
+- `document_scanner.py` — CPR detection
+- `cpr_detector.py` — file type dispatch
+- `gdpr_db.py` — database layer
+
+A failing CPR detection test is a compliance issue, not just a software bug.
+
+---
+
+## Key data files (all in `~/.gdprscanner/`)
+
+All data files live in **`~/.gdprscanner/`** (created automatically on first run).
+Existing `~/.gdpr_scanner_*` files are migrated automatically.
+
+| File | Contents |
+|---|---|
+| `scanner.db` | SQLite — all scan results, CPR index, dispositions, history |
+| `config.json` | Azure client ID / tenant ID |
+| `settings.json` | Last-used scan options |
+| `schedule.json` | Scheduled scan configuration |
+| `token.json` | Cached MSAL token (delegated mode) |
+| `delta.json` | Microsoft Graph delta tokens |
+| `checkpoint.json` | Mid-scan checkpoint (deleted on completion) |
+| `smtp.json` | SMTP config (password Fernet-encrypted) |
+| `machine_id` | Fernet key for the SMTP password — never move `smtp.json` to another machine without this file |
+| `role_overrides.json` | Manual staff/student role overrides |
+| `google_sa.json` | Google service account key (chmod 600) |
+| `google.json` | Google admin email and source toggle state |
+| `src_toggles.json` | Source panel toggle state (Email, OneDrive, Gmail, etc.) |
+
+---
+
+## The files you will rarely touch
+
+- `document_scanner.py` — treat as a dependency
+- `build_gdpr.py` — only when adding new `.py` files to the project (bundle the new file in the `datas` list)
+- `install_windows.ps1` / `install_macos.sh` — only when adding new pip dependencies
+
+---
+
+## Adding a new pip dependency
+
+1. Add to `requirements.txt` with a version pin and a comment
+2. Add to `install_windows.ps1` (the packages array)
+3. Add to `install_macos.sh` (the packages array)
+4. If building the app: no change needed — PyInstaller follows imports automatically
+
+---
+
+## The documents that have the history
+
+| Document | What it contains |
+|---|---|
+| `SUGGESTIONS.md` | Every feature idea, why it was or wasn't implemented, current status |
+| `CHANGELOG.md` | What changed in each version, including root causes of bugs fixed |
+| `CONTRIBUTING.md` | How to contribute, code style, translation guide |
+| `DEPENDENCIES.md` | What each dependency is for and why it was chosen |
+
+When you're unsure why something was done a certain way, read `SUGGESTIONS.md` first.
+When you're debugging a regression, read `CHANGELOG.md` for the version where it appeared.
+
+---
+
+## The one thing to know about the module split
+
+`gdpr_scanner.py` imports from all five sub-modules and re-exports them.
+The Flask blueprints in `routes/` use `__getattr__` to lazily resolve names
+from `gdpr_scanner` — so they work unchanged even though the code moved.
+
+If you add a new function to `app_config.py` or `cpr_detector.py` and need
+it accessible from a route blueprint, add it to the `from app_config import (...)`
+block near the top of `gdpr_scanner.py`.
+
+---
+
+*This project was built by Henrik Højmark with AI assistance (Claude by Anthropic)
+as a pair-programming tool. All design decisions were made by the author.*
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..aa58d31
--- /dev/null
+++ b/README.md
@@ -0,0 +1,629 @@
+# GDPRScanner
+
+Scans Microsoft 365, Google Workspace, and local/network file systems for Danish
+CPR numbers and personal data (PII). Produces GDPR compliance reports and supports
+Article 30 record-keeping obligations.
+
+---
+
+**Developed by Henrik Højmark**
+
+This project was built with substantial assistance from AI (Claude by Anthropic),
+used as a pair-programming tool throughout development. All design decisions,
+requirements, testing, and validation were made by the author. The AI generated
+code under direction — the same way a developer might use a senior colleague or
+an IDE with intelligent completion. The result is the author's work.
+
+---
+
+`gdpr_scanner.py` scans Microsoft 365 cloud sources — Exchange email (including all subfolders), OneDrive, SharePoint, and Teams — for Danish CPR numbers and PII. It connects to the Microsoft Graph API and does not require local file access.
+
+### What it does (M365)
+
+- **Scans Exchange mailboxes** — email body and attachments, across **all folders and subfolders** recursively (Inbox, custom folders, nested folders). System folders (Deleted Items, Junk, Drafts, Sent, etc.) are automatically skipped using Exchange `wellKnownName` identifiers (language-independent — works correctly for Danish, German, and other locales)
+- **OneDrive, SharePoint, Teams** — scans files in all connected sources
+- **Subfolder prioritisation** — custom subfolders are scanned before Inbox to prevent a large Inbox from exhausting the per-user email cap
+- **EML attachment preview** — email attachments with CPR hits are listed in the preview panel with per-attachment CPR counts
+- **Folder path in results** — each email result shows its full folder path (e.g. `Inbox / Ansøgninger pædagog SFO`) in the card and in Excel export
+- **Delete items** — flagged results can be deleted directly from the UI, individually or in bulk
+- **CPR false-positive reduction** — strict CPR validation
+- **Excel export** — multi-tab `.xlsx` report with per-source breakdown, auto-filters, and URL hyperlinks. Columns include: Name, CPR Hits, Face count, GPS (✔ if GPS in EXIF), Special category, EXIF author, Folder, Account, Role, Disposition, Date Modified, Size (KB), URL. A dedicated **GPS locations** sheet lists all items with GPS coordinates including a Google Maps link. Separate tabs for Outlook (Exchange), OneDrive, SharePoint, Teams, Gmail, Google Drive, local folders, and SMB/network shares. Summary sheet shows counts by source and GPS item total. When M365, Google Workspace, and file scans run concurrently, all results are captured in the export — not just the last completed scan
+- **Progressive streaming** — results stream card-by-card via Server-Sent Events as the scan runs
+- **Token auto-refresh** — expired tokens are detected and silently refreshed mid-scan without interrupting the UI
+- **Incremental / resumable scans** — interrupted scans save a checkpoint; the next run resumes from where it stopped rather than starting over
+- **Delta scan** — uses Graph `/delta` endpoints to fetch only changed items since the last scan, cutting API quota usage and scan time on large tenants
+- **Headless / scheduled mode** — `--headless` flag runs a non-interactive scan and writes an Excel report to disk; combine with cron or Windows Task Scheduler for fully automated compliance scans. **Settings → Scheduler** supports multiple named scan jobs, each with its own frequency (daily/weekly/monthly), time, profile, auto-email, and retention settings. Enable/disable each job with an inline toggle. In application mode, scheduled jobs reconnect automatically without requiring the browser to be open
+- **EXIF metadata extraction** — GPS coordinates, author, description, device extracted from all scanned images. GPS badge on cards when location data is present. Collapsible EXIF panel in local file previews. No extra dependencies — uses `Pillow` which is already required.
+- **`--purge`** — permanently deletes all data files created by the scanner (database, credentials, cache); use before decommissioning
+- **`--export-db`** / **`--import-db`** — export the database to a ZIP archive or restore from one; supports `--import-mode merge` (default) and `--import-mode replace`
+- **`--reset-db`** — wipe and recreate the database; also clears the checkpoint and delta tokens
+- **Email report** — send the Excel report by email directly from the UI or via `--email-to` in headless mode. Prefers **Microsoft Graph API** when connected to M365 (no SMTP AUTH needed — requires `Mail.Send` permission). Falls back to `smtplib` SMTP with STARTTLS/SSL support. A **Test** button verifies end-to-end delivery.
+- **Account name on cards** — when scanning multiple users, each card displays the owner's display name so results from different mailboxes are instantly distinguishable
+- **Retention policy enforcement** — flag items older than a configurable retention period with an **Overdue** badge; supports both rolling and fiscal-year-aligned cutoffs (e.g. Bogføringsloven Dec 31); headless auto-delete via `--retention-years`
+- **Data subject lookup** — find all flagged items containing a specific CPR number across all scans; CPR is SHA-256 hashed before querying — never stored in plaintext
+- **Disposition tagging** — compliance officers can tag each flagged item with a legal basis (retain / delete-scheduled / deleted) directly from the preview panel
+- **Read-only viewer mode** — share scan results with a DPO or manager via a secure token URL (`/view?token=…`) or a numeric PIN; viewers see the full results grid and disposition panel but cannot scan, delete, or change settings
+- **Article 30 report** — one-click export of a structured Word document (`.docx`) satisfying the GDPR Article 30 register of processing activities obligation
+- **SQLite results database** — scan results, CPR index, PII breakdown, disposition decisions, and scan history are persisted to `~/.gdprscanner/scanner.db` alongside the JSON cache, enabling cross-scan queries and trend tracking
+- **Built-in user manual** — click the **?** button in the top bar to open the manual in a dedicated window. Available in Danish and English. Printable via the browser's print function. Served from `MANUAL-DA.md` / `MANUAL-EN.md` at `/manual?lang=da|en` — always in sync with the installed version, no internet required. In the packaged desktop app the manual opens as a native pywebview window; in the browser it opens as a popup.
+
+---
+
+## Microsoft 365
+
+See [M365_SETUP.md](docs/setup/M365_SETUP.md) for step-by-step instructions — app registration, permissions, authentication modes, and headless configuration.
+
+---
+
+### M365 Web UI
+
+```
+python gdpr_scanner.py [--port PORT]
+```
+
+> The scanner expects `templates/` and `static/` in the same directory as `gdpr_scanner.py`. Flask serves `templates/index.html` as the UI. The JavaScript is split across 12 ES modules in `static/js/` (`state.js` + 11 feature modules loaded as `<script type="module">`). All API routes live in `routes/` as Flask Blueprints registered at startup.
+
+Default port: **5100**. If that port is already in use the server auto-increments (5101, 5102, …) and logs which port was chosen. Override with `--port N`. Only one instance may run at a time — a second launch exits immediately with an error rather than corrupting the shared database.
+
+#### Sources panel
+
+The sidebar sources panel lists all configured scan sources. Click **Sources** to open the unified Source Management modal. The panel is collapsible (▾/▸ toggle, state persisted) and resizable — drag the handle at the bottom edge to shrink it; the maximum height is automatically capped to show all available sources with no empty space.
+
+**Microsoft 365 tab** — Azure credentials (Client ID, Tenant ID, Client Secret), auth mode (Application / Delegated), and per-source visibility toggles (Email, OneDrive, SharePoint, Teams). Sources toggled off are hidden from the sidebar panel and excluded from scans.
+
+**Google Workspace tab** — Two authentication modes: **Workspace** (service account with domain-wide delegation — scans all users) and **Personal account** (OAuth 2.0 device-code flow — scans the signed-in account only). Once connected, per-source toggles control whether Gmail and/or Google Drive appear in the sidebar panel and are included in scans. See [GOOGLE_SETUP.md](docs/setup/GOOGLE_SETUP.md) for setup instructions.
+
+**File sources tab** — Add local folder paths or SMB/CIFS network shares with a name, path, and optional SMB credentials. Each saved source appears as a checkbox in the sidebar panel (local, SMB/network). Use the **Edit** button on each row to update credentials or rename a source without deleting it.
+
+**Skipped automatically:** `.recycle`, `.sync`, `.btsync`, `.trash`, `.git`, `node_modules`, `System Volume Information`, and other system/sync folders. Hidden directories (`.` prefix) are skipped too.
+
+**PDF scanning in file scans:** PDFs are scanned in a dedicated subprocess spawned via `multiprocessing.get_context("spawn")` with a 60-second hard timeout. If a PDF's OCR (Tesseract/Poppler) stalls, the subprocess is terminated and the file is skipped with an error card — the scan thread is never blocked. The `spawn` context is required on macOS + Flask to avoid duplicating the server socket.
+
+**Preview panel** — opens to the right of the results grid when a card is clicked. The panel is resizable: drag the left edge to adjust its width (min 280 px, max 70% of window). Width is remembered for the session. Click **×** to close.
+
+**Local file preview** — clicking a result card renders the file content inline:
+
+| Type | Preview |
+|---|---|
+| PDF | First 5 pages as text via `pdfplumber`, CPR numbers highlighted |
+| XLSX / XLSM / CSV | First 50 rows as a table (up to 3 sheets for Excel) |
+| DOCX / DOC | First 80 paragraphs as text, CPR numbers highlighted |
+| Images | Inline image + collapsible EXIF metadata panel (GPS, author, device, datetime) |
+| TXT / EML / MD / log | Full text with CPR highlights |
+
+Sources from all tabs can be selected independently in the sidebar before scanning. The selection is saved as part of scan profiles.
+
+#### User accounts panel
+
+In Delegated mode, accounts are added via the device code flow. In Application mode, the scanner fetches all users in the tenant. Users are listed with checkboxes — all unchecked by default. Use **All / None** to select or deselect everyone, filter by name with the search field, or add a user manually by email with the **+** button.
+
+**Role classification** — users are automatically classified as Student or Staff based on their Microsoft 365 licence. Role badges appear on every account row, on result cards, and in the Article 30 report (separate Staff and Student inventory tables).
+
+Role detection works in two passes:
+1. **`skuPartNumber` fragment match** (preferred) — strings like `STANDARDWOFFPACK_FACULTY` are stable across all Microsoft licensing generations (EA, A1/A3/A5, new commerce/CSP). Runs first whenever part numbers are available.
+2. **SKU ID lookup** from `classification/m365_skus.json` — fallback for when part numbers are unavailable or for licences with no recognisable fragment (e.g. Power Automate Free assigned to faculty).
+
+**Filter buttons** — **All / Ansat / Elev** filter the accounts list before selecting who to scan.
+
+**SKU debug** — the magnifying-glass button next to the role filters opens a modal listing every unique SKU ID in the tenant, colour-coded student / staff / unknown. Unknown IDs can be copied directly into `classification/m365_skus.json` and take effect on the next restart.
+
+**Manual role override** — if auto-classification is wrong for a specific user, click the role badge on their row to cycle through `student → staff → other → (clear)`. Overrides are stored in `~/.gdpr_scanner_role_overrides.json` and persist across restarts. A pencil indicator appears on overridden rows. Click through until the pencil disappears to revert to auto-detection.
+
+**`classification/m365_skus.json`** — the SKU ID and fragment file lives in the `classification/` folder alongside `lang/` and `keywords/`. Edit it to add new or tenant-specific SKU IDs without any code change; the file is reloaded on every restart.
+
+#### Date filter
+
+A date-from picker limits the scan to items modified after the selected date. Quick presets: **1 yr / 2 yr / 5 yr / 10 yr / Any**. Selecting "Any" sets the date to today (no cutoff).
+
+#### Options
+
+| Option | Default | Description |
+|---|---|---|
+| Scan email body | On | Scan the plain-text body of each email |
+| Scan attachments | On | Scan PDF/Word/Excel attachments inside emails |
+| Max attachment size | **20 MB** | Skip attachments larger than this threshold |
+| Max emails per user | **2000** | Cap per mailbox to avoid very long scans |
+| **Δ Delta scan** | Off | Fetch only changed items since the last scan — hover the **?** for details (see [Delta scan](#delta-scan) below) |
+| **Scan photos for faces** | Off | Detect faces in image files and flag as Art. 9 biometric data — hover the **?** for details (see [Photo scanning](#photo--biometric-scanning) below) |
+| **Retention policy** | Off | Flag items older than N years — hover the **?** for details (see [Retention policy](#retention-policy-enforcement)) |
+
+#### Results grid
+
+Each flagged item appears as a card showing:
+- File / subject name
+- CPR hit count badge
+- Source badge (Email / OneDrive / SharePoint / Teams)
+- Source account with role badge (**Student** / **Staff**)
+- Modified / received date
+- **Folder path** — shown for emails (e.g. `Inbox / Ansøgninger pædagog SFO`)
+- **Account name** — owner's display name shown on every card when scanning multiple users
+- **Overdue badge** — amber badge on items exceeding the configured retention cutoff
+- **Art.9** badge — purple pill listing detected Article 9 special categories (health, criminal, biometric, etc.)
+- **N faces** badge — teal pill on image files where face detection found identifiable persons (biometric data)
+- **Ext.** / external-share badge — external email recipient or externally shared file (Art. 44–46 transfer risk)
+- **delete button** — appears on hover (grid view) or always visible (list view)
+
+**Filter bar** — always visible above both the results grid and the preview panel. Narrow results by source, disposition, transfer risk, and risk level:
+
+| Filter | Options |
+|---|---|
+| Source | All / Email / OneDrive / SharePoint / Teams |
+| Disposition | All / Unreviewed / Retain (legal/legitimate/contract) / Delete-scheduled / Deleted |
+| Transfer risk | All / External recipient / External share / Shared |
+| Risk level | All risk levels / Art. 9 special category / Photos / biometric |
+
+#### Delete items
+
+Individual items can be deleted directly from their card (hover to reveal the delete button, then confirm). Emails are moved to Deleted Items; files go to the recycle bin.
+
+The **Delete** button in the filter bar opens the **Bulk Delete** modal, which lets you filter by:
+
+| Criterion | Description |
+|---|---|
+| Source type | Email / OneDrive / SharePoint / Teams / All |
+| Min CPR hits | Only delete items with at least N CPR numbers found |
+| Older than date | Only delete items older than a given date |
+
+The **Filter overdue** quick button pre-populates the date filter with the exact retention cutoff from the database, making it one click to select all overdue items for deletion.
+
+A live preview shows how many items match before you confirm. Errors are reported per-item in the log panel.
+
+> **Requires write permissions** — see [M365_SETUP.md](docs/setup/M365_SETUP.md) for the required Azure permissions.
+
+#### Excel export
+
+The **⬇ Excel** button exports all current results to a `.xlsx` file (`m365_scan_YYYYMMDD_HHMMSS.xlsx`) with five sheets:
+
+| Sheet | Contents |
+|---|---|
+| Summary | Scan timestamp, total count, per-source breakdown |
+| Email | Flagged emails — Name/Subject, CPR Hits, **Folder**, Source Account, Date Modified, Size, URL |
+| OneDrive | Flagged OneDrive files |
+| SharePoint | Flagged SharePoint files |
+| Teams | Flagged Teams files |
+
+In macOS app builds, the export opens a native Save dialog instead of a browser download.
+
+The **Art.30** button generates a **GDPR Article 30 Register of Processing Activities** as a structured Word document (`.docx`). See [Article 30 report](#article-30-report) below.
+
+#### Email report
+
+Configure email delivery in **Settings → Email report**. Click **Save** to store your SMTP settings, **Test** to send a real test email to the configured recipients, and **Send now** to dispatch the latest scan report. When connected to Microsoft 365, the scanner sends via the **Graph API** (`Mail.Send` permission required — add it in Azure AD → App registrations → API permissions). SMTP is used as a fallback when Graph is unavailable.
+
+| Field | Description |
+|---|---|
+| SMTP host | e.g. `smtp.office365.com`, `smtp.gmail.com` |
+| Port | `587` for STARTTLS (default), `465` for SMTPS/SSL |
+| Username | SMTP login — usually your sender email address |
+| Password | Saved to `~/.gdpr_scanner_smtp.json` (permissions 600). Encrypted at rest using Fernet — key in `~/.gdpr_scanner_machine_id` (chmod 0o600, never share) |
+| Graph API | When connected to M365, email is sent via `/me/sendMail` (delegated) or `/users/{sender}/sendMail` (app mode) — no SMTP password needed. Requires `Mail.Send` Graph permission with admin consent. |
+| From address | Sender address (defaults to username if blank) |
+| STARTTLS | Enable STARTTLS on port 587 (recommended) |
+| SSL | Use SMTPS on port 465 instead |
+| Recipients | Comma or semicolon separated list of addresses |
+
+Click **Save** to persist the settings. The password is stored separately from scan settings and never returned to the browser — subsequent loads show "(password saved)". Click **Send now** to email the report immediately with the current results.
+
+> **No extra dependencies** — uses Python's built-in `smtplib`. Works with Office 365, Gmail, and any standard SMTP server.
+
+#### About
+
+Click **About** in the sidebar footer to see app version, Python version, MSAL version, Requests version, and openpyxl version.
+
+---
+
+## Google Workspace
+
+See [GOOGLE_SETUP.md](docs/setup/GOOGLE_SETUP.md) for step-by-step instructions — service account creation, domain-wide delegation, OAuth scopes, and OU-based role classification.
+
+---
+
+### Incremental / resumable scans
+
+If a scan is stopped (via **■ Stop** or by closing the app) before it finishes, a checkpoint is saved to `~/.gdpr_scanner_checkpoint.json`. The next time you click **▶ Scan** with the same configuration, a banner appears above the progress bar:
+
+```
+⏸  Previous scan interrupted — 847 scanned, 12 found  [Resume]  [Start fresh]
+```
+
+- **Resume** — skips the 847 already-scanned items, re-emits the 12 previously found cards immediately, and continues from where it left off
+- **Start fresh** — discards the checkpoint and starts a new full scan
+
+The checkpoint is keyed by a hash of the scan configuration (sources + users + date cutoff). Changing any of those settings automatically starts fresh. The checkpoint is deleted automatically when a scan completes successfully.
+
+---
+
+### Delta scan
+
+Delta scan uses the Microsoft Graph `/delta` API to fetch only items that have **changed since the last scan**, dramatically reducing Graph API quota usage and scan time on large tenants.
+
+#### How it works
+
+1. Run one **full scan** first (Delta checkbox off) — this establishes baseline delta tokens
+2. Tick **Δ Delta scan** and run again — only items added, modified, or deleted since the previous scan are fetched and CPR-scanned
+3. Delta tokens are saved automatically to `~/.gdpr_scanner_delta.json` after each successful scan
+4. To force a full rescan, click **Clear tokens** under the checkbox (or delete the file)
+
+Delta tokens are stored **per-source**:
+
+| Token key | Covers |
+|---|---|
+| `onedrive:{user_id}` | One user's OneDrive drive |
+| `sharepoint:{drive_id}` | One SharePoint document library |
+| `teams:{drive_id}` | One Teams channel file store |
+| `email:{user_id}:{folder_id}` | One mail folder for one user |
+
+If a token expires (Graph returns HTTP 410 Gone), that source falls back to a full collection automatically and a fresh token is saved. Other sources are unaffected.
+
+Deleted items returned by delta (items with a `deleted` or `@removed` marker) are skipped during CPR scanning.
+
+After each delta scan, the log panel shows:
+```
+Scan complete — 3 flagged of 41  (Δ delta — 6 source(s) indexed)
+```
+
+#### Delta in headless mode
+
+Pass `"delta": true` inside the `options` block of your `--settings` JSON to enable delta for scheduled scans:
+
+```json
+{
+  "options": { "delta": true, "older_than_days": 365 }
+}
+```
+
+---
+
+### Headless mode (scheduled / automated scans)
+
+> **Note:** The scheduler engine lives in `scan_scheduler.py`.
+
+Run the scanner without a browser UI for cron jobs and Windows Task Scheduler:
+
+```bash
+python gdpr_scanner.py --headless --output ~/Reports/ --settings settings.json
+```
+
+See [M365_SETUP.md](docs/setup/M365_SETUP.md) for the full settings file format, CLI flags, and SMTP configuration.
+
+
+---
+
+### SQLite results database
+
+Scan results are persisted to `~/.gdprscanner/scanner.db` (SQLite) automatically after every scan, alongside the existing JSON session cache. The database enables cross-scan queries, trend tracking, and compliance workflows that are impractical with JSON alone.
+
+**Tables:**
+
+| Table | Contents |
+|---|---|
+| `scans` | One row per completed scan run — sources, user count, options, delta flag |
+| `flagged_items` | One row per flagged file or email — full card data |
+| `cpr_index` | `(SHA-256(cpr), item_id, scan_id)` — CPR numbers stored as hashes only, never plaintext |
+| `pii_hits` | Per-type PII counts per item (phone, IBAN, name, address, etc.) |
+| `dispositions` | Compliance officer decisions per item |
+| `scan_history` | Aggregated stats per scan for trend tracking |
+
+**API endpoints:** `GET /api/db/stats`, `GET /api/db/trend`, `GET /api/db/scans`, `POST /api/db/subject`, `GET /api/db/overdue`, `POST /api/db/disposition`, `GET /api/db/disposition/<id>`
+
+If `gdpr_db.py` is not present, the scanner falls back to JSON-only mode silently.
+
+---
+
+### Data subject lookup
+
+The **Data subject lookup** button in the sidebar opens a modal where you can search for all flagged items containing a specific CPR number across all scans.
+
+- Enter a CPR number in `DDMMYY-XXXX` format and press Enter or click **Search**
+- Results show file/email name, source type, date, and CPR hit count
+- **Delete all for this person** button triggers bulk deletion of all matching items and refreshes the grid
+- The CPR number is SHA-256 hashed before querying — it is never stored in plaintext in the database or logs
+
+This directly supports the GDPR **right of access (Article 15)** and **right to erasure (Article 17)**.
+
+---
+
+### Disposition tagging
+
+Every flagged item can be tagged with a compliance decision from the preview panel. Open any card, and the **Disposition** dropdown appears below the metadata strip.
+
+| Value | Meaning |
+|---|---|
+| Unreviewed | Default — not yet assessed |
+| Retain — legal obligation | Must keep (e.g. Bogføringsloven) |
+| Retain — legitimate interest | Justified retention, documented |
+| Retain — contract | Part of an active contract |
+| Delete — scheduled | Mark for deletion at next cleanup run |
+| Deleted | Already actioned |
+
+Dispositions are saved to the `dispositions` table in the SQLite database and included in the Article 30 report.
+
+---
+
+### Retention policy enforcement
+
+Enable **Retention policy** in the options panel to flag items that exceed your retention threshold.
+
+**Settings:**
+
+| Setting | Description |
+|---|---|
+| Retention years | How many years to retain (default: 5) |
+| Fiscal year end | Rolling (from today) / 31 Dec (Bogføringsloven) / 30 Jun / 31 Mar |
+
+**Two cutoff modes:**
+
+- **Rolling** — exactly N years before today. Correct for GDPR general data minimisation.
+- **Fiscal year** — N years before the last completed fiscal year end. Correct for Bogføringsloven, which requires records for 5 years *from the end of the financial year*. A document from January 2020 with a Dec 31 FY must be kept until **31 December 2025**, not just until January 2025.
+
+A live hint below the settings shows the exact cutoff date before you scan.
+
+After scanning, items older than the cutoff receive an amber **Overdue** badge on their card. In the bulk-delete modal, **Filter overdue** pre-fills the date filter with the exact cutoff for one-click selection.
+
+**Headless mode:**
+```bash
+python gdpr_scanner.py --headless --output ~/Reports/   --retention-years 5 --fiscal-year-end 12-31
+```
+Non-interactive (cron): deletes automatically. Interactive (TTY): prompts for confirmation.
+
+---
+
+### Scan profiles
+
+Named, reusable scan configurations — save the current sidebar state as a profile, then load it in one click or run it headlessly by name.
+
+- **Save** — prompts for a name and saves all current settings (sources, options, user selection, retention) as a profile
+- **Profile dropdown** — switch between saved profiles; applying a profile populates the entire sidebar instantly
+- **Profiles button** — opens the profile management modal to rename, edit description, duplicate, or delete profiles
+- Profiles persist across restarts in `~/.gdprscanner/settings.json`
+
+**Headless profile usage:**
+```bash
+python gdpr_scanner.py --headless --profile "Nightly email scan"
+python gdpr_scanner.py --list-profiles
+python gdpr_scanner.py --save-profile "Weekly full scan" --sources email onedrive
+python gdpr_scanner.py --delete-profile "Old scan"
+```
+
+---
+
+### Photo / biometric scanning
+
+Enable **Scan photos for faces** in the Options panel to detect photographs of identifiable persons in OneDrive, SharePoint, and Teams files.
+
+- **Formats:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`, `.webp`, `.heic`, `.heif`
+- **Face detection:** OpenCV Haar cascade (`minNeighbors=8`, `min_size=80px` — conservative; requires "Scan photos for faces" opt-in)
+- **EXIF extraction** — always-on for images regardless of the face detection toggle:
+  - **GPS coordinates** — extracted and converted to decimal degrees; GPS badge on cards; Google Maps link in preview
+  - **PII fields** — Author, Artist, Copyright, Description, UserComment, Keywords checked for content
+  - **Device** — camera make/model
+  - Images with GPS or PII-bearing EXIF are flagged even without CPR hits
+  - `special_category` gains `gps_location` and/or `exif_pii` entries
+- **GDPR classification:** Images with detected faces are automatically tagged as **Art. 9 biometric data** — the same heightened protection as health or criminal records
+- **N faces badge** — teal pill on cards; filterable via "Photos / biometric" in the Risk level dropdown
+- **Article 30 report** — dedicated section listing all photo items with a 4-bullet retention guidance block (purpose limitation, pupil consent under Databeskyttelsesloven §6, website removal, archiving)
+- **Excel export** — Face count column added
+- **Performance:** Slower than CPR scanning — opt-in only. Recommended for targeted scans of known image folders rather than full-tenant scans
+
+> **Datatilsynet guidance:** Danish schools have received enforcement actions specifically for unlawful retention of pupil photographs. Pupils under 15 require parental consent (Databeskyttelsesloven §6).
+
+---
+
+### Article 9 special categories
+
+The scanner detects keywords from nine GDPR Article 9 special categories in proximity to CPR numbers:
+
+| Category | Examples |
+|---|---|
+| Health | diagnose, sygemelding, behandling, medicin, psykiatri |
+| Mental health | depression, angst, stress, selvskade |
+| Criminal records | straffeoplysning, dom, straffeattest, sigtelse |
+| Trade union | fagforening, tillidsrepræsentant, overenskomst |
+| Religion | kirke, moské, religiøs, konfirmation |
+| Ethnicity | nationalitet, herkomst, etnicitet |
+| Political opinions | politisk, parti, valgkreds |
+| Biometric | fingeraftryk, ansigtsgenkendelse, biometrisk |
+| Sexual orientation | seksuel orientering |
+
+Keywords are loaded from `keywords/da.json` (Danish). English (`en.json`) and German (`de.json`) files can be added without code changes. Detection uses compiled per-category regex patterns for efficient matching.
+
+---
+
+### Database export / import
+
+**Export** and **Import** buttons in the sidebar **Database** section back up or restore the entire compliance record.
+
+```bash
+# CLI equivalents
+python gdpr_scanner.py --export-db ~/compliance/gdpr_export_2026.zip
+python gdpr_scanner.py --import-db ~/compliance/gdpr_export_2026.zip
+python gdpr_scanner.py --import-db ~/compliance/gdpr_export_2026.zip --import-mode replace --yes
+```
+
+**Export ZIP contents:**
+
+| File | Contents |
+|---|---|
+| `export_meta.json` | Export date, schema version, row counts |
+| `scans.json` | Scan run summaries |
+| `flagged_items.json` | Flagged items — thumbnails stripped |
+| `cpr_index.json` | CPR hashes (SHA-256 only) |
+| `pii_hits.json` | Per-type PII counts |
+| `dispositions.json` | Compliance decisions with legal basis |
+| `scan_history.json` | Aggregated trend data |
+| `deletion_log.json` | Full deletion audit trail |
+
+**Import modes:** `merge` (default — adds dispositions and deletion log only, safe on live DB) or `replace` (full restore, requires `--yes`).
+
+---
+
+### Article 30 report
+
+The **Art.30** button in the filter bar generates a GDPR **Article 30 Register of Processing Activities** as a Word document (`.docx`).
+
+**Document sections:**
+
+| Section | Contents |
+|---|---|
+| Summary | Scan date, items scanned, flagged count, CPR hits, estimated data subjects, overdue count, Art. 9 item count, photo/biometric count; per-source breakdown |
+| Data categories | Every detected PII type with hit counts and GDPR classification (Art. 9 vs Art. 4) |
+| Data inventory | Full item list sorted overdue-first; separate **Staff** and **Student** tables; name, source, account, date, CPR hits, disposition |
+| Retention analysis | Separate table of overdue items *(if any)* |
+| Art. 9 special categories | Item list with detected category breakdown *(if any)* |
+| Photographs / biometric data | Photo item list with face counts and 4-bullet retention guidance *(if photo scanning was enabled)* |
+| Compliance trend | Last 10 scans with flagged/overdue counts *(if scan history exists)* |
+| Deletion audit log | Every deletion with timestamp, actor, reason, and legal basis |
+| Methodology | Scanning approach and GDPR articles referenced (Art. 5, 9, 15, 17, 30) |
+
+The document is dated and can be stored as evidence of ongoing compliance activity for supervisory authorities.
+
+> **Requires** `python-docx` — included in `requirements.txt`.
+
+---
+
+### Building the M365 app
+
+`build_gdpr.py` packages `gdpr_scanner.py` + `m365_connector.py` + `lang/` into a standalone native app — same PyInstaller / pywebview approach as `build.py`.
+
+```bash
+python build_gdpr.py              # build for the current platform
+python build_gdpr.py --icons-only # regenerate icon_m365.icns / icon_m365.ico
+```
+
+> **Note:** Same cross-compilation restriction applies — must build on the target platform.
+
+---
+
+## Internationalisation
+
+Language files live in `lang/` alongside the scripts. As of v1.6.3 they are JSON files:
+
+| File | Language |
+|---|---|
+| `lang/en.json` | English |
+| `lang/da.json` | Danish |
+| `lang/de.json` | German |
+
+**Auto-detection:** On macOS and Linux the system locale is read from `defaults read -g AppleLocale` and `$LANG` respectively. The detected language is used automatically.
+
+**Manual override:** Create `~/.document_scanner_lang` (or `~/.m365_scanner_lang` for M365) containing just the language code, e.g. `da`. This persists across restarts.
+
+**In-app switcher:** A language selector appears in the sidebar footer. Selecting a language saves the override and applies the new translations **in place** — the page does not reload and scan results are preserved.
+
+**Adding a language:** Copy `lang/en.json`, translate all values, save as e.g. `lang/fr.json`. The app picks it up automatically on next start.
+
+**Exchange folder names** are returned by Microsoft Graph in the account's own language (e.g. "Indbakke" for Danish users) and are displayed as-is. System folders are skipped using Exchange `wellKnownName` identifiers which are always in English regardless of locale, so skip logic is language-independent.
+
+---
+
+## Open Source
+
+GDPR Scanner is open source software, licensed under the **GNU Affero General Public License v3.0 (AGPL-3.0)**.
+
+This means you are free to use, study, modify, and distribute the software. If you run a modified version as a network service (e.g. a hosted GDPR compliance tool), you must publish the source of your modifications under the same licence.
+
+A **commercial licence** is available for organisations that need to deploy the software as a managed service without the AGPL source disclosure requirement. Contact the maintainers for details.
+
+> **Disclaimer:** This tool is intended to assist with GDPR compliance activities. It does not constitute legal advice. You are responsible for ensuring your use complies with applicable law.
+
+### Contributing
+
+Contributions are welcome — bug fixes, new language files, performance improvements, and items from [SUGGESTIONS.md](SUGGESTIONS.md).
+
+Please read [CONTRIBUTING.md](CONTRIBUTING.md) before submitting a pull request. For security vulnerabilities, follow the process in [SECURITY.md](SECURITY.md) — do not file public issues.
+
+```bash
+# Quick start for contributors
+git clone https://github.com/your-org/gdpr-scanner.git
+cd gdpr-scanner
+python3 -m venv venv && source venv/bin/activate
+pip install -r requirements.txt
+python gdpr_scanner.py    # GDPRScanner on port 5100 (auto-increments if in use)
+```
+
+### Test suite
+
+GDPRScanner ships with a `pytest` test suite covering the CPR detection engine, configuration layer, checkpoint persistence, and the SQLite database.
+
+```bash
+pip install pytest
+pytest tests/
+```
+
+**112 tests across 4 modules — all expected to pass.**
+
+| Module | Tests | Covers |
+|---|---|---|
+| `tests/test_document_scanner.py` | 36 | `is_valid_cpr`, `extract_matches`, `scan_docx`, `scan_xlsx`, `_scan_bytes` — CPR detection, false-positive suppression, binary crash safety |
+| `tests/test_app_config.py` | 34 | i18n loading, Article 9 keyword detection, config round-trip, admin PIN, profiles CRUD, Fernet encryption |
+| `tests/test_checkpoint.py` | 18 | Checkpoint key stability, save/load/clear, wrong-key isolation, delta token round-trip |
+| `tests/test_db.py` | 24 | Scan lifecycle, CPR hash-only storage, data subject lookup, dispositions, export/import cycle |
+
+Each new module (`cpr_detector.py`, `app_config.py`, `checkpoint.py`, `gdpr_db.py`) is importable in isolation without Flask or MSAL — tests run without any cloud credentials or a running server.
+
+The test suite should be run before every release and after any change to `document_scanner.py`, `cpr_detector.py`, or `gdpr_db.py`. CPR detection is the legal core of the tool — a false negative means a real GDPR violation goes undetected.
+
+### Roadmap
+
+See [SUGGESTIONS.md](SUGGESTIONS.md) for the full feature roadmap with implementation status.
+
+---
+
+## Project files
+
+| File | Description |
+|---|---|
+| `gdpr_scanner.py` | Flask entry point — scan orchestration, SSE route (`/api/scan/stream`), root route |
+| `scan_engine.py` | M365 and local/SMB scan logic — `run_scan()`, `run_file_scan()` |
+| `app_config.py` | All persistence — profiles, settings, SMTP config, lang loading, Fernet encryption |
+| `sse.py` | SSE broadcast queue and `_current_scan_id` |
+| `checkpoint.py` | Mid-scan checkpoint save/load, `_checkpoint_key()` |
+| `cpr_detector.py` | CPR pattern matching and validation |
+| `document_scanner.py` | Core scanning, redaction, OCR, NER, and PII detection engine |
+| `gdpr_db.py` | SQLite persistence layer — scan results, CPR index, PII hits, dispositions, scan history |
+| `m365_connector.py` | Microsoft Graph API client — auth, token refresh, email/OneDrive/SharePoint/Teams fetchers, delete methods |
+| `google_connector.py` | Google Workspace API client — Gmail, Drive, Admin SDK |
+| `file_scanner.py` | Unified local + SMB/CIFS file iterator — `FileScanner.iter_files()` yields `(path, bytes, metadata)`. SMB reads use a 1-slot sliding-window `ThreadPoolExecutor` (`PREFETCH_WINDOW=1`) with a 60-second per-file timeout. |
+| `scan_scheduler.py` | In-process APScheduler wrapper — multi-job scheduled scan engine |
+| `templates/index.html` | Single-page HTML shell — Jinja2 template. Two variables: `app_version`, `lang_json`. |
+| `static/style.css` | All application CSS — custom properties, layout, components, light/dark themes |
+| `static/js/state.js` | Shared mutable state module (`export const S`) — imported by all 11 feature modules |
+| `static/js/*.js` | 11 ES modules: `ui`, `log`, `users`, `auth`, `profiles`, `scan`, `results`, `sources`, `scheduler`, `connector`, `viewer` |
+| `static/app.js` | Archived JS monolith — no longer loaded |
+| `routes/__init__.py` | Blueprint package marker |
+| `routes/state.py` | Shared mutable state (`connector`, `flagged_items`, `LANG`, scan locks) — imported by all blueprints |
+| `routes/auth.py` | `/api/auth/*` — M365 connect, status, sign-out, config |
+| `routes/google_auth.py` | `/api/google/*` — Google Workspace connect, status, sign-out |
+| `routes/google_scan.py` | `/api/google/scan/*` — Google scan execution |
+| `routes/scan.py` | `/api/scan/*` — start/stop, checkpoint, settings, src toggles |
+| `routes/users.py` | `/api/users/*` — listing, role overrides, license debug |
+| `routes/sources.py` | `/api/file_sources/*` and `/api/file_scan/start` |
+| `routes/profiles.py` | `/api/profiles/*` and `/api/delta/*` |
+| `routes/scheduler.py` | `/api/scheduler/*` — job CRUD, status, history, run-now |
+| `routes/email.py` | `/api/smtp/*` and `/api/send_report` |
+| `routes/database.py` | `/api/db/*`, `/api/admin/*`, `/api/preview`, `/api/thumb` |
+| `routes/export.py` | `/api/export_excel`, `/api/export_article30`, `/api/delete_bulk` |
+| `routes/viewer.py` | `/view`, `/api/viewer/tokens`, `/api/viewer/pin` — read-only viewer mode: token + PIN auth, share-link management |
+| `routes/app_routes.py` | `/api/about`, `/api/langs`, `/api/lang`, `/manual` |
+| `docs/manuals/MANUAL-EN.md` | End-user manual in English (15 sections) — served at `/manual?lang=en` |
+| `docs/manuals/MANUAL-DA.md` | End-user manual in Danish (15 sections) — served at `/manual?lang=da` |
+| `docs/setup/M365_SETUP.md` | Step-by-step Microsoft 365 setup guide |
+| `docs/setup/GOOGLE_SETUP.md` | Step-by-step Google Workspace setup guide |
+| `build_gdpr.py` | PyInstaller build script — generates `m365_launcher.py`, packages desktop app |
+| `lang/en.json` | English translations (source of truth) |
+| `lang/da.json` | Danish translations (primary language) |
+| `lang/de.json` | German translations |
+| `keywords/da.json` | Danish Article 9 special-category keyword list (454 keywords, 9 categories) |
+| `classification/m365_skus.json` | Microsoft Education SKU IDs and part-number fragments for student/staff role classification — edit to add new SKUs without code changes |
+| `classification/google_ou_roles.json` | Google OU path → role mapping |
+| `requirements.txt` | Python dependency list — use with `pip install -r requirements.txt` |
+| `run_tests.sh` | Activates venv and runs the full test suite; forwards any extra args to pytest |
+| `install_macos.sh` | Bash installer — Homebrew, Python 3.12, Tesseract, Poppler, `./venv`, spaCy model |
+| `install_windows.ps1` | PowerShell installer — Chocolatey, Python 3.12, Tesseract, Poppler, `.\venv`, spaCy model |
+| `VERSION` | Current version number — single source of truth |
+| `CHANGELOG.md` | Release history and versioning policy |
+| `LICENSE` | GNU Affero General Public License v3.0 |
+| `CONTRIBUTING.md` | Development setup, code style guide, and pull request process |
+| `SECURITY.md` | How to report security vulnerabilities responsibly |
+| `.gitignore` | Excludes credentials, databases, venv, and build artifacts from version control |
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..9bea20f
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,73 @@
+# Security Policy
+
+## Supported Versions
+
+| Version | Supported |
+|---------|-----------|
+| Latest  | ✅ Yes    |
+
+We support only the latest release. Please update before reporting a bug.
+
+---
+
+## Reporting a Vulnerability
+
+**Please do not file a public GitHub issue for security vulnerabilities.**
+
+This tool processes sensitive personal data including Danish CPR numbers (national
+identifiers). Security issues should be reported privately so a fix can be prepared
+before public disclosure.
+
+**Report to:** Open a [GitHub Security Advisory](https://github.com/your-org/gdpr-scanner/security/advisories/new)
+(repository **Security** tab → Advisories → New draft security advisory)
+
+Please include:
+- A description of the vulnerability and its potential impact
+- Steps to reproduce the issue
+- Any relevant logs or screenshots (redact personal data)
+- Your suggested fix if you have one
+
+We will acknowledge receipt within **3 business days** and aim to release a fix
+within **14 days** for critical issues.
+
+---
+
+## Scope
+
+Issues we consider in scope:
+
+- Authentication bypass or token leakage in the M365 connector
+- Unauthorised access to scan results via the web UI
+- CPR numbers or other personal data exposed in logs, error messages, or API responses
+- SQL injection or path traversal in the local scanner or database layer
+- SSRF (Server-Side Request Forgery) via URL inputs
+- Dependency vulnerabilities with a known exploit path
+
+Out of scope:
+
+- Issues requiring physical access to the machine running the scanner
+- Vulnerabilities in Microsoft Graph API itself (report to Microsoft MSRC)
+- Social engineering attacks
+
+---
+
+## Data Handling Notes for Security Researchers
+
+- CPR numbers are stored in the SQLite database as **SHA-256 hashes only** — never in plaintext
+- SMTP passwords are stored in `~/.gdpr_scanner_smtp.json` with chmod 600
+- Microsoft OAuth tokens are stored in the MSAL token cache in `~/.gdpr_scanner_config.json`
+- Scan results are stored locally in `~/.gdpr_scanner.db` — never transmitted externally
+- The web UI binds to `127.0.0.1` by default — it is not designed to be exposed to the internet
+
+---
+
+## Dependency Security
+
+This project uses Python dependencies listed in `requirements.txt`. We recommend
+running `pip-audit` or `safety check` periodically to identify known CVEs in
+dependencies.
+
+```bash
+pip install pip-audit
+pip-audit -r requirements.txt
+```
diff --git a/SUGGESTIONS.md b/SUGGESTIONS.md
new file mode 100644
index 0000000..1c15964
--- /dev/null
+++ b/SUGGESTIONS.md
@@ -0,0 +1,1537 @@
+# GDPRScanner — GDPR Improvement Suggestions
+
+These suggestions are grounded in GDPR requirements and the current state of the scanner. Items are ordered by compliance impact. All build on existing infrastructure (CPR detection, NER, Excel export, headless mode, delta scan, SQLite DB).
+
+> **Note:** File and config names currently use the `m365_scanner` / `m365_` prefix throughout. These will be renamed to `gdpr_scanner` / `gdpr_` as part of suggestion #24.
+
+---
+
+## 1. Retention policy enforcement ✅
+
+**GDPR reference:** Article 5(1)(e) — storage limitation
+
+**What was done:**
+
+- **Options panel** — 🗓 Retention policy toggle with configurable years (default 5) and fiscal year end selector: Rolling (today) / 31 Dec Bogføringsloven / 30 Jun / 31 Mar. Live cutoff hint updates as settings change.
+- **`overdue_cutoff(years, fiscal_year_end)`** — standalone helper in `m365_db.py` computing the correct cutoff in two modes:
+  - *Rolling*: exactly N years before today — correct for GDPR data minimisation
+  - *Fiscal year*: N years before the last completed fiscal year end — correct for Bogføringsloven (e.g. Dec 31 FY: items from FY ending 2020-12-31 expired on 2025-12-31)
+- **🗓 Overdue badge** — amber badge on cards in both grid and list view when an item's modified date falls before the cutoff. `markOverdueCards()` queries `/api/db/overdue` after each scan and re-renders affected cards.
+- **Bulk delete** — **🗓 Filter overdue** quick button in the bulk-delete modal pre-populates the "Older than date" filter with the exact cutoff date from the DB. **Clear filters** button resets all filters.
+- **`GET /api/db/overdue`** — accepts `years`, `fiscal_year_end`, `scan_id`; returns `{count, cutoff_date, cutoff_mode, items}`.
+- **Headless auto-delete** — `--retention-years N` and `--fiscal-year-end MM-DD` CLI flags. Non-interactive (cron): deletes automatically. Interactive (TTY): prompts for confirmation. Reports deleted/failed counts.
+- **`_do_retention_delete()`** — shared helper supporting email, OneDrive, SharePoint, and Teams items; removes from in-memory list and SQLite after each successful delete.
+
+---
+
+## 2. Article 30 report (Register of Processing Activities) ✅
+
+**GDPR reference:** Article 30 — Records of processing activities
+
+**What was done:** `_build_article30_docx()` in `m365_scanner.py` generates a structured Word document (`.docx`) via `python-docx`. Accessible via `GET /api/export_article30` and the **📋 Art.30** button in the filter bar.
+
+**Document sections:**
+
+| Section | Contents |
+|---|---|
+| Cover page | Title, generation timestamp |
+| 1. Summary | Scan date, items scanned, flagged count, total CPR hits, estimated data subjects, overdue count; per-source breakdown table |
+| 2. Data categories | Every detected PII type with hit counts and GDPR classification (Art. 9 vs Art. 4); CPR and sensitive entries highlighted |
+| 3. Data inventory | Full item list (≤500 rows) sorted overdue-first; columns: name, source, account, modified date, CPR hits, compliance disposition; overdue rows amber-highlighted |
+| 4. Retention analysis | Separate table of overdue items for easy review (only if overdue items exist) |
+| 5. Compliance trend | Last 10 scans with date, flagged count, overdue count, scan type (only if scan history exists) |
+| 6. Methodology | Scanning approach, GDPR articles referenced (Art. 5, 9, 15, 17, 30) |
+
+**Data sources used:** `db.get_stats()`, `db.get_flagged_items()`, `db.get_overdue_items()`, `db.get_trend()`, `db.get_disposition()`, `pii_hits` table aggregation, `flagged_items` in-memory list (fallback when DB unavailable).
+
+**Impact:** Directly satisfies the Article 30 obligation. Produces a dated, printable compliance document that can be shown to a supervisory authority on request.
+
+---
+
+## 3. Sensitive category detection (Article 9) ✅
+
+**GDPR reference:** Article 9 — Processing of special categories of personal data
+
+**Problem:** GDPR imposes stricter requirements on data revealing health, racial/ethnic origin, religious beliefs, trade union membership, and criminal records. The scanner currently treats all personal data at the same risk level.
+
+**Fix:** Add a keyword list for each Article 9 category, checked in the same pass as CPR scanning. When a keyword match occurs near a personal identifier (within ~150 characters), the file is flagged as **Special category data** with a distinct badge and automatically elevated to HIGH risk.
+
+**Danish keyword examples:**
+
+| Category | Keywords |
+|---|---|
+| Health | diagnose, sygemelding, indlæggelse, behandling, medicin, handicap, psykiatri, kræft, diabetes |
+| Criminal records | straffeoplysning, dom, straffeattest, sigtelse, fængsling, bøde |
+| Trade union | fagforening, tillidsrepræsentant, strejke, overenskomst |
+| Religion | kirke, moské, religiøs, baptism, konfirmation |
+| Ethnicity | nationalitet, herkomst, etnicitet |
+
+The keyword list is configurable and stored in `keywords/da.json` (following the same pattern as `lang/da.json`). Additional language files (`keywords/en.json`, `keywords/de.json`) can be added without code changes. A `special_category` column should be added to `flagged_items` in the DB and included in `scan_history`.
+
+**What was done:**
+
+- `keywords/da.json` — 454 keywords across 9 Article 9 categories (health, mental health, criminal, trade union, religion, ethnicity, political, biometric, sexual orientation); stored in `keywords/` subfolder mirroring `lang/`
+- `_load_keywords()` — loads keyword file at startup matching current language; falls back to `da.json`
+- `_check_special_category(text, cprs)` — proximity-aware detection: keywords only trigger when within 150 characters of a CPR number (reduces false positives); short keywords (≤4 chars) use whole-word boundary matching to avoid substring matches
+- Card badge — purple **⚠ Art.9 — health, mental_health** pill shown on flagged cards in grid view
+- Filter bar — "Art. 9 only" dropdown option to filter the results grid
+- Excel export — "Special category" column added to all per-source sheets
+- Article 30 report — highlighted row in summary; dedicated section listing detected categories with count table and full item list (capped at 50)
+- DB — `special_category` column (JSON array) added to `flagged_items` via migration #3; count written to `scan_history.special_category` after each scan
+- Translated — EN / DA / DE (17 new keys per language)
+- All tests pass: 10/10 detection scenarios including edge cases (no CPR fallback, substring false positive prevention)
+
+**Impact:** Highest audit priority — supervisory authorities specifically look for Article 9 data.
+
+---
+
+## 4. Data subject index ✅
+
+**GDPR reference:** Article 15 (right of access), Article 17 (right to erasure)
+
+**What was done:** The SQLite layer (`m365_db.py`) implements the full backend:
+
+- `cpr_index` table stores `(SHA-256(cpr), item_id, scan_id)` — CPR numbers are never stored in plaintext
+- `lookup_data_subject(cpr)` returns all flagged items containing a given CPR across all scans
+- `POST /api/db/subject` API endpoint accepts a CPR, hashes it, and returns matching items
+- `delete_item_record()` removes items from the index when deleted from M365
+
+**What was done (UI):**
+- 🔍 **Data subject lookup** button in the sidebar opens a modal
+- CPR input field (Enter-to-search), results list showing name, source type, date, and CPR hit count
+- **Delete all for this person** button triggers bulk deletion with `reason="data-subject-request"`, refreshes grid
+- All deletions logged in the `deletion_log` table with reason and actor
+- CPR is SHA-256 hashed before querying — never stored or transmitted in plaintext
+
+---
+
+## 5. External sharing / data transfer detection ✅
+
+**GDPR reference:** Articles 44–46 — transfers to third countries
+
+**Problem:** Emails forwarded to external domains or files shared outside the organisation represent potential unauthorised data transfers. The scanner does not currently distinguish between internal and external recipients.
+
+**What was done:**
+
+- **Email:** fetches `toRecipients` and `ccRecipients` from Graph API; compares recipient domains against the tenant domain (resolved from the signed-in user's UPN); flags items where any recipient is external with `transfer_risk = "external-recipient"`. Badge: **⚠ Ext.**
+- **OneDrive / SharePoint / Teams:** fetches the `shared` property on all drive items; flags files with external sharing links (`scope: anonymous`) as `"external-share"` and organisation-wide links as `"shared"`. Badge: **🔗**
+- **Filter bar dropdown** — "All items / External recipient / Externally shared / Shared" filters the results grid
+- **Card badges** — orange `⚠ Ext.` pill for external email recipients; blue `🔗` pill for shared files
+- **Excel export** — dedicated red-tabbed **External transfers** sheet with all flagged external items; highlighted row in the Summary sheet
+- **DB** — `transfer_risk` column added to `flagged_items` via migration #2; persisted alongside all other card data
+- **Translated** — EN / DA / DE
+
+**Impact:** Identifies the highest-risk data exposure scenarios — data that has potentially already left the organisation's control.
+
+---
+
+## 6. Legal basis and disposition tagging ✅
+
+**GDPR reference:** Article 5(1)(a) — lawfulness, Article 30
+
+**What was done:** The SQLite layer implements the full backend:
+
+- `dispositions` table stores `(item_id, status, legal_basis, notes, reviewed_by, reviewed_at)`
+- `set_disposition()` / `get_disposition()` methods
+- `POST /api/db/disposition` and `GET /api/db/disposition/<id>` API routes
+
+**Disposition values:**
+
+| Value | Meaning |
+|---|---|
+| `unreviewed` | Default |
+| `retain-legal` | Must keep (e.g. Bogføringsloven) |
+| `retain-legitimate` | Justified retention |
+| `retain-contract` | Part of an active contract |
+| `delete-scheduled` | Mark for deletion at next cleanup run |
+| `deleted` | Already actioned |
+
+**What was done (UI):**
+- Disposition dropdown in the preview panel meta strip — loads current status on open, saves on click
+- **Filter bar dropdown** — filter the results grid by disposition status alongside source and search
+- Disposition cached on `flaggedData` items after first view — filter works without extra API calls
+- Saving a disposition while a filter is active immediately re-applies the filter
+- **Clear filters (×)** resets the disposition dropdown alongside search and source
+- **Excel export** — Disposition column added to all per-source sheets
+- **Headless auto-delete** — after each scan, items tagged `delete-scheduled` are automatically deleted (interactive: prompts for confirmation; non-interactive/cron: deletes automatically); each deletion is logged in the `deletion_log` table with `reason="bulk"` and actor identity
+
+---
+
+## 7. Compliance trend tracking ✅
+
+**GDPR reference:** Article 5(2) — accountability principle
+
+**What was done:** The SQLite layer implements the full backend:
+
+- `scan_history` table records per-scan aggregates: `(scan_date, flagged_count, overdue_count, deleted_count, sources_json)`
+- `finish_scan()` writes a history row automatically after every completed scan
+- `get_trend(n)` returns the last N rows ordered by date
+- `GET /api/db/trend` API endpoint
+
+**What was done (UI):**
+- Sparkline panel embedded in the sidebar Stats section, shown after first scan or on login if DB has history
+- Blue solid line = flagged count over last 10 scans; amber dashed line = overdue count
+- Shaded fill under the flagged line; dot on the latest data point
+- Hover tooltip showing exact date, flagged count, and overdue count
+- Trend change badge (↓ 17% / ↑ 5%) showing % movement vs previous scan in green/red
+- Date labels at first, middle, and last scan
+- Redraws on window resize; refreshes after every scan completes
+- Hidden until at least 2 scans exist in the DB
+
+---
+
+
+## 8. File system scanning — local and network (SMB/CIFS) ✅
+
+**GDPR reference:** Article 5(1)(c) and (e) — data minimisation, storage limitation
+
+**Background**
+
+Many organisations store personal data on local workstations, external drives, and file servers (NAS devices accessible via SMB/CIFS) — not in Microsoft 365. Local and network file scanning share identical core logic: both ultimately hand a file path or byte stream to `document_scanner.py`. The only difference is how files are accessed. They are therefore treated as a single unified feature rather than two separate modules.
+
+**Design — unified `FileScanner` connector**
+
+```python
+class FileScanner:
+    def __init__(self, path, smb_host=None, smb_user=None, smb_password=None):
+        self.is_smb = path.startswith("//") or path.startswith("\\\\")
+        # SMB without mount: use smbprotocol directly
+        # SMB with mount, or local path: use os.walk()
+
+    def iter_files(self, extensions=None):
+        # Yields (relative_path, bytes_or_stream, metadata) regardless of source
+        ...
+```
+
+The scanner calls `iter_files()` without knowing whether the files are local or remote. Results go into the same SQLite database as M365 items with `source_type = "local"` or `"smb"`, so the Article 30 report and data subject lookup cover all sources in a single view.
+
+**Connection approaches**
+
+| Mode | How | When to use |
+|---|---|---|
+| Local path | `os.walk()` on any local or mounted path | Workstations, USB drives, already-mounted network shares |
+| Native SMB (`smbprotocol`) | Direct connection without mounting — programmatic auth | Headless/scheduled scans, no admin rights to mount |
+
+If `smbprotocol` is not installed, the scanner falls back gracefully to local-path mode with a warning. This keeps the dependency optional — users who only need local scanning don't need to install it.
+
+**Credential security (SMB)**
+
+| Method | How | Notes |
+|---|---|---|
+| OS keychain (`keyring`) | `keyring.set_password("gdpr-scanner-nas", user, pw)` | Best — password never touches the filesystem |
+| Environment variables | `NAS_USER` / `NAS_PASSWORD` | Good for headless/cron |
+| `.env` file (chmod 600) | `python-dotenv` | Acceptable fallback — already in `.gitignore` |
+| Kerberos / NTLM | `smbprotocol` uses domain ticket | No stored credentials — best for domain environments |
+
+**New optional dependencies**
+
+```
+smbprotocol>=1.13    # Native SMB2/3 — optional, falls back to local-only without it
+keyring>=25.0        # OS keychain credential storage — optional
+python-dotenv>=1.0   # .env file loading for headless mode — optional
+```
+
+**New CLI flags**
+
+```bash
+# Scan a local folder
+python m365_scanner.py --scan-path ~/Documents
+
+# Scan a network share (native SMB)
+python m365_scanner.py --scan-path //nas.school.dk/shares \
+  --smb-user "DOMAIN\henrik" --smb-keychain-key gdpr-scanner-nas
+
+# Store SMB credentials in OS keychain (one-time setup)
+python m365_scanner.py --smb-store-creds --smb-host nas.school.dk \
+  --smb-user "DOMAIN\henrik"
+
+# Combine with headless M365 scan
+python m365_scanner.py --headless --scan-path //nas/shares \
+  --smb-user "DOMAIN\henrik" --output ~/Reports/
+```
+
+**Impact:** Closes the most common blind spot — years of personal data sitting on old file servers and teacher workstations that have never been scanned. A school scanning both M365 and its file server in a single job gets a complete picture in one Article 30 report.
+
+---
+
+## 9. Photographs of pupils and staff (biometric data) ✅
+
+**GDPR reference:** Article 9 (special categories — biometric data), Article 5(1)(b) and (e) (purpose and storage limitation), Recital 38 (children), Databeskyttelsesloven §6
+
+**Why this is different from ordinary personal data**
+
+Photographs that can be used to uniquely identify a person qualify as **biometric data** under Article 9 GDPR — a special category requiring either explicit consent or one of the narrow legal bases in Article 9(2). This applies to school class photos, staff portraits, and any image where faces are clearly identifiable. A standard scan for CPR numbers will not detect photographs at all; this is a separate compliance risk that requires dedicated handling.
+
+**Children require heightened protection**
+
+Recital 38 specifically calls out children as deserving particular protection. In Denmark, Databeskyttelsesloven §6 sets the digital consent age at 15 — below that, a parent or guardian must give consent. Consent obtained in a school context is questionable in any case, given the power imbalance between school and family.
+
+**Retention — no fixed statutory period**
+
+Unlike accounting records, GDPR sets no specific number of years for school photographs. The applicable principles are:
+
+| Principle | Implication for school photos |
+|---|---|
+| Purpose limitation (Art. 5(1)(b)) | Photos may only be kept while the original purpose remains valid. A class photo from 2018 documents the 2018 school year; after the pupil leaves, the purpose narrows sharply |
+| Storage limitation (Art. 5(1)(e)) | Data must not be kept longer than necessary. No documented justification = must delete |
+| Archiving / public interest (Art. 89) | Historical or cultural-heritage use can justify longer retention, but only with specific safeguards, and typically only if the images are not individually identifiable or have been properly anonymised |
+
+**Staff photographs**
+
+The legal basis for staff photos is usually legitimate interest or the employment contract. Once a staff member leaves, retention requires a specific documented basis. Photos on public-facing websites (school homepage, social media) must be removed promptly after departure.
+
+**Consent withdrawal**
+
+If consent was the legal basis and a parent or former pupil withdraws it, the photo must be removed regardless of when it was taken. This applies to published photos (website, social media) immediately and to internal archives on request under Article 17.
+
+**Datatilsynet guidance (Danish DPA)**
+
+Datatilsynet has published specific guidance on schools and photography. The general position:
+- Internal use (yearbooks, internal records) — retain for the duration of enrolment plus a short grace period; document the basis
+- Website / social media — require valid consent; remove immediately on withdrawal
+- Historical archive (pre-digital, cultural heritage) — assess case by case under Article 89
+- Biometric use (facial recognition for access control) — strict rules, almost always requires explicit consent
+
+**Proposed scanner feature**
+
+Since CPR scanning cannot detect photographs, a separate detection pass is needed:
+
+- **File type detection** — flag `.jpg`, `.jpeg`, `.png`, `.heic`, `.tiff`, `.mp4`, `.mov` files in OneDrive, SharePoint, and Teams as *potential biometric data*
+- **Face detection** (already implemented in Document Scanner) — use OpenCV `haarcascade` to confirm at least one face is present before flagging
+- **Age estimation heuristic** — optional: flag images with multiple faces (class photos) at higher risk than single portraits
+- **Metadata** — check EXIF creation date; flag images older than the configurable retention threshold
+- **Disposition tagging** — compliance officer reviews each flagged image and tags with legal basis (`retain-archive`, `retain-consent`, `delete-scheduled`, etc.)
+- **Source note** — add image items to the Article 30 report under data category "Biometric data / photographs"
+
+**Effort:** Medium — face detection is already available via OpenCV in the Document Scanner. The main work is wiring it into the M365 file scan pass and adding a dedicated results filter.
+
+**Impact:** High — photographs are one of the most commonly overlooked GDPR risks in schools and public-sector organisations. Datatilsynet has issued enforcement actions against Danish schools specifically for unlawful retention of pupil photographs.
+
+---
+
+## 10. Google Workspace scanning (Gmail & Google Drive) ✅
+
+**Background**
+
+Many organisations run a mixed environment — Microsoft 365 for staff and administration, Google Workspace for some departments or as a legacy system. A scanner covering only M365 leaves Google data as a blind spot.
+
+**What was done (v1.5.9)**
+
+Option B (unified sources panel) was implemented:
+
+- **`google_connector.py`** — service account auth with domain-wide delegation; `iter_gmail_messages()` yields message body + attachments; `iter_drive_files()` auto-exports native Docs/Sheets/Slides → DOCX/XLSX/PPTX before scanning; `list_users()` via Admin Directory API
+- **`routes/google_auth.py`** — `/api/google/auth/status`, `/connect`, `/disconnect`; service account JSON key saved to `~/.gdpr_scanner_google_sa.json` (chmod 600); admin email persisted to `~/.gdpr_scanner_google.json`
+- **`routes/google_scan.py`** — `/api/google/scan/start`, `/cancel`, `/users`; full scan loop reusing `_scan_bytes()` and `broadcast()` from the M365 engine; results written to the same SQLite DB with `source_type = "gmail"` or `"gdrive"`
+- **Google Workspace tab** in Source Management activated (was "Coming soon" stub); service account key file upload; admin email field; Gmail and Google Drive source toggles; setup guide with required API scopes
+- **Auto-restore** — connector rebuilt from saved key on startup
+- **Dependencies added:** `google-auth>=2.0`, `google-auth-httplib2`, `google-api-python-client>=2.0` (optional — scanner starts without them)
+
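+**Known limitation (addressed in v1.6.2 — see #23)**
+
+As released in v1.5.9, `routes/google_scan.py` wrote `user_role: "other"` for all Google scan results. Role classification for Google accounts is covered by suggestion #23; its Phase 1 (v1.6.2) replaced the placeholder with OU-based `staff` / `student` / `other` roles.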
+
+**Setup required in Google Workspace Admin Console:**
+1. Create a Google Cloud project; enable Gmail API, Drive API, Admin SDK
+2. Create a service account; download JSON key; enable domain-wide delegation
+3. Add the service account client ID in Workspace Admin → Security → API Controls → Domain-wide delegation with scopes: `gmail.readonly`, `drive.readonly`, `admin.directory.user.readonly`
+
+---
+
+## 11. Database export / import ✅
+
+**Background**
+
+The SQLite database (`~/.m365_scanner.db`) accumulates scan history, flagged items, CPR index, dispositions, and the deletion audit log over time. Without export/import, there is no way to back it up, move it between machines, archive a completed compliance cycle, or share a snapshot with an auditor without transferring the raw database file.
+
+**What was done (CLI)**
+
+The core export and import logic is implemented in `m365_db.py` and wired into the CLI:
+
+```bash
+# Export — creates a structured ZIP archive
+python m365_scanner.py --export-db ~/compliance/gdpr_export_2026.zip
+
+# Import merge (default) — adds dispositions + deletion log, leaves existing data intact
+python m365_scanner.py --import-db ~/compliance/gdpr_export_2026.zip
+
+# Import replace — wipes DB first, then restores everything (prompts for confirm)
+python m365_scanner.py --import-db ~/compliance/gdpr_export_2026.zip --import-mode replace --yes
+```
+
+**Export ZIP contents:**
+
+| File | Contents |
+|---|---|
+| `export_meta.json` | Export date, schema version, row counts |
+| `scans.json` | Scan run summaries |
+| `flagged_items.json` | Flagged items — `thumb_b64` stripped to keep size small |
+| `cpr_index.json` | CPR hashes (SHA-256 only — never raw CPR numbers) |
+| `pii_hits.json` | Per-type PII counts per item |
+| `dispositions.json` | Compliance decisions with legal basis and reviewer |
+| `scan_history.json` | Aggregated trend data |
+| `deletion_log.json` | Full deletion audit trail |
+
+**Import modes:**
+
+| Mode | Behaviour |
+|---|---|
+| `merge` (default) | Imports only `dispositions` and `deletion_log` — safe to run against a live DB |
+| `replace` | Wipes the DB first, then imports all 7 tables — full backup/restore |
+
+> ⚠ **Not fully tested in production yet.** The export/import cycle has been verified in unit tests (export → merge → replace all pass) but has not been tested against a real M365 scan database with thousands of rows, nor validated across different schema versions. Treat as beta — always keep a manual copy of `~/.m365_scanner.db` before running `--import-mode replace`.
+
+**Known complication**
+
+The `cpr_index` table is keyed by `(cpr_hash, item_id, scan_id)`. Importing into a DB with different scan IDs means the hashes are still valid for lookup but won't resolve to the correct scan context. Acceptable for archiving; a full fix requires remapping scan IDs on import.
+
+**Remaining work**
+
+- UI panel in the sidebar with **Export DB** and **Import DB** buttons (`GET /api/db/export`, `POST /api/db/import`)
+- Import confirmation dialog showing row counts before proceeding
+- Production testing with real scan databases
+- Cross-version import testing (schema version mismatch handling)
+
+**Impact:** Closes the gap between the scanner as a detection tool and a long-term compliance record. An auditor can request the export ZIP as evidence of ongoing GDPR monitoring activity.
+
+---
+
+## 12. ~~Network drive scanning (SMB / CIFS)~~ — retired
+
+> Merged into **suggestion #8** (File system scanning — local and network). See #8 for the full specification including SMB connection approaches, credential security, and CLI flags.
+
+---
+
+## 13. Optimise Article 9 keyword matching with compiled regex ✅
+
+**Background**
+
+Suggestion #3 implemented Article 9 keyword detection using sequential `str.find()` calls — up to 459 iterations per flagged item. For typical school tenants (tens to a few hundred flagged items) the added cost is imperceptible (~1–5ms per item, ~100–500ms total). For larger tenants or tenants with many flagged items, the linear scan could add several seconds.
+
+**Current approach**
+
+```python
+for kw, cat in _keyword_flat:          # up to 459 iterations
+    idx = text_lower.find(kw, pos)     # sequential string search
+```
+
+**Proposed optimisation**
+
+Compile one alternation pattern per category with `re.compile()` at load time, then call `pattern.search()` at scan time, rather than looping `str.find()` over every keyword:
+
+```python
+import re
+_compiled_keywords: dict[str, re.Pattern] = {}
+
+def _load_keywords(lang="da"):
+    global _compiled_keywords                        # rebind the module-level dict, not a local
+    ...
+    _compiled_keywords = {
+        cat: re.compile(
+            r"(?<![\w])" +                           # no preceding word char
+            "(?:" + "|".join(re.escape(kw) for kw in sorted(kws, key=len, reverse=True)) + ")" +
+            r"(?![\w])",                              # no following word char
+            re.IGNORECASE
+        )
+        for cat, kws in categories.items()
+    }
+```
+
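+Python's `re` module is a backtracking engine and does not build an Aho-Corasick-style automaton, but a single compiled alternation moves the per-keyword loop out of Python bytecode and into the regex engine's C code, so each text is scanned once per category instead of once per keyword. The estimated gain is roughly **10–50x** for large texts (an estimate — confirm with a benchmark on real scan data). The word-boundary anchors (`(?<![\w])` / `(?![\w])`) also reduce false positives from keywords that appear as substrings inside unrelated words.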
+
+**Impact by tenant size**
+
+| Flagged items | Current (str.find) | Compiled regex | Saving |
+|---|---|---|---|
+| 100 | ~0.5s | ~0.01s | Negligible in both cases |
+| 1,000 | ~5s | ~0.1s | ~5s |
+| 10,000 | ~50s | ~1s | ~49s |
+
+**When to implement**
+
+Low priority for a typical school. Worth doing before releasing to larger organisations (universities, municipalities) where a single tenant scan may produce thousands of flagged items.
+
+**Effort:** Small — change is confined to `_load_keywords()` and `_check_special_category()` in `m365_scanner.py`. No DB or UI changes needed.
+
+---
+
+## 14. Progress phase text improvements ✅
+
+**Background**
+
+Minor UI polish items related to the scan progress area.
+
+**What was done:**
+
+- **Phase text stuck after collection** — the blue phase text remained on the last "Collecting Teams…" message for the entire scan duration. Fixed by broadcasting a `scan_phase` event immediately after `scan_start`, replacing the collection message with "Scanner…" / "Scanning…" as soon as actual file scanning begins.
+
+**Remaining ideas:**
+
+- Show per-source progress counters in the phase text (e.g. "Scanning OneDrive — 42 / 180")
+- Show current account name in the phase text during multi-user scans
+- Animate phase text transitions with a subtle fade
+
+---
+
+## 15. Scan profiles — named, reusable scan configurations
+
+**GDPR reference:** Article 5(2) — accountability; Article 30 — records of processing activities
+
+**Background**
+
+Currently all scan settings are stored as a single flat configuration. Scan profiles give each configuration a name, making them reusable from both the UI and headless CLI — enabling different scan schedules for different purposes without manual reconfiguration.
+
+This feature is broken into 6 incremental steps that can each be shipped and tested independently.
+
+---
+
+### 15a. Backend profile storage ✅ *(Small)*
+
+- Define the profile data structure (see below)
+- Add `load_profiles()`, `save_profile()`, `delete_profile()`, `get_profile(name)` helpers
+- On first run, migrate the existing flat `~/.m365_scanner_settings.json` to become a default profile named "Default"
+- No UI changes — purely backend. Foundation for all subsequent steps.
+
+**Profile data structure:**
+```json
+{
+  "id": "uuid-1",
+  "name": "Nightly email scan",
+  "description": "Quick nightly CPR check on all Exchange mailboxes",
+  "sources": ["email"],
+  "user_ids": "all",
+  "options": {
+    "email_body": true,
+    "attachments": false,
+    "older_than_days": 0
+  },
+  "retention_years": null,
+  "fiscal_year_end": null,
+  "email_to": "compliance@school.dk",
+  "file_sources": [],
+  "last_run": "2026-03-19T02:00:00",
+  "last_scan_id": 42
+}
+```
+
+---
+
+### 15b. CLI profile support ✅ *(Small)*
+
+Immediately useful for headless/cron runs without any UI work:
+
+```bash
+# Run a named profile headlessly
+python m365_scanner.py --headless --profile "Full compliance scan"
+
+# List available profiles
+python m365_scanner.py --list-profiles
+
+# Save current settings as a new profile
+python m365_scanner.py --save-profile "Nightly email" --sources email --email-to compliance@school.dk
+
+# Delete a profile
+python m365_scanner.py --delete-profile "Old scan"
+```
+
+Cron example — different profiles on different schedules:
+```bash
+0 2 * * *   ./venv/bin/python m365_scanner.py --headless --profile "Nightly email scan"
+0 3 * * 1   ./venv/bin/python m365_scanner.py --headless --profile "Weekly M365 scan"
+0 4 1 * *   ./venv/bin/python m365_scanner.py --headless --profile "Monthly full scan"
+```
+
+---
+
+### 15c. ~~Profile selector in topbar~~ — dropped
+
+The profile management modal (15d) already lets you select, edit, and run profiles. The scheduler (#19) handles automated runs. A topbar dropdown would add UI complexity for a workflow most users do infrequently.
+
+**Dropped.** If you have a genuinely elegant solution that adds clear value without cluttering the topbar, open an issue — but the bar is high.
+
+---
+
+### 15d. Profile management modal ✅
+
+- "Manage profiles" button opens a modal listing all profiles with last run date, sources summary, and edit/duplicate/delete buttons
+- Creating a new profile copies the current sidebar state
+- Makes profiles fully self-service from the UI without needing to edit JSON manually
+
+---
+
+### 15e. Full profile editor panel *(Medium)*
+
+- Dedicated edit panel mirroring all sidebar options but saving to a named profile rather than applying immediately
+- Without this, profiles can only be created from the current sidebar state — sufficient for most users but not ideal
+- Polish step — implement after 15d is stable (15c was dropped)
+
+---
+
+### 15f. File source integration ✅
+
+- ✅ `file_sources` array stored in profile data structure
+- ✅ File sources defined once, reused across profiles (interactive UI)
+- ✅ `saveProfile()` now saves actual checked file sources (was hardcoded `[]`)
+- ✅ Scheduled scans now fire `run_file_scan()` for each file source in the profile
+- ⏳ Profile editor does not yet show a dedicated file sources section (editing requires re-saving from sidebar)
+
+---
+
+**Article 30 integration (all steps)**
+
+The Article 30 report includes the profile name and description in the scan metadata section, providing an audit trail of which configuration produced which results.
+
+**Overall impact:** Transforms the scanner from a single-purpose tool into a multi-schedule compliance platform. Steps 15a + 15b alone deliver immediate CLI value with minimal effort.
+
+
+---
+
+## 16. Student/Staff role classification ✅
+
+**GDPR reference:** Art. 30 (records of processing activities), Databeskyttelsesloven §6 (children under 15)
+
+**What was done:**
+
+- **Automatic role detection** — users are classified as 🎓 Student or 👔 Staff at login based on their Microsoft 365 licences, without requiring extra Azure permissions
+- **Two-pass classification** in `m365_connector.classify_user_role()`:
+  1. **`skuPartNumber` fragment match** (preferred) — strings like `STANDARDWOFFPACK_FACULTY` are stable across all Microsoft licensing generations; runs first whenever part numbers are available via `get_subscribed_skus()` or `build_sku_map_from_users()`
+  2. **SKU ID lookup** from `classification/m365_skus.json` — fallback for when part numbers are unavailable or for licences with no recognisable fragment (e.g. Power Automate Free)
+- **`classification/m365_skus.json`** — external file in `classification/` folder (mirrors `lang/`, `keywords/`); edit to add new SKU IDs without code changes; bundled into PyInstaller app via `build_m365.py`
+- **Three-tier `get_subscribed_skus()`** — tries `/subscribedSkus` (admin), `/me/licenseDetails` (User.Read), then `build_sku_map_from_users()` (per-user sampling spread across full list) so part numbers are discovered regardless of permission level
+- **Manual role override** — click the role badge (🎓/👔/❓) on any user row to cycle `student → staff → other → (clear)`; stored in `~/.m365_scanner_role_overrides.json`; ✎ indicator shows overridden rows; applied at both display time and scan time
+- **🔍 SKU debug modal** — button next to role filters shows all tenant SKU IDs colour-coded known/unknown; unknown IDs are selectable text for pasting into `m365_skus.json`
+- **Role filter buttons** — **All / 👔 Ansat / 🎓 Elev** filter the accounts list
+- **Role badges on cards** — 🎓/👔 pill on every result card in grid and list view
+- **Article 30 report** — Data Inventory section split into separate Staff and Student tables; parental consent note for students under 15 (Databeskyttelsesloven §6)
+- **Excel export** — Role column on all per-source sheets
+- **Translated** — EN / DA / DE
+
+**Impact:** Required for Article 30 compliance in Danish schools — the staff/student distinction is legally significant under Databeskyttelsesloven §6.
+
+---
+
+## 17. Unified source management modal ✅
+
+**Background**
+
+The current sidebar has three separate, disconnected places for source configuration:
+- The M365 connection panel (Azure credentials)
+- The hardcoded Email / OneDrive / SharePoint / Teams checkboxes
+- The 📁 File sources "Manage" button (local paths and SMB shares)
+
+As the scanner grows to support more connectors (Google Workspace, local file systems, SMB), this fragmentation becomes unwieldy. A user who only scans local file servers should not be confronted with M365 connection UI. A user who only uses M365 should not see file source clutter.
+
+**Proposed design — single ⚙ Sources button in the sidebar**
+
+Replace the current patchwork with a single **"⚙ Sources"** button that opens a unified source management modal. The left column sources panel becomes a clean, read-only list of *active* sources with their status indicators.
+
+**Modal sections:**
+
+| Section | Contents |
+|---|---|
+| **Microsoft 365** | Azure app credentials (client ID, tenant ID, secret), auth mode toggle (Application / Delegated), per-source toggles (Email, OneDrive, SharePoint, Teams), visibility toggle (show/hide in sidebar) |
+| **Google Workspace** | Google OAuth credentials (client ID, secret), per-source toggles (Gmail, Google Drive), visibility toggle — greyed out with "Coming soon" until implemented |
+| **File sources** | Full list of saved local/SMB sources with Add/Edit/Delete; each has a visibility toggle |
+| **Sidebar display** | Drag-to-reorder the sources shown in the left column; set which appear by default |
+
+**Sidebar behaviour after this change:**
+
+- Sources panel shows only sources the user has *enabled* for display
+- Each row has a status dot (green = connected, amber = credential issue, grey = disabled)
+- Scrolls at 5 visible rows as already implemented
+- The panel is purely for selection — all configuration is in the modal
+
+**Impact:** Cleaner onboarding (new users see only what's relevant), easier multi-connector setups, and a natural home for future connectors (Dropbox, SharePoint on-premises, SFTP) without adding more sidebar clutter.
+
+---
+
+
+## 18. EXIF metadata extraction from images ✅
+
+**GDPR reference:** Art. 4 (personal data — location, identity), Art. 9 (biometric + location context)
+
+**Background**
+
+EXIF (Exchangeable Image File Format) metadata is embedded in JPEG, TIFF, and HEIC images by cameras and smartphones. It frequently contains:
+
+- **GPS coordinates** — exact latitude/longitude where the photo was taken; personal data under Art. 4 and a significant privacy risk for photos of children or staff
+- **Author / Artist / Copyright** — name of the photographer
+- **Description / Subject / Keywords / Comment** — free-text fields that may contain names, diagnoses, or other PII
+- **Device identifiers** — camera make/model, serial number, software
+- **Timestamps** — DateTimeOriginal, DateTimeDigitized
+
+**What was implemented:**
+
+- **`_extract_exif(content: bytes, filename: str) -> dict`** — extracts structured EXIF data using `PIL.Image` (already a dependency). Returns GPS, author, description, timestamps, and device info.
+- **GPS extraction** — converts DMS (degrees/minutes/seconds) rational values to decimal degrees; adds a Google Maps link.
+- **PII fields** — Author, Artist, Copyright, Description, UserComment, ImageDescription, Subject, Keywords checked for content.
+- **Risk classification:**
+  - GPS present → `"gps"` added to `special_category`; card gets 🌍 GPS badge
+  - PII-bearing EXIF fields → `"exif_pii"` added to `special_category`
+- **Preview panel** — EXIF data shown in a collapsible section below the image with GPS map link
+- **Art. 30 report** — photos with GPS are called out in the biometric/photo section with coordinates and map links
+- **Excel export** — `gps_lat`, `gps_lon` columns added to image rows
+- **No new dependencies** — uses `Pillow` which is already required
+
+---
+
+
+## 19. Scheduled / automatic scans ✅
+
+**GDPR reference:** Art. 5(2) — accountability; Art. 32 — security of processing; Art. 25 — data protection by design
+
+**Background**
+
+A one-off scan is useful for an audit, but ongoing GDPR compliance requires regular, repeatable scanning. Personal data accumulates continuously — new emails arrive, files are uploaded, staff change. A scheduler removes the need for manual intervention and provides a documented, reproducible compliance cadence.
+
+**Status:** Fully implemented in v1.5.5 (multi-job support, inline toggle, next-run display, auth fix). Settings → Scheduler tab supports multiple independent named scan jobs. Old single-job config files are migrated automatically.
+
+**Proposed update to the existing Scheduler tab:**
+
+**Each scheduled scan is a named job with:**
+- **Name** — e.g. "Nightly tenant scan", "Weekly NAS archive"
+- **Frequency** — daily, weekly, monthly, or custom cron expression
+- **Time of day** — run at off-peak hours (e.g. 02:00)
+- **Sources** — which sources to include (links to a saved profile)
+- **Email report** — automatically send the Excel report after each run (uses existing SMTP config)
+- **Retention** — optionally apply retention policy enforcement as part of the run
+- **Enabled / disabled** toggle per job
+
+**Settings → Scheduler tab UI:**
+
+```
+Scheduled scans
+┌──────────────────────────────────────────────────────┐
+│ ✔  Nightly tenant scan     Daily 02:00   Next: 01:23 │
+│ ✔  Weekly NAS archive      Mon   03:00   Next: 6d    │
+│ ✗  Ad-hoc test             Manual        Last: never  │
+│ + Add scheduled scan                                   │
+└──────────────────────────────────────────────────────┘
+```
+
+Each row has an enable/disable toggle, edit (✏) and delete buttons. Schedule configuration (name, frequency, profile, email) lives exclusively in the job editor modal — nothing schedule-related appears in the sidebar.
+
+**Persistence:**
+- All scheduled scan definitions stored in `~/.m365_scanner_schedule.json` (list)
+- Last run time, next run time, and run history in the existing SQLite DB (`scan_schedules` table)
+- Missed runs flagged in the UI (e.g. "Last run was 3 days ago — missed?")
+
+**Log** — scheduled scans appear in the scan log with a 🕐 prefix
+
+**Implementation notes:**
+- `APScheduler` (MIT licence) is the most straightforward — `pip install apscheduler`
+- Alternatively use `schedule` (simpler, no persistence) or a system-level cron job calling the existing CLI
+- The scanner already supports `--scan-path`, `--smb-user`, and profile-based configuration via CLI — a cron-based approach using the CLI requires no new code, just documentation
+- An in-process scheduler is more user-friendly (visible in the UI, no system access needed)
+
+**Effort:** Medium — APScheduler integration + Settings tab + DB table + email trigger hook
+
+---
+
+
+## 20. PDF scanning in local/SMB file scans (multiprocessing timeout) ✅ Done
+
+**What was done:**
+
+PDFs were excluded from local/SMB file scans because Tesseract/Poppler subprocesses could not be stopped from a Python thread, causing indefinite hangs. Fixed by spawning each PDF scan in a dedicated process with a 60-second hard timeout.
+
+**Implementation:**
+
+- **`cpr_detector.py`** — `_worker_scan_pdf()` (module-level, required for `spawn` context) calls `document_scanner.scan_pdf()` and returns via a `multiprocessing.Queue`. `_scan_bytes_timeout()` writes PDF bytes to a temp file, spawns the worker via `multiprocessing.get_context("spawn")`, joins with 60s timeout, terminates if exceeded. Non-PDF files delegate to `_scan_bytes()` directly.
+- **`scan_engine.py`** — `run_file_scan()` calls `_scan_bytes_timeout()` instead of `_scan_bytes()`. Stub added to module-level injected globals.
+- **`gdpr_scanner.py`** — `_scan_bytes_timeout` imported from `cpr_detector` and injected into `scan_engine`.
+- **`file_scanner.py`** — `.pdf` removed from `FILE_SCAN_EXTENSIONS` exclusion; all default extensions now included.
+
+Key design choice: content is written to a temp file before spawning (avoids pickling up to 50 MB through the queue). `spawn` context is required on macOS + Flask to avoid duplicating the server socket.
+
+---
+
+
+
+
+## 21. SSE event replay for late-connecting browsers ✅
+
+**Status:** Fully implemented in v1.5.8. Both manual and scheduled scans now
+replay buffered SSE events to late-connecting browsers. Scheduled scans show
+full live progress in the browser (progress bar, phase text, flagged cards, log
+entries) exactly like manual scans.
+
+**Background**
+
+`broadcast()` pushes scan progress events (phase updates, flagged items, log
+messages) over Server-Sent Events (SSE) to connected browser tabs. If a
+scheduled scan starts before the browser is open, all events fire into the
+void — the live log is empty when the user opens the UI mid-scan.
+
+This affects scheduled scans specifically, but also manual scans started
+in one tab and watched from another.
+
+**What was done:**
+
+**Module identity fix (critical):**
+- When run as `python m365_scanner.py`, the module loads as `__main__`. The
+  scheduler's `import m365_scanner as _m` loaded a **second copy** with its own
+  empty `_sse_queues` — events from scheduled scans never reached the browser.
+- **Fix:** `sys.modules["m365_scanner"] = sys.modules[__name__]` at the top of
+  the module ensures all imports share one instance.
+
+**SSE event replay:**
+- **`_current_scan_id`** — unique timestamp-based ID (`scan_1711612345678` /
+  `filescan_1711612345678`) set at the start of every scan and injected into
+  every SSE event by `broadcast()`. Cleared automatically after `scan_done`.
+- **`scan_stream()` replay filter** — on connect, replays only buffer events
+  matching the current `scan_id` (avoids stale replay from a previous scan).
+  Emits `sse_replay` / `sse_replay_done` marker events to bracket the
+  replayed block.
+- **`GET /api/scan/status`** — lightweight endpoint returning `{running, scan_id}`.
+  Used by the polling watchdog and page-load check.
+
+**Shared SSE listeners:**
+- **`_attachScanListeners(es)`** / **`_attachSchedulerListeners(es)`** — shared
+  JS functions used by both `startScan()` and `_autoConnectSSEIfRunning()`.
+  Eliminates the duplication that caused the original bug.
+- **`_attachSchedulerListeners`** now shows the progress bar on
+  `scheduler_started` and hides it on `scheduler_done` / `scheduler_error`.
+  Also listens for `scan_start` as a fallback to activate the progress UI if
+  `scheduler_started` was missed (e.g. browser reconnected mid-scan).
+
+**SSE connection resilience:**
+- **Polling watchdog** (`_sseWatchdog`) — checks `/api/scan/status` every 4s.
+  When a running scan is detected, ensures the SSE connection is alive via
+  `_ensureSSE()` and shows the progress UI. Solves the problem of idle SSE
+  connections being silently dropped by Flask/Werkzeug.
+- **`_ensureSSE()`** — opens or reopens the SSE connection if dead
+  (`readyState === CLOSED`), attaches all listeners.
+- **`_userStartedScan` flag** — `scan_done` only closes the SSE connection for
+  user-initiated scans; scheduled scans keep it alive for future events.
+- **`es.onerror` fix** — no longer silently nulls `es` (EventSource
+  auto-reconnects; nulling it broke reconnection).
+
+**Other fixes:**
+- **`scan_complete` → `scan_done`** — `run_file_scan()` was broadcasting
+  `scan_complete` on finish, but the JS only listens for `scan_done`. Renamed
+  for consistency with matching payload shape.
+- **Resume scan profile fix** — `startScan()` now sends `profile_id` in the
+  POST body; `_save_settings()` accepts `profile_id` so the correct profile is
+  updated instead of always writing to Default.
+- **i18n** — `m365_sse_reconnecting` and `m365_sse_replay_note` added (EN/DA/DE).
+- **Diagnostic logging** — `[run_scan]` prints sources, user count, app_mode,
+  and a sample user entry. Browser console logs `[SSE]` prefixed messages for
+  all event types.
+
+**Impact:** Closes the last gap in scheduled scan observability — scheduled
+scans now show full live progress in the browser, and opening the browser
+mid-scan replays buffered events.
+
+---
+
+
+## 22. Pre-fetch cache for SMB/local file scans ✅ Done
+
+**What was done:**
+
+SMB file reads now run in a `ThreadPoolExecutor` sliding window (`PREFETCH_WINDOW = 5`) with a per-read `SMB_READ_TIMEOUT = 60` second hard deadline. A stalled read yields an error sentinel and the scan continues — the scan thread never waits longer than `SMB_READ_TIMEOUT` on a single file.
+
+**Implementation (`file_scanner.py` only):**
+
+- `_smb_collect()` — new method that walks the SMB directory tree (listing only, no reads), yielding `(display_rel, smb_path, size, modified, source_root)` tuples. Over-size files and directory-listing errors are emitted as `_COLLECT_SKIP` / `_COLLECT_ERROR` sentinels.
+- `_iter_smb()` rewritten in two phases:
+  1. Calls `_smb_collect()` to build the full candidate list (fast).
+  2. Resolves sentinels immediately (yielded without entering the executor), then feeds real candidates through a `ThreadPoolExecutor` sliding window. `fut.result(timeout=SMB_READ_TIMEOUT)` gives each read a hard deadline; timed-out futures are cancelled and produce an error card in the UI.
+- Local scanner (`_iter_local`) is untouched — local reads are fast and don't need buffering.
+- No new dependencies.
+
+
+## 22b. OOM on large SMB scans — Partially mitigated (v1.6.8 / v1.6.10)
+
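+**v1.6.8:** `PREFETCH_WINDOW` reduced 5→2; `MAX_FILE_BYTES` reduced 50→20 MB; PDF scans limited by a semaphore of 1 (one PDF at a time); Google Workspace (GWS) connector now runs `del buf` before each yield.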
+
+**v1.6.10:** Three additional buffer-lifetime fixes:
+- `del content` in `_scan_bytes_timeout` after temp-file write — frees the 20 MB PDF buffer before the subprocess spawns its 150–300 MB heap
+- `del content` in `run_file_scan` after thumbnail — frees raw bytes before card dict build and next iteration
+- `PREFETCH_WINDOW` 2→1 — halves peak concurrent SMB read buffers (2 × 20 MB → 1 × 20 MB)
+
+**Remaining risk:** under a very large SMB scan with many back-to-back PDFs the combined main-process + subprocess peak can still exceed available RAM on memory-constrained machines. If OOM recurs, `tracemalloc` profiling on a live scan is the next diagnostic step.
+
+---
+
+## 23. Google Workspace role classification + cross-platform identity mapping
+
+**What was done (v1.6.2) — Phase 1**
+
+- `classification/google_ou_roles.json` — OU prefix → role mapping file (same pattern as `classification/m365_skus.json`). Edit to match your school's OU structure; no code change required.
+- `google_connector.py` — `list_users()` now fetches `orgUnitPath` (via `projection=full`) and calls `classify_ou_role()` to return `userRole` for each user
+- `routes/google_scan.py` — role map built from `list_users()` result; each scan card now gets the correct `user_role` (`staff` / `student` / `other`) instead of always `"other"`
+- Default mapping: `/Elever` → student, `/Personale` → staff (matches Gudenaaskolen.dk OU structure shown in screenshot)
+
+**Background**
+
+M365 staff/student role classification is fully implemented in suggestion #16
+(licence SKU matching, manual overrides, Article 30 split by role). Before
+Phase 1 of this suggestion (v1.6.2), Google Workspace scan results always wrote
+`user_role: "other"` — and there is still no mechanism to link the same
+person's M365 and Google identities when both platforms are in use.
+
+This suggestion extends role classification to Google Workspace and adds
+cross-platform identity mapping for mixed deployments.
+
+**Two real-world scenarios addressed**
+
+| Scenario | Description |
+|---|---|
+| B | Google Workspace only — staff and students in same Workspace domain |
+| C | Mixed M365 + Google, possibly different users on each platform |
+
+Scenario C is the hard case: a municipality might have staff in M365 and
+students in Google, or the same person on both platforms with different email
+addresses and no shared identity provider. Scenario A (M365 only) is already
+fully covered by #16.
+
+---
+
+**Proposed implementation — two phases**
+
+### Phase 1 — Google role classification at scan time (small effort, high value)
+
+Pull role from Google Directory during `list_users()`, before scanning begins.
+No manual configuration required for standard Workspace deployments.
+
+**Google Workspace — `google_connector.py` `list_users()`:**
+
+| Signal | Mapping |
+|---|---|
+| `orgUnitPath` starts with `/Students/` or `/Elever/` | → `student` |
+| `orgUnitPath` starts with `/Staff/` or `/Lærere/` or `/Ansatte/` | → `staff` |
+| Primary email domain matches a configurable domain → role | → configurable |
+| Member of a Google Group matching a configurable pattern | → role from group |
+
+OU path prefixes and group name patterns are configurable in the Admin Settings
+modal (a new "Role mapping" sub-tab under General).
+
+**UI changes (Phase 1):**
+- Google scan cards show role badge `👩‍🏫 Staff` / `🎒 Student` / `—` (M365 cards already do via #16)
+- `user_role` written correctly for Google results (`staff` / `student` / `unknown`) instead of `"other"`
+- Role filter and Article 30 role columns already exist from #16 — no additional UI work needed
+
+---
+
+### Phase 2 — Group/OU mapping rules + manual overrides + cross-platform identity (medium effort)
+
+**Group/OU mapping rules UI** (Settings → Role mapping tab):
+
+A rule list where each rule has:
+```
+IF  [field]          [operator]  [value]        THEN  [role]
+IF  orgUnitPath      starts with /Elever         →    student
+IF  group            member of   all-staff@...   →    staff
+IF  department       contains    Lærer           →    staff
+IF  email domain     equals      skole.dk        →    student
+```
+
+Rules evaluated in order; first match wins. Covers the mixed-platform case:
+if staff are always `@kommune.dk` and students always `@skole.dk`, a single
+domain rule classifies everyone with zero directory API calls.
+
+**Manual override** (Users panel, per-user dropdown):
+
+```
+Auto (staff)  ▼
+  Auto (staff)
+  Staff
+  Student
+  Ignore       ← skips account entirely during scan (service accounts, shared mailboxes)
+```
+
+Stored in a new `user_roles` SQLite table. Survives restarts. "Ignore" is
+immediately useful for service accounts and shared mailboxes that pollute
+results.
+
+**Cross-platform identity linking** (for Scenario C):
+
+New `user_identities` table in `m365_db.py`:
+
+```sql
+CREATE TABLE user_identities (
+    id            INTEGER PRIMARY KEY,
+    canonical_id  TEXT NOT NULL,   -- internal UUID assigned by scanner
+    platform      TEXT NOT NULL,   -- "m365" | "google"
+    email         TEXT NOT NULL,
+    display_name  TEXT,
+    role          TEXT,            -- staff | student | unknown
+    UNIQUE(platform, email)
+);
+```
+
+Matching heuristics (applied automatically, in priority order):
+1. Exact email match across platforms (most common — same address on both)
+2. Same display name + same domain-suffix group
+3. Manual link: drag one user card onto another in the Users panel to merge
+
+Once linked, Article 30 reports and data subject lookups treat both accounts
+as a single person entry:
+> **Henrik Nielsen** — M365: 3 OneDrive files · Google: 12 Gmail messages · Role: Staff
+
+**Dependencies to add:** none (all using existing APIs and DB patterns)
+
+---
+
+**Files to change**
+
+| File | Change |
+|---|---|
+| `m365_connector.py` | `list_users()` returns `role` field derived from licenses/dept/groups |
+| `google_connector.py` | `list_users()` returns `role` field derived from `orgUnitPath`/groups |
+| `m365_db.py` | Add `user_roles` and `user_identities` tables; DB migration |
+| `scan_engine.py` | Pass `role` through to `_broadcast_card()`; apply manual overrides before scan (file will exist after #25 splits `m365_scanner.py`) |
+| `routes/google_scan.py` | Same role pass-through as M365 scan engine |
+| `routes/app_routes.py` | New endpoints: `GET /api/user_roles`, `POST /api/user_roles/set`, `POST /api/user_roles/link` |
+| `templates/index.html` | Role badge CSS; role filter pill; Settings → Role mapping tab |
+| `static/app.js` | Role filter logic; role mapping rules editor; manual override dropdown; identity link drag-handle |
+| `lang/*.lang` | i18n keys for role labels and mapping UI |
+
+**Effort estimate:** Phase 1 ≈ 1 session · Phase 2 ≈ 2–3 sessions
+
+**GDPR articles addressed:** Art. 5(1)(f) integrity and confidentiality,
+Art. 25 data protection by design, Art. 30 records of processing activities
+(role-segmented register), Art. 32 security of processing
+
+---
+
+---
+
+## 24. Rename — M365 Scanner → GDPRScanner ✅
+
+**What was done (v1.6.0)**
+
+- `m365_scanner.py` → `gdpr_scanner.py`; `m365_db.py` → `gdpr_db.py`; `build_m365.*` → `build_gdpr.*`
+- All `~/.m365_scanner_*` config and data paths renamed to `~/.gdpr_scanner_*`
+- Migration shim in `gdpr_scanner.py` silently renames existing files on first startup — scan history, credentials, settings, and role overrides preserved automatically
+- UI title, sidebar heading, About panel, document output strings, install scripts, CI workflow, README, CONTRIBUTING, DEPENDENCIES all updated
+- `m365_connector.py` intentionally unchanged — the prefix correctly describes the Microsoft Graph connector
+- i18n keys describing M365-specific UI (Azure credential fields, device code flow) intentionally keep `m365_` prefix
+
+**Background**
+
+The tool was originally built to scan Microsoft 365. It now scans M365, Google
+Workspace, local file systems, and SMB network shares, and produces GDPR
+compliance reports. The name "M365 Scanner" is actively misleading to new
+users and limits adoption outside Microsoft-centric environments.
+
+**Scope of changes**
+
+This is a purely mechanical rename — no behaviour changes.
+
+| What changes | From | To |
+|---|---|---|
+| Main entry point | `m365_scanner.py` | `gdpr_scanner.py` |
+| M365 connector | `m365_connector.py` | `m365_connector.py` *(keep — it is specific to M365)* |
+| Config file | `~/.m365_scanner.json` | `~/.gdpr_scanner.json` |
+| Token cache | `~/.m365_scanner_token.json` | `~/.gdpr_scanner_token.json` |
+| Database | `~/.m365_scanner.db` | `~/.gdpr_scanner.db` |
+| Role overrides | `~/.m365_scanner_role_overrides.json` | `~/.gdpr_scanner_role_overrides.json` |
+| Delta tokens | `~/.m365_scanner_delta.json` | `~/.gdpr_scanner_delta.json` |
+| Settings | `~/.m365_scanner_settings.json` | `~/.gdpr_scanner_settings.json` |
+| i18n key prefix | `m365_` | `gdpr_` *(or keep `m365_` for M365-specific keys)* |
+| Window title | M365 Scanner | GDPRScanner |
+| `<title>` in HTML | M365 Scanner | GDPRScanner |
+| Sidebar heading | ☁️ M365 Scanner | 🔍 GDPRScanner |
+| Build script | `build_m365.py`, `build_m365.sh` | `build_gdpr.py`, `build_gdpr.sh` |
+| Install scripts | `install_windows.ps1`, `install_macos.sh` | *(rename optional — keep for compatibility)* |
+| README | throughout | update all references |
+| SUGGESTIONS.md | throughout | update all `m365_scanner.py` references |
+
+**Migration shim (one-time, on first startup after rename)**
+
+```python
+# In gdpr_scanner.py startup — idempotent: a no-op once the old files have been renamed
+_OLD_FILES = {
+    Path.home() / ".m365_scanner.json":               Path.home() / ".gdpr_scanner.json",
+    Path.home() / ".m365_scanner.db":                 Path.home() / ".gdpr_scanner.db",
+    Path.home() / ".m365_scanner_token.json":         Path.home() / ".gdpr_scanner_token.json",
+    Path.home() / ".m365_scanner_delta.json":         Path.home() / ".gdpr_scanner_delta.json",
+    Path.home() / ".m365_scanner_settings.json":      Path.home() / ".gdpr_scanner_settings.json",
+    Path.home() / ".m365_scanner_role_overrides.json":Path.home() / ".gdpr_scanner_role_overrides.json",
+}
+for old, new in _OLD_FILES.items():
+    if old.exists() and not new.exists():
+        old.rename(new)
+        print(f"[migrate] {old.name} → {new.name}")
+```
+
+This ensures existing users do not lose their scan history, credentials, or
+settings when upgrading.
+
+**i18n key strategy**
+
+Keep the `m365_` prefix for keys that are genuinely M365-specific (auth
+screens, Azure credential labels). Update keys that describe general scanner
+behaviour (`m365_scan_start` → `gdpr_scan_start`, `m365_settings_title` →
+`gdpr_settings_title`). This avoids a big-bang translation churn — only
+~30% of keys are general rather than M365-specific.
+
+**Files to change**
+
+| File | Change |
+|---|---|
+| `m365_scanner.py` | Rename to `gdpr_scanner.py`; update all internal `m365_` references |
+| `build_m365.py` / `build_m365.sh` | Rename; update entry point reference |
+| `install_windows.ps1` / `install_macos.sh` | Update script name and entry point |
+| `templates/index.html` | `<title>`, sidebar heading, `m365_scanner` → `gdpr_scanner` in JS paths |
+| `lang/en.lang`, `da.lang`, `de.lang` | Rename ~50 general keys from `m365_` to `gdpr_` prefix |
+| `README.md` | Full text update |
+| `SUGGESTIONS.md` | Replace remaining `m365_scanner.py` references |
+
+**Effort:** Small — 1 session. Mostly find-and-replace with careful handling
+of the migration shim and i18n key renames.
+
+---
+
+## 25. Split `gdpr_scanner.py` into focused modules ✅
+
+**Background**
+
+`m365_scanner.py` (to be renamed `gdpr_scanner.py` in #24) is currently ~4800
+lines and contains Flask app setup, scan orchestration, SSE, CPR detection,
+file type dispatch, config, checkpointing, delta tokens, image scanning, and
+more. This makes the file hard to navigate, impossible to unit-test in
+isolation, and increasingly fragile as new scan sources are added.
+
+The Blueprint refactoring (#17) successfully separated the route layer. This
+suggestion applies the same principle to the core application layer.
+
+**Proposed module structure**
+
+```
+gdpr_scanner.py        (~150 lines)
+  Flask app init, blueprint registration, CLI arg parsing, __main__ block.
+  Imports everything else. Entry point only.
+
+scan_engine.py         (~1200 lines)
+  run_m365_scan(), run_file_scan(), run_google_scan()
+  _broadcast_card(), _check_special_category(), _check_transfer_risk()
+  _after_cutoff(), _eta(), _check_abort()
+  Checkpointing calls delegated to checkpoint.py
+
+cpr_detector.py        (~600 lines)
+  _scan_bytes() — top-level dispatcher
+  _scan_pdf(), _scan_docx(), _scan_xlsx(), _scan_image(), _scan_text()
+  CPR regex, modulo-11 validation
+  This is the most important module to isolate — it is the legal core
+  of the tool and the highest-value target for unit tests (#26)
+
+checkpoint.py          (~150 lines)
+  _save_checkpoint(), _load_checkpoint(), _checkpoint_key()
+  _load_delta_tokens(), _save_delta_tokens()
+
+app_config.py          (~120 lines)
+  _load_config(), _save_config()
+  _load_file_sources(), _save_file_sources()
+  _load_keywords(), _load_lang()
+
+sse.py                 (~80 lines)
+  broadcast(), _sse_queues, _sse_buffer, _current_scan_id
+  /api/stream SSE endpoint
+```
+
+**Approach**
+
+The `routes/` blueprints already use `__getattr__` lazy loading to resolve
+globals from `m365_scanner`. After the split, they resolve from `gdpr_scanner`
+(which re-exports everything from the sub-modules). No blueprint changes
+needed.
+
+Split in order of lowest risk first:
+1. `sse.py` — self-contained, no dependencies on other scanner code
+2. `app_config.py` — pure file I/O, no Flask or scan dependencies
+3. `checkpoint.py` — depends only on Path and json
+4. `cpr_detector.py` — depends on document_scanner, PIL, no Flask
+5. `scan_engine.py` — depends on all of the above; split last
+
+Each step: move code → update imports → run smoke test → commit.
+
+**What does NOT move**
+
+- Flask `app` object stays in `gdpr_scanner.py` (blueprints register against it)
+- `_connector`, `_scan_lock`, `_scan_abort` stay in `gdpr_scanner.py` or `routes/state.py`
+- `LANG`, `flagged_items`, `scan_meta` stay in `routes/state.py` (already there)
+
+**Effort:** Medium — 1 session if done carefully in the order above. The
+biggest risk is circular imports; the `__getattr__` pattern already in place
+prevents most of them.
+
+---
+
+## 26. Test suite — pytest for CPR detection, connectors, and DB ✅
+
+**Background**
+
+There are currently zero tests in the repository. For a GDPR compliance tool
+that DPOs and auditors may rely on, this is a credibility gap — especially for
+CPR detection, where a false negative means a real violation goes undetected.
+The split in #25 makes isolated unit testing practical for the first time.
+
+**Test modules, in priority order**
+
+### `tests/test_cpr_detector.py` *(highest priority — legal core)*
+
+```python
+# Known valid CPR numbers
+def test_valid_cpr_detected(): ...
+def test_cpr_in_table_cell_detected(): ...
+def test_cpr_in_pdf_text_layer(): ...
+def test_cpr_split_across_line_break(): ...
+
+# Modulo-11 validation
+def test_valid_checksum_accepted(): ...
+def test_invalid_checksum_rejected(): ...
+def test_exempt_dates_bypass_modulo11(): ...   # post-2007 CPRs exempt
+
+# Date range validation
+def test_future_date_rejected(): ...
+def test_implausible_date_rejected(): ...      # e.g. month 13
+
+# False positive prevention
+def test_phone_number_not_flagged(): ...       # 12 34 56 78
+def test_account_number_not_flagged(): ...     # looks like CPR with dashes
+def test_zip_plus4_not_flagged(): ...
+
+# File type dispatch
+def test_scan_docx_with_cpr(): ...
+def test_scan_xlsx_cpr_in_cell(): ...
+def test_scan_pdf_cpr_in_text_layer(): ...
+def test_scan_plaintext(): ...
+def test_empty_file_returns_empty(): ...
+def test_binary_garbage_does_not_crash(): ...
+```
+
+### `tests/test_m365_connector.py` *(mock-based — no real API calls)*
+
+```python
+def test_classify_user_role_faculty_sku(): ...
+def test_classify_user_role_student_sku(): ...
+def test_classify_user_role_unknown_sku(): ...
+def test_pagination_follows_next_link(): ...
+def test_403_raises_permission_error(): ...
+def test_token_refresh_on_expiry(): ...
+def test_app_mode_vs_delegated_mode(): ...
+```
+
+### `tests/test_google_connector.py`
+
+```python
+def test_service_account_key_validation(): ...
+def test_invalid_key_type_rejected(): ...
+def test_iter_gmail_respects_max_messages(): ...
+def test_drive_export_map_docs_to_docx(): ...
+def test_drive_skips_oversized_files(): ...
+def test_list_users_filters_suspended(): ...
+```
+
+### `tests/test_db.py`
+
+```python
+def test_begin_end_scan_round_trip(): ...
+def test_save_and_retrieve_flagged_item(): ...
+def test_cpr_index_stores_hash_not_plaintext(): ...
+def test_lookup_data_subject_returns_items(): ...
+def test_disposition_set_and_get(): ...
+def test_export_import_merge_cycle(): ...
+def test_export_import_replace_cycle(): ...
+def test_migration_from_prior_schema_version(): ...
+```
+
+**Framework and conventions**
+
+- `pytest` + `unittest.mock` — no new runtime dependencies
+- Fixtures in `tests/conftest.py`: `tmp_db`, `sample_docx`, `sample_pdf`,
+  `mock_m365_connector`, `mock_google_connector`
+- All tests runnable with `pytest tests/` from the project root
+- CI target: all `test_cpr_detector.py` tests must pass before any release
+- Mock strategy for connectors: patch at the `requests.get` / `googleapiclient`
+  level so tests are fast and require no credentials
+
+**CPR test corpus**
+
+A `tests/fixtures/` folder with:
+- `sample_with_cpr.docx` — Word file containing 3 known CPR numbers
+- `sample_with_cpr.pdf` — PDF with text layer containing 1 CPR
+- `sample_no_cpr.xlsx` — Excel file with account numbers that look like CPRs
+- `sample_art9.txt` — text file with CPR adjacent to Article 9 keywords
+- `sample_binary.bin` — garbage bytes (must not crash scanner)
+
+**Effort:** ~1 session for `test_cpr_detector.py` + `test_db.py`.
+Connector tests add another session once #25 is complete (modules need to be
+importable in isolation first).
+
+## 27. Migrate i18n format from `.lang` to JSON ✅
+
+**Background**
+
+The current `.lang` format is a flat `key = value` text file with a custom
+loader. It works well for the current scale (3 languages, ~700 keys) and has
+no dependencies. This suggestion tracks a potential migration for when the
+format becomes a limiting factor.
+
+**Current state**
+
+- Server-side loader in `app_config.py` parses `.lang` files into a Python dict
+- The `/api/lang` endpoint converts that dict to JSON for the browser anyway
+- Keys use prefix namespacing (`m365_`, `gdpr_`) as a poor-man's hierarchy
+- Three language files: `en.lang`, `da.lang`, `de.lang`
+
+**Why JSON would be better at scale**
+
+- The browser already receives JSON — removing the conversion step simplifies
+  `app_config.py` and makes lang files directly usable in JS unit tests
+- Nested keys (`{"scan": {"start": "Start scan"}}`) would replace the
+  prefix convention with real structure
+- Standard tooling (VS Code JSON schema, linters) would work out of the box
+- Easier to validate completeness across languages programmatically
+
+**Why not now**
+
+- The existing format works and the loader is already written
+- A migration touches every key in all three lang files plus the loader —
+  high effort, zero user-visible benefit
+- Three languages and ~700 keys is well within the comfort zone of flat files
+
+**Trigger condition:** consider when adding a 4th language, when key count
+exceeds ~1500, or when a contributor wants to use professional translation
+tooling (Poedit, Weblate, Transifex) that expects standard formats.
+
+**Effort:** Small (loader rewrite + file conversion script) — but the migration
+touches every lang file, so it is best done in one clean pass, not incrementally.
+
+
+## 28. Disposition: personal-use — out of scope ✅
+
+**Background**
+
+Staff members often use work equipment (OneDrive, email) for private purposes.
+A scan will surface these files alongside genuine work records. The organisation
+has no compliance obligation over personal files — in fact, scanning them may
+itself be a GDPR issue (Article 2(2)(c) excludes processing by a natural person
+in the course of a purely personal activity from GDPR scope entirely).
+
+There was no way to mark a flagged item as "this is private, not our business"
+without using a work-specific disposition like "retain-legal" which is
+semantically wrong.
+
+**What was done (v1.6.2)**
+
+Added `personal-use` as a disposition value:
+
+| Value | Meaning |
+|---|---|
+| `personal-use` | Private use of work equipment — outside GDPR scope per Art. 2(2)(c) |
+
+- Added to both disposition dropdowns in the UI (filter bar and preview panel)
+- Added to Art. 30 report disposition map with the legal citation
+- Added to all three lang files (EN / DA / DE)
+- Article 30 report labels it "Personal use — out of GDPR scope (Art. 2(2)(c))"
+
+**GDPR basis:** Article 2(2)(c) — GDPR does not apply to processing by a natural
+person in the course of a purely personal or household activity.
+
+
+## 29. Rename `skus/` → `classification/` ✅
+
+**Background**
+
+The `skus/` folder was created to hold Microsoft Education SKU ID mappings
+(`m365_skus.json`). It now also holds Google Workspace OU role mappings
+(`google_ou_roles.json`), and may grow further as more platforms are added.
+The name "skus" is Microsoft-specific and misleading for a multi-platform tool.
+
+**Proposed rename**
+
+`skus/` → `classification/`
+
+Optionally sub-divided as the folder grows:
+```
+classification/
+  m365_skus.json          # M365 SKU → role (currently skus/m365_skus.json)
+  google_ou_roles.json    # Google OU → role (currently skus/google_ou_roles.json)
+```
+
+**Files to change**
+
+| File | Change |
+|---|---|
+| `skus/` directory | Rename to `classification/` |
+| `m365_connector.py` | Update path constant `_SKU_DIR` or equivalent |
+| `google_connector.py` | Update `_OU_ROLES_PATH` constant |
+| `build_gdpr.py` | Update `skus_dir` reference in `datas` list |
+| `install_windows.ps1` / `install_macos.sh` | Update any references |
+| `MAINTAINER.md` | Update file listing |
+
+**Trigger condition:** do this when #23 Phase 2 lands, or when a third
+classification file is added — whichever comes first. Not worth doing in
+isolation.
+
+**Effort:** Tiny — pure rename, no logic changes.
+
+
+
+## 30. Google personal account (OAuth) support ✅ Done
+
+**GDPR reference:** Art. 5(1)(f) — integrity and confidentiality; Art. 32 — security of processing
+
+**What:** Personal Google accounts can now be scanned without a service account or Workspace admin. A device-code OAuth flow (mirrors M365 delegated mode) lets a user sign in interactively with their own Google account and scan their own Gmail and Google Drive.
+
+**Why:** Mirrors the M365 delegated mode. Useful for individuals, small organisations, or situations where a Google Workspace admin is unavailable.
+
+**Implementation:**
+- Auth-mode toggle (Workspace / Personal account) in the Google connection panel
+- Personal section: OAuth 2.0 client ID + secret (from a GCP Desktop App credential); device-code box shows `user_code` + `verification_url` inline
+- `PersonalGoogleConnector` class in `google_connector.py` — same public interface as `GoogleConnector`; `get_device_code_flow()` / `complete_device_code_flow()` hit Google's device-auth endpoint directly via `requests`; token refresh via `google.oauth2.credentials.Credentials`
+- `list_users()` returns a single-item list (the signed-in user from `/oauth2/v2/userinfo`) — scan engine unchanged
+- `_gmail_iter()` / `_drive_iter()` extracted as shared module-level helpers; both connector classes delegate to them
+- Token persisted to `~/.gdprscanner/google_token.json` (chmod 600)
+- Four new API endpoints: `GET /api/google/personal/status`, `POST /api/google/personal/start`, `POST /api/google/personal/poll`, `POST /api/google/personal/signout`
+- Backend poll pattern identical to M365 delegated: background thread blocks on `complete_device_code_flow`, frontend polls every 3 s
+- Scopes: `gmail.readonly`, `drive.readonly`
+- 14 new i18n keys in `en.json`, `da.json`, `de.json`
+
+**Size:** Medium  
+**Priority:** Low — service account covers institutional use cases well
+
+
+---
+
+## 31. Built-in user manual accessible from the interface ✅ Done
+
+**What:** End-user documentation accessible directly from the running application — no external site, no separate PDF, printable from the browser.
+
+**Why:** The scanner is used by school administrators and municipal compliance officers who are not technically minded. A built-in manual reduces support burden and ensures the right version of the documentation is always paired with the installed version.
+
+**Implementation:**
+- `MANUAL-EN.md` and `MANUAL-DA.md` — standalone Markdown manuals covering all major features in plain language. 14 sections each: Getting started, Sources panel, Running a scan, Understanding results, Reviewing results, Bulk actions, Profiles, Scheduler, Export & email, Article 30 report, Data subject lookup, Settings, Retention policy, FAQ.
+- `GET /manual` route in `routes/app_routes.py` — reads `?lang=da|en` (defaults to the current UI language), finds the appropriate `.md` file relative to the project root, converts it to a fully self-contained HTML page, and returns it.
+- `_md_to_html(md)` — zero-external-dependency Markdown-to-HTML converter using only Python's `re` and `html` stdlib modules. Handles: headings with anchor IDs, fenced code blocks, tables, ordered/unordered lists, blockquotes, bold, italic, inline code, links, horizontal rules.
+- Manual page features: max-width 860 px readable layout, language switcher (DA ↔ EN), 🖨 print button (calls `window.print()`), `@media print` CSS that hides the toolbar, forces page breaks before `<h2>` sections, and appends external link URLs for paper printing.
+- `?` button in the topbar (right of the theme toggle) — `window.open('/manual?lang=...', '_blank')` with the current `langSelect` value. Opens in a new tab without interrupting any in-progress scan.
+- No new dependencies. The manual route is stateless and always up to date with the installed version.
+
+**Size:** Small  
+**Priority:** Medium — reduces support requests; required for regulated-sector deployments
+
+
+---
+
+## 32. Windowed mode for Profiles, Sources, and Settings
+
+**What:** Replace the three modal dialogs (Profiler, Kilder, Indstillinger) with dedicated windows — either native pywebview windows (in the packaged desktop app) or browser popups (in the web UI).
+
+**Why:** Modals are blocking and interrupt the main workspace. A compliance officer reviewing scan results should be able to check or edit a profile without losing their place in the results grid. Separate windows allow the main view and the configuration panel to be visible simultaneously — useful on multi-monitor setups common in school admin offices.
+
+**Three implementation options were evaluated:**
+
+**Option A — Main app URL with `?panel=X` query param** *(least work)*
+- The existing modal HTML/CSS/JS is reused unchanged.
+- A new window opens `http://localhost:5100/?panel=profiles` — the JS detects the param on load and auto-opens the relevant modal.
+- In the packaged app: `pywebview.api.open_panel("profiles")` creates a second native window (same pattern as the manual viewer).
+- State sync (e.g. "profile saved, refresh main window") via `postMessage` or `localStorage` events.
+- **Pro:** Zero modal rewrite. **Con:** Each popup loads the full ~3800-line app; two JS instances share the same Flask server.
+- **Estimated effort:** 1–2 days.
+
+**Option B — Dedicated Flask routes serving lightweight standalone pages** *(most work, cleanest)*
+- `/panel/profiles`, `/panel/sources`, `/panel/settings` — each a minimal self-contained HTML page talking to the existing API endpoints.
+- **Pro:** Clean separation, small pages, no duplicate state. **Con:** All three modal JS sections must be rewritten as standalone pages; shared utilities (i18n, `_esc`, rendering helpers) must be extracted or replicated.
+- **Estimated effort:** 15–20 days (Profiles: 3–4 d, Sources: 5–6 d, Settings: 4–5 d, shared infra: 1–2 d, QA: 2–3 d).
+
+**Option C — Side drawer instead of popup** *(no new windows, best UX for single-monitor)*
+- Modals become slide-in side drawers that don't block the main results grid.
+- **Pro:** No window management complexity, works identically in app and browser, no state sync needed. **Con:** Not a true separate window.
+- **Estimated effort:** 2–3 days.
+
+**Decision:** Won't do. The workflow is sequential (configure → scan → review) — there is no realistic scenario where a modal and the results grid need to be open simultaneously. The Sources panel is already permanently visible in the sidebar, covering the main configuration need during result review. Option A (the least-work path) would still load the full ~3800-line JS stack in a second window, sharing the same Flask server — poor value for a configuration-only panel. Closed 2026-04-10.
+
+**Size:** Option A: Small · Option B: Large · Option C: Small  
+**Priority:** N/A — closed
+
+---
+
+## 33. Read-only viewer mode with PIN/token URL ✅
+
+**GDPR reference:** Art. 5(2) — accountability; Art. 30 — records of processing activities
+
+**Problem:** The scanner is operated by IT, but the people who need to review results and make compliance decisions (DPO, school principal, municipal data protection coordinator) are different people. Currently the only way to share results is to export to Excel or Word — a static snapshot. There is no way to give a stakeholder live access to the results grid (with disposition tagging) without also giving them full access to scan controls, credentials, and settings.
+
+**What:** A token-protected URL that opens a read-only view of the scan results. The viewer can browse the results grid, open previews, and tag dispositions — but cannot start or stop scans, view or change credentials, access settings, or delete items.
+
+**How it works:**
+
+1. **Token generation** — a new **Share** button in the top bar (or Settings) generates a random URL-safe token (e.g. 32-byte hex) and stores it in `~/.gdprscanner/viewer_tokens.json` with an optional expiry date. The full URL is displayed and copyable: `http://host:5100/view?token=abc123…`
+2. **Token validation** — a `@viewer_token_required` decorator checks `request.args.get("token")` or a session cookie against the stored tokens. Invalid or expired tokens return 403.
+3. **Restricted route** — `/view` serves a stripped version of `index.html` (or the same template with JS feature flags) that hides the scan controls, credentials, source management, settings, and delete buttons. Disposition tagging remains enabled — this is the primary action a reviewer needs.
+4. **PIN alternative** — optionally, instead of (or alongside) a token URL, a numeric PIN can be set in Settings. Entering the PIN in a login prompt grants the same read-only session for the browser's session duration.
+5. **Expiry** — tokens can be time-limited (e.g. 7 days, 30 days, no expiry). Expired tokens are silently rejected and cleaned up on next startup.
+6. **Scope** — viewer sees the most recent completed scan's results from the DB, identical to what the operator sees in the main results grid. Live scan progress is not shown.
+
+**What the viewer can do:**
+- Browse results grid (filter, sort, search)
+- Open item preview (file preview, email preview, EXIF, face count)
+- Tag dispositions (retain / delete-scheduled / deleted / personal-use)
+- Export to Excel and Article 30 Word doc
+
+**What the viewer cannot do:**
+- Start, stop, or configure scans
+- View or change M365 / Google credentials
+- Access source management or settings
+- Delete items from M365 / Google / file systems
+- Generate or revoke viewer tokens
+
+**Implementation notes:**
+- Simplest path: serve the same `index.html` but inject a `window.VIEWER_MODE = true` JS global. All feature modules check this flag to hide/disable restricted controls. No second template needed.
+- Token storage in `viewer_tokens.json` (alongside other data files in `~/.gdprscanner/`) keeps it simple and consistent with existing persistence.
+- No new dependencies — `secrets.token_hex(32)` for token generation, existing Flask session for PIN-based sessions.
+- The `/view` route and token validation live in `routes/auth.py` or a new `routes/viewer.py`.
+
+**Size:** Medium — ~3–5 days (token generation + storage + validation decorator + JS viewer-mode flag + UI hiding + PIN flow + Settings panel entry).  
+**Priority:** Medium — directly supports the multi-stakeholder review workflow common in schools and municipalities.
+
+---
+
+## Summary table
+
+| # | Effort | GDPR Article | Impact | Status |
+|---|---|---|---|---|
+| 1 | Small | Art. 5(1)(e) — storage limitation | High | ✅ Done |
+| 2 | Medium | Art. 30 — processing register | High | ✅ Done |
+| 3 | Medium | Art. 9 — special categories | High | ✅ Done |
+| 4 | Medium | Art. 15/17 — access/erasure rights | High | ✅ Done |
+| 5 | Medium | Art. 44–46 — data transfers | Medium | ✅ Done |
+| 6 | Small | Art. 5(1)(a) / Art. 30 — lawfulness | Medium | ✅ Done |
+| 7 | Small | Art. 5(2) — accountability | Medium | ✅ Done |
+| 8 | Large | Art. 5(1)(c)(e) — data minimisation | High | ✅ Done |
+| 9 | Medium | Art. 9 — biometric data (photos) | High | ✅ Done |
+| 10 | Large | Google Workspace scanning (Gmail & Drive) | High | ✅ Done |
+| 11 | Medium | Art. 5(2) — accountability | Medium | ✅ Done |
+| 12 | — | — | — | ~~Retired — merged into #8~~ |
+| 13 | Small | Performance | Low | ✅ Done |
+| 14 | Tiny  | UI polish | Low | ✅ Done (phase text) |
+| 15a | Small  | Art. 5(2) — accountability | High | ✅ Done |
+| 15b | Small  | Art. 5(2) — accountability | High | ✅ Done |
+| 15c | — | — | — | ~~Dropped~~ |
+| 15d | Medium | Art. 5(2) — accountability | High | ✅ Done |
+| 15e | Medium | Art. 5(2) — accountability | Medium | ✅ Done |
+| 15f | Large  | Art. 5(2) — accountability | High | ✅ Done |
+| 16  | Medium | Art. 30, Databeskyttelsesloven §6 | High | ✅ Done |
+| 17  | Medium | UX / configurability | Medium | ✅ Done |
+| 18  | Small  | Art. 4, Art. 9 — EXIF / location | High | ✅ Done |
+| 19  | Medium | Art. 5(2), Art. 25, Art. 32 — scheduled compliance | High | ✅ Done (v1.5.5) |
+| 20  | Small  | File scan quality — PDF OCR via multiprocessing | Medium | ✅ Done |
+| 21  | Small  | UX — SSE event replay for late-connecting browsers | Medium | ✅ Done |
+| 22  | Medium | File scan reliability — SMB pre-fetch cache | Low | ✅ Done |
+| 23  | Medium/Large | Art. 5, 25, 30, 32 — Google Workspace role classification + cross-platform identity mapping | High | ✅ Done |
+| 24  | Small        | Codebase hygiene — rename M365 Scanner → GDPRScanner | Medium | ✅ Done |
+| 25  | Medium       | Codebase hygiene — split `gdpr_scanner.py` into focused modules | Medium | ✅ Done |
+| 26  | Medium       | Quality — pytest suite for CPR detection, connectors, DB | High | ✅ Done |
+| 27  | Small        | Codebase hygiene — migrate i18n from `.lang` to JSON | Low | ✅ Done |
+| 28  | Tiny         | Compliance UX — personal-use disposition value | Medium | ✅ Done |
+| 29  | Tiny         | Codebase hygiene — rename `skus/` → `classification/` | Low | ✅ Done |
+| 30  | Medium | Personal Google account OAuth (delegated mode like M365) | Low | ✅ Done |
+| 31  | Small  | Built-in user manual accessible from the interface | Medium | ✅ Done |
+| 32  | Small–Large (option-dependent) | UX — windowed mode for Profiles, Sources, Settings | Low | ✗ Won't do |
+| 33  | Medium | Compliance UX — read-only viewer mode with PIN/token URL | Medium | ✅ Done |
+
+---
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..53418a5
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,46 @@
+# TODO — Pending features and sustainability
+
+Quick overview of what's still to be done. Full details in [SUGGESTIONS.md](SUGGESTIONS.md).
+
+---
+
+## Recently completed
+
+### Memory exhaustion during large M365 scans ✅
+Six root causes fixed in `scan_engine.py` and `document_scanner.py`:
+- Email body HTML stripped at collection time (`body` key deleted from each message dict before it enters `work_items`; plain text stored as `_precomputed_body` instead)
+- `work_items` list converted to a `deque` before processing so each item is released immediately after `popleft()`
+- `del content` added in file-processing branch as soon as raw bytes are no longer needed (before NER/PII counting)
+- `del body_text` added after email body is fully consumed
+- PDF OCR page images (`PIL.Image`) nulled out one by one after OCR instead of holding all pages in RAM
+- Memory guard using `psutil` skips file downloads when < 300 MB RAM is available
+
+**Still open:** The collection phase itself is still a "gather all, then process" loop. For very large tenants (>500k emails) the pre-extracted plain text in `work_items` could still be significant. The complete fix is to process each user's emails/files inline as they are fetched (generator/streaming pattern) rather than accumulating them into `work_items` first — estimated 1–2 days of refactor.
+
+---
+
+## Pending
+
+### #15 — Scan profiles ✅
+Named, reusable scan configurations. Full spec in SUGGESTIONS.md §15.  
+**Size:** Large · **Priority:** High
+
+### #23 — Google Workspace role classification + cross-platform identity mapping ✅
+Full spec in SUGGESTIONS.md §23.  
+**Size:** Large · **Priority:** Medium
+
+### #27 — Migrate i18n format from `.lang` to JSON ✅
+Full spec in SUGGESTIONS.md §27.  
+**Size:** Medium · **Priority:** Low
+
+### #29 — Rename `skus/` → `classification/` ✅
+Full spec in SUGGESTIONS.md §29.  
+**Size:** Small · **Priority:** Low
+
+### #33 — Read-only viewer mode with PIN/token URL ✅
+A shareable URL (token-protected) or numeric PIN that gives a DPO, school principal, or compliance coordinator read-only access to the results grid — with disposition tagging but without scan controls, credentials, or delete access. Full spec in SUGGESTIONS.md §33.  
+**Size:** Medium · **Priority:** Medium
+
+### #32 — Windowed mode for Profiles, Sources, and Settings ✗ Won't do
+The workflow is sequential (configure → scan → review), not parallel — there is no realistic scenario where a modal and the results grid need to be open simultaneously. The Sources panel is already visible in the sidebar. Option A (the least-work path) still loads the full 3800-line JS stack twice. Closed.
+
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..5577648
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+1.6.14
diff --git a/app_config.py b/app_config.py
new file mode 100644
index 0000000..6f51ca2
--- /dev/null
+++ b/app_config.py
@@ -0,0 +1,794 @@
+"""
+app_config.py — Configuration, i18n, keywords, profiles, settings,
+                SMTP config, file sources, and Fernet encryption for GDPRScanner.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import logging
+import re as _re
+import time
+import uuid as _uuid
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+_DATA_DIR = Path.home() / ".gdprscanner"
+_DATA_DIR.mkdir(exist_ok=True)
+
+from typing import Optional
+
+# ── i18n ──────────────────────────────────────────────────────────────────────
+
+def _load_lang() -> dict:
+    import locale, sys as _sys, os as _os, subprocess as _sp
+    from pathlib import Path as _Path
+    _here = _Path(_sys._MEIPASS) if getattr(_sys, "frozen", False) else _Path(__file__).parent
+    lang_dir = _here / "lang"
+    lang_code = "en"
+    try:
+        if _sys.platform == "darwin":
+            try:
+                r = _sp.run(["defaults", "read", "-g", "AppleLocale"],
+                            capture_output=True, text=True, timeout=3)
+                if r.returncode == 0 and r.stdout.strip():
+                    lang_code = r.stdout.strip().split("_")[0].split("-")[0].lower()
+            except Exception:
+                pass
+            if lang_code == "en":
+                try:
+                    r = _sp.run(["defaults", "read", "-g", "AppleLanguages"],
+                                capture_output=True, text=True, timeout=3)
+                    import re as _re
+                    m = _re.search(r'"([a-z]{2})[-_]', r.stdout, _re.I)
+                    if m:
+                        lang_code = m.group(1).lower()
+                except Exception:
+                    pass
+        else:
+            loc = (locale.getlocale()[0] or _os.environ.get("LC_ALL") or
+                   _os.environ.get("LANG") or "en")
+            lang_code = loc.split("_")[0].split(".")[0].split("-")[0].lower() or "en"
+    except Exception:
+        lang_code = "en"
+
+    def _parse(path) -> dict:
+        import json as _json
+        out = {}
+        try:
+            if path.suffix == ".json":
+                out = _json.loads(path.read_text(encoding="utf-8"))
+            else:
+                for line in path.read_text(encoding="utf-8").splitlines():
+                    line = line.strip()
+                    if not line or line.startswith("#") or "=" not in line:
+                        continue
+                    k, _, v = line.partition("=")
+                    out[k.strip()] = v.strip()
+        except Exception:
+            pass
+        return out
+
+    for code in [lang_code, "en"]:
+        # Prefer .json, fall back to .lang for backward compatibility
+        for ext in [".json", ".lang"]:
+            p = lang_dir / f"{code}{ext}"
+            if p.exists():
+                result = _parse(p)
+                result["_lang_code"] = code
+                logger.info("[i18n] loaded %s  (%d keys)", p, len(result))
+                return result
+    return {}
+
+def _load_lang_forced(code: str) -> dict:
+    import sys as _sys
+    from pathlib import Path as _Path
+    _here = _Path(_sys._MEIPASS) if getattr(_sys, "frozen", False) else _Path(__file__).parent
+    lang_dir = _here / "lang"
+    def _parse(path) -> dict:
+        import json as _json
+        out = {}
+        try:
+            if path.suffix == ".json":
+                out = _json.loads(path.read_text(encoding="utf-8"))
+            else:
+                for line in path.read_text(encoding="utf-8").splitlines():
+                    line = line.strip()
+                    if not line or line.startswith("#") or "=" not in line:
+                        continue
+                    k, _, v = line.partition("=")
+                    out[k.strip()] = v.strip()
+        except Exception:
+            pass
+        return out
+    for c in [code, "en"]:
+        for ext in [".json", ".lang"]:
+            p = lang_dir / f"{c}{ext}"
+            if p.exists():
+                result = _parse(p)
+                result["_lang_code"] = c
+                return result
+    return {}
+
+_LANG_OVERRIDE_FILE = _DATA_DIR / "lang"
+
+def _lang_override() -> "str | None":
+    try:
+        v = _LANG_OVERRIDE_FILE.read_text().strip()
+        return v if v else None
+    except Exception:
+        return None
+
+def _set_lang_override(code: str) -> None:
+    try:
+        _LANG_OVERRIDE_FILE.write_text(code.strip())
+    except Exception:
+        pass
+
+
+# ── Display name resolver (used by scan_engine) ───────────────────────────────
+import re as _re2
+
+_GUID_RE = _re2.compile(
+    r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', _re2.I
+)
+_GENERIC_DISPLAY_NAMES = {
+    "microsoft konto", "microsoft account", "microsoftkonto",
+    "microsoft-konto", "compte microsoft", "cuenta de microsoft",
+}
+
+def _resolve_display_name(display_name: str, email: str = "", upn: str = "") -> str:
+    """Return the best human-readable name for a Microsoft 365 user."""
+    dn = (display_name or "").strip()
+    if not dn or _GUID_RE.match(dn) or dn.lower() in _GENERIC_DISPLAY_NAMES:
+        return email or upn or dn
+    return dn
+
+LANG = _load_lang_forced(_lang_override()) if _lang_override() else _load_lang()
+logger.info("[i18n] gdpr lang=%s  keys=%d", LANG.get("_lang_code", "?"), len(LANG))
+
+# ── Article 9 sensitive keywords ──────────────────────────────────────────────
+import re as _re
+
+_KEYWORDS_PATH     = Path(__file__).parent / "keywords"
+_keyword_data:     dict = {}
+_keyword_flat:     list = []   # (keyword, category_key) kept for reference / len count
+_compiled_keywords: dict = {}  # cat_key → compiled re.Pattern  (#13)
+_KEYWORD_WINDOW    = 150        # characters around a keyword to check for CPR proximity
+
+def _load_keywords(lang: str = "da") -> None:
+    """Load keyword list from keywords/{lang}.json and compile one regex per
+    Article 9 category.  Falls back to da.json if unavailable.
+
+    Each category pattern is an alternation of all its keywords, sorted
+    longest-first and anchored with negative-lookbehind/lookahead so that
+    short tokens (≤4 chars) require a word boundary while longer ones are
+    matched as substrings.  The compiled regex is ~10–50× faster than the
+    previous sequential str.find() loop for large texts. (#13)
+    """
+    global _keyword_data, _keyword_flat, _compiled_keywords
+    for candidate in [lang, "da"]:
+        p = _KEYWORDS_PATH / f"{candidate}.json"
+        if p.exists():
+            try:
+                import json as _kjson
+                _keyword_data = _kjson.loads(p.read_text(encoding="utf-8"))
+                flat: list = []
+                categories: dict = {}
+                for cat_key, cat_val in _keyword_data.items():
+                    if cat_key.startswith("_") or not isinstance(cat_val, dict):
+                        continue
+                    kws = [kw.lower() for kw in cat_val.get("keywords", [])]
+                    for kw in kws:
+                        flat.append((kw, cat_key))
+                    categories[cat_key] = kws
+
+                _keyword_flat = sorted(flat, key=lambda x: -len(x[0]))
+
+                # Compile one alternation regex per category (#13)
+                compiled: dict = {}
+                for cat, kws in categories.items():
+                    if not kws:
+                        continue
+                    # Sort longest-first so the engine prefers the most specific match
+                    sorted_kws = sorted(kws, key=len, reverse=True)
+                    parts = []
+                    for kw in sorted_kws:
+                        esc = _re.escape(kw)
+                        if len(kw) <= 4:
+                            # Whole-word boundary for short tokens
+                            parts.append(r"(?<!\w)" + esc + r"(?!\w)")
+                        else:
+                            parts.append(esc)
+                    compiled[cat] = _re.compile(
+                        "(?:" + "|".join(parts) + ")",
+                        _re.IGNORECASE,
+                    )
+                _compiled_keywords = compiled
+
+                logger.info("[keywords] Loaded %d keywords (%d categories compiled) from keywords/%s.json",
+                            len(_keyword_flat), len(compiled), candidate)
+                return
+            except Exception as e:
+                logger.warning("[keywords] Failed to load %s: %s", p, e)
+
+_load_keywords(LANG.get("_lang_code", "da"))
+
+
+def _check_special_category(text: str, cprs: list) -> list:
+    """Return sorted list of Article 9 category keys detected near a CPR number.
+
+    Uses compiled per-category regex patterns for efficient matching (#13).
+    A keyword counts only when it starts within _KEYWORD_WINDOW characters of
+    the start of a CPR-shaped number; with no located CPRs any keyword counts.
+    Returns e.g. ['criminal', 'health'] — empty list if none detected.
+    """
+    if not _compiled_keywords or not text:
+        return []
+    text_lower = text.lower()
+    found_cats: set = set()
+
+    # Locate CPR positions for proximity check
+    cpr_positions: list = []
+    if cprs:
+        for m in _re.finditer(r"\d{6}[-\s]?\d{4}", text_lower):
+            cpr_positions.append(m.start())
+
+    for cat, pattern in _compiled_keywords.items():
+        # Use compiled regex — single-pass alternation match per category
+        for m in pattern.finditer(text_lower):
+            idx = m.start()
+            if not cpr_positions or any(
+                abs(idx - cp) <= _KEYWORD_WINDOW for cp in cpr_positions
+            ):
+                found_cats.add(cat)
+                break  # One match per category is enough
+
+    return sorted(found_cats)
+
+
+_CONFIG_FILE = _DATA_DIR / "config.json"
+
+import hashlib as _hashlib
+
+_ADMIN_PIN_KEY = "admin_pin_hash"
+
+def _get_admin_pin_hash() -> str:
+    """Return the stored admin PIN hash, or empty string if not set."""
+    cfg = _load_config()
+    return cfg.get(_ADMIN_PIN_KEY, "")
+
+def _set_admin_pin(pin: str) -> None:
+    """Hash and store the admin PIN in the config file."""
+    h = _hashlib.sha256(pin.encode()).hexdigest()
+    cfg = _load_config()
+    cfg[_ADMIN_PIN_KEY] = h
+    _save_config(cfg)
+
+def _verify_admin_pin(pin: str) -> bool:
+    """Return True if the PIN matches the stored hash."""
+    stored = _get_admin_pin_hash()
+    if not stored:
+        return False
+    return _hashlib.sha256(pin.encode()).hexdigest() == stored
+
+def _admin_pin_is_set() -> bool:
+    return bool(_get_admin_pin_hash())
+
+
+def _load_config() -> dict:
+    if _CONFIG_FILE.exists():
+        try:
+            return json.loads(_CONFIG_FILE.read_text())
+        except Exception:
+            pass
+    return {}
+
+def _save_config(cfg: dict):
+    try:
+        _CONFIG_FILE.write_text(json.dumps(cfg, indent=2))
+    except Exception:
+        pass
+
+
+# ── Profile storage (15a) ─────────────────────────────────────────────────────
+_SETTINGS_PATH     = _DATA_DIR / "settings.json"
+_SRC_TOGGLES_PATH  = _DATA_DIR / "src_toggles.json"
+
+def _load_src_toggles() -> dict:
+    """Load persisted source toggle state."""
+    try:
+        if _SRC_TOGGLES_PATH.exists():
+            return json.loads(_SRC_TOGGLES_PATH.read_text(encoding="utf-8"))
+    except Exception:
+        pass
+    return {}
+
+def _save_src_toggles(state: dict) -> None:
+    """Persist source toggle state."""
+    try:
+        existing = _load_src_toggles()
+        existing.update(state)
+        tmp = _SRC_TOGGLES_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(existing, ensure_ascii=False, indent=2), encoding="utf-8")
+        tmp.replace(_SRC_TOGGLES_PATH)
+    except Exception as e:
+        logger.error("[src_toggles] write failed: %s", e)
+
+
+def _profiles_load() -> list:
+    """Return list of all profiles from settings file."""
+    try:
+        if not _SETTINGS_PATH.exists():
+            return []
+        data = json.loads(_SETTINGS_PATH.read_text(encoding="utf-8"))
+        # Migrate: old flat settings → wrapped in a default profile
+        if isinstance(data, dict) and "profiles" not in data and (
+            "sources" in data or "user_ids" in data
+        ):
+            data = {"profiles": [_profile_from_settings(data, name="Default")]}
+            _profiles_write(data)
+        return data.get("profiles", [])
+    except Exception:
+        return []
+
+
+def _profiles_write(data: dict) -> None:
+    """Write the full settings dict (including profiles) atomically."""
+    try:
+        tmp = _SETTINGS_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2, default=str),
+                       encoding="utf-8")
+        tmp.replace(_SETTINGS_PATH)
+    except Exception as e:
+        logger.error("[profiles] write failed: %s", e)
+
+
+def _profiles_save_all(profiles: list) -> None:
+    """Overwrite the profiles list, preserving any other top-level keys."""
+    try:
+        data = {}
+        if _SETTINGS_PATH.exists():
+            data = json.loads(_SETTINGS_PATH.read_text(encoding="utf-8"))
+    except Exception:
+        data = {}
+    data["profiles"] = profiles
+    _profiles_write(data)
+
+
+def _profile_from_settings(settings: dict, name: str = "Default",
+                            description: str = "") -> dict:
+    """Wrap a flat settings dict as a profile."""
+    import uuid as _uuid
+    return {
+        "id":           str(_uuid.uuid4()),
+        "name":         name,
+        "description":  description,
+        "sources":        settings.get("sources", []),
+        "google_sources": settings.get("google_sources", []),
+        "user_ids":       settings.get("user_ids", []),
+        "options":        settings.get("options", {}),
+        "retention_years":  settings.get("retention_years"),
+        "fiscal_year_end":  settings.get("fiscal_year_end"),
+        "email_to":       settings.get("email_to", ""),
+        "file_sources":   settings.get("file_sources", []),
+        "last_run":     settings.get("last_run"),
+        "last_scan_id": settings.get("last_scan_id"),
+    }
+
+
+def _profile_get(name_or_id: str) -> dict | None:
+    """Find a profile by name (case-insensitive) or ID."""
+    for p in _profiles_load():
+        if p.get("id") == name_or_id or \
+           p.get("name", "").lower() == name_or_id.lower():
+            return p
+    return None
+
+
+def _profile_save(profile: dict) -> dict:
+    """Insert or update a profile. Assigns a new UUID if id is missing."""
+    import uuid as _uuid
+    if not profile.get("id"):
+        profile["id"] = str(_uuid.uuid4())
+    profiles = _profiles_load()
+    for i, p in enumerate(profiles):
+        if p.get("id") == profile["id"]:
+            profiles[i] = profile
+            _profiles_save_all(profiles)
+            return profile
+    profiles.append(profile)
+    _profiles_save_all(profiles)
+    return profile
+
+
+def _profile_delete(name_or_id: str) -> bool:
+    """Delete a profile by name or ID. Returns True if found and deleted."""
+    profiles = _profiles_load()
+    before   = len(profiles)
+    profiles = [p for p in profiles
+                if p.get("id") != name_or_id
+                and p.get("name", "").lower() != name_or_id.lower()]
+    if len(profiles) == before:
+        return False
+    _profiles_save_all(profiles)
+    return True
+
+
+def _profile_touch(profile_id: str, scan_id: int) -> None:
+    """Update last_run and last_scan_id after a successful scan."""
+    import datetime as _dt2
+    profiles = _profiles_load()
+    for p in profiles:
+        if p.get("id") == profile_id:
+            p["last_run"]     = _dt2.datetime.now().isoformat(timespec="seconds")
+            p["last_scan_id"] = scan_id
+            break
+    _profiles_save_all(profiles)
+
+
+# ── Legacy shim — keep _save_settings / _load_settings working ────────────────
+
+def _save_settings(payload: dict, profile_name: str | None = None,
+                   profile_id: str | None = None) -> None:
+    """Save settings. Upserts the active profile (or 'Default' if none).
+    profile_id takes precedence over profile_name when both are given."""
+    profiles = _profiles_load()
+    # Resolve profile: ID → name → first profile → "Default"
+    existing = None
+    if profile_id:
+        existing = _profile_get(profile_id)
+    if not existing and profile_name:
+        existing = _profile_get(profile_name)
+    if not existing and profiles:
+        existing = profiles[0]
+    name = existing["name"] if existing else (profile_name or "Default")
+    merged = _profile_from_settings(payload, name=name,
+                                     description=existing.get("description", "") if existing else "")
+    if existing:
+        merged["id"]           = existing["id"]
+        merged["last_run"]     = existing.get("last_run")
+        merged["last_scan_id"] = existing.get("last_scan_id")
+        # Scan start payloads only include M365 sources/user_ids/options.
+        # Preserve google_sources and file_sources so a single-source scan
+        # doesn't clobber the profile's other source selections.
+        _M365_IDS    = {"email", "onedrive", "sharepoint", "teams"}
+        google_src   = payload.get("google_sources", existing.get("google_sources", []))
+        file_src     = payload.get("file_sources") or existing.get("file_sources", [])
+        merged["google_sources"] = google_src
+        merged["file_sources"]   = file_src
+        # Rebuild combined sources: incoming M365 selection + preserved google/file
+        m365_src         = [s for s in merged.get("sources", []) if s in _M365_IDS]
+        merged["sources"] = m365_src + google_src + file_src
+    _profile_save(merged)
+
+
+def _load_settings() -> dict | None:
+    """Return the first (default) profile as a flat settings dict."""
+    profiles = _profiles_load()
+    if not profiles:
+        return None
+    p = profiles[0]
+    return {
+        "sources":          p.get("sources", []),
+        "user_ids":         p.get("user_ids", []),
+        "options":          p.get("options", {}),
+        "retention_years":  p.get("retention_years"),
+        "fiscal_year_end":  p.get("fiscal_year_end"),
+        "email_to":         p.get("email_to", ""),
+    }
+
+
+# ── SMTP / email report sending ───────────────────────────────────────────────
+_SMTP_CONFIG_PATH    = _DATA_DIR / "smtp.json"
+_ROLE_OVERRIDES_PATH = _DATA_DIR / "role_overrides.json"
+
+
+def _load_role_overrides() -> dict:
+    """Return {user_id: 'student'|'staff'|'other'} manual overrides dict."""
+    try:
+        if _ROLE_OVERRIDES_PATH.exists():
+            return json.loads(_ROLE_OVERRIDES_PATH.read_text(encoding="utf-8"))
+    except Exception:
+        pass
+    return {}
+
+
+def _save_role_overrides(overrides: dict) -> None:
+    """Atomically write the role overrides dict to disk."""
+    try:
+        tmp = _ROLE_OVERRIDES_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(overrides, ensure_ascii=False, indent=2), encoding="utf-8")
+        tmp.replace(_ROLE_OVERRIDES_PATH)
+    except Exception as e:
+        logger.error("[role_overrides] write failed: %s", e)
+
+
+# ── File source settings (#8) ─────────────────────────────────────────────────
+_FILE_SOURCES_PATH = _DATA_DIR / "file_sources.json"
+
+
+def _load_file_sources() -> list:
+    """Return saved file source definitions.
+
+    Each entry: {id, label, path, smb_host, smb_user, smb_domain, keychain_key}
+    """
+    try:
+        if _FILE_SOURCES_PATH.exists():
+            return json.loads(_FILE_SOURCES_PATH.read_text(encoding="utf-8"))
+    except Exception:
+        pass
+    return []
+
+
+def _save_file_sources(sources: list) -> None:
+    """Atomically write the file sources list to disk."""
+    try:
+        tmp = _FILE_SOURCES_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(sources, ensure_ascii=False, indent=2), encoding="utf-8")
+        tmp.replace(_FILE_SOURCES_PATH)
+    except Exception as e:
+        logger.error("[file_sources] write failed: %s", e)
+
+# ── Viewer tokens ────────────────────────────────────────────────────────────
+# Read-only viewer tokens allow sharing scan results with a DPO or compliance
+# officer without exposing scan controls or credentials.  Each token is a
+# 64-character hex string stored in viewer_tokens.json alongside other data files.
+
+_VIEWER_TOKENS_PATH = _DATA_DIR / "viewer_tokens.json"
+
+
+def _load_viewer_tokens() -> list:
+    """Return the parsed viewer_tokens.json ([] if missing or corrupt); shadowed by the redefinition below."""
+    try:
+        if _VIEWER_TOKENS_PATH.exists():
+            return json.loads(_VIEWER_TOKENS_PATH.read_text(encoding="utf-8"))
+    except Exception:
+        pass
+    return []
+
+
+def _save_viewer_tokens(tokens: list) -> None:
+    """Atomically write a bare token list to viewer_tokens.json; shadowed by the redefinition below."""
+    try:
+        tmp = _VIEWER_TOKENS_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(tokens, ensure_ascii=False, indent=2), encoding="utf-8")
+        tmp.replace(_VIEWER_TOKENS_PATH)
+    except Exception as e:
+        logger.error("[viewer_tokens] write failed: %s", e)
+
+
+def create_viewer_token(label: str = "", expires_days: int | None = None) -> dict:
+    """Generate a new viewer token, persist it, and return the token dict.
+
+    Args:
+        label:       Human-readable description (e.g. "DPO review April 2026").
+        expires_days: Days until expiry.  None or 0 = no expiry.
+    """
+    import secrets as _secrets
+    token = _secrets.token_hex(32)   # 64-char URL-safe hex string
+    now   = time.time()
+    entry: dict = {
+        "token":        token,
+        "label":        label or "",
+        "created_at":   now,
+        "expires_at":   now + expires_days * 86400 if expires_days else None,
+        "last_used_at": None,
+    }
+    tokens = _load_viewer_tokens()
+    tokens.append(entry)
+    _save_viewer_tokens(tokens)
+    return entry
+
+
+def validate_viewer_token(token: str) -> dict | None:
+    """Return the token dict if the token is valid and not expired, else None.
+
+    Updates last_used_at as a best-effort side effect.
+    """
+    if not token:
+        return None
+    tokens = _load_viewer_tokens()
+    now    = time.time()
+    found: dict | None = None
+    for entry in tokens:
+        if entry.get("token") == token:
+            exp = entry.get("expires_at")
+            if exp is not None and now > exp:
+                return None   # expired — treat as not found
+            found = entry
+            break
+    if found is None:
+        return None
+    found["last_used_at"] = now
+    _save_viewer_tokens(tokens)   # best-effort; ignore failures
+    return found
+
+
+def revoke_viewer_token(token: str) -> bool:
+    """Remove a token from storage.  Returns True if found and removed."""
+    tokens = _load_viewer_tokens()
+    before = len(tokens)
+    tokens = [t for t in tokens if t.get("token") != token]
+    if len(tokens) == before:
+        return False
+    _save_viewer_tokens(tokens)
+    return True
+
+
+def cleanup_expired_viewer_tokens() -> int:
+    """Delete all expired tokens from storage.  Returns count removed."""
+    tokens  = _load_viewer_tokens()
+    now     = time.time()
+    active  = [t for t in tokens if t.get("expires_at") is None or now <= t["expires_at"]]
+    removed = len(tokens) - len(active)
+    if removed:
+        _save_viewer_tokens(active)
+    return removed
+
+
+# ── Viewer PIN ───────────────────────────────────────────────────────────────
+# A numeric PIN that grants a browser session read-only viewer access at /view.
+# The PIN is stored as a salted SHA-256 hash inside viewer_tokens.json under a
+# top-level "__pin__" key so it lives in the same file as the token list.
+
+_PIN_META_KEY = "__pin__"
+
+
+def _load_pin_store() -> dict:
+    """Load the full viewer_tokens.json as a dict (tokens list + optional pin meta)."""
+    try:
+        if _VIEWER_TOKENS_PATH.exists():
+            raw = json.loads(_VIEWER_TOKENS_PATH.read_text(encoding="utf-8"))
+            if isinstance(raw, list):
+                # Legacy format — just a list; promote to dict
+                return {"tokens": raw}
+            if isinstance(raw, dict):
+                return raw
+    except Exception:
+        pass
+    return {"tokens": []}
+
+
+def _save_pin_store(store: dict) -> None:
+    try:
+        tmp = _VIEWER_TOKENS_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(store, ensure_ascii=False, indent=2), encoding="utf-8")
+        tmp.replace(_VIEWER_TOKENS_PATH)
+    except Exception as e:
+        logger.error("[viewer_pin] write failed: %s", e)
+
+
+# Rewrite the token helpers to use the new store format transparently.
+def _load_viewer_tokens() -> list:  # type: ignore[misc]  # noqa: F811
+    return _load_pin_store().get("tokens", [])
+
+
+def _save_viewer_tokens(tokens: list) -> None:  # type: ignore[misc]  # noqa: F811
+    store = _load_pin_store()
+    store["tokens"] = tokens
+    _save_pin_store(store)
+
+
+def get_viewer_pin_hash() -> "dict | None":
+    """Return the stored PIN record ({"hash": ..., "salt": ...}), or None if no PIN is set."""
+    return _load_pin_store().get(_PIN_META_KEY)
+
+
+def set_viewer_pin(pin: str) -> None:
+    """Hash and store a viewer PIN."""
+    import hashlib as _hl, secrets as _sec
+    if not pin:
+        raise ValueError("PIN must not be empty")
+    salt = _sec.token_hex(16)
+    h    = _hl.sha256((salt + pin).encode()).hexdigest()
+    store = _load_pin_store()
+    store[_PIN_META_KEY] = {"hash": h, "salt": salt}
+    _save_pin_store(store)
+
+
+def verify_viewer_pin(pin: str) -> bool:
+    """Return True if *pin* matches the stored hash."""
+    import hashlib as _hl
+    meta = get_viewer_pin_hash()
+    if not meta:
+        return False
+    h = _hl.sha256((meta["salt"] + pin).encode()).hexdigest()
+    return h == meta["hash"]
+
+
+def clear_viewer_pin() -> None:
+    """Remove the viewer PIN."""
+    store = _load_pin_store()
+    store.pop(_PIN_META_KEY, None)
+    _save_pin_store(store)
+
+
+# ── SMTP password encryption ─────────────────────────────────────────────────
+# The SMTP password is encrypted at rest using Fernet symmetric encryption.
+# The encryption key is a random Fernet key generated on first use and stored
+# in ~/.gdprscanner/machine_id.  The key lives only in that file, so an
+# encrypted password cannot be decrypted if just smtp.json is copied to
+# another host.
+
+_MACHINE_ID_PATH = _DATA_DIR / "machine_id"
+
+try:
+    from cryptography.fernet import Fernet as _Fernet
+    import base64 as _b64
+    _CRYPTO_OK = True
+except ImportError:
+    _CRYPTO_OK = False
+
+def _get_fernet() -> "Optional[_Fernet]":
+    """Return a Fernet instance keyed to this machine, or None if unavailable."""
+    if not _CRYPTO_OK:
+        return None
+    try:
+        if _MACHINE_ID_PATH.exists():
+            machine_key = _MACHINE_ID_PATH.read_bytes()
+        else:
+            machine_key = _Fernet.generate_key()
+            _MACHINE_ID_PATH.write_bytes(machine_key)
+            try:
+                _MACHINE_ID_PATH.chmod(0o600)
+            except Exception:
+                pass
+        return _Fernet(machine_key)
+    except Exception:
+        return None
+
+def _encrypt_password(plaintext: str) -> str:
+    """Encrypt a password string; returns a 'enc:' prefixed ciphertext string."""
+    if not plaintext:
+        return ""
+    f = _get_fernet()
+    if f is None:
+        return plaintext  # fallback: store as-is (no cryptography lib)
+    try:
+        return "enc:" + f.encrypt(plaintext.encode()).decode()
+    except Exception:
+        return plaintext
+
+def _decrypt_password(stored: str) -> str:
+    """Decrypt a stored password; handles both encrypted and legacy plaintext."""
+    if not stored:
+        return ""
+    if not stored.startswith("enc:"):
+        return stored  # legacy plaintext — return as-is
+    f = _get_fernet()
+    if f is None:
+        return ""
+    try:
+        return f.decrypt(stored[4:].encode()).decode()
+    except Exception:
+        return ""
+
+
+def _load_smtp_config() -> dict:
+    """Return saved SMTP config, decrypting the password field."""
+    try:
+        if _SMTP_CONFIG_PATH.exists():
+            cfg = json.loads(_SMTP_CONFIG_PATH.read_text(encoding="utf-8"))
+            if cfg.get("password"):
+                cfg["password"] = _decrypt_password(cfg["password"])
+            return cfg
+    except Exception:
+        pass
+    return {}
+
+def _save_smtp_config(cfg: dict) -> None:
+    """Save SMTP config, encrypting the password field."""
+    try:
+        to_save = dict(cfg)
+        if to_save.get("password"):
+            to_save["password"] = _encrypt_password(to_save["password"])
+        tmp = _SMTP_CONFIG_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(to_save, ensure_ascii=False), encoding="utf-8")
+        tmp.replace(_SMTP_CONFIG_PATH)
+        try:
+            _SMTP_CONFIG_PATH.chmod(0o600)
+        except Exception:
+            pass
+    except Exception as e:
+        logger.error("[smtp] config save failed: %s", e)
diff --git a/build_gdpr.py b/build_gdpr.py
new file mode 100755
index 0000000..1bd2fc0
--- /dev/null
+++ b/build_gdpr.py
@@ -0,0 +1,1095 @@
+#!/usr/bin/env python3
+"""
+GDPRScanner — Self-Contained App Builder
+==========================================
+Packages gdpr_scanner.py + m365_connector.py + document_scanner.py into a
+native desktop app:
+  macOS   -> dist/GDPRScanner.app   (double-click to run)
+  Windows -> dist/GDPRScanner.exe   (double-click to run)
+
+The app starts Flask on port 5100, opens the UI in a native webview window
+(WKWebView on macOS, WebView2 on Windows), and quits cleanly when the window
+is closed.
+
+Usage:
+    python build_gdpr.py              # build for current platform
+    python build_gdpr.py --clean      # remove build/ and dist/ first
+    python build_gdpr.py --dmg        # macOS: also wrap .app in a .dmg
+    python build_gdpr.py --installer  # Windows: also build NSIS installer
+
+Requirements (install once via pip):
+    pip install pyinstaller pyinstaller-hooks-contrib
+    pip install pywebview          # native window (no browser chrome)
+    pip install pystray pillow     # fallback tray icon when pywebview absent
+
+Python version:
+    Requires 3.11 or 3.12.  spaCy (used by document_scanner for NER) does not
+    support 3.13+.  This script auto-relaunches with python3.12/python3.11 if
+    the current interpreter is incompatible.
+"""
+
+# ── Version guard ─────────────────────────────────────────────────────────────
+import sys as _sys
+if not (3, 11) <= _sys.version_info[:2] <= (3, 12):
+    import re as _re, subprocess as _sp, os as _os
+
+    _cur = f"{_sys.version_info.major}.{_sys.version_info.minor}"
+    print(f"  [!] Python {_cur} is not supported (need 3.11 or 3.12 — spaCy incompatible with 3.13+)")
+
+    def _check_version(cmd: list) -> bool:
+        try:
+            out = _sp.check_output(cmd + ["--version"], stderr=_sp.STDOUT, text=True).strip()
+            m = _re.search(r"Python (\d+)\.(\d+)", out)
+            return bool(m and int(m.group(1)) == 3 and int(m.group(2)) in (11, 12))
+        except (FileNotFoundError, _sp.CalledProcessError, OSError):
+            return False
+
+    _candidates = [["python3.12"], ["python3.11"], ["py", "-3.12"], ["py", "-3.11"]]
+    _found = next((c for c in _candidates if _check_version(c)), None)
+
+    if _found:
+        print(f"  [*] Re-launching with: {' '.join(_found)}")
+        _result = _sp.run(_found + [_os.path.abspath(__file__)] + _sys.argv[1:])
+        _sys.exit(_result.returncode)
+
+    print()
+    print("  No compatible Python found on PATH.")
+    print("  Install Python 3.12:")
+    if _sys.platform == "darwin":
+        print("      brew install python@3.12")
+    elif _sys.platform == "win32":
+        print("      winget install Python.Python.3.12")
+    print()
+    _sys.exit(1)
+
+# ── Standard imports ──────────────────────────────────────────────────────────
+import argparse
+import platform
+import re
+import shutil
+import subprocess
+import sys
+import textwrap
+from pathlib import Path
+
+HERE   = Path(__file__).parent.resolve()
+SYSTEM = platform.system()   # "Darwin", "Windows", "Linux"
+
+# ── App metadata ──────────────────────────────────────────────────────────────
+APP_NAME  = "GDPRScanner"
+APP_PORT  = 5100
+BUNDLE_ID = "com.m365scanner.app"
+
+def _read_app_version() -> str:
+    # Read from VERSION file (single source of truth)
+    try:
+        return (HERE / "VERSION").read_text(encoding="utf-8").strip()
+    except Exception:
+        pass
+    return "1.0.0"
+
+APP_VERSION = _read_app_version()
+ICON_MACOS  = HERE / "icon_gdpr.icns"    # optional; falls back to icon.icns / icon.png
+ICON_WIN    = HERE / "icon_gdpr.ico"     # optional; falls back to icon.ico / icon.png
+
+# ── Paths ─────────────────────────────────────────────────────────────────────
+ENTRY_POINT = HERE / "m365_launcher.py"   # generated by this script
+DIST_DIR    = HERE / "dist"
+BUILD_DIR   = HERE / "build"
+
+
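+# ═══════════════════════════════════════════════════════════════════════════════
+# Step 1 — Generate the launcher entry point
+#
+# LAUNCHER_CODE holds the complete source of the launcher as one string
+# literal; everything inside it is emitted verbatim, so edits there change the
+# packaged app, not this build script.
+# NOTE: the embedded docstring calls the file gdpr_launcher.py, while
+# ENTRY_POINT above names it m365_launcher.py.
+# ═══════════════════════════════════════════════════════════════════════════════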
+
+LAUNCHER_CODE = '''\
+"""
+gdpr_launcher.py — entry point for the packaged GDPRScanner app.
+
+Responsibilities:
+  1. Find a free port (default 5100)
+  2. Start Flask in a background thread
+  3. Open the UI in a native webview window (pywebview)
+     — falls back to the system browser if pywebview is unavailable
+
+Generated by build_gdpr.py — do not edit manually.
+"""
+import os
+os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
+
+import subprocess
+import sys
+import socket
+import threading
+import time
+import webbrowser
+from pathlib import Path
+
+if getattr(sys, "frozen", False):
+    BASE_DIR = Path(sys._MEIPASS)
+else:
+    BASE_DIR = Path(__file__).parent
+
+
+def _setup_external_tools():
+    """
+    Locate Tesseract and Poppler regardless of how the app was launched.
+    GDPRScanner calls document_scanner for file content extraction, which
+    may need OCR for scanned PDFs — same setup as Document Scanner.
+    """
+    extra_paths = []
+
+    if sys.platform == "darwin":
+        brew_prefix = None
+        for brew_candidate in ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]:
+            if Path(brew_candidate).exists():
+                try:
+                    result = subprocess.run(
+                        [brew_candidate, "--prefix"],
+                        capture_output=True, text=True, timeout=5
+                    )
+                    if result.returncode == 0:
+                        brew_prefix = result.stdout.strip()
+                        break
+                except Exception:
+                    pass
+
+        brew_candidates = []
+        if brew_prefix:
+            brew_candidates.append(brew_prefix)
+        brew_candidates += ["/opt/homebrew", "/usr/local", "/home/linuxbrew/.linuxbrew"]
+
+        for prefix in brew_candidates:
+            bin_dir = Path(prefix) / "bin"
+            if bin_dir.exists():
+                extra_paths.append(str(bin_dir))
+            tessdata = Path(prefix) / "share" / "tessdata"
+            if tessdata.exists():
+                os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
+
+        for t in ["/opt/homebrew/bin/tesseract", "/usr/local/bin/tesseract"]:
+            if Path(t).exists():
+                os.environ.setdefault("TESSERACT_CMD", t)
+                break
+
+        for p in ["/opt/homebrew/bin", "/usr/local/bin",
+                  "/opt/homebrew/opt/poppler/bin", "/usr/local/opt/poppler/bin"]:
+            if (Path(p) / "pdftoppm").exists():
+                os.environ.setdefault("POPPLER_PATH", p)
+                extra_paths.insert(0, p)
+                break
+
+    elif sys.platform == "win32":
+        import winreg
+        tess_dir = None
+        try:
+            key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\\Tesseract-OCR")
+            tess_dir, _ = winreg.QueryValueEx(key, "InstallDir")
+            winreg.CloseKey(key)
+        except Exception:
+            pass
+
+        for d in ([tess_dir] if tess_dir else []) + [
+            r"C:\\Program Files\\Tesseract-OCR",
+            r"C:\\Program Files (x86)\\Tesseract-OCR",
+            r"C:\\Tesseract-OCR",
+        ]:
+            if d and Path(d, "tesseract.exe").exists():
+                os.environ.setdefault("TESSERACT_CMD", str(Path(d) / "tesseract.exe"))
+                extra_paths.append(d)
+                tessdata = Path(d) / "tessdata"
+                if tessdata.exists():
+                    os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
+                break
+
+        for d in [
+            r"C:\\poppler\\Library\\bin", r"C:\\poppler\\bin",
+            r"C:\\Program Files\\poppler\\Library\\bin",
+            r"C:\\Program Files\\poppler\\bin",
+            r"C:\\tools\\poppler\\Library\\bin",
+        ]:
+            if (Path(d) / "pdftoppm.exe").exists():
+                os.environ.setdefault("POPPLER_PATH", d)
+                extra_paths.insert(0, d)
+                break
+
+    if getattr(sys, "frozen", False):
+        tess_bin = BASE_DIR / ("tesseract.exe" if sys.platform == "win32" else "tesseract")
+        if tess_bin.exists():
+            os.environ.setdefault("TESSERACT_CMD", str(tess_bin))
+        for sub in ["poppler/bin", "poppler/Library/bin", "."]:
+            pdftoppm = BASE_DIR / sub / ("pdftoppm.exe" if sys.platform == "win32" else "pdftoppm")
+            if pdftoppm.exists():
+                os.environ.setdefault("POPPLER_PATH", str(pdftoppm.parent))
+                extra_paths.insert(0, str(pdftoppm.parent))
+                break
+        extra_paths.insert(0, str(BASE_DIR))
+
+    if extra_paths:
+        current = os.environ.get("PATH", "")
+        additions = os.pathsep.join(p for p in extra_paths if p not in current)
+        if additions:
+            os.environ["PATH"] = additions + os.pathsep + current
+
+    cmd = os.environ.get("TESSERACT_CMD")
+    if cmd and Path(cmd).exists():
+        try:
+            import pytesseract
+            pytesseract.pytesseract.tesseract_cmd = cmd
+        except ImportError:
+            pass
+
+    poppler = os.environ.get("POPPLER_PATH")
+    if poppler:
+        try:
+            import pdf2image.pdf2image as _p2i
+            _orig = _p2i.convert_from_path
+            def _patched(pdf_path, *a, poppler_path=None, **kw):
+                return _orig(pdf_path, *a, poppler_path=poppler_path or poppler, **kw)
+            _p2i.convert_from_path = _patched
+        except Exception:
+            pass
+
+
+_setup_external_tools()
+
+
+def find_free_port(start: int = 5100) -> int:
+    for port in range(start, start + 100):
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            try:
+                s.bind(("127.0.0.1", port))
+                return port
+            except OSError:
+                continue
+    raise RuntimeError("No free port found in range 5100-5200")
+
+
+# ── Single-instance lock ──────────────────────────────────────────────────────
+_LOCK_FH = None
+
+def acquire_instance_lock() -> bool:
+    """
+    Acquire an exclusive process lock so only one instance runs at a time.
+    Returns True if the lock was acquired, False if another instance holds it.
+    The lock is released automatically when the process exits.
+    """
+    global _LOCK_FH
+    lock_dir = Path.home() / ".gdprscanner"
+    lock_dir.mkdir(parents=True, exist_ok=True)
+    lock_path = lock_dir / "app.lock"
+    try:
+        _LOCK_FH = open(lock_path, "w")
+        if sys.platform == "win32":
+            import msvcrt
+            msvcrt.locking(_LOCK_FH.fileno(), msvcrt.LK_NBLCK, 1)
+        else:
+            import fcntl
+            fcntl.flock(_LOCK_FH, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        _LOCK_FH.write(str(os.getpid()))
+        _LOCK_FH.flush()
+        return True
+    except (IOError, OSError):
+        if _LOCK_FH:
+            _LOCK_FH.close()
+            _LOCK_FH = None
+        return False
+
+
+def _activate_venv():
+    if getattr(sys, "frozen", False):
+        return
+    for candidate in [BASE_DIR / "venv", Path(__file__).parent / "venv"]:
+        if sys.platform == "win32":
+            site_pkg = candidate / "Lib" / "site-packages"
+        else:
+            lib = candidate / "lib"
+            site_pkg = None
+            if lib.exists():
+                for d in lib.iterdir():
+                    sp = d / "site-packages"
+                    if sp.exists():
+                        site_pkg = sp
+                        break
+        if site_pkg and site_pkg.exists():
+            sys.path.insert(0, str(site_pkg))
+            os.environ["VIRTUAL_ENV"] = str(candidate)
+            os.environ.pop("PYTHONHOME", None)
+            break
+
+
+_activate_venv()
+
+
+def start_flask(port: int):
+    import gdpr_scanner as _app
+    _app.app.run(host="127.0.0.1", port=port, debug=False,
+                 threaded=True, use_reloader=False)
+
+
+def wait_for_flask(port: int, timeout: float = 20.0) -> bool:
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        try:
+            with socket.create_connection(("127.0.0.1", port), timeout=0.2):
+                return True
+        except OSError:
+            time.sleep(0.1)
+    return False
+
+
+def _load_icon_image():
+    try:
+        from PIL import Image as PILImage
+        for name in ["icon_gdpr.ico", "icon_gdpr.icns", "icon_gdpr.png",
+                     "icon.ico", "icon.icns", "icon.png",
+                     "icon_m365.ico", "icon_m365.icns", "icon_m365.png"]:  # legacy fallback
+            p = BASE_DIR / name
+            if p.exists():
+                return PILImage.open(p).convert("RGBA").resize((64, 64))
+        # Minimal fallback — blue square
+        img = PILImage.new("RGBA", (64, 64), (0, 114, 206, 255))
+        return img
+    except Exception:
+        return None
+
+
+def run_webview(port: int):
+    """
+    Open the app in a native webview window.
+    Returns True on success, False if pywebview is unavailable.
+    """
+    try:
+        import webview
+    except ImportError:
+        return False
+
+    class Api:
+        def quit(self):
+            import webview as _wv
+            for w in _wv.windows:
+                w.destroy()
+
+        def save_excel(self):
+            """Fetch the Excel export from Flask and save via native dialog."""
+            import urllib.request, datetime, os, webview as _wv
+            try:
+                url = f"http://127.0.0.1:{port}/api/export_excel"
+                with urllib.request.urlopen(url) as resp:
+                    data = resp.read()
+                fname = f"gdpr_scan_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+                win = _wv.windows[0] if _wv.windows else None
+                if win:
+                    paths = win.create_file_dialog(
+                        _wv.SAVE_DIALOG,
+                        save_filename=fname,
+                        file_types=("Excel Files (*.xlsx)",),
+                    )
+                    if paths:
+                        dest = paths[0] if isinstance(paths, (list, tuple)) else paths
+                        if not dest.endswith(".xlsx"):
+                            dest += ".xlsx"
+                        with open(dest, "wb") as f:
+                            f.write(data)
+                        return {"ok": True, "path": dest}
+                return {"ok": False, "error": "cancelled"}
+            except Exception as e:
+                return {"ok": False, "error": str(e)}
+
+        def save_db_export(self):
+            """Fetch the DB export ZIP from Flask and save via native dialog."""
+            import urllib.request, datetime, webview as _wv
+            try:
+                url = f"http://127.0.0.1:{port}/api/db/export"
+                with urllib.request.urlopen(url) as resp:
+                    data = resp.read()
+                fname = f"gdpr_export_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
+                win = _wv.windows[0] if _wv.windows else None
+                if win:
+                    paths = win.create_file_dialog(
+                        _wv.SAVE_DIALOG,
+                        save_filename=fname,
+                        file_types=("ZIP Archive (*.zip)",),
+                    )
+                    if paths:
+                        dest = paths[0] if isinstance(paths, (list, tuple)) else paths
+                        if not dest.endswith(".zip"):
+                            dest += ".zip"
+                        with open(dest, "wb") as f:
+                            f.write(data)
+                        return {"ok": True, "path": dest}
+                return {"ok": False, "error": "cancelled"}
+            except Exception as e:
+                return {"ok": False, "error": str(e)}
+
+        def save_article30(self):
+            """Fetch the Article 30 Word doc from Flask and save via native dialog."""
+            import urllib.request, datetime, webview as _wv
+            try:
+                url = f"http://127.0.0.1:{port}/api/export_article30"
+                with urllib.request.urlopen(url) as resp:
+                    data = resp.read()
+                fname = f"article30_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
+                win = _wv.windows[0] if _wv.windows else None
+                if win:
+                    paths = win.create_file_dialog(
+                        _wv.SAVE_DIALOG,
+                        save_filename=fname,
+                        file_types=("Word Document (*.docx)",),
+                    )
+                    if paths:
+                        dest = paths[0] if isinstance(paths, (list, tuple)) else paths
+                        if not dest.endswith(".docx"):
+                            dest += ".docx"
+                        with open(dest, "wb") as f:
+                            f.write(data)
+                        return {"ok": True, "path": dest}
+                return {"ok": False, "error": "cancelled"}
+            except Exception as e:
+                return {"ok": False, "error": str(e)}
+
+        def open_manual(self, lang: str):
+            """Open the user manual in a new native webview window."""
+            import webview as _wv
+            url = f"http://127.0.0.1:{port}/manual?lang={lang}"
+            existing = next((w for w in _wv.windows if getattr(w, "_is_manual", False)), None)
+            if existing:
+                existing.load_url(url)
+            else:
+                mw = _wv.create_window(
+                    title="GDPRScanner — Manual",
+                    url=url,
+                    width=960,
+                    height=800,
+                    resizable=True,
+                )
+                mw._is_manual = True
+
+    w = webview.create_window(
+        title="GDPRScanner",
+        url=f"http://127.0.0.1:{port}/",
+        width=1400,
+        height=900,
+        min_size=(900, 600),
+        js_api=Api(),
+    )
+
+    def _on_closed():
+        os._exit(0)
+
+    w.events.closed += _on_closed
+    webview.start(debug=False)
+    return True
+
+
+def _run_browser_fallback(port: int):
+    """Open in system browser + optional tray icon."""
+    url = f"http://127.0.0.1:{port}/"
+    webbrowser.open(url)
+
+    try:
+        import pystray
+        from PIL import Image as PILImage
+
+        img = _load_icon_image()
+        if img is None:
+            return
+
+        def _quit(icon, item):
+            icon.stop()
+            os._exit(0)
+
+        def _open(icon, item):
+            webbrowser.open(url)
+
+        menu = pystray.Menu(
+            pystray.MenuItem("Open GDPRScanner", _open, default=True),
+            pystray.MenuItem("Quit", _quit),
+        )
+        icon = pystray.Icon("GDPRScanner", img, "GDPRScanner", menu)
+        icon.run()
+    except ImportError:
+        # No pystray — just keep the process alive
+        try:
+            while True:
+                time.sleep(60)
+        except KeyboardInterrupt:
+            pass
+
+
+if __name__ == "__main__":
+    if not acquire_instance_lock():
+        print("GDPRScanner is already running.", file=sys.stderr)
+        sys.exit(1)
+
+    # On macOS, multiprocessing uses "fork" which is unsafe with some
+    # frameworks — use "spawn" to match PyInstaller's behaviour.
+    if sys.platform == "darwin":
+        import multiprocessing
+        multiprocessing.set_start_method("spawn", force=True)
+
+    port = find_free_port()
+    # Machine-readable port line — stdout pipe for any parent process.
+    print(f"GDPR_PORT={port}", flush=True)
+
+    # Pre-import on main thread so cv2 / numpy initialise safely
+    try:
+        import gdpr_scanner  # noqa: F401 — side effect: loads Flask app
+    except Exception as e:
+        print(f"[!] Failed to import gdpr_scanner: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    flask_thread = threading.Thread(target=start_flask, args=(port,), daemon=True)
+    flask_thread.start()
+
+    if not wait_for_flask(port):
+        print("[!] Flask did not start in time", file=sys.stderr)
+        sys.exit(1)
+
+    webview_ok = run_webview(port)
+    if not webview_ok:
+        _run_browser_fallback(port)
+'''
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Step 2 — Icon generation
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def make_icons():
+    """Generate icon_gdpr.icns (macOS) and icon_gdpr.ico (Windows)."""
+    try:
+        from PIL import Image, ImageDraw, ImageFont
+    except ImportError:
+        print("  [!] Pillow not found — skipping icon generation")
+        print("      Install with: pip install pillow")
+        return
+
+    # ── Draw the icon: dark background + "GDPR" text ──────────────────────────
+    SIZE = 512
+    img  = Image.new("RGBA", (SIZE, SIZE), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(img)
+
+    # Rounded-rect background
+    R  = 100
+    BG = (31, 41, 64, 255)        # dark navy
+    ACC = (255, 255, 255, 255)    # white text
+
+    # Fill body
+    draw.rectangle([R, 0, SIZE - R, SIZE], fill=BG)
+    draw.rectangle([0, R, SIZE, SIZE - R], fill=BG)
+    # Rounded corners
+    for cx, cy in [(R, R), (SIZE - R, R), (R, SIZE - R), (SIZE - R, SIZE - R)]:
+        draw.ellipse([cx - R, cy - R, cx + R, cy + R], fill=BG)
+
+    # Text "M365"
+    font = None
+    for font_path in [
+        "/System/Library/Fonts/Helvetica.ttc",
+        "/System/Library/Fonts/Arial.ttf",
+        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
+        "C:/Windows/Fonts/arialbd.ttf",
+    ]:
+        if Path(font_path).exists():
+            try:
+                font = ImageFont.truetype(font_path, size=160)
+                break
+            except Exception:
+                pass
+
+    if font is None:
+        font = ImageFont.load_default()
+
+    text = "GDPR"
+    bbox = draw.textbbox((0, 0), text, font=font)
+    tw = bbox[2] - bbox[0]
+    th = bbox[3] - bbox[1]
+    draw.text(((SIZE - tw) / 2 - bbox[0], (SIZE - th) / 2 - bbox[1] - 10),
+              text, fill=ACC, font=font)
+
+    # Smaller "Scanner" subtitle
+    sub_font = None
+    for font_path in [
+        "/System/Library/Fonts/Helvetica.ttc",
+        "/System/Library/Fonts/Arial.ttf",
+        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+        "C:/Windows/Fonts/arial.ttf",
+    ]:
+        if Path(font_path).exists():
+            try:
+                sub_font = ImageFont.truetype(font_path, size=68)
+                break
+            except Exception:
+                pass
+    if sub_font is None:
+        sub_font = ImageFont.load_default()
+
+    sub = "Scanner"
+    sbbox = draw.textbbox((0, 0), sub, font=sub_font)
+    sw = sbbox[2] - sbbox[0]
+    draw.text(((SIZE - sw) / 2 - sbbox[0], SIZE * 0.65),
+              sub, fill=(200, 230, 255, 220), font=sub_font)
+
+    # ── macOS .icns ────────────────────────────────────────────────────────────
+    if SYSTEM == "Darwin":
+        icns_path = HERE / "icon_gdpr.icns"
+        iconset   = HERE / "icon_gdpr.iconset"
+        iconset.mkdir(exist_ok=True)
+        sizes = [16, 32, 64, 128, 256, 512]
+        for s in sizes:
+            img.resize((s, s), Image.LANCZOS).save(iconset / f"icon_{s}x{s}.png")
+            img.resize((s * 2, s * 2), Image.LANCZOS).save(iconset / f"icon_{s}x{s}@2x.png")
+        result = subprocess.run(
+            ["iconutil", "-c", "icns", str(iconset), "-o", str(icns_path)],
+            capture_output=True
+        )
+        shutil.rmtree(iconset, ignore_errors=True)
+        if result.returncode == 0:
+            print(f"  [+] Icon: {icns_path.name}")
+        else:
+            print("  [!] iconutil failed — no .icns generated")
+
+    # ── Windows .ico ───────────────────────────────────────────────────────────
+    ico_path = HERE / "icon_gdpr.ico"
+    ico_imgs = [img.resize((s, s), Image.LANCZOS).convert("RGBA")
+                for s in [16, 32, 48, 64, 128, 256]]
+    ico_imgs[0].save(ico_path, format="ICO", sizes=[(s, s) for s in [16, 32, 48, 64, 128, 256]],
+                     append_images=ico_imgs[1:])
+    print(f"  [+] Icon: {ico_path.name}")
+
+    # Save PNG fallback
+    img.save(HERE / "icon_gdpr.png")
+    print(f"  [+] Icon: icon_gdpr.png")
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Step 3 — Build with PyInstaller
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def get_pyinstaller_args() -> list:
+    """Return the PyInstaller command-line arguments for the current platform."""
+
+    hidden = [
+        # Flask / web
+        "flask", "flask.templating", "jinja2", "jinja2.ext",
+        "werkzeug", "werkzeug.serving", "werkzeug.routing",
+        # M365 / auth
+        "msal", "msal.application", "msal.authority",
+        "requests", "requests.adapters", "urllib3",
+        "cryptography", "cryptography.hazmat",
+        # Document scanning (via document_scanner)
+        "pdfplumber", "pdfplumber.page", "pdfminer", "pdfminer.high_level",
+        "pdf2image", "pytesseract",
+        "pypdf", "reportlab", "reportlab.pdfgen", "reportlab.lib",
+        "spacy", "spacy.lang.da", "spacy.lang.en",
+        "docx", "docx.oxml", "docx.styles",
+        "openpyxl", "openpyxl.styles", "openpyxl.utils",
+        "numpy", "PIL", "PIL.Image",
+        # App window
+        "pystray", "pystray._base",
+        "webview", "webview.platforms",
+        "webview.platforms.cocoa",
+        "webview.platforms.winforms",
+        "webview.platforms.gtk",
+        "webview.platforms.qt",
+        # Scheduler (#19)
+        "apscheduler", "apscheduler.schedulers.background",
+        "apscheduler.triggers.cron",
+    ]
+
+    datas = [
+        (str(HERE / "gdpr_scanner.py"),    "."),
+        (str(HERE / "m365_connector.py"),  "."),
+        (str(HERE / "gdpr_db.py"),         "."),
+        (str(HERE / "file_scanner.py"),    "."),
+        #(str(HERE / "scheduler.py"),       "."),
+        (str(HERE / "document_scanner.py"), "."),
+        # ── Modules split from gdpr_scanner.py in v1.6.1 (#25) ──────────────
+        (str(HERE / "sse.py"),             "."),
+        (str(HERE / "checkpoint.py"),      "."),
+        (str(HERE / "app_config.py"),      "."),
+        (str(HERE / "cpr_detector.py"),    "."),
+        (str(HERE / "scan_engine.py"),     "."),
+        (str(HERE / "google_connector.py"), "."),
+        (str(HERE / "scan_scheduler.py"),  "."),
+    ]
+
+    # Bundle VERSION file — read at startup by both scanners
+    version_file = HERE / "VERSION"
+    if version_file.exists():
+        datas.append((str(version_file), "."))
+        print(f"  [+] Bundling VERSION: {version_file.read_text().strip()}")
+
+    lang_dir = HERE / "lang"
+    if lang_dir.exists():
+        datas.append((str(lang_dir), "lang"))
+    keywords_dir = HERE / "keywords"
+    if keywords_dir.exists():
+        datas.append((str(keywords_dir), "keywords"))
+        print(f"  [+] Bundling keywords: {list(keywords_dir.glob('*.json'))}")
+        print(f"  [+] Bundling lang files: {list(lang_dir.glob('*.json')) + list(lang_dir.glob('*.lang'))}")
+    skus_dir = HERE / "classification"
+    if skus_dir.exists():
+        datas.append((str(skus_dir), "classification"))
+        print(f"  [+] Bundling classification files: {list(skus_dir.glob('*.json'))}")
+    templates_dir = HERE / "templates"
+    if templates_dir.exists():
+        datas.append((str(templates_dir), "templates"))
+        print(f"  [+] Bundling templates: {list(templates_dir.glob('*.html'))}")
+    static_dir = HERE / "static"
+    if static_dir.exists():
+        datas.append((str(static_dir), "static"))
+        print(f"  [+] Bundling static: {list(static_dir.iterdir())}")
+    for manual_file in (HERE / "docs" / "manuals").glob("MANUAL-*.md"):
+        datas.append((str(manual_file), "docs/manuals"))
+        print(f"  [+] Bundling manual: {manual_file.name}")
+
+    # Bundle routes/ blueprints
+    routes_dir = HERE / "routes"
+    if routes_dir.exists():
+        for f in routes_dir.glob("*.py"):
+            datas.append((str(f), "routes"))
+        print(f"  [+] Bundling routes/: {[f.name for f in routes_dir.glob('*.py')]}")
+
+    # cv2 cascade data
+    try:
+        import cv2 as _cv2
+        cv2_data = Path(_cv2.__file__).parent / "data"
+    except Exception:
+        import importlib.util
+        spec = importlib.util.find_spec("cv2")
+        cv2_data = Path(spec.origin).parent / "data" if spec and spec.origin else None
+    if cv2_data and Path(cv2_data).exists():
+        datas.append((str(cv2_data), "cv2/data"))
+        print(f"  [+] Bundling cv2/data")
+        cv2_pkg = Path(cv2_data).parent
+        for so in cv2_pkg.glob("cv2*.so"):
+            datas.append((str(so), "cv2"))
+        dylibs = cv2_pkg / ".dylibs"
+        if dylibs.exists():
+            datas.append((str(dylibs), "cv2/.dylibs"))
+
+    # spaCy models
+    try:
+        from PyInstaller.utils.hooks import collect_data_files as _cdf, collect_submodules as _csm
+        for model in ["da_core_news_lg", "da_core_news_md", "da_core_news_sm",
+                      "xx_ent_wiki_sm", "en_core_web_sm"]:
+            try:
+                _md = _cdf(model)
+                _mh = _csm(model)
+                if _md or _mh:
+                    datas  += _md
+                    hidden += _mh
+                    print(f"  [+] Bundling spaCy model: {model}")
+                    break
+            except Exception:
+                pass
+    except Exception:
+        pass
+
+    args = [
+        str(ENTRY_POINT),
+        "--name", APP_NAME,
+        "--onedir",
+        "--noconfirm",
+        "--clean",
+        "--distpath", str(DIST_DIR),
+        "--workpath", str(BUILD_DIR),
+        "--specpath", str(HERE),
+        "--exclude-module", "cv2",
+    ]
+
+    for h in hidden:
+        args += ["--hidden-import", h]
+
+    sep = ";" if SYSTEM == "Windows" else ":"
+    for src, dst in datas:
+        args += ["--add-data", f"{src}{sep}{dst}"]
+
+    # Platform options
+    if SYSTEM == "Darwin":
+        icon = next(
+            (p for p in [ICON_MACOS, HERE / "icon.icns", HERE / "icon_gdpr.png", HERE / "icon.png"]
+             if p.exists()), None
+        )
+        if icon:
+            args += ["--icon", str(icon)]
+        args += ["--windowed", "--osx-bundle-identifier", BUNDLE_ID]
+
+    elif SYSTEM == "Windows":
+        icon = next(
+            (p for p in [ICON_WIN, HERE / "icon.ico", HERE / "icon_gdpr.png"]
+             if p.exists()), None
+        )
+        if icon:
+            args += ["--icon", str(icon)]
+        args += ["--windowed", "--version-file", str(_make_win_version_file())]
+
+    return args
+
+
+def _make_win_version_file() -> Path:
+    ver = tuple(int(x) for x in (APP_VERSION + ".0.0").split(".")[:4])
+    content = textwrap.dedent(f"""\
+        VSVersionInfo(
+          ffi=FixedFileInfo(
+            filevers={ver}, prodvers={ver},
+            mask=0x3f, flags=0x0, OS=0x4, fileType=0x1,
+            subtype=0x0, date=(0, 0)
+          ),
+          kids=[
+            StringFileInfo([StringTable('040904B0', [
+              StringStruct('CompanyName', 'GDPRScanner'),
+              StringStruct('FileDescription', '{APP_NAME}'),
+              StringStruct('FileVersion', '{APP_VERSION}'),
+              StringStruct('InternalName', 'M365Scanner'),
+              StringStruct('LegalCopyright', ''),
+              StringStruct('OriginalFilename', 'GDPRScanner.exe'),
+              StringStruct('ProductName', '{APP_NAME}'),
+              StringStruct('ProductVersion', '{APP_VERSION}'),
+            ])]),
+            VarFileInfo([VarStruct('Translation', [0x0409, 1200])])
+          ]
+        )
+    """)
+    path = HERE / "m365_win_version_info.txt"
+    path.write_text(content, encoding="utf-8")
+    return path
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Step 4 — Post-build helpers
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def create_dmg():
+    if shutil.which("create-dmg") is None:
+        print("  [!] create-dmg not found — skipping .dmg")
+        print("      Install with: brew install create-dmg")
+        return
+
+    app_path = DIST_DIR / f"{APP_NAME}.app"
+    dmg_path = DIST_DIR / f"{APP_NAME}-{APP_VERSION}.dmg"
+    if dmg_path.exists():
+        dmg_path.unlink()
+
+    print("  Creating .dmg …")
+    cmd = [
+        "create-dmg",
+        "--volname", APP_NAME,
+        "--window-pos", "200", "120",
+        "--window-size", "600", "400",
+        "--icon-size", "100",
+        "--icon", f"{APP_NAME}.app", "175", "190",
+        "--hide-extension", f"{APP_NAME}.app",
+        "--app-drop-link", "425", "190",
+        str(dmg_path),
+        str(app_path),
+    ]
+    result = subprocess.run(cmd)
+    if result.returncode == 0:
+        print(f"  [+] DMG created: {dmg_path.name}")
+    else:
+        print("  [!] create-dmg failed — .app is still usable directly")
+
+
+def create_nsis_installer():
+    if SYSTEM != "Windows":
+        print("  [!] NSIS installer only available on Windows"); return
+    if shutil.which("makensis") is None:
+        print("  [!] NSIS not found — download from https://nsis.sourceforge.io"); return
+
+    nsi = HERE / "m365_installer.nsi"
+    dist_folder = DIST_DIR / APP_NAME
+    nsi.write_text(textwrap.dedent(f"""\
+        !define APP_NAME      "{APP_NAME}"
+        !define APP_VERSION   "{APP_VERSION}"
+        !define DIST_FOLDER   "{dist_folder}"
+        !define INSTALL_DIR   "$PROGRAMFILES64\\\\{APP_NAME}"
+
+        Name "${{APP_NAME}}"
+        OutFile "dist\\\\{APP_NAME}-{APP_VERSION}-Setup.exe"
+        InstallDir "${{INSTALL_DIR}}"
+        RequestExecutionLevel admin
+
+        Section "Install"
+          SetOutPath "${{INSTALL_DIR}}"
+          File /r "${{DIST_FOLDER}}\\\\*.*"
+          CreateShortcut "$DESKTOP\\\\{APP_NAME}.lnk" "${{INSTALL_DIR}}\\\\{APP_NAME}.exe"
+          CreateShortcut "$SMPROGRAMS\\\\{APP_NAME}.lnk" "${{INSTALL_DIR}}\\\\{APP_NAME}.exe"
+        SectionEnd
+
+        Section "Uninstall"
+          Delete "$DESKTOP\\\\{APP_NAME}.lnk"
+          Delete "$SMPROGRAMS\\\\{APP_NAME}.lnk"
+          RMDir /r "${{INSTALL_DIR}}"
+        SectionEnd
+    """), encoding="utf-8")
+
+    result = subprocess.run(["makensis", str(nsi)])
+    if result.returncode == 0:
+        print(f"  [+] Installer: dist/{APP_NAME}-{APP_VERSION}-Setup.exe")
+    else:
+        print("  [!] NSIS compilation failed")
+
+
+def print_next_steps():
+    if SYSTEM == "Darwin":
+        app = DIST_DIR / f"{APP_NAME}.app"
+        print(f"""
+  ╔══════════════════════════════════════════════════════════╗
+  ║  Build complete!                                         ║
+  ╠══════════════════════════════════════════════════════════╣
+  ║  App:  {str(app):<51}║
+  ╠══════════════════════════════════════════════════════════╣
+  ║  To run:                                                 ║
+  ║    open "{app}"
+  ║    — or double-click in Finder                           ║
+  ║                                                          ║
+  ║  Opens on http://127.0.0.1:5100 in a native WKWebView    ║
+  ║  window (no browser chrome).                             ║
+  ║  If pywebview was not installed, falls back to browser.  ║
+  ║                                                          ║
+  ║  To distribute:                                          ║
+  ║    python build_gdpr.py --dmg   (requires create-dmg)    ║
+  ╚══════════════════════════════════════════════════════════╝""")
+
+    elif SYSTEM == "Windows":
+        exe = DIST_DIR / APP_NAME / f"{APP_NAME}.exe"
+        print(f"""
+  ╔══════════════════════════════════════════════════════════╗
+  ║  Build complete!                                         ║
+  ╠══════════════════════════════════════════════════════════╣
+  ║  Exe:  {str(exe):<51}║
+  ╠══════════════════════════════════════════════════════════╣
+  ║  To run:                                                 ║
+  ║    Double-click  "{APP_NAME}.exe"                        ║
+  ║                                                          ║
+  ║  Opens on http://127.0.0.1:5100 in a native WebView2     ║
+  ║  window (Edge engine, built into Windows 10/11).         ║
+  ║  If pywebview was not installed, falls back to browser.  ║
+  ║                                                          ║
+  ║  To distribute as installer:                             ║
+  ║    Install NSIS: https://nsis.sourceforge.io             ║
+  ║    Then run: python build_gdpr.py --installer            ║
+  ╚══════════════════════════════════════════════════════════╝""")
+
+    else:
+        print(f"\n  [+] Build complete — see dist/")
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Main
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def main():
+    parser = argparse.ArgumentParser(description="Build GDPRScanner app")
+    parser.add_argument("--clean",      action="store_true", help="Remove build/ and dist/ first")
+    parser.add_argument("--dmg",        action="store_true", help="macOS: wrap .app in .dmg after build")
+    parser.add_argument("--installer",  action="store_true", help="Windows: create NSIS installer")
+    parser.add_argument("--icons-only", action="store_true", help="Only regenerate icons, don't build")
+    args = parser.parse_args()
+
+    print(f"\n  GDPRScanner — App Builder  v{APP_VERSION}")
+    print(f"  Platform: {SYSTEM}  Python: {sys.version.split()[0]}")
+    print(f"  {'─' * 42}\n")
+
+    if not args.icons_only:
+        # Check PyInstaller
+        try:
+            import PyInstaller
+            print(f"  [+] PyInstaller {PyInstaller.__version__}")
+        except ImportError:
+            print("  [!] PyInstaller not found. Install with:")
+            print("        pip install pyinstaller pyinstaller-hooks-contrib")
+            sys.exit(1)
+
+        # Check pywebview
+        try:
+            import webview
+            try:   _wv_ver = webview.__version__
+            except AttributeError:
+                import importlib.metadata
+                _wv_ver = importlib.metadata.version("pywebview")
+            print(f"  [+] pywebview {_wv_ver}  (native window — recommended)")
+        except ImportError:
+            print("  [!] pywebview not found — will fall back to system browser")
+            print("      Install with: pip install pywebview")
+
+        # Check pystray
+        try:
+            import pystray
+            print(f"  [+] pystray available  (browser-fallback tray icon)")
+        except ImportError:
+            print("  [!] pystray not found — no tray icon in browser-fallback mode")
+
+        # Check MSAL
+        try:
+            import msal
+            print(f"  [+] msal {msal.__version__}")
+        except ImportError:
+            print("  [!] msal not found — run: pip install msal")
+            sys.exit(1)
+
+        # Check requests
+        try:
+            import requests
+            print(f"  [+] requests {requests.__version__}")
+        except ImportError:
+            print("  [!] requests not found — run: pip install requests")
+            sys.exit(1)
+
+        # Check source files
+        for fname in ["gdpr_scanner.py", "gdpr_db.py", "m365_connector.py", "document_scanner.py",
+                       "sse.py", "checkpoint.py", "app_config.py", "cpr_detector.py", "scan_engine.py"]:
+            p = HERE / fname
+            if not p.exists():
+                print(f"  [!] {fname} not found in {HERE}")
+                sys.exit(1)
+            print(f"  [+] Found {fname}")
+
+    # Clean
+    for d in [BUILD_DIR, DIST_DIR]:
+        if d.exists():
+            shutil.rmtree(d)
+            print(f"  [+] Removed {d.name}/")
+
+    # Icons
+    print("\n  Generating icons …")
+    make_icons()
+
+    if args.icons_only:
+        return
+
+    # Write launcher
+    print("\n  Writing launcher …")
+    ENTRY_POINT.write_text(LAUNCHER_CODE, encoding="utf-8")
+    print(f"  [+] {ENTRY_POINT.name}")
+
+    # cv2 DLL check on Windows
+    if SYSTEM == "Windows":
+        try:
+            import cv2  # noqa: F401
+        except ImportError as e:
+            if "DLL load failed" in str(e):
+                print("  [!] cv2 DLL load failed — reinstalling headless variant …")
+                subprocess.run([sys.executable, "-m", "pip", "install",
+                                "--force-reinstall", "opencv-python-headless", "-q"], check=False)
+
+    # Run PyInstaller
+    print("\n  Running PyInstaller …")
+    pyi_args = get_pyinstaller_args()
+
+    import PyInstaller.__main__ as pyi
+    pyi.run(pyi_args)
+
+    # Post-build
+    if args.dmg and SYSTEM == "Darwin":
+        create_dmg()
+    if args.installer and SYSTEM == "Windows":
+        create_nsis_installer()
+
+    print_next_steps()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/build_gdpr.sh b/build_gdpr.sh
new file mode 100755
index 0000000..63cecd8
--- /dev/null
+++ b/build_gdpr.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+# GDPRScanner — build .app (uses ./venv)
+#
+# Activates the virtualenv that lives next to this script and runs
+# build_gdpr.py with --clean, forwarding any extra command-line arguments.
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV_ACTIVATE="$SCRIPT_DIR/venv/bin/activate"
+
+# Fail loudly instead of silently building with whatever python3 is on PATH.
+if [[ ! -f "$VENV_ACTIVATE" ]]; then
+    echo "error: virtualenv not found at $SCRIPT_DIR/venv" >&2
+    exit 1
+fi
+
+# shellcheck disable=SC1090
+source "$VENV_ACTIVATE"
+exec python3 "$SCRIPT_DIR/build_gdpr.py" --clean "$@"
diff --git a/checkpoint.py b/checkpoint.py
new file mode 100644
index 0000000..8b9c36d
--- /dev/null
+++ b/checkpoint.py
@@ -0,0 +1,84 @@
+"""
+checkpoint.py — Scan checkpoint and delta-token persistence for GDPRScanner.
+
+Provides save/load/clear for mid-scan checkpoints (so interrupted scans can
+resume) and load/save for Microsoft Graph delta-link tokens.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import logging
+import time
+from pathlib import Path
+
+# Module-level logger; used below to report failed checkpoint/delta writes.
+logger = logging.getLogger(__name__)
+
+# Data directory under the user's home; created at import time if missing.
+_DATA_DIR = Path.home() / ".gdprscanner"
+_DATA_DIR.mkdir(exist_ok=True)
+# Single JSON file that _save_checkpoint overwrites on every call.
+_CHECKPOINT_PATH = _DATA_DIR / "checkpoint.json"
+
+def _checkpoint_key(options: dict) -> str:
+    """Stable hash of the scan options — used to detect when a checkpoint
+    belongs to a different scan configuration and should be ignored."""
+    sig = json.dumps({
+        "sources":  sorted(options.get("sources", [])),
+        "user_ids": sorted([u["id"] if isinstance(u, dict) else u for u in options.get("user_ids", [])]),
+        "older_than_days": options.get("options", {}).get("older_than_days", 0),
+    }, sort_keys=True)
+    return hashlib.sha256(sig.encode()).hexdigest()[:16]
+
+def _save_checkpoint(key: str, scanned_ids: set, flagged: list, meta: dict) -> None:
+    """Write checkpoint to disk. Called periodically during scanning."""
+    try:
+        payload = {
+            "key":         key,
+            "scanned_ids": list(scanned_ids),
+            "flagged":     flagged,
+            "meta":        {k: v for k, v in meta.items() if k != "options"},
+        }
+        tmp = _CHECKPOINT_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(payload, ensure_ascii=False, default=str), encoding="utf-8")
+        tmp.replace(_CHECKPOINT_PATH)
+    except Exception as e:
+        logger.error("[checkpoint] save failed: %s", e)
+
+def _load_checkpoint(key: str) -> dict | None:
+    """Load checkpoint if it matches the current scan key. Returns None on mismatch or error."""
+    try:
+        if not _CHECKPOINT_PATH.exists():
+            return None
+        payload = json.loads(_CHECKPOINT_PATH.read_text(encoding="utf-8"))
+        if payload.get("key") != key:
+            return None
+        return payload
+    except Exception:
+        return None
+
+def _clear_checkpoint() -> None:
+    try:
+        if _CHECKPOINT_PATH.exists():
+            _CHECKPOINT_PATH.unlink()
+    except Exception:
+        pass
+
+# JSON file in the data directory that stores the delta-token map.
+_DELTA_PATH = _DATA_DIR / "delta.json"
+
+def _load_delta_tokens() -> dict:
+    """Return saved delta token map {key: deltaLink_url}."""
+    try:
+        if _DELTA_PATH.exists():
+            return json.loads(_DELTA_PATH.read_text(encoding="utf-8"))
+    except Exception:
+        pass
+    return {}
+
+def _save_delta_tokens(tokens: dict) -> None:
+    """Persist delta tokens atomically."""
+    try:
+        tmp = _DELTA_PATH.with_suffix(".tmp")
+        tmp.write_text(json.dumps(tokens, ensure_ascii=False), encoding="utf-8")
+        tmp.replace(_DELTA_PATH)
+    except Exception as e:
+        logger.error("[delta] save failed: %s", e)
+
+# ── Broadcast ─────────────────────────────────────────────────────────────────
diff --git a/cpr_detector.py b/cpr_detector.py
new file mode 100644
index 0000000..75852e0
--- /dev/null
+++ b/cpr_detector.py
@@ -0,0 +1,446 @@
+"""
+cpr_detector.py — File scanning and CPR/PII detection for GDPRScanner.
+
+Provides:
+  _scan_bytes(content, filename)    — dispatch to correct scanner by file type
+  _scan_text_direct(text)           — scan a plain text string
+  _extract_exif(content, filename)  — extract PII-bearing EXIF tags from images
+  _detect_photo_faces(content, fn)  — count faces in an image (OpenCV)
+  _get_pii_counts(text)             — NER-based PII type counts
+  _make_thumb(content, filename)    — JPEG thumbnail as base64 string
+  _placeholder_svg(ext, name)       — SVG file-type icon
+
+Globals SCANNER_OK, PIL_OK, PHOTO_EXTS, SUPPORTED_EXTS, ds, PILImage, LANG,
+and _check_special_category are injected at startup by gdpr_scanner.py via
+`from cpr_detector import *` AFTER those names are defined.  This keeps the
+module cleanly importable in isolation for unit tests (#26) while preserving
+the existing runtime behaviour.
+"""
+from __future__ import annotations
+import base64
+import hashlib
+import io
+import tempfile
+import threading
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+# Only one PDF subprocess may run at a time — each spawned process loads
+# ~150-300 MB of Python libs (pdfplumber, pdf2image, pytesseract).
+# Serialising them prevents overlapping subprocesses from exhausting RAM.
+_pdf_subprocess_sem = threading.Semaphore(1)
+
+# ── Lazy fallbacks for standalone / test imports ──────────────────────────────
+# When imported in isolation (e.g. pytest), these defaults prevent NameErrors.
+# gdpr_scanner.py overwrites them at startup via explicit assignment.
+try:
+    import document_scanner as ds
+    SCANNER_OK = True
+except ImportError:
+    ds = None  # type: ignore[assignment]
+    SCANNER_OK = False
+
+try:
+    from PIL import Image as PILImage
+    PIL_OK = True
+except ImportError:
+    PILImage = None  # type: ignore[assignment]
+    PIL_OK = False
+
+# Lower-case file extensions (with leading dot) listed as supported.
+SUPPORTED_EXTS = {
+    ".pdf", ".docx", ".doc", ".xlsx", ".xlsm", ".csv",
+    ".txt", ".eml", ".msg",
+    ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp",
+}
+# Image extensions; unlike SUPPORTED_EXTS this set also contains .heic/.heif.
+PHOTO_EXTS = {
+    ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp", ".heic", ".heif",
+}
+# Empty default; per the module docstring the real value is injected at startup.
+LANG: dict = {}
+
+def _check_special_category(text: str, cprs: list) -> list:
+    """Stub — overwritten by app_config._check_special_category at startup."""
+    return []
+
+
+# EXIF tags that may contain genuinely personal data (name, description, keywords).
+# Deliberately excludes hardware/OS fields (HostComputer, Software, Make, Model,
+# DocumentName, PageName) that are set automatically by the OS on every screenshot
+# and carry no personal information about an individual.
+_EXIF_PII_TAGS = {
+    "Artist", "Copyright", "ImageDescription", "UserComment",
+    "XPAuthor", "XPSubject", "XPComment", "XPKeywords",
+}
+
+# Minimum character length for a PII field value to be considered meaningful.
+# Prevents single-letter or empty values from triggering a flag.
+_EXIF_PII_MIN_LEN = 3
+
+def _extract_exif(content: bytes, filename: str) -> dict:
+    """Extract EXIF metadata from an image file.
+
+    Returns a dict with keys:
+        gps       — {lat, lon, lat_ref, lon_ref, maps_url} or None
+        pii_fields — {tag: value} for fields containing potential PII
+        author    — str or None
+        datetime  — str or None
+        device    — str or None
+        has_pii   — bool
+    """
+    result = {"gps": None, "pii_fields": {}, "author": None,
+              "datetime": None, "device": None, "has_pii": False}
+
+    if not PIL_OK:
+        return result
+
+    try:
+        from PIL import Image as _Img, ExifTags as _ExifTags
+        import io
+        img = _Img.open(io.BytesIO(content))
+
+        # Get raw EXIF
+        raw = getattr(img, "_getexif", lambda: None)()
+        if not raw:
+            # Try newer Pillow API
+            exif_data = img.getexif()
+            raw = {k: v for k, v in exif_data.items()}
+
+        if not raw:
+            return result
+
+        tag_names = {v: k for k, v in _ExifTags.TAGS.items()}
+
+        # Build human-readable dict
+        named = {}
+        for tag_id, value in raw.items():
+            tag = _ExifTags.TAGS.get(tag_id, str(tag_id))
+            named[tag] = value
+
+        # Author / description fields
+        for field in _EXIF_PII_TAGS:
+            val = named.get(field)
+            if val:
+                try:
+                    # UserComment is bytes with encoding prefix
+                    if isinstance(val, bytes):
+                        val = val.decode("utf-8", errors="replace").strip("\x00 ")
+                    elif not isinstance(val, str):
+                        val = str(val)
+                    if val.strip() and len(val.strip()) >= _EXIF_PII_MIN_LEN:
+                        result["pii_fields"][field] = val.strip()
+                        result["has_pii"] = True
+                except Exception:
+                    pass
+
+        if named.get("Artist"):
+            result["author"] = str(named["Artist"])
+        elif named.get("XPAuthor"):
+            result["author"] = str(named["XPAuthor"])
+
+        if named.get("DateTimeOriginal"):
+            result["datetime"] = str(named["DateTimeOriginal"])
+        elif named.get("DateTime"):
+            result["datetime"] = str(named["DateTime"])
+
+        make  = named.get("Make", "")
+        model = named.get("Model", "")
+        if make or model:
+            result["device"] = f"{make} {model}".strip()
+
+        # GPS
+        gps_raw = named.get("GPSInfo")
+        if gps_raw and isinstance(gps_raw, dict):
+            try:
+                gps_tags = {_ExifTags.GPSTAGS.get(k, k): v for k, v in gps_raw.items()}
+
+                def _dms_to_decimal(dms, ref):
+                    if not dms or len(dms) < 3:
+                        return None
+                    deg, mn, sec = dms
+                    # Pillow may return IFDRational objects
+                    deg = float(deg); mn = float(mn); sec = float(sec)
+                    dec = deg + mn / 60 + sec / 3600
+                    if ref in ("S", "W"):
+                        dec = -dec
+                    return round(dec, 7)
+
+                lat = _dms_to_decimal(
+                    gps_tags.get("GPSLatitude"),
+                    gps_tags.get("GPSLatitudeRef", "N"),
+                )
+                lon = _dms_to_decimal(
+                    gps_tags.get("GPSLongitude"),
+                    gps_tags.get("GPSLongitudeRef", "E"),
+                )
+                if lat is not None and lon is not None:
+                    result["gps"] = {
+                        "lat":      lat,
+                        "lon":      lon,
+                        "lat_ref":  gps_tags.get("GPSLatitudeRef", "N"),
+                        "lon_ref":  gps_tags.get("GPSLongitudeRef", "E"),
+                        "maps_url": f"https://www.google.com/maps?q={lat},{lon}",
+                    }
+                    result["has_pii"] = True
+            except Exception:
+                pass
+
+    except Exception:
+        pass
+
+    return result
+
+
+
+    """Detect faces in an image file using OpenCV Haar cascades.
+
+    Returns the number of faces detected, or 0 if cv2 is unavailable,
+    the file is not a supported image format, or decoding fails.
+    Face detection is intentionally strict (minNeighbors=8, min_size=80px) to
+    reduce false positives on background textures, labels, and artwork.
+    Haar cascades are tuned for compliance flagging, not exhaustive detection.  (#9)
+    """
+    if not SCANNER_OK:
+        return 0
+    try:
+        cv2_mod = getattr(ds, "_get_cv2", None)
+        if cv2_mod is None:
+            return 0
+        cv2, np = ds._get_cv2()
+        if cv2 is None or np is None:
+            return 0
+    except Exception:
+        return 0
+
+    try:
+        # Decode image bytes → cv2 BGR array
+        arr = np.frombuffer(content, dtype=np.uint8)
+        img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+        if img is None:
+            # imdecode failed (e.g. HEIC without codec) — try PIL fallback
+            if PIL_OK:
+                try:
+                    from PIL import Image as _PILImg
+                    import io as _io
+                    pil_img = _PILImg.open(_io.BytesIO(content)).convert("RGB")
+                    pil_arr = np.array(pil_img)
+                    img = cv2.cvtColor(pil_arr, cv2.COLOR_RGB2BGR)
+                except Exception:
+                    return 0
+            else:
+                return 0
+
+        faces = ds.detect_faces_cv2(img, min_size=80, neighbors=8)
+        return len(faces)
+    except Exception:
+        return 0
+
+def _detect_photo_faces(content: bytes, filename: str) -> int:
+    """Detect faces in an image file using OpenCV Haar cascades.
+
+    Returns the number of faces detected, or 0 if cv2 is unavailable,
+    the file is not a supported image format, or decoding fails.
+    Face detection is intentionally strict (minNeighbors=8, min_size=80px) to
+    reduce false positives on background textures, labels, and artwork.
+    Haar cascades are tuned for compliance flagging, not exhaustive detection.  (#9)
+    """
+    if not SCANNER_OK:
+        return 0
+    try:
+        cv2_mod = getattr(ds, "_get_cv2", None)
+        if cv2_mod is None:
+            return 0
+        cv2, np = ds._get_cv2()
+        if cv2 is None or np is None:
+            return 0
+    except Exception:
+        return 0
+
+    try:
+        arr = np.frombuffer(content, dtype=np.uint8)
+        img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+        if img is None:
+            if PIL_OK:
+                try:
+                    from PIL import Image as _PILImg
+                    import io as _io
+                    pil_img = _PILImg.open(_io.BytesIO(content)).convert("RGB")
+                    pil_arr = np.array(pil_img)
+                    img = cv2.cvtColor(pil_arr, cv2.COLOR_RGB2BGR)
+                except Exception:
+                    return 0
+            else:
+                return 0
+
+        faces = ds.detect_faces_cv2(img, min_size=80, neighbors=8)
+        return len(faces)
+    except Exception:
+        return 0
+
+
+def _scan_bytes(content: bytes, filename: str, poppler_path=None) -> dict:
+    """Scan raw bytes for CPRs. Returns scanner result dict."""
+    if not SCANNER_OK:
+        return {"cprs": [], "dates": [], "error": "scanner not available"}
+    ext = Path(filename).suffix.lower()
+    with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
+        tmp.write(content)
+        tmp_path = Path(tmp.name)
+    try:
+        if ext == ".pdf":
+            # Check if the PDF has a text layer before running full scan_pdf.
+            # Image-only PDFs (scanned documents) have no text and would trigger
+            # Tesseract OCR subprocesses that hang indefinitely on some files.
+            try:
+                import pdfplumber as _pp, io as _io
+                with _pp.open(_io.BytesIO(content)) as _pdf:
+                    has_text = any(ds.is_text_page(p) for p in _pdf.pages)
+                if not has_text:
+                    return {"cprs": [], "dates": []}  # image-only PDF — no CPRs possible
+            except Exception:
+                pass  # if pdfplumber fails, fall through to full scan_pdf
+            return ds.scan_pdf(tmp_path, poppler_path=poppler_path)
+        elif ext in {".docx", ".doc"}:
+            return ds.scan_docx(tmp_path)
+        elif ext in {".xlsx", ".xlsm"}:
+            return ds.scan_xlsx(tmp_path)
+        elif ext == ".csv":
+            return ds.scan_csv(tmp_path)
+        elif ext == ".txt":
+            text = content.decode("utf-8", errors="replace")
+            cprs, dates = ds.extract_matches(text, 1, "text")
+            return {"cprs": cprs, "dates": dates}
+        elif ext in {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp"}:
+            return ds.scan_image(tmp_path)
+        else:
+            # Try plain text
+            try:
+                text = content.decode("utf-8", errors="replace")
+                cprs, dates = ds.extract_matches(text, 1, "text")
+                return {"cprs": cprs, "dates": dates}
+            except Exception:
+                return {"cprs": [], "dates": []}
+    except Exception as e:
+        return {"cprs": [], "dates": [], "error": str(e)}
+    finally:
+        try:
+            tmp_path.unlink()
+        except Exception:
+            pass
+
+def _worker_scan_pdf(pdf_path_str: str, result_q) -> None:
+    """Worker executed in a spawned subprocess — must be a module-level function."""
+    try:
+        import document_scanner as _ds
+        from pathlib import Path as _Path
+        result_q.put(_ds.scan_pdf(_Path(pdf_path_str)))
+    except Exception as e:
+        result_q.put({"cprs": [], "dates": [], "error": str(e)})
+
+
+def _scan_bytes_timeout(content: bytes, filename: str, timeout: int = 60) -> dict:
+    """Like _scan_bytes but runs PDF scanning in a spawned subprocess with a hard timeout.
+
+    For non-PDF files delegates straight to _scan_bytes.  For PDFs it writes the
+    bytes to a temp file, spawns a fresh Python process (spawn context — safe on
+    macOS/Flask), and joins with *timeout* seconds.  If the worker is still alive
+    after the timeout it is forcibly terminated so the scan thread is never blocked.
+    """
+    ext = Path(filename).suffix.lower()
+    if ext != ".pdf":
+        return _scan_bytes(content, filename)
+
+    import multiprocessing
+    ctx = multiprocessing.get_context("spawn")
+
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
+        tmp.write(content)
+        tmp_path_str = tmp.name
+    del content  # written to temp file — release raw bytes before subprocess loads
+
+    try:
+        with _pdf_subprocess_sem:
+            q = ctx.Queue()
+            p = ctx.Process(target=_worker_scan_pdf, args=(tmp_path_str, q))
+            p.start()
+            p.join(timeout)
+            if p.is_alive():
+                p.terminate()
+                p.join()
+                return {"cprs": [], "dates": [], "error": f"PDF OCR timed out after {timeout}s"}
+            try:
+                return q.get_nowait()
+            except Exception:
+                return {"cprs": [], "dates": [], "error": "Worker returned no result"}
+    finally:
+        try:
+            Path(tmp_path_str).unlink()
+        except Exception:
+            pass
+
+
+def _scan_text_direct(text: str) -> dict:
+    """Scan a plain text string for CPRs using extract_matches.
+    
+    Uses ds.extract_matches() directly rather than ds.scan_text() because
+    scan_text() calls extract_cpr_and_dates() which is not defined in
+    document_scanner.py (pre-existing bug).
+    """
+    if not SCANNER_OK or not text:
+        return {"cprs": [], "dates": []}
+    try:
+        cprs, dates = ds.extract_matches(text, 1, "text")
+        return {"cprs": cprs, "dates": dates}
+    except Exception:
+        return {"cprs": [], "dates": []}
+
+def _html_esc(s: str) -> str:
+    """HTML-escape a string for safe inline embedding."""
+    import html as _h
+    return _h.escape(str(s))
+
+
+def _get_pii_counts(text: str) -> dict:
+    """Run count_pii_types on text if the scanner is available."""
+    if not SCANNER_OK:
+        return {}
+    try:
+        return ds.count_pii_types(text, use_ner=True)
+    except Exception:
+        return {}
+
+
+def _make_thumb(content: bytes, filename: str) -> str:
+    """Make a small base64 thumbnail from image bytes, or return SVG placeholder."""
+    ext = Path(filename).suffix.lower()
+    if not PIL_OK or ext not in {".jpg", ".jpeg", ".png", ".bmp", ".webp"}:
+        return _placeholder_svg(ext, filename)
+    try:
+        img = PILImage.open(io.BytesIO(content)).convert("RGB")
+        img.thumbnail((280, 360), PILImage.LANCZOS)
+        buf = io.BytesIO()
+        img.save(buf, format="JPEG", quality=82)
+        return base64.b64encode(buf.getvalue()).decode()
+    except Exception:
+        return _placeholder_svg(ext, filename)
+
+def _placeholder_svg(ext: str, name: str) -> str:
+    colors = {
+        ".pdf":  ("#E8453C", "PDF"),  ".docx": ("#2B7CD3", "DOCX"),
+        ".doc":  ("#2B7CD3", "DOC"),  ".xlsx": ("#1E7145", "XLSX"),
+        ".xlsm": ("#1E7145", "XLSM"), ".csv":  ("#6B7280", "CSV"),
+        ".eml":  ("#8B44AD", "EML"),  ".msg":  ("#8B44AD", "MSG"),
+        ".txt":  ("#6B7280", "TXT"),
+    }
+    bg, label = colors.get(ext, ("#9CA3AF", ext.upper().lstrip(".")))
+    short = name[:22] + "…" if len(name) > 22 else name
+    svg = f"""<svg xmlns="http://www.w3.org/2000/svg" width="280" height="360">
+  <rect width="280" height="360" fill="{bg}"/>
+  <rect x="20" y="20" width="240" height="280" rx="8" fill="rgba(255,255,255,0.12)"/>
+  <text x="140" y="170" font-family="monospace" font-size="52" font-weight="bold"
+        fill="#fff" text-anchor="middle" opacity="0.9">{label}</text>
+  <text x="140" y="320" font-family="monospace" font-size="13"
+        fill="#fff" text-anchor="middle" opacity="0.7">{short}</text>
+</svg>"""
+    return base64.b64encode(svg.encode()).decode()
+
+# ── Main scan runner ──────────────────────────────────────────────────────────
diff --git a/docs/manuals/MANUAL-DA.md b/docs/manuals/MANUAL-DA.md
new file mode 100644
index 0000000..b878bfa
--- /dev/null
+++ b/docs/manuals/MANUAL-DA.md
@@ -0,0 +1,543 @@
+# GDPR Scanner — Brugermanual
+
+Version 1.6.14
+
+---
+
+## Indholdsfortegnelse
+
+1. [Hvad er GDPR Scanner?](#1-hvad-er-gdpr-scanner)
+2. [Overblik over brugerfladen](#2-overblik-over-brugerfladen)
+3. [Forbindelse til dine datakilder](#3-forbindelse-til-dine-datakilder)
+4. [Kør en scanning](#4-kør-en-scanning)
+5. [Forstå resultaterne](#5-forstå-resultaterne)
+6. [Gennemgang og mærkning af fund](#6-gennemgang-og-mærkning-af-fund)
+7. [Sletning af elementer](#7-sletning-af-elementer)
+8. [Profiler — gem dine scanningsindstillinger](#8-profiler--gem-dine-scanningsindstillinger)
+9. [Rapporter og eksport](#9-rapporter-og-eksport)
+10. [Del resultater med en gennemganger](#10-del-resultater-med-en-gennemganger)
+11. [Planlagte scanninger](#11-planlagte-scanninger)
+12. [E-mailrapporter](#12-e-mailrapporter)
+13. [Sikkerhedskopi og gendannelse af database](#13-sikkerhedskopi-og-gendannelse-af-database)
+14. [Indstillinger — oversigt](#14-indstillinger--oversigt)
+15. [Ofte stillede spørgsmål](#15-ofte-stillede-spørgsmål)
+
+---
+
+## 1. Hvad er GDPR Scanner?
+
+GDPR Scanner søger i din organisations digitale data — e-mails, cloud-filer, delte drev og lokale filservere — efter personoplysninger som CPR-numre, navne, adresser, telefonnumre og særlige kategorier af oplysninger efter GDPR artikel 9.
+
+Når der er fundet elementer, kan du gennemgå dem, beslutte hvad der skal ske med hvert enkelt (beholde, slette eller markere som uden for scope), udarbejde en artikel 30-fortegnelse og masseslette forældede data.
+
+**Hvad scanneren gennemgår:**
+- Microsoft 365: Exchange e-mail, OneDrive, SharePoint, Teams
+- Google Workspace: Gmail, Google Drev
+- Lokale og netværksbaserede filmapper (herunder SMB/NAS-drev)
+
+**Hvad den finder:**
+- CPR-numre
+- Telefonnumre, e-mailadresser, postadresser
+- Bankkontonumre og IBAN-numre
+- Navne og organisationsnavne
+- Fotografier med genkendelige ansigter (valgfrit)
+- GPS-placeringsdata indlejret i billedfiler
+
+---
+
+## 2. Overblik over brugerfladen
+
+Når du åbner scanneren, er skærmen inddelt i følgende områder:
+
+```
+┌─────────────────┬──────────────────────────────────────────┐
+│                 │  Topbjælke: Scan-knap, profiler, handlinger│
+│   Venstre panel ├──────────────────────────────────────────┤
+│                 │                                           │
+│  - Kilder       │         Resultater / scanningsforløb      │
+│  - Indstillinger│                                           │
+│  - Konti        │                                           │
+│  - Statistik    ├──────────────────────────────────────────┤
+│                 │               Aktivitetslog               │
+└─────────────────┴──────────────────────────────────────────┘
+```
+
+**Venstre panel** — vælg hvad der skal scannes og hvordan.  
+**Topbjælke** — start en scanning, vælg profiler, og tilgå eksporter og indstillinger.  
+**Resultatområde** — fundne elementer vises her, mens scanningen kører.  
+**Statuslinje** — vises lige over aktivitetsloggen og angiver hvilken kilde der scannes, hvem der scannes, og hvor langt scanningen er.  
+**Aktivitetslog** — viser statusbeskeder i realtid under scanningen. Klik på **▾**-pilen i loggens overskrift for at folde panelet sammen eller ud. Du kan også filtrere loggen til kun at vise fejl, kopiere al logtekst til udklipsholderen og ændre størrelsen på panelet ved at trække i håndtaget øverst på panelet.
+
+### Mørkt / lyst tema
+
+Klik på **🌙**-knappen øverst til højre for at skifte mellem mørkt og lyst tema. Din præference huskes.
+
+---
+
+## 3. Forbindelse til dine datakilder
+
+Inden du kan scanne, skal du forbinde mindst én datakilde. Klik på **Kilder** i topbjælken for at åbne kildestyringspanelet.
+
+### 3.1 Microsoft 365
+
+Fanen Microsoft 365 viser din aktuelle forbindelsesstatus. Hvis du ser en grøn prik og dit kontonavn eller lejernavn, er du allerede forbundet.
+
+**Kilder du kan slå til og fra:**
+
+| Skift | Hvad der scannes |
+|-------|-----------------|
+| Outlook | Exchange-postkasser (indbakke, sendt post, alle mapper) |
+| OneDrive | Den enkelte brugers personlige cloud-lager |
+| SharePoint | Team- og projektsider |
+| Teams | Filer delt i Teams-kanaler |
+
+Slå de kilder fra, du ikke ønsker at medtage. Disse indstillinger huskes.
+
+### 3.2 Google Workspace
+
+Fanen Google Workspace lader dig forbinde en Google Workspace-konto (tidligere G Suite) via en tjenestekonto, eller en personlig Google-konto via login.
+
+**Kilder du kan slå til og fra:**
+
+| Skift | Hvad der scannes |
+|-------|-----------------|
+| Gmail | Alle e-mails i den enkelte brugers indbakke og labels |
+| Google Drev | Alle filer ejet af eller delt med den enkelte bruger |
+
+### 3.3 Lokale og netværksbaserede filer
+
+Fanen **Filkilder** viser de lokale mapper og netværksdrev, du har konfigureret.
+
+**Sådan tilføjer du en ny filkilde:**
+1. Indtast en **Betegnelse** — et navn du kan genkende (f.eks. "Skolens Fællesmappe").
+2. Indtast **Stien**:
+   - Lokal mappe: `~/Dokumenter` eller `/Volumes/Drev`
+   - Netværksdrev: `//nas-server/delt` eller `\\server\delt`
+3. Hvis det er et netværksdrev, udfyldes felterne **SMB-vært**, **Brugernavn** og **Adgangskode** automatisk. Adgangskoden gemmes sikkert i systemets nøglering.
+4. Klik på **Tilføj**.
+
+Du kan tilføje så mange filkilder, du har brug for. De vil fremgå som valgbare kilder i venstre panel, når du er klar til at scanne.
+
+---
+
+## 4. Kør en scanning
+
+### 4.1 Vælg dine kilder
+
+I venstre panel under **Kilder** sætter du hak ved de kilder, du vil medtage. Du kan kombinere M365, Google og filkilder i samme scanning.
+
+### 4.2 Vælg konti
+
+Under **Konti** vises alle brugere tilknyttet din M365- og/eller Google-lejer.
+
+- Brug **søgefeltet** til at finde bestemte personer.
+- Brug knapperne **Alle / Ansat / Elev** til at filtrere efter rolle.
+- Brug **Alle**- og **Ingen**-knapperne til at vælge eller fravælge alle på én gang.
+- Sæt hak ved eller fjern hak fra enkeltpersoner.
+
+For filkilder er kontovalg ikke relevant — alle filer i de valgte stier scannes.
+
+### 4.3 Konfigurer indstillinger
+
+Under **Indstillinger** kan du justere scanningen:
+
+**Datofilter (Scan e-mails/filer fra)**  
+Scan kun elementer ændret efter en bestemt dato. Hurtige forudindstillinger — **1 år**, **2 år**, **5 år**, **10 år**, **Alle** — lader dig vælge et interval med ét klik. Du kan også vælge en specifik dato med datovælgeren.
+
+> Tip: "2 år" er et godt udgangspunkt for den første scanning. Du kan altid udvide til "Alle" bagefter.
+
+**Scan e-mailindhold** — gennemgår selve teksten i e-mails. Aktiveret som standard.
+
+**Scan vedhæftede filer** — gennemgår filer vedhæftet e-mails. Aktiveret som standard.
+
+**Maks. vedhæftet filstørrelse** — spring vedhæftede filer over, der er større end denne grænse (standard 20 MB). Øg grænsen, hvis du vil kontrollere større dokumenter.
+
+**Maks. e-mails pr. bruger** — stop efter at have scannet dette antal e-mails per person (standard 2.000). Øg det, hvis du har brug for fuld dækning.
+
+### 4.4 Start scanningen
+
+Klik på den blå **Scan**-knap i topbjælken.
+
+En statuslinje viser:
+- En farvet **kildemærkat** — **Outlook**, **OneDrive**, **SharePoint**, **Teams**, **Gmail**, **GDrive** eller **Local** — efterfulgt af det fulde navn på den konto, der scannes i øjeblikket
+- En løbende optælling af scannede og fundne elementer
+- Estimeret resterende tid
+
+Resultater vises i hovedområdet efterhånden som de findes — du behøver ikke vente på, at scanningen er færdig, før du begynder at gennemgå dem.
+
+Klik på **Stop** for at afbryde. Et kontrolpunkt gemmes automatisk, så du kan fortsætte senere.
+
+### 4.5 Genoptag en afbrudt scanning
+
+Hvis en scanning blev afbrudt (via stop, nedbrud eller lukning af programmet), vises et gult banner øverst i resultatområdet:
+
+> Forrige scanning blev afbrudt — X scannet, Y fundet  
+> **▶ Genoptag** · Start forfra
+
+Klik på **▶ Genoptag** for at fortsætte fra det sted, scanningen slap. Klik på **Start forfra** for at kassere kontrolpunktet og begynde en ny scanning.
+
+---
+
+## 5. Forstå resultaterne
+
+Hvert fundet element vises som et kort. Her er forklaringen på mærker og labels:
+
+### Kildemærker
+
+| Mærke | Betydning |
+|-------|-----------|
+| Outlook | Fundet i en Exchange-postkasse |
+| OneDrive | Fundet i en brugers OneDrive |
+| SharePoint | Fundet på et SharePoint-site |
+| Teams | Fundet i en Teams-kanal |
+| Gmail | Fundet i en Gmail-postkasse |
+| Google Drev | Fundet i Google Drev |
+| Lokal / Netværk | Fundet på et filshare |
+
+### Risikoniveau
+
+| Niveau | Betydning |
+|--------|-----------|
+| HØJ | Flere CPR-numre, særlige kategorier af data, ældre end opbevaringspolitikken eller eksternt delt |
+| MELLEM | Et enkelt CPR-nummer med noget deling eller kontekstuel risiko |
+| LAV | Et enkelt CPR-nummer, ikke delt, nyligt oprettet |
+
+### Øvrige mærker
+
+| Mærke | Betydning |
+|-------|-----------|
+| Tal (f.eks. **3**) | Antal CPR-numre fundet i elementet |
+| **Delt** | Elementet er delt med andre brugere |
+| **Ekstern** | Elementet er delt med nogen uden for organisationen |
+| **Art. 9** | Særlige kategorier af oplysninger fundet (helbred, religion, biometriske data mv.) |
+| **N ansigter** | N genkendelige ansigter registreret i et foto |
+| **GPS** | Filen indeholder GPS-placeringsdata i metadata |
+
+### Kortvisning vs. listevisning
+
+Standardvisningen er **kortvisning**. Klik på **Liste** i filterbjælken for at skifte til en kompakt tabelvisning med sorterbare kolonner. Klik på **Gitter** for at skifte tilbage.
+
+### Filtrering af resultater
+
+Brug filterbjælken over resultaterne til at indsnævre visningen:
+
+- **Søgefelt** — søg på navn, emne eller filsti.
+- **Kildetype** — vis kun én kildetype.
+- **Disposition** — vis elementer efter gennemgangsstatus.
+- **Deling** — filtrer på delt / ekstern / alle.
+- **Risiko** — vis kun Art. 9, fotos, GPS eller høj-risiko-elementer.
+
+---
+
+## 6. Gennemgang og mærkning af fund
+
+Klik på et resultatkort for at åbne forhåndsvisningspanelet i højre side af skærmen.
+
+Forhåndsvisningen viser:
+- Elementets navn eller e-mailens emne
+- Kontoen (ejer / afsender)
+- Kilde og ændringsdato
+- Alle fundne CPR-numre og deres kontekst
+- Øvrige personoplysninger registreret (telefon, e-mailadresse, IBAN mv.)
+- Deling og ekstern adgangsinformation
+
+### Angiv en disposition
+
+Hvert element har en **Disposition**-rullemenu i forhåndsvisningspanelet. Vælg én af følgende:
+
+| Disposition | Brug den når… |
+|-------------|---------------|
+| Ikke gennemgået | Endnu ikke vurderet — standardværdi |
+| Opbevar — lovkrav | Du er juridisk forpligtet til at beholde den |
+| Opbevar — legitim interesse | Du har en legitim interesse i at beholde den |
+| Opbevar — kontrakt | Nødvendig i forbindelse med en kontrakt |
+| Slet — planlagt | Markeret til fremtidig sletning |
+| Privat brug — uden for scope | Personligt element, ikke inden for GDPR-scopet |
+| Slettet | Allerede slettet (angives automatisk ved sletning) |
+
+Klik på **Gem** efter valget. En lille **✓ Gemt**-bekræftelse vises.
+
+### Find alle elementer for en bestemt person
+
+Klik på **🔍** i venstre panel (under Statistik) for at åbne **Registreret person**-opslaget. Indtast et CPR-nummer, og scanneren finder alle fundne elementer, der indeholder dette nummer. Du kan derefter slette dem alle i ét trin — i overensstemmelse med retten til sletning (GDPR artikel 17).
+
+CPR-nummeret hashes inden søgningen og gemmes aldrig i klartekst.
+
+---
+
+## 7. Sletning af elementer
+
+### 7.1 Sletning af et enkelt element
+
+Med et element åbent i forhåndsvisningspanelet kan du angive dispositionen **Slet — planlagt** og bruge handlingsknappen til at slette det. E-mailen flyttes til mappen Slettet post; filer flyttes til papirkurven i den pågældende tjeneste.
+
+### 7.2 Massesletning
+
+Klik på **Slet**-knappen i filterbjælken for at åbne massesletningsvinduet.
+
+1. **Indstil filtre** for at målrette de elementer, du ønsker at slette:
+   - **Kildetype** — slet fra én kilde eller alle.
+   - **Min. CPR-fund** — slet kun elementer med mindst dette antal CPR-numre.
+   - **Ældre end dato** — slet kun elementer ændret inden en bestemt dato.
+   - Klik på **🗓 Filter forældet** for automatisk at udfylde datoen ud fra din opbevaringspolitik.
+
+2. Vinduet viser, hvor mange elementer der matcher dine filtre.
+
+3. Klik på den røde **Slet matchende elementer**-knap for at fortsætte.
+
+4. En statuslinje viser sletningerne i realtid. E-mails flyttes til **Slettet post**; filer flyttes til **papirkurven**.
+
+En fuldstændig revisionslog over alle sletninger (hvad der er slettet, hvornår og hvorfor) medtages i artikel 30-rapporten.
+
+---
+
+## 8. Profiler — gem dine scanningsindstillinger
+
+En profil gemmer dine valgte kilder, konti, scanningsindstillinger og datoindstillinger, så du kan genbruge dem uden at konfigurere alt på ny hver gang.
+
+### Gem en profil
+
+Konfigurer venstre panel præcis som du ønsker det — herunder hvilke M365-kilder, Google-kilder og lokale filkilder der er aktiveret, hvilke konti der er valgt, og alle indstillinger — og klik derefter på **Gem**-knappen i topbjælken. Indtast et navn og klik OK. Profilen gemmes og vælges med det samme.
+
+### Anvend en profil
+
+Klik på profil-rullemenuen i topbjælken og vælg en profil. Alle indstillinger i venstre panel — kilder, konti, indstillinger og datofilter — indlæses på én gang. Venstre panel viser derefter din aktive tilstand, og du kan justere hvad som helst, inden du scanner.
+
+En **Ryd**-knap vises ved siden af rullemenuen, når en profil er valgt. Klik på den for at rydde profil­etiketten uden at ændre indstillingerne i venstre panel. Det er nyttigt, når du vil køre en engangsscan uden at overskrive en gemt profil.
+
+### Administrer profiler
+
+Klik på **Profiler** for at åbne profil­administrations­panelet. Her kan du:
+
+- **Redigere** en profil — ændre navn, beskrivelse, kilder, konti eller indstillinger.
+- **Duplikere** en profil — nyttigt som udgangspunkt for en variant.
+- **Slette** en profil.
+
+> Bemærk: Redigering af en profil påvirker ikke scanninger, der allerede er gennemført med den pågældende profil.
+
+---
+
+## 9. Rapporter og eksport
+
+### 9.1 Excel-eksport
+
+Klik på **Excel** i filterbjælken for at downloade de aktuelle resultater som en Excel-projektmappe. Projektmappen indeholder:
+- Et oversigtsfaneblad med scanningsdato, antal elementer og kildefordeling.
+- Et separat faneblad for hver kildetype (Outlook, OneDrive, SharePoint, Teams, Gmail, Google Drive, Lokal, Netværk).
+- Alle fundne elementer, herunder kilde, konto, CPR-antal, risikoniveau, delingsstatus og disposition.
+
+Knapperne **Excel** og **Art.30** er altid tilgængelige — også efter genstart af programmet — og eksporterer resultaterne fra den seneste afsluttede scanningssession uden at kræve en ny scanning.
+
+Excel-filen er det primære arbejdsdokument til din interne gennemgangsproces.
+
+### 9.2 GDPR Artikel 30-rapport (Word-dokument)
+
+Klik på **Art.30** i filterbjælken for at generere et Word-dokument, der opfylder kravet i GDPR artikel 30 om at føre en fortegnelse over behandlingsaktiviteter.
+
+Dokumentet indeholder:
+- **Resumé** — scanningsdato, samlet antal elementer, CPR-fund pr. kilde.
+- **Datakategorier** — hvilke typer personoplysninger der er fundet.
+- **Datafortegnelse** — den fulde liste over fundne elementer.
+- **Opbevaringsanalyse** — elementer ældre end din opbevaringspolitik, fordelt på kilder.
+- **Særlige kategorier (Art. 9)** — helbreds-, biometriske og andre følsomme oplysninger.
+- **Fotografier / biometriske data** — hvis ansigtsgenkendelse var aktiveret.
+- **GPS-data** — filer med indlejrede placeringsoplysninger.
+- **Compliance-tendens** — antal fundne elementer på tværs af dine seneste 20 scanninger.
+- **Revisionslog for sletninger** — en komplet dokumentation af alle sletninger foretaget via scanneren.
+- **Metode** — hvordan scanningen er udført og det juridiske grundlag.
+- **Noter om elevdata** — vejledning om krav til forældresamtykke for børn under 15 år.
+
+---
+
+## 10. Del resultater med en gennemganger
+
+Du kan give en DPO, skoleleder eller compliance-koordinator skrivebeskyttet adgang til resultatgitteret — herunder mulighed for at mærke dispositioner — uden at give dem adgang til scanningskontroller, loginoplysninger eller indstillinger.
+
+### 10.1 Token-links
+
+Klik på **🔗**-knappen øverst til højre i topbjælken for at åbne delingspanelet.
+
+1. Angiv eventuelt en **Betegnelse** for at identificere, hvem linket er til (f.eks. "DPO-gennemgang april 2026").
+2. Vælg en **Udløbsdato** — 7 dage, 30 dage, 90 dage, 1 år eller Aldrig.
+3. Klik på **Opret**. Der genereres et unikt link: `http://host:5100/view?token=…`
+4. Klik på **Kopiér** for at kopiere linket til udklipsholderen, og send det til gennemgangeren.
+
+Gennemgangeren åbner linket i en browser. De kan se det fulde resultatgitter og mærke dispositioner, men kan ikke starte scanninger, ændre indstillinger, se loginoplysninger eller slette elementer.
+
+**Administrer eksisterende links**
+
+Delingspanelet viser alle aktive links. Hver række viser betegnelse, udløbsdato og hvornår linket sidst blev brugt. Klik på **Kopiér** for at kopiere et link igen, eller **Tilbagekald** for at gøre det ugyldigt med det samme.
+
+### 10.2 Viewer-PIN
+
+Som alternativ til token-links kan du angive en numerisk PIN-kode (4–8 cifre) under **Indstillinger → Sikkerhed → Viewer-PIN**. Alle, der kender PIN-koden, kan åbne `http://host:5100/view` i en browser, indtaste PIN-koden og få adgang til den skrivebeskyttede visning i hele browsersessionen.
+
+For at angive eller ændre PIN-koden skal du indtaste den nye kode i feltet **Ny PIN** og klikke på **Gem PIN**. Klik på **Ryd PIN** for at fjerne den.
+
+> **Sikkerhedsnote:** Token-links er mere sikre end en PIN-kode, fordi hvert link kan tilbagekaldes individuelt og har en udløbsdato. Brug PIN-indstillingen kun til betroede interne gennemgangere på dit lokale netværk.
+
+### 10.3 Hvad gennemgangeren kan gøre
+
+| Handling | Tilladt |
+|----------|---------|
+| Gennemse resultatgitter | Ja |
+| Filtrere og søge i resultater | Ja |
+| Åbne forhåndsvisning | Ja |
+| Mærke dispositioner | Ja |
+| Eksportere til Excel | Ja |
+| Eksportere Artikel 30-rapport | Ja |
+| Starte eller stoppe en scanning | Nej |
+| Se eller ændre loginoplysninger | Nej |
+| Slette elementer | Nej |
+| Tilgå indstillinger | Nej |
+| Oprette eller tilbagekalde viewer-links | Nej |
+
+---
+
+## 11. Planlagte scanninger
+
+Gå til **Indstillinger → Planlægger** for at konfigurere automatiske scanninger.
+
+### Opret en planlagt scanning
+
+1. Klik på **+ Tilføj planlagt scanning**.
+2. Giv jobbet et navn.
+3. Vælg frekvens: **Dagligt**, **Ugentligt** eller **Månedligt**.
+4. For ugentlige scanninger vælges ugedag. For månedlige vælges dag i måneden.
+5. Angiv det tidspunkt, scanningen skal køre.
+6. Vælg en **Profil** — scanneren bruger den pågældende profils kilder, konti og indstillinger.
+7. Aktiver eventuelt:
+   - **Send rapport automatisk** — send Excel-rapporten pr. e-mail til dine konfigurerede modtagere efter hver scanning.
+   - **Håndhæv opbevaringspolitik** — slet automatisk elementer ældre end din opbevaringspolitik efter hver scanning.
+8. Klik på **Gem**.
+
+Planlæggerindikatoren i topbjælken viser dato og tidspunkt for den næste planlagte scanning ("Næste: …").
+
+### Se seneste kørsler
+
+Fanen Planlægger viser historik over seneste kørsler med starttidspunkt, status og antal fundne elementer.
+
+---
+
+## 12. E-mailrapporter
+
+Gå til **Indstillinger → E-mailrapport** for at konfigurere e-mail-afsendelse.
+
+### Opsætning af SMTP
+
+Udfyld oplysningerne for din udgående mailserver:
+
+| Felt | Eksempel |
+|------|----------|
+| SMTP-vært | smtp.office365.com |
+| Port | 587 |
+| Brugernavn | scanner@skole.dk |
+| Adgangskode | (din e-mailadgangskode eller app-adgangskode) |
+| Afsenderadresse | scanner@skole.dk |
+| Modtagere | dpo@skole.dk; it@skole.dk |
+
+Klik på **Gem** for at gemme, og klik derefter på **Test** for at sende en test-e-mail og bekræfte, at konfigurationen virker.
+
+> Hvis din konto har MFA (to-faktor-godkendelse) aktiveret, kan du ikke bruge din almindelige adgangskode. Du skal oprette en **app-adgangskode** i din kontos sikkerhedsindstillinger:
+> - **Personlig Microsoft-konto**: account.microsoft.com/security → App-adgangskoder
+> - **Gmail**: myaccount.google.com → Sikkerhed → 2-trinsbekræftelse → App-adgangskoder
+
+### Send en rapport manuelt
+
+Klik på **Send nu** for øjeblikkeligt at sende den aktuelle Excel-rapport pr. e-mail til alle konfigurerede modtagere.
+
+---
+
+## 13. Sikkerhedskopi og gendannelse af database
+
+Alle scanningsresultater, dispositioner og sletningsrevisionsloggen gemmes i en lokal database. Det anbefales at tage regelmæssige sikkerhedskopier.
+
+Gå til **Indstillinger → Database**.
+
+### Sikkerhedskopi (Eksport)
+
+Klik på **Eksporter** for at oprette en `.zip`-sikkerhedskopi af din database. Gem den på et sikkert sted.
+
+### Gendannelse (Import)
+
+Klik på **Importer** for at gendanne fra en sikkerhedskopi. To tilstande er tilgængelige:
+
+| Tilstand | Hvornår du bruger den |
+|----------|-----------------------|
+| Flet (sikker) | Tilføj dispositioner og sletningslog fra sikkerhedskopien til dine eksisterende data. Brug denne til at samle data fra flere installationer. |
+| Erstat (fuld gendannelse) | Slet alt eksisterende og gendan sikkerhedskopien fuldstændigt. Brug denne til at flytte til en ny maskine eller gendanne efter datatab. Kræver bekræftelse med admin-PIN. |
+
+### Nulstil database
+
+Klik på **Nulstil database** for at slette alle scanningsdata, dispositioner og sletningslog. Dette kan ikke fortrydes. Hvis en admin-PIN er sat, skal du indtaste den for at fortsætte.
+
+---
+
+## 14. Indstillinger — oversigt
+
+### Fanen Generelt
+
+| Indstilling | Beskrivelse |
+|-------------|-------------|
+| Tema | Mørkt eller lyst |
+
+### Fanen Sikkerhed
+
+| Indstilling | Beskrivelse |
+|-------------|-------------|
+| Admin-PIN | Valgfri PIN-kode, der beskytter destruktive handlinger (nulstil database, erstat ved import) |
+| Viewer-PIN | Valgfri 4–8-cifret PIN-kode, der giver alle adgang til `/view` i en browser som skrivebeskyttet gennemganger uden et token-link |
+
+### Avancerede scanningsindstillinger
+
+Disse indstillinger findes i venstre panel under **Indstillinger**:
+
+**Delta-scanning** — efter din første fulde scanning kan du aktivere dette for kun at scanne elementer, der er ændret siden sidste scanning. Meget hurtigere til løbende kontrol. Knappen "Ryd tokens" tvinger den næste scanning til at være en fuld scanning.
+
+**Søg efter ansigter i billeder** — langsommere scanning, der registrerer fotografier med genkendelige menneskelige ansigter. Markerer dem som artikel 9 biometriske data. Anbefales til skoler, der opbevarer elevfotos.
+
+**Opbevaringspolitik** — når aktiveret, markeres elementer ældre end det angivne antal år som forældet. Regnskabsårets afslutning bestemmer, hvordan skæringsdatoen beregnes:
+
+| Indstilling | Beregning af skæringsdato |
+|-------------|--------------------------|
+| Løbende (fra i dag) | I dag minus N år |
+| 31 dec (Bogføringsloven) | Seneste 31. december minus N år |
+| 30 jun / 31 mar | Seneste forekomst af den dato minus N år |
+
+---
+
+## 15. Ofte stillede spørgsmål
+
+**Gemmer scanneren CPR-numre?**  
+Nej. CPR-numre fundet under en scanning gemmes kun som et antal (f.eks. "3 CPR-numre fundet") og som en SHA-256-hash, der bruges til personopslag. Det faktiske nummer skrives aldrig til databasen.
+
+**Hvad sker der, når jeg sletter elementer via scanneren?**  
+E-mails flyttes til brugerens **Slettet post**-mappe i Exchange — de slettes ikke permanent og kan gendannes af brugeren eller en administrator. Filer flyttes til **papirkurven** i den pågældende tjeneste (OneDrive, SharePoint, filsystem). Permanent sletning kræver en efterfølgende handling af brugeren eller administrator.
+
+**Kan jeg scanne uden at forbinde til Microsoft 365?**  
+Ja. Du kan scanne lokale og SMB-filshares uden nogen M365- eller Google-forbindelse. Åbn **Kilder**, gå til fanen **Filkilder**, og tilføj dine filstier.
+
+**Hvad er delta-scanning, og hvornår skal jeg bruge det?**  
+Delta-scanning bruger Microsoft Graphs ændringstokens til kun at hente elementer ændret siden den seneste scanning. Det er ideelt til regelmæssige (f.eks. ugentlige) compliance-tjek efter, at du har gennemført en fuld basisscan. Aktiver det i afsnittet Indstillinger i venstre panel.
+
+**Scanningen stoppede — kan jeg fortsætte, hvor den slap?**  
+Ja. Når du starter scanningen igen, vil et gult banner tilbyde at genoptage fra kontrolpunktet. Klik på **▶ Genoptag** for at fortsætte. Hvis du foretrækker at starte forfra, klikker du på **Start forfra**.
+
+**Hvordan dokumenterer jeg compliance, hvis vi bliver auditeret?**  
+Brug **Art.30**-knappen til at eksportere artikel 30-rapporten. Det er et Word-dokument, der dækker din datafortegnelse, opbevaringsanalyse, sletningslog og metode — præcis hvad en tilsynsmyndighed (Datatilsynet) typisk anmoder om.
+
+**Hvad gør filteret "Elev / Ansat"?**  
+Scanneren klassificerer brugere som ansatte eller elever ud fra deres Microsoft 365-licenstype eller Google Workspace-organisationsenhed. Du kan bruge dette filter i kontolisten til at begrænse en scanning til kun ansatte, kun elever eller en bestemt person. Det er nyttigt, fordi reglerne for behandling af elevdata — særligt for børn under 15 år — adskiller sig fra reglerne for medarbejderdata i henhold til databeskyttelsesloven.
+
+**Hvordan tilføjer jeg en konto, der ikke er på listen?**  
+I kontoafsnittet i venstre panel er der et felt **+ Tilføj konto manuelt**. Indtast e-mailadressen eller UPN'en, og den tilføjes til den aktuelle sessions kontoliste.
+
+**Kører scanneren? Jeg kan ikke se en statuslinje.**  
+Tjek aktivitetsloggen nederst på skærmen. Hvis en scanning kører, vises der beskeder her. Hvis du ikke ser noget, er scanningen muligvis afsluttet eller ikke startet. Kontrollér også, at du har valgt mindst én kilde og mindst én konto.
+
+**Kan en gennemganger mærke dispositioner uden adgang til scanningskontrollerne?**  
+Ja. Brug **🔗 Del**-knappen til at oprette et skrivebeskyttet viewer-link eller angiv en Viewer-PIN under Indstillinger → Sikkerhed. Gennemgangeren åbner linket i sin browser og kan gennemse resultater og mærke dispositioner uden at se loginoplysninger, kilder eller scanningsknapper. Se afsnit 10 for detaljer.
+
+---
+
+*GDPR Scanner v1.6.14 — teknisk opsætning og konfiguration: se README.md*
diff --git a/docs/manuals/MANUAL-EN.md b/docs/manuals/MANUAL-EN.md
new file mode 100644
index 0000000..07c1f6a
--- /dev/null
+++ b/docs/manuals/MANUAL-EN.md
@@ -0,0 +1,543 @@
+# GDPR Scanner — User Manual
+
+Version 1.6.14
+
+---
+
+## Table of Contents
+
+1. [What is GDPR Scanner?](#1-what-is-gdpr-scanner)
+2. [The Interface at a Glance](#2-the-interface-at-a-glance)
+3. [Connecting to Your Data Sources](#3-connecting-to-your-data-sources)
+4. [Running a Scan](#4-running-a-scan)
+5. [Understanding the Results](#5-understanding-the-results)
+6. [Reviewing and Tagging Results](#6-reviewing-and-tagging-results)
+7. [Deleting Items](#7-deleting-items)
+8. [Profiles — Saving Your Scan Settings](#8-profiles--saving-your-scan-settings)
+9. [Reports and Exports](#9-reports-and-exports)
+10. [Sharing Results with a Reviewer](#10-sharing-results-with-a-reviewer)
+11. [Scheduled Scans](#11-scheduled-scans)
+12. [Email Reports](#12-email-reports)
+13. [Database Backup and Restore](#13-database-backup-and-restore)
+14. [Settings Reference](#14-settings-reference)
+15. [Frequently Asked Questions](#15-frequently-asked-questions)
+
+---
+
+## 1. What is GDPR Scanner?
+
+GDPR Scanner searches your organisation's digital data — emails, cloud files, shared drives, and local file servers — for personal data such as CPR numbers, names, addresses, phone numbers, and special-category data under GDPR Article 9.
+
+When items are found, you can review them, decide what to do with each one (keep, delete, or note as out of scope), produce an Article 30 compliance report, and delete overdue data in bulk.
+
+**What it scans:**
+- Microsoft 365: Exchange email, OneDrive, SharePoint, Teams
+- Google Workspace: Gmail, Google Drive
+- Local and network file shares (including SMB/NAS drives)
+
+**What it finds:**
+- CPR numbers (Danish civil registration numbers)
+- Phone numbers, email addresses, postal addresses
+- Bank account and IBAN numbers
+- Names and organisation names
+- Photographs containing recognisable faces (optional)
+- GPS location data embedded in image files
+
+---
+
+## 2. The Interface at a Glance
+
+When you open the scanner, the screen is divided into the following areas:
+
+```
+┌─────────────────┬──────────────────────────────────────────┐
+│                 │  Top bar: Scan button, profiles, actions  │
+│   Left sidebar  ├──────────────────────────────────────────┤
+│                 │                                           │
+│  - Sources      │         Results / scan progress           │
+│  - Options      │                                           │
+│  - Accounts     │                                           │
+│  - Stats        ├──────────────────────────────────────────┤
+│                 │               Activity log                │
+└─────────────────┴──────────────────────────────────────────┘
+```
+
+**Left sidebar** — choose what to scan and how.  
+**Top bar** — start a scan, select profiles, and access exports and settings.  
+**Results area** — flagged items appear here as the scan runs.  
+**Progress bar** — sits just above the activity log and shows which source is being scanned, who is being scanned, and how far along the scan is.  
+**Activity log** — shows live status messages during scanning. Click the **▾** arrow in the log header to collapse or expand the panel. You can also filter the log to show only errors, copy all log text to the clipboard, and resize the panel by dragging the handle at its top edge.
+
+### Dark / Light mode
+
+Click the **🌙** button in the top-right corner to switch between dark and light mode. Your preference is remembered.
+
+---
+
+## 3. Connecting to Your Data Sources
+
+Before you can scan, you need to connect to at least one data source. Click the **Sources** button in the top bar to open the Source Management panel.
+
+### 3.1 Microsoft 365
+
+The Microsoft 365 tab shows your current connection status. If you see a green dot and your account or tenant name, you are already connected.
+
+**Sources you can enable or disable:**
+
+| Toggle | What it scans |
+|--------|---------------|
+| Outlook | Exchange mailboxes (inbox, sent, all folders) |
+| OneDrive | Each user's personal cloud storage |
+| SharePoint | Team and project sites |
+| Teams | Files shared in Teams channels |
+
+Turn off any source you do not want to include. These settings are remembered.
+
+### 3.2 Google Workspace
+
+The Google Workspace tab lets you connect a Google Workspace (formerly G Suite) account via a service account, or a personal Google account via sign-in.
+
+**Sources you can enable or disable:**
+
+| Toggle | What it scans |
+|--------|---------------|
+| Gmail | All emails in each user's inbox and labels |
+| Google Drive | All files owned by or shared with each user |
+
+### 3.3 Local and Network File Shares
+
+The **Filkilder** (File Sources) tab lists any local folders or network drives you have configured.
+
+**To add a new file source:**
+1. Enter a **Label** — a friendly name you will recognise (e.g. "Skolens Fællesmappe").
+2. Enter the **Path**:
+   - Local folder: `~/Documents` or `/Volumes/Share`
+   - Network share: `//nas-server/shared` or `\\server\share`
+3. If it is a network share, fill in the **SMB Host**, **Username**, and **Password** that appear automatically. The password is stored securely in your system keychain.
+4. Click **Tilføj** (Add).
+
+You can add as many file sources as you need. Each one will appear as a selectable source in the main sidebar when you are ready to scan.
+
+---
+
+## 4. Running a Scan
+
+### 4.1 Select Your Sources
+
+In the left sidebar under **Kilder** (Sources), tick the sources you want to include in this scan. You can mix M365, Google, and file sources in the same scan.
+
+### 4.2 Choose Your Accounts
+
+Under **Konti** (Accounts) the sidebar shows all users connected to your M365 and/or Google tenant.
+
+- Use the **search box** to find specific people.
+- Use the **Alle / Ansat / Elev** buttons to filter by role.
+- Use the **Alle** and **Ingen** buttons to select or deselect everyone at once.
+- Tick or untick individual names.
+
+For file sources, accounts are not relevant — all files in the selected paths are scanned.
+
+### 4.3 Configure Options
+
+Under **Indstillinger** (Options) you can refine the scan:
+
+**Date filter (Scan e-mails/filer fra)**  
+Only scan items modified after a certain date. Quick presets — **1 år**, **2 år**, **5 år**, **10 år**, **Alle** — let you choose a window with one click. You can also pick a specific date with the date picker.
+
+> Tip: Starting with "2 år" is a good first scan. You can always widen to "Alle" later.
+
+**Email body** — scan the text content of emails. On by default.
+
+**Attachments** — scan files attached to emails. On by default.
+
+**Max attachment size** — skip attachments larger than this limit (default 20 MB). Increase it if you want to check large documents.
+
+**Max emails per user** — stop after scanning this many emails per person (default 2,000). Increase it if you need complete coverage.
+
+### 4.4 Start the Scan
+
+Click the blue **Scan** button in the top bar.
+
+A progress bar appears showing:
+- A coloured **source label** — **Outlook**, **OneDrive**, **SharePoint**, **Teams**, **Gmail**, **GDrive**, or **Local** — followed by the full name of the account currently being scanned
+- A live count of items scanned and flagged
+- An estimated time remaining
+
+Results appear in the main area as they are found — you do not need to wait for the scan to finish before reviewing them.
+
+To stop a scan, click **Stop**. A checkpoint is saved automatically so you can resume later.
+
+### 4.5 Resuming an Interrupted Scan
+
+If a scan was interrupted (by a stop, a crash, or closing the application), a yellow banner appears at the top of the results area:
+
+> Previous scan interrupted — X scanned, Y found
+> **▶ Genoptag** · Start fresh
+
+Click **▶ Genoptag** to continue from where the scan left off. Click **Start fresh** to discard the checkpoint and begin again.
+
+---
+
+## 5. Understanding the Results
+
+Each flagged item appears as a card. Here is what the badges and labels mean:
+
+### Source badges
+
+| Badge | Meaning |
+|-------|---------|
+| Outlook | Found in an Exchange mailbox |
+| OneDrive | Found in a user's OneDrive |
+| SharePoint | Found in a SharePoint site |
+| Teams | Found in a Teams channel |
+| Gmail | Found in a Gmail mailbox |
+| Google Drive | Found in Google Drive |
+| Local / Network | Found on a file share |
+
+### Risk level
+
+| Level | Meaning |
+|-------|---------|
+| HIGH | Multiple CPR numbers, special-category data, older than retention policy, or externally shared |
+| MEDIUM | Single CPR with some sharing or contextual risk |
+| LOW | Single CPR number, not shared, recent |
+
+### Other badges
+
+| Badge | Meaning |
+|-------|---------|
+| Number (e.g. **3**) | Number of CPR numbers found in this item |
+| **Delt** (Shared) | The item has been shared with other users |
+| **Ekstern** (External) | The item has been shared with someone outside your organisation |
+| **Art. 9** | Special-category data detected (health, religion, biometric, etc.) |
+| **N faces** | N recognisable faces detected in a photo |
+| **GPS** | The file contains GPS location data in its metadata |
+
+### Grid view vs. list view
+
+The default **grid view** shows cards. Click **List** in the filter bar to switch to a compact table view with sortable columns. Click **Grid** to switch back.
+
+### Filtering results
+
+Use the filter bar above the results to narrow down what you see:
+
+- **Search box** — search by name, subject, or path.
+- **Source dropdown** — show only one source type.
+- **Disposition dropdown** — show items by their review status.
+- **Transfer dropdown** — filter by shared / external / all.
+- **Risk dropdown** — show only Art. 9, photos, GPS, or high-risk items.
+
+---
+
+## 6. Reviewing and Tagging Results
+
+Click any result card to open the preview panel on the right side of the screen.
+
+The preview shows:
+- The item name or email subject
+- The account (owner / sender)
+- Source and modification date
+- All CPR numbers found and their context
+- Other personal data detected (phone, email address, IBAN, etc.)
+- Sharing and external-access information
+
+### Setting a disposition
+
+Every item has a **Disposition** dropdown in the preview panel. Choose one of:
+
+| Disposition | Use when… |
+|-------------|-----------|
+| Ikke gennemgået (Unreviewed) | Not yet assessed — the default |
+| Opbevar — lovkrav | You must keep it by law |
+| Opbevar — legitim interesse | You have a legitimate interest in keeping it |
+| Opbevar — kontrakt | Required for a contract |
+| Slet — planlagt | Marked for future deletion |
+| Privat brug — uden for scope | Personal item, not in scope for GDPR processing |
+| Slettet | Already deleted (set automatically when you delete an item) |
+
+After choosing, click **Gem**. A small **✓ Gemt** confirmation appears.
+
+### Finding all items for a specific person
+
+Click **🔍** in the sidebar (under Stats) to open the **Data Subject Lookup**. Enter a CPR number and the scanner will find all flagged items containing that number. You can then delete all of them in one step — supporting the GDPR right to erasure (Article 17).
+
+The CPR number is hashed before the search and is never stored in plaintext.
+
+---
+
+## 7. Deleting Items
+
+### 7.1 Deleting a Single Item
+
+With an item open in the preview panel, set its disposition to **Slet — planlagt**, then use the action button to delete it. The item moves to the Deleted Items folder (email) or recycle bin (files).
+
+### 7.2 Bulk Delete
+
+Click the **Delete** button in the filter bar to open the bulk delete modal.
+
+1. **Set filters** to target the items you want to delete:
+   - **Source type** — delete from one source or all.
+   - **Min. CPR hits** — only delete items with at least this many CPR numbers.
+   - **Older than date** — only delete items modified before a specific date.
+   - Click **🗓 Filter overdue** to automatically fill in the date based on your retention policy.
+
+2. The modal shows how many items match your filters.
+
+3. Click the red **Delete matching items** button to proceed.
+
+4. A progress bar shows deletions as they happen. Emails go to **Deleted Items**; files go to the **recycle bin**.
+
+A full audit log of every deletion (what was deleted, when, and why) is included in the Article 30 report.
+
+---
+
+## 8. Profiles — Saving Your Scan Settings
+
+A profile stores your chosen sources, accounts, scan options, and date settings so you can re-use them without reconfiguring every time.
+
+### Saving a profile
+
+Configure the sidebar exactly as you want it — including which M365 sources, Google sources, and local file sources are enabled, which accounts are selected, and all options — then click the **Save** button in the top bar. Enter a name and click OK. The profile is saved and selected immediately.
+
+### Applying a profile
+
+Click the profile dropdown in the top bar and select a profile. All sidebar settings — sources, accounts, options, and date filter — are loaded at once. The sidebar then shows your live state and you can adjust anything before scanning.
+
+A **Clear** button appears next to the dropdown after you select a profile. Click it to clear the profile label without changing the sidebar settings. This is useful when you want to run a one-off scan without overwriting a saved profile.
+
+### Managing profiles
+
+Click **Profiles** to open the profile management panel. Here you can:
+
+- **Edit** any profile — change its name, description, sources, accounts, or options.
+- **Duplicate** a profile — useful as a starting point for a variation.
+- **Delete** a profile.
+
+> Note: Editing a profile does not affect scans already completed with that profile.
+
+---
+
+## 9. Reports and Exports
+
+### 9.1 Excel Export
+
+Click **Excel** in the filter bar to download the current results as an Excel workbook. The workbook contains:
+- A summary tab with scan date, item counts, and source breakdown.
+- A separate tab for each source type (Outlook, OneDrive, SharePoint, Teams, Gmail, Google Drive, Local, Network).
+- Every flagged item, including source, account, CPR count, risk level, sharing status, and disposition.
+
+The **Excel** and **Art.30** buttons are always available — even after restarting the application — and will export the results from the most recent completed scan session without requiring a new scan.
+
+The Excel file is the main working document for your internal review process.
+
+### 9.2 GDPR Article 30 Report (Word document)
+
+Click **Art.30** in the filter bar to generate a Word document that satisfies the GDPR Article 30 requirement to maintain a record of processing activities.
+
+The document includes:
+- **Executive summary** — scan date, total items, CPR counts per source.
+- **Data categories** — which types of personal data were found.
+- **Data inventory** — the full list of flagged items.
+- **Retention analysis** — items older than your retention policy, with a breakdown by source.
+- **Special-category data (Art. 9)** — health, biometric, and other sensitive data found.
+- **Photographs / biometric data** — if face scanning was enabled.
+- **GPS data** — files with embedded location information.
+- **Compliance trend** — flagged counts across your last 20 scans.
+- **Deletion audit log** — a complete record of all deletions made through the scanner.
+- **Methodology** — how the scan was performed and the legal basis for scanning.
+- **Notes on student data** — guidance on parental consent requirements for children under 15.
+
+---
+
+## 10. Sharing Results with a Reviewer
+
+You can give a DPO, school principal, or compliance coordinator read-only access to the results grid — including the ability to tag dispositions — without giving them access to scan controls, credentials, or settings.
+
+### 10.1 Token links
+
+Click the **🔗** button in the top-right of the top bar to open the Share panel.
+
+1. Optionally enter a **Label** to identify who the link is for (e.g. "DPO review April 2026").
+2. Choose an **Expiry** — 7 days, 30 days, 90 days, 1 year, or Never.
+3. Click **Create**. A unique link is generated: `http://host:5100/view?token=…`
+4. Click **Copy** to copy the link to your clipboard, then send it to the reviewer.
+
+The reviewer opens the link in any browser. They see the full results grid and can tag dispositions but cannot start scans, change settings, view credentials, or delete items.
+
+**Managing existing links**
+
+The Share panel lists all active links. Each row shows the label, expiry date, and when the link was last used. Click **Copy** to copy a link again, or **Revoke** to invalidate it immediately.
+
+### 10.2 Viewer PIN
+
+As an alternative to token links, you can set a numeric PIN (4–8 digits) in **Settings → Security → Viewer PIN**. Anyone who knows the PIN can open `http://host:5100/view` in a browser, enter the PIN, and access the read-only view for the duration of their browser session.
+
+To set or change the PIN, enter the new PIN in the **New PIN** field and click **Save PIN**. To remove it, click **Clear PIN**.
+
+> **Security note:** Token links are more secure than a PIN because each link can be individually revoked and has an expiry date. Use the PIN option only for trusted internal reviewers on your local network.
+
+### 10.3 What the reviewer can do
+
+| Action | Allowed |
+|--------|---------|
+| Browse results grid | Yes |
+| Filter and search results | Yes |
+| Open item preview | Yes |
+| Tag dispositions | Yes |
+| Export to Excel | Yes |
+| Export Article 30 report | Yes |
+| Start or stop a scan | No |
+| View or change credentials | No |
+| Delete items | No |
+| Access Settings | No |
+| Create or revoke viewer links | No |
+
+---
+
+## 11. Scheduled Scans
+
+Go to **Settings → Planlægger** to configure automatic scans.
+
+### Creating a scheduled scan
+
+1. Click **+ Tilføj planlagt scanning** (+ Add scheduled scan).
+2. Give the job a name.
+3. Choose the frequency: **Dagligt**, **Ugentligt**, or **Månedligt**.
+4. For weekly scans, choose the day of the week. For monthly, choose the day of the month.
+5. Set the time the scan should run.
+6. Choose a **Profile** — the scanner will use that profile's sources, accounts, and options.
+7. Optionally enable:
+   - **Send rapport automatisk** — email the Excel report to your configured recipients after each scan.
+   - **Håndhæv opbevaringspolitik** — automatically delete items older than your retention policy after each scan.
+8. Click **Gem** (Save).
+
+The scheduler indicator in the top bar shows the date and time of the next scheduled scan ("Next: …").
+
+### Viewing recent runs
+
+The scheduler tab shows a history of recent runs, including start time, status, and the number of items flagged.
+
+---
+
+## 12. Email Reports
+
+Go to **Settings → E-mailrapport** to configure email sending.
+
+### Setting up SMTP
+
+Fill in your outgoing mail server details:
+
+| Field | Example |
+|-------|---------|
+| SMTP host | smtp.office365.com |
+| Port | 587 |
+| Username | scanner@skole.dk |
+| Password | (your email password or app password) |
+| From address | scanner@skole.dk |
+| Recipients | dpo@skole.dk; it@skole.dk |
+
+Click **Gem** to save, then click **Test** to send a test email and verify the configuration is working.
+
+> If your account has MFA (two-factor authentication) enabled, you cannot use your regular password. You need to create an **App Password** in your account security settings:
+> - **Microsoft personal account**: account.microsoft.com/security → App passwords
+> - **Gmail**: myaccount.google.com → Security → 2-Step Verification → App passwords
+
+### Sending a report manually
+
+Click **Send nu** (Send now) to email the current Excel report immediately to all configured recipients.
+
+---
+
+## 13. Database Backup and Restore
+
+All scan results, dispositions, and the deletion audit log are stored in a local database. It is good practice to take regular backups.
+
+Go to **Settings → Database**.
+
+### Backup (Export)
+
+Click **Export** to create a `.zip` backup of your database. Save it to a safe location.
+
+### Restore (Import)
+
+Click **Import** to restore from a backup. Two modes are available:
+
+| Mode | When to use |
+|------|-------------|
+| Merge (safe) | Add dispositions and deletion log from the backup to your existing data. Use this to consolidate data from multiple installations. |
+| Replace (full restore) | Erase everything and restore the backup completely. Use this to move to a new machine or recover from data loss. Requires Admin PIN confirmation. |
+
+### Reset database
+
+Click **Reset DB** to wipe all scan data, dispositions, and the deletion log. This is irreversible. If an Admin PIN is set, you must enter it to proceed.
+
+---
+
+## 14. Settings Reference
+
+### General tab
+
+| Setting | Description |
+|---------|-------------|
+| Theme | Dark or light mode |
+
+### Security tab
+
+| Setting | Description |
+|---------|-------------|
+| Admin PIN | Optional PIN that protects destructive actions (database reset, replace import) |
+| Viewer PIN | Optional 4–8 digit PIN that lets anyone open `/view` in a browser for read-only access to results without a token link |
+
+### Advanced scan options
+
+These options are in the left sidebar under **Indstillinger**:
+
+**Delta scanning** — after your first full scan, enable this to scan only items that have changed since the last scan. Much faster for routine checks. A "Clear tokens" button forces the next scan to be a full scan.
+
+**Scan photos for faces** — slower scan that detects photographs containing recognisable human faces. Flags them as Article 9 biometric data. Recommended for schools storing student photos.
+
+**Retention policy** — when enabled, marks items older than the specified number of years as overdue. The fiscal year end setting determines how the cutoff date is calculated:
+
+| Option | Cutoff date calculation |
+|--------|------------------------|
+| Rolling (fra i dag) | Today minus N years |
+| 31 dec (Bogføringsloven) | Last 31 December minus N years |
+| 30 jun / 31 mar | Last occurrence of that date minus N years |
+
+---
+
+## 15. Frequently Asked Questions
+
+**Does the scanner store CPR numbers?**  
+No. CPR numbers found during a scan are stored only as a count (e.g. "3 CPR numbers found") and as a SHA-256 hash used for the Data Subject Lookup. The actual number is never written to the database.
+
+**What happens when I delete items through the scanner?**  
+Emails are moved to the user's **Deleted Items** folder in Exchange — they are not permanently deleted and can be recovered by the user or an administrator. Files are moved to the **recycle bin** of the relevant service (OneDrive, SharePoint, file system). A permanent deletion requires a second action by the user or admin.
+
+**Can I scan without connecting to Microsoft 365?**  
+Yes. You can scan local and SMB file shares without any M365 or Google connection. Open **Sources**, go to the **Filkilder** tab, and add your file paths.
+
+**What is delta scanning and when should I use it?**  
+Delta scanning uses Microsoft Graph change tokens to fetch only items modified since the last scan. It is ideal for regular (e.g. weekly) compliance checks after you have done a full baseline scan. Enable it in the Options section of the sidebar.
+
+**The scan stopped — can I continue where it left off?**  
+Yes. When you restart the scan, a yellow banner will offer to resume from the checkpoint. Click **▶ Genoptag** to continue. If you prefer to start over, click **Start fresh**.
+
+**How do I prove compliance if we are audited?**  
+Use the **Art.30** button to export the Article 30 report. It is a Word document covering your data inventory, retention analysis, deletion log, and methodology — exactly what a supervisory authority (Datatilsynet) typically requests.
+
+**What does the "Elev / Ansat" filter do?**  
+The scanner classifies users as staff (Ansat) or students (Elev) based on their Microsoft 365 licence type or Google Workspace organisational unit. You can use this filter in the accounts list to restrict a scan to only staff, only students, or a specific individual. This is useful because the rules for processing student data — especially for children under 15 — differ from staff data under Databeskyttelsesloven.
+
+**How do I add an account that is not in the list?**  
+In the accounts section of the sidebar, there is a **+ Tilføj konto manuelt** (Add account manually) field. Enter the email address or UPN and it will be added to the current session's account list.
+
+**Is the scanner running? I cannot see a progress bar.**  
+Check the activity log at the bottom of the screen. If a scan is running, messages will appear there. If you see nothing, the scan may have completed or not started. Also check that you have at least one source ticked and at least one account selected.
+
+**Can a reviewer tag dispositions without access to the scan controls?**  
+Yes. Use the **🔗 Share** button to create a read-only viewer link or set a Viewer PIN in Settings → Security. The reviewer opens the link in their browser and can browse results and tag dispositions without seeing credentials, sources, or scan buttons. See section 10 for details.
+
+---
+
+*GDPR Scanner v1.6.14 — for technical setup and configuration see README.md*
diff --git a/docs/setup/GOOGLE_SETUP.md b/docs/setup/GOOGLE_SETUP.md
new file mode 100644
index 0000000..7f5ad52
--- /dev/null
+++ b/docs/setup/GOOGLE_SETUP.md
@@ -0,0 +1,144 @@
+# Google Workspace Setup
+
+Step-by-step guide for connecting GDPRScanner to Google Workspace via a service account.
+
+GDPRScanner connects using a **service account** with **domain-wide delegation** — this allows it to scan all users' Gmail and Drive without requiring each user to sign in individually.
+
+---
+
+## 1. Create a Google Cloud project
+
+Go to [console.cloud.google.com](https://console.cloud.google.com) and create a new project (or use an existing one).
+
+---
+
+## 2. Enable the required APIs
+
+In your project: **APIs & Services → Enable APIs and Services**. Enable:
+
+- **Gmail API**
+- **Google Drive API**
+- **Admin SDK API**
+
+---
+
+## 3. Create a service account
+
+Go to **IAM & Admin → Service accounts → Create service account**.
+
+| Field | Value |
+|---|---|
+| Name | gdprscanner (or any name) |
+| Description | GDPRScanner service account |
+
+Click **Create and continue**. Skip the optional role and user access steps. Click **Done**.
+
+### Create a key
+
+Click on the service account → **Keys → Add key → Create new key → JSON**.
+
+Download the JSON file. This is your service account key — treat it like a password.
+
+---
+
+## 4. Enable domain-wide delegation
+
+Back on the service account page: **Show advanced settings → Domain-wide delegation → Enable**.
+
+Note the **Client ID** (a long number) — you'll need it in the next step.
+
+---
+
+## 5. Authorise scopes in Google Admin Console
+
+Go to [admin.google.com](https://admin.google.com) →
+**Security → Access and data control → API controls → Manage domain-wide delegation → Add new**.
+
+| Field | Value |
+|---|---|
+| Client ID | The numeric Client ID from the service account |
+| OAuth scopes | See below |
+
+Add all of these scopes (paste as a comma-separated list):
+
+```
+https://www.googleapis.com/auth/admin.directory.user.readonly,
+https://www.googleapis.com/auth/gmail.readonly,
+https://www.googleapis.com/auth/drive.readonly
+```
+
+Click **Authorise**. Changes can take a few minutes to propagate.
+
+---
+
+## 6. Connect in GDPRScanner
+
+Open GDPRScanner → **Source Management → Google Workspace** tab.
+
+1. **Upload service account key** — select the JSON file you downloaded in step 3
+2. **Admin email** — enter the email address of a Google Workspace admin user in your domain (e.g. `admin@skolen.dk`). The service account impersonates this user to call the Admin Directory API.
+
+Click **Connect**. If successful, the status dot turns green and shows the service account email.
+
+---
+
+## 7. User role classification
+
+GDPRScanner classifies Google Workspace users as **staff** or **student** based on their **Organisational Unit (OU) path** in Google Admin.
+
+The mapping is in `classification/google_ou_roles.json`. Edit it to match your school's OU structure — no code change required.
+
+Default mapping:
+
+| OU prefix | Role |
+|---|---|
+| `/Elever` | student |
+| `/Personale` | staff |
+| `/Admin` | staff |
+
+To see your OU structure: **Google Admin → Directory → Administrer organisationsenheder**.
+
+Example `classification/google_ou_roles.json` for a typical Danish school (Gudenaaskolen.dk structure):
+
+```json
+{
+  "student_ou_prefixes": ["/Elever"],
+  "staff_ou_prefixes":   ["/Personale", "/Admin"]
+}
+```
+
+After editing the file, restart GDPRScanner — no rebuild required.
+
+---
+
+## 8. Verify
+
+After connecting:
+
+- **Sources panel** shows Gmail and Google Drive checkboxes
+- **Accounts panel** shows all Google Workspace users with `GWS` badges
+- Users are classified as Elev / Ansat based on their OU
+
+Select one or more accounts, check Gmail and/or Google Drive, and click Scan.
+
+---
+
+## Notes on what is scanned
+
+| Source | What is scanned |
+|---|---|
+| Gmail | Email bodies and attachments for all mail folders |
+| Google Drive | My Drive files — Docs, Sheets, Slides are auto-exported to text for scanning |
+
+---
+
+## Troubleshooting
+
+| Symptom | Likely cause |
+|---|---|
+| `unauthorized_client` on connect | Domain-wide delegation not enabled, or scopes not authorised in Admin Console |
+| 0 users listed | `admin.directory.user.readonly` scope missing, or wrong admin email |
+| Users show as "Anden" (other) | OU path not matched in `classification/google_ou_roles.json` — check OU paths in Google Admin and compare with the file |
+| Gmail scan finds nothing | `gmail.readonly` scope not authorised |
+| Drive scan finds nothing | `drive.readonly` scope not authorised |
+| `RefreshError` on scan | Service account key expired or revoked — generate a new key |
diff --git a/docs/setup/M365_SETUP.md b/docs/setup/M365_SETUP.md
new file mode 100644
index 0000000..4790ec2
--- /dev/null
+++ b/docs/setup/M365_SETUP.md
@@ -0,0 +1,160 @@
+# Microsoft 365 Setup
+
+Step-by-step guide for connecting GDPRScanner to Microsoft 365.
+
+---
+
+## 1. Register an app in Azure
+
+Go to **Azure Portal → Microsoft Entra ID → App registrations → New registration**.
+
+| Field | Value |
+|---|---|
+| Name | GDPRScanner (or any name) |
+| Supported account types | Accounts in this organisational directory only |
+| Redirect URI | Leave blank |
+
+Click **Register**. Note the **Application (client) ID** and **Directory (tenant) ID** — you'll need both.
+
+---
+
+## 2. Choose an authentication mode
+
+| Mode | How it works | When to use |
+|---|---|---|
+| **Application** | Client credentials — client ID + tenant ID + client secret. No user interaction. | Automated / scheduled scans, all-user scans |
+| **Delegated** | OAuth device code flow — user signs in interactively. | Single-user scans, testing |
+
+### Application mode — create a client secret
+
+In your app registration: **Certificates & secrets → New client secret**.
+
+Set an expiry (24 months recommended) and copy the **Value** immediately — it is only shown once.
+
+### Delegated mode — no secret needed
+
+The scanner will show a device code URL. Open it in a browser, sign in, and the scanner authenticates as that user.
+
+---
+
+## 3. Add API permissions
+
+Go to **API permissions → Add a permission → Microsoft Graph**.
+
+### Scan only
+
+| Permission | Type |
+|---|---|
+| `Mail.Read` | Application or Delegated |
+| `Files.Read.All` | Application or Delegated |
+| `Sites.Read.All` | Application or Delegated |
+| `ChannelMessage.Read.All` | Application |
+| `Team.ReadBasic.All` | Application |
+| `User.Read.All` | Application |
+
+### Scan + Delete
+
+Add these in addition to the read permissions above:
+
+| Permission | Type |
+|---|---|
+| `Mail.ReadWrite` | Application or Delegated |
+| `Files.ReadWrite.All` | Application or Delegated |
+| `Sites.ReadWrite.All` | Application or Delegated |
+
+### Email reports via Graph
+
+If you want the scanner to send email reports via Microsoft 365 (not SMTP):
+
+| Permission | Type |
+|---|---|
+| `Mail.Send` | Application or Delegated |
+
+### Grant admin consent
+
+All **Application** permissions require admin consent. Click **Grant admin consent for [your tenant]** at the top of the API permissions page. Without this, scans will fail with 403 errors.
+
+---
+
+## 4. Connect in GDPRScanner
+
+Open GDPRScanner → **Source Management → Microsoft 365** tab.
+
+| Field | Where to find it |
+|---|---|
+| Client ID | App registration → Overview → Application (client) ID |
+| Tenant ID | App registration → Overview → Directory (tenant) ID |
+| Client Secret | The value you copied in step 2 (Application mode only) |
+
+Click **Connect**. In Application mode, the connection is immediate. In Delegated mode, the scanner shows a device code URL — open it in a browser and sign in to complete the connection.
+
+---
+
+## 5. Verify
+
+After connecting, the Sources panel shows:
+
+- **Email** — Exchange mailboxes
+- **OneDrive** — personal drives
+- **SharePoint** — site file libraries
+- **Teams** — Teams channel files
+
+The Accounts panel lists all users in the tenant (Application mode) or just the signed-in user (Delegated mode).
+
+---
+
+## Notes on deletion
+
+Emails deleted via the scanner are moved to **Deleted Items** — recoverable for 14–30 days depending on admin configuration. Files are sent to the **OneDrive/SharePoint recycle bin** — retained for 93 days across both recycle bin stages before permanent deletion. Nothing is permanently destroyed without a second manual step.
+
+---
+
+## Headless / scheduled mode
+
+Headless mode uses Application auth only. Credentials are read in priority order:
+
+1. `--settings FILE` — a JSON file you provide
+2. Environment variables: `M365_CLIENT_ID`, `M365_TENANT_ID`, `M365_CLIENT_SECRET`
+
+Example settings file:
+
+```json
+{
+  "client_id":     "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+  "tenant_id":     "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
+  "client_secret": "your-secret",
+  "sources":       ["email", "onedrive"],
+  "options": {
+    "older_than_days": 365,
+    "email_body":      true,
+    "attachments":     true,
+    "delta":           true
+  }
+}
+```
+
+Run:
+
+```bash
+python gdpr_scanner.py --headless --output ~/Reports/ --settings settings.json
+```
+
+See the full CLI flag reference in `README.md`.
+
+---
+
+## Role classification (staff / student)
+
+GDPRScanner classifies users as **staff** or **student** based on their Microsoft 365 licence SKU. The mapping is in `classification/m365_skus.json`. If users appear as "other", open **Settings → SKU debug** to see which SKU IDs are assigned in your tenant and add any missing ones to `m365_skus.json`.
+
+---
+
+## Troubleshooting
+
+| Symptom | Likely cause |
+|---|---|
+| 403 on scan start | Admin consent not granted, or wrong permissions added |
+| `AADSTS7000215` | Invalid client secret — check it was copied correctly |
+| No users listed | `User.Read.All` permission missing or not consented |
+| Teams files not appearing | `ChannelMessage.Read.All` or `Team.ReadBasic.All` missing |
+| Delta scan not working | Delta tokens require at least one full scan first |
diff --git a/document_scanner.py b/document_scanner.py
new file mode 100644
index 0000000..2b85078
--- /dev/null
+++ b/document_scanner.py
@@ -0,0 +1,2659 @@
+#!/usr/bin/env python3
+"""
+Scan PDF and Word documents (.docx) for Danish CPR numbers and dates.
+Handles text-based and image-based (scanned) PDFs automatically via OCR.
+Supports masking, full anonymisation, dry-run preview, and JSON logging.
+
+Supported formats: .pdf, .docx, .xlsx, .xlsm, .csv, .jpg, .jpeg, .png, .bmp, .tiff, .webp
+  (.doc requires conversion: soffice --headless --convert-to docx file.doc)
+
+Usage:
+    python document_scanner.py file.pdf
+    python document_scanner.py file.docx
+    python document_scanner.py file1.pdf file2.docx spreadsheet.xlsx /path/to/folder/
+
+Options:
+    --mask            Redact CPR numbers only    -> <n>_masked.pdf/.docx
+    --anonymise       Redact all personal data   -> <n>_anonymised.pdf/.docx
+                      (CPR, names, addresses, phone numbers, emails)
+    --dry-run         Scan and report without writing any output files
+    --log FILE        Write a structured JSON log of all findings to FILE
+    --older-than DAYS List files with CPR numbers AND dates older than DAYS
+    --ocr             Force OCR on every page (even if text is extractable)
+    --lang LANG       Tesseract language(s), default: dan+eng
+    --dpi DPI         DPI for OCR image rendering, default: 300
+    --poppler PATH    Path to Poppler bin folder (Windows only)
+
+Dependencies:
+    pip install pdfplumber pdf2image pytesseract pypdf reportlab spacy python-docx openpyxl opencv-python
+    python -m spacy download da_core_news_lg   # Danish NER model (~500 MB)
+
+    System packages:
+        macOS:  brew install tesseract tesseract-lang poppler
+        Linux:  sudo apt install tesseract-ocr tesseract-ocr-dan poppler-utils
+
+    Note: Python 3.12 recommended -- spaCy does not yet support Python 3.14.
+
+Recommended workflow:
+    # 1. Dry run first to audit without writing anything
+    python document_scanner.py /folder/ --anonymise --dry-run --log audit.json
+
+    # 2. Run for real once satisfied
+    python document_scanner.py /folder/ --anonymise --log run.json
+"""
+
+import argparse
+import hashlib
+import io
+import json
+import logging
+import re
+import sqlite3
+import sys
+from datetime import date, datetime, timedelta
+from pathlib import Path
+
+# Suppress pdfminer's noisy font-descriptor warnings that appear when PDFs
+# contain malformed or incomplete font definitions.  These do not affect text
+# extraction or CPR detection — the warning is informational only.
+logging.getLogger("pdfminer").setLevel(logging.ERROR)
+logging.getLogger("pdfminer.pdffont").setLevel(logging.ERROR)
+logging.getLogger("pdfminer.pdfpage").setLevel(logging.ERROR)
+logging.getLogger("pdfplumber").setLevel(logging.ERROR)
+# ── Dependency checks ──────────────────────────────────────────────────────────
+
+try:
+    import pdfplumber
+except ImportError:
+    print("Missing dependency. Install with: pip install pdfplumber")
+    sys.exit(1)
+
+try:
+    from pdf2image import convert_from_path
+    PDF2IMAGE_OK = True
+except ImportError:
+    PDF2IMAGE_OK = False
+
+try:
+    import pytesseract
+    TESSERACT_OK = True
+except ImportError:
+    TESSERACT_OK = False
+
+OCR_AVAILABLE = PDF2IMAGE_OK and TESSERACT_OK
+
+try:
+    from pypdf import PdfReader, PdfWriter
+    from reportlab.pdfgen import canvas as rl_canvas
+    from reportlab.lib.colors import black as rl_black
+    MASK_AVAILABLE = True
+except ImportError:
+    MASK_AVAILABLE = False
+
+try:
+    import fitz as _fitz          # PyMuPDF — for secure (sanitised) PDF redaction
+    PYMUPDF_AVAILABLE = True
+except ImportError:
+    PYMUPDF_AVAILABLE = False
+
+try:
+    import spacy
+    SPACY_OK = True
+except ImportError:
+    SPACY_OK = False
+
+try:
+    from docx import Document as DocxDocument
+    DOCX_OK = True
+except ImportError:
+    DOCX_OK = False
+
+try:
+    import openpyxl
+    XLSX_OK = True
+except ImportError:
+    XLSX_OK = False
+
+# cv2 is imported lazily inside _get_cv2() to avoid macOS recursion errors.
+# Never import cv2 at module level or from server.py.
+CV2_OK = False
+
+def _face_log(msg: str):
+    """Debug logging — file output disabled."""
+    import sys as _sys
+    print(msg, file=_sys.stderr, flush=True)
+_cv2_version = None
+_cv2_import_error = None
+_cv2_mod = None
+_np_mod  = None
+
+def _get_cv2():
+    """Return (cv2, numpy), imported lazily; (None, None) when unavailable.
+
+    Success and failure are both remembered in module globals. In a frozen
+    PyInstaller bundle cv2/__init__.py is excluded (it causes a macOS arm64
+    recursion crash), so cv2.abi3.so is loaded directly; else 'import cv2'.
+    """
+    global CV2_OK, _cv2_version, _cv2_import_error, _cv2_mod, _np_mod
+    if _cv2_mod is not None:  # already imported successfully
+        return _cv2_mod, _np_mod
+    if _cv2_import_error is not None:
+        return None, None  # already tried and failed
+    try:
+        import sys as _sys
+        import numpy as _np
+
+        if getattr(_sys, "frozen", False):
+            # Bundle has cv2.abi3.so but NOT cv2/__init__.py.
+            # Load the .so directly and register it as 'cv2'.
+            import importlib.util as _ilu
+            import types as _types
+            from pathlib import Path as _Path
+
+            _so = _Path(_sys._MEIPASS) / "cv2" / "cv2.abi3.so"
+            if not _so.exists():
+                raise RuntimeError(f"cv2.abi3.so not found at {_so}")
+
+            _spec = _ilu.spec_from_file_location("cv2", str(_so),
+                        submodule_search_locations=[])
+            _cv2 = _ilu.module_from_spec(_spec)
+            _sys.modules["cv2"] = _cv2   # register before exec to break cycles
+            _spec.loader.exec_module(_cv2)
+
+            # Provide a stand-in cv2.data module whose haarcascades path points into the bundle
+            _data = _types.ModuleType("cv2.data")
+            _data.haarcascades = str(_Path(_sys._MEIPASS) / "cv2" / "data") + "/"
+            _sys.modules["cv2.data"] = _data
+            _cv2.data = _data
+        else:
+            import cv2 as _cv2
+
+        if not hasattr(_cv2, "imread"):  # module object exists but lacks the expected API
+            raise RuntimeError(
+                f"cv2 binary not loaded (file: {getattr(_cv2, '__file__', '?')})"
+            )
+
+        _cv2_version      = getattr(_cv2, "__version__", "unknown")
+        CV2_OK            = True
+        _cv2_mod          = _cv2
+        _np_mod           = _np
+        _cv2_import_error = None
+    except Exception as e:
+        CV2_OK            = False
+        _cv2_import_error = str(e)  # non-None value short-circuits later calls
+        import sys as _sys
+        _sys.modules.pop("cv2", None)  # clean up partial registration
+    return _cv2_mod, _np_mod
+
+# spaCy model preference: large Danish → medium → small → multilingual → English fallback
+SPACY_MODEL_PREFERENCE = [
+    "da_core_news_lg", "da_core_news_md", "da_core_news_sm",
+    "xx_ent_wiki_sm", "en_core_web_sm",
+]
+_NLP = None  # lazy-loaded singleton
+
+def load_nlp():
+    """Load the best available spaCy model. Returns model or None."""
+    global _NLP
+    if _NLP is not None:
+        return _NLP
+    if not SPACY_OK:
+        return None
+
+    import sys as _sys
+    _frozen = getattr(_sys, "frozen", False)
+
+    for model_name in SPACY_MODEL_PREFERENCE:
+        try:
+            import importlib as _il
+            _mod = _il.import_module(model_name)
+            _NLP = _mod.load()
+            print(f"  [NER] Loaded spaCy model: {model_name}", flush=True)
+            _face_log(f"[NER] Loaded spaCy model: {model_name}")
+            return _NLP
+        except Exception as _e:
+            _face_log(f"[NER] {model_name} failed: {_e} (frozen={_frozen})")
+            continue
+    return None
+
+
+# ── OCR page cache ───────────────────────────────────────────────────────────
+
+_OCR_CACHE_PATH = Path.home() / ".document_scanner_ocr_cache.db"
+
+class OCRCache:
+    """
+    SQLite-backed cache for OCR text extraction.
+
+    Key:   SHA-256 of the raw page image bytes + lang string
+    Value: extracted text string
+
+    This means:
+    - Rescanning the same file reuses cached text (near-instant).
+    - Editing a file invalidates its pages (hash changes).
+    - Different OCR language settings get separate cache entries.
+    - The cache is shared across all processes (safe: writes are idempotent).
+    """
+
+    def __init__(self, path: Path = _OCR_CACHE_PATH):
+        self._path = path
+        self._conn: sqlite3.Connection | None = None
+
+    def _connect(self) -> sqlite3.Connection:
+        if self._conn is None:
+            conn = sqlite3.connect(str(self._path), check_same_thread=False,
+                                   timeout=10)
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS ocr_cache (
+                    key   TEXT PRIMARY KEY,
+                    text  TEXT NOT NULL,
+                    ts    INTEGER NOT NULL
+                )
+            """)
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_ts ON ocr_cache(ts)")
+            conn.commit()
+            self._conn = conn
+        return self._conn
+
+    @staticmethod
+    def _key(image_bytes: bytes, lang: str) -> str:
+        h = hashlib.sha256(image_bytes)
+        h.update(lang.encode())
+        return h.hexdigest()
+
+    def get(self, image_bytes: bytes, lang: str) -> str | None:
+        key = self._key(image_bytes, lang)
+        try:
+            row = self._connect().execute(
+                "SELECT text FROM ocr_cache WHERE key=?", (key,)
+            ).fetchone()
+            return row[0] if row else None
+        except Exception:
+            return None
+
+    def put(self, image_bytes: bytes, lang: str, text: str) -> None:
+        key = self._key(image_bytes, lang)
+        ts  = int(datetime.now().timestamp())
+        try:
+            self._connect().execute(
+                "INSERT OR REPLACE INTO ocr_cache(key, text, ts) VALUES(?,?,?)",
+                (key, text, ts),
+            )
+            self._connect().commit()
+        except Exception:
+            pass
+
+    def prune(self, max_entries: int = 50_000) -> None:
+        """Delete oldest entries when the cache grows beyond max_entries."""
+        try:
+            conn = self._connect()
+            n = conn.execute("SELECT COUNT(*) FROM ocr_cache").fetchone()[0]
+            if n > max_entries:
+                to_del = n - max_entries
+                conn.execute("""
+                    DELETE FROM ocr_cache
+                    WHERE key IN (
+                        SELECT key FROM ocr_cache ORDER BY ts ASC LIMIT ?
+                    )
+                """, (to_del,))
+                conn.commit()
+        except Exception:
+            pass
+
+    def clear(self) -> None:
+        try:
+            self._connect().execute("DELETE FROM ocr_cache")
+            self._connect().commit()
+        except Exception:
+            pass
+
+    def stats(self) -> dict:
+        try:
+            conn = self._connect()
+            n    = conn.execute("SELECT COUNT(*) FROM ocr_cache").fetchone()[0]
+            size = self._path.stat().st_size if self._path.exists() else 0
+            return {"entries": n, "size_bytes": size}
+        except Exception:
+            return {"entries": 0, "size_bytes": 0}
+
+
+# Module-level singleton — shared within a process
+_ocr_cache = OCRCache()
+
+
+def ocr_page_cached(image, lang: str) -> str:
+    """
+    Run Tesseract OCR on `image`, returning cached text when available.
+    Falls back to uncached OCR if the cache is unavailable.
+    """
+    import io as _io
+    # Serialise image to bytes for hashing (use PNG for lossless round-trip)
+    buf = _io.BytesIO()
+    image.save(buf, format="PNG")
+    img_bytes = buf.getvalue()
+
+    cached = _ocr_cache.get(img_bytes, lang)
+    if cached is not None:
+        return cached
+
+    text = ocr_page(image, lang)
+    _ocr_cache.put(img_bytes, lang, text)
+    _ocr_cache.prune()
+    return text
+
+
+# ── Patterns ──────────────────────────────────────────────────────────────────
+
+# Danish CPR: DDMMYY-XXXX  or  DDMMYYXXXX  (optional space/dash separator)
+CPR_PATTERN = re.compile(r"\b(\d{2})(\d{2})(\d{2})[-\s]?(\d{4})\b")
+
+DATE_PATTERNS = [
+    (re.compile(r"\b(\d{4})[-/](\d{1,2})[-/](\d{1,2})\b"), "ISO YYYY-MM-DD"),
+    (re.compile(r"\b(\d{1,2})[.\-/](\d{1,2})[.\-/](\d{4})\b"), "DD.MM.YYYY"),
+    (re.compile(r"\b(\d{1,2})[.\-/](\d{1,2})[.\-/](\d{2})\b"), "DD.MM.YY"),
+    (re.compile(
+        r"\b(\d{1,2})\.\s*(januar|februar|marts|april|maj|juni|juli|"
+        r"august|september|oktober|november|december)\s+(\d{4})\b", re.IGNORECASE),
+     "D. maaned YYYY"),
+    (re.compile(
+        r"\b(\d{1,2})\s+(January|February|March|April|May|June|July|"
+        r"August|September|October|November|December)\s+(\d{4})\b", re.IGNORECASE),
+     "D Month YYYY"),
+    (re.compile(
+        r"\b(January|February|March|April|May|June|July|August|"
+        r"September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})\b", re.IGNORECASE),
+     "Month D, YYYY"),
+]
+
+# ── Regex patterns for PII beyond CPR ─────────────────────────────────────────
+
+# Danish phone: 8 digits, optionally grouped in pairs/fours with spaces or dashes
+# Also matches +45 prefix
+PHONE_PATTERN = re.compile(
+    r"(?<!\d)(?:\+45[\s\-]?)?(?:\d{2}[\s\-]?){3}\d{2}(?!\d)"
+)
+
+EMAIL_PATTERN = re.compile(
+    r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b"
+)
+
+# Danish IBAN: DKxx xxxx xxxx xxxx xx  (18 characters total: "DK" + 16 digits)
+# Also plain REG/Konto format:  RRRR KKKKKKKKKK  (4-digit reg + up to 10-digit account)
+IBAN_PATTERN = re.compile(
+    r"\bDK\d{2}[\s\-]?\d{4}[\s\-]?\d{4}[\s\-]?\d{4}[\s\-]?\d{2}\b",
+    re.IGNORECASE,
+)
+# Danish bank account REG no + account number (e.g. "1234 1234567890" or "REG: 1234 Konto: 12345")
+REG_KONTO_PATTERN = re.compile(
+    r"(?:reg(?:ister|\.|:)?\s*(?:nr\.?\s*)?)?\b(\d{4})[\s\-]+(\d{6,10})\b",
+    re.IGNORECASE,
+)
+
+# Danish address: "Streetname 12" or "Streetname 12A, 2. tv" style
+# Matches: word(s) ending in common suffix + number + optional letter + optional floor/door
+DANISH_ADDRESS_PATTERN = re.compile(
+    r"\b([A-ZÆØÅ][a-zæøåA-ZÆØÅ\-]+"           # Street name start (capitalised)
+    r"(?:\s+[A-ZÆØÅ]?[a-zæøåA-ZÆØÅ\-]+)*"     # additional words (allé, vej, gade, …)
+    r"(?:\s+(?:vej|gade|allé|alle|plads|torv|stræde|straede|boulevard|bvd|"
+    r"have|park|skov|bakke|bjerg|dal|mark|eng|sø|dam|holm|bro|port))?)"  # optional suffix
+    r"\s+(\d{1,4}[A-Za-z]?)"                   # house number (e.g. 12, 12A, 4B)
+    r"(?:[,\s]+\d{1,2}\.?(?:\s*(?:sal|tv|th|mf|[0-9]{1,3}))?)?"  # optional floor/door
+    r"(?:[,\s]+\d{4})?",                        # optional postcode
+    re.UNICODE,
+)
+
+# Danish full name: "Firstname [Middle...] Lastname", i.e. two or more words.
+# Covers Danish/Nordic letters. Single-word matches are impossible because the
+# pattern requires a first name followed by at least one further name part.
+# This is a fallback for isolated names where spaCy has no context.
+# Matching rules:
+# - First name must be capitalised (A-ZÆØÅ start)
+# - Middle/last parts can be lowercase (handles "Frode holm truelsen" style)
+# - Particles like "la", "de", "van" are allowed
+# - Accented chars (Bräuner, Buéno) supported via \w
+DANISH_NAME_PATTERN = re.compile(
+    r"\b([A-ZÆØÅ][\w\-]{1,})"              # First name — must be capitalised
+    r"(?:\s+[\w\-]{2,})*"                   # Optional middle parts (any case)
+    r"\s+([\w\-]{2,})\b",                   # Last name (any case, min 2 chars)
+    re.UNICODE,
+)
+
+# Words that are NEVER part of a person name — used to filter regex false positives.
+# NOTE: Month names that are also Danish first names (April, August, Juni, Juli,
+# Mai/Maj) are intentionally excluded so we don't block real names.
+_NAME_STOPWORDS = {
+    # Calendar — only months not used as first names
+    "januar","februar","marts","september","oktober","november","december",
+    "january","february","march","september","october","november","december",
+    # Days of week
+    "mandag","tirsdag","onsdag","torsdag","fredag","lordag","sondag",
+    "monday","tuesday","wednesday","thursday","friday","saturday","sunday",
+    # Business / document words
+    "dk","cvr","cpr","att","re","fwd","til","fra","dato","side","total",
+    "faktura","invoice","bilag","nota","subtotal","moms","vat","inkl","ekskl",
+    "afdeling","department","company","virksomhed","adresse","address",
+    "telefon","email","website","homepage","tlf","mobil","mobile",
+}
+
+# Particles that can appear lowercase inside a name ("la", "de", "van", etc.)
+_NAME_PARTICLES = {"la","le","de","du","van","von","af","of","the"}
+
+def _is_name_match(m) -> bool:
+    """Return True if a DANISH_NAME_PATTERN match looks like a real person name."""
+    parts = m.group(0).split()
+    if len(parts) < 2:
+        return False
+    # First and last parts must be at least 2 chars
+    if len(parts[0]) < 2 or len(parts[-1]) < 2:
+        return False
+    # Reject if the first (capitalised) word is a document stopword
+    if parts[0].lower() in _NAME_STOPWORDS:
+        return False
+    # Reject if ALL non-particle parts are stopwords
+    real_parts = [p for p in parts if p.lower() not in _NAME_PARTICLES]
+    if all(p.lower() in _NAME_STOPWORDS for p in real_parts):
+        return False
+    # Reject strings that are all-uppercase (acronyms, e.g. "CVR NR")
+    if all(p.isupper() and len(p) > 1 for p in parts):
+        return False
+    # Require at least the first word to look like a name (starts uppercase, has lowercase)
+    if not re.search(r'[a-zæøå]', parts[0]):
+        return False
+    return True
+
+
+# Words that strongly suggest a nearby 10-digit sequence is a CPR number.
+# Used by cpr_context_boost() to raise the risk score.
+CPR_CONTEXT_WORDS = re.compile(
+    r"\b(?:cpr|personnummer|person[\-\s]?nr|cpr[\-\s]?nr|"
+    r"f\.?d\.?t\.?|fodt|fødselsdato|fdato|"
+    r"born|date\s+of\s+birth|dob|"
+    r"civil\s*registration|NemID|MitID)\b",
+    re.IGNORECASE | re.UNICODE,
+)
+
+
+# ── False-positive exclusion: invoice / document-number context ───────────────
+# If any of these words appear within ~120 characters of a candidate match,
+# it is very likely an invoice number, order number, or part number — not a CPR.
+CPR_FALSE_POSITIVE_WORDS = re.compile(
+    r"\b(?:"
+    # Invoice / order documents
+    r"faktura(?:nr|nummer)?|invoice|invoicenr|invno|inv\.?\s*no"
+    r"|ordre(?:nr|nummer)?|order(?:nr|number)?"
+    r"|rekvisition|requisition"
+    r"|tilbud(?:snr|snummer)?"
+    r"|kvittering"
+    r"|kreditnota|credit\s*note"
+    # Item / part / product references
+    r"|varenr|vare(?:nummer)?"
+    r"|art(?:ikel)?(?:nr|nummer|no)?"
+    r"|item\s*(?:nr|no|number|#)?"
+    r"|part\s*(?:nr|no|number|#)?"
+    r"|produkt(?:nr|nummer)?"
+    r"|model(?:nr|number)?"
+    r"|serial\s*(?:nr|no|number)?"
+    r"|serie(?:nr|nummer)?"
+    r"|lot\s*(?:nr|no|number)?"
+    r"|batch\s*(?:nr|no|number)?"
+    # Reference / document codes
+    r"|referencenr|ref(?:erence)?\.?\s*(?:nr|no|number)?"
+    r"|sagsnr|sags(?:nummer)?"
+    r"|doc(?:ument)?\s*(?:nr|no|number|#)?"
+    r"|bilag(?:snr|snummer)?"
+    r"|bogf(?:øring)?"
+    r"|kontonr|konto(?:nummer)?"
+    r"|ean\s*(?:nr|no|number)?"
+    r"|gln"
+    r"|p(?:urchase)?\s*order"
+    r"|po\s*(?:nr|no|number)?"
+    r"|so\s*(?:nr|no)?"           # sales order
+    # Typical invoice line columns
+    r"|antal|quantity|qty"
+    r"|stk\.|pcs\.|units?"
+    r"|enhedspris|unit\s*price"
+    r"|rabat|discount"
+    r"|moms|vat|tax"
+    r"|subtotal|i\s*alt|total\s*(?:ekskl|inkl)"
+    r")\b",
+    re.IGNORECASE,
+)
+
+# Characters that, if appearing immediately before the 10-digit match,
+# indicate it's embedded in a longer document/product code — not a CPR.
+# e.g. "REF-250312-4821", "ART250312-4821", "V250312-4821"
+_CPR_PREFIX_NOISE = re.compile(r"[A-Za-z0-9]$")
+
+
+def _is_false_positive(text: str, match_start: int, match_end: int,
+                        window: int = 120) -> bool:
+    """
+    Return True if the 10-digit candidate is almost certainly NOT a CPR number.
+
+    Two checks:
+    1. Invoice/order/part-number keyword within `window` chars of the match.
+    2. The character immediately preceding the match is alphanumeric
+       (suggests the number is part of a product or reference code).
+    """
+    # Check 1 — surrounding keyword context
+    lo = max(0, match_start - window)
+    hi = min(len(text), match_end + window)
+    if CPR_FALSE_POSITIVE_WORDS.search(text[lo:hi]):
+        return True
+
+    # Check 2 — prefix character (letter or digit immediately before match)
+    if match_start > 0 and _CPR_PREFIX_NOISE.search(text[match_start - 1]):
+        return True
+
+    return False
+
+
+def cpr_context_boost(text: str, cpr_match_start: int, cpr_match_end: int,
+                      window: int = 80) -> bool:
+    """
+    Return True if a CPR-context keyword appears within `window` characters
+    of the match — used to boost risk score for contextually confirmed CPRs.
+    """
+    lo = max(0, cpr_match_start - window)
+    hi = min(len(text), cpr_match_end + window)
+    return bool(CPR_CONTEXT_WORDS.search(text[lo:hi]))
+
+# ── NER entity types to redact ─────────────────────────────────────────────────
+# spaCy label → human label. Covers Danish (da_core_news) and multilingual models.
+NER_REDACT_LABELS = {
+    "PER":    "NAME",       # da_core_news
+    "PERSON": "NAME",       # en_core_web / xx_ent_wiki
+    "LOC":    "ADDRESS",    # da_core_news locations (includes addresses)
+    "GPE":    "ADDRESS",    # geopolitical entity (en/xx models)
+    "FAC":    "ADDRESS",    # facilities / addresses
+    "ORG":    "ORG",        # organisations (optional — included for thoroughness)
+}
+
+
+# ── General helpers ───────────────────────────────────────────────────────────
+
+# Official CPR mod-11 weights applied to digits 1-10
+_MOD11_WEIGHTS = (4, 3, 2, 7, 6, 5, 4, 3, 2, 1)
+
+
+def _passes_mod11(dd: str, mm: str, yy: str, seq: str) -> bool:
+    """
+    Return True if the 10-digit CPR passes the official Danish mod-11 checksum.
+
+    Note: Denmark stopped issuing mod-11-valid CPR numbers around 2007 when the
+    number space was exhausted.  Post-2007 births have CPR numbers that do NOT
+    pass this check — so mod-11 failure does NOT prove a number is fake.
+    Use this as a CONFIDENCE signal, not a hard gate.
+    """
+    digits = [int(c) for c in (dd + mm + yy + seq)]
+    return sum(d * w for d, w in zip(digits, _MOD11_WEIGHTS)) % 11 == 0
+
+
+def is_valid_cpr(dd, mm, yy, seq):
+    """
+    Validate a candidate CPR number.
+
+    Returns:
+      (False, False)  — fails date/range/century check — not a CPR
+      (True, True)    — passes date check AND mod-11 checksum (high confidence)
+      (True, False)   — passes date only, not mod-11 (post-2007 numbers are
+                         legitimately valid but fail mod-11 — require context)
+
+    Rules applied:
+    - Month must be 01-12
+    - Day must be 01-31 (or 41-71 for protected numbers where day += 40)
+    - The date DDMMYY must be a real calendar date (e.g. 310200 is invalid)
+    - Sequence (last 4 digits) must not be 0000
+    - Century digit (first digit of seq) must be consistent with the year
+      according to the official Danish CPR century table
+
+    CPR century digit rules (7th digit → birth century):
+      0-3 → always 1900s
+      4   → 1937-1999 → 1900s ; 2000-2036 → 2000s
+      5-8 → 1858-1899 → 1800s ; 1900-1999 → 1900s  (effectively 1900s for modern docs)
+      9   → 1937-1999 → 1900s ; 2000-2036 → 2000s
+    """
+    try:
+        d, m, y, s = int(dd), int(mm), int(yy), int(seq)
+    except ValueError:
+        return False, False
+
+    # Reject all-zero sequence
+    if s == 0:
+        return False, False
+
+    # Normalise protected numbers (day += 40)
+    d_norm = d - 40 if d > 40 else d
+
+    # Basic range checks
+    if not (1 <= m <= 12):
+        return False, False
+    if not (1 <= d_norm <= 31):
+        return False, False
+
+    # Determine century from 7th digit (first digit of seq)
+    c7 = s // 1000
+    if c7 in (0, 1, 2, 3):
+        century = 1900
+    elif c7 == 4:
+        century = 2000 if y <= 36 else 1900
+    elif c7 in (5, 6, 7, 8):
+        century = 1900
+    elif c7 == 9:
+        century = 2000 if y <= 36 else 1900
+    else:
+        return False, False
+
+    # Validate actual calendar date (catches 310200, 290200 in non-leap years, etc.)
+    try:
+        date(century + y, m, d_norm)
+    except ValueError:
+        return False, False
+
+    return True, _passes_mod11(dd, mm, yy, seq)
+
+
+def is_text_page(page) -> bool:
+    text = page.extract_text() or ""
+    return len(text.replace(" ", "").replace("\n", "")) >= 20
+
+
+def ocr_page(image, lang: str) -> str:
+    config = "--oem 3 --psm 3"
+    return pytesseract.image_to_string(image, lang=lang, config=config)
+
+
+def extract_matches(text: str, page_num: int, source: str):
+    """Extract CPR numbers and dates. Returns (cprs, dates)."""
+    cprs, dates = [], []
+    for m in CPR_PATTERN.finditer(text):
+        dd, mm, yy, seq = m.groups()
+        date_ok, mod11_ok = is_valid_cpr(dd, mm, yy, seq)
+        if not date_ok:
+            continue
+        if _is_false_positive(text, m.start(), m.end()):
+            continue
+        ctx = cpr_context_boost(text, m.start(), m.end())
+        # Gate: require mod-11 OR explicit CPR context keyword.
+        # This rejects ~91% of random date-valid numbers (invoice/part numbers)
+        # while keeping real post-2007 CPRs that appear with explicit labels.
+        if not mod11_ok and not ctx:
+            continue
+        cprs.append({"page": page_num, "raw": m.group(0),
+                     "formatted": f"{dd}{mm}{yy}-{seq}", "source": source,
+                     "context_confirmed": ctx,
+                     "mod11": mod11_ok})
+    for pattern, fmt in DATE_PATTERNS:
+        for m in pattern.finditer(text):
+            dates.append({"page": page_num, "raw": m.group(0),
+                          "format": fmt, "source": source})
+    return cprs, dates
+
+
+def dedup_dates(dates):
+    seen, result = set(), []
+    for d in dates:
+        key = (d["page"], d["raw"].strip())
+        if key not in seen:
+            seen.add(key)
+            result.append(d)
+    return result
+
+
+def count_pii_types(text: str, use_ner: bool = True) -> dict:
+    """
+    Count all PII types in text.
+    Returns e.g. {"PHONE": 2, "EMAIL": 1, "IBAN": 0, "BANK_ACCOUNT": 1, "NAME": 3, "ADDRESS": 1, "ORG": 2}.
+    NER (NAME/ADDRESS/ORG) is run when use_ner=True and the spaCy model is loaded.
+    """
+    counts: dict[str, int] = {
+        "PHONE": 0, "EMAIL": 0, "IBAN": 0, "BANK_ACCOUNT": 0,
+        "NAME": 0, "ADDRESS": 0, "ORG": 0,
+    }
+
+    for m in PHONE_PATTERN.finditer(text):
+        raw = m.group(0).replace(" ", "").replace("-", "").lstrip("+")
+        digits = re.sub(r"\D", "", raw)
+        if len(digits) in (8, 10, 11):
+            counts["PHONE"] += 1
+
+    for _ in EMAIL_PATTERN.finditer(text):
+        counts["EMAIL"] += 1
+
+    for _ in IBAN_PATTERN.finditer(text):
+        counts["IBAN"] += 1
+
+    for m in REG_KONTO_PATTERN.finditer(text):
+        reg, acct = m.group(1), m.group(2)
+        if 1 <= int(reg) <= 9999 and len(acct) >= 6:
+            counts["BANK_ACCOUNT"] += 1
+
+    # NER-based counts — only run if model is loaded and text is non-trivial
+    if use_ner and len(text.strip()) > 20:
+        nlp = load_nlp()
+        if nlp:
+            NER_LIMIT = 20_000
+            for chunk_start in range(0, min(len(text), NER_LIMIT * 10), NER_LIMIT):
+                chunk = text[chunk_start:chunk_start + NER_LIMIT]
+                if not chunk.strip():
+                    continue
+                doc = nlp(chunk)
+                for ent in doc.ents:
+                    mapped = NER_REDACT_LABELS.get(ent.label_)
+                    if mapped in counts:
+                        counts[mapped] += 1
+
+    return counts
+
+
+# ── Date parsing (for --older-than) ──────────────────────────────────────────
+
+MONTH_DA = {"januar":1,"februar":2,"marts":3,"april":4,"maj":5,"juni":6,
+            "juli":7,"august":8,"september":9,"oktober":10,"november":11,"december":12}
+MONTH_EN = {"january":1,"february":2,"march":3,"april":4,"may":5,"june":6,
+            "july":7,"august":8,"september":9,"october":10,"november":11,"december":12}
+
+def parse_date(raw: str, fmt: str):
+    raw = raw.strip()
+    try:
+        if fmt == "ISO YYYY-MM-DD":
+            return datetime.strptime(raw, "%Y-%m-%d").date()
+        if fmt in ("DD.MM.YYYY", "DD.MM.YY"):
+            for sep in ".-/":
+                try:
+                    d, m, y = raw.split(sep)
+                    y = int(y)
+                    if fmt == "DD.MM.YY":
+                        y += 2000 if y <= 30 else 1900
+                    return date(y, int(m), int(d))
+                except Exception:
+                    pass
+        if fmt == "D. maaned YYYY":
+            mo = re.match(r"(\d{1,2})\.\s*(\w+)\s+(\d{4})", raw, re.IGNORECASE)
+            if mo:
+                d, mon, y = mo.groups()
+                mn = MONTH_DA.get(mon.lower())
+                if mn: return date(int(y), mn, int(d))
+        if fmt == "D Month YYYY":
+            mo = re.match(r"(\d{1,2})\s+(\w+)\s+(\d{4})", raw, re.IGNORECASE)
+            if mo:
+                d, mon, y = mo.groups()
+                mn = MONTH_EN.get(mon.lower())
+                if mn: return date(int(y), mn, int(d))
+        if fmt == "Month D, YYYY":
+            mo = re.match(r"(\w+)\s+(\d{1,2}),?\s+(\d{4})", raw, re.IGNORECASE)
+            if mo:
+                mon, d, y = mo.groups()
+                mn = MONTH_EN.get(mon.lower())
+                if mn: return date(int(y), mn, int(d))
+    except Exception:
+        pass
+    return None
+
+
+def older_than(d, days: int) -> bool:
+    return d <= date.today() - timedelta(days=days)
+
+
+def build_flagged_list(all_results, min_age_days):
+    flagged = []
+    for path, results in all_results:
+        if not results["cprs"]:
+            continue
+        old_dates = []
+        for hit in results["dates"]:
+            d = parse_date(hit["raw"], hit["format"])
+            if d and older_than(d, min_age_days):
+                old_dates.append((d, hit["raw"], hit["page"]))
+        if old_dates:
+            old_dates.sort(key=lambda x: x[0])
+            flagged.append({"path": path, "cpr_count": len(results["cprs"]),
+                            "oldest_date": old_dates[0], "old_dates": old_dates})
+    return flagged
+
+
+def print_flagged(flagged, min_age_days):
+    print(f"\n{'#'*62}")
+    print(f"  FILES WITH CPR + DATES OLDER THAN {min_age_days} DAYS: {len(flagged)}")
+    print(f"{'#'*62}")
+    if not flagged:
+        print("  None found.\n")
+        return
+    for i, entry in enumerate(flagged, 1):
+        oldest_d, oldest_raw, oldest_page = entry["oldest_date"]
+        print(f"\n  {i}. {entry['path']}")
+        print(f"     CPR numbers : {entry['cpr_count']}")
+        print(f"     Oldest date : {oldest_raw}  ({oldest_d.isoformat()}, page {oldest_page})")
+        for d, raw, pg in entry["old_dates"][1:4]:
+            print(f"                   {raw}  ({d.isoformat()}, page {pg})")
+        if len(entry["old_dates"]) > 4:
+            print(f"                   ... and {len(entry['old_dates'])-4} more")
+    print()
+
+
+# ── PII detection: text spans ─────────────────────────────────────────────────
+
+def find_pii_spans_in_text(text: str, use_ner: bool = True) -> list[tuple[int, int, str]]:
+    """
+    Return a sorted list of (start, end, label) character spans for PII in text.
+
+    Always applied (regex based): CPR, PHONE, EMAIL, IBAN, BANK_ACCOUNT
+    (REG/konto) and ADDRESS (Danish postal address pattern).
+    Only when use_ner is true: regex NAME detection, plus spaCy NER entities
+    whose label is a key of NER_REDACT_LABELS (if load_nlp() returns a model).
+
+    Overlapping or touching spans are merged into one span that keeps the
+    label of the span that sorts first, so the returned spans never overlap.
+    Offsets are positions in the ``text`` argument.
+    """
+    spans = []
+
+    # CPR — same gating as the scanner: valid date, not a known false
+    # positive, and either mod-11 valid or accompanied by a context keyword.
+    for m in CPR_PATTERN.finditer(text):
+        dd, mm, yy, seq = m.groups()
+        date_ok, mod11_ok = is_valid_cpr(dd, mm, yy, seq)
+        if not date_ok:
+            continue
+        if _is_false_positive(text, m.start(), m.end()):
+            continue
+        ctx = cpr_context_boost(text, m.start(), m.end())
+        if not mod11_ok and not ctx:
+            continue
+        spans.append((m.start(), m.end(), "CPR"))
+
+    # Phone
+    for m in PHONE_PATTERN.finditer(text):
+        raw = m.group(0).replace(" ", "").replace("-", "").lstrip("+")
+        digits = re.sub(r"\D", "", raw)
+        if len(digits) in (8, 10, 11):  # 8=DK, 10/11=with country code
+            spans.append((m.start(), m.end(), "PHONE"))
+
+    # Email
+    for m in EMAIL_PATTERN.finditer(text):
+        spans.append((m.start(), m.end(), "EMAIL"))
+
+    # Danish IBAN
+    for m in IBAN_PATTERN.finditer(text):
+        spans.append((m.start(), m.end(), "IBAN"))
+
+    # Danish REG/Konto bank account  (only when plausibly formatted as account)
+    for m in REG_KONTO_PATTERN.finditer(text):
+        reg, acct = m.group(1), m.group(2)
+        if 1 <= int(reg) <= 9999 and len(acct) >= 6:
+            spans.append((m.start(), m.end(), "BANK_ACCOUNT"))
+
+    # Danish postal addresses
+    for m in DANISH_ADDRESS_PATTERN.finditer(text):
+        # Only include if the match is long enough to avoid false positives
+        if len(m.group(0).strip()) >= 8:
+            spans.append((m.start(), m.end(), "ADDRESS"))
+
+    # Regex-based name detection — catches isolated "Firstname Lastname" cells
+    # where spaCy has no surrounding context to work from.
+    if use_ner:
+        for m in DANISH_NAME_PATTERN.finditer(text):
+            if _is_name_match(m):
+                spans.append((m.start(), m.end(), "NAME"))
+
+    # NER (names, addresses, orgs)
+    # Cap at 20 000 chars per call — spaCy NER is O(n) but dense tabular text
+    # (e.g. Excel-converted PDFs) can have thousands of tokens per page and stall.
+    # At most NER_LIMIT * 10 characters are processed; text beyond that is
+    # not passed to the model at all.
+    #
+    # Context boosting: spaCy needs sentence context to recognise isolated names.
+    # For short text (< 80 chars, e.g. a single cell or line) we prepend a label
+    # so the model sees "Navn: Peter Hansen" instead of bare "Peter Hansen".
+    # Matches are shifted back by the prefix length before being recorded.
+    if use_ner:
+        nlp = load_nlp()
+        if nlp:
+            NER_LIMIT = 20_000
+            PREFIX = "Navn: "
+            PLEN   = len(PREFIX)
+            # Only inject prefix for short/isolated text
+            if len(text.strip()) < 80:
+                ner_input  = PREFIX + text
+                ner_offset = -PLEN
+            else:
+                ner_input  = text
+                ner_offset = 0
+            for chunk_start in range(0, min(len(ner_input), NER_LIMIT * 10), NER_LIMIT):
+                chunk = ner_input[chunk_start:chunk_start + NER_LIMIT]
+                if not chunk.strip():
+                    continue
+                doc = nlp(chunk)
+                for ent in doc.ents:
+                    if ent.label_ in NER_REDACT_LABELS:
+                        # Entity offsets are relative to the chunk; convert
+                        # them to positions in the original ``text``.
+                        s = chunk_start + ent.start_char + ner_offset
+                        e = chunk_start + ent.end_char   + ner_offset
+                        if e <= 0:   # entity was entirely within the prefix
+                            continue
+                        # An entity that starts inside the prefix is clamped
+                        # to the beginning of the real text.
+                        spans.append((max(s, 0), e, NER_REDACT_LABELS[ent.label_]))
+
+    # Merge overlapping spans. Tuples sort by (start, end, label); a span that
+    # starts at or before the previous span's end is folded into it and the
+    # previous span's label is kept.
+    spans.sort()
+    merged = []
+    for start, end, label in spans:
+        if merged and start <= merged[-1][1]:
+            prev_s, prev_e, prev_l = merged[-1]
+            merged[-1] = (prev_s, max(prev_e, end), prev_l)
+        else:
+            merged.append((start, end, label))
+
+    return merged
+
+
+# ── Bounding box finders ──────────────────────────────────────────────────────
+
+def find_pii_char_bboxes(page, use_ner: bool = True) -> list[tuple[float, float, float, float, str]]:
+    """
+    Return (x0, top, x1, bottom, label) for all PII on a text-based pdfplumber page.
+
+    Uses extract_words() for bbox lookup, but extract_text() for the NER text so
+    that spaCy sees newlines between lines — critical for name recognition. Without
+    newlines, names from adjacent rows run together and spaCy misses them.
+    """
+    words = page.extract_words(keep_blank_chars=False, x_tolerance=3, y_tolerance=3)
+    if not words:
+        return []
+
+    # Build a word-span index for bbox lookup (space-separated, no newlines)
+    word_text = ""
+    word_spans = []
+    for w in words:
+        ws = len(word_text)
+        word_text += w["text"]
+        word_spans.append((ws, len(word_text), w))
+        word_text += " "
+
+    # For PII/NER detection use extract_text() which preserves newlines between
+    # lines — spaCy needs sentence structure to reliably recognise names.
+    ner_text = page.extract_text() or word_text
+
+    spans = find_pii_spans_in_text(ner_text, use_ner=use_ner)
+
+    bboxes = []
+    PAD = 1
+    for span_start, span_end, label in spans:
+        # The matched span is in ner_text coordinates. Map to word_text by
+        # extracting the matched surface form and fuzzy-searching in word_text.
+        matched_surface = ner_text[span_start:span_end].strip()
+        if not matched_surface:
+            continue
+
+        # Search for the token sequence in the word list
+        # Split matched surface into tokens (same split as extract_words uses)
+        import re as _re
+        tokens = _re.split(r'\s+', matched_surface)
+        tokens = [t for t in tokens if t]
+
+        hit_words = []
+        if tokens:
+            # Find the first word that starts with the first token
+            for i, (ws, we, w) in enumerate(word_spans):
+                if w["text"].startswith(tokens[0]) or tokens[0].startswith(w["text"]):
+                    # Try to match the full token sequence from here
+                    candidate = word_spans[i:i + len(tokens)]
+                    if len(candidate) == len(tokens):
+                        hit_words = [cw for (_, _, cw) in candidate]
+                        break
+                    # Partial match — just take as many words as match
+                    hit_words = [cw for (_, _, cw) in candidate]
+                    break
+
+        if not hit_words:
+            # Fallback: find words whose text overlaps with matched_surface tokens
+            surface_lower = matched_surface.lower()
+            hit_words = [w for (_, _, w) in word_spans
+                         if w["text"].lower() in surface_lower
+                         or surface_lower in w["text"].lower()]
+
+        if not hit_words:
+            continue
+
+        bboxes.append((
+            min(w["x0"]     for w in hit_words) - PAD,
+            min(w["top"]    for w in hit_words) - PAD,
+            max(w["x1"]     for w in hit_words) + PAD,
+            max(w["bottom"] for w in hit_words) + PAD,
+            label,
+        ))
+    return bboxes
+def find_cpr_char_bboxes(page):
+    """
+    CPR-only version for --mask (no NER).
+
+    Uses extract_words() to build the text string — the same tokenisation that
+    extract_text() uses during scanning. Raw page.chars iteration fails on
+    Excel-converted PDFs where chars have no inter-word spacing or are stored
+    in a different order than reading order, causing CPR patterns to either
+    not match or match at the wrong offsets.
+
+    Strategy:
+      1. Build a word list with bboxes via extract_words().
+      2. Concatenate words (space-separated) and run CPR_PATTERN on that string.
+      3. For each match, find which word(s) it falls in and union their bboxes.
+         Add a small padding so the black box covers the full glyph.
+    """
+    words = page.extract_words(keep_blank_chars=False, x_tolerance=3, y_tolerance=3)
+    if not words:
+        return []
+
+    # Build concatenated text and track each word's start offset
+    full_text = ""
+    word_spans = []  # (start_offset, end_offset, word_dict)
+    for w in words:
+        start = len(full_text)
+        full_text += w["text"]
+        word_spans.append((start, len(full_text), w))
+        full_text += " "  # space separator between words
+
+    bboxes = []
+    for m in CPR_PATTERN.finditer(full_text):
+        dd, mm, yy, seq = m.groups()
+        date_ok, mod11_ok = is_valid_cpr(dd, mm, yy, seq)
+        if not date_ok:
+            continue
+        if _is_false_positive(full_text, m.start(), m.end()):
+            continue
+        ctx = cpr_context_boost(full_text, m.start(), m.end())
+        if not mod11_ok and not ctx:
+            continue
+        ms, me = m.start(), m.end()
+        # Collect all words that overlap this match span
+        hit_words = [w for (ws, we, w) in word_spans if ws < me and we > ms]
+        if not hit_words:
+            continue
+        PAD = 1  # points of padding around the glyph
+        bboxes.append((
+            min(w["x0"]    for w in hit_words) - PAD,
+            min(w["top"]   for w in hit_words) - PAD,
+            max(w["x1"]    for w in hit_words) + PAD,
+            max(w["bottom"]for w in hit_words) + PAD,
+        ))
+    return bboxes
+
+
+def find_cpr_image_bboxes(image, lang: str):
+    """CPR-only image bboxes for --mask."""
+    raw_bboxes = find_pii_image_bboxes(image, lang, use_ner=False)
+    return [(l, t, r, b) for (l, t, r, b, lbl) in raw_bboxes if lbl == "CPR"]
+
+
+# ── Drawing helpers ───────────────────────────────────────────────────────────
+
+def build_redaction_overlay(page_width, page_height, bboxes_pdfplumber) -> bytes:
+    """Build a PDF overlay with black boxes. bboxes: (x0, top, x1, bottom[, label])."""
+    buf = io.BytesIO()
+    c = rl_canvas.Canvas(buf, pagesize=(page_width, page_height))
+    c.setFillColor(rl_black)
+    c.setStrokeColor(rl_black)
+    pad = 1.5
+    for bbox in bboxes_pdfplumber:
+        x0, top, x1, bot = bbox[:4]
+        rl_y = page_height - bot - pad
+        rl_h = (bot - top) + pad * 2
+        c.rect(x0 - pad, rl_y, (x1 - x0) + pad * 2, rl_h, fill=1, stroke=0)
+    c.save()
+    buf.seek(0)
+    return buf.read()
+
+
+def apply_overlay_to_page(writer, reader_page, bboxes):
+    page_width  = float(reader_page.mediabox.width)
+    page_height = float(reader_page.mediabox.height)
+    overlay_bytes  = build_redaction_overlay(page_width, page_height, bboxes)
+    overlay_page   = PdfReader(io.BytesIO(overlay_bytes)).pages[0]
+    reader_page.merge_page(overlay_page)
+    writer.add_page(reader_page)
+
+
+def redact_image(image, bboxes_px):
+    """Paint black rectangles over pixel bboxes in a PIL image."""
+    from PIL import ImageDraw
+    img = image.copy()
+    draw = ImageDraw.Draw(img)
+    for bbox in bboxes_px:
+        left, top, right, bottom = bbox[:4]
+        draw.rectangle([left, top, right, bottom], fill="black")
+    return img
+
+
+def image_to_pdf_page(image, dpi=300) -> bytes:
+    buf = io.BytesIO()
+    image.convert("RGB").save(buf, format="PDF", resolution=dpi)
+    buf.seek(0)
+    return buf.read()
+
+
+# ── Secure PDF redaction (PyMuPDF) ───────────────────────────────────────────
+
+def redact_pdf_secure(input_path: Path, output_path: Path, results: dict,
+                      force_ocr: bool, lang: str, dpi: int, poppler_path,
+                      use_ner: bool = False) -> "int | bool":
+    """
+    Physically-secure PDF redaction using PyMuPDF (fitz).
+
+    Unlike the reportlab overlay approach, PyMuPDF:
+      1. Draws opaque redaction annotations over the target character bboxes.
+      2. Calls page.apply_redactions() which physically REMOVES the underlying
+         text/image data — not just paints over it.
+      3. Saves with garbage collection and compression to strip orphaned objects.
+
+    This means a user cannot recover the redacted text by:
+      - Selecting text under the black box in a viewer
+      - Extracting the PDF text layer programmatically
+      - Inspecting raw PDF object streams
+
+    Falls back to the reportlab overlay method if PyMuPDF is not installed.
+    """
+    if not PYMUPDF_AVAILABLE:
+        return redact_pdf(input_path, output_path, results,
+                          force_ocr, lang, dpi, poppler_path, use_ner)
+
+    page_methods = results["page_methods"]
+
+    images = None
+    ocr_pages = [p for p, m in page_methods.items() if m == "ocr"]
+    if ocr_pages and OCR_AVAILABLE:
+        images = convert_from_path(str(input_path), dpi=dpi, poppler_path=poppler_path)
+
+    total = 0
+    doc = _fitz.open(str(input_path))
+
+    with pdfplumber.open(input_path) as plumb_pdf:
+        for page_num, plumb_page in enumerate(plumb_pdf.pages, start=1):
+            method    = page_methods.get(page_num, "text")
+            fitz_page = doc[page_num - 1]
+
+            # Get bboxes in pdfplumber coordinates (origin top-left, y increases down)
+            if method == "text":
+                bboxes = (find_pii_char_bboxes(plumb_page, use_ner=use_ner)
+                          if use_ner else find_cpr_char_bboxes(plumb_page))
+            elif method == "ocr" and images is not None:
+                img    = images[page_num - 1]
+                bboxes = (find_pii_image_bboxes(img, lang, use_ner=use_ner)
+                          if use_ner else find_cpr_image_bboxes(img, lang))
+            else:
+                bboxes = []
+
+            # pdfplumber char coords: origin top-left of CropBox, y increases DOWN.
+            # fitz Rect coords:       origin top-left of MediaBox, y increases DOWN.
+            # Both already have y=0 at the top — no flip needed.
+            # Add the CropBox offset so boxes land correctly when CropBox != MediaBox.
+            cb = fitz_page.cropbox
+            mb = fitz_page.mediabox
+            crop_x0 = cb.x0 - mb.x0
+            crop_y0 = cb.y0 - mb.y0
+
+            for bbox in bboxes:
+                x0, top, x1, bottom = bbox[:4]
+                rect = _fitz.Rect(
+                    x0     + crop_x0,
+                    top    + crop_y0,
+                    x1     + crop_x0,
+                    bottom + crop_y0,
+                )
+                annot = fitz_page.add_redact_annot(rect, fill=(0, 0, 0))
+                _ = annot  # silence linter
+
+            # Apply redactions — physically removes text/image data under rects
+            # PDF_REDACT_IMAGE_REMOVE / PDF_REDACT_LINE_ART_REMOVE were added in
+            # PyMuPDF 1.22; fall back to their integer values (2) on older builds.
+            _img_flag  = getattr(_fitz, "PDF_REDACT_IMAGE_REMOVE",    2)
+            _art_flag  = getattr(_fitz, "PDF_REDACT_LINE_ART_REMOVE", 2)
+            fitz_page.apply_redactions(images=_img_flag, graphics=_art_flag)
+            total += len(bboxes)
+
+    # Save with full garbage collection (removes orphaned objects/streams)
+    doc.save(
+        str(output_path),
+        garbage=4,          # maximum GC: also removes unused xref entries
+        deflate=True,       # compress streams
+        clean=True,         # sanitise content streams
+        linear=False,
+    )
+    doc.close()
+    return total
+
+
+# ── Generic redact-PDF engine (reportlab overlay — visual only) ───────────────
+
+def redact_pdf(input_path: Path, output_path: Path, results: dict,
+               force_ocr: bool, lang: str, dpi: int, poppler_path,
+               use_ner: bool = False) -> int | bool:
+    """
+    Write a redacted PDF to output_path.
+    If use_ner=False: CPR only (--mask).
+    If use_ner=True:  all PII (--anonymise).
+    Returns count of redacted regions, or False on error.
+    """
+    if not MASK_AVAILABLE:
+        print("  Requires: pip install pypdf reportlab")
+        return False
+
+    page_methods = results["page_methods"]
+    reader = PdfReader(str(input_path))
+    writer = PdfWriter()
+
+    images = None
+    ocr_pages = [p for p, m in page_methods.items() if m == "ocr"]
+    if ocr_pages and OCR_AVAILABLE:
+        images = convert_from_path(str(input_path), dpi=dpi, poppler_path=poppler_path)
+
+    total = 0
+    with pdfplumber.open(input_path) as plumb_pdf:
+        for page_num, plumb_page in enumerate(plumb_pdf.pages, start=1):
+            method = page_methods.get(page_num, "text")
+            reader_page = reader.pages[page_num - 1]
+
+            if method == "text":
+                bboxes = (find_pii_char_bboxes(plumb_page, use_ner=use_ner)
+                          if use_ner else find_cpr_char_bboxes(plumb_page))
+                if bboxes:
+                    apply_overlay_to_page(writer, reader_page, bboxes)
+                    total += len(bboxes)
+                else:
+                    writer.add_page(reader_page)
+
+            elif method == "ocr" and images is not None:
+                img = images[page_num - 1]
+                bboxes = (find_pii_image_bboxes(img, lang, use_ner=use_ner)
+                          if use_ner else find_cpr_image_bboxes(img, lang))
+                if bboxes:
+                    writer.add_page(
+                        PdfReader(io.BytesIO(
+                            image_to_pdf_page(redact_image(img, bboxes), dpi)
+                        )).pages[0]
+                    )
+                    total += len(bboxes)
+                else:
+                    writer.add_page(reader_page)
+            else:
+                writer.add_page(reader_page)
+
+    with open(output_path, "wb") as f:
+        writer.write(f)
+    return total
+
+
+
+# ── Word document support ─────────────────────────────────────────────────────
+
+def _iter_docx_runs(doc):
+    """Yield every run in a docx Document: body, tables, headers, footers."""
+    def _from_paragraphs(paragraphs):
+        for para in paragraphs:
+            for run in para.runs:
+                yield run
+
+    yield from _from_paragraphs(doc.paragraphs)
+    for table in doc.tables:
+        for row in table.rows:
+            for cell in row.cells:
+                yield from _from_paragraphs(cell.paragraphs)
+    for section in doc.sections:
+        for hf in [section.header, section.footer,
+                   section.even_page_header, section.even_page_footer,
+                   section.first_page_header, section.first_page_footer]:
+            try:
+                yield from _from_paragraphs(hf.paragraphs)
+            except Exception:
+                pass
+
+
+def scan_docx(docx_path: Path) -> dict:
+    """
+    Scan a .docx file for CPR numbers and dates.
+    Returns the same results dict shape as scan_document(), plus internal
+    _doc / _run_map / _full_text keys used by redact_docx().
+    """
+    if not DOCX_OK:
+        print("  .docx support requires: pip install python-docx")
+        return {"cprs": [], "dates": [], "page_methods": {1: "docx"},
+                "_doc": None, "_run_map": [], "_full_text": ""}
+
+    doc = DocxDocument(str(docx_path))
+
+    # Build full text + run map (global_start, global_end, run)
+    full_text = ""
+    run_map = []
+    for run in _iter_docx_runs(doc):
+        if run.text:
+            start = len(full_text)
+            full_text += run.text
+            run_map.append((start, len(full_text), run))
+
+    cprs, dates = extract_matches(full_text, 1, "docx")
+    return {
+        "cprs": cprs,
+        "dates": dates,
+        "page_methods": {1: "docx"},
+        "_full_text": full_text,
+        "_run_map": run_map,
+        "_doc": doc,
+    }
+
+
+def _redact_runs(run_map: list, spans: list):
+    """
+    Replace characters in the given spans with block characters (█).
+    Modifies runs in-place.
+    spans: list of (start, end, label) in full_text coordinates.
+    """
+    if not spans:
+        return
+
+    # Build char → (run, index_within_run) lookup
+    char_owner = []   # index = position in full_text, value = (run, char_pos_in_run)
+    for (gs, ge, run) in run_map:
+        for i in range(ge - gs):
+            char_owner.append((run, i))
+
+    # Apply redactions (process in reverse so earlier spans aren't shifted)
+    for span_start, span_end, _label in sorted(spans, key=lambda s: s[0], reverse=True):
+        # Group by run
+        by_run = {}
+        for pos in range(span_start, min(span_end, len(char_owner))):
+            run_obj, char_pos = char_owner[pos]
+            rid = id(run_obj)
+            if rid not in by_run:
+                by_run[rid] = {"run": run_obj, "positions": []}
+            by_run[rid]["positions"].append(char_pos)
+        for entry in by_run.values():
+            run_obj = entry["run"]
+            chars = list(run_obj.text)
+            for p in entry["positions"]:
+                if p < len(chars):
+                    chars[p] = "█"
+            run_obj.text = "".join(chars)
+
+
+def redact_docx(input_path: Path, output_path: Path, results: dict,
+                use_ner: bool = False) -> int:
+    """
+    Write a redacted copy of a .docx.
+    use_ner=False → CPR only; use_ner=True → all PII.
+    Returns number of spans redacted.
+    """
+    doc      = results.get("_doc")
+    run_map  = results.get("_run_map", [])
+    text     = results.get("_full_text", "")
+
+    if doc is None:
+        return 0
+
+    spans = find_pii_spans_in_text(text, use_ner=use_ner)
+
+    # If CPR-only, filter to CPR spans
+    if not use_ner:
+        spans = [(s, e, l) for s, e, l in spans if l == "CPR"]
+
+    _redact_runs(run_map, spans)
+    doc.save(str(output_path))
+    return len(spans)
+
+
+def print_docx_results(docx_path: Path, results: dict):
+    cprs  = results["cprs"]
+    dates = results["dates"]
+    print(f"\n{'='*62}")
+    print(f"File : {docx_path}  [Word document]")
+    print(f"{'='*62}")
+    print(f"\n  CPR Numbers found: {len(cprs)}")
+    if cprs:
+        for hit in cprs:
+            print(f"    {hit['formatted']:<16}  (raw: \"{hit['raw']}\")")
+    else:
+        print("    None found.")
+    print(f"\n  Dates found: {len(dates)}")
+    if dates:
+        for hit in dates:
+            print(f"    {hit['raw']:<28}  [{hit['format']}]")
+    else:
+        print("    None found.")
+    print()
+
+
+# ── Logging ───────────────────────────────────────────────────────────────────
+
+# Module-level logger — handlers are added in main() based on --log argument
+logger = logging.getLogger("scanner")
+logger.setLevel(logging.DEBUG)
+
+_log_records: list[dict] = []   # in-memory log, flushed to JSON at end
+
+def _log(level: str, path: Path | None, event: str, **kwargs):
+    """
+    Append a structured log record and emit to the logger.
+    level: "INFO" | "WARNING" | "ACTION" | "DRY_RUN" | "ERROR"
+    """
+    record = {
+        "time":  datetime.now().isoformat(timespec="seconds"),
+        "level": level,
+        "file":  str(path) if path else None,
+        "event": event,
+        **kwargs,
+    }
+    _log_records.append(record)
+    msg = f"[{level}] {path.name if path else ''} — {event}"
+    if kwargs:
+        extras = "  " + "  ".join(f"{k}={v}" for k, v in kwargs.items())
+        msg += extras
+    if level == "ERROR":
+        logger.error(msg)
+    elif level == "WARNING":
+        logger.warning(msg)
+    else:
+        logger.info(msg)
+
+
+def flush_log(log_path: Path):
+    """Write all accumulated log records to a JSON file."""
+    with open(log_path, "w", encoding="utf-8") as f:
+        json.dump(_log_records, f, ensure_ascii=False, indent=2, default=str)
+    print(f"\nLog written to: {log_path}  ({len(_log_records)} records)")
+
+
+
+# ── Excel / CSV support ───────────────────────────────────────────────────────
+
+def _cell_text(cell) -> str:
+    """Return a string representation of a cell value, or empty string."""
+    if cell.value is None:
+        return ""
+    return str(cell.value)
+
+
+def scan_xlsx(path: Path) -> dict:
+    """
+    Scan an .xlsx / .xlsm file for CPR numbers and dates across all sheets.
+    Returns results dict compatible with the rest of the pipeline, plus
+    _wb (workbook) for use by redact_xlsx().
+    Each CPR/date hit carries sheet + row + col in the "page" field
+    (formatted as "Sheet!R{row}C{col}").
+    """
+    if not XLSX_OK:
+        print("  .xlsx support requires: pip install openpyxl")
+        return {"cprs": [], "dates": [], "page_methods": {1: "xlsx"}, "_wb": None}
+
+    wb = openpyxl.load_workbook(str(path), data_only=True)
+    all_cprs, all_dates = [], []
+
+    for sheet in wb.worksheets:
+        for row in sheet.iter_rows():
+            for cell in row:
+                val = _cell_text(cell)
+                if not val:
+                    continue
+                location = f"{sheet.title}!R{cell.row}C{cell.column}"
+                cprs, dates = extract_matches(val, location, "xlsx")
+                all_cprs.extend(cprs)
+                all_dates.extend(dates)
+
+    return {
+        "cprs": all_cprs,
+        "dates": all_dates,
+        "page_methods": {1: "xlsx"},
+        "_wb": wb,
+        "_path": path,
+    }
+
+
+def scan_csv(path: Path) -> dict:
+    """
+    Scan a .csv file for CPR numbers and dates.
+    Returns results dict compatible with the rest of the pipeline.
+    """
+    import csv as _csv
+
+    all_cprs, all_dates = [], []
+    try:
+        with open(path, newline="", encoding="utf-8-sig", errors="replace") as f:
+            reader = _csv.reader(f)
+            for row_num, row in enumerate(reader, start=1):
+                for col_num, cell in enumerate(row, start=1):
+                    if not cell.strip():
+                        continue
+                    location = f"R{row_num}C{col_num}"
+                    cprs, dates = extract_matches(cell, location, "csv")
+                    all_cprs.extend(cprs)
+                    all_dates.extend(dates)
+    except Exception as e:
+        print(f"  Warning: could not read CSV: {e}")
+
+    return {
+        "cprs": all_cprs,
+        "dates": all_dates,
+        "page_methods": {1: "csv"},
+        "_wb": None,
+        "_path": path,
+    }
+
+
+def scan_text(text: str, source: str = "text") -> dict:
+    """
+    Scan a plain text string for CPR numbers and dates.
+    Returns a results dict compatible with the rest of the pipeline.
+    False-positive suppression (invoice/part-number context) is applied
+    via extract_matches → extract_cpr_and_dates → _is_false_positive.
+    """
+    cprs, dates = extract_cpr_and_dates(text, page_num=1, source=source)
+    return {
+        "cprs": cprs,
+        "dates": dates,
+        "page_methods": {1: "text"},
+    }
+
+
+def scan_image(path: Path, lang: str = "dan+eng") -> dict:
+    """
+    OCR an image file and scan the recognised text for CPR numbers and dates.
+
+    Requires Tesseract plus the pytesseract and Pillow packages.
+
+    Parameters
+    ----------
+    path : image file to OCR
+    lang : Tesseract language string forwarded to image_to_string
+
+    Returns
+    -------
+    On success, the dict produced by scan_text(text, source="image-ocr").
+    On failure, a dict with empty "cprs", "dates" and "page_methods" plus an
+    "error" message; no exception is propagated to the caller.
+    """
+    try:
+        import pytesseract as _tess
+        from PIL import Image as _PILImage
+        # Image.open keeps the underlying file handle open until the image is
+        # closed; the context manager releases it as soon as OCR has finished.
+        with _PILImage.open(path) as img:
+            text = _tess.image_to_string(img, lang=lang, config="--oem 3 --psm 3")
+        return scan_text(text, source="image-ocr")
+    except ImportError:
+        # "page_methods" is included so error results have the same keys as
+        # successful ones.
+        return {"cprs": [], "dates": [], "page_methods": {},
+                "error": "pytesseract/PIL not available"}
+    except Exception as e:
+        return {"cprs": [], "dates": [], "page_methods": {}, "error": str(e)}
+
+
+def redact_xlsx(input_path: Path, output_path: Path, results: dict,
+                use_ner: bool = False) -> int:
+    """
+    Save a redacted copy of a workbook to ``output_path``.
+
+    The workbook object is taken from ``results["_wb"]`` (``input_path`` is
+    not read here) and is modified in place before being saved.  Every cell
+    in which PII is detected has its whole value replaced by "████████".
+
+    use_ner=False -> only spans labelled "CPR" count;
+    use_ner=True  -> every span returned by find_pii_spans_in_text counts.
+
+    Returns the number of cells redacted, or 0 (writing nothing) when the
+    results dict carries no workbook.
+    """
+    workbook = results.get("_wb")
+    if workbook is None:
+        return 0
+
+    # Each cell is analysed behind a short label so that a cell holding only a
+    # name still gives the NER model some surrounding context.
+    context_prefix = "Navn: "
+    prefix_len = len(context_prefix)
+    hit_count = 0
+
+    every_cell = (
+        cell
+        for sheet in workbook.worksheets
+        for row in sheet.iter_rows()
+        for cell in row
+    )
+    for cell in every_cell:
+        text = _cell_text(cell)
+        if not text:
+            continue
+        found = find_pii_spans_in_text(context_prefix + text, use_ner=use_ner)
+        # Only spans reaching past the prefix belong to the cell itself.
+        labels = [label for _start, end, label in found if end > prefix_len]
+        if not use_ner:
+            labels = [label for label in labels if label == "CPR"]
+        if labels:
+            # The whole cell is blanked: partial in-cell redaction is not
+            # reliably possible in xlsx.
+            cell.value = "████████"
+            hit_count += 1
+
+    workbook.save(str(output_path))
+    return hit_count
+
+
+def redact_csv(input_path: Path, output_path: Path, use_ner: bool = False) -> int:
+    """
+    Save a redacted copy of a .csv file to ``output_path``.
+
+    Non-blank cells in which PII is detected are replaced by "████████";
+    all other cells are copied unchanged.  With use_ner=False only spans
+    labelled "CPR" count as PII.
+
+    If reading or analysing the input raises, a warning is printed, no output
+    file is written and 0 is returned.  Otherwise returns the number of cells
+    redacted.  The output is written as UTF-8 without a BOM.
+    """
+    import csv as _csv
+
+    # Cells are analysed behind a short label to give the detector context.
+    context_prefix = "Navn: "
+    prefix_len = len(context_prefix)
+
+    def _cell_has_pii(text):
+        """True when a non-blank cell contains at least one relevant span."""
+        if not text.strip():
+            return False
+        found = find_pii_spans_in_text(context_prefix + text, use_ner=use_ner)
+        labels = [label for _start, end, label in found if end > prefix_len]
+        if not use_ner:
+            labels = [label for label in labels if label == "CPR"]
+        return bool(labels)
+
+    cleaned_rows = []
+    hit_count = 0
+    try:
+        with open(input_path, newline="", encoding="utf-8-sig", errors="replace") as src:
+            for fields in _csv.reader(src):
+                cleaned = []
+                for text in fields:
+                    if _cell_has_pii(text):
+                        cleaned.append("████████")
+                        hit_count += 1
+                    else:
+                        cleaned.append(text)
+                cleaned_rows.append(cleaned)
+    except Exception as e:
+        print(f"  Warning: could not read CSV for redaction: {e}")
+        return 0
+
+    with open(output_path, "w", newline="", encoding="utf-8") as dst:
+        _csv.writer(dst).writerows(cleaned_rows)
+    return hit_count
+
+
+def print_xlsx_results(path: Path, results: dict, file_type: str = "xlsx"):
+    """
+    Print a console report of the CPR numbers and dates found in a
+    spreadsheet-like file.
+
+    ``results`` must provide "cprs" and "dates" lists.  Each CPR hit is read
+    through its "page", "formatted" and "raw" keys, each date hit through
+    "page", "raw" and "format".  ``file_type`` only selects the label in the
+    header: "xlsx" -> "Excel spreadsheet", anything else -> "CSV file".
+    """
+    cpr_hits = results["cprs"]
+    date_hits = results["dates"]
+    kind = "CSV file" if file_type != "xlsx" else "Excel spreadsheet"
+    rule = "=" * 62
+
+    print(f"\n{rule}")
+    print(f"File : {path}  [{kind}]")
+    print(rule)
+
+    print(f"\n  CPR Numbers found: {len(cpr_hits)}")
+    if not cpr_hits:
+        print("    None found.")
+    for entry in cpr_hits:
+        print(f"    {entry['page']:<20}  {entry['formatted']:<16}  (raw: \"{entry['raw']}\")")
+
+    print(f"\n  Dates found: {len(date_hits)}")
+    if not date_hits:
+        print("    None found.")
+    for entry in date_hits:
+        print(f"    {entry['page']:<20}  {entry['raw']:<28}  [{entry['format']}]")
+
+    print()
+
+
+# ── Face detection & pixelation ───────────────────────────────────────────────
+
+# Use both frontal and profile cascades for better coverage
+_FACE_CASCADES = None
+
+def _get_face_cascades():
+    """
+    Return the list of loaded Haar cascade classifiers (frontal + profile).
+
+    The list is built once and cached in the module-level ``_FACE_CASCADES``;
+    an empty list is cached as well, so a failed search is not repeated.
+    When OpenCV is unavailable an empty list is returned without caching.
+    """
+    global _FACE_CASCADES
+    if _FACE_CASCADES is not None:
+        return _FACE_CASCADES
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        return []
+
+    def _candidate_paths(name: str):
+        """List the places a cascade XML file may live, in priority order."""
+        import sys as _sys
+        paths = []
+        # 1. PyInstaller bundle - listed first so it wins over stale installs
+        if hasattr(_sys, "_MEIPASS"):
+            paths.append(str(Path(_sys._MEIPASS) / "cv2" / "data" / name))
+            paths.append(str(Path(_sys._MEIPASS) / name))
+        # 2. cv2.data attribute (standard install / venv)
+        try:
+            paths.append(cv2.data.haarcascades + name)
+        except Exception:
+            pass
+        # 3. "data" directory next to the cv2 package
+        try:
+            paths.append(str(Path(cv2.__file__).parent / "data" / name))
+        except Exception:
+            pass
+        # 4. Common system-wide locations
+        system_roots = ("/usr/share/opencv4", "/usr/share/opencv",
+                        "/usr/local/share/opencv4", "/usr/local/share/opencv")
+        paths.extend(str(Path(root) / "haarcascades" / name) for root in system_roots)
+        return paths
+
+    def _find_cascade(name: str):
+        """Load the first candidate that exists and parses; None if none do."""
+        candidates = _candidate_paths(name)
+        for location in candidates:
+            if not location or not Path(location).exists():
+                continue
+            classifier = cv2.CascadeClassifier(location)
+            if classifier.empty():
+                continue
+            _face_log(f"  [+] Cascade: {location}")
+            return classifier
+        # Nothing usable - list every path tried so it shows up in the log
+        _face_log(f"  [!] Cascade not found: {name}")
+        for location in candidates:
+            _face_log(f"      {location}  exists={Path(location).exists()}")
+        return None
+
+    wanted = ["haarcascade_frontalface_default.xml", "haarcascade_profileface.xml"]
+    loaded = [clf for clf in map(_find_cascade, wanted) if clf is not None]
+
+    if not loaded:
+        _face_log("  [!] No Haar cascade XML files found — face detection disabled")
+
+    _FACE_CASCADES = loaded
+    return loaded
+
+
+def detect_faces_cv2(img_cv2, min_size: int = 40, neighbors: int = 4,
+                     strict: bool = False):
+    """
+    Find faces in a BGR cv2 image with the loaded Haar cascades.
+
+    Every cascade is run on the histogram-equalised grayscale image and on
+    its horizontal mirror; mirrored hits are mapped back to the original
+    x axis.  Hits whose x, y, w and h all fall into the same 10-pixel bucket
+    as an earlier hit are dropped as duplicates.
+
+    Parameters
+    ----------
+    min_size   : minimum face side in pixels (used for both width and height)
+    neighbors  : minNeighbors for detectMultiScale (higher = stricter)
+    strict     : unused, kept for API compatibility
+
+    Returns a list of (x, y, w, h) boxes in pixel coordinates; empty when
+    OpenCV or the cascades are unavailable.
+    """
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        return []
+    # Equalising the histogram helps on dark or low-contrast images
+    equalised = cv2.equalizeHist(cv2.cvtColor(img_cv2, cv2.COLOR_BGR2GRAY))
+
+    cascades = _get_face_cascades()
+    if not cascades:
+        return []
+
+    boxes = []
+    buckets = set()
+
+    for cascade in cascades:
+        for frame, is_mirrored in ((equalised, False), (cv2.flip(equalised, 1), True)):
+            hits = cascade.detectMultiScale(
+                frame, scaleFactor=1.1, minNeighbors=neighbors,
+                minSize=(min_size, min_size), flags=cv2.CASCADE_SCALE_IMAGE
+            )
+            if hits is None or len(hits) == 0:
+                continue
+            frame_width = frame.shape[1]
+            for (x, y, w, h) in hits:
+                if is_mirrored:
+                    # Translate the mirrored x coordinate back
+                    x = frame_width - x - w
+                bucket = (x // 10, y // 10, w // 10, h // 10)
+                if bucket in buckets:
+                    continue
+                buckets.add(bucket)
+                boxes.append((x, y, w, h))
+    return boxes
+
+
+def pixelate_region(img_cv2, x: int, y: int, w: int, h: int, blocks: int = 6):
+    """Pixelate a rectangular region of a cv2 image and return a modified copy.
+
+    ``blocks`` is the number of mosaic cells across the shorter side of the
+    region, so lower values give larger cells and stronger anonymisation
+    regardless of how large the region is.  A Gaussian blur roughly one cell
+    wide is applied on top to soften the cell edges.
+
+    The rectangle is clipped to the image; if nothing remains, the copy is
+    returned unchanged.  ``blocks`` values below 1 are treated as 1.
+
+    Raises RuntimeError when OpenCV is not available.
+    """
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        raise RuntimeError("OpenCV not available")
+    out = img_cv2.copy()
+
+    # Clip the rectangle to the image.  Unclipped, a negative origin would
+    # wrap around when slicing and an oversized box would make the resized
+    # patch a different shape from the slice it is written back into.
+    img_h, img_w = out.shape[:2]
+    x0 = max(0, int(x))
+    y0 = max(0, int(y))
+    x1 = min(img_w, int(x) + int(w))
+    y1 = min(img_h, int(y) + int(h))
+    rw, rh = x1 - x0, y1 - y0
+    if rw <= 0 or rh <= 0:
+        return out
+
+    blocks = max(1, int(blocks))
+    # Side length of one mosaic cell in pixels.
+    cell = max(1, min(rw, rh) // blocks)
+    # Downscale to one pixel per cell.  The grid is derived from the cell
+    # size so the number of cells stays at about `blocks` however large the
+    # face is; a grid of w // blocks cells would keep large faces detailed.
+    grid_w = max(1, rw // cell)
+    grid_h = max(1, rh // cell)
+
+    roi = out[y0:y1, x0:x1]
+    small = cv2.resize(roi, (grid_w, grid_h), interpolation=cv2.INTER_LINEAR)
+    pixelated = cv2.resize(small, (rw, rh), interpolation=cv2.INTER_NEAREST)
+    # GaussianBlur needs an odd kernel size; use about one cell, at least 3.
+    ksize = max(3, cell | 1)
+    pixelated = cv2.GaussianBlur(pixelated, (ksize, ksize), 0)
+    out[y0:y1, x0:x1] = pixelated
+    return out
+
+
+def blur_faces_in_image(img_cv2, min_size: int = 30, blocks: int = 6):
+    """
+    Pixelate every detected face in a cv2 image.
+
+    Each detection box is grown by 10 % of its width/height on every side
+    (clipped to the image) before being passed to pixelate_region.
+
+    Returns (modified_copy, face_count).  When OpenCV is unavailable the
+    input image itself is returned together with 0.
+    """
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        return img_cv2, 0
+
+    detections = detect_faces_cv2(img_cv2, min_size=min_size)
+    canvas = img_cv2.copy()
+    for fx, fy, fw, fh in detections:
+        margin_x = int(fw * 0.1)
+        margin_y = int(fh * 0.1)
+        left = max(0, fx - margin_x)
+        top = max(0, fy - margin_y)
+        width = min(canvas.shape[1] - left, fw + margin_x * 2)
+        height = min(canvas.shape[0] - top, fh + margin_y * 2)
+        canvas = pixelate_region(canvas, left, top, width, height, blocks=blocks)
+    return canvas, len(detections)
+
+
+def pil_to_cv2(pil_img):
+    """Convert a PIL image to a BGR array for OpenCV (via an RGB conversion)."""
+    cv2, np = _get_cv2()
+    rgb_pixels = np.array(pil_img.convert("RGB"))
+    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
+
+
+def cv2_to_pil(img_cv2):
+    """Convert a BGR cv2 image to a PIL image (channels reordered to RGB)."""
+    cv2, np = _get_cv2()
+    from PIL import Image as PILImage
+    rgb_pixels = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
+    return PILImage.fromarray(rgb_pixels)
+
+
+def cv2_to_bytes(img_cv2, fmt: str = "JPEG") -> bytes:
+    """
+    Encode a cv2 image and return the encoded bytes.
+
+    ``fmt`` is matched case-insensitively against "JPEG", "PNG" and "WEBP";
+    any other value is encoded as JPEG.  Raises RuntimeError when
+    cv2.imencode reports failure.
+    """
+    cv2, np = _get_cv2()
+    suffix_by_format = {"JPEG": ".jpg", "PNG": ".png", "WEBP": ".webp"}
+    suffix = suffix_by_format.get(fmt.upper(), ".jpg")
+    success, encoded = cv2.imencode(suffix, img_cv2)
+    if success:
+        return encoded.tobytes()
+    raise RuntimeError(f"cv2.imencode failed for format {fmt}")
+
+
+# ── Face blur: standalone image files ─────────────────────────────────────────
+
+def blur_faces_image_file(input_path: Path, output_path: Path,
+                           blocks: int = 6) -> int:
+    """
+    Detect and pixelate faces in a standalone image file.
+
+    The processed image is written to ``output_path`` even when no face was
+    found; the caller decides whether to keep it.
+
+    Returns the number of faces blurred.
+
+    Raises
+    ------
+    RuntimeError : OpenCV is not available, or the output could not be written
+    ValueError   : the input could not be read as an image
+    """
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        raise RuntimeError("OpenCV not available")
+    img = cv2.imread(str(input_path))
+    if img is None:
+        raise ValueError(f"Could not read image: {input_path}")
+    result, count = blur_faces_in_image(img, blocks=blocks)
+    # cv2.imwrite signals failure through its return value; without this
+    # check a failed write would be reported to the user as a success.
+    if not cv2.imwrite(str(output_path), result):
+        raise RuntimeError(f"Could not write image: {output_path}")
+    return count
+
+
+# ── Face blur: PDF pages ───────────────────────────────────────────────────────
+
+def blur_faces_pdf(input_path: Path, output_path: Path,
+                   dpi: int = 150, poppler_path=None,
+                   blocks: int = 6) -> int:
+    """
+    Render each PDF page, detect faces, and draw a pixelated patch over each
+    face on the original page (the page content, including its text layer, is
+    kept), then save the result as a new PDF.
+
+    Parameters
+    ----------
+    dpi          : resolution used to render pages for face detection
+    poppler_path : forwarded to pdf2image's convert_from_path
+    blocks       : pixelation setting forwarded to pixelate_region
+
+    Returns the total number of faces covered across all pages.
+
+    Raises RuntimeError when pdf2image or OpenCV is unavailable.
+
+    NOTE(review): the patch is merged on top of the page; the original image
+    data underneath is not removed from the PDF.  Confirm this is acceptable
+    for the anonymisation guarantees the tool is meant to give.
+    """
+    if not OCR_AVAILABLE:
+        raise RuntimeError("pdf2image required: pip install pdf2image")
+
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        raise RuntimeError("OpenCV not available")
+
+    images = convert_from_path(str(input_path), dpi=dpi, poppler_path=poppler_path)
+
+    reader  = PdfReader(str(input_path))
+    writer  = PdfWriter()
+    total_faces = 0
+
+    for pil_img, reader_page in zip(images, reader.pages):
+        page_w = float(reader_page.mediabox.width)   # PDF points
+        page_h = float(reader_page.mediabox.height)
+
+        img_px_w, img_px_h = pil_img.size
+        scale_x = page_w / img_px_w
+        scale_y = page_h / img_px_h
+
+        img_cv2 = pil_to_cv2(pil_img)
+        # Detect once and use the same list for both the patches and the
+        # count.  min_size=30 matches blur_faces_in_image's default, which the
+        # other file formats use; counting with one threshold and drawing with
+        # another would report small faces as blurred without covering them.
+        faces = detect_faces_cv2(img_cv2, min_size=30)
+
+        if not faces:
+            writer.add_page(reader_page)
+            continue
+
+        # Imported only when a patch is actually drawn.
+        from reportlab.lib.utils import ImageReader
+
+        # Compose one pixelated patch per face into a reportlab overlay page
+        buf = io.BytesIO()
+        c = rl_canvas.Canvas(buf, pagesize=(page_w, page_h))
+
+        for (x, y, w, h) in faces:
+            pad_x = int(w * 0.1)
+            pad_y = int(h * 0.1)
+            x2, y2 = max(0, x - pad_x), max(0, y - pad_y)
+            w2 = min(img_px_w - x2, w + pad_x * 2)
+            h2 = min(img_px_h - y2, h + pad_y * 2)
+
+            # Pixelate just this region with the shared helper so PDF patches
+            # get the same treatment (including the blur) as other formats.
+            face_roi = img_cv2[y2:y2+h2, x2:x2+w2]
+            pixelated_roi = pixelate_region(face_roi, 0, 0, w2, h2, blocks=blocks)
+
+            # Convert to PIL and PNG-encode for reportlab
+            roi_pil = cv2_to_pil(pixelated_roi)
+            roi_buf = io.BytesIO()
+            roi_pil.save(roi_buf, format="PNG")
+            roi_buf.seek(0)
+
+            # PDF coords: reportlab origin is bottom-left; image origin is top-left
+            pdf_x  = x2 * scale_x
+            pdf_y  = page_h - (y2 + h2) * scale_y
+            pdf_w  = w2 * scale_x
+            pdf_h  = h2 * scale_y
+            c.drawImage(ImageReader(roi_buf), pdf_x, pdf_y, width=pdf_w, height=pdf_h)
+
+        c.save()
+        buf.seek(0)
+        overlay_page = PdfReader(buf).pages[0]
+        reader_page.merge_page(overlay_page)
+        writer.add_page(reader_page)
+        total_faces += len(faces)
+
+    with open(output_path, "wb") as f:
+        writer.write(f)
+    return total_faces
+
+
+# ── Face blur: Word documents ─────────────────────────────────────────────────
+
+def blur_faces_docx(input_path: Path, output_path: Path,
+                    blocks: int = 6) -> int:
+    """
+    Detect and pixelate faces in pictures embedded inline in a .docx file.
+
+    The input is copied to ``output_path``; the copy is opened, the bytes of
+    every inline picture containing faces are replaced, and the copy is saved
+    again.  The output file is written even when no face was found.
+
+    A picture that cannot be processed is skipped and the reason is logged
+    through _face_log; it does not abort the rest of the document.
+
+    Returns the number of faces blurred.
+    Raises RuntimeError when python-docx or OpenCV is unavailable.
+    """
+    if not DOCX_OK:
+        raise RuntimeError("python-docx required: pip install python-docx")
+
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        raise RuntimeError("OpenCV not available")
+
+    import shutil
+    from docx import Document
+    from docx.enum.shape import WD_INLINE_SHAPE
+
+    shutil.copy2(str(input_path), str(output_path))
+    doc = Document(str(output_path))
+    total_faces = 0
+
+    for index, shape in enumerate(doc.inline_shapes, start=1):
+        try:
+            if shape.type != WD_INLINE_SHAPE.PICTURE:
+                continue
+
+            blip = shape._inline.graphic.graphicData.pic.blipFill.blip
+            rId = blip.embed
+            image_part = doc.part.related_parts[rId]
+
+            # Decode image bytes -> cv2
+            img_data = image_part.blob
+            np_arr   = np.frombuffer(img_data, dtype=np.uint8)
+            img_cv2  = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
+            if img_cv2 is None:
+                continue
+
+            result, count = blur_faces_in_image(img_cv2, blocks=blocks)
+            if count == 0:
+                continue
+
+            # PNG parts are re-encoded as PNG, everything else as JPEG.
+            # NOTE(review): a part of another type (e.g. bmp/tiff) therefore
+            # ends up holding JPEG bytes under its original content type.
+            ct = image_part.content_type
+            fmt = "PNG" if "png" in ct.lower() else "JPEG"
+            new_bytes = cv2_to_bytes(result, fmt=fmt)
+
+            # Replace the part's payload via its private _blob attribute
+            image_part._blob = new_bytes
+            total_faces += count
+
+        except Exception as e:
+            # Best effort: keep going, but log which picture was skipped and
+            # why so an un-blurred face does not go unnoticed.
+            _face_log(f"  [!] Skipped inline image {index} in {input_path}: {e}")
+
+    doc.save(str(output_path))
+    return total_faces
+
+
+# ── Face blur: Excel workbooks ────────────────────────────────────────────────
+
+def blur_faces_xlsx(input_path: Path, output_path: Path,
+                    blocks: int = 6) -> int:
+    """
+    Detect and pixelate faces in images embedded in an .xlsx workbook.
+
+    The input is copied to ``output_path``; the copy is loaded with openpyxl,
+    every worksheet image containing faces is replaced by a pixelated PNG,
+    and the workbook is saved again.  The output file is written even when
+    no face was found.
+
+    An image that cannot be processed is skipped and the reason is logged
+    through _face_log.
+
+    Returns the number of faces blurred.
+    Raises RuntimeError when openpyxl or OpenCV is unavailable.
+    """
+    if not XLSX_OK:
+        raise RuntimeError("openpyxl required: pip install openpyxl")
+
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        raise RuntimeError("OpenCV not available")
+
+    import shutil
+    shutil.copy2(str(input_path), str(output_path))
+
+    # openpyxl keeps each worksheet's images in its private _images list
+    wb = openpyxl.load_workbook(str(output_path))
+    total_faces = 0
+
+    for sheet in wb.worksheets:
+        for img_obj in getattr(sheet, "_images", []):
+            try:
+                # img_obj.ref holds the image data (file-like object or bytes)
+                raw = img_obj.ref
+                if hasattr(raw, "read"):
+                    # The stream may already have been consumed (e.g. when the
+                    # image header was parsed on load); rewind so the complete
+                    # file is decoded instead of a truncated tail.
+                    if hasattr(raw, "seek"):
+                        raw.seek(0)
+                    raw = raw.read()
+                np_arr  = np.frombuffer(raw, dtype=np.uint8)
+                img_cv2 = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
+                if img_cv2 is None:
+                    continue
+
+                result, count = blur_faces_in_image(img_cv2, blocks=blocks)
+                if count == 0:
+                    continue
+
+                # Re-encode and replace.  The new payload is always PNG, so
+                # the recorded format is updated to match the stored bytes.
+                new_bytes = cv2_to_bytes(result, fmt="PNG")
+                img_obj.ref = io.BytesIO(new_bytes)
+                img_obj.format = "png"
+                total_faces += count
+            except Exception as e:
+                # Best effort: keep going, but log the skipped image.
+                _face_log(f"  [!] Skipped image on sheet '{sheet.title}' in {input_path}: {e}")
+
+    wb.save(str(output_path))
+    return total_faces
+
+# ── Core scanner ──────────────────────────────────────────────────────────────
+
+def scan_pdf(pdf_path: Path, force_ocr=False, lang="dan+eng",
+             dpi=300, poppler_path=None) -> dict:
+    """
+    Scan a PDF page by page for CPR numbers and dates.
+
+    A page for which is_text_page() is true is scanned from its extracted
+    text, unless ``force_ocr`` is set.  Every other page is OCR'd from a
+    rendered image when OCR is available, and recorded as "skipped"
+    otherwise.  If at least one page needs OCR, the whole document is
+    rendered once at ``dpi``.
+
+    Returns {"cprs": [...], "dates": [...], "page_methods": {page: method}}
+    with 1-based page numbers and method in {"text", "ocr", "skipped"}.
+    The dates list is passed through dedup_dates before returning.
+    """
+    results = {"cprs": [], "dates": [], "page_methods": {}}
+
+    with pdfplumber.open(pdf_path) as pdf:
+        rendered = None
+        if OCR_AVAILABLE:
+            if force_ocr:
+                ocr_candidates = list(range(len(pdf.pages)))
+            else:
+                ocr_candidates = [idx for idx, pg in enumerate(pdf.pages)
+                                  if not is_text_page(pg)]
+            if ocr_candidates:
+                print(f"  Rendering pages to images for OCR (DPI={dpi})...", flush=True)
+                rendered = convert_from_path(str(pdf_path), dpi=dpi, poppler_path=poppler_path)
+
+        for page_num, page in enumerate(pdf.pages, start=1):
+            if not force_ocr and is_text_page(page):
+                method = "text"
+                page_text = page.extract_text() or ""
+                cprs, dates = extract_matches(page_text, page_num, "text")
+            elif OCR_AVAILABLE and rendered is not None:
+                method = "ocr"
+                slot = page_num - 1
+                page_image = rendered[slot]
+                # Drop the list's reference so the image can be freed once
+                # OCR of this page is done
+                rendered[slot] = None
+                ocr_text = ocr_page_cached(page_image, lang)
+                cprs, dates = extract_matches(ocr_text, page_num, "ocr")
+                del page_image
+            else:
+                method = "skipped"
+                if not OCR_AVAILABLE:
+                    print(f"  Page {page_num}: image-based but OCR unavailable.")
+                cprs, dates = [], []
+
+            results["page_methods"][page_num] = method
+            results["cprs"].extend(cprs)
+            results["dates"].extend(dates)
+
+    results["dates"] = dedup_dates(results["dates"])
+    return results
+
+
+# ── Output ────────────────────────────────────────────────────────────────────
+
+def print_results(pdf_path: Path, results: dict):
+    """
+    Print a console report for one scanned PDF.
+
+    Shows the page count broken down by extraction method ("text", "ocr",
+    "skipped"), the pages that were OCR'd or skipped, and then every CPR
+    number and date hit.  Hits whose "source" is "ocr" are tagged " [OCR]".
+
+    ``results`` must provide "page_methods", "cprs" and "dates"; each hit
+    dict is read through its "page", "raw" and "source" keys plus
+    "formatted" (CPR hits) or "format" (date hits).
+    """
+    methods = results["page_methods"]
+
+    def _pages_with(tag):
+        return [page for page, how in methods.items() if how == tag]
+
+    def _ocr_tag(hit):
+        return " [OCR]" if hit["source"] == "ocr" else ""
+
+    text_pages = _pages_with("text")
+    ocr_pages = _pages_with("ocr")
+    skip_pages = _pages_with("skipped")
+    rule = "=" * 62
+
+    print(f"\n{rule}")
+    print(f"File : {pdf_path}")
+    print(f"Pages: {len(methods)}  |  text: {len(text_pages)}  |  OCR: {len(ocr_pages)}  |  skipped: {len(skip_pages)}")
+    print(rule)
+    if ocr_pages:
+        print(f"  [OCR]  Applied to page(s): {', '.join(str(p) for p in ocr_pages)}")
+    if skip_pages:
+        print(f"  [SKIP] Skipped page(s): {', '.join(str(p) for p in skip_pages)}")
+
+    cpr_hits = results["cprs"]
+    date_hits = results["dates"]
+
+    print(f"\n  CPR Numbers found: {len(cpr_hits)}")
+    if not cpr_hits:
+        print("    None found.")
+    for entry in cpr_hits:
+        suffix = _ocr_tag(entry)
+        print(f"    Page {entry['page']:>3}: {entry['formatted']:<16}  (raw: \"{entry['raw']}\"){suffix}")
+
+    print(f"\n  Dates found: {len(date_hits)}")
+    if not date_hits:
+        print("    None found.")
+    for entry in date_hits:
+        suffix = _ocr_tag(entry)
+        print(f"    Page {entry['page']:>3}: {entry['raw']:<28}  [{entry['format']}]{suffix}")
+
+    print()
+
+
+# ── Entry point ───────────────────────────────────────────────────────────────
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Scan PDF and Word documents for Danish CPR numbers, dates and personal data.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    parser.add_argument("pdfs", nargs="+", metavar="FILE", help="PDF/Word file(s) or folder(s) to scan")
+    parser.add_argument("--ocr", action="store_true", help="Force OCR on every page")
+    parser.add_argument("--lang", default="dan+eng", metavar="LANG", help="Tesseract language(s), default: dan+eng")
+    parser.add_argument("--dpi", type=int, default=300, metavar="DPI", help="Rendering DPI for OCR, default: 300")
+    parser.add_argument("--poppler", default=None, metavar="PATH", help="Path to Poppler bin folder (Windows)")
+    parser.add_argument("--older-than", type=int, default=None, metavar="DAYS",
+                        help="List files with CPR numbers AND dates older than DAYS")
+    parser.add_argument("--mask", action="store_true",
+                        help="Black out CPR numbers -> <n>_masked.pdf/.docx")
+    parser.add_argument("--anonymise", action="store_true",
+                        help="Black out ALL personal data -> <n>_anonymised.pdf/.docx")
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Scan and report findings without writing any output files")
+    parser.add_argument("--log", default=None, metavar="FILE",
+                        help="Write a structured JSON log of all findings to FILE")
+    parser.add_argument("--blur-faces", action="store_true",
+                        help="Detect and pixelate portrait photos -> <n>_faces.pdf/.docx/.xlsx/.jpg")
+    parser.add_argument("--blur-strength", type=int, default=6, metavar="N",
+                        help="Face blur strength: lower = stronger (default: 6, range: 2-20)")
+    args = parser.parse_args()
+
+    dry_run = args.dry_run
+
+    # Logging setup
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(logging.Formatter("%(message)s"))
+    logger.addHandler(console_handler)
+
+    if dry_run:
+        print("=" * 62)
+        print("  DRY RUN - no files will be written")
+        print("=" * 62 + "\n")
+        _log("INFO", None, "dry_run_started")
+
+    # Dependency warnings
+    if not OCR_AVAILABLE:
+        missing = [m for m, ok in [("pdf2image", PDF2IMAGE_OK), ("pytesseract", TESSERACT_OK)] if not ok]
+        msg = f"OCR disabled - pip install {' '.join(missing)}"
+        print(f"WARNING: {msg}\n")
+        _log("WARNING", None, msg)
+
+    if (args.mask or args.anonymise) and not MASK_AVAILABLE:
+        msg = "--mask/--anonymise require: pip install pypdf reportlab"
+        print(f"WARNING: {msg}\n")
+        _log("WARNING", None, msg)
+
+    if not DOCX_OK:
+        print("INFO: python-docx not installed - .docx files will be skipped.")
+        print("      Install with: pip install python-docx\n")
+        _log("WARNING", None, "python-docx not installed - .docx files skipped")
+
+    if not XLSX_OK:
+        print("INFO: openpyxl not installed - .xlsx/.csv files will be skipped.")
+        print("      Install with: pip install openpyxl\n")
+        _log("WARNING", None, "openpyxl not installed - .xlsx files skipped")
+
+    if args.blur_faces and not CV2_OK:
+        print("WARNING: --blur-faces requires OpenCV: pip install opencv-python\n")
+
+    if args.anonymise:
+        if not SPACY_OK:
+            msg = "--anonymise requires spaCy: pip install spacy"
+            print(f"WARNING: {msg}\n")
+            _log("WARNING", None, msg)
+        else:
+            nlp = load_nlp()
+            if nlp is None:
+                msg = "No spaCy model found - falling back to regex-only"
+                print(f"WARNING: {msg}\n")
+                _log("WARNING", None, msg)
+
+    # Collect files
+    SUPPORTED = {".pdf", ".docx", ".xlsx", ".xlsm", ".csv",
+                 ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp"}
+    all_paths = []
+    for entry in args.pdfs:
+        path = Path(entry)
+        if not path.exists():
+            print(f"Not found: {path}")
+            _log("WARNING", path, "file_not_found")
+        elif path.is_dir():
+            found = sorted(p for p in path.rglob("*") if p.suffix.lower() in SUPPORTED)
+            pdf_count  = sum(1 for p in found if p.suffix.lower() == ".pdf")
+            docx_count = sum(1 for p in found if p.suffix.lower() == ".docx")
+            xlsx_count = sum(1 for p in found if p.suffix.lower() in {".xlsx", ".xlsm", ".csv"})
+            img_count  = sum(1 for p in found if p.suffix.lower() in {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp"})
+            print(f"Found {pdf_count} PDF(s), {docx_count} Word doc(s), {xlsx_count} spreadsheet(s) and {img_count} image(s) in: {path}")
+            _log("INFO", path, "folder_scanned", pdf_count=pdf_count, docx_count=docx_count, xlsx_count=xlsx_count, img_count=img_count)
+            all_paths.extend(found)
+        elif path.suffix.lower() in SUPPORTED:
+            all_paths.append(path)
+        else:
+            print(f"Unsupported file type, skipping: {path}")
+            _log("WARNING", path, "unsupported_type")
+
+    if not all_paths:
+        print("No supported files to process.")
+        _log("INFO", None, "no_files_found")
+        if args.log:
+            flush_log(Path(args.log))
+        return
+
+    # Process files
+    all_results = []
+    for path in all_paths:
+        try:
+            ext = path.suffix.lower()
+
+            if ext in {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp"}:
+                # Standalone image — face blur only (triggered by --blur-faces, --mask, or --anonymise)
+                print(f"\n{'='*62}")
+                print(f"File : {path}  [image]")
+                print(f"{'='*62}")
+                _log("INFO", path, "scanned", file_type="image")
+
+                do_blur = args.blur_faces or args.mask or args.anonymise
+                if do_blur:
+                    if not CV2_OK:
+                        print(f"  [FACE] Skipping - opencv-python not installed.")
+                        print(f"         pip install opencv-python\n")
+                        _log("WARNING", path, "skipped_no_opencv")
+                        continue
+                    out = path.with_stem(path.stem + "_faces")
+                    if dry_run:
+                        print(f"  [DRY-RUN] Would write -> {out.name}  (face blur)\n")
+                        _log("DRY_RUN", path, "face_blur_skipped_dry_run", output=str(out))
+                    else:
+                        print(f"  [FACE] Scanning for faces ...", flush=True)
+                        n = blur_faces_image_file(path, out, blocks=args.blur_strength)
+                        if n:
+                            print(f"  [FACE] Done - {n} face(s) blurred -> {out.name}\n")
+                            _log("ACTION", path, "faces_blurred", output=str(out), faces=n)
+                        else:
+                            out.unlink(missing_ok=True)
+                            print(f"  [FACE] No faces detected - no output written.\n")
+                            _log("INFO", path, "no_faces_detected")
+                else:
+                    print(f"  Image file: use --blur-faces, --mask, or --anonymise to pixelate portraits.\n")
+                    _log("INFO", path, "image_no_action_requested")
+                # Images have no CPR/date data — don't add to all_results
+                continue
+
+            elif ext == ".docx":
+                if not DOCX_OK:
+                    print(f"Skipping {path.name} - python-docx not installed.")
+                    _log("WARNING", path, "skipped_no_python_docx")
+                    continue
+
+                results = scan_docx(path)
+                print_docx_results(path, results)
+                all_results.append((path, results))
+                _log("INFO", path, "scanned",
+                     file_type="docx",
+                     cpr_count=len(results["cprs"]),
+                     date_count=len(results["dates"]),
+                     cprs=[h["formatted"] for h in results["cprs"]])
+
+                if args.mask:
+                    out = path.with_stem(path.stem + "_masked")
+                    if results["cprs"]:
+                        if dry_run:
+                            print(f"  [DRY-RUN] Would write -> {out.name}  ({len(results['cprs'])} CPR region(s))")
+                            _log("DRY_RUN", path, "mask_skipped_dry_run",
+                                 output=str(out), cpr_count=len(results["cprs"]))
+                        else:
+                            print(f"  [MASK] Writing -> {out.name} ...", flush=True)
+                            n = redact_docx(path, out, results, use_ner=False)
+                            print(f"  [MASK] Done - {n} region(s) redacted.\n")
+                            _log("ACTION", path, "masked", output=str(out), regions=n)
+                    else:
+                        print("  [MASK] No CPR numbers found - skipping.\n")
+                        _log("INFO", path, "mask_skipped_no_cpr")
+
+                if args.anonymise:
+                    out = path.with_stem(path.stem + "_anonymised")
+                    if dry_run:
+                        spans = find_pii_spans_in_text(results["_full_text"], use_ner=True)
+                        label_counts = {}
+                        for _, _, lbl in spans:
+                            label_counts[lbl] = label_counts.get(lbl, 0) + 1
+                        summary = "  ".join(f"{lbl}:{c}" for lbl, c in sorted(label_counts.items()))
+                        print(f"  [DRY-RUN] Would write -> {out.name}  ({len(spans)} region(s): {summary})")
+                        _log("DRY_RUN", path, "anonymise_skipped_dry_run",
+                             output=str(out), total_regions=len(spans), by_label=label_counts)
+                    else:
+                        print(f"  [ANON] Writing -> {out.name} ...", flush=True)
+                        n = redact_docx(path, out, results, use_ner=True)
+                        print(f"  [ANON] Done - {n} region(s) redacted.\n")
+                        _log("ACTION", path, "anonymised", output=str(out), regions=n)
+
+                if args.blur_faces:
+                    if not CV2_OK:
+                        print(f"  [FACE] Skipping - opencv-python not installed.")
+                    else:
+                        out = path.with_stem(path.stem + "_faces")
+                        if dry_run:
+                            print(f"  [DRY-RUN] Would write -> {out.name}  (face blur)")
+                            _log("DRY_RUN", path, "face_blur_skipped_dry_run", output=str(out))
+                        else:
+                            print(f"  [FACE] Scanning for faces ...", flush=True)
+                            n = blur_faces_docx(path, out, blocks=args.blur_strength)
+                            if n:
+                                print(f"  [FACE] Done - {n} face(s) blurred -> {out.name}\n")
+                                _log("ACTION", path, "faces_blurred", output=str(out), faces=n)
+                            else:
+                                out.unlink(missing_ok=True)
+                                print(f"  [FACE] No faces detected.\n")
+                                _log("INFO", path, "no_faces_detected")
+
+            elif ext in {".xlsx", ".xlsm"}:
+                if not XLSX_OK:
+                    print(f"Skipping {path.name} - openpyxl not installed.")
+                    _log("WARNING", path, "skipped_no_openpyxl")
+                    continue
+
+                results = scan_xlsx(path)
+                print_xlsx_results(path, results, "xlsx")
+                all_results.append((path, results))
+                _log("INFO", path, "scanned",
+                     file_type="xlsx",
+                     cpr_count=len(results["cprs"]),
+                     date_count=len(results["dates"]),
+                     cprs=[h["formatted"] for h in results["cprs"]])
+
+                if args.mask:
+                    out = path.with_stem(path.stem + "_masked")
+                    if results["cprs"]:
+                        if dry_run:
+                            print(f"  [DRY-RUN] Would write -> {out.name}  ({len(results['cprs'])} CPR cell(s))")
+                            _log("DRY_RUN", path, "mask_skipped_dry_run",
+                                 output=str(out), cpr_count=len(results["cprs"]))
+                        else:
+                            print(f"  [MASK] Writing -> {out.name} ...", flush=True)
+                            n = redact_xlsx(path, out, results, use_ner=False)
+                            print(f"  [MASK] Done - {n} cell(s) redacted.\n")
+                            _log("ACTION", path, "masked", output=str(out), regions=n)
+                    else:
+                        print("  [MASK] No CPR numbers found - skipping.\n")
+                        _log("INFO", path, "mask_skipped_no_cpr")
+
+                if args.anonymise:
+                    out = path.with_stem(path.stem + "_anonymised")
+                    if dry_run:
+                        full_text = " ".join(
+                            _cell_text(c)
+                            for s in results["_wb"].worksheets
+                            for row in s.iter_rows()
+                            for c in row
+                        )
+                        spans = find_pii_spans_in_text(full_text, use_ner=True)
+                        label_counts = {}
+                        for _, _, lbl in spans:
+                            label_counts[lbl] = label_counts.get(lbl, 0) + 1
+                        summary = "  ".join(f"{lbl}:{c}" for lbl, c in sorted(label_counts.items()))
+                        print(f"  [DRY-RUN] Would write -> {out.name}  ({len(spans)} region(s): {summary})")
+                        _log("DRY_RUN", path, "anonymise_skipped_dry_run",
+                             output=str(out), total_regions=len(spans), by_label=label_counts)
+                    else:
+                        print(f"  [ANON] Writing -> {out.name} ...", flush=True)
+                        n = redact_xlsx(path, out, results, use_ner=True)
+                        print(f"  [ANON] Done - {n} cell(s) redacted.\n")
+                        _log("ACTION", path, "anonymised", output=str(out), regions=n)
+
+                if args.blur_faces:
+                    if not CV2_OK:
+                        print(f"  [FACE] Skipping - opencv-python not installed.")
+                    else:
+                        out = path.with_stem(path.stem + "_faces")
+                        if dry_run:
+                            print(f"  [DRY-RUN] Would write -> {out.name}  (face blur)")
+                            _log("DRY_RUN", path, "face_blur_skipped_dry_run", output=str(out))
+                        else:
+                            print(f"  [FACE] Scanning for faces ...", flush=True)
+                            n = blur_faces_xlsx(path, out, blocks=args.blur_strength)
+                            if n:
+                                print(f"  [FACE] Done - {n} face(s) blurred -> {out.name}\n")
+                                _log("ACTION", path, "faces_blurred", output=str(out), faces=n)
+                            else:
+                                out.unlink(missing_ok=True)
+                                print(f"  [FACE] No faces detected.\n")
+                                _log("INFO", path, "no_faces_detected")
+
+            elif ext == ".csv":
+                results = scan_csv(path)
+                print_xlsx_results(path, results, "csv")
+                all_results.append((path, results))
+                _log("INFO", path, "scanned",
+                     file_type="csv",
+                     cpr_count=len(results["cprs"]),
+                     date_count=len(results["dates"]),
+                     cprs=[h["formatted"] for h in results["cprs"]])
+
+                if args.mask:
+                    out = path.with_stem(path.stem + "_masked")
+                    if results["cprs"]:
+                        if dry_run:
+                            print(f"  [DRY-RUN] Would write -> {out.name}  ({len(results['cprs'])} CPR cell(s))")
+                            _log("DRY_RUN", path, "mask_skipped_dry_run",
+                                 output=str(out), cpr_count=len(results["cprs"]))
+                        else:
+                            print(f"  [MASK] Writing -> {out.name} ...", flush=True)
+                            n = redact_csv(path, out, use_ner=False)
+                            print(f"  [MASK] Done - {n} cell(s) redacted.\n")
+                            _log("ACTION", path, "masked", output=str(out), regions=n)
+                    else:
+                        print("  [MASK] No CPR numbers found - skipping.\n")
+                        _log("INFO", path, "mask_skipped_no_cpr")
+
+                if args.anonymise:
+                    out = path.with_stem(path.stem + "_anonymised")
+                    if dry_run:
+                        import csv as _csv
+                        full_text = ""
+                        with open(path, newline="", encoding="utf-8-sig", errors="replace") as f:
+                            for row in _csv.reader(f):
+                                full_text += " ".join(row) + " "
+                        spans = find_pii_spans_in_text(full_text, use_ner=True)
+                        label_counts = {}
+                        for _, _, lbl in spans:
+                            label_counts[lbl] = label_counts.get(lbl, 0) + 1
+                        summary = "  ".join(f"{lbl}:{c}" for lbl, c in sorted(label_counts.items()))
+                        print(f"  [DRY-RUN] Would write -> {out.name}  ({len(spans)} region(s): {summary})")
+                        _log("DRY_RUN", path, "anonymise_skipped_dry_run",
+                             output=str(out), total_regions=len(spans), by_label=label_counts)
+                    else:
+                        print(f"  [ANON] Writing -> {out.name} ...", flush=True)
+                        n = redact_csv(path, out, use_ner=True)
+                        print(f"  [ANON] Done - {n} cell(s) redacted.\n")
+                        _log("ACTION", path, "anonymised", output=str(out), regions=n)
+
+            else:
+                results = scan_pdf(path, force_ocr=args.ocr, lang=args.lang,
+                                   dpi=args.dpi, poppler_path=args.poppler)
+                print_results(path, results)
+                all_results.append((path, results))
+                _log("INFO", path, "scanned",
+                     file_type="pdf",
+                     pages=len(results["page_methods"]),
+                     ocr_pages=sum(1 for m in results["page_methods"].values() if m == "ocr"),
+                     cpr_count=len(results["cprs"]),
+                     date_count=len(results["dates"]),
+                     cprs=[h["formatted"] for h in results["cprs"]])
+
+                if args.mask:
+                    out = path.with_stem(path.stem + "_masked")
+                    if results["cprs"]:
+                        if dry_run:
+                            print(f"  [DRY-RUN] Would write -> {out.name}  ({len(results['cprs'])} CPR region(s))")
+                            _log("DRY_RUN", path, "mask_skipped_dry_run",
+                                 output=str(out), cpr_count=len(results["cprs"]))
+                        else:
+                            print(f"  [MASK] Writing -> {out.name} ...", flush=True)
+                            n = redact_pdf(path, out, results, args.ocr, args.lang,
+                                           args.dpi, args.poppler, use_ner=False)
+                            if n is not False:
+                                print(f"  [MASK] Done - {n} region(s) redacted.\n")
+                                _log("ACTION", path, "masked", output=str(out), regions=n)
+                    else:
+                        print("  [MASK] No CPR numbers found - skipping.\n")
+                        _log("INFO", path, "mask_skipped_no_cpr")
+
+                if args.anonymise:
+                    out = path.with_stem(path.stem + "_anonymised")
+                    if dry_run:
+                        full_text = ""
+                        with pdfplumber.open(path) as _pdf:
+                            for _page in _pdf.pages:
+                                full_text += (_page.extract_text() or "") + " "
+                        spans = find_pii_spans_in_text(full_text, use_ner=True)
+                        label_counts = {}
+                        for _, _, lbl in spans:
+                            label_counts[lbl] = label_counts.get(lbl, 0) + 1
+                        summary = "  ".join(f"{lbl}:{c}" for lbl, c in sorted(label_counts.items()))
+                        print(f"  [DRY-RUN] Would write -> {out.name}  ({len(spans)} region(s): {summary})")
+                        _log("DRY_RUN", path, "anonymise_skipped_dry_run",
+                             output=str(out), total_regions=len(spans), by_label=label_counts)
+                    else:
+                        print(f"  [ANON] Writing -> {out.name} ...", flush=True)
+                        n = redact_pdf(path, out, results, args.ocr, args.lang,
+                                       args.dpi, args.poppler, use_ner=True)
+                        if n is not False:
+                            print(f"  [ANON] Done - {n} region(s) redacted.\n")
+                            _log("ACTION", path, "anonymised", output=str(out), regions=n)
+
+                if args.blur_faces:
+                    if not CV2_OK:
+                        print(f"  [FACE] Skipping - opencv-python not installed.")
+                    elif not OCR_AVAILABLE:
+                        print(f"  [FACE] Skipping - pdf2image required for PDF face blur.")
+                    else:
+                        out = path.with_stem(path.stem + "_faces")
+                        if dry_run:
+                            print(f"  [DRY-RUN] Would write -> {out.name}  (face blur)")
+                            _log("DRY_RUN", path, "face_blur_skipped_dry_run", output=str(out))
+                        else:
+                            print(f"  [FACE] Scanning pages for faces ...", flush=True)
+                            n = blur_faces_pdf(path, out, poppler_path=args.poppler, blocks=args.blur_strength)
+                            if n:
+                                print(f"  [FACE] Done - {n} face(s) blurred -> {out.name}\n")
+                                _log("ACTION", path, "faces_blurred", output=str(out), faces=n)
+                            else:
+                                out.unlink(missing_ok=True)
+                                print(f"  [FACE] No faces detected.\n")
+                                _log("INFO", path, "no_faces_detected")
+
+        except Exception as e:
+            print(f"Error processing {path}: {e}")
+            _log("ERROR", path, str(e))
+
+    if args.older_than is not None:
+        flagged = build_flagged_list(all_results, args.older_than)
+        print_flagged(flagged, args.older_than)
+        _log("INFO", None, "flagged_summary",
+             older_than_days=args.older_than,
+             flagged_count=len(flagged),
+             flagged_files=[str(f["path"]) for f in flagged])
+
+    # Final summary
+    total_cprs     = sum(len(r["cprs"])  for _, r in all_results)
+    total_dates    = sum(len(r["dates"]) for _, r in all_results)
+    files_with_cpr = sum(1 for _, r in all_results if r["cprs"])
+    print(f"{'--'*31}")
+    print(f"  Scanned : {len(all_results)} file(s)")
+    print(f"  CPR nos : {total_cprs} found in {files_with_cpr} file(s)")
+    print(f"  Dates   : {total_dates} found")
+    if dry_run:
+        print("  Mode    : DRY RUN - no files written")
+    print(f"{'--'*31}\n")
+    _log("INFO", None, "scan_complete",
+         files_scanned=len(all_results),
+         total_cprs=total_cprs,
+         total_dates=total_dates,
+         files_with_cpr=files_with_cpr,
+         dry_run=dry_run)
+
+    if args.log:
+        flush_log(Path(args.log))
+
+# Script entry point: run the CLI only when this file is executed directly,
+# not when it is imported as a module.
+# NOTE(review): count_faces_in_file is defined *below* this guard, so in script
+# mode it is not yet bound while main() runs — confirm main() never needs it.
+if __name__ == "__main__":
+    main()
+
+
+def count_faces_in_file(path, poppler_path=None, neighbors: int = 4) -> int:
+    """
+    Return the number of faces detected in a file (image, PDF, docx, xlsx).
+    Uses only this module's cv2/numpy — never triggers a second import from
+    outside (avoids the 'recursion detected during loading cv2' error on macOS).
+    neighbors controls detection strictness: higher = fewer false positives.
+    """
+    import sys as _sys
+    cv2, np = _get_cv2()
+    if cv2 is None:
+        _face_log(f"[face] cv2 unavailable: {_cv2_import_error}")
+        return 0
+
+    ext = Path(path).suffix.lower()
+    total = 0
+    cascades = _get_face_cascades()
+    _face_log(f"[face] {Path(path).name}  ext={ext}  cascades={len(cascades)}  neighbors={neighbors}")
+
+    try:
+        if ext in {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp"}:
+            img = cv2.imread(str(path))
+            _face_log(f"[face]   imread={img is not None}  shape={getattr(img, 'shape', None)}")
+            if img is not None:
+                total = len(detect_faces_cv2(img, neighbors=neighbors))
+                _face_log(f"[face]   detected={total}")
+
+        elif ext == ".pdf":
+            if PYMUPDF_AVAILABLE:
+                import fitz as _fitz
+                doc = _fitz.open(str(path))
+                for page_idx in range(min(5, len(doc))):
+                    pix = doc[page_idx].get_pixmap(dpi=100)
+                    arr = cv2.imdecode(
+                        np.frombuffer(pix.tobytes("jpeg"), np.uint8),
+                        cv2.IMREAD_COLOR)
+                    if arr is not None:
+                        total += len(detect_faces_cv2(arr, neighbors=neighbors))
+                    if total > 0:
+                        break
+                doc.close()
+            else:
+                from pdf2image import convert_from_path
+                pages = convert_from_path(str(path), dpi=100,
+                                          first_page=1, last_page=5,
+                                          poppler_path=poppler_path)
+                for page in pages:
+                    arr = cv2.cvtColor(np.array(page), cv2.COLOR_RGB2BGR)
+                    total += len(detect_faces_cv2(arr, neighbors=neighbors))
+                    if total > 0:
+                        break
+
+        elif ext == ".docx":
+            from docx import Document
+            from docx.enum.shape import WD_INLINE_SHAPE
+            doc = Document(str(path))
+            for shape in doc.inline_shapes:
+                try:
+                    if shape.type != WD_INLINE_SHAPE.PICTURE:
+                        continue
+                    blip = shape._inline.graphic.graphicData.pic.blipFill.blip
+                    blob = doc.part.related_parts[blip.embed].blob
+                    arr  = np.frombuffer(blob, dtype=np.uint8)
+                    img  = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+                    if img is not None:
+                        total += len(detect_faces_cv2(img, neighbors=neighbors))
+                except Exception:
+                    pass
+
+        elif ext in {".xlsx", ".xlsm"}:
+            import openpyxl
+            wb = openpyxl.load_workbook(str(path), read_only=False, data_only=True)
+            for sname in wb.sheetnames:
+                for img_obj in wb[sname]._images:
+                    try:
+                        blob = img_obj._data()
+                        arr  = np.frombuffer(blob, dtype=np.uint8)
+                        img  = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+                        if img is not None:
+                            total += len(detect_faces_cv2(img, neighbors=neighbors))
+                    except Exception:
+                        pass
+            wb.close()
+
+    except Exception:
+        pass
+
+    return total
diff --git a/file_scanner.py b/file_scanner.py
new file mode 100644
index 0000000..e8fdfea
--- /dev/null
+++ b/file_scanner.py
@@ -0,0 +1,600 @@
+"""
+file_scanner.py — Unified local and SMB/CIFS file iterator for GDPR Scanner.
+
+Provides FileScanner.iter_files() which yields (relative_path, bytes, metadata)
+regardless of whether the source is a local path or a network share.
+
+gdpr_scanner.py imports this module and calls iter_files() inside run_file_scan().
+All CPR scanning, card broadcasting, and DB persistence stay in gdpr_scanner.py.
+
+Optional dependencies:
+    smbprotocol>=1.13   — native SMB2/3 without mounting (pip install smbprotocol)
+    keyring>=25.0       — OS keychain credential storage  (pip install keyring)
+    python-dotenv>=1.0  — .env file fallback              (pip install python-dotenv)
+
+If smbprotocol is not installed, the scanner falls back to local-path mode.
+"""
+
+from __future__ import annotations
+
+import os
+import time
+import uuid
+import hashlib
+from pathlib import Path, PurePosixPath
+from typing import Iterator
+
+# ── Optional dependency flags ─────────────────────────────────────────────────
+
+try:
+    import smbprotocol  # noqa: F401 — just checking availability
+    from smbprotocol.connection import Connection
+    from smbprotocol.session import Session
+    from smbprotocol.tree import TreeConnect
+    from smbprotocol.open import (
+        Open, CreateDisposition, CreateOptions,
+        FileAttributes, FilePipePrinterAccessMask, ShareAccess,
+        ImpersonationLevel,
+    )
+    from smbprotocol.query_info import FileDirectoryInformation
+    SMB_OK = True
+except ImportError:
+    SMB_OK = False
+
+try:
+    import keyring as _keyring
+    KEYRING_OK = True
+except ImportError:
+    KEYRING_OK = False
+
+try:
+    from dotenv import dotenv_values as _dotenv_values
+    DOTENV_OK = True
+except ImportError:
+    DOTENV_OK = False
+
+
+# ── Public constants ──────────────────────────────────────────────────────────
+
+# Service name under which SMB passwords are looked up and stored in the OS
+# keychain by get_smb_password() / store_smb_password().
+KEYCHAIN_SERVICE = "gdpr-scanner-nas"
+
+# File extensions passed through to _scan_bytes().  Matches SUPPORTED_EXTS in
+# gdpr_scanner.py; kept here too so FileScanner can filter without importing it.
+# All entries are lowercase and include the leading dot.
+DEFAULT_EXTENSIONS = {
+    ".pdf", ".docx", ".doc", ".xlsx", ".xlsm", ".csv",
+    ".txt", ".eml", ".msg",
+    ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp",
+    ".heic", ".heif",
+}
+
+# Extensions for local/SMB file scans — PDFs now included; OCR runs in a spawned
+# subprocess with a 60-second hard timeout via _scan_bytes_timeout so hanging
+# Tesseract/Poppler processes can never block the scan thread indefinitely.
+# This is an alias of DEFAULT_EXTENSIONS (the same set object, not a copy).
+FILE_SCAN_EXTENSIONS = DEFAULT_EXTENSIONS
+
+# Maximum file size to load into memory (bytes).  Files larger than this are
+# skipped with a warning — same guard used by the M365 attachment scanner.
+MAX_FILE_BYTES = 20 * 1024 * 1024  # 20 MB
+
+# SMB pre-fetch sliding window (#22)
+PREFETCH_WINDOW  = 1   # 1 SMB read in flight — halves peak concurrent buffer memory
+SMB_READ_TIMEOUT = 60  # seconds before an individual SMB read is abandoned
+
+# Directories to silently skip — system/sync/trash folders that never contain
+# user documents and would only generate noise or permission errors.
+# Entries must be lowercase: the local walker compares them against the
+# lower-cased directory name.
+SKIP_DIRS = {
+    ".recycle", ".recycler", "recycler", "$recycle.bin", ".trash", ".trashes",
+    ".sync", ".btsync", ".syncthing",
+    ".git", ".svn", ".hg",
+    "__pycache__", "node_modules",
+    ".spotlight-v100", ".fseventsd", ".temporaryitems",
+    "system volume information", "lost+found",
+}
+
+
+# ── Credential helpers ────────────────────────────────────────────────────────
+
+def get_smb_password(smb_host: str, smb_user: str,
+                     keychain_key: str | None = None) -> str | None:
+    """Return SMB password from the best available source.
+
+    Priority:
+        1. OS keychain via keyring (keychain_key or smb_user as account name)
+        2. NAS_PASSWORD environment variable
+        3. .env file in the current working directory
+    """
+    # 1. OS keychain
+    if KEYRING_OK:
+        account = keychain_key or smb_user
+        try:
+            pw = _keyring.get_password(KEYCHAIN_SERVICE, account)
+            if pw:
+                return pw
+        except Exception:
+            pass
+
+    # 2. Environment variable
+    pw = os.environ.get("NAS_PASSWORD")
+    if pw:
+        return pw
+
+    # 3. .env file
+    if DOTENV_OK:
+        env = _dotenv_values(".env")
+        pw = env.get("NAS_PASSWORD")
+        if pw:
+            return pw
+
+    return None
+
+
+def store_smb_password(smb_host: str, smb_user: str,
+                       password: str,
+                       keychain_key: str | None = None) -> bool:
+    """Store SMB password in the OS keychain.  Returns True on success."""
+    if not KEYRING_OK:
+        return False
+    account = keychain_key or smb_user
+    try:
+        _keyring.set_password(KEYCHAIN_SERVICE, account, password)
+        return True
+    except Exception:
+        return False
+
+
+# ── FileScanner ───────────────────────────────────────────────────────────────
+
+class FileScanner:
+    """Unified local + SMB/CIFS file iterator."""
+
+    FILE_SCAN_EXTENSIONS = FILE_SCAN_EXTENSIONS  # alias of the module-level set (same as DEFAULT_EXTENSIONS, so .pdf is included)
+    """Unified iterator over local paths and SMB/CIFS network shares.
+
+    Usage::
+
+        fs = FileScanner("/mnt/data")
+        for rel_path, content, meta in fs.iter_files():
+            result = _scan_bytes(content, rel_path)
+            ...
+
+        fs = FileScanner("//nas.school.dk/shares",
+                         smb_host="nas.school.dk",
+                         smb_user="DOMAIN\\\\henrik",
+                         smb_password="secret")
+        for rel_path, content, meta in fs.iter_files():
+            ...
+    """
+
+    def __init__(
+        self,
+        path: str,
+        smb_host: str | None = None,
+        smb_user: str | None = None,
+        smb_password: str | None = None,
+        smb_domain: str | None = None,
+        keychain_key: str | None = None,
+        max_file_bytes: int = MAX_FILE_BYTES,
+    ):
+        self.path           = path
+        self.smb_user       = smb_user
+        self.smb_domain     = smb_domain or ""
+        self.keychain_key   = keychain_key
+        self.max_file_bytes = max_file_bytes
+
+        # Detect SMB path by prefix; auto-derive host if not provided
+        _is_smb_path = path.startswith("//") or path.startswith("\\\\")
+        if _is_smb_path and not smb_host:
+            # Extract host from path: //host/share → host
+            _norm = path.replace("\\", "/").lstrip("/")
+            smb_host = _norm.split("/")[0] or None
+        self.smb_host = smb_host
+
+        self.is_smb = _is_smb_path and SMB_OK
+
+        # Resolve password from keychain / env / .env if not provided directly
+        self._password = smb_password
+        if self.is_smb and not self._password:
+            self._password = get_smb_password(
+                smb_host or "", smb_user or "", keychain_key
+            )
+
+    # ── Public ────────────────────────────────────────────────────────────────
+
+    def iter_files(
+        self,
+        extensions: set[str] | None = None,
+        progress_cb=None,
+    ) -> Iterator[tuple[str, bytes, dict]]:
+        """Yield (relative_path, content_bytes, metadata) for every scannable file.
+
+        Args:
+            extensions:  Set of lowercase extensions to include, e.g. {".pdf", ".docx"}.
+                         Defaults to DEFAULT_EXTENSIONS.
+            progress_cb: Optional callable(rel_path) called before each file is read,
+                         so the caller can update a progress indicator.
+
+        Yields:
+            rel_path  — path relative to the root (e.g. "subfolder/doc.pdf")
+            content   — raw bytes of the file
+            metadata  — dict with keys: size_kb, modified, source_type, source_root
+        """
+        exts = extensions or DEFAULT_EXTENSIONS
+
+        if self.is_smb:
+            yield from self._iter_smb(exts, progress_cb)
+        else:
+            yield from self._iter_local(exts, progress_cb)
+
+    @property
+    def source_type(self) -> str:
+        return "smb" if self.is_smb else "local"
+
+    @staticmethod
+    def smb_available() -> bool:
+        """Return True when the smbprotocol imports at module load succeeded."""
+        return SMB_OK
+
+    # ── Local walker ──────────────────────────────────────────────────────────
+
+    def _iter_local(self, exts: set[str], progress_cb) -> Iterator[tuple[str, bytes, dict]]:
+        root = Path(self.path).expanduser().resolve()
+        if not root.exists():
+            raise FileNotFoundError(f"Path not found: {root}")
+
+        for dirpath, _dirs, filenames in os.walk(root):
+            # Skip junk/system directories in-place
+            _dirs[:] = [d for d in _dirs if d.lower() not in SKIP_DIRS and not d.startswith(".")]
+            for fname in filenames:
+                full = Path(dirpath) / fname
+                ext  = full.suffix.lower()
+                if ext not in exts:
+                    continue
+
+                try:
+                    size = full.stat().st_size
+                except OSError:
+                    continue
+
+                if size > self.max_file_bytes:
+                    yield _skip(str(full.relative_to(root)), size, "local", str(root))
+                    continue
+
+                rel = str(full.relative_to(root))
+                if progress_cb:
+                    progress_cb(rel)
+
+                try:
+                    content  = full.read_bytes()
+                    modified = time.strftime(
+                        "%Y-%m-%d",
+                        time.localtime(full.stat().st_mtime)
+                    )
+                    meta = {
+                        "size_kb":     round(size / 1024, 1),
+                        "modified":    modified,
+                        "source_type": "local",
+                        "source_root": str(root),
+                        "full_path":   str(full),
+                        "skipped":     False,
+                    }
+                    yield rel, content, meta
+                except (OSError, PermissionError) as e:
+                    yield _error(rel, str(e), "local", str(root))
+
+    # ── SMB walker ────────────────────────────────────────────────────────────
+
+    def _iter_smb(self, exts: set[str], progress_cb) -> Iterator[tuple[str, bytes, dict]]:
+        """Walk an SMB share using smbprotocol with a sliding-window pre-fetcher.
+
+        Directory traversal and file reads are decoupled:
+          1. _smb_collect() walks the tree metadata-only (fast — no file I/O).
+          2. A ThreadPoolExecutor submits _smb_read_file() calls up to
+             PREFETCH_WINDOW at a time. Each future has SMB_READ_TIMEOUT seconds
+             to complete; timed-out reads yield an error sentinel and are abandoned
+             without blocking the scan thread.
+        """
+        if not SMB_OK:
+            raise RuntimeError(
+                "smbprotocol not installed — run: pip install smbprotocol"
+            )
+
+        # Parse //host/share/optional/subpath — normalise backslashes
+        norm = self.path.replace("\\", "/").lstrip("/")
+        parts = norm.split("/", 2)
+        host  = parts[0] if len(parts) > 0 else self.smb_host or ""
+        share = parts[1] if len(parts) > 1 else ""
+        sub   = parts[2] if len(parts) > 2 else ""
+
+        if not host or not share:
+            raise ValueError(
+                f"Cannot parse SMB path '{self.path}' — expected //host/share[/subpath]"
+            )
+
+        source_root = f"//{host}/{share}"
+
+        conn = Connection(uuid.uuid4(), host, 445)
+        conn.connect(timeout=30)
+        try:
+            session = Session(conn,
+                              username=self.smb_user or "",
+                              password=self._password or "",
+                              require_encryption=False)
+            session.connect()
+            try:
+                tree = TreeConnect(session, f"\\\\{host}\\{share}")
+                tree.connect()
+                try:
+                    # Phase 1: collect all candidate file descriptors (no reads)
+                    candidates = list(self._smb_collect(
+                        tree, sub, sub, exts, source_root
+                    ))
+
+                    # Phase 2: resolve sentinels, then sliding-window parallel reads
+                    # Sentinels from _smb_collect are yielded immediately; only real
+                    # file entries enter the executor queue.
+                    real_candidates = []
+                    for item in candidates:
+                        marker = item[0]
+                        if marker is _COLLECT_ERROR:
+                            yield _error(item[1] or ".", item[4], "smb", source_root)
+                        elif marker is _COLLECT_SKIP:
+                            yield _skip(item[1], item[2], "smb", source_root)
+                        else:
+                            real_candidates.append(item)
+
+                    from concurrent.futures import ThreadPoolExecutor
+                    from collections import deque
+
+                    pending: deque = deque()  # (future, display_rel, size, modified, src_root)
+
+                    def _submit_next(item):
+                        display_rel, smb_path, size, modified, src_root = item
+                        fut = executor.submit(_smb_read_file, tree, smb_path)
+                        pending.append((fut, display_rel, size, modified, src_root))
+
+                    with ThreadPoolExecutor(max_workers=PREFETCH_WINDOW) as executor:
+                        it = iter(real_candidates)
+                        # Seed the window
+                        for item in it:
+                            if progress_cb:
+                                progress_cb(item[0])
+                            _submit_next(item)
+                            if len(pending) >= PREFETCH_WINDOW:
+                                break
+
+                        while pending:
+                            fut, display_rel, size, modified, src_root = pending.popleft()
+
+                            # Submit the next candidate to keep the window full
+                            nxt = next(it, None)
+                            if nxt is not None:
+                                if progress_cb:
+                                    progress_cb(nxt[0])
+                                _submit_next(nxt)
+
+                            try:
+                                content = fut.result(timeout=SMB_READ_TIMEOUT)
+                                meta = {
+                                    "size_kb":     round(size / 1024, 1),
+                                    "modified":    modified,
+                                    "source_type": "smb",
+                                    "source_root": src_root,
+                                    "full_path":   f"{src_root}/{display_rel}",
+                                    "skipped":     False,
+                                }
+                                yield display_rel, content, meta
+                            except TimeoutError:
+                                fut.cancel()
+                                yield _error(display_rel,
+                                             f"SMB read timed out after {SMB_READ_TIMEOUT}s",
+                                             "smb", src_root)
+                            except Exception as e:
+                                err = str(e)
+                                if "STATUS_END_OF_FILE" in err or "0xc0000011" in err:
+                                    continue  # empty/placeholder — skip silently
+                                yield _error(display_rel, err, "smb", src_root)
+
+                finally:
+                    tree.disconnect()
+            finally:
+                session.disconnect()
+        finally:
+            conn.disconnect()
+
+    def _smb_collect(
+        self,
+        tree,
+        directory: str,
+        root_sub: str,
+        exts: set[str],
+        source_root: str,
+    ) -> Iterator[tuple[str, str, int, str, str]]:
+        """Recursively walk an SMB directory tree, yielding file descriptors only.
+
+        Only directory listings are queried; no file content is read here.
+
+        Args:
+            tree:        Connected SMB tree, passed through to _smb_list_dir.
+            directory:   Share-relative directory to list, "/"-separated ("" = root).
+            root_sub:    Scan root inside the share; stripped from display paths.
+            exts:        Lower-case file suffixes (including the dot) to keep.
+            source_root: Label copied into every file/skip tuple.
+
+        Yields:
+            5-tuples whose first element selects the shape:
+
+            * (display_rel, smb_path, size_bytes, modified_str, source_root)
+              for each matching file.
+            * (_COLLECT_SKIP, display_rel, size_bytes, "", source_root)
+              for a matching file larger than self.max_file_bytes.
+            * (_COLLECT_ERROR, display_dir, 0, "", error_message)
+              when a directory cannot be listed.
+        """
+        query_path = directory.replace("/", "\\") if directory else ""
+        pattern    = (query_path + "\\" if query_path else "") + "*"
+
+        try:
+            entries = _smb_list_dir(tree, pattern)
+        except Exception as e:
+            # The consumer reports item[1] as the path and item[4] as the error
+            # text, so pass the failing directory and the real message rather
+            # than an empty path and source_root.
+            display_dir = directory[len(root_sub):].lstrip("/") if root_sub else directory
+            yield _COLLECT_ERROR, display_dir, 0, "", str(e)
+            return
+
+        for entry in entries:
+            name = entry["name"]
+            if name in (".", ".."):
+                continue
+
+            rel = (directory + "/" + name) if directory else name
+            display_rel = rel[len(root_sub):].lstrip("/") if root_sub else rel
+            display_rel = display_rel or name
+
+            is_dir = bool(entry["attributes"] & 0x10)  # FILE_ATTRIBUTE_DIRECTORY
+            size   = entry["size"]
+
+            if is_dir:
+                # "." and ".." were filtered above, so any remaining dot-name
+                # is a hidden directory.
+                if name.lower() in SKIP_DIRS or name.startswith("."):
+                    continue
+                yield from self._smb_collect(tree, rel, root_sub, exts, source_root)
+                continue
+
+            ext = PurePosixPath(name).suffix.lower()
+            if ext not in exts:
+                continue
+
+            if size > self.max_file_bytes:
+                # Over-size: reported as a skip, never queued for reading.
+                yield _COLLECT_SKIP, display_rel, size, "", source_root
+                continue
+
+            modified = _smb_ts(entry.get("last_write_time", 0))
+            yield display_rel, rel.replace("/", "\\"), size, modified, source_root
+
+
+# Sentinel strings for _smb_collect side-channel messages.  They take the place
+# of the display path as the first tuple element.  The consumer compares them
+# with `is`, so these exact module-level objects must be yielded rather than
+# equal string literals built elsewhere.
+_COLLECT_ERROR = "\x00__error__"
+_COLLECT_SKIP  = "\x00__skip__"
+
+
+# ── SMB helpers ───────────────────────────────────────────────────────────────
+
+def uuid4_str() -> str:
+    """Return a freshly generated random (version 4) UUID in string form."""
+    from uuid import uuid4
+    return str(uuid4())
+
+
+def _smb_list_dir(tree, pattern: str) -> list[dict]:
+    """List directory entries matching pattern on an SMB tree.
+
+    Everything before the last path separator of ``pattern`` is the directory
+    to open; the last component is the wildcard sent to the server ("*" when
+    empty).  The open directory is queried repeatedly until the server reports
+    that the enumeration is finished, so listings larger than one 64 KiB
+    response are returned completely instead of being cut short.
+
+    Returns:
+        One dict per entry with the keys "name", "attributes", "size" and
+        "last_write_time".  "." and ".." are not filtered out here.
+
+    Raises:
+        Whatever smbprotocol raises when the directory cannot be opened, or
+        when a query fails for a reason other than SMBOSError or the end of
+        the listing.
+    """
+    from smbprotocol.open import (
+        Open, CreateDisposition, CreateOptions,
+        FileAttributes, DirectoryAccessMask, ShareAccess,
+        ImpersonationLevel, FileInformationClass,
+    )
+    from smbprotocol.file_info import FileDirectoryInformation
+    import smbprotocol.exceptions as smb_exc
+
+    # NT status the server sends once every entry has been returned.
+    status_no_more_files = 0x80000006
+
+    # Split "dir\sub\*" into the directory to open and the wildcard.
+    parts = pattern.replace("/", "\\").split("\\")
+    dir_path = "\\".join(parts[:-1])
+    file_pattern = parts[-1] or "*"
+
+    fh = Open(tree, dir_path or "")
+    fh.create(
+        ImpersonationLevel.Impersonation,
+        DirectoryAccessMask.FILE_LIST_DIRECTORY |
+        DirectoryAccessMask.FILE_READ_ATTRIBUTES,
+        FileAttributes.FILE_ATTRIBUTE_DIRECTORY,
+        ShareAccess.FILE_SHARE_READ | ShareAccess.FILE_SHARE_WRITE |
+        ShareAccess.FILE_SHARE_DELETE,
+        CreateDisposition.FILE_OPEN,
+        CreateOptions.FILE_DIRECTORY_FILE,
+    )
+
+    entries = []
+    try:
+        while True:
+            try:
+                # Each call on the same handle continues where the previous
+                # response stopped.
+                raw = fh.query_directory(
+                    pattern=file_pattern,
+                    file_information_class=FileInformationClass.FILE_DIRECTORY_INFORMATION,
+                    flags=0,
+                    max_output=65536,
+                )
+            except smb_exc.SMBResponseException as exc:
+                if getattr(exc, "status", None) == status_no_more_files:
+                    break  # normal end of enumeration
+                raise
+            if not raw:
+                break
+            for info in raw:
+                fname = info["file_name"].get_value()
+                if isinstance(fname, bytes):
+                    fname = fname.decode("utf-16-le", errors="replace").rstrip("\x00")
+                attrs = info["file_attributes"].get_value()
+                entries.append({
+                    "name":            fname,
+                    "attributes":      int(attrs) if not isinstance(attrs, int) else attrs,
+                    "size":            info["end_of_file"].get_value(),
+                    "last_write_time": info["last_write_time"].get_value(),
+                })
+    except smb_exc.SMBOSError:
+        pass  # Empty directory or no match
+    finally:
+        # Best-effort close; a failure here must not hide the listing result.
+        try:
+            fh.close(get_attributes=False)
+        except Exception:
+            pass
+
+    return entries
+
+
+def _smb_read_file(tree, smb_path: str) -> bytes:
+    """Read a complete file from an SMB tree into bytes.
+
+    The file is opened read-only and fetched in 1 MiB chunks until a short or
+    empty read signals the end.  When the size is an exact multiple of the
+    chunk size, the extra read at the end fails with STATUS_END_OF_FILE; that
+    is treated as normal completion so the data already read is returned.
+
+    Raises:
+        Whatever smbprotocol raises for open/read failures.  This includes
+        STATUS_END_OF_FILE on the very first read (an empty file), which the
+        caller handles itself.
+    """
+    from smbprotocol.open import (
+        Open, CreateDisposition, CreateOptions,
+        FileAttributes, FilePipePrinterAccessMask, ShareAccess,
+        ImpersonationLevel,
+    )
+
+    fh = Open(tree, smb_path)
+    fh.create(
+        ImpersonationLevel.Impersonation,
+        FilePipePrinterAccessMask.FILE_READ_DATA |
+        FilePipePrinterAccessMask.FILE_READ_ATTRIBUTES,
+        FileAttributes.FILE_ATTRIBUTE_NORMAL,
+        ShareAccess.FILE_SHARE_READ,
+        CreateDisposition.FILE_OPEN,
+        CreateOptions.FILE_NON_DIRECTORY_FILE,
+    )
+    try:
+        chunks = []
+        offset = 0
+        chunk_size = 1024 * 1024  # 1 MB chunks
+        while True:
+            try:
+                data = fh.read(offset, chunk_size)
+            except Exception as exc:
+                msg = str(exc)
+                # Past the first chunk, end-of-file only means the previous
+                # read ended exactly on the file boundary.
+                if offset and ("STATUS_END_OF_FILE" in msg or "0xc0000011" in msg):
+                    break
+                raise
+            if not data:
+                break
+            chunks.append(bytes(data))
+            offset += len(data)
+            if len(data) < chunk_size:
+                break
+        return b"".join(chunks)
+    finally:
+        fh.close(get_attributes=False)
+
+
+def _smb_ts(windows_ts: "int | datetime | None") -> str:
+    """Convert an SMB timestamp to a YYYY-MM-DD string (UTC).
+
+    Accepts either a raw Windows FILETIME (100 ns intervals since 1601-01-01)
+    or an already-decoded ``datetime``/``date``.  Presumably the SMB library
+    hands back ``datetime`` objects for timestamp fields (confirm against its
+    docs); those previously ended up as "" because they cannot be subtracted
+    from an int.
+
+    Returns:
+        The ISO date, or "" for a missing (falsy) or unconvertible value.
+    """
+    from datetime import date, datetime, timedelta, timezone
+
+    if not windows_ts:
+        return ""
+    try:
+        if isinstance(windows_ts, datetime):
+            # Normalise aware values to UTC so the date matches the int path.
+            if windows_ts.tzinfo is not None:
+                windows_ts = windows_ts.astimezone(timezone.utc)
+            return windows_ts.date().isoformat()
+        if isinstance(windows_ts, date):
+            return windows_ts.isoformat()
+        # Ten FILETIME ticks per microsecond.  Pure datetime arithmetic also
+        # handles pre-1970 values without relying on the platform's gmtime().
+        stamp = datetime(1601, 1, 1) + timedelta(microseconds=windows_ts // 10)
+        return stamp.date().isoformat()
+    except Exception:
+        return ""
+
+
+# ── Sentinel yield helpers ────────────────────────────────────────────────────
+
+def _skip(rel: str, size: int, source_type: str, source_root: str):
+    """Build the (rel, None, meta) triple describing a file skipped for its size.
+
+    The content slot is None and meta["skipped"] is True; the size is reported
+    in KiB (one decimal) and, inside the reason text, in whole MiB.
+    """
+    whole_mb = size // 1_048_576
+    meta = {
+        "size_kb":     round(size / 1024, 1),
+        "modified":    "",
+        "source_type": source_type,
+        "source_root": source_root,
+        "full_path":   f"{source_root}/{rel}",
+        "skipped":     True,
+        "skip_reason": f"File too large ({whole_mb} MB)",
+    }
+    return rel, None, meta
+
+
+def _error(rel: str, error: str, source_type: str, source_root: str):
+    """Build the (rel, None, meta) triple describing a file that failed.
+
+    The failure is reported as a skipped entry: the content slot is None,
+    meta["skipped"] is True and meta["skip_reason"] carries the error text.
+    """
+    meta = {
+        "size_kb":     0,
+        "modified":    "",
+        "source_type": source_type,
+        "source_root": source_root,
+        "full_path":   f"{source_root}/{rel}",
+        "skipped":     True,
+        "skip_reason": f"Error: {error}",
+    }
+    return rel, None, meta
diff --git a/gdpr_db.py b/gdpr_db.py
new file mode 100644
index 0000000..1f21e34
--- /dev/null
+++ b/gdpr_db.py
@@ -0,0 +1,954 @@
+#!/usr/bin/env python3
+"""
+gdpr_db.py — SQLite persistence layer for GDPRScanner
+
+Stores scan results alongside the existing JSON cache.  Neither replaces the
+other: JSON is fast and portable, SQLite enables querying, trending, and the
+data-subject index.
+
+Database location: ~/.gdprscanner/scanner.db  (DB_PATH; override with ScanDB(path=...))
+
+Schema
+------
+    scans          one row per completed scan run
+    flagged_items  one row per flagged file / email
+    cpr_index      (cpr_hash, item_id) — powers data-subject lookup
+    pii_hits       per-type PII counts per item
+    dispositions   compliance officer decisions per item
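+    scan_history   aggregated stats for trend tracking
+    deletion_log   audit trail of deleted items
+    schedule_runs  one row per scheduled run (created by migration 7)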
+
+Usage (from gdpr_scanner.py)
+-----------------------------
+    from gdpr_db import ScanDB
+    db = ScanDB()
+    scan_id = db.begin_scan(options)
+    db.save_item(scan_id, card, cprs)      # called for each flagged card
+    db.finish_scan(scan_id, total_scanned)
+    db.close()
+"""
+
+import hashlib
+import json
+import sqlite3
+import time
+from pathlib import Path
+from typing import Iterator
+
+from pathlib import Path as _P
+_DATA_DIR = _P.home() / ".gdprscanner"
+_DATA_DIR.mkdir(exist_ok=True)
+DB_PATH = _DATA_DIR / "scanner.db"
+
+# ── Retention cutoff helper ──────────────────────────────────────────────────
+
+def overdue_cutoff(years: int = 5, fiscal_year_end: str | None = None) -> str:
+    """Return the ISO date string before which items are considered overdue.
+
+    Two modes:
+    - Rolling (default, fiscal_year_end=None):
+        Exactly N years before today.
+        E.g. years=5 on 2026-03-17 -> 2021-03-17
+        Correct for GDPR general data minimisation.
+
+    - Fiscal year end (fiscal_year_end="MM-DD", e.g. "12-31"):
+        N years before the most recently completed fiscal year end.
+        E.g. years=5, FY end Dec 31, run on 2026-03-17:
+          Last FY end = 2025-12-31  ->  cutoff = 2020-12-31
+        Documents from the FY ending 2020-12-31 expire on 2025-12-31,
+        so on 2026-03-17 they are overdue. This is correct for
+        Bogforingsloven (Danish bookkeeping law) which requires records
+        for 5 years from the END of the financial year.
+        A fiscal year ending today is not yet counted as completed.
+
+    Whenever a computed date would be 29 February in a non-leap year,
+    28 February is used instead.
+
+    Raises:
+        ValueError: fiscal_year_end is not a valid "MM-DD" calendar day, or
+            the resulting year falls outside the range ``date`` supports.
+    """
+    from datetime import date
+
+    def _day_in_year(year: int, month: int, day: int) -> date:
+        """Build the date, falling back to Feb 28 when Feb 29 does not exist."""
+        try:
+            return date(year, month, day)
+        except ValueError:
+            if (month, day) == (2, 29):
+                return date(year, 2, 28)
+            raise
+
+    today = date.today()
+
+    if fiscal_year_end:
+        # Parse MM-DD and check it is a real calendar day.  A leap year is used
+        # for the check so that "02-29" is accepted.
+        try:
+            month, day = (int(x) for x in fiscal_year_end.split("-"))
+            date(2000, month, day)
+        except (ValueError, AttributeError):
+            raise ValueError(f"fiscal_year_end must be MM-DD, got {fiscal_year_end!r}")
+
+        # Find the most recently completed fiscal year end date
+        fy_end = _day_in_year(today.year, month, day)
+        if fy_end >= today:
+            # This year's FY end is today or in the future -- use last year's
+            fy_end = _day_in_year(today.year - 1, month, day)
+
+        # Cutoff is N years before that FY end
+        cutoff = _day_in_year(fy_end.year - years, month, day)
+    else:
+        # Rolling: exactly N years before today
+        cutoff = _day_in_year(today.year - years, today.month, today.day)
+
+    return cutoff.isoformat()
+
+
+# ── Schema DDL ────────────────────────────────────────────────────────────────
+_DDL = """
+PRAGMA journal_mode = WAL;
+PRAGMA foreign_keys = ON;
+
+CREATE TABLE IF NOT EXISTS scans (
+    id            INTEGER PRIMARY KEY AUTOINCREMENT,
+    started_at    REAL    NOT NULL,
+    finished_at   REAL,
+    sources       TEXT    NOT NULL DEFAULT '[]',   -- JSON array
+    user_count    INTEGER NOT NULL DEFAULT 0,
+    options       TEXT    NOT NULL DEFAULT '{}',   -- JSON object
+    total_scanned INTEGER NOT NULL DEFAULT 0,
+    flagged_count INTEGER NOT NULL DEFAULT 0,
+    delta         INTEGER NOT NULL DEFAULT 0       -- 0=full, 1=delta
+);
+
+CREATE TABLE IF NOT EXISTS flagged_items (
+    id          TEXT    NOT NULL,                  -- Graph item ID
+    scan_id     INTEGER NOT NULL REFERENCES scans(id) ON DELETE CASCADE,
+    name        TEXT    NOT NULL DEFAULT '',
+    source      TEXT    NOT NULL DEFAULT '',
+    source_type TEXT    NOT NULL DEFAULT '',       -- email/onedrive/sharepoint/teams
+    account_id  TEXT    NOT NULL DEFAULT '',
+    folder      TEXT    NOT NULL DEFAULT '',
+    url         TEXT    NOT NULL DEFAULT '',
+    drive_id    TEXT    NOT NULL DEFAULT '',
+    size_kb     REAL    NOT NULL DEFAULT 0,
+    modified    TEXT    NOT NULL DEFAULT '',       -- YYYY-MM-DD
+    cpr_count   INTEGER NOT NULL DEFAULT 0,
+    risk        TEXT,                              -- LOW/MEDIUM/HIGH
+    user_role   TEXT    NOT NULL DEFAULT 'other',  -- student/staff/other
+    thumb_b64   TEXT    NOT NULL DEFAULT '',
+    thumb_mime  TEXT    NOT NULL DEFAULT 'image/svg+xml',
+    attachments TEXT    NOT NULL DEFAULT '[]',     -- JSON array
+    scanned_at  REAL    NOT NULL,
+    PRIMARY KEY (id, scan_id)
+);
+
+CREATE TABLE IF NOT EXISTS cpr_index (
+    cpr_hash    TEXT    NOT NULL,                  -- SHA-256 of the raw CPR string
+    item_id     TEXT    NOT NULL,
+    scan_id     INTEGER NOT NULL REFERENCES scans(id) ON DELETE CASCADE,
+    first_seen  REAL    NOT NULL,
+    PRIMARY KEY (cpr_hash, item_id, scan_id)
+);
+
+CREATE TABLE IF NOT EXISTS pii_hits (
+    item_id     TEXT    NOT NULL,
+    scan_id     INTEGER NOT NULL REFERENCES scans(id) ON DELETE CASCADE,
+    pii_type    TEXT    NOT NULL,                  -- phone/email/iban/name/address/org
+    hit_count   INTEGER NOT NULL DEFAULT 0,
+    PRIMARY KEY (item_id, scan_id, pii_type)
+);
+
+CREATE TABLE IF NOT EXISTS dispositions (
+    item_id       TEXT    NOT NULL PRIMARY KEY,
+    status        TEXT    NOT NULL DEFAULT 'unreviewed',
+    legal_basis   TEXT,
+    notes         TEXT,
+    reviewed_by   TEXT,
+    reviewed_at   REAL
+);
+
+CREATE TABLE IF NOT EXISTS scan_history (
+    scan_id           INTEGER PRIMARY KEY REFERENCES scans(id) ON DELETE CASCADE,
+    scan_date         TEXT    NOT NULL,            -- YYYY-MM-DD
+    flagged_count     INTEGER NOT NULL DEFAULT 0,
+    special_category  INTEGER NOT NULL DEFAULT 0,
+    overdue_count     INTEGER NOT NULL DEFAULT 0,
+    deleted_count     INTEGER NOT NULL DEFAULT 0,
+    sources_json      TEXT    NOT NULL DEFAULT '{}'
+);
+
+CREATE TABLE IF NOT EXISTS deletion_log (
+    id            INTEGER PRIMARY KEY AUTOINCREMENT,
+    deleted_at    REAL    NOT NULL,                -- Unix timestamp
+    item_id       TEXT    NOT NULL,
+    item_name     TEXT    NOT NULL DEFAULT '',
+    source_type   TEXT    NOT NULL DEFAULT '',     -- email/onedrive/sharepoint/teams
+    account_id    TEXT    NOT NULL DEFAULT '',
+    account_name  TEXT    NOT NULL DEFAULT '',
+    cpr_count     INTEGER NOT NULL DEFAULT 0,
+    reason        TEXT    NOT NULL DEFAULT 'manual',  -- manual/bulk/retention/data-subject-request
+    legal_basis   TEXT    NOT NULL DEFAULT '',     -- from dispositions table if set
+    deleted_by    TEXT    NOT NULL DEFAULT '',     -- authenticated user or "headless"
+    scan_id       INTEGER                          -- which scan found this item (nullable)
+);
+
+CREATE INDEX IF NOT EXISTS idx_dellog_time    ON deletion_log(deleted_at);
+CREATE INDEX IF NOT EXISTS idx_dellog_item    ON deletion_log(item_id);
+CREATE INDEX IF NOT EXISTS idx_dellog_reason  ON deletion_log(reason);
+
+-- Indexes
+CREATE INDEX IF NOT EXISTS idx_items_scan    ON flagged_items(scan_id);
+CREATE INDEX IF NOT EXISTS idx_items_source  ON flagged_items(source_type);
+CREATE INDEX IF NOT EXISTS idx_items_account ON flagged_items(account_id);
+CREATE INDEX IF NOT EXISTS idx_items_risk    ON flagged_items(risk);
+CREATE INDEX IF NOT EXISTS idx_cpr_hash      ON cpr_index(cpr_hash);
+CREATE INDEX IF NOT EXISTS idx_cpr_item      ON cpr_index(item_id);
+CREATE INDEX IF NOT EXISTS idx_history_date  ON scan_history(scan_date);
+"""
+
+# ── Migration helpers ─────────────────────────────────────────────────────────
+_MIGRATIONS: list[tuple[int, str]] = [
+    # (version, sql)
+    # Each runs once and is recorded in the user_version pragma.
+    # ScanDB._run_migrations walks this list in order and applies every entry
+    # whose version is greater than the stored user_version.
+    # NOTE: _DDL already declares user_role, so on a freshly created database
+    # migration 1 fails with a duplicate-column error, which _run_migrations
+    # ignores before recording the version.
+    (1, "ALTER TABLE flagged_items ADD COLUMN user_role TEXT NOT NULL DEFAULT 'other'"),
+    (2, "ALTER TABLE flagged_items ADD COLUMN transfer_risk TEXT NOT NULL DEFAULT ''"),
+    (3, "ALTER TABLE flagged_items ADD COLUMN special_category TEXT NOT NULL DEFAULT '[]'"),
+    (4, "ALTER TABLE flagged_items ADD COLUMN face_count INTEGER NOT NULL DEFAULT 0"),
+    (5, "ALTER TABLE flagged_items ADD COLUMN exif_json TEXT NOT NULL DEFAULT '{}'"),
+    (6, "ALTER TABLE flagged_items ADD COLUMN full_path TEXT NOT NULL DEFAULT ''"),
+    # Bookkeeping table for scheduled runs; not part of _DDL.
+    (7, """CREATE TABLE IF NOT EXISTS schedule_runs (
+        id          INTEGER PRIMARY KEY AUTOINCREMENT,
+        started_at  REAL    NOT NULL,
+        finished_at REAL,
+        status      TEXT    NOT NULL DEFAULT 'running',
+        profile_id  TEXT    NOT NULL DEFAULT '',
+        flagged     INTEGER NOT NULL DEFAULT 0,
+        scanned     INTEGER NOT NULL DEFAULT 0,
+        emailed     INTEGER NOT NULL DEFAULT 0,
+        error       TEXT    NOT NULL DEFAULT ''
+    )"""),
+]
+
+
+class ScanDB:
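+    """SQLite wrapper for GDPRScanner results.
+
+    A single lazily opened connection is shared by all callers.  It is created
+    with ``check_same_thread=False`` so it can be used from several threads,
+    but this class does no locking of its own.
+    """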
+
+    def __init__(self, path: Path = DB_PATH):
+        """Remember the database location; nothing is opened until first use.
+
+        Args:
+            path: SQLite file to use (defaults to the module-level DB_PATH).
+        """
+        self._path = path
+        # Opened lazily by _connect() and cleared again by close().
+        self._conn: sqlite3.Connection | None = None
+
+    # ── Connection ────────────────────────────────────────────────────────────
+
+    def _connect(self) -> sqlite3.Connection:
+        """Return the shared connection, opening and initialising it on first use.
+
+        On first use the database file is opened, the schema in _DDL is applied
+        and pending migrations are run.  If any of that fails the connection is
+        closed and not cached, so the next call retries from scratch instead of
+        reusing a half-initialised handle.
+
+        Raises:
+            sqlite3.Error: the database cannot be opened or initialised.
+        """
+        if self._conn is not None:
+            return self._conn
+
+        conn = sqlite3.connect(
+            str(self._path),
+            check_same_thread=False,
+            timeout=15,
+        )
+        try:
+            conn.row_factory = sqlite3.Row
+            conn.executescript(_DDL)
+            conn.commit()
+            # _run_migrations() works on self._conn, so publish the handle first.
+            self._conn = conn
+            self._run_migrations()
+        except Exception:
+            self._conn = None
+            conn.close()
+            raise
+        return conn
+
+    def _run_migrations(self) -> None:
+        """Apply every entry of _MIGRATIONS newer than the stored user_version.
+
+        After each migration the version is written to ``PRAGMA user_version``
+        and committed.  A "duplicate column name" error is tolerated, because
+        _DDL may already have created the column on a fresh database.  Any
+        other failure is raised *without* recording the version, so a broken
+        migration is retried on the next start instead of being marked done.
+
+        Raises:
+            sqlite3.Error: a migration failed for any other reason.
+        """
+        conn = self._conn
+        cur_ver = conn.execute("PRAGMA user_version").fetchone()[0]
+        for ver, sql in _MIGRATIONS:
+            if ver <= cur_ver:
+                continue
+            try:
+                conn.executescript(sql)
+            except sqlite3.OperationalError as exc:
+                if "duplicate column name" not in str(exc).lower():
+                    raise
+            # ver comes from the module's own migration table, not from user input.
+            conn.execute(f"PRAGMA user_version = {ver}")
+            conn.commit()
+
+    def close(self) -> None:
+        """Close the cached connection, if there is one, and forget it.
+
+        Errors raised while closing are ignored.
+        """
+        conn = self._conn
+        if not conn:
+            return
+        try:
+            conn.close()
+        except Exception:
+            pass
+        self._conn = None
+
+    def reset(self) -> None:
+        """Wipe stored scan data and rebuild an empty schema.
+
+        Drops the deletion_log, pii_hits, cpr_index, dispositions,
+        scan_history, flagged_items and scans tables and sets user_version
+        back to 0.  The connection is then closed and reopened so that
+        _connect() recreates the schema and re-runs the migrations.
+        Everything stored in those tables is permanently lost.
+        """
+        conn = self._connect()
+        # Tables referencing scans come first; scans itself goes last.
+        doomed = (
+            "deletion_log", "pii_hits", "cpr_index",
+            "dispositions", "scan_history", "flagged_items", "scans",
+        )
+        for name in doomed:
+            conn.execute(f"DROP TABLE IF EXISTS {name}")
+        conn.execute("PRAGMA user_version = 0")
+        conn.commit()
+        # Drop the handle so the next _connect() starts from scratch.
+        self.close()
+        self._connect()
+
+
+
+    def begin_scan(self, options: dict) -> int:
+        """Insert a new row into scans and return its id.
+
+        Reads three optional keys from ``options``: "sources" (stored as JSON),
+        "user_ids" (only its length is stored) and "options" (stored as JSON;
+        a truthy "delta" entry marks the scan as a delta scan).
+        """
+        conn = self._connect()
+        source_list = options.get("sources", [])
+        users = options.get("user_ids", [])
+        settings = options.get("options", {})
+        is_delta = int(bool(settings.get("delta")))
+        row = (
+            time.time(),
+            json.dumps(source_list),
+            len(users),
+            json.dumps(settings),
+            is_delta,
+        )
+        cursor = conn.execute(
+            """INSERT INTO scans
+               (started_at, sources, user_count, options, delta)
+               VALUES (?, ?, ?, ?, ?)""",
+            row,
+        )
+        conn.commit()
+        return cursor.lastrowid
+
+    def save_item(self, scan_id: int, card: dict, cprs: list | None = None,
+                  pii_counts: dict | None = None) -> None:
+        """Persist one flagged item and its CPR/PII data atomically.
+
+        The item row, its CPR hashes and its PII counts are written in a single
+        transaction: either all of them are committed, or (on any exception)
+        none are, so a failure cannot leave a half-written item pending on the
+        shared connection.
+
+        Args:
+            scan_id:    Scan the item belongs to.
+            card:       Item fields; missing keys fall back to the defaults below.
+            cprs:       Raw CPR values; only their SHA-256 hashes are stored.
+            pii_counts: Mapping of PII type to hit count; entries with a count
+                        that is falsy or not greater than 0 are not stored.
+        """
+        c = self._connect()
+        now = time.time()
+        item_id = card.get("id", "")
+
+        item_sql = (
+            """INSERT OR REPLACE INTO flagged_items
+               (id, scan_id, name, source, source_type, account_id, folder,
+                url, drive_id, size_kb, modified, cpr_count, risk,
+                thumb_b64, thumb_mime, attachments, user_role, transfer_risk,
+                special_category, face_count, exif_json, full_path, scanned_at)
+               VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"""
+        )
+        cpr_sql = """INSERT OR IGNORE INTO cpr_index
+                       (cpr_hash, item_id, scan_id, first_seen)
+                       VALUES (?,?,?,?)"""
+        pii_sql = """INSERT OR REPLACE INTO pii_hits
+                           (item_id, scan_id, pii_type, hit_count)
+                           VALUES (?,?,?,?)"""
+
+        # The connection context manager commits on success and rolls back if
+        # anything inside raises.
+        with c:
+            c.execute(
+                item_sql,
+                (
+                    item_id,
+                    scan_id,
+                    card.get("name", ""),
+                    card.get("source", ""),
+                    card.get("source_type", ""),
+                    card.get("account_id", ""),
+                    card.get("folder", ""),
+                    card.get("url", ""),
+                    card.get("drive_id", ""),
+                    card.get("size_kb", 0),
+                    card.get("modified", ""),
+                    card.get("cpr_count", 0),
+                    card.get("risk"),
+                    card.get("thumb_b64", ""),
+                    card.get("thumb_mime", "image/svg+xml"),
+                    json.dumps(card.get("attachments", [])),
+                    card.get("user_role", "other"),
+                    card.get("transfer_risk", ""),
+                    json.dumps(card.get("special_category", [])),
+                    card.get("face_count", 0),
+                    json.dumps(card.get("exif", {})),
+                    card.get("full_path", ""),
+                    now,
+                ),
+            )
+
+            # CPR index — store hash only (never store raw CPR numbers in DB)
+            if cprs:
+                c.executemany(
+                    cpr_sql,
+                    [
+                        (hashlib.sha256(str(cpr).encode()).hexdigest(),
+                         item_id, scan_id, now)
+                        for cpr in cprs
+                    ],
+                )
+
+            # PII hit counts
+            for pii_type, count in (pii_counts or {}).items():
+                if count and count > 0:
+                    c.execute(pii_sql, (item_id, scan_id, pii_type, count))
+
+    def finish_scan(self, scan_id: int, total_scanned: int,
+                    deleted_count: int = 0) -> None:
+        """Stamp the scan as finished and store its scan_history summary row.
+
+        Counts the scan's flagged items (total, per source type, overdue under
+        a rolling 5-year cutoff, and with a non-empty special_category),
+        updates the scans row and inserts or replaces the scan_history row.
+        """
+        conn = self._connect()
+        finished = time.time()
+
+        def count_items(sql: str, *extra) -> int:
+            """Run a COUNT(*) query whose first parameter is scan_id."""
+            return conn.execute(sql, (scan_id, *extra)).fetchone()[0]
+
+        flagged_total = count_items(
+            "SELECT COUNT(*) FROM flagged_items WHERE scan_id=?"
+        )
+
+        conn.execute(
+            """UPDATE scans SET finished_at=?, total_scanned=?, flagged_count=?
+               WHERE id=?""",
+            (finished, total_scanned, flagged_total, scan_id),
+        )
+
+        # Flagged items per source type, stored as JSON in the history row.
+        per_source = {}
+        for source_type, n in conn.execute(
+            """SELECT source_type, COUNT(*) FROM flagged_items
+               WHERE scan_id=? GROUP BY source_type""",
+            (scan_id,),
+        ).fetchall():
+            per_source[source_type] = n
+        sources_json = json.dumps(per_source)
+
+        # History always uses the rolling 5-year window as its baseline.
+        overdue_total = count_items(
+            """SELECT COUNT(*) FROM flagged_items
+               WHERE scan_id=? AND modified != ''
+               AND date(modified) < ?""",
+            overdue_cutoff(5),
+        )
+
+        special_total = count_items(
+            """SELECT COUNT(*) FROM flagged_items
+               WHERE scan_id=? AND special_category != '[]' AND special_category != ''"""
+        )
+
+        history_row = (
+            scan_id,
+            time.strftime("%Y-%m-%d", time.localtime(finished)),
+            flagged_total,
+            special_total,
+            overdue_total,
+            deleted_count,
+            sources_json,
+        )
+        conn.execute(
+            """INSERT OR REPLACE INTO scan_history
+               (scan_id, scan_date, flagged_count, special_category,
+                overdue_count, deleted_count, sources_json)
+               VALUES (?,?,?,?,?,?,?)""",
+            history_row,
+        )
+
+        conn.commit()
+
+    # ── Query helpers ─────────────────────────────────────────────────────────
+
+    def latest_scan_id(self) -> int | None:
+        """Return the highest id among finished scans, or None if there is none."""
+        found = self._connect().execute(
+            "SELECT id FROM scans WHERE finished_at IS NOT NULL ORDER BY id DESC LIMIT 1"
+        ).fetchone()
+        if not found:
+            return None
+        return found[0]
+
+    def get_flagged_items(self, scan_id: int | None = None) -> list[dict]:
+        """Return one scan's flagged items, highest cpr_count first.
+
+        Falls back to the latest finished scan when scan_id is falsy.  Each
+        dict carries the item columns plus "disposition" ('unreviewed' when
+        none is recorded), with "attachments" decoded from JSON.
+        """
+        sid = scan_id or self.latest_scan_id()
+        if not sid:
+            return []
+        cursor = self._connect().execute(
+            """SELECT fi.*, COALESCE(d.status, 'unreviewed') AS disposition
+               FROM flagged_items fi
+               LEFT JOIN dispositions d ON d.item_id = fi.id
+               WHERE fi.scan_id=? ORDER BY fi.cpr_count DESC""",
+            (sid,),
+        )
+        return [
+            {**rec, "attachments": json.loads(rec.get("attachments") or "[]")}
+            for rec in map(dict, cursor.fetchall())
+        ]
+
+    def get_session_items(self, window_seconds: int = 300) -> list[dict]:
+        """Return flagged items from every scan belonging to the latest session.
+
+        The reference point is the start time of the finished scan with the
+        highest id.  Every finished scan that started no earlier than
+        *window_seconds* before that moment counts as part of the session,
+        which groups scans launched together that each got their own scan_id.
+        Items are ordered by cpr_count, highest first, with "attachments"
+        decoded from JSON and "disposition" defaulting to 'unreviewed'.
+        """
+        conn = self._connect()
+        newest = conn.execute(
+            "SELECT started_at FROM scans WHERE finished_at IS NOT NULL ORDER BY id DESC LIMIT 1"
+        ).fetchone()
+        if not newest:
+            return []
+        earliest_start = newest[0] - window_seconds
+        cursor = conn.execute(
+            """SELECT fi.*, COALESCE(d.status, 'unreviewed') AS disposition
+               FROM flagged_items fi
+               JOIN scans s ON fi.scan_id = s.id
+               LEFT JOIN dispositions d ON d.item_id = fi.id
+               WHERE s.started_at >= ? AND s.finished_at IS NOT NULL
+               ORDER BY fi.cpr_count DESC""",
+            (earliest_start,),
+        )
+        return [
+            {**rec, "attachments": json.loads(rec.get("attachments") or "[]")}
+            for rec in map(dict, cursor.fetchall())
+        ]
+
+    def lookup_data_subject(self, cpr: str) -> list[dict]:
+        """Return every flagged item indexed under the given CPR number.
+
+        The CPR is hashed with SHA-256 and matched against cpr_index; the raw
+        value is never sent to the database.  Results span all scans, newest
+        "modified" first, each with "cpr_first_seen" added and "attachments"
+        decoded from JSON.
+        """
+        digest = hashlib.sha256(str(cpr).encode()).hexdigest()
+        cursor = self._connect().execute(
+            """SELECT fi.*, ci.first_seen AS cpr_first_seen
+               FROM cpr_index ci
+               JOIN flagged_items fi ON fi.id = ci.item_id AND fi.scan_id = ci.scan_id
+               WHERE ci.cpr_hash = ?
+               ORDER BY fi.modified DESC""",
+            (digest,),
+        )
+        return [
+            {**rec, "attachments": json.loads(rec.get("attachments") or "[]")}
+            for rec in map(dict, cursor.fetchall())
+        ]
+
+    def get_overdue_items(self, years: int = 5,
+                          scan_id: int | None = None,
+                          fiscal_year_end: str | None = None) -> list[dict]:
+        """Return a scan's items last modified before the retention cutoff.
+
+        Args:
+            years:            Retention period in years (default 5).
+            scan_id:          Scan to query; a falsy value selects the latest
+                              finished scan.
+            fiscal_year_end:  "MM-DD" to align the cutoff with a fiscal year
+                              end (e.g. "12-31"); None uses a rolling window
+                              counted back from today.
+
+        Returns:
+            Item dicts, oldest "modified" first, each extended with
+            "cutoff_date" and "cutoff_mode" ("fiscal" or "rolling").  Items
+            with an empty "modified" value are never included.
+        """
+        sid = scan_id or self.latest_scan_id()
+        if not sid:
+            return []
+        cutoff = overdue_cutoff(years, fiscal_year_end)
+        mode = "fiscal" if fiscal_year_end else "rolling"
+        cursor = self._connect().execute(
+            """SELECT * FROM flagged_items
+               WHERE scan_id=? AND modified != ''
+               AND date(modified) < ?
+               ORDER BY modified ASC""",
+            (sid, cutoff),
+        )
+        overdue = []
+        for row in cursor.fetchall():
+            rec = dict(row)
+            rec["cutoff_date"] = cutoff
+            rec["cutoff_mode"] = mode
+            overdue.append(rec)
+        return overdue
+
+    def get_trend(self, last_n: int = 20) -> list[dict]:
+        """Return the newest ``last_n`` scan_history rows in chronological order.
+
+        Each dict also carries the "delta" and "sources" columns of the
+        matching scans row.  Rows are selected newest-first by scan_id and
+        then reversed, so the oldest of the selected rows comes first.
+        """
+        newest_first = self._connect().execute(
+            """SELECT sh.*, s.delta, s.sources
+               FROM scan_history sh
+               JOIN scans s ON s.id = sh.scan_id
+               ORDER BY sh.scan_id DESC LIMIT ?""",
+            (last_n,),
+        ).fetchall()
+        return [dict(row) for row in newest_first[::-1]]
+
+    def set_disposition(self, item_id: str, status: str,
+                        legal_basis: str = "", notes: str = "",
+                        reviewed_by: str = "") -> None:
+        """Store (or overwrite) the review decision for an item and commit it.
+
+        There is one dispositions row per item_id; saving again replaces the
+        previous decision and stamps reviewed_at with the current time.
+        """
+        conn = self._connect()
+        decision = (item_id, status, legal_basis, notes, reviewed_by, time.time())
+        conn.execute(
+            """INSERT OR REPLACE INTO dispositions
+               (item_id, status, legal_basis, notes, reviewed_by, reviewed_at)
+               VALUES (?,?,?,?,?,?)""",
+            decision,
+        )
+        conn.commit()
+
+    def get_disposition(self, item_id: str) -> dict | None:
+        """Return the stored dispositions row for item_id as a dict, or None."""
+        found = self._connect().execute(
+            "SELECT * FROM dispositions WHERE item_id=?", (item_id,)
+        ).fetchone()
+        if not found:
+            return None
+        return dict(found)
+
+    def get_prior_disposition(self, item_id: str) -> str | None:
+        """Return the item's recorded status, or None when nothing meaningful is set.
+
+        A missing row, an empty status and the placeholder 'unreviewed' all
+        yield None.
+        """
+        found = self._connect().execute(
+            "SELECT status FROM dispositions WHERE item_id=?", (item_id,)
+        ).fetchone()
+        status = found[0] if found else None
+        if not status or status == "unreviewed":
+            return None
+        return status
+
+    def get_stats(self, scan_id: int | None = None) -> dict:
+        """Summarise one scan (default: the latest) as a plain dict.
+
+        Returns {} when no scan id can be resolved or the scans row is
+        missing.  "overdue_count" is computed against overdue_cutoff(5).
+        """
+        sid = scan_id or self.latest_scan_id()
+        if not sid:
+            return {}
+        conn = self._connect()
+        scan = conn.execute("SELECT * FROM scans WHERE id=?", (sid,)).fetchone()
+        if not scan:
+            return {}
+        per_source = {}
+        for src, n_items, n_hits in conn.execute(
+            """SELECT source_type, COUNT(*), SUM(cpr_count)
+               FROM flagged_items WHERE scan_id=? GROUP BY source_type""",
+            (sid,),
+        ).fetchall():
+            per_source[src] = {"items": n_items, "cpr_hits": n_hits}
+        (unique_subjects,) = conn.execute(
+            "SELECT COUNT(DISTINCT cpr_hash) FROM cpr_index WHERE scan_id=?",
+            (sid,),
+        ).fetchone()
+        (overdue,) = conn.execute(
+            """SELECT COUNT(*) FROM flagged_items
+               WHERE scan_id=? AND modified != ''
+               AND date(modified) < ?""",
+            (sid, overdue_cutoff(5)),
+        ).fetchone()
+        stats = {"scan_id": sid}
+        for column in ("started_at", "finished_at", "total_scanned", "flagged_count"):
+            stats[column] = scan[column]
+        stats["unique_subjects"] = unique_subjects
+        stats["overdue_count"] = overdue
+        stats["delta"] = bool(scan["delta"])
+        stats["by_source"] = per_source
+        return stats
+
+    def iter_all_items(self, scan_id: int | None = None) -> Iterator[dict]:
+        """Lazily yield each flagged item of a scan (default: latest) as a dict.
+
+        Rows stream from the cursor in id order; the JSON "attachments"
+        column is decoded into a Python value (empty/NULL becomes []).
+        """
+        sid = scan_id or self.latest_scan_id()
+        if sid:
+            query = "SELECT * FROM flagged_items WHERE scan_id=? ORDER BY id"
+            for record in map(dict, self._connect().execute(query, (sid,))):
+                raw = record.get("attachments")
+                record["attachments"] = json.loads(raw or "[]")
+                yield record
+
+    def scans_list(self, limit: int = 50) -> list[dict]:
+        """List up to ``limit`` finished scans, newest id first.
+
+        Each summary's "sources" JSON text is decoded to a Python value."""
+        cursor = self._connect().execute(
+            """SELECT id, started_at, finished_at, sources, user_count,
+                      total_scanned, flagged_count, delta
+               FROM scans
+               WHERE finished_at IS NOT NULL
+               ORDER BY id DESC LIMIT ?""",
+            (limit,),
+        )
+        summaries = [dict(rec) for rec in cursor.fetchall()]
+        for entry in summaries:
+            entry["sources"] = json.loads(entry.get("sources") or "[]")
+        return summaries
+
+    def log_deletion(self, item: dict, reason: str = "manual",
+                     deleted_by: str = "", scan_id: int | None = None) -> None:
+        """Append one audit row to deletion_log and commit it.
+
+        Args:
+            item:       flagged_item-style dict; the keys id, name, source_type,
+                        account_id and account_name default to "" and cpr_count
+                        defaults to 0 when absent
+            reason:     "manual" | "bulk" | "retention" | "data-subject-request"
+            deleted_by: identity of the actor — authenticated M365 user UPN,
+                        "headless" for scheduled runs, or "" for UI with no user context
+            scan_id:    which scan originally found this item (optional)
+
+        The row is stamped with time.time(); its legal_basis is copied from the
+        item's disposition when one exists, otherwise it is "".
+        """
+        conn = self._connect()
+        deleted_at = time.time()
+
+        # Pull legal_basis from dispositions table if available
+        disposition = self.get_disposition(item.get("id", ""))
+        legal_basis = (disposition.get("legal_basis", "") or "") if disposition else ""
+
+        # Parameter order must mirror the INSERT column list below.
+        text_columns = ("id", "name", "source_type", "account_id", "account_name")
+        params = (
+            deleted_at,
+            *(item.get(key, "") for key in text_columns),
+            item.get("cpr_count", 0),
+            reason, legal_basis, deleted_by, scan_id,
+        )
+
+        conn.execute(
+            """INSERT INTO deletion_log
+               (deleted_at, item_id, item_name, source_type, account_id,
+                account_name, cpr_count, reason, legal_basis, deleted_by, scan_id)
+               VALUES (?,?,?,?,?,?,?,?,?,?,?)""",
+            params,
+        )
+        conn.commit()
+
+    def get_deletion_log(self, limit: int = 500,
+                         reason: str | None = None) -> list[dict]:
+        """Fetch up to ``limit`` deletion audit rows, newest deleted_at first.
+
+        A truthy ``reason`` restricts the result to rows with that reason.
+        """
+        if reason:
+            sql, args = (
+                "SELECT * FROM deletion_log WHERE reason=? ORDER BY deleted_at DESC LIMIT ?",
+                (reason, limit))
+        else:
+            sql, args = (
+                "SELECT * FROM deletion_log ORDER BY deleted_at DESC LIMIT ?",
+                (limit,))
+        return list(map(dict, self._connect().execute(sql, args).fetchall()))
+
+    def deletion_log_stats(self) -> dict:
+        """Aggregate the deletion log: row total, per-reason counts, summed CPR hits."""
+        conn = self._connect()
+
+        def scalar(sql: str):
+            return conn.execute(sql).fetchone()[0]
+
+        total = scalar("SELECT COUNT(*) FROM deletion_log")
+        reason_rows = conn.execute(
+            "SELECT reason, COUNT(*) FROM deletion_log GROUP BY reason").fetchall()
+        hits = scalar("SELECT SUM(cpr_count) FROM deletion_log")
+        by_reason = dict((row[0], row[1]) for row in reason_rows)
+        return {"total": total, "by_reason": by_reason, "cpr_hits_deleted": hits or 0}
+
+    def delete_item_record(self, item_id: str, scan_id: int | None = None) -> None:
+        """Drop an item's flagged_items and cpr_index rows, then commit.
+
+        A truthy ``scan_id`` limits the delete to that scan's rows."""
+        if scan_id:
+            args = (item_id, scan_id)
+            statements = ("DELETE FROM flagged_items WHERE id=? AND scan_id=?",
+                          "DELETE FROM cpr_index WHERE item_id=? AND scan_id=?")
+        else:
+            args = (item_id,)
+            statements = ("DELETE FROM flagged_items WHERE id=?",
+                          "DELETE FROM cpr_index WHERE item_id=?")
+        conn = self._connect()
+        for sql in statements:
+            conn.execute(sql, args)
+        conn.commit()
+
+
+    # ── Scheduler runs ────────────────────────────────────────────────────────
+
+    def begin_schedule_run(self, profile_id: str = "") -> int:
+        """Insert a schedule_runs row stamped with time.time() and commit.
+        Returns the new row's id (the cursor's lastrowid)."""
+        import time
+        conn = self._connect()
+        sql = "INSERT INTO schedule_runs (started_at, profile_id) VALUES (?, ?)"
+        inserted = conn.execute(sql, (time.time(), profile_id))
+        conn.commit()
+        return inserted.lastrowid
+
+    def finish_schedule_run(self, run_id: int, *, status: str = "completed",
+                            flagged: int = 0, scanned: int = 0,
+                            emailed: int = 0, error: str = "") -> None:
+        """Stamp schedule_runs row ``run_id`` as finished with its outcome, then commit."""
+        import time
+        conn = self._connect()
+        conn.execute(
+            """UPDATE schedule_runs
+               SET finished_at=?, status=?, flagged=?, scanned=?, emailed=?, error=?
+               WHERE id=?""",
+            (time.time(), status, flagged, scanned, emailed, error, run_id))
+        conn.commit()
+
+    def get_schedule_runs(self, limit: int = 20) -> list[dict]:
+        """Return up to ``limit`` schedule_runs rows as dicts,
+        most recently started first."""
+        sql = "SELECT * FROM schedule_runs ORDER BY started_at DESC LIMIT ?"
+        found = self._connect().execute(sql, (limit,)).fetchall()
+        return list(map(dict, found))
+
+
+    def export_db(self, out_path: Path) -> dict:
+        """Write every table as a JSON member of a deflate-compressed ZIP.
+
+        Members:
+            export_meta.json   — export time, schema version, DB path, row counts
+            scans.json         — rows of the scans table
+            flagged_items.json — flagged items with the thumb_b64 column dropped
+            cpr_index.json     — rows of cpr_index
+            pii_hits.json      — rows of pii_hits
+            dispositions.json  — rows of dispositions
+            scan_history.json  — rows of scan_history
+            deletion_log.json  — rows of deletion_log
+            schedule_runs.json — rows of schedule_runs
+
+        Missing parent directories of ``out_path`` are created.  Returns the
+        dict that is serialised as export_meta.json.
+        """
+        import zipfile as _zf, json as _json, datetime as _dt
+
+        conn = self._connect()
+        # Table name -> columns omitted from the export.
+        plan = {
+            "scans":         (),
+            "flagged_items": ("thumb_b64",),
+            "cpr_index":     (),
+            "pii_hits":      (),
+            "dispositions":  (),
+            "scan_history":  (),
+            "deletion_log":  (),
+            "schedule_runs": (),
+        }
+
+        tables = {}
+        for table, dropped in plan.items():
+            tables[table] = [
+                {col: val for col, val in dict(rec).items() if col not in dropped}
+                for rec in conn.execute(f"SELECT * FROM {table}").fetchall()
+            ]
+
+        schema_ver = conn.execute("PRAGMA user_version").fetchone()[0]
+        meta = {
+            "exported_at":    _dt.datetime.now().isoformat(),
+            "schema_version": schema_ver,
+            "db_path":        str(self._path),
+            "row_counts":     {name: len(rows) for name, rows in tables.items()},
+        }
+
+        target = Path(out_path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        with _zf.ZipFile(target, "w", _zf.ZIP_DEFLATED, compresslevel=9) as archive:
+            archive.writestr("export_meta.json", _json.dumps(meta, indent=2))
+            for name, rows in tables.items():
+                archive.writestr(f"{name}.json", _json.dumps(rows, indent=2, default=str))
+        return meta
+
+    def import_db(self, zip_path: Path, mode: str = "merge") -> dict:
+        """Import a previously exported ZIP archive into the database.
+
+        Args:
+            zip_path: Path to the export ZIP file.
+            mode:     "replace" — call reset() first, then restore every
+                                  table present in the archive.
+                      any other value (default "merge") — keep existing
+                                  data and import only dispositions,
+                                  deletion_log and schedule_runs.
+
+        Dispositions overwrite existing rows with the same item_id
+        (INSERT OR REPLACE); every other table keeps the existing row on a
+        key conflict (INSERT OR IGNORE).  Missing archive members are
+        treated as empty tables.
+
+        A row that cannot be inserted (e.g. its JSON object lacks a
+        column) is skipped; the number of skipped rows is logged per table.
+
+        Returns:
+            {"mode": ..., "exported_at": ..., "imported": {table: n}}
+            where n counts rows actually written — rows ignored on
+            conflict or skipped on error are not included.
+
+        Raises:
+            FileNotFoundError: ``zip_path`` does not exist.
+            ValueError: the archive has no export_meta.json member.
+        """
+        import zipfile as _zf, json as _json, logging as _logging
+
+        _log = _logging.getLogger(__name__)
+        zip_path = Path(zip_path)
+        if not zip_path.exists():
+            raise FileNotFoundError(f"Export file not found: {zip_path}")
+
+        # Everything is parsed before reset() runs, so an unreadable
+        # archive raises here and leaves the database untouched.
+        with _zf.ZipFile(zip_path, "r") as zf:
+            names = zf.namelist()
+            if "export_meta.json" not in names:
+                raise ValueError("Not a valid GDPRScanner export — missing export_meta.json")
+
+            meta = _json.loads(zf.read("export_meta.json"))
+
+            def _load(fname: str) -> list[dict]:
+                if fname not in names:
+                    return []
+                return _json.loads(zf.read(fname))
+
+            scans         = _load("scans.json")
+            flagged_items = _load("flagged_items.json")
+            cpr_index     = _load("cpr_index.json")
+            pii_hits      = _load("pii_hits.json")
+            dispositions  = _load("dispositions.json")
+            scan_history  = _load("scan_history.json")
+            deletion_log  = _load("deletion_log.json")
+            schedule_runs = _load("schedule_runs.json")
+
+        if mode == "replace":
+            self.reset()
+
+        c = self._connect()
+        imported: dict[str, int] = {}
+
+        def _insert_rows(table: str, sql: str, rows: list[dict]) -> None:
+            """Execute ``sql`` once per row; store the written-row count."""
+            written = skipped = 0
+            for row in rows:
+                try:
+                    # rowcount is 0 when OR IGNORE drops a conflicting row
+                    written += max(c.execute(sql, row).rowcount, 0)
+                except Exception as exc:
+                    skipped += 1
+                    _log.debug("[import] %s row skipped: %s", table, exc)
+            if skipped:
+                _log.warning("[import] %s: skipped %d of %d rows",
+                             table, skipped, len(rows))
+            imported[table] = written
+
+        if mode == "replace":
+            # Full restore — import all tables
+            _insert_rows("scans", """INSERT OR IGNORE INTO scans
+                (id,started_at,finished_at,sources,user_count,
+                 options,total_scanned,flagged_count,delta)
+                VALUES (:id,:started_at,:finished_at,:sources,:user_count,
+                 :options,:total_scanned,:flagged_count,:delta)""", scans)
+
+            for row in flagged_items:
+                # export_db strips thumb_b64 from exported rows; user_role is
+                # defaulted the same way so the named parameters always bind.
+                row.setdefault("thumb_b64", "")
+                row.setdefault("user_role", "other")
+            _insert_rows("flagged_items", """INSERT OR IGNORE INTO flagged_items
+                (id,scan_id,name,source,source_type,account_id,folder,
+                 url,drive_id,size_kb,modified,cpr_count,risk,
+                 thumb_b64,thumb_mime,attachments,user_role,scanned_at)
+                VALUES (:id,:scan_id,:name,:source,:source_type,:account_id,
+                 :folder,:url,:drive_id,:size_kb,:modified,:cpr_count,:risk,
+                 :thumb_b64,:thumb_mime,:attachments,:user_role,:scanned_at)""",
+                flagged_items)
+
+            _insert_rows("cpr_index",
+                "INSERT OR IGNORE INTO cpr_index (cpr_hash,item_id,scan_id,first_seen) "
+                "VALUES (:cpr_hash,:item_id,:scan_id,:first_seen)", cpr_index)
+
+            _insert_rows("pii_hits",
+                "INSERT OR IGNORE INTO pii_hits (item_id,scan_id,pii_type,hit_count) "
+                "VALUES (:item_id,:scan_id,:pii_type,:hit_count)", pii_hits)
+
+            _insert_rows("scan_history", """INSERT OR IGNORE INTO scan_history
+                (scan_id,scan_date,flagged_count,special_category,
+                 overdue_count,deleted_count,sources_json)
+                VALUES (:scan_id,:scan_date,:flagged_count,:special_category,
+                 :overdue_count,:deleted_count,:sources_json)""", scan_history)
+
+        # Both modes: dispositions, deletion_log and schedule_runs
+        _insert_rows("dispositions", """INSERT OR REPLACE INTO dispositions
+            (item_id,status,legal_basis,notes,reviewed_by,reviewed_at)
+            VALUES (:item_id,:status,:legal_basis,:notes,:reviewed_by,:reviewed_at)""",
+            dispositions)
+
+        _insert_rows("deletion_log", """INSERT OR IGNORE INTO deletion_log
+            (id,deleted_at,item_id,item_name,source_type,account_id,
+             account_name,cpr_count,reason,legal_basis,deleted_by,scan_id)
+            VALUES (:id,:deleted_at,:item_id,:item_name,:source_type,:account_id,
+             :account_name,:cpr_count,:reason,:legal_basis,:deleted_by,:scan_id)""",
+            deletion_log)
+
+        _insert_rows("schedule_runs", """INSERT OR IGNORE INTO schedule_runs
+            (id,started_at,finished_at,status,profile_id,
+             flagged,scanned,emailed,error)
+            VALUES (:id,:started_at,:finished_at,:status,:profile_id,
+             :flagged,:scanned,:emailed,:error)""", schedule_runs)
+
+        c.commit()
+        return {"mode": mode, "exported_at": meta.get("exported_at"), "imported": imported}
+
+
+# ── Module-level singleton ────────────────────────────────────────────────────
+_db: ScanDB | None = None
+
+
+def get_db(path: Path = DB_PATH) -> ScanDB:
+    """Return the shared ScanDB, building it from ``path`` on first use
+    (``path`` is ignored once the singleton exists)."""
+    global _db
+    _db = ScanDB(path) if _db is None else _db
+    return _db
diff --git a/gdpr_scanner.py b/gdpr_scanner.py
new file mode 100644
index 0000000..aaa3fb9
--- /dev/null
+++ b/gdpr_scanner.py
@@ -0,0 +1,2212 @@
+#!/usr/bin/env python3
+"""
+GDPRScanner — Scan Exchange, OneDrive, SharePoint & Teams for CPR numbers.
+Run with:  python gdpr_scanner.py [--port 5100]
+
+Requires:
+    pip install flask msal requests pillow
+
+Optional (better PDF scanning):
+    pip install pymupdf
+"""
+
+import argparse
+import base64
+import hashlib
+import io
+import json
+import logging
+import logging.handlers
+import os
+import queue
+from collections import deque
+import re
+import sys
+import tempfile
+import concurrent.futures
+import threading
+import time
+from pathlib import Path
+
+# ── Logging setup ─────────────────────────────────────────────────────────────
+def _configure_logging() -> None:
+    """Configure root logger with console + rotating file handler.
+
+    Called once at startup before any module-level code uses the logger.
+    Idempotent — returns before touching disk if handlers already exist (pytest).
+    """
+    root = logging.getLogger()
+    if root.handlers:
+        return
+    _data_dir = Path.home() / ".gdprscanner"
+    _data_dir.mkdir(parents=True, exist_ok=True)
+    root.setLevel(logging.INFO)
+    _fmt = logging.Formatter(
+        "%(asctime)s %(levelname)-8s %(name)s — %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    _sh = logging.StreamHandler()
+    _sh.setFormatter(_fmt)
+    _fh = logging.handlers.RotatingFileHandler(
+        _data_dir / "gdpr_scanner.log",
+        maxBytes=2 * 1024 * 1024,
+        backupCount=3,
+        encoding="utf-8",
+    )
+    _fh.setFormatter(_fmt)
+    root.addHandler(_sh)
+    root.addHandler(_fh)
+    # Suppress noisy third-party loggers
+    logging.getLogger("pdfminer").setLevel(logging.ERROR)
+    logging.getLogger("pdfplumber").setLevel(logging.ERROR)
+    logging.getLogger("werkzeug").setLevel(logging.WARNING)
+
+_configure_logging()
+logger = logging.getLogger(__name__)
+
+# ── Module identity fix ───────────────────────────────────────────────────────
+# When run as `python gdpr_scanner.py`, Python loads this module as `__main__`.
+# When scan_scheduler.py does `import gdpr_scanner`, Python would load a SECOND
+# copy with its own _sse_queues, broadcast(), etc. — so scheduled scan events
+# would never reach the browser's SSE connection.
+# Fix: when executed as a script, alias `gdpr_scanner` to this `__main__` module.
+if __name__ == "__main__":
+    sys.modules["gdpr_scanner"] = sys.modules[__name__]
+
+
+# ── One-time migration shim: rename ~/.m365_scanner_* → ~/.gdpr_scanner_* ────
+# Runs silently on first startup after upgrading from v1.5.x.
+# Safe to re-run — only moves files that don't already exist at the new path.
+def _migrate_legacy_files():
+    """Rename each ~/.m365_scanner_* file to its ~/.gdpr_scanner_* name unless that already exists."""
+    home = Path.home()
+    for old_name, new_name in (
+        (".m365_scanner_config.json",       ".gdpr_scanner_config.json"),
+        (".m365_scanner.db",                ".gdpr_scanner.db"),
+        (".m365_scanner_token.json",        ".gdpr_scanner_token.json"),
+        (".m365_scanner_delta.json",        ".gdpr_scanner_delta.json"),
+        (".m365_scanner_settings.json",     ".gdpr_scanner_settings.json"),
+        (".m365_scanner_smtp.json",         ".gdpr_scanner_smtp.json"),
+        (".m365_scanner_role_overrides.json",".gdpr_scanner_role_overrides.json"),
+        (".m365_scanner_file_sources.json", ".gdpr_scanner_file_sources.json"),
+        (".m365_scanner_machine_id",        ".gdpr_scanner_machine_id"),
+        (".m365_scanner_checkpoint.json",   ".gdpr_scanner_checkpoint.json"),
+        (".m365_scanner_schedule.json",     ".gdpr_scanner_schedule.json"),
+        (".m365_scanner_msal_cache.bin",    ".gdpr_scanner_msal_cache.bin"),
+        (".m365_scanner_lang",              ".gdpr_scanner_lang"),
+    ):
+        old, new = home / old_name, home / new_name
+        if not old.exists() or new.exists():
+            continue
+        try:
+            old.rename(new)
+            logger.info("[migrate] %s → %s", old_name, new_name)
+        except Exception as _e:
+            logger.warning("[migrate] Could not rename %s: %s", old_name, _e)
+
+_migrate_legacy_files()
+
+# ── One-time migration: move ~/.gdpr_scanner_* → ~/.gdprscanner/ ────────────
+# Runs silently on first startup after upgrading from v1.6.2 or earlier.
+def _migrate_to_data_dir():
+    """Move known ~/.gdpr_scanner_* files into ~/.gdprscanner/ unless the target already exists."""
+    home = Path.home()
+    _DATA_DIR = home / ".gdprscanner"
+    _DATA_DIR.mkdir(exist_ok=True)
+    for old_name, new_name in (
+        (".gdpr_scanner_config.json",        "config.json"),
+        (".gdpr_scanner.db",                 "scanner.db"),
+        (".gdpr_scanner_token.json",         "token.json"),
+        (".gdpr_scanner_delta.json",         "delta.json"),
+        (".gdpr_scanner_settings.json",      "settings.json"),
+        (".gdpr_scanner_smtp.json",          "smtp.json"),
+        (".gdpr_scanner_role_overrides.json","role_overrides.json"),
+        (".gdpr_scanner_file_sources.json",  "file_sources.json"),
+        (".gdpr_scanner_machine_id",         "machine_id"),
+        (".gdpr_scanner_checkpoint.json",    "checkpoint.json"),
+        (".gdpr_scanner_schedule.json",      "schedule.json"),
+        (".gdpr_scanner_msal_cache.bin",     "msal_cache.bin"),
+        (".gdpr_scanner_lang",               "lang"),
+        (".gdpr_scanner_google.json",        "google.json"),
+        (".gdpr_scanner_google_sa.json",     "google_sa.json"),
+        (".gdpr_scanner_src_toggles.json",   "src_toggles.json"),
+    ):
+        old, new = home / old_name, _DATA_DIR / new_name
+        if not old.exists() or new.exists():
+            continue
+        try:
+            old.rename(new)
+            logger.info("[migrate] ~/%s → ~/.gdprscanner/%s", old_name, new_name)
+        except Exception as _e:
+            logger.warning("[migrate] Could not move %s: %s", old_name, _e)
+
+_migrate_to_data_dir()
+
+
+# ── Flask ─────────────────────────────────────────────────────────────────────
+try:
+    from flask import Flask, Response, jsonify, render_template, request, session
+except ImportError:
+    print("Flask required: pip install flask")
+    sys.exit(1)
+
+# ── PIL ───────────────────────────────────────────────────────────────────────
+try:
+    from PIL import Image as PILImage
+    PIL_OK = True
+except ImportError:
+    PIL_OK = False
+
+# ── Scanner ───────────────────────────────────────────────────────────────────
+sys.path.insert(0, str(Path(__file__).parent))
+try:
+    import document_scanner as ds
+    SCANNER_OK = True
+except ImportError as e:
+    logger.warning("document_scanner not found: %s", e)
+    SCANNER_OK = False
+
+try:
+    from file_scanner import FileScanner, store_smb_password, SMB_OK as _SMB_OK
+    FILE_SCANNER_OK = True
+except ImportError:
+    FILE_SCANNER_OK = False
+    _SMB_OK = False
+
+# ── Connector ─────────────────────────────────────────────────────────────────
+try:
+    from m365_connector import M365Connector, M365Error, M365PermissionError, M365DeltaTokenExpired, MSAL_OK, REQUESTS_OK
+    CONNECTOR_OK = True
+except ImportError as e:
+    logger.warning("m365_connector not found: %s", e)
+    CONNECTOR_OK = False
+
+try:
+    from google_connector import GoogleConnector as _GoogleConnector, GOOGLE_AUTH_OK, load_saved_key as _load_google_key
+    GOOGLE_CONNECTOR_OK = True
+except ImportError:
+    GOOGLE_CONNECTOR_OK = False
+    GOOGLE_AUTH_OK = False
+    def _load_google_key(): return None
+
+try:
+    from gdpr_db import get_db as _get_db, ScanDB as _ScanDB
+    DB_OK = True
+except ImportError:
+    DB_OK = False
+    def _get_db(*a, **kw): return None
+
+# Single source of truth — read from VERSION file alongside this script
+APP_VERSION = (Path(__file__).parent / "VERSION").read_text().strip()
+
+# Compiled once — matches a bare UUID with no surrounding text
+_GUID_RE = re.compile(
+    r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.I
+)
+# Localised variants of the generic guest-account placeholder
+_GENERIC_DISPLAY_NAMES = {
+    "microsoft konto", "microsoft account", "microsoftkonto",
+    "microsoft-konto", "compte microsoft", "cuenta de microsoft",
+}
+
+def _resolve_display_name(display_name: str,
+                           email: str = "",
+                           upn: str = "") -> str:
+    """Pick the most readable label for a Microsoft 365 user.
+
+    ``display_name`` (whitespace-stripped) wins unless it is empty, a bare
+    GUID, or one of the generic placeholders in _GENERIC_DISPLAY_NAMES
+    (compared case-insensitively).  In those cases the first non-empty of
+    ``email``, ``upn`` and the stripped display name is returned.
+    """
+    candidate = (display_name or "").strip()
+    looks_generated = bool(_GUID_RE.match(candidate)) \
+        or candidate.lower() in _GENERIC_DISPLAY_NAMES
+    if candidate and not looks_generated:
+        return candidate
+    return email or upn or candidate
+
+
+# ── Sub-module imports — re-export everything for blueprint __getattr__ ────────
+from app_config import (
+    _load_src_toggles, _save_src_toggles,
+    LANG, _load_lang, _load_lang_forced, _lang_override, _set_lang_override,
+    _load_keywords, _check_special_category,
+    _compiled_keywords, _keyword_data, _keyword_flat,
+    _load_config, _save_config,
+    _get_admin_pin_hash, _set_admin_pin, _verify_admin_pin, _admin_pin_is_set,
+    _profiles_load, _profiles_write, _profiles_save_all, _profile_from_settings,
+    _profile_get, _profile_save, _profile_delete, _profile_touch,
+    _save_settings, _load_settings,
+    _load_role_overrides, _save_role_overrides,
+    _load_file_sources, _save_file_sources,
+    _get_fernet, _encrypt_password, _decrypt_password,
+    _load_smtp_config, _save_smtp_config,
+    _SETTINGS_PATH, _SMTP_CONFIG_PATH, _ROLE_OVERRIDES_PATH,
+    _FILE_SOURCES_PATH, _MACHINE_ID_PATH,
+)
+# _load_keywords already called by app_config at import time
+
+from checkpoint import (
+    _checkpoint_key, _save_checkpoint, _load_checkpoint, _clear_checkpoint,
+    _load_delta_tokens, _save_delta_tokens,
+    _CHECKPOINT_PATH, _DELTA_PATH,
+)
+
+from sse import broadcast, _sse_queues, _sse_buffer
+import sse as _sse_mod  # for _current_scan_id access at call time
+
+from cpr_detector import (
+    _scan_bytes, _scan_bytes_timeout, _scan_text_direct, _html_esc, _get_pii_counts,
+    _make_thumb, _placeholder_svg,
+    _extract_exif, _detect_photo_faces,
+    SUPPORTED_EXTS, PHOTO_EXTS,
+    _EXIF_PII_TAGS,
+)
+# Inject runtime deps into cpr_detector
+import cpr_detector as _cprd
+_cprd.ds             = ds if SCANNER_OK else None  # `ds` is unbound when document_scanner failed to import
+_cprd.SCANNER_OK     = SCANNER_OK
+_cprd.PILImage       = PILImage if PIL_OK else None
+_cprd.PIL_OK         = PIL_OK
+_cprd.LANG           = LANG
+_cprd._check_special_category = _check_special_category
+
+from scan_engine import run_scan, run_file_scan
+# Inject runtime deps into scan_engine (assigned as module attributes after import)
+import scan_engine as _se
+_se.broadcast        = broadcast
+_se._sse_buffer      = _sse_buffer
+_se.LANG             = LANG
+_se.SCANNER_OK       = SCANNER_OK
+_se.PIL_OK           = PIL_OK
+_se.FILE_SCANNER_OK  = FILE_SCANNER_OK
+_se.CONNECTOR_OK     = CONNECTOR_OK
+_se.DB_OK            = DB_OK
+_se.PHOTO_EXTS       = PHOTO_EXTS
+_se.SUPPORTED_EXTS   = SUPPORTED_EXTS
+# CPR/PII detection helpers imported from cpr_detector above
+_se._scan_bytes              = _scan_bytes
+_se._scan_bytes_timeout      = _scan_bytes_timeout
+_se._detect_photo_faces      = _detect_photo_faces
+_se._extract_exif            = _extract_exif
+_se._make_thumb              = _make_thumb
+_se._placeholder_svg         = _placeholder_svg
+_se._check_special_category  = _check_special_category
+_se._get_pii_counts          = _get_pii_counts
+_se._html_esc                = _html_esc
+# checkpoint and delta-token helpers imported from checkpoint above
+_se._load_checkpoint    = _load_checkpoint
+_se._save_checkpoint    = _save_checkpoint
+_se._clear_checkpoint   = _clear_checkpoint
+_se._checkpoint_key     = _checkpoint_key
+_se._load_delta_tokens  = _load_delta_tokens
+_se._save_delta_tokens  = _save_delta_tokens
+
+# ── App state ─────────────────────────────────────────────────────────────────
+import os as _os
+# Resource root: sys._MEIPASS inside a PyInstaller bundle, otherwise this file's folder.
+_BASE_DIR = (sys._MEIPASS if getattr(sys, "frozen", False)
+             else _os.path.dirname(_os.path.abspath(__file__)))
+app = Flask(__name__,
+            template_folder=_os.path.join(_BASE_DIR, "templates"),
+            static_folder=_os.path.join(_BASE_DIR, "static"))
+
+# Session secret — derived from machine_id so it survives restarts without a separate file.
+# machine_id is also the Fernet key (URL-safe base64 of 32 bytes); we use its raw bytes as the secret.
+try:
+    from app_config import _MACHINE_ID_PATH as _mid_path  # type: ignore[attr-defined]
+    import base64 as _b64
+    _mid_bytes = _mid_path.read_bytes() if _mid_path.exists() else None
+    app.secret_key = _b64.urlsafe_b64decode(_mid_bytes) if _mid_bytes else _os.urandom(32)  # plain b64decode drops "-"/"_"
+except Exception:
+    app.secret_key = _os.urandom(32)
+
+_connector:  "M365Connector | None" = None
+# _scan_lock and _scan_abort live in routes/state.py
+from routes.state import _scan_lock, _scan_abort
+# _sse_queues, _sse_buffer, _current_scan_id live in sse.py
+flagged_items: list = []
+scan_meta:    dict = {}
+
+# ── Checkpoint (incremental / resumable scans) ────────────────────────────────
+# ── HTML ──────────────────────────────────────────────────────────────────────
+
+# ── Shared state (imported by route blueprints) ───────────────────────────────
+from routes import state as _state
+# Wire the mutable globals to the state module so blueprints share the same list/dict objects.
+# Runs once at startup; blueprints read/write state.X. connector is None here — a later rebind of _connector is not mirrored.
+_state.LANG              = LANG
+_state.connector         = _connector
+_state.flagged_items     = flagged_items
+_state.scan_meta         = scan_meta
+_state.compiled_keywords = _compiled_keywords
+_state.keyword_data      = _keyword_data
+_state.keyword_flat      = _keyword_flat
+
+# ── Auto-restore Google Workspace connector from saved key ────────────────────
+if GOOGLE_CONNECTOR_OK:
+    try:
+        _gkey = _load_google_key()
+        if _gkey:  # only restore when a saved key was returned
+            from routes.google_auth import _load_google_config as _lgcfg  # imported lazily, only when a key exists
+            _gcfg = _lgcfg()
+            _state.google_connector = _GoogleConnector(_gkey, admin_email=_gcfg.get("admin_email", ""))
+    except Exception as _ge:  # best-effort: log and continue startup without the connector
+        logger.warning("[google] Could not restore connector: %s", _ge)
+
+def _sync_state():
+    """Mirror flagged_items/scan_meta into the state module in place, never rebinding its objects."""
+    _state.flagged_items[:] = flagged_items
+    _snapshot = dict(scan_meta)  # copy first: clear() would also empty scan_meta while both names alias one dict
+    _state.scan_meta.clear()
+    _state.scan_meta.update(_snapshot)
+
+# ── HTML template ────────────────────────────────────────────────────────────
+# Served from templates/index.html via Flask render_template().
+# JavaScript served from static/app.js via Flask static file handling.
+
+
+# ── Auth state ─────────────────────────────────────────────────────────────────
+# ── Routes ────────────────────────────────────────────────────────────────────
+
+@app.route("/")
+def index():
+    """Render index.html for the full (non-viewer) UI, passing the LANG table as JSON."""
+    lang_payload = json.dumps(LANG, ensure_ascii=False)
+    return render_template("index.html", app_version=APP_VERSION, lang_json=lang_payload, viewer_mode=False)
+
+
+@app.route("/view")
+def viewer():
+    from app_config import validate_viewer_token, get_viewer_pin_hash
+    token = request.args.get("token", "").strip()
+    if token:
+        if validate_viewer_token(token) is None:
+            return render_template("viewer_denied.html"), 403
+        # Bind a session so the viewer doesn't need the token on every navigation
+        session["viewer_ok"] = True
+        return render_template("index.html", app_version=APP_VERSION,
+                                lang_json=json.dumps(LANG, ensure_ascii=False),
+                                viewer_mode=True)
+    if session.get("viewer_ok"):
+        return render_template("index.html", app_version=APP_VERSION,
+                                lang_json=json.dumps(LANG, ensure_ascii=False),
+                                viewer_mode=True)
+    # No token, no session — show PIN form if a PIN is configured, else deny
+    pin_hash = get_viewer_pin_hash()
+    if pin_hash:
+        return render_template("viewer_pin.html")
+    return render_template("viewer_denied.html"), 403
+
+
+def _build_excel_bytes() -> tuple[bytes, str]:
+    """Build the M365 scan Excel workbook and return (bytes, filename).
+    Raises on error. Used by export_excel() and send_report()."""
+    from openpyxl import Workbook
+    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
+    from openpyxl.utils import get_column_letter
+
+    HEADER_BG  = "1F3864"
+    HEADER_FG  = "FFFFFF"
+    ALT_BG     = "EEF2FF"
+    SOURCE_MAP = {
+        "email":      ("📧 Outlook",     "D6E4F7"),
+        "onedrive":   ("💾 OneDrive",   "D6F7E4"),
+        "sharepoint": ("🌐 SharePoint", "FFF0D6"),
+        "teams":      ("💬 Teams",      "F7D6F0"),
+        "local":      ("📁 Local",      "E6F7E6"),
+        "smb":        ("🌐 Network",    "E0F0FA"),
+    }
+    COLS = [
+        ("Name / Subject",    45),
+        ("CPR Hits",           9),
+        ("Face count",         9),
+        ("GPS",                6),
+        ("Special category",  22),
+        ("EXIF author",       18),
+        ("Folder",            30),
+        ("Account",           24),
+        ("Role",              10),
+        ("Disposition",       18),
+        ("Date Modified",     14),
+        ("Size (KB)",         10),
+        ("URL",               50),
+    ]
+
+    thin   = Side(style="thin", color="CCCCCC")
+    border = Border(left=thin, right=thin, top=thin, bottom=thin)
+
+    def _fill(hex_col):
+        return PatternFill("solid", fgColor=hex_col)
+
+    def _write_sheet(ws, rows, tab_color):
+        ws.sheet_properties.tabColor = tab_color
+        for col_idx, (col_name, col_w) in enumerate(COLS, 1):
+            cell = ws.cell(row=1, column=col_idx, value=col_name)
+            cell.font      = Font(name="Arial", bold=True, color=HEADER_FG, size=10)
+            cell.fill      = _fill(HEADER_BG)
+            cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
+            cell.border    = border
+            ws.column_dimensions[get_column_letter(col_idx)].width = col_w
+        ws.row_dimensions[1].height = 20
+        ws.freeze_panes = "A2"
+
+        for r_idx, item in enumerate(rows, 2):
+            row_fill = _fill(ALT_BG if r_idx % 2 == 0 else "FFFFFF")
+            _disp = ""
+            if DB_OK:
+                try:
+                    _d = _get_db().get_disposition(item.get("id", ""))
+                    _disp = (_d.get("status", "") if _d else "")
+                except Exception:
+                    pass
+            _sc = item.get("special_category", [])
+            _sc_str = ", ".join(
+                s for s in (_sc if isinstance(_sc, list) else [str(_sc or "")])
+                if s not in ("gps_location", "exif_pii")
+            )
+            _exif   = item.get("exif") or {}
+            _gps    = _exif.get("gps")
+            _author = _exif.get("author") or ""
+            values = [
+                item.get("name", ""),
+                item.get("cpr_count", 0),
+                item.get("face_count", 0),
+                "✔" if _gps else "",
+                _sc_str,
+                _author,
+                item.get("folder", ""),
+                item.get("account_name", "") or item.get("source", ""),
+                item.get("user_role", ""),
+                _disp,
+                item.get("modified", ""),
+                item.get("size_kb", ""),
+                item.get("url", ""),
+            ]
+            for col_idx, val in enumerate(values, 1):
+                is_url = col_idx == 13 and val
+                cell = ws.cell(row=r_idx, column=col_idx, value=val)
+                cell.font      = Font(name="Arial", size=10,
+                                     color="1155CC" if is_url else "000000",
+                                     underline="single" if is_url else None)
+                cell.fill      = row_fill
+                cell.alignment = Alignment(vertical="center", wrap_text=(col_idx == 1))
+                cell.border    = border
+            ws.row_dimensions[r_idx].height = 16
+
+        if rows:
+            tr = len(rows) + 2
+            ws.cell(row=tr, column=1, value="Total").font = Font(name="Arial", bold=True, size=10)
+            ws.cell(row=tr, column=2, value=f"=SUM(B2:B{tr-1})").font = Font(name="Arial", bold=True, size=10)
+            for col_idx in range(1, len(COLS) + 1):
+                ws.cell(row=tr, column=col_idx).fill   = _fill("D0D8F0")
+                ws.cell(row=tr, column=col_idx).border = border
+
+        ws.auto_filter.ref = f"A1:{get_column_letter(len(COLS))}1"
+
+    wb     = Workbook()
+    ws_sum = wb.active
+    ws_sum.title = "Summary"
+    ws_sum.sheet_properties.tabColor = "1F3864"
+    ws_sum["A1"] = "GDPRScanner — Export"
+    ws_sum["A1"].font = Font(name="Arial", bold=True, size=14, color=HEADER_FG)
+    ws_sum["A1"].fill = _fill(HEADER_BG)
+    ws_sum.merge_cells("A1:D1")
+    ws_sum["A1"].alignment = Alignment(horizontal="center", vertical="center")
+    ws_sum.row_dimensions[1].height = 28
+
+    import datetime as _dt
+    ws_sum["A2"] = "Generated:"
+    ws_sum["B2"] = _dt.datetime.now().strftime("%Y-%m-%d %H:%M")
+    ws_sum["A3"] = "Total flagged items:"
+    ws_sum["B3"] = len(flagged_items)
+    gps_count = sum(1 for i in flagged_items if (i.get("exif") or {}).get("gps"))
+    if gps_count:
+        ws_sum["A4"] = "Items with GPS data:"
+        ws_sum["B4"] = gps_count
+    for cell in (ws_sum["A2"], ws_sum["A3"], ws_sum["A4"]):
+        cell.font = Font(name="Arial", bold=True, size=10)
+    for cell in (ws_sum["B2"], ws_sum["B3"], ws_sum["B4"]):
+        cell.font = Font(name="Arial", size=10)
+    ws_sum.column_dimensions["A"].width = 22
+    ws_sum.column_dimensions["B"].width = 20
+
+    for ci, h in enumerate(["Source", "Items", "Total CPR Hits"], 1):
+        cell = ws_sum.cell(row=6, column=ci, value=h)
+        cell.font      = Font(name="Arial", bold=True, color=HEADER_FG, size=10)
+        cell.fill      = _fill(HEADER_BG)
+        cell.border    = border
+        cell.alignment = Alignment(horizontal="center", vertical="center")
+    ws_sum.row_dimensions[6].height = 18
+    ws_sum.column_dimensions["C"].width = 16
+
+    by_source: dict = {}
+    for item in flagged_items:
+        by_source.setdefault(item.get("source_type", "other"), []).append(item)
+
+    sum_row = 7
+    for src_key, (label, tab_bg) in SOURCE_MAP.items():
+        items = by_source.get(src_key, [])
+        if not items:
+            continue
+        ws_sum.cell(row=sum_row, column=1, value=label).font = Font(name="Arial", size=10)
+        ws_sum.cell(row=sum_row, column=2, value=len(items)).font = Font(name="Arial", size=10)
+        ws_sum.cell(row=sum_row, column=3, value=sum(i.get("cpr_count", 0) for i in items)).font = Font(name="Arial", size=10)
+        for ci in range(1, 4):
+            ws_sum.cell(row=sum_row, column=ci).border = border
+            ws_sum.cell(row=sum_row, column=ci).fill = _fill("EEF2FF" if sum_row % 2 == 0 else "FFFFFF")
+        sum_row += 1
+
+    for src_key, (label, tab_bg) in SOURCE_MAP.items():
+        items = by_source.get(src_key, [])
+        if not items:
+            continue
+        clean_label = label.split(" ", 1)[1]
+        _write_sheet(wb.create_sheet(title=clean_label), items, tab_bg)
+
+    # GPS items sheet
+    gps_items = [i for i in flagged_items if (i.get("exif") or {}).get("gps")]
+    if gps_items:
+        ws_gps = wb.create_sheet(title="GPS locations")
+        ws_gps.sheet_properties.tabColor = "1A7A6E"
+        GPS_COLS = [
+            ("Name", 40), ("Latitude", 14), ("Longitude", 14),
+            ("Maps link", 50), ("Account", 24), ("Date Modified", 14),
+        ]
+        for col_idx, (col_name, col_w) in enumerate(GPS_COLS, 1):
+            cell = ws_gps.cell(row=1, column=col_idx, value=col_name)
+            cell.font      = Font(name="Arial", bold=True, color=HEADER_FG, size=10)
+            cell.fill      = _fill("1A7A6E")
+            cell.alignment = Alignment(horizontal="center", vertical="center")
+            cell.border    = border
+            ws_gps.column_dimensions[get_column_letter(col_idx)].width = col_w
+        ws_gps.freeze_panes = "A2"
+        for r_idx, item in enumerate(gps_items, 2):
+            _exif = item.get("exif") or {}
+            _gps  = _exif.get("gps") or {}
+            row_fill = _fill("E0F7F4" if r_idx % 2 == 0 else "FFFFFF")
+            for col_idx, val in enumerate([
+                item.get("name", ""),
+                _gps.get("lat", ""),
+                _gps.get("lon", ""),
+                _gps.get("maps_url", ""),
+                item.get("account_name", "") or item.get("source", ""),
+                item.get("modified", ""),
+            ], 1):
+                is_link = col_idx == 4 and val
+                cell = ws_gps.cell(row=r_idx, column=col_idx, value=val)
+                cell.font   = Font(name="Arial", size=10,
+                                   color="1155CC" if is_link else "000000",
+                                   underline="single" if is_link else None)
+                cell.fill   = row_fill
+                cell.border = border
+        ws_gps.auto_filter.ref = f"A1:{get_column_letter(len(GPS_COLS))}1"
+
+    # External transfers sheet
+    ext_items = [i for i in flagged_items
+                 if i.get("transfer_risk") in ("external-recipient", "external-share", "shared")]
+    if ext_items:
+        ws_ext = wb.create_sheet(title="External transfers")
+        _write_sheet(ws_ext, ext_items, "E74C3C")
+        ws_ext.sheet_properties.tabColor = "E74C3C"
+        ws_sum.cell(row=sum_row, column=1, value="⚠ External transfers").font = Font(name="Arial", size=10, bold=True, color="E74C3C")
+        ws_sum.cell(row=sum_row, column=2, value=len(ext_items)).font = Font(name="Arial", size=10, bold=True, color="E74C3C")
+        ws_sum.cell(row=sum_row, column=3, value=sum(i.get("cpr_count", 0) for i in ext_items)).font = Font(name="Arial", size=10, bold=True, color="E74C3C")
+        for ci in range(1, 4):
+            ws_sum.cell(row=sum_row, column=ci).border = border
+            ws_sum.cell(row=sum_row, column=ci).fill = _fill("FDE8E8")
+
+    buf = io.BytesIO()
+    wb.save(buf)
+    buf.seek(0)
+    fname = f"gdpr_scan_{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+    return buf.read(), fname
+
+
+# ── Article 30 report ─────────────────────────────────────────────────────────
+
+def _build_article30_docx() -> tuple[bytes, str]:
+    """Generate a GDPR Article 30 Register of Processing Activities as .docx.
+    Returns (bytes, filename). Strings are translated using the active LANG dict."""
+    try:
+        from docx import Document as _Document
+        from docx.shared import Pt, RGBColor, Inches, Cm
+        from docx.enum.text import WD_ALIGN_PARAGRAPH
+        from docx.oxml.ns import qn
+        from docx.oxml import OxmlElement
+    except ImportError:
+        raise ImportError("python-docx not installed — run: pip install python-docx")
+
+    import datetime as _dt
+
+    # Translate helper — falls back to English default if key missing
+    def L(key: str, default: str = "") -> str:
+        return LANG.get(key, default)
+
+    # ── Data ─────────────────────────────────────────────────────────────────
+    db    = _get_db() if DB_OK else None
+    stats   = db.get_stats() if db else {}
+    items   = db.get_flagged_items() if db else list(flagged_items)
+    trend   = db.get_trend(10) if db else []
+    overdue = db.get_overdue_items(5) if db else []
+
+    # Build account_id → display_name map from the scan's stored user_ids
+    # This lets us resolve GUIDs and "Microsoft Konto" placeholders that
+    # were stored in account_name before _resolve_display_name was applied.
+    _acct_map: dict[str, str] = {}
+    if db:
+        try:
+            scan_id = stats.get("scan_id") or db.latest_scan_id()
+            if scan_id:
+                row = db._connect().execute(
+                    "SELECT user_count, options FROM scans WHERE id=?", (scan_id,)
+                ).fetchone()
+                # user_ids are stored in the options JSON column
+                opts_json = json.loads(row["options"] or "{}") if row else {}
+                for u in opts_json.get("user_ids", []):
+                    uid  = u.get("id", "")
+                    name = u.get("displayName", "")
+                    if uid and name:
+                        _acct_map[uid] = name
+        except Exception:
+            pass
+    # Also seed from in-memory flagged_items (catches current scan not yet in DB)
+    for item in flagged_items:
+        aid  = item.get("account_id", "")
+        name = item.get("account_name", "")
+        if aid and name and not _GUID_RE.match(name.strip()):
+            _acct_map.setdefault(aid, name)
+
+    def _acct_label(item: dict) -> str:
+        """Return the best human-readable account label for an item."""
+        aid  = item.get("account_id", "")
+        name = item.get("account_name", "")
+        # Try the lookup map first (most reliable — built from scan user_ids)
+        if aid and aid in _acct_map:
+            return _acct_map[aid]
+        # Fall back to stored name, resolving GUIDs/placeholders against account_id
+        return _resolve_display_name(name, aid)
+    overdue_ids = {o["id"] for o in overdue}
+
+    now_str   = _dt.datetime.now().strftime("%Y-%m-%d %H:%M")
+    date_str  = _dt.datetime.now().strftime("%Y-%m-%d")
+    fname     = f"article30_{date_str}.docx"
+
+    # Aggregate by source
+    by_source: dict = {}
+    for item in items:
+        st = item.get("source_type", "other")
+        by_source.setdefault(st, []).append(item)
+
+    SOURCE_LABELS = {
+        "email":      "Exchange (Outlook)",
+        "onedrive":   "OneDrive",
+        "sharepoint": "SharePoint",
+        "teams":      "Teams",
+        "gmail":      "Gmail",
+        "gdrive":     "Google Drive",
+        "local":      "Lokal",
+        "smb":        "Netværk (SMB)",
+    }
+
+    # ── Colour palette ────────────────────────────────────────────────────────
+    DARK_BLUE  = RGBColor(0x1F, 0x38, 0x64)
+    MID_BLUE   = RGBColor(0x00, 0x78, 0xD4)
+    LIGHT_GREY = RGBColor(0xF2, 0xF2, 0xF2)
+    RED        = RGBColor(0xC0, 0x39, 0x2B)
+    ORANGE     = RGBColor(0xC5, 0x5A, 0x00)
+    WHITE      = RGBColor(0xFF, 0xFF, 0xFF)
+
+    def _hex(c: RGBColor) -> str:
+        return f"{c[0]:02X}{c[1]:02X}{c[2]:02X}"
+
+    # ── Document setup ────────────────────────────────────────────────────────
+    doc = _Document()
+    doc.core_properties.title   = "GDPR Article 30 — Register of Processing Activities"
+    doc.core_properties.author  = "GDPRScanner"
+    doc.core_properties.subject = "GDPR Compliance"
+
+    # Page margins — A4 with 2.5 cm margins
+    for section in doc.sections:
+        section.top_margin    = Cm(2.5)
+        section.bottom_margin = Cm(2.5)
+        section.left_margin   = Cm(2.5)
+        section.right_margin  = Cm(2.5)
+
+    # ── Helper: set cell background ──────────────────────────────────────────
+    def _cell_bg(cell, hex_color: str):
+        tc   = cell._tc
+        tcPr = tc.get_or_add_tcPr()
+        shd  = OxmlElement("w:shd")
+        shd.set(qn("w:val"),   "clear")
+        shd.set(qn("w:color"), "auto")
+        shd.set(qn("w:fill"),  hex_color)
+        tcPr.append(shd)
+
+    def _set_cell_border(cell, **kwargs):
+        tc   = cell._tc
+        tcPr = tc.get_or_add_tcPr()
+        tcBorders = OxmlElement("w:tcBorders")
+        for edge in ("top", "left", "bottom", "right"):
+            cfg = kwargs.get(edge, {})
+            el  = OxmlElement(f"w:{edge}")
+            el.set(qn("w:val"),   cfg.get("val",   "single"))
+            el.set(qn("w:sz"),    cfg.get("sz",    "4"))
+            el.set(qn("w:space"), cfg.get("space", "0"))
+            el.set(qn("w:color"), cfg.get("color", "CCCCCC"))
+            tcBorders.append(el)
+        tcPr.append(tcBorders)
+
+    def _para(text: str = "", bold=False, size=11, color=None,
+              align=WD_ALIGN_PARAGRAPH.LEFT, space_before=0, space_after=6) -> object:
+        p = doc.add_paragraph()
+        p.alignment = align
+        p.paragraph_format.space_before = Pt(space_before)
+        p.paragraph_format.space_after  = Pt(space_after)
+        if text:
+            run = p.add_run(text)
+            run.bold      = bold
+            run.font.size = Pt(size)
+            if color:
+                run.font.color.rgb = color
+        return p
+
+    def _heading(text: str, level: int = 1):
+        p  = doc.add_heading(text, level=level)
+        r  = p.runs[0] if p.runs else p.add_run(text)
+        r.font.color.rgb = DARK_BLUE
+        r.font.size      = Pt(16 if level == 1 else 13)
+        r.bold           = True
+        p.paragraph_format.space_before = Pt(14 if level == 1 else 10)
+        p.paragraph_format.space_after  = Pt(4)
+        return p
+
+    def _kv(label: str, value: str, label_width=2.5, bold=False, highlight=False):
+        """Two-column key-value paragraph using a 2-cell table row."""
+        tbl = doc.add_table(rows=1, cols=2)
+        tbl.style = "Table Grid"
+        w_label = int(label_width * 1440)
+        w_value = int((16.0 - label_width) * 1440 * 0.6)  # approx content width
+        c1, c2 = tbl.rows[0].cells
+        _cell_bg(c1, "FFF3E0" if highlight else "F2F2F2")
+        _cell_bg(c2, "FFF3E0" if highlight else "FFFFFF")
+        c1.width = Inches(label_width)
+        c2.width = Inches(16.0 - label_width)
+        p1 = c1.paragraphs[0]; p1.clear()
+        r1 = p1.add_run(label); r1.bold = True; r1.font.size = Pt(10)
+        p2 = c2.paragraphs[0]; p2.clear()
+        r2 = p2.add_run(value); r2.font.size = Pt(10); r2.bold = bold
+        if highlight:
+            r1.font.color.rgb = RGBColor(0x6B, 0x00, 0x6B)
+            r2.font.color.rgb = RGBColor(0x6B, 0x00, 0x6B)
+        for cell in (c1, c2):
+            _set_cell_border(cell, top={"color": "E0E0E0"}, bottom={"color": "E0E0E0"},
+                             left={"color": "E0E0E0"}, right={"color": "E0E0E0"})
+        return tbl
+
+    # ── Cover page ────────────────────────────────────────────────────────────
+    _para()
+    title_p = doc.add_paragraph()
+    title_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    title_p.paragraph_format.space_before = Pt(40)
+    r = title_p.add_run(L("a30_title", "GDPR Article 30"))
+    r.bold = True; r.font.size = Pt(28); r.font.color.rgb = DARK_BLUE
+
+    sub_p = doc.add_paragraph()
+    sub_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    r2 = sub_p.add_run(L("a30_subtitle", "Register of Processing Activities"))
+    r2.font.size = Pt(16); r2.font.color.rgb = MID_BLUE
+
+    _para()
+    meta_p = doc.add_paragraph()
+    meta_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    r3 = meta_p.add_run(f"{L('a30_generated','Generated')}: {now_str}  ·  GDPRScanner")
+    r3.font.size = Pt(10); r3.font.color.rgb = RGBColor(0x88, 0x88, 0x88)
+
+    # Divider line
+    _para()
+    div = doc.add_paragraph()
+    div_fmt = div.paragraph_format
+    div_fmt.space_after = Pt(20)
+    pPr = div._p.get_or_add_pPr()
+    pBdr = OxmlElement("w:pBdr")
+    bot  = OxmlElement("w:bottom")
+    bot.set(qn("w:val"), "single"); bot.set(qn("w:sz"), "6")
+    bot.set(qn("w:color"), _hex(MID_BLUE))
+    pBdr.append(bot); pPr.append(pBdr)
+
+    doc.add_page_break()
+
+    # ── Section 1: Summary ────────────────────────────────────────────────────
+    _heading(L("a30_s1", "1. Summary"))
+
+    total_items    = len(items)
+    total_cpr      = sum(i.get("cpr_count", 0) for i in items)
+    special_items  = [i for i in items if i.get("special_category") and
+                      i["special_category"] not in ("[]", "", None, [])]
+    photo_items    = [i for i in items if i.get("face_count", 0) > 0]
+    gps_items      = [i for i in items if "gps_location" in (i.get("special_category") or [])]
+    exif_pii_items = [i for i in items if "exif_pii" in (i.get("special_category") or [])]
+    unique_subj    = stats.get("unique_subjects", 0)
+    total_scanned  = stats.get("total_scanned", 0)
+    scan_date      = _dt.datetime.fromtimestamp(
+        stats.get("started_at", 0)).strftime("%Y-%m-%d %H:%M") if stats.get("started_at") else "—"
+    special_items  = [i for i in items if i.get("special_category") and
+                      i["special_category"] not in ("[]", "", None, [])]
+
+    _kv(L("a30_scan_date",       "Scan date"),                scan_date)
+    _kv(L("a30_items_scanned",   "Items scanned"),            str(total_scanned))
+    _kv(L("a30_flagged",         "Flagged items"),            str(total_items))
+    _kv(L("a30_cpr_hits",        "Total CPR hits"),           str(total_cpr))
+    _kv(L("a30_data_subjects",   "Estimated data subjects"),  str(unique_subj))
+    _kv(L("a30_overdue",         "Overdue items (>5 yrs)"),   str(len(overdue_ids)))
+    if gps_items:
+        _kv(L("a30_gps_items", "Items with GPS location data (Art. 4 — location = personal data)"),
+            str(len(gps_items)))
+    if exif_pii_items:
+        _kv(L("a30_exif_pii_items", "Items with EXIF PII (author, description, keywords)"),
+            str(len(exif_pii_items)))
+    if photo_items:
+        total_faces = sum(i.get("face_count", 0) for i in photo_items)
+        _kv(L("a30_photo_items", "Photos with detected faces (Art. 9 biometric)"),
+            f"{len(photo_items)} items / {total_faces} faces")
+        _para(L("a30_photo_note",
+                "Photographs of identifiable persons are biometric data under Art. 9 GDPR. "
+                "Retention requires a documented legal basis under Art. 9(2). "
+                "For school photographs of pupils under 15, parental consent is required "
+                "(Databeskyttelsesloven §6). See Datatilsynet guidance on school photography."),
+              size=9, space_after=4)
+    if special_items:
+        _kv(L("a30_special_cat", "Art. 9 special category items"),
+            str(len(special_items)))
+        _para(L("a30_special_cat_note",
+                "These items contain health, criminal, biometric, religious, ethnic, "
+                "trade union, political, or sexual orientation data. "
+                "An explicit legal basis (Art. 9(2)) and possibly a DPIA (Art. 35) is required."),
+              size=9, space_after=4)
+
+    _para()
+
+    # Per-source breakdown table
+    _para(L("a30_by_source", "Breakdown by source"), bold=True, size=11, space_before=10)
+
+    src_tbl = doc.add_table(rows=1, cols=5)
+    src_tbl.style = "Table Grid"
+    hdr_cells = src_tbl.rows[0].cells
+    for cell, txt in zip(hdr_cells, [L("a30_col_source","Source"), L("a30_col_items","Items"),
+                                     L("a30_col_cpr","CPR hits"), L("a30_col_overdue","Overdue"),
+                                     L("a30_col_special","Art. 9")]):
+        _cell_bg(cell, _hex(DARK_BLUE))
+        p = cell.paragraphs[0]; p.clear()
+        r = p.add_run(txt); r.bold = True
+        r.font.size = Pt(10); r.font.color.rgb = WHITE
+
+    for src_key in ("email", "onedrive", "sharepoint", "teams"):
+        src_items = by_source.get(src_key, [])
+        if not src_items:
+            continue
+        row   = src_tbl.add_row().cells
+        n_ov   = sum(1 for i in src_items if i.get("id") in overdue_ids)
+        n_cpr  = sum(i.get("cpr_count", 0) for i in src_items)
+        n_spec = sum(1 for i in src_items if i.get("special_category") and
+                     i["special_category"] not in ("[]", "", None, []))
+        for cell, val in zip(row, [
+            SOURCE_LABELS.get(src_key, src_key),
+            str(len(src_items)), str(n_cpr), str(n_ov),
+            str(n_spec) if n_spec else "—"
+        ]):
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(val); r.font.size = Pt(10)
+            if val != "0" and cell == row[3]:
+                r.font.color.rgb = ORANGE
+            if n_spec and cell == row[4]:
+                r.font.color.rgb = RGBColor(0x7B, 0x00, 0x82)
+                r.bold = True
+
+    # ── Section 2: Data categories ────────────────────────────────────────────
+    doc.add_page_break()
+    _heading(L("a30_s2", "2. Personal Data Categories Identified"))
+
+    _para(L("a30_s2_intro", "The following categories of personal data were detected during scanning."),
+          size=10, space_after=8)
+
+    # Aggregate PII from DB or from items
+    pii_totals: dict = {}
+    if db:
+        rows = db._connect().execute(
+            """SELECT pii_type, SUM(hit_count) FROM pii_hits
+               WHERE scan_id=? GROUP BY pii_type""",
+            (stats.get("scan_id") or db.latest_scan_id() or 0,)
+        ).fetchall()
+        for pii_type, count in rows:
+            pii_totals[pii_type] = count
+
+    PII_LABELS = {
+        "PHONE":        L("a30_pii_phone",        "Phone numbers"),
+        "EMAIL":        L("a30_pii_email",        "Email addresses"),
+        "IBAN":         L("a30_pii_iban",         "IBAN bank numbers"),
+        "BANK_ACCOUNT": L("a30_pii_bank",         "Bank account numbers"),
+        "NAME":         L("a30_pii_name",         "Personal names (NER)"),
+        "ADDRESS":      L("a30_pii_address",      "Addresses (NER)"),
+        "ORG":          L("a30_pii_org",          "Organisations (NER)"),
+    }
+
+    pii_tbl = doc.add_table(rows=1, cols=3)
+    pii_tbl.style = "Table Grid"
+    for cell, txt in zip(pii_tbl.rows[0].cells,
+                          [L("a30_col_category","Data category"), L("a30_col_count","Count"), L("a30_col_gdpr_class","GDPR classification")]):
+        _cell_bg(cell, _hex(DARK_BLUE))
+        p = cell.paragraphs[0]; p.clear()
+        r = p.add_run(txt); r.bold = True
+        r.font.size = Pt(10); r.font.color.rgb = WHITE
+
+    # CPR row first — always
+    cpr_row = pii_tbl.add_row().cells
+    for cell, val in zip(cpr_row, [L("a30_cpr_label", "CPR numbers (Danish personal ID)"), str(total_cpr),
+                                    L("a30_cpr_class", "Art. 9 — national identifier")]):
+        p = cell.paragraphs[0]; p.clear()
+        r = p.add_run(val); r.font.size = Pt(10)
+        _cpr_class = L("a30_cpr_class", "Art. 9 — national identifier")
+        if val == _cpr_class:
+            r.font.color.rgb = RED; r.bold = True
+
+    for pii_type, label in PII_LABELS.items():
+        count = pii_totals.get(pii_type, 0)
+        if not count:
+            continue
+        cls = L("a30_pii_class_9", "Art. 9 — health/sensitive") if pii_type in ("NAME", "ADDRESS") else L("a30_pii_class_4", "Art. 4 — personal data")
+        row = pii_tbl.add_row().cells
+        for cell, val in zip(row, [label, str(count), cls]):
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(val); r.font.size = Pt(10)
+
+    # ── Section 3: Data inventory ─────────────────────────────────────────────
+    doc.add_page_break()
+    _heading(L("a30_s3", "3. Data Inventory"))
+
+    _para(L("a30_s3_intro", "All flagged items are listed below with location, retention status, and compliance disposition."),
+          size=10, space_after=8)
+
+    # Split by user role for separate presentation
+    student_items = [i for i in items if i.get("user_role") == "student"]
+    staff_items   = [i for i in items if i.get("user_role") != "student"]
+
+    _disp_map = {
+        "unreviewed":       L("a30_disp_unreviewed",      "Unreviewed"),
+        "retain-legal":     L("a30_disp_retain_legal",    "Retain — Legal obligation"),
+        "retain-legitimate": L("a30_disp_retain_legit",   "Retain — Legitimate interest"),
+        "retain-contract":  L("a30_disp_retain_contract", "Retain — Contract"),
+        "delete-scheduled": L("a30_disp_delete_sched",    "Delete — Scheduled"),
+        "deleted":          L("a30_disp_deleted",         "Deleted"),
+    }
+
+    def _inv_table(tbl_items: list):
+        tbl = doc.add_table(rows=1, cols=6)
+        tbl.style = "Table Grid"
+        col_hdrs = [L("a30_col_name","Name / Subject"), L("a30_col_source","Source"),
+                    L("a30_col_account","Account"), L("a30_col_modified","Modified"),
+                    L("a30_col_cpr_short","CPR"), L("a30_col_disp","Disposition")]
+        for cell, txt in zip(tbl.rows[0].cells, col_hdrs):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(9); r.font.color.rgb = WHITE
+        sorted_tbl = sorted(tbl_items,
+            key=lambda x: (0 if x.get("id") in overdue_ids else 1, -x.get("cpr_count", 0)))
+        for idx, item in enumerate(sorted_tbl[:500]):
+            disp_rec = db.get_disposition(item["id"]) if db else None
+            raw_disp = disp_rec.get("status", "unreviewed") if disp_rec else "unreviewed"
+            disp_str = _disp_map.get(raw_disp, raw_disp.replace("-", " ").title())
+            is_ov    = item.get("id") in overdue_ids
+            row = tbl.add_row().cells
+            vals = [
+                (item.get("name", "")[:60] + ("…" if len(item.get("name", "")) > 60 else "")),
+                SOURCE_LABELS.get(item.get("source_type", ""), item.get("source_type", "")),
+                _acct_label(item),
+                item.get("modified", ""),
+                str(item.get("cpr_count", 0)),
+                disp_str,
+            ]
+            bg = "FFF8F0" if is_ov else ("FFFFFF" if idx % 2 == 0 else "F8F8F8")
+            for cell, val in zip(row, vals):
+                _cell_bg(cell, bg)
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(8)
+                if is_ov and cell == row[3]:
+                    r.font.color.rgb = ORANGE
+        if len(tbl_items) > 500:
+            _para(f"… {len(tbl_items) - 500} {L('a30_more_items', 'additional items not shown.')}",
+                  size=9, color=RGBColor(0x88, 0x88, 0x88), space_before=4)
+
+    if staff_items:
+        if student_items:
+            _para(L("a30_inv_staff", "👔 Staff / Faculty"), bold=True, size=11, space_before=6, space_after=4)
+        _inv_table(staff_items)
+
+    if student_items:
+        _para(L("a30_inv_students", "🎓 Students"), bold=True, size=11, space_before=14, space_after=2)
+        _para(L("a30_student_consent_note",
+                "Note: Student accounts in Danish folkeskole (pupils under age 15) require parental "
+                "consent for processing of personal data under Databeskyttelsesloven §6. "
+                "Items in student accounts must not be auto-deleted — any action requires "
+                "review by school administration and, for pupils under 15, notification of parents "
+                "or guardians as rights holders under GDPR Article 8."),
+              size=9, color=RGBColor(0x88, 0x44, 0x00), space_after=6)
+        _inv_table(student_items)
+
+    # ── Section 4: Retention analysis ────────────────────────────────────────
+    if overdue:
+        doc.add_page_break()
+        _heading(L("a30_s4", "4. Retention Analysis"))
+
+        _para(L("a30_s4_intro", "The following items exceed the 5-year retention threshold and should be reviewed for deletion under GDPR Article 5(1)(e) — storage limitation."),
+              size=10, space_after=8)
+
+        ret_tbl = doc.add_table(rows=1, cols=5)
+        ret_tbl.style = "Table Grid"
+        for cell, txt in zip(ret_tbl.rows[0].cells,
+                              [L("a30_col_name","Name"), L("a30_col_source","Source"), L("a30_col_account","Account"), L("a30_col_modified","Modified"), L("a30_col_cpr","CPR hits")]):
+            _cell_bg(cell, _hex(ORANGE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(9); r.font.color.rgb = WHITE
+
+        for item in overdue[:200]:
+            row = ret_tbl.add_row().cells
+            for cell, val in zip(row, [
+                item.get("name", "")[:55],
+                SOURCE_LABELS.get(item.get("source_type", ""), ""),
+                _acct_label(item),
+                item.get("modified", ""),
+                str(item.get("cpr_count", 0)),
+            ]):
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(8)
+
+    # ── Section 5: Scan history ───────────────────────────────────────────────
+    if trend:
+        sec_num = "5" if overdue else "4"
+        doc.add_page_break()
+        _heading(f"{sec_num}. {L('a30_s5','Compliance Trend').split('. ',1)[-1]}")
+
+        _para(L("a30_s5_intro", "Flagged item counts over the last scans (most recent first)."),
+              size=10, space_after=8)
+
+        trend_tbl = doc.add_table(rows=1, cols=4)
+        trend_tbl.style = "Table Grid"
+        for cell, txt in zip(trend_tbl.rows[0].cells,
+                              [L("a30_col_scan_date","Scan date"), L("a30_col_flagged","Flagged"), L("a30_col_overdue","Overdue"), L("a30_col_scan_type","Scan type")]):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(9); r.font.color.rgb = WHITE
+
+        for t in reversed(trend):
+            row = trend_tbl.add_row().cells
+            for cell, val in zip(row, [
+                t.get("scan_date", ""),
+                str(t.get("flagged_count", 0)),
+                str(t.get("overdue_count", 0)),
+                L("a30_scan_delta", "Delta") if t.get("delta") else L("a30_scan_full", "Full"),
+            ]):
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(9)
+
+    # ── Section: Deletion audit log ───────────────────────────────────────────
+    del_log   = db.get_deletion_log(limit=500) if db else []
+    del_stats = db.deletion_log_stats() if db else {}
+
+    # Running section counter — starts at 3 (summary, categories, inventory always present)
+    last_sec  = 3
+    last_sec += 1 if overdue  else 0   # retention analysis
+    last_sec += 1 if trend    else 0   # compliance trend
+
+    if del_log:
+        del_sec   = last_sec
+        last_sec += 1
+        doc.add_page_break()
+        _heading(f"{del_sec}. {L('a30_s_dellog', 'Deletion Audit Log')}")
+
+        _para(L("a30_dellog_intro",
+                f"A total of {del_stats.get('total', len(del_log))} item(s) containing personal data "
+                f"have been deleted via GDPRScanner. "
+                f"CPR hits removed: {del_stats.get('cpr_hits_deleted', 0)}. "
+                f"This log satisfies the accountability obligation under GDPR Article 5(2)."),
+              size=10, space_after=8)
+
+        # Summary by reason
+        by_reason = del_stats.get("by_reason", {})
+        if by_reason:
+            _para(L("a30_dellog_by_reason", "Deletions by reason"), bold=True, size=10, space_before=4, space_after=4)
+            reason_tbl = doc.add_table(rows=1, cols=2)
+            reason_tbl.style = "Table Grid"
+            for cell, txt in zip(reason_tbl.rows[0].cells,
+                                  [L("a30_col_reason", "Reason"), L("a30_col_count", "Count")]):
+                _cell_bg(cell, _hex(DARK_BLUE))
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(txt); r.bold = True
+                r.font.size = Pt(9); r.font.color.rgb = WHITE
+            REASON_LABELS = {
+                "manual":               L("a30_reason_manual",    "Manual (individual card delete)"),
+                "bulk":                 L("a30_reason_bulk",       "Bulk delete"),
+                "retention":            L("a30_reason_retention",  "Retention policy enforcement"),
+                "data-subject-request": L("a30_reason_dsr",        "Data subject erasure request (Art. 17)"),
+            }
+            for reason, count in sorted(by_reason.items()):
+                row = reason_tbl.add_row().cells
+                for cell, val in zip(row, [REASON_LABELS.get(reason, reason), str(count)]):
+                    p = cell.paragraphs[0]; p.clear()
+                    r = p.add_run(val); r.font.size = Pt(9)
+
+        # Full log table
+        _para(L("a30_dellog_records", "Deletion records"), bold=True, size=10, space_before=10, space_after=4)
+        log_tbl = doc.add_table(rows=1, cols=7)
+        log_tbl.style = "Table Grid"
+        for cell, txt in zip(log_tbl.rows[0].cells, [
+            L("a30_col_deleted_at",  "Deleted at"),
+            L("a30_col_name",        "Name"),
+            L("a30_col_source",      "Source"),
+            L("a30_col_account",     "Account"),
+            L("a30_col_cpr",         "CPR hits"),
+            L("a30_col_reason",      "Reason"),
+            L("a30_col_deleted_by",  "Deleted by"),
+        ]):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(8); r.font.color.rgb = WHITE
+
+        for idx, entry in enumerate(del_log):
+            ts  = _dt.datetime.fromtimestamp(entry.get("deleted_at", 0)).strftime("%Y-%m-%d %H:%M")
+            bg  = "FFFFFF" if idx % 2 == 0 else "F8F8F8"
+            row = log_tbl.add_row().cells
+            for cell, val in zip(row, [
+                ts,
+                entry.get("item_name", "")[:40],
+                SOURCE_LABELS.get(entry.get("source_type", ""), entry.get("source_type", "")),
+                _acct_map.get(entry.get("account_id", "")) or _resolve_display_name(entry.get("account_name", ""), entry.get("account_id", "")),
+                str(entry.get("cpr_count", 0)),
+                REASON_LABELS.get(entry.get("reason", ""), entry.get("reason", "")),
+                entry.get("deleted_by", "") or "—",
+            ]):
+                _cell_bg(cell, bg)
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(7)
+
+    # ── Section: Article 9 special categories ────────────────────────────────
+    if special_items:
+        last_sec += 1
+        doc.add_page_break()
+        _heading(f"{last_sec}. {L('a30_s_special', 'Special Category Data (Article 9)')}")
+
+        _para(L("a30_special_intro",
+                f"{len(special_items)} item(s) were detected as containing special category "
+                f"data under GDPR Article 9. These require an explicit legal basis beyond "
+                f"Article 6, and processing should be covered by a Data Protection Impact "
+                f"Assessment (DPIA) under Article 35."),
+              size=10, space_after=8)
+
+        # Category breakdown table
+        from collections import Counter as _Counter
+        cat_counts: dict = _Counter()
+        for item in special_items:
+            sc = item.get("special_category", [])
+            if isinstance(sc, str):
+                import json as _scjson
+                try:
+                    sc = _scjson.loads(sc)
+                except Exception:
+                    sc = []
+            for c in sc:
+                cat_counts[c] += 1
+
+        if cat_counts:
+            _para(L("a30_special_by_cat", "Detected categories"), bold=True, size=10,
+                  space_before=4, space_after=4)
+            cat_tbl = doc.add_table(rows=1, cols=2)
+            cat_tbl.style = "Table Grid"
+            for cell, txt in zip(cat_tbl.rows[0].cells,
+                                  [L("a30_col_category", "Category"),
+                                   L("a30_col_count", "Items")]):
+                _cell_bg(cell, _hex(DARK_BLUE))
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(txt); r.bold = True
+                r.font.size = Pt(9); r.font.color.rgb = WHITE
+            CAT_LABELS = {
+                "health":           L("a30_cat_health",    "Health data (Art. 9)"),
+                "mental_health":    L("a30_cat_mental",    "Mental health (Art. 9)"),
+                "criminal":         L("a30_cat_criminal",  "Criminal records (Art. 10)"),
+                "trade_union":      L("a30_cat_union",     "Trade union membership (Art. 9)"),
+                "religion":         L("a30_cat_religion",  "Religious beliefs (Art. 9)"),
+                "ethnicity":        L("a30_cat_ethnicity", "Racial/ethnic origin (Art. 9)"),
+                "political":        L("a30_cat_political", "Political opinions (Art. 9)"),
+                "biometric":        L("a30_cat_biometric", "Biometric data (Art. 9)"),
+                "sexual_orientation": L("a30_cat_sexual",  "Sexual orientation (Art. 9)"),
+            }
+            for cat, count in sorted(cat_counts.items(), key=lambda x: -x[1]):
+                row = cat_tbl.add_row().cells
+                for cell, val in zip(row, [CAT_LABELS.get(cat, cat), str(count)]):
+                    p = cell.paragraphs[0]; p.clear()
+                    r = p.add_run(val); r.font.size = Pt(9)
+
+        # Item list (capped at 50)
+        _para(L("a30_special_items", "Affected items (up to 50)"), bold=True, size=10,
+              space_before=10, space_after=4)
+        sc_tbl = doc.add_table(rows=1, cols=5)
+        sc_tbl.style = "Table Grid"
+        for cell, txt in zip(sc_tbl.rows[0].cells, [
+            L("a30_col_name",     "Name"),
+            L("a30_col_account",  "Account"),
+            L("a30_col_source",   "Source"),
+            L("a30_col_category", "Category"),
+            L("a30_col_cpr",      "CPR hits"),
+        ]):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(8); r.font.color.rgb = WHITE
+
+        for idx, item in enumerate(special_items[:50]):
+            bg = "FFFFFF" if idx % 2 == 0 else "FFF0F8"
+            sc = item.get("special_category", [])
+            if isinstance(sc, str):
+                try:
+                    import json as _scj2; sc = _scj2.loads(sc)
+                except Exception:
+                    sc = []
+            row = sc_tbl.add_row().cells
+            for cell, val in zip(row, [
+                item.get("name", "")[:35],
+                _acct_map.get(item.get("account_id", "")) or item.get("account_name", ""),
+                SOURCE_LABELS.get(item.get("source_type", ""), item.get("source_type", "")),
+                ", ".join(CAT_LABELS.get(c, c) for c in sc)[:45],
+                str(item.get("cpr_count", 0)),
+            ]):
+                _cell_bg(cell, bg)
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(7)
+
+    # ── Section: Photographs / biometric data (#9) ───────────────────────────
+    if photo_items:
+        last_sec += 1
+        doc.add_page_break()
+        _heading(f"{last_sec}. {L('a30_s_photos', 'Photographs and Biometric Data (Article 9)')}")
+
+        total_faces = sum(i.get("face_count", 0) for i in photo_items)
+        _para(L("a30_photo_intro",
+                f"{len(photo_items)} image file(s) containing {total_faces} detected face(s) "
+                f"were found in the scan. Photographs of identifiable persons constitute "
+                f"biometric data under GDPR Article 9 and are subject to the same "
+                f"heightened protection as health or criminal records data."),
+              size=10, space_after=8)
+
+        _para(L("a30_photo_guidance", "Retention guidance"), bold=True, size=10,
+              space_before=4, space_after=4)
+        for line in [
+            L("a30_photo_g1",
+              "Photos may only be retained while the original purpose remains valid "
+              "(Art. 5(1)(b) — purpose limitation)."),
+            L("a30_photo_g2",
+              "Pupils under 15 require parental consent (Databeskyttelsesloven §6). "
+              "Consent must be freely given, specific, and documented."),
+            L("a30_photo_g3",
+              "Photos on public-facing websites must be removed promptly after a person "
+              "leaves the organisation or withdraws consent (Art. 17 — right to erasure)."),
+            L("a30_photo_g4",
+              "Historical/archive use may justify longer retention under Art. 89 only "
+              "with specific safeguards and case-by-case assessment."),
+        ]:
+            p = doc.add_paragraph(style="List Bullet")
+            r = p.add_run(line); r.font.size = Pt(9)
+
+        # GPS items sub-section
+        if gps_items:
+            _para(L("a30_gps_title", "Items with GPS location data"), bold=True, size=10,
+                  space_before=10, space_after=4)
+            _para(L("a30_gps_intro",
+                    "The following files contain GPS coordinates embedded in EXIF metadata. "
+                    "Location data constitutes personal data under Art. 4 GDPR. For photos of children "
+                    "or staff, GPS data may reveal sensitive patterns (home address, health institution, "
+                    "religious site). Consider stripping EXIF before sharing or publishing."),
+                  size=9, space_after=6)
+            gps_tbl = doc.add_table(rows=1, cols=4)
+            gps_tbl.style = "Table Grid"
+            for cell, txt in zip(gps_tbl.rows[0].cells, [
+                L("a30_col_name", "Name"),
+                L("a30_gps_col_lat", "Latitude"),
+                L("a30_gps_col_lon", "Longitude"),
+                L("a30_col_date", "Modified"),
+            ]):
+                _cell_bg(cell, _hex(DARK_BLUE))
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(txt); r.bold = True
+                r.font.size = Pt(8); r.font.color.rgb = WHITE
+            for idx, item in enumerate(gps_items[:50]):
+                bg = "FFFFFF" if idx % 2 == 0 else "E8F7FF"
+                row = gps_tbl.add_row().cells
+                exif = item.get("exif") or {}
+                gps  = exif.get("gps") or {}
+                for cell, val in zip(row, [
+                    item.get("name", "")[:40],
+                    str(gps.get("lat", ""))[:12],
+                    str(gps.get("lon", ""))[:12],
+                    item.get("modified", ""),
+                ]):
+                    _cell_bg(cell, bg)
+                    p = cell.paragraphs[0]; p.clear()
+                    r = p.add_run(val); r.font.size = Pt(7)
+
+        # Photo item list (capped at 50)
+        _para(L("a30_photo_items", "Detected photo items (up to 50)"), bold=True, size=10,
+              space_before=10, space_after=4)
+        ph_tbl = doc.add_table(rows=1, cols=6)
+        ph_tbl.style = "Table Grid"
+        for cell, txt in zip(ph_tbl.rows[0].cells, [
+            L("a30_col_name",    "Name"),
+            L("a30_col_account", "Account"),
+            L("a30_col_source",  "Source"),
+            L("a30_photo_col_faces", "Faces"),
+            L("a30_gps_col",     "GPS"),
+            L("a30_col_date",    "Modified"),
+        ]):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(8); r.font.color.rgb = WHITE
+
+        for idx, item in enumerate(photo_items[:50]):
+            bg = "FFFFFF" if idx % 2 == 0 else "E8F7FF"
+            row = ph_tbl.add_row().cells
+            for cell, val in zip(row, [
+                item.get("name", "")[:40],
+                _acct_map.get(item.get("account_id", "")) or item.get("account_name", ""),
+                SOURCE_LABELS.get(item.get("source_type", ""), item.get("source_type", "")),
+                str(item.get("face_count", 0)),
+                "✔" if (item.get("exif") or {}).get("gps") else "",
+                item.get("modified", ""),
+            ]):
+                _cell_bg(cell, bg)
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(7)
+
+    # ── Section: Methodology ─────────────────────────────────────────────────
+    # last_sec already reflects all optional sections that were added above
+    doc.add_page_break()
+    _heading(f"{last_sec}. {L('a30_s6_short', 'Methodology and Legal Basis')}")
+
+    _para(L("a30_method_title", "Scanning methodology"), bold=True, size=11, space_before=6, space_after=4)
+    for line in [
+        L("a30_method_1", "CPR numbers are detected using pattern matching against the official Danish CPR format (DDMMYY-XXXX)."),
+        L("a30_method_2", "Additional personal data (phone numbers, email addresses, IBANs, bank accounts, names, addresses, and organisations) is detected using regular expressions and spaCy NER."),
+        L("a30_method_3", "CPR numbers stored in this document's database are SHA-256 hashed and never stored in plaintext."),
+        L("a30_method_4", "Scanning covers Exchange mailboxes (all folders including Sent Items), OneDrive, SharePoint, and Microsoft Teams channel files via the Microsoft Graph API. When connected, Google Workspace scanning covers Gmail and Google Drive via a service account with domain-wide delegation."),
+        L("a30_method_5", "When photo scanning is enabled, image files are analysed using OpenCV Haar cascade face detection to identify photographs of persons (Art. 9 biometric data)."),
+    ]:
+        p = doc.add_paragraph(style="List Bullet")
+        r = p.add_run(line); r.font.size = Pt(10)
+
+    _para(L("a30_gdpr_title", "GDPR Articles referenced"), bold=True, size=11, space_before=10, space_after=4)
+    for line in [
+        L("a30_gdpr_1", "Article 5(1)(c) — Data minimisation: only necessary data should be retained"),
+        L("a30_gdpr_2", "Article 5(1)(e) — Storage limitation: data must not be kept longer than necessary"),
+        L("a30_gdpr_3", "Article 9 — Special categories: health, criminal, trade union, and similar data require explicit legal basis"),
+        L("a30_gdpr_4", "Article 15 — Right of access: data subjects may request information about their data"),
+        L("a30_gdpr_5", "Article 17 — Right to erasure: data subjects may request deletion"),
+        L("a30_gdpr_6", "Article 30 — Records of processing activities: this document satisfies the obligation"),
+    ]:
+        p = doc.add_paragraph(style="List Bullet")
+        r = p.add_run(line); r.font.size = Pt(10)
+
+    _para(f"{L('a30_generated','Generated')}: {now_str}  ·  GDPRScanner  ·  {L('a30_confidential','Confidential — GDPR compliance document')}",
+          size=9, color=RGBColor(0x88, 0x88, 0x88), align=WD_ALIGN_PARAGRAPH.CENTER, space_before=20)
+
+    # ── Serialise ─────────────────────────────────────────────────────────────
+    buf = io.BytesIO()
+    doc.save(buf)
+    buf.seek(0)
+    return buf.read(), fname
+
+
+
+
+
+
+@app.route("/api/scan/stream")
+def scan_stream():
+    q = queue.Queue(maxsize=512)
+    _sse_queues.append(q)
+    # Filter replay buffer: only include events from the current scan
+    # (avoids replaying stale events from a previous scan)
+    replay_scan_id = _sse_mod._current_scan_id
+    buf = []
+    if replay_scan_id:
+        for msg in list(_sse_buffer):
+            if f'"scan_id": "{replay_scan_id}"' in msg:
+                buf.append(msg)
+    else:
+        buf = list(_sse_buffer)
+    def generate():
+        try:
+            yield ": connected\n\n"
+            if buf:
+                yield f"event: sse_replay\ndata: {{\"count\": {len(buf)}}}\n\n"
+            for msg in buf:
+                yield msg
+            if buf:
+                yield "event: sse_replay_done\ndata: {}\n\n"
+            logger.debug("[SSE] generator live, q_id=%d, replayed=%d", id(q), len(buf))
+            while True:
+                try:
+                    msg = q.get(timeout=5)
+                    yield msg
+                except queue.Empty:
+                    yield ": heartbeat\n\n"
+        except GeneratorExit:
+            pass
+        finally:
+            if q in _sse_queues:
+                _sse_queues.remove(q)
+    return Response(generate(), mimetype="text/event-stream",
+                    headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
+
+
+
+# ── Blueprint registration ────────────────────────────────────────────────────
+from routes.auth      import bp as auth_bp
+from routes.users     import bp as users_bp
+from routes.scan      import bp as scan_bp
+from routes.sources   import bp as sources_bp
+from routes.profiles  import bp as profiles_bp
+from routes.email     import bp as email_bp, _send_report_email
+from routes.database  import bp as database_bp
+from routes.export    import bp as export_bp
+from routes.app_routes import bp as app_routes_bp
+from routes.scheduler import bp as scheduler_bp
+from routes.google_auth import bp as google_auth_bp
+from routes.google_scan import bp as google_scan_bp
+from routes.viewer      import bp as viewer_bp
+
+for _bp in [auth_bp, users_bp, scan_bp, sources_bp, profiles_bp,
+            email_bp, database_bp, export_bp, app_routes_bp, scheduler_bp,
+            google_auth_bp, google_scan_bp, viewer_bp]:
+    app.register_blueprint(_bp)
+
+# ── Entry point ───────────────────────────────────────────────────────────────
+# Parses the command-line flags, then handles the one-shot CLI commands below.
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="M365 CPR Scanner",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Headless (scheduled) usage:
+  python gdpr_scanner.py --headless --output ~/Reports/
+
+  Auth credentials (Application mode) must be provided via:
+    environment variables:  M365_CLIENT_ID, M365_TENANT_ID, M365_CLIENT_SECRET
+    or a settings JSON:     --settings /path/to/settings.json
+
+  Scan options are loaded from ~/.gdpr_scanner_settings.json (saved automatically
+  after any interactive scan), or overridden in the --settings file.
+
+  SMTP config is loaded from ~/.gdpr_scanner_smtp.json (saved in the UI) or from
+  an 'smtp' key in the --settings file.
+
+Example cron (weekly, Mondays at 06:00):
+  0 6 * * 1 M365_CLIENT_ID=... M365_TENANT_ID=... M365_CLIENT_SECRET=... \\
+            python /path/to/gdpr_scanner.py --headless --output /reports/ \\
+            --email-to compliance@company.com,ciso@company.com
+
+Example Windows Task Scheduler (run batch file):
+  m365_scan.bat:
+    set M365_CLIENT_ID=<id>
+    set M365_TENANT_ID=<tid>
+    set M365_CLIENT_SECRET=<secret>
+    python gdpr_scanner.py --headless --output C:\\Reports\\ --email-to compliance@company.com
+
+Example --settings file with SMTP:
+  {
+    "client_id": "...", "tenant_id": "...", "client_secret": "...",
+    "sources": ["email", "onedrive"],
+    "options": {"older_than_days": 365, "delta": true},
+    "smtp": {
+      "host": "smtp.office365.com", "port": 587,
+      "username": "scanner@company.com", "password": "...",
+      "use_tls": true
+    }
+  }
+""",
+    )
+    parser.add_argument("--port",     type=int, default=5100)
+    parser.add_argument("--host",     default="127.0.0.1")
+    parser.add_argument("--headless", action="store_true",
+                        help="Run a non-interactive scan and export Excel, then exit")
+    parser.add_argument("--output",   default=".",
+                        help="Output directory for Excel export in headless mode (default: .)")
+    parser.add_argument("--settings", default=None,
+                        help="Path to a JSON settings file (overrides ~/.gdpr_scanner_settings.json)")
+    parser.add_argument("--email-to", default=None,
+                        help="Comma-separated recipient addresses — send Excel report by email (headless only)")
+    parser.add_argument("--retention-years", type=int, default=None,
+                        help="Auto-delete items older than N years after headless scan (requires --headless)")
+    parser.add_argument("--fiscal-year-end", default=None,
+                        help="Fiscal year end as MM-DD for retention cutoff (e.g. 12-31 for Bogforingsloven). Omit for rolling window.")
+    parser.add_argument("--reset-db", action="store_true",
+                        help="Reset the results database (~/.gdpr_scanner.db) — permanently deletes all scan history, "
+                             "dispositions, and deletion log. Prompts for confirmation unless --yes is also passed.")
+    parser.add_argument("--yes", action="store_true",
+                        help="Skip confirmation prompts (use with --reset-db for scripted resets)")
+    parser.add_argument("--purge", action="store_true",
+                        help="Permanently delete all data files created by the scanner "
+                             "(database, token cache, credentials, checkpoints, settings, OCR cache). "
+                             "Use before decommissioning or moving to a new server. "
+                             "Prompts for confirmation unless --yes is also passed.")
+    parser.add_argument("--export-db", default=None, metavar="FILE",
+                        help="Export the database to a ZIP archive (e.g. gdpr_export_2026.zip) and exit")
+    parser.add_argument("--import-db", default=None, metavar="FILE",
+                        help="Import a previously exported ZIP archive into the database and exit")
+    parser.add_argument("--import-mode", default="merge", choices=["merge", "replace"],
+                        help="Import mode: 'merge' (default) keeps existing data and adds dispositions/deletion log; "
+                             "'replace' wipes the DB first then imports everything")
+    parser.add_argument("--profile", default=None, metavar="NAME",
+                        help="Name of the scan profile to use for headless mode")
+    parser.add_argument("--list-profiles", action="store_true",
+                        help="List all saved scan profiles and exit")
+    parser.add_argument("--save-profile", default=None, metavar="NAME",
+                        help="Save the provided CLI options as a named profile and exit")
+    parser.add_argument("--delete-profile", default=None, metavar="NAME",
+                        help="Delete a saved profile by name and exit")
+
+    # ── File scanning CLI flags (#8) ──────────────────────────────────────────
+    parser.add_argument("--scan-path", default=None, metavar="PATH",
+                        help="Scan a local folder or SMB share for CPR numbers and PII. "
+                             "Local: ~/Documents  SMB: //nas.school.dk/shares/staff")
+    parser.add_argument("--smb-user", default=None, metavar="USER",
+                        help="SMB username (e.g. DOMAIN\\username) for --scan-path on a network share")
+    parser.add_argument("--smb-host", default=None, metavar="HOST",
+                        help="SMB hostname (auto-detected from --scan-path if not specified)")
+    parser.add_argument("--smb-domain", default=None, metavar="DOMAIN",
+                        help="SMB/Windows domain (optional, included in --smb-user as DOMAIN\\user)")
+    parser.add_argument("--smb-keychain-key", default=None, metavar="KEY",
+                        help="Account name used to retrieve the SMB password from the OS keychain")
+    parser.add_argument("--smb-store-creds", action="store_true",
+                        help="Store SMB credentials in the OS keychain and exit. "
+                             "Requires --smb-host and --smb-user. Prompts for password interactively.")
+    parser.add_argument("--scan-label", default=None, metavar="LABEL",
+                        help="Display label for --scan-path results (defaults to the path)")
+    parser.add_argument("--scan-photos", action="store_true",
+                        help="Enable face detection on image files during --scan-path scan (slower)")
+    parser.add_argument("--max-file-mb", default=50, type=int, metavar="MB",
+                        help="Maximum file size in MB to scan (default: 50). "
+                             "Files larger than this are skipped.")
+
+    args = parser.parse_args()
+
+    # ── File scan CLI flags (#8) ─────────────────────────────────────────────
+    if getattr(args, "smb_store_creds", False):
+        if not FILE_SCANNER_OK:
+            print("ERROR: file_scanner.py not found — cannot store credentials.")
+            sys.exit(1)
+        smb_host = getattr(args, "smb_host", None) or ""
+        smb_user = getattr(args, "smb_user", None) or ""
+        if not smb_user:
+            print("ERROR: --smb-user required with --smb-store-creds")
+            sys.exit(1)
+        import getpass
+        pw = getpass.getpass(f"SMB password for {smb_user}@{smb_host}: ")
+        key = getattr(args, "smb_keychain_key", None) or smb_user
+        ok = store_smb_password(smb_host, smb_user, pw, key)
+        if ok:
+            print(f"  [ok] Credentials stored in OS keychain (service=gdpr-scanner-nas, account={key})")
+        else:
+            print("  [warn] keyring not available — install: pip install keyring")
+        sys.exit(0)
+
+    if getattr(args, "scan_path", None):
+        if not FILE_SCANNER_OK:
+            print("ERROR: file_scanner.py not found — cannot scan file system.")
+            sys.exit(1)
+        source = {
+            "path":         args.scan_path,
+            "label":        getattr(args, "scan_label", None) or args.scan_path,
+            "smb_host":     getattr(args, "smb_host", None) or "",
+            "smb_user":     getattr(args, "smb_user", None) or "",
+            "smb_domain":   getattr(args, "smb_domain", None) or "",
+            "keychain_key": getattr(args, "smb_keychain_key", None) or "",
+            "scan_photos":  bool(getattr(args, "scan_photos", False)),
+            "max_file_mb":  int(getattr(args, "max_file_mb", 50)),
+        }
+        print(f"[file scan] {source['label']}")
+        run_file_scan(source)
+        # Write Excel report if output path provided
+        if getattr(args, "output", None) and flagged_items:
+            try:
+                out_path = _write_excel_report(args.output)
+                if out_path:
+                    print(f"[file scan] report: {out_path}")
+            except Exception as e:
+                print(f"[file scan] report failed: {e}")
+        sys.exit(0)
+
+    # ── Profile management (15b) ──────────────────────────────────────────────
+    if getattr(args, "list_profiles", False):
+        import sys as _sys
+        profiles = _profiles_load()
+        if not profiles:
+            print("  No profiles saved. Run a scan first, or use --save-profile to create one.")
+        else:
+            print(f"\n  {'#':<4} {'Name':<30} {'Sources':<30} {'Last run':<20} {'Scan ID'}")
+            print(f"  {'-'*4} {'-'*30} {'-'*30} {'-'*20} {'-'*8}")
+            for i, p in enumerate(profiles, 1):
+                srcs    = ", ".join(p.get("sources", [])) or "—"
+                last    = (p.get("last_run") or "never")[:19]
+                scan_id = str(p.get("last_scan_id") or "—")
+                print(f"  {i:<4} {p.get('name',''):<30} {srcs:<30} {last:<20} {scan_id}")
+                if p.get("description"):
+                    print(f"       {p['description']}")
+        print()
+        _sys.exit(0)
+
+    if getattr(args, "save_profile", None):
+        import sys as _sys
+        name = args.save_profile
+        # Build profile from CLI args
+        sources = []
+        if getattr(args, "sources", None):
+            sources = [s.strip() for s in args.sources.split(",") if s.strip()]
+        profile = _profile_from_settings({
+            "sources":         sources,
+            "user_ids":        [],
+            "options":         {
+                "email_body":    True,
+                "attachments":   getattr(args, "attachments", False),
+                "older_than_days": 0,
+            },
+            "retention_years": getattr(args, "retention_years", None),
+            "fiscal_year_end": getattr(args, "fiscal_year_end", None),
+            "email_to":        getattr(args, "email_to", "") or "",
+        }, name=name)
+        existing = _profile_get(name)
+        if existing:
+            profile["id"] = existing["id"]
+        saved = _profile_save(profile)
+        print(f"\n  ✔ Profile '{name}' saved (id: {saved['id']})")
+        print(f"    Sources:   {', '.join(saved.get('sources', [])) or 'none'}")
+        if saved.get("email_to"):
+            print(f"    Email to:  {saved['email_to']}")
+        if saved.get("retention_years"):
+            print(f"    Retention: {saved['retention_years']} years")
+        print()
+        _sys.exit(0)
+
+    if getattr(args, "delete_profile", None):
+        import sys as _sys
+        name = args.delete_profile
+        ok   = _profile_delete(name)
+        if ok:
+            print(f"\n  ✔ Profile '{name}' deleted.\n")
+        else:
+            print(f"\n  ✖ Profile '{name}' not found.\n")
+            print("  Available profiles:")
+            for p in _profiles_load():
+                print(f"    • {p.get('name')}")
+            print()
+        _sys.exit(0)
+
+    # ── Resolve --profile for headless mode ───────────────────────────────────
+    _active_profile_id: str | None = None
+    if getattr(args, "profile", None) and args.headless:
+        import sys as _sys
+        p = _profile_get(args.profile)
+        if not p:
+            print(f"\n  ✖ Profile '{args.profile}' not found.\n")
+            print("  Available profiles:")
+            for pr in _profiles_load():
+                print(f"    • {pr.get('name')}")
+            print()
+            _sys.exit(1)
+        # Populate args from profile: its sources replace args.sources; other values only fill flags the CLI left unset
+        _active_profile_id = p["id"]
+        if p.get("sources"):
+            args.sources = ",".join(p["sources"])  # used by headless scan builder
+        if p.get("retention_years") and not args.retention_years:
+            args.retention_years = p["retention_years"]
+        if p.get("fiscal_year_end") and not args.fiscal_year_end:
+            args.fiscal_year_end = p["fiscal_year_end"]
+        if p.get("email_to") and not args.email_to:
+            args.email_to = p["email_to"]
+        print(f"\n  Profile: '{p['name']}'")
+        if p.get("description"):
+            print(f"  {p['description']}")
+        if p.get("last_run"):
+            print(f"  Last run: {p['last_run'][:19]}")
+        print()
+
+    # ── Purge all scanner data files ─────────────────────────────────────────
+    if getattr(args, "purge", False):
+        import sys as _sys
+        from gdpr_db import DB_PATH as _DB_PATH
+
+        # All files created by either scanner
+        PURGE_FILES = [
+            # GDPRScanner
+            (_DB_PATH,                                              "SQLite results database"),
+            (_CONFIG_FILE,                                          "Azure app credentials"),
+            (_SMTP_CONFIG_PATH,                                     "SMTP credentials"),
+            (_SETTINGS_PATH,                                        "Headless scan settings"),
+            (_ROLE_OVERRIDES_PATH,                                  "Manual role overrides"),
+            (_FILE_SOURCES_PATH,                                    "File source definitions"),
+            (_CHECKPOINT_PATH,                                      "Scan checkpoint (resume state)"),
+            (_DELTA_PATH,                                           "Delta scan tokens"),
+            (_LANG_OVERRIDE_FILE,                                   "Language preference"),
+            (Path.home() / ".gdprscanner" / "schedule.json",           "Scheduler configuration"),
+            # Document Scanner
+            (Path.home() / ".document_scanner_ocr_cache.db",       "OCR cache"),
+            (Path.home() / ".document_scanner_lang",               "Document Scanner language preference"),
+            # MSAL token cache (created by msal library)
+            (Path.home() / ".gdprscanner" / "msal_cache.bin",         "MSAL token cache"),
+        ]
+
+        print("\n  ── GDPR Scanner — Purge data files ──────────────────────────────")
+        print("  This will permanently delete all data files created by the scanner.")
+        print("  No scan results, credentials, or cached data will remain.\n")
+
+        existing = [(p, desc) for p, desc in PURGE_FILES if p.exists()]
+        if not existing:
+            print("  No scanner data files found — nothing to delete.")
+            _sys.exit(0)
+
+        total_kb = sum(p.stat().st_size for p, _ in existing) / 1024
+        print(f"  Files to delete ({len(existing)}, {total_kb:.0f} KB total):")
+        for p, desc in existing:
+            kb = p.stat().st_size / 1024
+            print(f"    {desc:40s} {p.name}  ({kb:.0f} KB)")
+
+        print()
+        if not getattr(args, "yes", False):
+            print("  ⚠  This cannot be undone. Export the database first if you need a record.")
+            answer = input("  Type 'yes' to confirm: ").strip().lower()
+            if answer != "yes":
+                print("  Cancelled — no files deleted.")
+                _sys.exit(0)
+
+        deleted = 0
+        failed  = 0
+        for p, desc in existing:
+            try:
+                p.unlink()
+                print(f"  ✔ Deleted: {p}")
+                deleted += 1
+            except Exception as e:
+                print(f"  ✖ Failed:  {p} — {e}")
+                failed += 1
+
+        print(f"\n  Purge complete: {deleted} deleted, {failed} failed.")
+        if failed == 0:
+            print("  The scanner has left no data files on this machine.")
+        _sys.exit(0)
+
+
+    if args.reset_db:
+        import sys as _sys
+        from gdpr_db import DB_PATH as _DB_PATH
+        db_path = _DB_PATH
+        print(f"\n  Database reset requested: {db_path}")
+        if db_path.exists():
+            size_kb = round(db_path.stat().st_size / 1024, 1)
+            print(f"  Current size: {size_kb} KB")
+        else:
+            print("  (database file does not exist yet — nothing to reset)")
+            _sys.exit(0)
+
+        if not args.yes:
+            print("\n  ⚠  This will permanently delete:")
+            print("       • All scan results and flagged items")
+            print("       • CPR index and PII hit counts")
+            print("       • All compliance dispositions")
+            print("       • Deletion audit log")
+            print("       • Scan history and trend data")
+            print()
+            answer = input("  Type 'yes' to confirm: ").strip().lower()
+            if answer != "yes":
+                print("  Cancelled — database not modified.")
+                _sys.exit(0)
+
+        if DB_OK:
+            try:
+                _get_db().reset()
+                print(f"  ✔ Database reset complete: {db_path}")
+            except Exception as e:
+                print(f"  ✖ Reset failed: {e}")
+                _sys.exit(1)
+        else:
+            print("  ✖ m365_db not available — cannot reset")
+            _sys.exit(1)
+
+        # Also clear the JSON checkpoint so the UI starts with no cached results
+        _clear_checkpoint()
+        if not _CHECKPOINT_PATH.exists():
+            print(f"  ✔ Checkpoint cleared")
+
+        # Clear delta tokens too — stale after a full DB reset
+        if _DELTA_PATH.exists():
+            _DELTA_PATH.unlink()
+            print(f"  ✔ Delta tokens cleared")
+
+        if not args.headless:
+            _sys.exit(0)  # reset-only — done
+
+    # ── Export database ───────────────────────────────────────────────────────
+    if getattr(args, "export_db", None):
+        import sys as _sys
+        if not DB_OK:
+            print("  ✖ m365_db not available — cannot export")
+            _sys.exit(1)
+        out = Path(args.export_db)
+        print(f"\n  Exporting database to: {out}")
+        try:
+            meta = _get_db().export_db(out)
+            print(f"  ✔ Export complete: {out}")
+            print(f"  Exported at: {meta['exported_at']}")
+            for table, count in meta["row_counts"].items():
+                if count:
+                    print(f"    {table:20s} {count} rows")
+            print(f"  Size: {out.stat().st_size / 1024:.0f} KB")
+        except Exception as e:
+            print(f"  ✖ Export failed: {e}")
+            _sys.exit(1)
+        _sys.exit(0)
+
+    # ── Import database ───────────────────────────────────────────────────────
+    if getattr(args, "import_db", None):
+        import sys as _sys
+        if not DB_OK:
+            print("  ✖ m365_db not available — cannot import")
+            _sys.exit(1)
+        src  = Path(args.import_db)
+        mode = getattr(args, "import_mode", "merge")
+        print(f"\n  Importing from: {src}")
+        print(f"  Mode: {mode}")
+        if mode == "replace":
+            print("  ⚠  Replace mode will wipe the current database first.")
+            if not getattr(args, "yes", False):
+                answer = input("  Type 'yes' to confirm: ").strip().lower()
+                if answer != "yes":
+                    print("  Cancelled — database not modified.")
+                    _sys.exit(0)
+        try:
+            result = _get_db().import_db(src, mode=mode)
+            print(f"  ✔ Import complete ({mode} mode)")
+            print(f"  Source export date: {result.get('exported_at', 'unknown')}")
+            for table, count in result["imported"].items():
+                if count:
+                    print(f"    {table:20s} {count} rows imported")
+        except Exception as e:
+            print(f"  ✖ Import failed: {e}")
+            _sys.exit(1)
+        _sys.exit(0)
+
+    if not MSAL_OK:
+        print("⚠  msal not installed — run: pip install msal requests")
+    if not SCANNER_OK:
+        print("⚠  document_scanner not found — CPR scanning unavailable")
+
+    if args.headless:
+        # ── Headless / scheduled mode ─────────────────────────────────────────
+        import sys as _sys
+        print("\n  GDPRScanner — Headless mode")
+        print("  ─────────────────────────────────────────")
+
+        # Load settings from --settings file or saved defaults
+        cfg: dict = {}
+        if args.settings:
+            try:
+                cfg = json.loads(Path(args.settings).read_text(encoding="utf-8"))
+                print(f"  Settings loaded from: {args.settings}")
+            except Exception as e:
+                print(f"  ✖ Cannot read settings file: {e}")
+                _sys.exit(1)
+        else:
+            saved = _load_settings()
+            if saved:
+                cfg = saved
+                print(f"  Settings loaded from: {_SETTINGS_PATH}")
+            else:
+                print(f"  ✖ No saved settings found. Run an interactive scan first, or provide --settings.")
+                _sys.exit(1)
+
+        # Auth credentials from environment or settings file
+        client_id     = cfg.get("client_id")     or os.environ.get("M365_CLIENT_ID", "")
+        tenant_id     = cfg.get("tenant_id")     or os.environ.get("M365_TENANT_ID", "")
+        client_secret = cfg.get("client_secret") or os.environ.get("M365_CLIENT_SECRET", "")
+
+        if not all([client_id, tenant_id, client_secret]):
+            print("  ✖ App credentials required for headless mode.")
+            print("    Set M365_CLIENT_ID, M365_TENANT_ID, M365_CLIENT_SECRET")
+            print("    or include client_id / tenant_id / client_secret in --settings JSON.")
+            _sys.exit(1)
+
+        # Authenticate
+        try:
+            from m365_connector import M365Connector
+            conn = M365Connector(client_id, tenant_id, client_secret=client_secret)
+            conn.authenticate_app_mode()
+            print("  ✔ Authenticated (Application / client credentials)")
+        except Exception as e:
+            print(f"  ✖ Authentication failed: {e}")
+            _sys.exit(1)
+
+        # Set connector in module globals (works whether running as __main__ or imported)
+        _mod = _sys.modules[__name__]
+        _mod._connector = conn
+
+        # Build scan options from config
+        sources  = cfg.get("sources", ["email", "onedrive"])
+        user_ids = cfg.get("user_ids", [])
+        opts     = cfg.get("options",  {})
+
+        if not user_ids:
+            # Default: scan all tenant users
+            print("  No user_ids in settings — fetching all tenant users…")
+            try:
+                all_users = conn.list_users()
+                user_ids  = [{"id": u["id"],
+                              "displayName": _resolve_display_name(
+                                  u.get("displayName", ""),
+                                  u.get("mail") or u.get("userPrincipalName", ""))}
+                             for u in all_users if u.get("id")]
+                print(f"  Found {len(user_ids)} users")
+            except Exception as e:
+                print(f"  ✖ Could not list users: {e}")
+                _sys.exit(1)
+
+        scan_options = {
+            "sources":  sources,
+            "user_ids": user_ids,
+            "options":  opts,
+        }
+
+        # Print scan summary
+        print(f"  Sources: {', '.join(sources)}")
+        print(f"  Users:   {len(user_ids)}")
+        older = opts.get("older_than_days", 0)
+        print(f"  Cutoff:  {'%d days' % older if older else 'All'}")
+        print("  Scanning…\n")
+
+        # Replace broadcast with a stdout logger for headless mode
+        def _headless_broadcast(event: str, data: dict):
+            """Render one scan event on stdout; event names not handled below are ignored."""
+            if event == "scan_phase":
+                print(f"  {data.get('phase', '')}", flush=True)
+            elif event == "scan_start":
+                msg = f"  Items to scan: {data.get('total', 0)}"
+                if data.get("resumed", 0):
+                    msg += f"  ({data.get('resumed', 0)} skipped — already scanned)"
+                print(msg, flush=True)
+            elif event == "scan_progress":
+                # Coerce first: a float/None pct or a None file name would raise while formatting.
+                pct     = int(data.get("pct") or 0)
+                filled  = max(0, min(20, pct // 5))  # keep the bar exactly 20 cells wide
+                name    = str(data.get("file") or "")[:55]
+                bar     = "█" * filled + "░" * (20 - filled)
+                eta_str = f"  {data['eta']} left" if data.get("eta") else ""
+                print(f"\r  [{bar}] {pct:3d}%  {name:<55}{eta_str}", end="", flush=True)
+            elif event == "scan_file_flagged":
+                print(f"\n  ✔ {data.get('name', '')} — {data.get('cpr_count', 0)} CPR", flush=True)
+            elif event == "scan_done":
+                print(f"\n\n  Done — {data.get('flagged_count', 0)} flagged / {data.get('total_scanned', 0)} scanned", flush=True)
+            elif event == "scan_error":
+                print(f"\n  ✖ {data.get('file', '')}: {data.get('error', '')}", flush=True)
+            elif event == "scan_cancelled":
+                print(f"\n  Scan stopped after {data.get('completed', 0)} items.", flush=True)
+
+        _orig_broadcast = _mod.broadcast
+        _mod.broadcast  = _headless_broadcast
+
+        try:
+            run_scan(scan_options)
+        except Exception as e:
+            print(f"\n  ✖ Scan error: {e}")
+            _sys.exit(1)
+        finally:
+            _mod.broadcast = _orig_broadcast
+
+        if not flagged_items:
+            print("  No flagged items — no Excel file written.")
+            _sys.exit(0)
+
+        # Export Excel
+        out_dir = Path(args.output).expanduser()
+        out_dir.mkdir(parents=True, exist_ok=True)
+        import datetime as _dt
+        fname    = f"m365_scan_{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+        out_path = out_dir / fname
+
+        try:
+            xl_bytes, fname = _build_excel_bytes()
+            out_path = out_dir / fname
+            out_path.write_bytes(xl_bytes)
+            print(f"  Excel saved: {out_path}")
+        except Exception as e:
+            print(f"  ✖ Excel export failed: {e}")
+            _sys.exit(1)
+
+        # ── Email the report if --email-to was specified ──────────────────────
+        email_to = getattr(args, "email_to", None)
+        if email_to:
+            recipients = [r.strip() for r in email_to.replace(";", ",").split(",") if r.strip()]
+            # SMTP config: --settings file takes priority, then saved ~/.gdpr_scanner_smtp.json
+            smtp_cfg = _load_smtp_config()
+            if cfg.get("smtp"):
+                smtp_cfg = {**smtp_cfg, **cfg["smtp"]}
+            if not smtp_cfg.get("host"):
+                print("  ✖ Cannot send email — no SMTP config found.")
+                print("    Configure SMTP in the UI (✉ Email report panel) or add an 'smtp' key to --settings.")
+            else:
+                print(f"  Sending report to: {', '.join(recipients)}…")
+                try:
+                    _send_report_email(xl_bytes, fname, smtp_cfg, recipients)
+                    print(f"  ✔ Report emailed to {', '.join(recipients)}")
+                except Exception as e:
+                    print(f"  ✖ Email send failed: {e}")
+                    # Don't exit 1 — the Excel file was saved successfully
+
+        # ── Retention auto-delete if --retention-years was specified ──────────
+        retention_years   = getattr(args, "retention_years", None)
+        fiscal_year_end   = getattr(args, "fiscal_year_end", None)
+        if retention_years and DB_OK:
+            try:
+                from gdpr_db import overdue_cutoff
+                cutoff = overdue_cutoff(retention_years, fiscal_year_end)
+                overdue_items = _get_db().get_overdue_items(
+                    retention_years, fiscal_year_end=fiscal_year_end
+                )
+                mode_str = f"fiscal year end {fiscal_year_end}" if fiscal_year_end else "rolling"
+                print(f"\n  Retention policy: {retention_years} years ({mode_str})")
+                print(f"  Cutoff date:      {cutoff}")
+                print(f"  Overdue items:    {len(overdue_items)}")
+
+                if not overdue_items:
+                    print("  No overdue items to delete.")
+                else:
+                    # Prompt only when stdin is a TTY; otherwise (e.g. cron) delete without asking — --yes is not checked here
+                    import sys as _sys2
+                    if _sys2.stdin.isatty():
+                        answer = input(f"\n  Delete {len(overdue_items)} overdue item(s)? [y/N] ").strip().lower()
+                        if answer != "y":
+                            print("  Skipped — no items deleted.")
+                        else:
+                            _do_retention_delete(overdue_items)
+                    else:
+                        # Non-interactive (cron) — delete automatically
+                        print("  Non-interactive mode — deleting automatically…")
+                        _do_retention_delete(overdue_items)
+            except Exception as e:
+                print(f"  ✖ Retention check failed: {e}")
+
+        # ── Auto-delete items tagged delete-scheduled in disposition table ────
+        if DB_OK:
+            try:
+                db = _get_db()
+                if db:
+                    # Find all flagged items whose disposition is delete-scheduled
+                    scheduled = [
+                        item for item in flagged_items
+                        if item.get("id") and (
+                            lambda d: d and d.get("status") == "delete-scheduled"
+                        )(db.get_disposition(item.get("id", "")))
+                    ]
+                    if scheduled:
+                        print(f"\n  Disposition auto-delete: {len(scheduled)} item(s) tagged 'delete-scheduled'")
+                        import sys as _sys2
+                        if _sys2.stdin.isatty():
+                            answer = input(f"  Delete {len(scheduled)} scheduled item(s)? [y/N] ").strip().lower()
+                            if answer != "y":
+                                print("  Skipped.")
+                                scheduled = []
+                        else:
+                            print("  Non-interactive mode — deleting automatically…")
+                        if scheduled:
+                            _do_retention_delete(scheduled)
+            except Exception as e:
+                print(f"  ✖ Disposition auto-delete failed: {e}")
+
+        # Update profile last_run if a named profile was used
+        if _active_profile_id:
+            try:
+                sid = _get_db().latest_scan_id() if DB_OK else None
+                _profile_touch(_active_profile_id, sid)
+            except Exception:
+                pass
+
+        print("\n  ✔ Headless scan complete.\n")
+        _sys.exit(0)
+
+    else:
+        # ── Interactive web UI mode ───────────────────────────────────────────
+        # Single-instance guard — prevent two servers sharing the same DB/settings.
+        _lock_fh = None
+        def _acquire_lock() -> bool:  # True once this process holds the exclusive lock on app.lock
+            global _lock_fh  # NOTE(review): binds the module-level name; whether the `_lock_fh = None` just above is that same variable depends on the enclosing scope — confirm
+            from app_config import _DATA_DIR
+            _DATA_DIR.mkdir(parents=True, exist_ok=True)
+            try:
+                _lock_fh = open(_DATA_DIR / "app.lock", "w")  # "w" truncates the file before the lock is attempted
+                if sys.platform == "win32":
+                    import msvcrt as _msvcrt
+                    _msvcrt.locking(_lock_fh.fileno(), _msvcrt.LK_NBLCK, 1)  # non-blocking lock on 1 byte
+                else:
+                    import fcntl as _fcntl
+                    _fcntl.flock(_lock_fh, _fcntl.LOCK_EX | _fcntl.LOCK_NB)  # exclusive, non-blocking
+                _lock_fh.write(str(_os.getpid()))  # record the owner PID in the lock file
+                _lock_fh.flush()
+                return True  # handle is left open; closing it would release the lock
+            except (IOError, OSError):  # lock is held elsewhere, or the file could not be opened/locked
+                if _lock_fh:
+                    _lock_fh.close()
+                return False
+
+        if not _acquire_lock():
+            print("GDPRScanner is already running. Stop the existing instance first.", file=sys.stderr)
+            sys.exit(1)
+
+        # Find a free port — auto-increment from the requested port if in use.
+        import socket as _socket
+        def _find_free_port(start: int, host: str) -> int:  # first port in start..start+99 that binds on host
+            for p in range(start, min(start + 100, 65536)):  # cap at 65535: bind() raises OverflowError (not OSError) beyond it
+                with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as s:
+                    try:
+                        s.bind((host, p))
+                        return p  # the probe socket is closed on exit, so the port is free for the caller
+                    except OSError:  # port busy or not bindable on this host — try the next one
+                        continue
+            raise RuntimeError(f"No free port found in range {start}–{start + 99}")
+
+        actual_port = _find_free_port(args.port, args.host)
+        if actual_port != args.port:
+            print(f"  [!] Port {args.port} in use — using {actual_port} instead")
+        args.port = actual_port
+        # Machine-readable port line — parseable by a parent process via stdout.
+        print(f"GDPR_PORT={args.port}", flush=True)
+
+        print(f"\n  GDPRScanner\n  ──────────────────────────────")
+        print(f"  Open: http://{args.host}:{args.port}")
+
+        # Start in-process scheduler (#19)
+        try:
+            import scan_scheduler as _sched_mod
+            scan_scheduler = _sched_mod.scan_scheduler
+            if scan_scheduler.start():
+                _sched_cfg = _sched_mod.load_schedule_config()
+                if _sched_cfg.get("enabled"):
+                    _nxt = scan_scheduler.next_run_time() or "—"
+                    print(f"  Scheduler: enabled (next run: {_nxt})")
+                else:
+                    print("  Scheduler: disabled (enable in Settings → Scheduler)")
+            else:
+                print("  Scheduler: unavailable (pip install apscheduler)")
+        except Exception as _sched_err:
+            print(f"  Scheduler: failed to start ({_sched_err})")
+
+        print(f"  Press Ctrl+C to stop\n")
+        app.run(host=args.host, port=args.port, debug=False, threaded=True)
diff --git a/google_connector.py b/google_connector.py
new file mode 100644
index 0000000..d901fa2
--- /dev/null
+++ b/google_connector.py
@@ -0,0 +1,726 @@
+#!/usr/bin/env python3
+"""
+google_connector.py — Google Workspace connector for GDPR Scanner.
+
+Handles service-account authentication with domain-wide delegation and exposes
+iterators for:
+  - Gmail messages (body + attachments) via the Gmail API
+  - Google Drive files (with export for native Docs/Sheets/Slides) via Drive API
+
+All file content is yielded as (metadata_dict, bytes_content) tuples, matching
+the same contract used by m365_connector so the scan engine can reuse _scan_bytes.
+
+Authentication:
+  Service account JSON key with domain-wide delegation enabled in Google Workspace
+  Admin Console → Security → API Controls → Domain-wide delegation.
+
+  Required OAuth scopes (add to the service account's delegation entry):
+    https://www.googleapis.com/auth/gmail.readonly
+    https://www.googleapis.com/auth/drive.readonly
+    https://www.googleapis.com/auth/admin.directory.user.readonly   (user listing)
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import json
+import logging
+import time
+import threading
+from pathlib import Path
+from typing import Iterator, Optional
+
+# ── google-auth / google-api-python-client ────────────────────────────────────
+try:
+    from google.oauth2 import service_account
+    from googleapiclient.discovery import build
+    from googleapiclient.errors import HttpError
+    from googleapiclient.http import MediaIoBaseDownload
+    GOOGLE_AUTH_OK = True
+
+    # Suppress the googleapiclient.http WARNING that fires before raising
+    # HttpError for exportSizeLimitExceeded — we handle it ourselves below.
+    class _SuppressExportSizeWarning(logging.Filter):  # drops log records whose message mentions exportSizeLimitExceeded
+        def filter(self, record: logging.LogRecord) -> bool:
+            return "exportSizeLimitExceeded" not in record.getMessage()  # False tells logging to discard the record
+
+    logging.getLogger("googleapiclient.http").addFilter(_SuppressExportSizeWarning())
+
+except ImportError:
+    GOOGLE_AUTH_OK = False
+
+_DATA_DIR    = Path.home() / ".gdprscanner"
+_DATA_DIR.mkdir(exist_ok=True)  # runs at import time; creates the directory when it does not exist yet
+_SA_KEY_FILE       = _DATA_DIR / "google_sa.json"
+_GOOGLE_TOKEN_FILE = _DATA_DIR / "google_token.json"
+
+PERSONAL_SCOPES = [  # NOTE(review): presumably for a personal-account sign-in flow not shown in this part of the file — confirm
+    "https://www.googleapis.com/auth/gmail.readonly",
+    "https://www.googleapis.com/auth/drive.readonly",
+]
+_DEVICE_AUTH_URL = "https://oauth2.googleapis.com/device/code"
+_TOKEN_URL       = "https://oauth2.googleapis.com/token"
+_USERINFO_URL    = "https://www.googleapis.com/oauth2/v2/userinfo"
+_DEVICE_GRANT    = "urn:ietf:params:oauth:grant-type:device_code"  # OAuth 2.0 device-authorization grant type (RFC 8628)
+
+GMAIL_SCOPES = [
+    "https://www.googleapis.com/auth/gmail.readonly",
+]
+DRIVE_SCOPES = [
+    "https://www.googleapis.com/auth/drive.readonly",
+]
+ADMIN_SCOPES = [
+    "https://www.googleapis.com/auth/admin.directory.user.readonly",
+]
+
+# Google-native MIME types and the export format we request
+_EXPORT_MAP = {
+    "application/vnd.google-apps.document":     ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx"),
+    "application/vnd.google-apps.spreadsheet":  ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx"),
+    "application/vnd.google-apps.presentation": ("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx"),
+    "application/vnd.google-apps.drawing":      ("application/pdf", ".pdf"),
+    "application/vnd.google-apps.form":         ("application/pdf", ".pdf"),
+}
+
+# Maximum export size for native Google files (bytes) — skip larger ones
+_MAX_EXPORT_BYTES = 20 * 1024 * 1024  # 20 MB
+
+# ── OU role mapping ───────────────────────────────────────────────────────────
+_OU_ROLES_PATH = Path(__file__).parent / "classification" / "google_ou_roles.json"
+
+def _load_ou_roles() -> tuple[list, list]:
+    """Load student/staff OU prefix lists from classification/google_ou_roles.json.
+    Returns (student_prefixes, staff_prefixes), both lowercased; the built-in
+    defaults below are returned if the file cannot be read or parsed."""
+    try:
+        data = json.loads(_OU_ROLES_PATH.read_text(encoding="utf-8"))
+        students = [p.lower() for p in data.get("student_ou_prefixes", [])]
+        staff    = [p.lower() for p in data.get("staff_ou_prefixes", [])]
+        return students, staff
+    except Exception:  # deliberate best-effort: any read/parse problem falls back to the defaults
+        return ["/elever", "/students"], ["/personale", "/staff", "/lærere", "/ansatte"]
+
+def classify_ou_role(org_unit_path: str) -> str:
+    """Map an orgUnitPath to 'student', 'staff', or 'other' by configured prefix.
+
+    Student prefixes are tested before staff prefixes; an empty path is 'other'."""
+    if not org_unit_path:
+        return "other"
+    lowered = org_unit_path.lower()
+    student_prefixes, staff_prefixes = _load_ou_roles()
+    if any(lowered.startswith(pfx) for pfx in student_prefixes):
+        return "student"
+    if any(lowered.startswith(pfx) for pfx in staff_prefixes):
+        return "staff"
+    return "other"
+
+
+
+class GoogleError(Exception):
+    """Raised by this module for Google connector setup and credential problems."""
+
+
+class GoogleConnector:
+    """
+    Wraps service-account + domain-wide delegation auth for Gmail and Drive.
+
+    Usage:
+        conn = GoogleConnector(key_dict, admin_email="admin@domain.com")
+        for meta, data in conn.iter_gmail_messages("user@domain.com"):
+            ...
+    """
+
+    def __init__(self, key_dict: dict, admin_email: str = ""):
+        """Keep the service-account key and the optional admin e-mail (None is treated as empty).
+
+        Raises GoogleError if the Google client libraries are missing or key_dict is not a service_account key."""
+        if not GOOGLE_AUTH_OK:
+            raise GoogleError("google-auth not installed — run: "
+                              "pip install google-auth google-auth-httplib2 google-api-python-client")
+        self._key_dict    = key_dict
+        self._admin_email = (admin_email or "").strip()
+        self._lock        = threading.Lock()
+        if key_dict.get("type") != "service_account":
+            raise GoogleError("Key file must be a service_account JSON — found type: " + str(key_dict.get("type")))
+
+    # ── Credential factories ──────────────────────────────────────────────────
+
+    def _creds_for(self, user_email: str, scopes: list):
+        """Build credentials from the stored key for the given scopes, with the subject
+        set to user_email (delegated credentials impersonating that user)."""
+        return service_account.Credentials.from_service_account_info(
+            self._key_dict, scopes=scopes
+        ).with_subject(user_email)
+
+    def _admin_creds(self):
+        """Directory+Gmail+Drive credentials impersonating admin_email; GoogleError if it is unset."""
+        if self._admin_email:
+            return self._creds_for(self._admin_email, ADMIN_SCOPES + GMAIL_SCOPES + DRIVE_SCOPES)
+        raise GoogleError("admin_email required to list workspace users")
+
+    # ── Connectivity check ────────────────────────────────────────────────────
+
+    def is_authenticated(self) -> bool:
+        """Light check — verifies credentials refresh without making API calls."""
+        try:
+            creds = service_account.Credentials.from_service_account_info(
+                self._key_dict, scopes=GMAIL_SCOPES
+            )
+            return bool(creds)
+        except Exception:
+            return False
+
+    def get_service_account_email(self) -> str:
+        return self._key_dict.get("client_email", "")
+
+    def get_project_id(self) -> str:
+        return self._key_dict.get("project_id", "")
+
+    # ── User listing ─────────────────────────────────────────────────────────
+
+    def list_users(self, domain: str = "") -> list[dict]:
+        """
+        Return [{id, email, displayName}] for all active users in the domain.
+        Requires Admin Directory API scope on the service account delegation.
+        Falls back gracefully if admin_email is not set.
+        """
+        if not self._admin_email:
+            return []
+        try:
+            creds   = self._admin_creds()
+            service = build("admin", "directory_v1", credentials=creds, cache_discovery=False)
+            results = []
+            page_token = None
+            params: dict = {"customer": "my_customer", "maxResults": 500, "orderBy": "email", "projection": "full"}
+            if domain:
+                params["domain"] = domain
+            while True:
+                if page_token:
+                    params["pageToken"] = page_token
+                resp = service.users().list(**params).execute()
+                for u in resp.get("users", []):
+                    if not u.get("suspended") and not u.get("archived"):
+                        ou_path = u.get("orgUnitPath", "")
+                        results.append({
+                            "id":           u.get("id", ""),
+                            "email":        u.get("primaryEmail", ""),
+                            "displayName":  u.get("name", {}).get("fullName", ""),
+                            "orgUnitPath":  ou_path,
+                            "userRole":     classify_ou_role(ou_path),
+                        })
+                page_token = resp.get("nextPageToken")
+                if not page_token:
+                    break
+            return results
+        except HttpError as e:
+            raise GoogleError(f"Admin Directory API error: {e}") from e
+
+    # ── Gmail iterator ────────────────────────────────────────────────────────
+
+    def iter_gmail_messages(
+        self,
+        user_email: str,
+        max_messages: int = 2000,
+        scan_body: bool = True,
+        scan_attachments: bool = True,
+        max_attach_mb: float = 20.0,
+    ) -> Iterator[tuple[dict, bytes]]:
+        """
+        Yield (metadata, content_bytes) for each Gmail message / attachment.
+
+        For messages with only inline text body: yields one item with the body text.
+        For attachments: yields one item per attachment (skips if > max_attach_mb).
+        """
+        try:
+            creds   = self._creds_for(user_email, GMAIL_SCOPES)
+            service = build("gmail", "v1", credentials=creds, cache_discovery=False)
+        except HttpError as e:
+            raise GoogleError(f"Gmail auth failed for {user_email}: {e}") from e
+        yield from _gmail_iter(service, user_email, max_messages, scan_body, scan_attachments, max_attach_mb)
+
+    # ── Drive iterator ────────────────────────────────────────────────────────
+
+    def iter_drive_files(
+        self,
+        user_email: str,
+        max_files: int = 5000,
+        max_file_mb: float = 50.0,
+    ) -> Iterator[tuple[dict, bytes]]:
+        """
+        Yield (metadata, content_bytes) for each Drive file.
+
+        Native Google formats (Docs/Sheets/Slides) are exported to Office format.
+        Binary files are downloaded directly (skipped if > max_file_mb).
+        """
+        try:
+            creds   = self._creds_for(user_email, DRIVE_SCOPES)
+            service = build("drive", "v3", credentials=creds, cache_discovery=False)
+        except HttpError as e:
+            raise GoogleError(f"Drive auth failed for {user_email}: {e}") from e
+        yield from _drive_iter(service, user_email, max_files, max_file_mb)
+
+
+# ── Persistence helpers ───────────────────────────────────────────────────────
+
+def load_saved_key() -> Optional[dict]:
+    """Load service account key from disk. Returns None if not found."""
+    if _SA_KEY_FILE.exists():
+        try:
+            return json.loads(_SA_KEY_FILE.read_text())
+        except Exception:
+            return None
+    return None
+
+
+def save_key(key_dict: dict) -> None:
+    """Persist service account key to disk (chmod 600)."""
+    _SA_KEY_FILE.write_text(json.dumps(key_dict, indent=2))
+    try:
+        _SA_KEY_FILE.chmod(0o600)
+    except Exception:
+        pass
+
+
+def delete_key() -> None:
+    """Remove persisted service account key."""
+    try:
+        if _SA_KEY_FILE.exists():
+            _SA_KEY_FILE.unlink()
+    except Exception:
+        pass
+
+
+# ── Internal helpers ──────────────────────────────────────────────────────────
+
+def _epoch_to_iso(epoch_secs: int) -> str:
+    from datetime import datetime, timezone
+    try:
+        return datetime.fromtimestamp(epoch_secs, tz=timezone.utc).isoformat()
+    except Exception:
+        return ""
+
+
+def _extract_body(payload: dict) -> bytes:
+    """Recursively extract plain-text (or HTML) body from a Gmail message payload."""
+    mime = payload.get("mimeType", "")
+    body_data = payload.get("body", {}).get("data", "")
+
+    if mime == "text/plain" and body_data:
+        return base64.urlsafe_b64decode(body_data)
+    if mime == "text/html" and body_data:
+        # Return raw HTML bytes — _scan_bytes handles HTML stripping
+        return base64.urlsafe_b64decode(body_data)
+
+    # Recurse into multipart
+    for part in payload.get("parts", []):
+        result = _extract_body(part)
+        if result:
+            return result
+    return b""
+
+
+def _iter_parts(payload: dict):
+    """Yield all leaf parts (for attachment scanning)."""
+    parts = payload.get("parts", [])
+    if not parts:
+        yield payload
+    else:
+        for part in parts:
+            yield from _iter_parts(part)
+
+
+# ── Shared iteration helpers (used by both GoogleConnector and PersonalGoogleConnector) ──
+
+def _gmail_iter(
+    service,
+    user_email: str,
+    max_messages: int,
+    scan_body: bool,
+    scan_attachments: bool,
+    max_attach_mb: float,
+) -> Iterator[tuple[dict, bytes]]:
+    """Paginate Gmail messages and yield (metadata, bytes) tuples."""
+    ids: list[str] = []
+    page_token = None
+    while len(ids) < max_messages:
+        params: dict = {"userId": "me", "maxResults": min(500, max_messages - len(ids))}
+        if page_token:
+            params["pageToken"] = page_token
+        try:
+            resp = service.users().messages().list(**params).execute()
+        except HttpError as e:
+            raise GoogleError(f"Gmail list error for {user_email}: {e}") from e
+        ids.extend(m["id"] for m in resp.get("messages", []))
+        page_token = resp.get("nextPageToken")
+        if not page_token:
+            break
+
+    max_attach_bytes = int(max_attach_mb * 1024 * 1024)
+
+    for msg_id in ids:
+        try:
+            msg = service.users().messages().get(
+                userId="me", id=msg_id, format="full"
+            ).execute()
+        except HttpError:
+            continue
+
+        headers = {h["name"].lower(): h["value"] for h in msg.get("payload", {}).get("headers", [])}
+        meta = {
+            "id":           f"gmail:{msg_id}",
+            "name":         headers.get("subject", "(no subject)"),
+            "_source":      "gmail",
+            "_source_type": "gmail",
+            "_account":     user_email,
+            "_account_id":  user_email,
+            "_url":         f"https://mail.google.com/mail/u/0/#inbox/{msg_id}",
+            "receivedDateTime": _epoch_to_iso(int(msg.get("internalDate", 0)) // 1000),
+            "size":         msg.get("sizeEstimate", 0),
+        }
+
+        payload = msg.get("payload", {})
+
+        if scan_body:
+            body_bytes = _extract_body(payload)
+            if body_bytes:
+                yield (meta, body_bytes)
+
+        if scan_attachments:
+            for part in _iter_parts(payload):
+                filename = part.get("filename", "")
+                body     = part.get("body", {})
+                att_id   = body.get("attachmentId")
+                size     = body.get("size", 0)
+                if not att_id or not filename:
+                    continue
+                if size > max_attach_bytes:
+                    continue
+                try:
+                    att = service.users().messages().attachments().get(
+                        userId="me", messageId=msg_id, id=att_id
+                    ).execute()
+                    data = base64.urlsafe_b64decode(att.get("data", ""))
+                except HttpError:
+                    continue
+                att_meta = {
+                    **meta,
+                    "id":   f"gmail:{msg_id}:{att_id}",
+                    "name": filename,
+                    "size": len(data),
+                }
+                yield (att_meta, data)
+
+
+def _drive_iter(
+    service,
+    user_email: str,
+    max_files: int,
+    max_file_mb: float,
+) -> Iterator[tuple[dict, bytes]]:
+    """Paginate Drive files and yield (metadata, bytes) tuples."""
+    max_bytes = int(max_file_mb * 1024 * 1024)
+    fields = "nextPageToken,files(id,name,mimeType,size,webViewLink,modifiedTime,owners,parents)"
+    page_token = None
+    fetched = 0
+
+    while fetched < max_files:
+        params: dict = {
+            "pageSize": min(1000, max_files - fetched),
+            "fields": fields,
+            "q": "trashed = false",
+        }
+        if page_token:
+            params["pageToken"] = page_token
+        try:
+            resp = service.files().list(**params).execute()
+        except HttpError as e:
+            raise GoogleError(f"Drive list error for {user_email}: {e}") from e
+
+        for f in resp.get("files", []):
+            fetched += 1
+            mime  = f.get("mimeType", "")
+            fid   = f.get("id", "")
+            fname = f.get("name", "")
+            size  = int(f.get("size", 0) or 0)
+
+            meta = {
+                "id":           f"gdrive:{fid}",
+                "name":         fname,
+                "_source":      "gdrive",
+                "_source_type": "gdrive",
+                "_account":     user_email,
+                "_account_id":  user_email,
+                "_url":         f.get("webViewLink", ""),
+                "lastModifiedDateTime": f.get("modifiedTime", "")[:10],
+                "size":         size,
+            }
+
+            if mime in _EXPORT_MAP:
+                export_mime, ext = _EXPORT_MAP[mime]
+                try:
+                    req   = service.files().export_media(fileId=fid, mimeType=export_mime)
+                    buf   = io.BytesIO()
+                    dl    = MediaIoBaseDownload(buf, req, chunksize=4 * 1024 * 1024)
+                    done  = False
+                    total = 0
+                    while not done:
+                        status, done = dl.next_chunk()
+                        total = buf.tell()
+                        if total > _MAX_EXPORT_BYTES:
+                            break
+                    if total > _MAX_EXPORT_BYTES:
+                        continue
+                    meta["name"] = fname + ext
+                    meta["size"] = total
+                    data = buf.getvalue()
+                    del buf
+                    yield (meta, data)
+                except HttpError as e:
+                    if "exportSizeLimitExceeded" in str(e):
+                        print(
+                            f"[gdrive] skip '{fname}' — file too large for Google export API"
+                            f" (exportSizeLimitExceeded); fid={fid}",
+                            flush=True,
+                        )
+                    continue
+            else:
+                if mime.startswith("application/vnd.google-apps."):
+                    continue   # other native formats we can't export — skip
+                if size == 0 or size > max_bytes:
+                    continue
+                try:
+                    req  = service.files().get_media(fileId=fid)
+                    buf  = io.BytesIO()
+                    dl   = MediaIoBaseDownload(buf, req, chunksize=4 * 1024 * 1024)
+                    done = False
+                    while not done:
+                        _, done = dl.next_chunk()
+                    data = buf.getvalue()
+                    del buf
+                    yield (meta, data)
+                except HttpError:
+                    continue
+
+        page_token = resp.get("nextPageToken")
+        if not page_token:
+            break
+
+
+# ── Personal Google account (OAuth device-code) connector ────────────────────
+
+class PersonalGoogleConnector:
+    """
+    OAuth 2.0 device-code connector for personal Google accounts.
+
+    Provides the same public interface as GoogleConnector so the scan engine
+    can use either transparently via state.google_connector.
+
+    Authentication:
+      GCP project with an OAuth 2.0 Desktop App credential.
+      Required scopes: gmail.readonly, drive.readonly.
+    """
+
+    def __init__(self, token_data: dict):
+        """
+        Construct from a stored token dict with keys:
+          access_token, refresh_token, client_id, client_secret, token_uri, scopes
+        """
+        if not GOOGLE_AUTH_OK:
+            raise GoogleError(
+                "google-auth not installed — run: "
+                "pip install google-auth google-auth-httplib2 google-api-python-client"
+            )
+        self._token_data = token_data
+        self._creds = self._build_creds()
+
+    def _build_creds(self):
+        from google.oauth2.credentials import Credentials
+        return Credentials(
+            token=self._token_data.get("access_token"),
+            refresh_token=self._token_data.get("refresh_token"),
+            token_uri=self._token_data.get("token_uri", _TOKEN_URL),
+            client_id=self._token_data.get("client_id"),
+            client_secret=self._token_data.get("client_secret"),
+            scopes=self._token_data.get("scopes", PERSONAL_SCOPES),
+        )
+
+    def _refresh_if_needed(self) -> None:
+        from google.auth.transport.requests import Request
+        if not self._creds.valid:
+            if self._creds.expired and self._creds.refresh_token:
+                self._creds.refresh(Request())
+                updated = dict(self._token_data)
+                updated["access_token"] = self._creds.token
+                save_personal_token(updated)
+                self._token_data = updated
+
+    def is_authenticated(self) -> bool:
+        try:
+            self._refresh_if_needed()
+            return bool(self._creds.token)
+        except Exception:
+            return False
+
+    def get_user_info(self) -> dict:
+        """Return {id, email, displayName} for the authenticated user."""
+        if not REQUESTS_OK:
+            raise GoogleError("requests library required")
+        self._refresh_if_needed()
+        resp = _requests.get(
+            _USERINFO_URL,
+            headers={"Authorization": f"Bearer {self._creds.token}"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+        return {
+            "id":          data.get("id", ""),
+            "email":       data.get("email", ""),
+            "displayName": data.get("name", ""),
+        }
+
+    def list_users(self, domain: str = "") -> list[dict]:
+        """Return a single-item list for the signed-in user (no admin access needed)."""
+        info = self.get_user_info()
+        return [{
+            "id":          info["email"],
+            "email":       info["email"],
+            "displayName": info["displayName"],
+            "orgUnitPath": "",
+            "userRole":    "other",
+        }]
+
+    def iter_gmail_messages(
+        self,
+        user_email: str,
+        max_messages: int = 2000,
+        scan_body: bool = True,
+        scan_attachments: bool = True,
+        max_attach_mb: float = 20.0,
+    ) -> Iterator[tuple[dict, bytes]]:
+        """Yield (metadata, bytes) for each Gmail message / attachment."""
+        self._refresh_if_needed()
+        try:
+            service = build("gmail", "v1", credentials=self._creds, cache_discovery=False)
+        except HttpError as e:
+            raise GoogleError(f"Gmail auth failed: {e}") from e
+        yield from _gmail_iter(service, user_email, max_messages, scan_body, scan_attachments, max_attach_mb)
+
+    def iter_drive_files(
+        self,
+        user_email: str,
+        max_files: int = 5000,
+        max_file_mb: float = 50.0,
+    ) -> Iterator[tuple[dict, bytes]]:
+        """Yield (metadata, bytes) for each Drive file."""
+        self._refresh_if_needed()
+        try:
+            service = build("drive", "v3", credentials=self._creds, cache_discovery=False)
+        except HttpError as e:
+            raise GoogleError(f"Drive auth failed: {e}") from e
+        yield from _drive_iter(service, user_email, max_files, max_file_mb)
+
+    @staticmethod
+    def get_device_code_flow(client_id: str, client_secret: str) -> dict:
+        """
+        Initiate a Google device-code flow.
+        Returns a flow dict containing user_code, verification_url, device_code, etc.
+        """
+        if not REQUESTS_OK:
+            raise GoogleError("requests library required — run: pip install requests")
+        resp = _requests.post(_DEVICE_AUTH_URL, data={
+            "client_id": client_id,
+            "scope":     " ".join(PERSONAL_SCOPES),
+        }, timeout=10)
+        data = resp.json()
+        if "device_code" not in data:
+            raise GoogleError(
+                f"Failed to start device flow: {data.get('error_description', data)}"
+            )
+        return {
+            "device_code":      data["device_code"],
+            "user_code":        data["user_code"],
+            "verification_url": data.get("verification_url", "https://www.google.com/device"),
+            "expires_in":       data.get("expires_in", 1800),
+            "interval":         data.get("interval", 5),
+            "client_id":        client_id,
+            "client_secret":    client_secret,
+        }
+
+    @staticmethod
+    def complete_device_code_flow(flow: dict) -> "PersonalGoogleConnector":
+        """
+        Poll until the user completes sign-in at verification_url.
+        Blocks the calling thread. Returns a ready PersonalGoogleConnector.
+        """
+        if not REQUESTS_OK:
+            raise GoogleError("requests library required — run: pip install requests")
+        client_id     = flow["client_id"]
+        client_secret = flow["client_secret"]
+        device_code   = flow["device_code"]
+        interval      = flow.get("interval", 5)
+        expires_in    = flow.get("expires_in", 1800)
+        deadline      = time.time() + expires_in
+
+        while time.time() < deadline:
+            time.sleep(interval)
+            resp = _requests.post(_TOKEN_URL, data={
+                "client_id":     client_id,
+                "client_secret": client_secret,
+                "device_code":   device_code,
+                "grant_type":    _DEVICE_GRANT,
+            }, timeout=10)
+            data = resp.json()
+            if "access_token" in data:
+                token_data = {
+                    "access_token":  data["access_token"],
+                    "refresh_token": data.get("refresh_token", ""),
+                    "client_id":     client_id,
+                    "client_secret": client_secret,
+                    "token_uri":     _TOKEN_URL,
+                    "scopes":        PERSONAL_SCOPES,
+                }
+                save_personal_token(token_data)
+                return PersonalGoogleConnector(token_data)
+            err = data.get("error", "")
+            if err == "authorization_pending":
+                continue
+            if err == "slow_down":
+                interval = max(interval + 5, 5)
+                continue
+            raise GoogleError(
+                f"Device flow error: {data.get('error_description', err)}"
+            )
+
+        raise GoogleError("Device code flow timed out")
+
+
+# ── Personal token persistence ────────────────────────────────────────────────
+
+def save_personal_token(data: dict) -> None:
+    """Persist OAuth token to disk (chmod 600)."""
+    _GOOGLE_TOKEN_FILE.write_text(json.dumps(data, indent=2))
+    try:
+        _GOOGLE_TOKEN_FILE.chmod(0o600)
+    except Exception:
+        pass
+
+
+def load_personal_token() -> Optional[dict]:
+    """Load OAuth token from disk. Returns None if not found."""
+    if _GOOGLE_TOKEN_FILE.exists():
+        try:
+            return json.loads(_GOOGLE_TOKEN_FILE.read_text())
+        except Exception:
+            return None
+    return None
+
+
+def delete_personal_token() -> None:
+    """Remove persisted OAuth token."""
+    try:
+        if _GOOGLE_TOKEN_FILE.exists():
+            _GOOGLE_TOKEN_FILE.unlink()
+    except Exception:
+        pass
diff --git a/icon_gdpr.icns b/icon_gdpr.icns
new file mode 100644
index 0000000..2a7ccec
Binary files /dev/null and b/icon_gdpr.icns differ
diff --git a/icon_gdpr.ico b/icon_gdpr.ico
new file mode 100644
index 0000000..e232955
Binary files /dev/null and b/icon_gdpr.ico differ
diff --git a/icon_gdpr.png b/icon_gdpr.png
new file mode 100644
index 0000000..24cc986
Binary files /dev/null and b/icon_gdpr.png differ
diff --git a/install_macos.sh b/install_macos.sh
new file mode 100755
index 0000000..21fc32f
--- /dev/null
+++ b/install_macos.sh
@@ -0,0 +1,423 @@
+#!/usr/bin/env bash
+# ══════════════════════════════════════════════════════════════════════════════
+# Document Scanner — macOS Installation Script
+# ══════════════════════════════════════════════════════════════════════════════
+# Installs all dependencies for document_scanner.py, server.py, build.py,
+# gdpr_scanner.py and m365_connector.py:
+#   - Homebrew (if not present)
+#   - Python 3.11 or 3.12  (3.13+ blocked — spaCy incompatible)
+#   - Tesseract OCR with Danish + English language packs
+#   - Poppler (required by pdf2image for PDF rendering)
+#   - A virtualenv at ./venv with all Python packages
+#   - spaCy Danish NER model (~500 MB)
+#
+# All Python packages are installed into a virtualenv (./venv) to avoid the
+# "externally-managed-environment" error from Homebrew Python 3.12+.
+#
+# Usage:
+#   chmod +x install_macos.sh && ./install_macos.sh
+# ══════════════════════════════════════════════════════════════════════════════
+
+set -euo pipefail
+
+# ── Colours ───────────────────────────────────────────────────────────────────
+RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
+CYAN='\033[0;36m'; BOLD='\033[1m'; RESET='\033[0m'
+
+step()  { echo -e "\n${CYAN}==> $1${RESET}"; }
+ok()    { echo -e "    ${GREEN}[OK]${RESET} $1"; }
+warn()  { echo -e "    ${YELLOW}[!!]${RESET} $1"; }
+fail()  { echo -e "    ${RED}[XX]${RESET} $1"; exit 1; }
+
+# Where the virtualenv will live — next to this script
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV_DIR="$SCRIPT_DIR/venv"
+
+echo ""
+echo -e "${BOLD}  Document Scanner — macOS Setup${RESET}"
+echo "  -----------------------------------------"
+echo ""
+
+# ── 0. Detect architecture ────────────────────────────────────────────────────
+ARCH=$(uname -m)
+if [[ "$ARCH" == "arm64" ]]; then
+    BREW_PREFIX="/opt/homebrew"
+    ok "Apple Silicon (M-series) — Homebrew prefix: $BREW_PREFIX"
+else
+    BREW_PREFIX="/usr/local"
+    ok "Intel Mac — Homebrew prefix: $BREW_PREFIX"
+fi
+
+# ── 1. Install Homebrew ───────────────────────────────────────────────────────
+step "Checking Homebrew"
+if ! command -v brew &>/dev/null; then
+    echo "    Installing Homebrew..."
+    /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+    # Put the freshly installed brew on PATH for the rest of this run
+    eval "$("$BREW_PREFIX/bin/brew" shellenv)"
+    ok "Homebrew installed"
+else
+    ok "Homebrew already installed: $(brew --version | head -1)"
+fi
+# Best-effort: load the Homebrew environment even when brew was already present
+eval "$("$BREW_PREFIX/bin/brew" shellenv)" 2>/dev/null || true
+
+# ── 2. Find or install Python 3.11 / 3.12 ────────────────────────────────────
+# Homebrew Python 3.12+ is "externally managed" — pip installs must go into
+# a virtualenv. We find a compatible base interpreter here; all packages will
+# be installed into ./venv below, not into the system interpreter.
+step "Checking Python (need 3.11 or 3.12 — spaCy incompatible with 3.13+)"
+
+find_compatible_python() {
+    for cmd in \
+        "$BREW_PREFIX/bin/python3.12" \
+        "$BREW_PREFIX/bin/python3.11" \
+        python3.12 python3.11 python3 python; do
+        if command -v "$cmd" &>/dev/null 2>&1; then
+            local ver maj min
+            ver=$("$cmd" --version 2>&1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
+            maj=$(echo "$ver" | cut -d. -f1)
+            min=$(echo "$ver" | cut -d. -f2)
+            if [[ "$maj" == "3" ]] && { [[ "$min" == "11" ]] || [[ "$min" == "12" ]]; }; then
+                echo "$cmd"
+                return 0
+            fi
+        fi
+    done
+    return 1
+}
+
+BASE_PYTHON=""
+if BASE_PYTHON=$(find_compatible_python); then
+    ok "Compatible Python: $($BASE_PYTHON --version 2>&1)  ($BASE_PYTHON)"
+else
+    if command -v python3 &>/dev/null; then
+        EXISTING=$(python3 --version 2>&1 | grep -oE '[0-9]+\.[0-9]+' | head -1)
+        EXIST_MIN=$(echo "$EXISTING" | cut -d. -f2)
+        if [[ "$EXIST_MIN" -ge 13 ]]; then
+            warn "Python $EXISTING is too new (spaCy requires ≤ 3.12)"
+        fi
+    fi
+    echo "    Installing Python 3.12 via Homebrew..."
+    brew install python@3.12
+    BASE_PYTHON="$BREW_PREFIX/bin/python3.12"
+    if [[ ! -x "$BASE_PYTHON" ]]; then
+        echo "    python3.12 not found, trying python3.11..."
+        brew install python@3.11
+        BASE_PYTHON="$BREW_PREFIX/bin/python3.11"
+    fi
+    [[ -x "$BASE_PYTHON" ]] || fail "Python install failed. Try: brew install python@3.12"
+    ok "Python installed: $($BASE_PYTHON --version 2>&1)"
+fi
+
+# Confirm version
+$BASE_PYTHON --version 2>&1 | grep -qE 'Python 3\.(11|12)' \
+    || fail "Unexpected version: $($BASE_PYTHON --version 2>&1)"
+
+# ── 3. Create virtualenv ──────────────────────────────────────────────────────
+step "Setting up virtualenv at $VENV_DIR"
+
+if [[ -d "$VENV_DIR" && -x "$VENV_DIR/bin/python" ]]; then
+    # Validate it was built with a compatible interpreter.
+    # `|| true`: a venv whose interpreter no longer runs must fall through to
+    # the rebuild branch instead of aborting the script via pipefail.
+    VENV_VER=$("$VENV_DIR/bin/python" --version 2>&1 | grep -oE '[0-9]+\.[0-9]+' | head -1) || true
+    VENV_MIN=$(echo "$VENV_VER" | cut -d. -f2)
+    if [[ "$VENV_MIN" == "11" || "$VENV_MIN" == "12" ]]; then
+        ok "Existing virtualenv is compatible (Python $VENV_VER) — reusing"
+    else
+        warn "Existing virtualenv uses Python ${VENV_VER:-unknown} — rebuilding"
+        rm -rf "$VENV_DIR"
+        "$BASE_PYTHON" -m venv "$VENV_DIR"
+        ok "Virtualenv rebuilt"
+    fi
+else
+    "$BASE_PYTHON" -m venv "$VENV_DIR"
+    ok "Virtualenv created"
+fi
+
+# All subsequent Python/pip commands use the venv
+PYTHON="$VENV_DIR/bin/python"
+# Kept for later sections and printed hints. Expanding $PIP unquoted
+# word-splits on spaces in the path, so this section calls "$PYTHON" -m pip.
+PIP="$PYTHON -m pip"
+
+# Upgrade pip inside the venv (no restrictions here)
+echo "    Upgrading pip..."
+"$PYTHON" -m pip install --upgrade pip --quiet
+ok "pip up to date: $("$PYTHON" -m pip --version)"
+
+# ── 4. Install Tesseract OCR ──────────────────────────────────────────────────
+step "Installing Tesseract OCR + language packs"
+if brew list tesseract &>/dev/null; then
+    ok "Tesseract already installed: $(tesseract --version 2>&1 | head -1)"
+else
+    brew install tesseract
+    ok "Tesseract installed: $(tesseract --version 2>&1 | head -1)"
+fi
+
+if brew list tesseract-lang &>/dev/null; then
+    ok "Tesseract language packs already installed"
+else
+    echo "    Installing tesseract-lang (~300 MB)..."
+    brew install tesseract-lang
+    ok "Language packs installed"
+fi
+
+# Capture the language list first. Piping straight into `grep -q` under
+# pipefail can report failure when grep exits early and the producer is
+# killed by SIGPIPE, which would wrongly warn that Danish is missing.
+TESS_LANGS="$(tesseract --list-langs 2>&1 || true)"
+if grep -q "^dan$" <<< "$TESS_LANGS"; then
+    ok "Danish (dan) OCR available"
+else
+    warn "Danish language pack not found — try: brew reinstall tesseract-lang"
+fi
+
+# ── 5. Install Poppler ────────────────────────────────────────────────────────
+step "Installing Poppler (required for PDF rendering)"
+if ! brew list poppler &>/dev/null; then
+    brew install poppler
+    ok "Poppler installed"
+else
+    ok "Poppler already installed"
+fi
+# Report where pdftoppm resolves; a missing binary is only a warning
+if command -v pdftoppm &>/dev/null; then
+    ok "pdftoppm: $(which pdftoppm)"
+else
+    warn "pdftoppm not on PATH — launcher will probe Homebrew paths automatically"
+fi
+
+# ── 6. Install Python packages into venv ─────────────────────────────────────
+step "Installing Python packages into virtualenv"
+
+packages=(
+    "flask"
+    "pdfplumber"
+    "pdf2image"
+    "pytesseract"
+    "pypdf"
+    "reportlab"
+    "python-docx"
+    "openpyxl"
+    "img2pdf"
+    "opencv-python-headless"
+    "numpy"
+    "Pillow"
+    "spacy"
+    "py7zr"
+    "pymupdf"
+    "pywebview"
+    "pystray"
+    "pyinstaller"
+    "pyinstaller-hooks-contrib"
+    # GDPRScanner
+    "msal"
+    "requests"
+    # Optional — File system scanning (#8)
+    # smbprotocol: native SMB2/3 without mounting (needed for network share scanning)
+    # keyring: OS keychain credential storage for SMB passwords
+    # python-dotenv: .env file fallback for headless SMB credentials
+    "smbprotocol"
+    "keyring"
+    "python-dotenv"
+    # Scheduler (#19)
+    "APScheduler"
+    # Google Workspace scanning (#10)
+    "google-auth"
+    "google-auth-httplib2"
+    "google-api-python-client"
+)
+
+# Install one package at a time so a single failure does not stop the rest;
+# failures are collected and reported together below.
+failed=()
+for pkg in "${packages[@]}"; do
+    printf "    %-36s" "$pkg..."
+    # Call the venv interpreter directly and quoted: expanding $PIP unquoted
+    # word-splits the path when the project directory contains spaces.
+    if "$PYTHON" -m pip install "$pkg" --quiet --disable-pip-version-check 2>/dev/null; then
+        echo -e "${GREEN}OK${RESET}"
+    else
+        echo -e "${RED}FAILED${RESET}"
+        failed+=("$pkg")
+    fi
+done
+
+if [[ ${#failed[@]} -gt 0 ]]; then
+    warn "Failed: ${failed[*]}"
+    warn "Retry: $PIP install ${failed[*]}"
+fi
+
+# ── 7. Install create-dmg ─────────────────────────────────────────────────────
+step "Checking create-dmg (optional — for .dmg packaging)"
+if ! command -v create-dmg &>/dev/null; then
+    # Optional tool: an install failure only produces a warning
+    if brew install create-dmg 2>/dev/null; then
+        ok "create-dmg installed"
+    else
+        warn "create-dmg unavailable — install manually: brew install create-dmg"
+    fi
+else
+    ok "create-dmg already installed"
+fi
+
+# ── 8. Install spaCy Danish NER model ─────────────────────────────────────────
+step "Installing spaCy Danish NER model (~500 MB)"
+
+# spaCy's download command uses shutil.which("pip") to find a package
+# installer. Inside a venv the wrapper may be named pip3 only. Ensure a
+# `pip` executable exists so spaCy can find it.
+if [[ ! -x "$VENV_DIR/bin/pip" ]]; then
+    echo "    Creating pip wrapper in venv (needed by spaCy download)…"
+    cat > "$VENV_DIR/bin/pip" << 'PIPSHIM'
+#!/usr/bin/env bash
+exec "$(dirname "$0")/python3" -m pip "$@"
+PIPSHIM
+    chmod +x "$VENV_DIR/bin/pip"
+fi
+# Verify pip is now visible
+if "$VENV_DIR/bin/pip" --version &>/dev/null; then
+    ok "pip available: $("$VENV_DIR/bin/pip" --version 2>&1)"
+else
+    warn "pip wrapper not working — will use direct pip install fallback"
+fi
+
+# The interpreter is always invoked as "$PYTHON" (quoted): expanding $PYTHON
+# or $PIP unquoted word-splits the path when the project directory contains
+# spaces.
+if "$PYTHON" -c "import da_core_news_lg" &>/dev/null; then
+    ok "spaCy Danish model already installed"
+else
+    installed=false
+    # Try the largest model first, then fall back to the smaller ones
+    for model in da_core_news_lg da_core_news_md da_core_news_sm; do
+        echo "    Trying $model..."
+
+        # Method 1: spacy download with venv/bin explicitly on PATH
+        # (spaCy uses shutil.which("pip") which searches PATH)
+        if PATH="$VENV_DIR/bin:$PATH" "$PYTHON" -m spacy download "$model" 2>/dev/null; then
+            ok "Installed: $model (via spacy download)"
+            installed=true
+            break
+        fi
+
+        # Method 2: direct pip install by model name.
+        # NOTE(review): assumes the model name resolves from the configured
+        # package index — confirm.
+        echo "    spacy download failed — trying pip install..."
+        if "$PYTHON" -m pip install "$model" 2>&1; then
+            if "$PYTHON" -c "import ${model//-/_}" &>/dev/null; then
+                ok "Installed: $model (via pip)"
+                installed=true
+                break
+            else
+                warn "$model pip install reported success but import failed"
+            fi
+        fi
+    done
+    if [[ "$installed" == false ]]; then
+        warn "No spaCy model installed — anonymisation unavailable"
+        warn "Retry manually:  $PIP install da_core_news_sm"
+    fi
+fi
+
+# ── 9. Verify ─────────────────────────────────────────────────────────────────
+step "Verifying installation"
+
+ok "Python (venv): $($PYTHON --version 2>&1)"
+ok "Tesseract: $(tesseract --version 2>&1 | head -1)"
+ok "Poppler: $(pdftoppm -v 2>&1 | head -1 || echo 'available via Homebrew PATH')"
+
+$PYTHON - <<'PYCHECK'
+import sys
+checks = [
+    ('flask',         'flask'),
+    ('pdfplumber',    'pdfplumber'),
+    ('pdf2image',     'pdf2image'),
+    ('pytesseract',   'pytesseract'),
+    ('pypdf',         'pypdf'),
+    ('reportlab',     'reportlab'),
+    ('python-docx',   'docx'),
+    ('openpyxl',      'openpyxl'),
+    ('opencv-python-headless', 'cv2'),
+    ('numpy',         'numpy'),
+    ('Pillow',        'PIL'),
+    ('spacy',         'spacy'),
+    ('img2pdf',       'img2pdf'),
+    ('pywebview',     'webview'),
+    ('pystray',       'pystray'),
+    ('PyInstaller',   'PyInstaller'),
+    ('py7zr',         'py7zr'),
+    # GDPRScanner
+    ('msal',          'msal'),
+    ('requests',      'requests'),
+]
+optional_checks = [
+    ('smbprotocol',   'smbprotocol',  'SMB/CIFS network share scanning'),
+    ('keyring',       'keyring',      'OS keychain credential storage'),
+    ('python-dotenv', 'dotenv',       '.env file credential fallback'),
+    ('APScheduler',   'apscheduler',  'In-process scheduled scans'),
+]
+missing = []
+for name, imp in checks:
+    try:
+        __import__(imp)
+        print(f'    \033[32m[OK]\033[0m {name}')
+    except ImportError:
+        print(f'    \033[31m[!!]\033[0m {name}  MISSING')
+        missing.append(name)
+print('\n    Optional (file system scanning):')
+for name, imp, desc in optional_checks:
+    try:
+        __import__(imp)
+        print(f'    \033[32m[OK]\033[0m {name}  — {desc}')
+    except ImportError:
+        print(f'    \033[33m[--]\033[0m {name}  — {desc} (not installed)')
+if missing:
+    print(f'\n    Missing: {", ".join(missing)}')
+    sys.exit(1)
+print('\n    All packages verified.')
+PYCHECK
+
+ALL_OK=$?
+
+# ── 10. Shell profile ─────────────────────────────────────────────────────────
+step "Shell PATH configuration"
+SHELL_RC=""
+if [[ "$SHELL" == *"zsh"*  ]]; then SHELL_RC="$HOME/.zshrc"; fi
+if [[ "$SHELL" == *"bash"* ]]; then SHELL_RC="$HOME/.bash_profile"; fi
+
+if [[ -n "$SHELL_RC" ]]; then
+    if grep -q "brew shellenv" "$SHELL_RC" 2>/dev/null; then
+        ok "Homebrew already configured in $SHELL_RC"
+    else
+        echo "" >> "$SHELL_RC"
+        echo "# Homebrew" >> "$SHELL_RC"
+        echo "eval \"\$($BREW_PREFIX/bin/brew shellenv)\"" >> "$SHELL_RC"
+        ok "Homebrew added to $SHELL_RC — restart Terminal or: source $SHELL_RC"
+    fi
+fi
+
+# ── 11. Create launch scripts ─────────────────────────────────────────────────
+step "Creating launch scripts"
+
+# start_gdpr.sh — launches GDPRScanner
+cat > "$SCRIPT_DIR/start_gdpr.sh" << M365EOF
+#!/usr/bin/env bash
+# GDPRScanner — launch script (uses ./venv)
+SCRIPT_DIR="\$(cd "\$(dirname "\${BASH_SOURCE[0]}")" && pwd)"
+source "\$SCRIPT_DIR/venv/bin/activate"
+exec python3 "\$SCRIPT_DIR/gdpr_scanner.py" "\${@}"
+M365EOF
+chmod +x "$SCRIPT_DIR/start_gdpr.sh"
+ok "Created: start_gdpr.sh"
+
+# build_gdpr.sh — builds standalone GDPRScanner .app
+cat > "$SCRIPT_DIR/build_gdpr.sh" << BLD365EOF
+#!/usr/bin/env bash
+# GDPRScanner — build .app (uses ./venv)
+SCRIPT_DIR="\$(cd "\$(dirname "\${BASH_SOURCE[0]}")" && pwd)"
+source "\$SCRIPT_DIR/venv/bin/activate"
+exec python3 "\$SCRIPT_DIR/build_gdpr.py" --clean "\$@"
+BLD365EOF
+chmod +x "$SCRIPT_DIR/build_gdpr.sh"
+ok "Created: build_gdpr.sh"
+
+
+# ── Done ──────────────────────────────────────────────────────────────────────
+echo ""
+echo "  -----------------------------------------"
+[[ $ALL_OK -eq 0 ]] \
+    && echo -e "  ${GREEN}${BOLD}Installation complete!${RESET}" \
+    || echo -e "  ${YELLOW}${BOLD}Installation complete with warnings — see above${RESET}"
+echo ""
+echo -e "  ${BOLD}GDPRScanner:${RESET}"
+echo -e "    ${CYAN}./start_gdpr.sh${RESET}"
+echo "    Then open: http://127.0.0.1:5100"
+echo ""
+echo -e "  ${BOLD}File system scanning (optional):${RESET}"
+echo -e "    ${CYAN}./start_gdpr.sh --scan-path ~/Documents${RESET}"
+echo -e "    ${CYAN}./start_gdpr.sh --scan-path //nas/shares --smb-user 'DOMAIN\\user'${RESET}"
+echo "    Or use the '📁 File sources' panel in the GDPRScanner UI"
+echo ""
+echo -e "  ${BOLD}Build standalone app:${RESET}"
+echo -e "    ${CYAN}./build_gdpr.sh${RESET}   → dist/GDPRScanner.app"
+echo ""
+echo "  -----------------------------------------"
+echo ""
diff --git a/install_windows.ps1 b/install_windows.ps1
new file mode 100644
index 0000000..b65603d
--- /dev/null
+++ b/install_windows.ps1
@@ -0,0 +1,568 @@
+#Requires -RunAsAdministrator
+<#
+.SYNOPSIS
+    M365 GDPR Scanner -- Windows Installation Script
+.DESCRIPTION
+    Installs all dependencies for gdpr_scanner.py and m365_connector.py:
+      - Python 3.11 or 3.12  (3.13+ blocked -- spaCy incompatible)
+      - Tesseract OCR 5.x with Danish + English language packs
+      - Poppler (required by pdf2image for PDF rendering)
+      - All Python packages including pywebview, pystray (into .\venv)
+      - spaCy Danish NER model (da_core_news_lg, ~500 MB)
+    Tesseract and Poppler are installed into the project-local tools\ folder;
+    the generated .bat launch scripts put them on PATH.
+.NOTES
+    Run from an elevated PowerShell prompt:
+        PowerShell -ExecutionPolicy Bypass -File install_windows.ps1
+#>
+
+# Always run from the folder this script lives in.
+# (Kept below the help block: comment-based help is only recognised when
+# nothing but comments and blank lines precede it.)
+Set-Location -Path $PSScriptRoot
+
+Set-StrictMode -Version Latest
+$ErrorActionPreference = "Stop"
+
+# -- Colours --------------------------------------------------------------------
+function Write-Step  { param($msg) Write-Host "`n==> $msg" -ForegroundColor Cyan }
+function Write-OK    { param($msg) Write-Host "    [OK] $msg" -ForegroundColor Green }
+function Write-Warn  { param($msg) Write-Host "    [!!] $msg" -ForegroundColor Yellow }
+function Write-Fail  { param($msg) Write-Host "    [XX] $msg" -ForegroundColor Red; exit 1 }
+
+Write-Host ""
+Write-Host "  M365 GDPR Scanner - Windows Setup" -ForegroundColor White
+Write-Host "  -----------------------------------------" -ForegroundColor DarkGray
+Write-Host ""
+
+# -- 0. Check architecture ------------------------------------------------------
+if ($env:PROCESSOR_ARCHITECTURE -ne "AMD64") {
+    Write-Warn "This script targets 64-bit Windows. Proceeding anyway."
+}
+
+# -- 1. Install Chocolatey (if not present) -------------------------------------
+Write-Step "Checking Chocolatey package manager"
+if (-not (Get-Command choco -ErrorAction SilentlyContinue)) {
+    Write-Host "    Installing Chocolatey..."
+    Set-ExecutionPolicy Bypass -Scope Process -Force
+    [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072
+    Invoke-Expression ((New-Object System.Net.WebClient).DownloadString(
+        'https://community.chocolatey.org/install.ps1'))
+    $env:PATH = [System.Environment]::GetEnvironmentVariable("PATH","Machine") + ";" +
+                [System.Environment]::GetEnvironmentVariable("PATH","User")
+    Write-OK "Chocolatey installed"
+} else {
+    Write-OK "Chocolatey already installed ($((choco --version)))"
+}
+
+# -- Virtualenv path -----------------------------------------------------------
+$VenvDir    = Join-Path $PSScriptRoot "venv"
+$VenvPython = Join-Path $VenvDir "Scripts\python.exe"
+
+# -- 2. Install / validate Python ---------------------------------------------------
+# Compatible: 3.11.x or 3.12.x
+# spaCy does not support 3.13+. pywebview requires 3.8+.
+Write-Step "Checking Python (need 3.11 or 3.12 -- prefer 3.12, spaCy incompatible with 3.13+)"
+
+function Split-PythonCommand {
+    # Internal helper: turn a Python command string into the executable to run
+    # plus any leading launcher arguments.
+    #   "py -3.12"                               -> Exe "py", PrefixArgs @("-3.12")
+    #   "C:\Program Files\Python312\python.exe"  -> Exe <that path>, PrefixArgs @()
+    # A string that names an existing file is never split, so install paths
+    # containing spaces are invoked intact.
+    param([string]$Cmd)
+    if (($Cmd -match '[\\/]') -and (Test-Path -LiteralPath $Cmd -PathType Leaf)) {
+        return @{ Exe = $Cmd; PrefixArgs = @() }
+    }
+    $parts = @($Cmd -split " ")
+    $prefix = @()
+    if ($parts.Count -gt 1) { $prefix = @($parts[1..($parts.Count - 1)]) }
+    return @{ Exe = $parts[0]; PrefixArgs = $prefix }
+}
+
+function Get-PythonExe {
+    # Returns the path/command of a compatible Python (3.11 or 3.12), or $null.
+    # Candidates are gathered in priority order (py launcher, commands on PATH,
+    # well-known install folders); the first one reporting 3.11/3.12 wins.
+    $candidates = @()
+
+    # py launcher -- wrap in try/catch so "No runtime found" exit codes don't bubble up
+    if (Get-Command py -ErrorAction SilentlyContinue) {
+        foreach ($v in @("3.12", "3.11")) {
+            $test = $null
+            $prev = $ErrorActionPreference
+            try {
+                $ErrorActionPreference = 'SilentlyContinue'
+                $test = & py "-$v" --version 2>&1
+            } catch {
+                $test = $null
+            } finally {
+                $ErrorActionPreference = $prev
+            }
+            if ("$test" -match "^Python $v") { $candidates += "py -$v" }
+        }
+    }
+
+    # Direct python / python3 commands
+    foreach ($cmd in @("python3.12", "python3.11", "python", "python3")) {
+        if (Get-Command $cmd -ErrorAction SilentlyContinue) {
+            $candidates += $cmd
+        }
+    }
+
+    # Well-known install locations (e.g. installed from python.org without PATH update)
+    $wellKnown = @(
+        "$env:LOCALAPPDATA\Programs\Python\Python312\python.exe",
+        "$env:LOCALAPPDATA\Programs\Python\Python311\python.exe",
+        "C:\Python312\python.exe",
+        "C:\Python311\python.exe",
+        "C:\Program Files\Python312\python.exe",
+        "C:\Program Files\Python311\python.exe"
+    )
+    foreach ($p in $wellKnown) {
+        if (Test-Path $p) { $candidates += $p }
+    }
+
+    foreach ($cmd in $candidates) {
+        $raw = $null
+        $prev = $ErrorActionPreference
+        try {
+            # A candidate that cannot be started, or that only writes to stderr
+            # (presumably what a python.exe app-execution alias stub does --
+            # TODO confirm), is skipped instead of aborting under "Stop".
+            $ErrorActionPreference = 'SilentlyContinue'
+            $raw = Get-PythonVersionStr $cmd
+        } catch {
+            $raw = $null
+        } finally {
+            $ErrorActionPreference = $prev
+        }
+        if ("$raw" -match "Python (\d+)\.(\d+)") {
+            $maj = [int]$Matches[1]; $min = [int]$Matches[2]
+            if ($maj -eq 3 -and ($min -eq 11 -or $min -eq 12)) { return $cmd }
+        }
+    }
+    return $null
+}
+
+function Get-PythonVersionStr {
+    # Runs "<cmd> --version" and returns its output (stdout and stderr merged).
+    # $cmd is either a command with optional launcher args ("py -3.12") or a
+    # full path to python.exe.
+    param($cmd)
+    $inv = Split-PythonCommand $cmd
+    $prefix = $inv.PrefixArgs
+    $raw = & $inv.Exe @prefix --version 2>&1
+    return $raw
+}
+
+function Invoke-Py {
+    # Runs the selected interpreter ($script:pythonCmd) with $PyArgs.
+    # Emits the interpreter's output followed by its exit code, so callers that
+    # only need the status should read $LASTEXITCODE or the last emitted item.
+    param([string[]]$PyArgs)
+    $inv = Split-PythonCommand $script:pythonCmd
+    $prefix = $inv.PrefixArgs
+    & $inv.Exe @prefix @PyArgs
+    return $LASTEXITCODE
+}
+
+$pythonCmd = Get-PythonExe
+
+if ($pythonCmd) {
+    $verStr = Get-PythonVersionStr $pythonCmd
+    Write-OK "Compatible Python found: $verStr  (using '$pythonCmd')"
+} else {
+    # Check if an incompatible version is present so we can warn clearly
+    if (Get-Command python -ErrorAction SilentlyContinue) {
+        $raw = & python --version 2>&1
+        if ($raw -match "Python (\d+)\.(\d+)") {
+            $maj = [int]$Matches[1]; $min = [int]$Matches[2]
+            if ($maj -eq 3 -and $min -ge 13) {
+                Write-Warn "Python $maj.$min is installed but too new (spaCy needs <= 3.12)"
+                Write-Warn "Python 3.11 will be installed alongside it"
+            } elseif ($maj -eq 3 -and $min -le 10) {
+                Write-Warn "Python $maj.$min is installed but too old (need >= 3.11)"
+            }
+        }
+    }
+    # ---- Try Chocolatey first (fast, silent) ----
+    $chocoOk = $false
+    if (Get-Command choco -ErrorAction SilentlyContinue) {
+        Write-Host "    Installing Python 3.12 via Chocolatey..."
+        choco install python312 -y --no-progress | Out-Null
+        $env:PATH = [System.Environment]::GetEnvironmentVariable("PATH","Machine") + ";" +
+                    [System.Environment]::GetEnvironmentVariable("PATH","User")
+        $pythonCmd = Get-PythonExe
+        if ($pythonCmd) { $chocoOk = $true }
+    }
+
+    # ---- Direct download from python.org (works without Chocolatey) ----
+    if (-not $chocoOk) {
+        $PyVersion  = "3.12.9"
+        $PyInstaller = "$env:TEMP\python-$PyVersion-amd64.exe"
+        $PyUrl       = "https://www.python.org/ftp/python/$PyVersion/python-$PyVersion-amd64.exe"
+
+        Write-Host "    Downloading Python $PyVersion from python.org..."
+        try {
+            [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
+            & curl.exe -L --silent --show-error -o $PyInstaller $PyUrl
+            if ($LASTEXITCODE -ne 0) { throw "curl.exe download failed" }
+        } catch {
+            Write-Fail "Download failed: $_`nInstall Python 3.12 manually from https://www.python.org/downloads/ then re-run this script."
+        }
+
+        Write-Host "    Installing Python $PyVersion (silent, all users)..."
+        $installArgs = "/quiet InstallAllUsers=0 PrependPath=0 Include_test=0"
+        Start-Process -FilePath $PyInstaller -ArgumentList $installArgs -Wait -NoNewWindow
+
+        # Reload PATH so the new python.exe is visible in this session
+        $env:PATH = [System.Environment]::GetEnvironmentVariable("PATH","Machine") + ";" +
+                    [System.Environment]::GetEnvironmentVariable("PATH","User")
+
+        $pythonCmd = Get-PythonExe
+        if (-not $pythonCmd) {
+            Write-Fail ("Python $PyVersion was installed but could not be found.`n" +
+                        "  -- Open a NEW PowerShell window and re-run this script, or`n" +
+                        "  -- Install manually from https://www.python.org/downloads/")
+        }
+    }
+
+    $verStr = Get-PythonVersionStr $pythonCmd
+    Write-OK "Python installed: $verStr"
+}
+
+# Final sanity check
+# A full path (which may contain spaces, e.g. under "C:\Program Files") is
+# invoked as-is; only launcher-style commands such as "py -3.12" are split.
+if (($pythonCmd -match '[\\/]') -and (Test-Path -LiteralPath $pythonCmd -PathType Leaf)) {
+    $raw = & $pythonCmd --version 2>&1
+} else {
+    $parts = @($pythonCmd -split " ")
+    $raw = & $parts[0] $(if ($parts.Count -gt 1) { $parts[1..($parts.Count-1)] }) --version 2>&1
+}
+if ("$raw" -notmatch "Python 3\.(11|12)") {
+    Write-Fail "Could not confirm a Python 3.11 or 3.12 interpreter. Got: $raw"
+}
+
+# -- Create / reuse virtualenv -------------------------------------------------
+Write-Step "Setting up virtualenv at $VenvDir"
+if (Test-Path $VenvPython) {
+    Write-OK "Existing virtualenv found -- reusing"
+} else {
+    # A venv folder without python.exe is a broken/partial environment: rebuild it.
+    if (Test-Path $VenvDir) { Remove-Item $VenvDir -Recurse -Force }
+    Write-Host "    Creating virtualenv..."
+    # Invoke-Py also emits the exit code as output; discard it and check
+    # $LASTEXITCODE instead, as the package install loop does.
+    Invoke-Py @("-m", "venv", $VenvDir) | Out-Null
+    if ($LASTEXITCODE -ne 0 -or -not (Test-Path $VenvPython)) {
+        Write-Fail "Could not create virtualenv at $VenvDir (python -m venv exit code $LASTEXITCODE)"
+    }
+    Write-OK "Virtualenv created: $VenvDir"
+}
+
+function Invoke-VenvPip {
+    param([string[]]$PipArgs)
+    & $VenvPython -m pip @PipArgs
+    return $LASTEXITCODE
+}
+
+Write-Host "    Upgrading pip..."
+Invoke-VenvPip @("install", "--upgrade", "pip", "--quiet") | Out-Null
+Write-OK "pip up to date"
+
+# -- 3. Install Visual C++ Redistributable (required by OpenCV/cv2) -----------
+# Best effort: a failed download or install is reported as a warning and the
+# script continues.
+Write-Step "Checking Visual C++ Redistributable 2015-2022"
+$vcKey = "HKLM:\SOFTWARE\Microsoft\VisualStudio\14.0\VC\Runtimes\x64"
+$vcAlt = "HKLM:\SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\x64"
+$vcInstalled = (Test-Path $vcKey) -or (Test-Path $vcAlt)
+if ($vcInstalled) {
+    Write-OK "Visual C++ Redistributable already installed"
+} else {
+    Write-Host "    Downloading VC++ Redistributable..."
+    $vcUrl = "https://aka.ms/vs/17/release/vc_redist.x64.exe"
+    $vcInstaller = "$env:TEMP\vc_redist.x64.exe"
+    # --fail: an HTTP error must not be saved to disk as if it were the installer
+    & curl.exe -L --fail --silent --show-error -o $vcInstaller $vcUrl
+    if ($LASTEXITCODE -ne 0 -or -not (Test-Path $vcInstaller)) {
+        # Do not try to run (or delete) an installer that was never downloaded.
+        Write-Warn "VC++ download failed -- skipping (may already be installed)"
+    } else {
+        Write-Host "    Installing silently..."
+        $vcProc = Start-Process -FilePath $vcInstaller -ArgumentList "/install", "/quiet", "/norestart" -Wait -PassThru
+        # 3010 = success, reboot required
+        if ($vcProc.ExitCode -eq 0 -or $vcProc.ExitCode -eq 3010) {
+            Write-OK "Visual C++ Redistributable installed"
+        } else {
+            Write-Warn "VC++ installer exited with code $($vcProc.ExitCode) -- continuing"
+        }
+    }
+    if (Test-Path $vcInstaller) { Remove-Item $vcInstaller -Force }
+}
+
+# -- 4. Install Tesseract OCR ---------------------------------------------------
+# Tesseract is installed into the project-local tools\tesseract folder; the
+# installer is fetched from the first mirror that delivers a plausible file.
+Write-Step "Installing Tesseract OCR"
+$ToolsDir = Join-Path $PSScriptRoot "tools"
+$TessDir  = Join-Path $ToolsDir "tesseract"
+$tessExe  = Join-Path $TessDir "tesseract.exe"
+New-Item -ItemType Directory -Force -Path $ToolsDir | Out-Null
+if (Test-Path $tessExe) {
+    $tessVer = & $tessExe --version 2>&1 | Select-Object -First 1
+    Write-OK "Tesseract already installed: $tessVer"
+} else {
+    Write-Host "    Downloading Tesseract 5.x installer..."
+    # Download Tesseract installer -- try multiple mirrors
+    $tessInstaller = "$env:TEMP\tesseract-setup.exe"
+    $tessUrls = @(
+        "https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w64-setup-5.3.4.20240503.exe",
+        "https://github.com/UB-Mannheim/tesseract/releases/download/v5.3.4.20240503/tesseract-ocr-w64-setup-5.3.4.20240503.exe"
+    )
+    $downloaded = $false
+    foreach ($tessUrl in $tessUrls) {
+        Write-Host "    Trying: $tessUrl"
+        # Suppress NativeCommandError -- check exit code manually
+        $prev = $ErrorActionPreference; $ErrorActionPreference = "SilentlyContinue"
+        & curl.exe -L --fail --silent --show-error -o $tessInstaller $tessUrl 2>&1 | Out-Null
+        $curlExit = $LASTEXITCODE
+        $ErrorActionPreference = $prev
+        # Anything under 1 MB cannot be the real installer (e.g. an error page)
+        $sz = if (Test-Path $tessInstaller) { (Get-Item $tessInstaller).Length } else { 0 }
+        if ($curlExit -eq 0 -and $sz -gt 1MB) {
+            Write-OK "Downloaded ($([math]::Round($sz/1MB,1)) MB)"
+            $downloaded = $true
+            break
+        }
+        Write-Host "    Failed (exit $curlExit, $sz bytes) -- trying next mirror..."
+        if (Test-Path $tessInstaller) { Remove-Item $tessInstaller -Force }
+    }
+    if (-not $downloaded) {
+        # Last resort: let the user fetch the installer by hand, then continue.
+        Write-Host ""
+        Write-Host "    Automatic download failed." -ForegroundColor Yellow
+        Write-Host "    Please download the installer manually:" -ForegroundColor Yellow
+        Write-Host "    https://github.com/UB-Mannheim/tesseract/releases/tag/v5.3.4.20240503" -ForegroundColor Cyan
+        Write-Host "    Save it as: $tessInstaller" -ForegroundColor Cyan
+        Write-Host "    Then press Enter to continue..." -ForegroundColor Yellow
+        Read-Host
+        if (-not (Test-Path $tessInstaller) -or (Get-Item $tessInstaller).Length -lt 1MB) {
+            Write-Fail "Installer not found at $tessInstaller"
+        }
+    }
+    Write-Host "    Running installer (silent)..."
+    Start-Process -FilePath $tessInstaller -ArgumentList "/S /D=$TessDir" -Wait
+    Remove-Item $tessInstaller -Force
+    # Only claim success if the executable really landed in tools\tesseract.
+    if (Test-Path $tessExe) {
+        Write-OK "Tesseract installed in project tools\ folder"
+    } else {
+        Write-Warn "Tesseract installer finished but $tessExe was not found"
+    }
+}
+
+# Tesseract is local in tools\ -- put it on this session's PATH so the
+# verification step further down can call "tesseract" by name.
+$env:PATH = "$TessDir;$env:PATH"
+
+# -- 4b. Install Tesseract language packs --------------------------------------
+# Downloads the traineddata files that are not yet present in tessdata.
+Write-Step "Installing Tesseract language packs (Danish + English)"
+$tessData = Join-Path $TessDir "tessdata"
+New-Item -ItemType Directory -Force -Path $tessData | Out-Null
+$langFiles = @{
+    "dan" = "https://github.com/tesseract-ocr/tessdata/raw/main/dan.traineddata"
+    "eng" = "https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata"
+}
+foreach ($lang in $langFiles.Keys) {
+    $dest = Join-Path $tessData "$lang.traineddata"
+    if (Test-Path $dest) {
+        Write-OK "'$lang' language pack already present"
+        continue
+    }
+    Write-Host "    Downloading $lang.traineddata..."
+    # --fail: never store an HTTP error page as a .traineddata file
+    & curl.exe -L --fail --silent --show-error -o $dest $langFiles[$lang]
+    if ($LASTEXITCODE -eq 0 -and (Test-Path $dest)) {
+        Write-OK "'$lang' installed"
+    } else {
+        # Drop any partial file so the next run retries instead of reporting
+        # the pack as "already present".
+        if (Test-Path $dest) { Remove-Item $dest -Force }
+        Write-Warn "Failed to download $lang language pack"
+    }
+}
+
+# -- 5. Install Poppler --------------------------------------------------------
+# Poppler is unpacked into tools\poppler so that pdftoppm.exe ends up in
+# tools\poppler\Library\bin -- the folder the generated .bat launch scripts
+# put on PATH.
+Write-Step "Installing Poppler (required for PDF rendering)"
+$PopplerDir = Join-Path $ToolsDir "poppler"
+$popplerBin  = Join-Path $PopplerDir "Library\bin"
+if (Test-Path (Join-Path $popplerBin "pdftoppm.exe")) {
+    Write-OK "Poppler already installed"
+} else {
+    Write-Host "    Downloading Poppler for Windows..."
+    $popplerUrl = "https://github.com/oschwartz10612/poppler-windows/releases/download/v24.07.0-0/Release-24.07.0-0.zip"
+    $popplerZip = "$env:TEMP\poppler.zip"
+    & curl.exe -L --fail --silent --show-error -o $popplerZip $popplerUrl
+    if ($LASTEXITCODE -ne 0) { Write-Fail "Poppler download failed. Try re-running the script." }
+    # pdftoppm.exe is not where it is expected, so whatever is already in the
+    # folder is a stale or partial extraction -- start from a clean folder.
+    if (Test-Path $PopplerDir) { Remove-Item $PopplerDir -Recurse -Force }
+    # ($popplerBase was never defined and is fatal under Set-StrictMode.)
+    Write-Host "    Extracting to $PopplerDir..."
+    Expand-Archive -Path $popplerZip -DestinationPath $PopplerDir -Force
+    Remove-Item $popplerZip -Force
+    $found = Get-ChildItem -Path $PopplerDir -Recurse -Filter "pdftoppm.exe" |
+             Select-Object -First 1
+    if (-not $found) {
+        Write-Fail "Poppler extraction failed -- pdftoppm.exe not found"
+    }
+    if ($found.DirectoryName -ne $popplerBin) {
+        # The archive presumably wraps everything in a versioned top-level
+        # folder (<root>\Library\bin) -- TODO confirm. Hoist that folder's
+        # content so the layout matches $popplerBin and a re-run sees it.
+        $binDir = $found.Directory
+        $libDir = $binDir.Parent
+        if ($binDir.Name -eq "bin" -and $libDir -and $libDir.Name -eq "Library" -and
+            $libDir.Parent -and $libDir.Parent.FullName -ne $PopplerDir) {
+            $archiveRoot = $libDir.Parent.FullName
+            Get-ChildItem -LiteralPath $archiveRoot -Force | Move-Item -Destination $PopplerDir -Force
+            Remove-Item -LiteralPath $archiveRoot -Recurse -Force
+        } else {
+            # Unknown layout: use it for this session, but flag the mismatch.
+            $popplerBin = $found.DirectoryName
+            Write-Warn "Unexpected Poppler layout -- launch scripts expect tools\poppler\Library\bin"
+        }
+    }
+    Write-OK "Poppler extracted: $popplerBin"
+}
+
+# Poppler is local in tools\ -- add it to this session's PATH
+$env:PATH = "$env:PATH;$popplerBin"
+
+# -- 6. Install Python packages -------------------------------------------------
+Write-Step "Installing Python packages"
+
+$packages = @(
+    # Web server
+    @{ name="flask";                      desc="web server" },
+    # PDF handling
+    @{ name="pdfplumber";                 desc="PDF text extraction" },
+    @{ name="pdf2image";                  desc="PDF to image (needs Poppler)" },
+    @{ name="pytesseract";                desc="OCR wrapper (needs Tesseract)" },
+    @{ name="pypdf";                      desc="PDF read/write" },
+    @{ name="reportlab";                  desc="PDF generation for redaction" },
+    # Document formats
+    @{ name="python-docx";                desc="Word documents" },
+    @{ name="openpyxl";                   desc="Excel files" },
+    @{ name="img2pdf";                    desc="image to PDF" },
+    # Image / CV
+    @{ name="opencv-python-headless";     desc="face detection (headless, fewer DLL deps)" },
+    @{ name="numpy";                      desc="image processing" },
+    @{ name="Pillow";                     desc="image handling" },
+    # NER / anonymisation
+    @{ name="spacy";                      desc="named entity recognition" },
+    # Archive scanning
+    # Native app window
+    @{ name="pymupdf";                    desc="secure PDF redaction (physical text removal)" },
+    @{ name="pywebview";                  desc="native webview window" },
+    @{ name="pystray";                    desc="system tray icon (fallback)" },
+    # App bundling
+    @{ name="pyinstaller";                desc="app packager" },
+    @{ name="pyinstaller-hooks-contrib";  desc="PyInstaller hooks" },
+    # GDPRScanner
+    @{ name="msal";                          desc="Microsoft authentication" },
+    @{ name="requests";                      desc="HTTP client for Graph API" },
+    # Optional — File system scanning (#8)
+    @{ name="smbprotocol";                   desc="native SMB2/3 network share scanning (optional)" },
+    @{ name="keyring";                        desc="OS keychain credential storage for SMB (optional)" },
+    @{ name="python-dotenv";                  desc=".env file credential fallback (optional)" },
+    # Scheduler (#19)
+    @{ name="APScheduler";                    desc="in-process scheduled scans (optional)" },
+    # Google Workspace scanning (#10)
+    @{ name="google-auth";                    desc="Google service account auth (optional)" },
+    @{ name="google-auth-httplib2";           desc="Google auth HTTP transport (optional)" },
+    @{ name="google-api-python-client";       desc="Gmail + Drive + Admin APIs (optional)" }
+)
+
+# Install the packages one by one into the venv so that a single failure does
+# not abort the rest; failed names are collected for the summary below.
+$failed = @()
+foreach ($pkg in $packages) {
+    Write-Host ("    {0,-36} {1}" -f ($pkg.name + "..."), $pkg.desc) -NoNewline
+    Invoke-VenvPip @("install", $pkg.name, "--quiet", "--disable-pip-version-check") | Out-Null
+    if ($LASTEXITCODE -ne 0) {
+        Write-Host "  FAILED" -ForegroundColor Red
+        $failed += $pkg.name
+    } else {
+        Write-Host "  OK" -ForegroundColor Green
+    }
+}
+
+# pywebview 5.x used a [win32] extra; 6.x+ ships WebView2 support built-in -- no extra needed
+# (there is therefore no separate install step here; the stray OK/skipped line
+# that merely echoed the last package's exit code has been removed).
+
+if ($failed.Count -gt 0) {
+    Write-Warn "Failed to install: $($failed -join ', ')"
+    # Point the retry at the venv interpreter, not whatever "python" is on PATH.
+    Write-Warn "Retry manually: & `"$VenvPython`" -m pip install $($failed -join ' ')"
+}
+
+# -- 7. Install spaCy language model -------------------------------------------
+Write-Step "Installing spaCy Danish NER model (~500 MB, may take several minutes)"
+
+# Preferred first; smaller models are fallbacks.
+$models = @("da_core_news_lg", "da_core_news_md", "da_core_news_sm")
+
+# Probe: exits 0 only when at least one Danish model is installed and loads,
+# 1 when none is installed. (Printing 'ok' unconditionally made the check pass
+# whenever spaCy itself imported, so no model was ever downloaded.)
+$modelList = "'" + ($models -join "','") + "'"
+$probe = "import sys, spacy; found = [m for m in [$modelList] if spacy.util.is_package(m)]; [spacy.load(m) for m in found]; sys.exit(0 if found else 1)"
+
+# Python/spaCy may write warnings to stderr; relax the error preference while
+# stderr is redirected (same approach as the Tesseract download) and decide
+# purely on exit codes.
+$prevEap = $ErrorActionPreference
+$ErrorActionPreference = "SilentlyContinue"
+try {
+    & $VenvPython -c $probe 2>$null | Out-Null
+    if ($LASTEXITCODE -eq 0) {
+        Write-OK "spaCy Danish model already installed"
+    } else {
+        $installed = $false
+        foreach ($model in $models) {
+            Write-Host "    Trying $model..."
+            & $VenvPython -m spacy download $model --quiet 2>$null | Out-Null
+            if ($LASTEXITCODE -eq 0) {
+                Write-OK "Installed: $model"
+                $installed = $true
+                break
+            }
+        }
+        if (-not $installed) {
+            Write-Warn "No spaCy Danish model installed -- anonymisation will be unavailable"
+            Write-Warn "Retry manually:  & `"$VenvPython`" -m spacy download da_core_news_sm"
+        }
+    }
+} finally {
+    $ErrorActionPreference = $prevEap
+}
+
+# -- 8. Verify installation -----------------------------------------------------
+Write-Step "Verifying installation"
+
+# Python
+Write-OK "Python: $(Get-PythonVersionStr $pythonCmd)"
+
+# Tesseract
+try {
+    $tessVer = & tesseract --version 2>&1 | Select-Object -First 1
+    Write-OK "Tesseract: $tessVer"
+    $langs = & tesseract --list-langs 2>&1 | Where-Object { $_ -match "^(dan|eng)$" }
+    Write-OK "OCR languages: $($langs -join ', ')"
+} catch {
+    Write-Warn "Tesseract not on PATH -- restart PowerShell and re-run if needed"
+}
+
+# Poppler
+try {
+    $pp = Get-Command pdftoppm -ErrorAction Stop
+    Write-OK "Poppler: $($pp.Source)"
+} catch {
+    Write-Warn "Poppler not on PATH -- restart PowerShell and re-run if needed"
+}
+
+# All Python imports -- write to a temp file to avoid PowerShell expanding {vars} in f-strings
+$importScriptPath = Join-Path $env:TEMP "gdpr_verify.py"
+Set-Content -Path $importScriptPath -Encoding UTF8 -Value @'
+import sys
+checks = [
+    ('flask',         'flask'),
+    ('pdfplumber',    'pdfplumber'),
+    ('pdf2image',     'pdf2image'),
+    ('pytesseract',   'pytesseract'),
+    ('pypdf',         'pypdf'),
+    ('reportlab',     'reportlab'),
+    ('python-docx',   'docx'),
+    ('openpyxl',      'openpyxl'),
+    ('opencv-python-headless', 'cv2'),
+    ('numpy',         'numpy'),
+    ('Pillow',        'PIL'),
+    ('spacy',         'spacy'),
+    ('img2pdf',       'img2pdf'),
+    ('pymupdf',       'fitz'),
+    ('pywebview',     'webview'),
+    ('pystray',       'pystray'),
+    ('PyInstaller',   'PyInstaller'),
+    ('msal',          'msal'),
+    ('requests',      'requests'),
+]
+optional_checks = [
+    ('smbprotocol',   'smbprotocol'),
+    ('keyring',       'keyring'),
+    ('python-dotenv', 'dotenv'),
+    ('APScheduler',   'apscheduler'),
+]
+missing = []
+for name, imp in checks:
+    try:
+        __import__(imp)
+        print("    [OK] " + name)
+    except ImportError:
+        print("    [!!] " + name + "  MISSING")
+        missing.append(name)
+print("\n    Optional (file system scanning):")
+for name, imp in optional_checks:
+    try:
+        __import__(imp)
+        print("    [OK] " + name)
+    except ImportError:
+        print("    [--] " + name + "  (not installed)")
+if missing:
+    print("\nMissing required: " + ", ".join(missing))
+    sys.exit(1)
+print("\nAll required packages verified.")
+sys.exit(0)
+'@
+
+& $VenvPython $importScriptPath
+$allOk = ($LASTEXITCODE -eq 0)
+Remove-Item $importScriptPath -ErrorAction SilentlyContinue
+
+# -- 9. Create launch scripts ---------------------------------------------------
+# Both .bat files are written to the current folder and put the project-local
+# Tesseract and Poppler folders on PATH before starting the venv interpreter.
+Write-Step "Creating launch scripts"
+
+# start_gdpr.bat -- starts the web UI on port 5100
+Set-Content -Path "start_gdpr.bat" -Encoding ASCII -Value @'
+@echo off
+:: GDPRScanner - Web UI
+cd /d "%~dp0"
+set PATH=%~dp0tools\tesseract;%~dp0tools\poppler\Library\bin;%PATH%
+set TESSDATA_PREFIX=%~dp0tools\tesseract\tessdata
+set PORT=5100
+echo.
+echo   GDPRScanner
+echo   Open in browser: http://localhost:%PORT%
+echo   Press Ctrl+C to stop
+echo.
+"%~dp0venv\Scripts\python.exe" "%~dp0gdpr_scanner.py" --port %PORT%
+pause
+'@
+Write-OK "Created: start_gdpr.bat"
+
+# build_gdpr.bat -- builds the standalone .exe. The file name matches the
+# "Build standalone app" hint printed at the end of this script.
+Set-Content -Path "build_gdpr.bat" -Encoding ASCII -Value @'
+@echo off
+:: GDPRScanner -- Build standalone .exe
+cd /d "%~dp0"
+set PATH=%~dp0tools\tesseract;%~dp0tools\poppler\Library\bin;%PATH%
+set TESSDATA_PREFIX=%~dp0tools\tesseract\tessdata
+echo Building GDPRScanner...
+echo.
+"%~dp0venv\Scripts\python.exe" "%~dp0build_gdpr.py" --clean %*
+pause
+'@
+Write-OK "Created: build_gdpr.bat"
+
+
+# -- Done -----------------------------------------------------------------------
+Write-Host ""
+Write-Host "  -----------------------------------------" -ForegroundColor DarkGray
+if ($allOk) {
+    Write-Host "  Installation complete!" -ForegroundColor Green
+} else {
+    Write-Host "  Installation complete with warnings -- see above" -ForegroundColor Yellow
+}
+Write-Host ""
+Write-Host "  GDPRScanner:" -ForegroundColor White
+Write-Host "    Double-click  start_gdpr.bat" -ForegroundColor Cyan
+Write-Host "    Web UI: http://localhost:5100" -ForegroundColor White
+Write-Host ""
+Write-Host "  File system scanning (optional):" -ForegroundColor White
+Write-Host "    python gdpr_scanner.py --scan-path C:\Users\Me\Documents" -ForegroundColor Cyan
+Write-Host "    python gdpr_scanner.py --scan-path //nas/shares --smb-user DOMAIN\user" -ForegroundColor Cyan
+Write-Host "    Or use the File sources panel in the GDPRScanner UI" -ForegroundColor Gray
+Write-Host ""
+Write-Host "  Build standalone app:" -ForegroundColor White
+Write-Host "    Double-click  build_gdpr.bat   ->  dist\GDPRScanner.exe" -ForegroundColor Cyan
+Write-Host "  -----------------------------------------" -ForegroundColor DarkGray
+Write-Host ""
diff --git a/keywords/da.json b/keywords/da.json
new file mode 100644
index 0000000..6197694
--- /dev/null
+++ b/keywords/da.json
@@ -0,0 +1,532 @@
+{
+  "_comment": "GDPR Article 9 sensitive category keywords — Danish",
+  "_version": "1.0",
+  "_note": "Keywords are matched case-insensitively. A match within ~150 characters of a personal identifier (CPR, name, address) elevates the item to HIGH risk and adds a special_category badge. Edit this file to add organisation-specific terms.",
+  "health": {
+    "_label_da": "Helbred",
+    "_label_en": "Health data",
+    "_article": "Art. 9(1) — data concerning health",
+    "keywords": [
+      "diagnose",
+      "diagnos",
+      "diagnosen",
+      "diagnoser",
+      "sygemelding",
+      "sygemeldingen",
+      "sygemeldinger",
+      "sygedagpenge",
+      "sygefravær",
+      "sygeorlov",
+      "sygefraværssamtale",
+      "sygefraværspolitik",
+      "indlæggelse",
+      "indlæggelsen",
+      "indlæggelser",
+      "udskrivning",
+      "udskrivningsbrev",
+      "hospitalsindlæggelse",
+      "operation",
+      "opereret",
+      "ambulant",
+      "ambulatorium",
+      "skadestue",
+      "lægehenvisning",
+      "lægeerklæring",
+      "lægeattest",
+      "lægejournalen",
+      "lægejournal",
+      "patientjournal",
+      "epikrisen",
+      "epikrise",
+      "behandling",
+      "behandlingsplan",
+      "behandlingsforløb",
+      "medicinsk",
+      "medicin",
+      "medicindosering",
+      "medicinstatus",
+      "medicinliste",
+      "recept",
+      "receptpligtigt",
+      "bivirkninger",
+      "dosering",
+      "præparat",
+      "antidepressiv",
+      "antipsykotisk",
+      "beroligende medicin",
+      "smertestillende",
+      "kronisk sygdom",
+      "kronisk lidelse",
+      "kronisk",
+      "alvorlig sygdom",
+      "terminal",
+      "terminalt syg",
+      "palliativ",
+      "kræft",
+      "kræftbehandling",
+      "kemoterapi",
+      "stråleterapi",
+      "diabetes",
+      "type 1 diabetes",
+      "type 2 diabetes",
+      "insulinbehandling",
+      "blodsukkermåling",
+      "hjertesygdom",
+      "hjerteinsufficiens",
+      "hjertesvigt",
+      "hjerneblødning",
+      "blodprop",
+      "apopleksi",
+      "sklerose",
+      "multipel sklerose",
+      "epilepsi",
+      "epileptisk anfald",
+      "astma",
+      "kol",
+      "rygerlunger",
+      "allergi",
+      "allergisk",
+      "anafylaktisk",
+      "depression",
+      "angst",
+      "angstlidelse",
+      "panikangst",
+      "social fobi",
+      "ptsd",
+      "posttraumatisk",
+      "bipolar",
+      "bipolar lidelse",
+      "skizofreni",
+      "skizofreni diagnose",
+      "personlighedsforstyrrelse",
+      "borderline",
+      "adhd",
+      "add",
+      "autisme",
+      "autismespektrum",
+      "asperger",
+      "ocd",
+      "tvangstanker",
+      "tvangshandlinger",
+      "selvskade",
+      "selvmordstanker",
+      "suicidaltanker",
+      "suicidalitet",
+      "psykiatri",
+      "psykiatrisk",
+      "psykiatrisk indlæggelse",
+      "psykiatrisk behandling",
+      "psykolog",
+      "psykologforløb",
+      "psykoterapi",
+      "terapi",
+      "terapiforløb",
+      "familiebehandling",
+      "misbrugsbehandling",
+      "alkoholmisbrug",
+      "alkoholbehandling",
+      "alkoholafhængighed",
+      "stofmisbrug",
+      "narkotikamisbrug",
+      "narkobehandling",
+      "rehabilitering",
+      "genoptræning",
+      "arbejdsskade",
+      "erhvervssygdom",
+      "erhvervsevnetab",
+      "varig men",
+      "handicap",
+      "handicapkompensation",
+      "hjælpemiddel",
+      "kørestol",
+      "gangbesvær",
+      "synshæmmet",
+      "hørehæmmet",
+      "høretab",
+      "cochlear implantat",
+      "graviditet",
+      "gravid",
+      "barsel",
+      "barselsperiode",
+      "barselsorlov",
+      "abort",
+      "spontan abort",
+      "dødfødt",
+      "fertilitet",
+      "fertilitetsbehandling",
+      "reagensglasbefrugtning",
+      "ivf",
+      "overgangsalder",
+      "menopause",
+      "stofskifte",
+      "stofskiftesygdom",
+      "blodtryk",
+      "forhøjet blodtryk",
+      "kolesterol",
+      "overvægt",
+      "fedme",
+      "spiseforstyrrelser",
+      "anoreksi",
+      "bulimi",
+      "hiv",
+      "aids",
+      "seksuelt overførbar",
+      "kønssygdom",
+      "hepatitis",
+      "tuberkulose",
+      "organdonor",
+      "transplantation",
+      "blodtype",
+      "blodprøve",
+      "blodprøvesvar",
+      "scanningssvar",
+      "røntgensvar",
+      "mri-scanning",
+      "ct-scanning",
+      "helbredstilstand",
+      "funktionsevne",
+      "nedsat funktionsevne",
+      "pfandplan",
+      "senhjerneskade",
+      "hjerneskade",
+      "demens",
+      "alzheimers",
+      "frontallapsdemens",
+      "åndelig lidelse"
+    ]
+  },
+  "mental_health": {
+    "_label_da": "Psykisk helbred",
+    "_label_en": "Mental health",
+    "_article": "Art. 9(1) — data concerning health (mental)",
+    "_note": "Subset of health — flagged separately for higher scrutiny",
+    "keywords": [
+      "ppp-plan",
+      "handleplan",
+      "behandlingsplan psykiatri",
+      "psykiatrisk journal",
+      "mentalerklæring",
+      "retsmedicinsk",
+      "mentalobservation",
+      "psykologisk vurdering",
+      "psykologisk rapport",
+      "kognitiv test",
+      "intelligenstest",
+      "iq-test",
+      "neuropsykologisk",
+      "funktionsvurdering",
+      "støtte-kontaktperson",
+      "socialpædagogisk støtte",
+      "botilbud",
+      "bostøtte",
+      "socialpsykiatri",
+      "§ 85-støtte",
+      "§ 107",
+      "§ 108",
+      "aktivitets- og samværstilbud"
+    ]
+  },
+  "criminal": {
+    "_label_da": "Strafbare forhold",
+    "_label_en": "Criminal records and offences",
+    "_article": "Art. 10 — data relating to criminal convictions",
+    "keywords": [
+      "straffeoplysning",
+      "straffeoplysninger",
+      "straffeattest",
+      "børneattest",
+      "ren børneattest",
+      "udvidet børneattest",
+      "dom",
+      "domfældt",
+      "straffet",
+      "straffedom",
+      "betinget dom",
+      "ubetinget dom",
+      "fængselsstraf",
+      "fængslet",
+      "fængsling",
+      "varetægtsfængslet",
+      "varetægtsfængsling",
+      "varetægt",
+      "løsladelse",
+      "løsladt",
+      "prøveløsladt",
+      "prøveløsladelse",
+      "afsoning",
+      "afsoningstid",
+      "sigtelse",
+      "sigtet",
+      "sigtet for",
+      "tiltale",
+      "tiltalt",
+      "anklage",
+      "anklaget",
+      "politianmeldelse",
+      "anmeldt til politiet",
+      "ransagning",
+      "anholdelse",
+      "anholdt",
+      "bøde",
+      "bødeforelæg",
+      "betinget frakendelse",
+      "ubetinget frakendelse",
+      "kørekortfrakendelse",
+      "samfundstjeneste",
+      "fodlænke",
+      "elektronisk fodlænke",
+      "probation",
+      "tiltalefrafald",
+      "tiltaleopgivelse",
+      "straffesag",
+      "strafferet",
+      "kriminalitet",
+      "kriminel",
+      "recidiv",
+      "gentagelseskriminalitet",
+      "sexregistret",
+      "dna-register",
+      "efterlyst",
+      "udvisning",
+      "udvist",
+      "udvisningssag",
+      "udvisningsbeslutning",
+      "udvisningsdom"
+    ]
+  },
+  "trade_union": {
+    "_label_da": "Fagforeningsmedlemskab",
+    "_label_en": "Trade union membership",
+    "_article": "Art. 9(1) — data revealing trade union membership",
+    "keywords": [
+      "fagforening",
+      "fagforeningsmedlem",
+      "fagforeningsmedlemskab",
+      "fagforbund",
+      "tillidsrepræsentant",
+      "tr",
+      "fællestillidsrepræsentant",
+      "ftr",
+      "arbejdsmiljørepræsentant",
+      "amr",
+      "strejke",
+      "strejkevagt",
+      "strejkebrydervirksomhed",
+      "sympatistrejke",
+      "lockout",
+      "overenskomst",
+      "overenskomstforhandling",
+      "overenskomstmæssig",
+      "faglig organisation",
+      "faglig konflikt",
+      "kollektiv overenskomst",
+      "a-kasse",
+      "arbejdsløshedskasse",
+      "fh",
+      "3f",
+      "hk",
+      "dso",
+      "dsto",
+      "dtl",
+      "bupl",
+      "dlf",
+      "ftf",
+      "fagbevægelsen",
+      "fagpolitisk",
+      "fagretlig",
+      "faglig sag"
+    ]
+  },
+  "religion": {
+    "_label_da": "Religion og tro",
+    "_label_en": "Religious or philosophical beliefs",
+    "_article": "Art. 9(1) — data revealing religious or philosophical beliefs",
+    "keywords": [
+      "religion",
+      "religiøs",
+      "religiøsitet",
+      "tro",
+      "trosfrihed",
+      "trossamfund",
+      "menighedsråd",
+      "kirke",
+      "kirkemedlem",
+      "kirkeskat",
+      "folkekirken",
+      "sognepræst",
+      "konfirmation",
+      "konfirmand",
+      "dåb",
+      "begravelse",
+      "begravelsesritual",
+      "bisættelse",
+      "kirkegård",
+      "moské",
+      "imam",
+      "islamisk",
+      "muslim",
+      "muslimsk",
+      "halal",
+      "ramadan",
+      "fredagsbøn",
+      "synagoge",
+      "rabbi",
+      "jødisk",
+      "kosher",
+      "hinduistisk",
+      "hindu",
+      "buddhistisk",
+      "buddhist",
+      "sikh",
+      "sikhisme",
+      "kristen",
+      "katolik",
+      "katolsk",
+      "protestant",
+      "luthersk",
+      "baptism",
+      "baptist",
+      "jehovas vidner",
+      "mormon",
+      "frikirke",
+      "pinsekirke",
+      "ateist",
+      "agnostiker",
+      "humanist",
+      "sekulariseret",
+      "religiøs fritagelse",
+      "dispensation af religiøse grunde",
+      "religiøs overbevisning",
+      "religiøst tilhørsforhold",
+      "religiøs tilknytning",
+      "filosofisk overbevisning"
+    ]
+  },
+  "ethnicity": {
+    "_label_da": "Race og etnisk oprindelse",
+    "_label_en": "Racial or ethnic origin",
+    "_article": "Art. 9(1) — data revealing racial or ethnic origin",
+    "keywords": [
+      "etnisk oprindelse",
+      "etnisk baggrund",
+      "etnicitet",
+      "nationalitet",
+      "herkomst",
+      "national herkomst",
+      "fremmed herkomst",
+      "indvandrerbaggrund",
+      "efterkommer",
+      "andengenerationsindvandrer",
+      "tosproget",
+      "tosprogede elever",
+      "modersmål",
+      "modersmålsundervisning",
+      "flygtning",
+      "asylansøger",
+      "asylsag",
+      "opholdstilladelse",
+      "opholdsstatus",
+      "statsborgerskab",
+      "dansk statsborgerskab",
+      "naturaliseret",
+      "udlænding",
+      "udlændingeloven",
+      "fremmedlov",
+      "visumpligtig",
+      "indrejseforbud",
+      "udsendelsesland",
+      "racediskrimination",
+      "racisme",
+      "hadforbrydelse",
+      "racistisk overgreb"
+    ]
+  },
+  "political": {
+    "_label_da": "Politisk overbevisning",
+    "_label_en": "Political opinions",
+    "_article": "Art. 9(1) — data revealing political opinions",
+    "_note": "Included for completeness — less common in school context but relevant for staff",
+    "keywords": [
+      "politisk overbevisning",
+      "politisk holdning",
+      "politisk tilhørsforhold",
+      "partimedlem",
+      "partimedlemskab",
+      "politisk aktiv",
+      "venstrefløj",
+      "højrefløj",
+      "venstreorienteret",
+      "højreorienteret",
+      "radikal",
+      "konservativ",
+      "socialdemokrat",
+      "liberalist",
+      "anarkist",
+      "kommunist",
+      "politisk flygtning",
+      "politisk forfølgelse",
+      "politisk asyl",
+      "samvittighedsfange",
+      "politisk dissens"
+    ]
+  },
+  "biometric": {
+    "_label_da": "Biometriske oplysninger",
+    "_label_en": "Biometric data",
+    "_article": "Art. 9(1) — biometric data for the purpose of uniquely identifying a natural person",
+    "keywords": [
+      "fingeraftryk",
+      "fingeraftryksscanning",
+      "ansigtsgenkendelse",
+      "iris-scanning",
+      "nethindescanning",
+      "stemmebiometri",
+      "dna-profil",
+      "dna-analyse",
+      "dna-prøve",
+      "genetisk profil",
+      "biometrisk",
+      "biometrisk id",
+      "biometrisk data",
+      "biometrisk verificering",
+      "pas med chip",
+      "ansigtsscanning",
+      "kropsscanning",
+      "gangartsanalyse"
+    ]
+  },
+  "sexual_orientation": {
+    "_label_da": "Seksuel orientering",
+    "_label_en": "Data concerning sex life or sexual orientation",
+    "_article": "Art. 9(1) — data concerning a natural person's sex life or sexual orientation",
+    "keywords": [
+      "seksuel orientering",
+      "seksualitet",
+      "homoseksuel",
+      "bøsse",
+      "lesbisk",
+      "biseksuel",
+      "transseksuel",
+      "transperson",
+      "transkønnet",
+      "ikke-binær",
+      "queer",
+      "kønsskifte",
+      "kønsskifteoperation",
+      "juridisk kønsskifte",
+      "kønsdysfori",
+      "lgbtq",
+      "lgbt",
+      "coming out",
+      "skeiv"
+    ]
+  },
+  "_proximity_note": "A keyword match only triggers a special_category flag when it appears within 150 characters of a personal identifier (CPR number, full name, or address). Isolated keyword occurrences in general text do not flag the item.",
+  "_false_positive_guidance": {
+    "behandling": "Very common word — also means 'processing' in legal text. Consider requiring proximity to a health-related term before flagging.",
+    "dom": "Also means 'cathedral' (domkirke) and appears in many compound words. Match as a standalone token only.",
+    "tro": "Also a given name and common word. Match only in context with 'religiøs', 'trossamfund' etc.",
+    "lo": "Abbreviation — match only in known union context.",
+    "allergi": "Common in school contexts (food allergies). Low risk unless near a CPR number."
+  }
+}
\ No newline at end of file
diff --git a/lang/CLAUDE.md b/lang/CLAUDE.md
new file mode 100644
index 0000000..cd3ed58
--- /dev/null
+++ b/lang/CLAUDE.md
@@ -0,0 +1,7 @@
+# lang/ — i18n Rules
+
+- `en.json` is the source of truth. Always update `da.json` and `de.json` when adding or changing keys.
+- `/api/langs` globs both `*.json` and `*.lang` — both formats coexist.
+- Loader in `app_config.py` prefers `.json`, falls back to `.lang`.
+- JS: `t(key, default)` — Python: `LANG.get(key, default)`
+- No emojis or symbol prefixes in translation values used as button labels.
diff --git a/lang/da.json b/lang/da.json
new file mode 100644
index 0000000..719c8dc
--- /dev/null
+++ b/lang/da.json
@@ -0,0 +1,773 @@
+{
+  "app_name": "Document Scanner",
+  "label_root_folder": "Rodmappe",
+  "label_older_than": "Markér filer med data ældre end",
+  "placeholder_folder": "/sti/til/dokumenter",
+  "btn_scan": "Start scanning",
+  "btn_stop": "Stop scanning",
+  "toggle_anonymise": "Anonymisér",
+  "toggle_mask": "Maskér kun CPR",
+  "toggle_blur_faces": "Slør ansigter",
+  "toggle_skip_cloud": "Spring skybaserede filer over",
+  "toggle_ocr": "OCR-scan af PDF'er",
+  "label_face_sensitivity": "Ansigtsfølsomhed",
+  "face_sensitivity_high": "Høj",
+  "face_sensitivity_low": "Lav",
+  "face_sensitivity_hint": "Højere = færre fejlregistreringer",
+  "label_ocr_language": "OCR-sprog",
+  "label_ocr_dpi": "DPI (kvalitet vs. hastighed)",
+  "lang_danish": "Dansk",
+  "lang_danish_english": "Dansk + Engelsk",
+  "lang_english": "Engelsk",
+  "lang_norwegian": "Norsk",
+  "lang_swedish": "Svensk",
+  "lang_german": "Tysk",
+  "lang_french": "Fransk",
+  "lang_dutch": "Hollandsk",
+  "time_any": "Alle",
+  "time_1y": "1 år",
+  "time_2y": "2 år",
+  "time_5y": "5 år",
+  "time_10y": "10 år",
+  "stat_scanned": "Filer scannet",
+  "stat_flagged": "Markerede",
+  "stat_high_risk": "Høj risiko",
+  "stat_cpr": "CPR-numre fundet",
+  "col_file": "Fil",
+  "col_cpr": "CPR-numre",
+  "col_oldest": "Ældste dato",
+  "col_risk": "Risiko",
+  "col_action": "Handling",
+  "col_detail": "Detaljer",
+  "sort_name_az": "Navn A–Z",
+  "sort_name_za": "Navn Z–A",
+  "sort_cpr_desc": "CPR-antal ↓",
+  "sort_oldest_desc": "Ældste dato ↓",
+  "sort_risk_desc": "Risiko ↓",
+  "sort_size_desc": "Størrelse ↓",
+  "filter_all_types": "Alle typer",
+  "filter_pdf": "PDF",
+  "filter_word": "Word",
+  "filter_excel": "Excel",
+  "filter_image": "Billede",
+  "placeholder_search": "Søg filnavn…",
+  "btn_anonymise": "Anonymisér",
+  "btn_mask": "Maskér CPR",
+  "btn_blur": "Slør",
+  "btn_preview": "Forhåndsvis",
+  "btn_show_in_folder": "Vis i mappe",
+  "btn_move_to_trash": "Flyt til papirkurv",
+  "btn_undo": "Fortryd",
+  "btn_export_csv": "CSV",
+  "btn_select_all": "Vælg alle",
+  "btn_anonymise_flagged": "Anonymisér markerede",
+  "btn_anonymise_all": "Anonymisér alle markerede",
+  "btn_cancel": "Annullér",
+  "btn_close": "Luk",
+  "btn_clear": "Ryd",
+  "preview_original": "Original",
+  "preview_processed": "Behandlet",
+  "preview_anonymise": "Anonymisér → forhåndsvis",
+  "preview_mask": "Maskér CPR → forhåndsvis",
+  "preview_blur_faces": "Slør {n} ansigt(er) → forhåndsvis",
+  "preview_no_faces": "✓ Ingen ansigter fundet",
+  "preview_scanning_faces": "Scanner efter ansigter…",
+  "preview_processing_faces": "Behandler ansigter…",
+  "preview_rendering": "Indlæser…",
+  "scan_preparing": "Forbereder…",
+  "scan_scanning": "Scanner…",
+  "scan_face_scanning": "Scanner {total} fil(er) for ansigter…",
+  "scan_face_progress": "Ansigter: {index} / {total} — {file}",
+  "scan_eta": "{eta} tilbage",
+  "scan_stopped": "Scanning stoppet.",
+  "empty_flagged": "Ingen markerede dokumenter",
+  "empty_flagged_detail": "Kør en scanning for at se markerede dokumenter",
+  "empty_filter": "Ingen filer matcher dit filter",
+  "no_audit": "Ingen revisionslog endnu",
+  "dialog_delete_title": "Slet filer?",
+  "dialog_delete_confirm": "Flyt til papirkurv",
+  "all_trashed": "Alle markerede dokumenter er flyttet til papirkurven.",
+  "btn_audit_log": "Revisionslog",
+  "audit_cleared": "Revisionslog ryddet",
+  "failed_audit": "Kunne ikke indlæse revisionslog",
+  "about_title": "Om",
+  "label_python": "Python",
+  "label_spacy": "spaCy-model",
+  "label_tesseract": "Tesseract",
+  "label_pymupdf": "PyMuPDF",
+  "label_opencv": "OpenCV",
+  "no_model": "ingen model installeret",
+  "not_installed": "ikke installeret",
+  "btn_about": "Om",
+  "lbl_size": "Størrelse",
+  "lbl_time": "Tid",
+  "lbl_space": "Mellemrum",
+  "lbl_loading": "Indlæser…",
+  "lbl_blurred": "Sløret",
+  "lbl_none": "Ingen",
+  "lbl_scanner": "Scanner",
+  "lbl_document": "Dokument",
+  "lbl_folder": "Mappe",
+  "empty_scan_hint": "Angiv en mappesti og klik",
+  "empty_flagged_found": "Ingen markerede dokumenter fundet.",
+  "preview_click_hint": "Klik på et dokument for at forhåndsvise det",
+  "kbd_select": "vælg",
+  "kbd_delete": "slet",
+  "kbd_close_preview": "luk forhåndsvisning",
+  "kbd_select_all": "vælg alle",
+  "sort_cpr_asc": "CPR-antal ↑",
+  "preview_error": "Forhåndsvisningsfejl",
+  "preview_unavailable": "Forhåndsvisning utilgængelig",
+  "preview_not_available": "Forhåndsvisning ikke tilgængelig for denne filtype",
+  "lbl_anonymised": "Anonymiseret",
+  "lbl_masked": "CPR maskeret",
+  "lbl_processing": "Behandler…",
+  "lbl_error": "Fejl",
+  "lbl_no_pii": "Ingen ændringer — ingen PII fundet",
+  "badge_anonymised": "✓ anonymiseret",
+  "badge_masked": "✓ maskeret",
+  "badge_blurred": "✓ sløret",
+  "lbl_working": "Arbejder…",
+  "lbl_stopping": "Stopper…",
+  "lbl_no_files_selected": "Ingen filer valgt",
+  "lbl_selected_1": "fil valgt",
+  "lbl_selected_n": "filer valgt",
+  "dialog_delete_body": "Dette vil permanent slette de valgte filer fra disken. Denne handling kan ikke fortrydes.",
+  "lbl_flagged_docs_1": "markeret dokument",
+  "lbl_flagged_docs_n": "markerede dokumenter",
+  "banner_all_clean": "Ingen fund",
+  "banner_files_scanned": "fil(er) scannet, ingen CPR-numre fundet",
+  "banner_need_attention": "fil(er) kræver opmærksomhed ud af",
+  "banner_scanned": "scannet",
+  "summary_face_blur": "billede(r) til ansigtsslørning",
+  "badge_face": "ansigt",
+  "badge_shared": "delt",
+  "badge_archive": "arkiv",
+  "badge_shared_cpr": "Delt CPR",
+  "lbl_also_in": "også i",
+  "filter_shared_cpr": "⚠ Delt CPR",
+  "risk_high": "HØJ",
+  "risk_medium": "MIDDEL",
+  "risk_low": "LAV",
+  "reason_cpr_number": "CPR-nummer",
+  "reason_cpr_numbers": "CPR-numre",
+  "reason_cpr_confirmed": "CPR(er) med nøgleordskontekst",
+  "reason_unique_individuals": "unikke personer",
+  "reason_cpr_shared": "CPR delt på tværs af {n} filer",
+  "reason_data_10y": "data > 10 år gammel",
+  "reason_data_5y": "data > 5 år gammel",
+  "btn_export_excel": "Eksporter rapport som Excel",
+  "btn_audit_log_short": "Revisionslog",
+  "btn_delete_selected": "Slet markerede",
+  "audit_action_scan": "Scanning",
+  "audit_action_redact": "Anonymisering",
+  "audit_action_blur_faces": "Ansigtsslørning",
+  "audit_action_delete": "Sletning",
+  "audit_action_restore": "Gendannelse",
+  "audit_action_export": "Eksport",
+  "audit_files": "filer",
+  "audit_flagged": "markerede",
+  "audit_high_risk": "høj risiko",
+  "audit_regions": "områder",
+  "audit_faces": "ansigter",
+  "audit_permanent": "permanent",
+  "audit_trash": "papirkurv",
+  "audit_files_restored": "fil(er) gendannet",
+  "confirm_clear_audit": "Ryd hele revisionsloggen? Dette kan ikke fortrydes.",
+  "lang_spanish": "Spansk",
+  "lang_italian": "Italiensk",
+  "lang_portuguese": "Portugisisk",
+  "lang_finnish": "Finsk",
+  "lang_polish": "Polsk",
+  "lang_czech": "Tjekkisk",
+  "lang_russian": "Russisk",
+  "lang_arabic": "Arabisk",
+  "lang_chinese_simplified": "Kinesisk (forenklet)",
+  "lang_chinese_traditional": "Kinesisk (traditionelt)",
+  "lang_japanese": "Japansk",
+  "lang_korean": "Koreansk",
+  "lbl_root": "rod",
+  "lbl_root_folder": "rodmappe",
+  "lbl_scanning": "Scanner:",
+  "btn_deselect_all": "Fravælg alle",
+  "filter_high_risk": "🔴 Høj risiko",
+  "filter_in_archive": "📦 I arkiv",
+  "log_starting_scan": "Starter scanning af",
+  "log_found_files": "Fandt {n} fil(er) at scanne",
+  "log_cloud_skipped": "kun-sky filer sprunget over",
+  "log_faces_detected": "ansigt(er) fundet",
+  "log_ocr_pages": "side(r)",
+  "log_pages_skipped": "billedside(r) sprunget over (aktivér OCR)",
+  "log_scan_complete": "Scanning fuldført",
+  "log_files_with_cpr": "fil(er) med CPR",
+  "log_no_faces_in": "Ingen ansigter fundet i",
+  "pii_phone": "telefon",
+  "pii_email": "e-mail",
+  "pii_iban": "IBAN",
+  "pii_bank_account": "bankkonto",
+  "pii_name": "navn",
+  "pii_address": "adresse",
+  "pii_org": "org",
+  "lbl_other_pii": "Andre PII",
+  "lbl_found": "fundet",
+  "btn_clear_results_cache": "Ryd resultatcache",
+  "btn_clear_ocr_cache": "Ryd OCR-cache",
+  "confirm_clear_results_cache": "Ryd alle gemte scanningsresultater? Gitteret vil blive ryddet.",
+  "confirm_clear_ocr_cache": "Ryd OCR-cache? Dette vil tvinge ny OCR ved næste scanning.",
+  "log_cache_cleared": "Resultatcache ryddet",
+  "log_ocr_cache_cleared": "OCR-cache ryddet",
+  "m365_app_name": "GDPRScanner",
+  "m365_sources": "Kilder",
+  "m365_options": "Indstillinger",
+  "m365_accounts": "Konti",
+  "m365_stats": "Statistik",
+  "m365_src_email": "Outlook",
+  "m365_src_onedrive": "OneDrive",
+  "m365_src_sharepoint": "SharePoint",
+  "m365_src_teams": "Teams",
+  "m365_opt_date_from": "Scan e-mails/filer fra",
+  "m365_opt_date_from_hint": "Lad være tom for at scanne alt",
+  "m365_opt_email_body": "Scan e-mailindhold",
+  "m365_opt_attachments": "Scan vedhæftede filer",
+  "m365_opt_max_attach": "Maks. vedhæftet filstørrelse (MB)",
+  "m365_opt_max_emails": "Maks. e-mails pr. bruger",
+  "m365_connect_title": "Opret forbindelse til Microsoft 365",
+  "m365_connect_sub": "Angiv dine Azure-appoplysninger for at logge ind.",
+  "m365_label_client_id": "Klient-ID (Applikations-ID)",
+  "m365_label_tenant_id": "Lejer-ID",
+  "m365_label_client_secret": "Klienthemmelighed",
+  "m365_secret_hint": "(valgfri — aktiverer scanning på tværs af organisationen)",
+  "m365_secret_desc_app": "appen tilgår alle brugeres data direkte (applikationstilladelser, kræver ikke login).",
+  "m365_secret_desc_delegated": "du logger ind som dig selv og kan kun scanne egne data, medmindre du er Global Admin.",
+  "m365_btn_connect": "Opret forbindelse",
+  "m365_device_code_go": "Gå til",
+  "m365_device_code_enter": "og indtast denne kode",
+  "m365_btn_cancel_auth": "Annullér",
+  "m365_btn_reconfigure": "Rekonfigurér",
+  "m365_btn_sign_out": "Log ud",
+  "m365_mode_app": "🔑 App-tilstand — hele org.",
+  "m365_mode_delegated": "Delegeret",
+  "m365_search_users": "Søg brugere…",
+  "m365_add_account_label": "Tilføj konto manuelt:",
+  "m365_add_account_placeholder": "e-mail eller UPN",
+  "m365_admin_note": "Viser kun din konto. For at liste alle brugere skal en administrator give samtykke til <strong>User.Read.All</strong> i Azure Portal, eller tilføj konti manuelt nedenfor.",
+  "m365_btn_scan": "Scan",
+  "m365_btn_stop": "Stop",
+  "m365_pill_flagged": "markerede",
+  "m365_pill_scanned": "scannet",
+  "m365_filter_all_sources": "Alle kilder",
+  "m365_filter_email": "Outlook",
+  "m365_filter_onedrive": "OneDrive",
+  "m365_filter_sharepoint": "SharePoint",
+  "m365_filter_teams": "Teams",
+  "m365_empty_hint": "Vælg kilder og klik på <strong>Scan</strong><br>for at finde dokumenter med CPR-numre",
+  "m365_stat_flagged": "Markerede",
+  "m365_stat_cpr": "CPR-fund",
+  "m365_preview_open": "Åbn i M365 ↗",
+  "m365_preview_close": "Luk",
+  "m365_auth_mode_app": "Godkendelsestilstand: Applikation (klientoplysninger — hele org.)",
+  "m365_auth_mode_delegated": "Godkendelsestilstand: Delegeret (enhedskode — kun indlogget bruger)",
+  "m365_phase_teams_index": "Bygger Teams-medlemskabsindeks…",
+  "m365_phase_sharepoint": "Indsamler SharePoint-filer…",
+  "m365_btn_about": "Om",
+  "m365_stat_scanned": "Scannet",
+  "m365_no_users_found": "Ingen brugere fundet",
+  "m365_no_users_match": "Ingen brugere matcher",
+  "m365_no_cpr_found": "Ingen CPR-numre fundet.",
+  "m365_no_matches": "Ingen match",
+  "m365_btn_export_excel": "Eksporter Excel",
+  "m365_export_no_data": "Ingen resultater at eksportere.",
+  "m365_phase_emails": "Indsamler Outlook-beskeder",
+  "m365_phase_onedrive": "Indsamler OneDrive",
+  "m365_phase_teams": "Indsamler Teams",
+  "m365_preset_1yr": "1 år",
+  "m365_preset_2yr": "2 år",
+  "m365_preset_5yr": "5 år",
+  "m365_preset_10yr": "10 år",
+  "m365_preset_any": "Alle",
+  "m365_auth_mode_app_short": "Application permissions · client credentials",
+  "m365_auth_mode_delegated_short": "Delegated permissions · device code flow",
+  "m365_info_permissions": "Tilladelser",
+  "m365_info_signin": "Log-ind krævet",
+  "m365_info_scope": "Rækkevidde",
+  "m365_info_scope_org": "Alle brugere i tenant",
+  "m365_info_scope_user": "Kun den indloggede bruger",
+  "m365_info_consent": "Administratorsamtykke",
+  "m365_info_required": "Påkrævet",
+  "m365_info_admin": "Global Administrator",
+  "m365_info_expands_scope": "Udvider rækkevidde til alle brugere",
+  "m365_info_no": "Nej",
+  "m365_info_yes": "Ja",
+  "m365_info_app_desc": "Appen godkender sig med et Client Secret og tilgår alle brugeres data direkte via Microsoft Graph — intet interaktivt login kræves. Ideel til automatiserede eller planlagte scanninger.",
+  "m365_info_delegated_desc": "Appen handler på vegne af den indloggede bruger via device code flow. Som standard er kun den pågældende brugers data tilgængeligt. En Global Administrator kan give bredere samtykke til at scanne alle brugere.",
+  "m365_filter_search": "Søg…",
+  "m365_filter_clear": "Ryd",
+  "m365_btn_list_view": "Liste",
+  "m365_btn_grid_view": "Gitter",
+  "m365_log_found_items": "Fandt",
+  "m365_log_items_to_scan": "element(er) til scanning",
+  "m365_log_starting_scan": "Starter scanning:",
+  "m365_log_accounts": "konto(er)",
+  "m365_btn_bulk_delete": "Slet",
+  "m365_bulk_delete_title": "Massesletning",
+  "m365_bulk_delete_sub": "E-mails flyttes til Slettet post · Filer sendes til papirkurven",
+  "m365_bulk_filter_heading": "Filtrer hvad der skal slettes",
+  "m365_bulk_filter_source": "Kildetype",
+  "m365_bulk_filter_min_cpr": "Min. CPR-fund",
+  "m365_bulk_filter_older_than": "Ældre end dato",
+  "m365_bulk_no_match": "Ingen elementer matcher disse kriterier.",
+  "m365_bulk_match_count": "element(er) vil blive slettet",
+  "m365_bulk_confirm_q": "element(er) slettes permanent. Fortsæt?",
+  "m365_bulk_deleting": "Sletter…",
+  "m365_bulk_deleted": "slettet",
+  "m365_bulk_failed": "mislykkedes",
+  "m365_bulk_delete_confirm": "Slet matchende elementer",
+  "m365_delete_confirm": "Slet",
+  "m365_delete_warning": "Dette kan ikke fortrydes.",
+  "m365_log_deleted": "Slettet:",
+  "m365_log_delete_failed": "Sletning mislykkedes:",
+  "m365_log_bulk_done": "Massesletning:",
+  "m365_log_older_than": "ældre end",
+  "m365_eta_left": "tilbage",
+  "btn_all": "Alle",
+  "btn_errors": "Fejl",
+  "log_copy": "Kopier",
+  "btn_none": "Ingen",
+  "m365_btn_resume": "Genoptag",
+  "m365_btn_start_fresh": "Start forfra",
+  "m365_resume_banner": "Tidligere scanning afbrudt — {scanned} scannet, {flagged} fundet",
+  "m365_log_resuming": "Genoptager scanning:",
+  "m365_log_already_scanned": "allerede scannet — sprunget over",
+  "m365_resuming": "Genoptager — springer allerede scannede elementer over…",
+  "m365_opt_delta": "Delta-scanning",
+  "m365_opt_delta_hint": "Kun ændrede elementer (efter første fulde scanning)",
+  "m365_delta_tokens_saved": "Tokens gemt",
+  "m365_delta_clear": "Ryd tokens",
+  "m365_delta_cleared": "Delta-tokens ryddet — næste scanning bliver fuld scanning.",
+  "m365_delta_mode": "Delta-tilstand — henter kun ændrede elementer…",
+  "m365_smtp_title": "✉ Send rapport",
+  "m365_smtp_desc": "Send Excel-rapporten via e-mail efter scanning.",
+  "m365_smtp_host": "SMTP-server",
+  "m365_smtp_port": "Port",
+  "m365_smtp_user": "Brugernavn",
+  "m365_smtp_pass": "Adgangskode",
+  "m365_smtp_from": "Afsenderadresse",
+  "m365_smtp_tls": "STARTTLS",
+  "m365_smtp_ssl": "SSL",
+  "m365_smtp_recipients": "Modtagere",
+  "m365_smtp_recipients_hint": "Adskil med komma eller semikolon",
+  "m365_smtp_save": "Gem",
+  "m365_smtp_send": "Send nu",
+  "m365_smtp_saved": "Indstillinger gemt.",
+  "m365_smtp_sending": "Sender…",
+  "m365_smtp_sent": "Rapport sendt.",
+  "m365_smtp_no_recipients": "Angiv mindst én modtager.",
+  "m365_smtp_configure": "Konfigurer",
+  "m365_smtp_from_hint": "(valgfri — standard er brugernavn)",
+  "m365_subject_title": "🔍 Registreret person",
+  "m365_subject_btn": "Slå op",
+  "m365_subject_desc": "Find alle markerede elementer med et givet CPR-nummer. CPR-nummeret hashes før søgning og gemmes aldrig i klartekst.",
+  "m365_subject_placeholder": "DDMMYY-XXXX",
+  "m365_subject_search": "Søg",
+  "m365_subject_searching": "Søger…",
+  "m365_subject_found": "element(er) fundet",
+  "m365_subject_not_found": "Ingen markerede elementer fundet for dette CPR-nummer.",
+  "m365_subject_delete_all": "Slet alle for denne person",
+  "m365_subject_delete_confirm": "element(er) slettes permanent. Fortsæt?",
+  "m365_disposition_label": "Disposition",
+  "m365_disp_unreviewed": "Ikke gennemgået",
+  "m365_disp_retain_legal": "Opbevar — lovkrav",
+  "m365_disp_retain_legit": "Opbevar — legitim interesse",
+  "m365_disp_retain_contract": "Opbevar — kontrakt",
+  "m365_disp_delete_sched": "Slet — planlagt",
+  "m365_disp_personal_use": "Privat brug — uden for scope",
+  "m365_disp_deleted": "Slettet",
+  "m365_disp_save": "Gem",
+  "m365_disp_saved": "✓ Gemt",
+  "m365_opt_retention": "Opbevaringspolitik",
+  "m365_opt_retention_hint": "Flag og slet elementer ældre end N år",
+  "m365_ret_years": "Opbevaringsår",
+  "m365_ret_fy_end": "Regnskabsårs afslutning",
+  "m365_ret_fy_rolling": "Løbende (fra i dag)",
+  "m365_ret_fy_dec": "31 dec. (Bogføringsloven)",
+  "m365_ret_fy_jun": "30 jun.",
+  "m365_ret_fy_mar": "31 mar.",
+  "m365_ret_mode_rolling": "løbende",
+  "m365_ret_mode_fiscal": "regnskabsår",
+  "m365_ret_cutoff_hint": "Elementer ændret før",
+  "m365_ret_cutoff_flagged": "markeres som forfaldne",
+  "m365_overdue_found": "forfaldne element(er) fundet",
+  "m365_bulk_overdue_btn": "Filtrer forfaldne",
+  "m365_bulk_clear_filters": "Ryd filtre",
+  "m365_btn_export_article30": "Art.30",
+  "m365_article30_done": "Artikel 30-rapport klar.",
+  "a30_title": "GDPR Artikel 30",
+  "a30_subtitle": "Fortegnelse over behandlingsaktiviteter",
+  "a30_generated": "Genereret",
+  "a30_confidential": "Fortroligt — GDPR-overholdelsesdokument",
+  "a30_s1": "1. Oversigt",
+  "a30_scan_date": "Scanningsdato",
+  "a30_items_scanned": "Scannede elementer",
+  "a30_flagged": "Markerede elementer",
+  "a30_cpr_hits": "CPR-fund i alt",
+  "a30_data_subjects": "Anslåede registrerede",
+  "a30_overdue": "Forfaldne elementer (>5 år)",
+  "a30_by_source": "Fordeling efter kilde",
+  "a30_col_source": "Kilde",
+  "a30_col_items": "Elementer",
+  "a30_col_cpr": "CPR-fund",
+  "a30_col_overdue": "Forfaldne",
+  "a30_s2": "2. Identificerede kategorier af personoplysninger",
+  "a30_s2_intro": "Følgende kategorier af personoplysninger blev fundet under scanning.",
+  "a30_col_gdpr_class": "GDPR-klassifikation",
+  "a30_cpr_label": "CPR-numre (dansk personnummer)",
+  "a30_cpr_class": "Art. 9 — nationalt identifikationsnummer",
+  "a30_pii_class_9": "Art. 9 — helbred/følsomme",
+  "a30_pii_class_4": "Art. 4 — personoplysninger",
+  "a30_s3": "3. Datafortegnelse",
+  "a30_s3_intro": "Alle markerede elementer er listet nedenfor med placering, opbevaringsstatus og dispositionsstatus.",
+  "a30_col_name": "Navn / Emne",
+  "a30_col_account": "Konto",
+  "a30_col_modified": "Ændret",
+  "a30_col_disp": "Disposition",
+  "a30_more_items": "yderligere elementer vises ikke. Eksporter Excel-rapporten for den fulde liste.",
+  "a30_s4": "4. Opbevaringsanalyse",
+  "a30_s4_intro": "Følgende elementer overskrider 5-års opbevaringsgrænsen og bør gennemgås for sletning iht. GDPR artikel 5(1)(e) — opbevaringsbegrænsning.",
+  "a30_s5": "5. Overholdelsestrend",
+  "a30_s5_intro": "Antal markerede elementer over de seneste scanninger (nyeste først).",
+  "a30_col_scan_date": "Scanningsdato",
+  "a30_col_scan_type": "Scanningstype",
+  "a30_scan_delta": "Delta",
+  "a30_scan_full": "Fuld",
+  "a30_s6": "6. Metode og retsgrundlag",
+  "a30_method_title": "Scanningsmetode",
+  "a30_method_1": "CPR-numre registreres ved mønstergenkendelse mod det officielle danske CPR-format (DDMMYY-XXXX) med fuld datovalidering og århundrede-cifferverifikation iht. CPR-registerets regler.",
+  "a30_method_2": "Yderligere personoplysninger (telefonnumre, e-mailadresser, IBAN, bankkontonumre, navne, adresser og organisationer) registreres med regex og spaCy NER.",
+  "a30_method_3": "CPR-numre i dette dokuments database er SHA-256-hashede og gemmes aldrig i klartekst.",
+  "a30_method_4": "Scanning dækker Exchange-mailbokse (alle mapper inkl. Sendt post), OneDrive, SharePoint og Microsoft Teams-kanalfiler via Microsoft Graph API. Ved tilslutning dækker Google Workspace-scanning Gmail og Google Drev via en tjenestekonto med domæneomfattende delegation. Lokale og netværksbaserede filshares (SMB) scannes direkte.",
+  "a30_gdpr_title": "Refererede GDPR-artikler",
+  "a30_gdpr_1": "Artikel 5(1)(c) — Dataminimering: kun nødvendige oplysninger må opbevares",
+  "a30_gdpr_2": "Artikel 5(1)(e) — Opbevaringsbegrænsning: data må ikke opbevares længere end nødvendigt",
+  "a30_gdpr_3": "Artikel 9 — Særlige kategorier: helbredsoplysninger, strafbare forhold, fagforeningsmedlemskab mv. kræver udtrykkeligt retsgrundlag",
+  "a30_gdpr_4": "Artikel 15 — Ret til indsigt: registrerede kan anmode om oplysninger om deres data",
+  "a30_gdpr_5": "Artikel 17 — Ret til sletning: registrerede kan anmode om sletning",
+  "a30_gdpr_6": "Artikel 30 — Fortegnelse over behandlingsaktiviteter: dette dokument opfylder forpligtelsen",
+  "a30_disp_unreviewed": "Ikke gennemgået",
+  "a30_disp_retain_legal": "Opbevar — Lovkrav",
+  "a30_disp_retain_legit": "Opbevar — Legitim interesse",
+  "a30_disp_retain_contract": "Opbevar — Kontrakt",
+  "a30_disp_delete_sched": "Slet — Planlagt",
+  "a30_disp_personal_use": "Privat brug — uden for GDPR's anvendelsesområde (Art. 2(2)(c))",
+  "a30_disp_deleted": "Slettet",
+  "a30_s6_short": "Metode og retsgrundlag",
+  "m365_role_all": "Alle",
+  "m365_role_staff": "Ansat",
+  "m365_role_student": "Elev",
+  "a30_s_dellog": "Sletningslog",
+  "a30_dellog_intro": "element(er) med personoplysninger er slettet via GDPRScanner. Denne log opfylder dokumentationspligten i GDPR artikel 5, stk. 2.",
+  "a30_dellog_by_reason": "Sletninger efter årsag",
+  "a30_dellog_records": "Sletningsposter",
+  "a30_col_reason": "Årsag",
+  "a30_col_count": "Antal",
+  "a30_col_deleted_at": "Slettet den",
+  "a30_col_deleted_by": "Slettet af",
+  "a30_reason_manual": "Manuel (enkelt kort)",
+  "a30_reason_bulk": "Massesletning",
+  "a30_reason_retention": "Opbevaringspolitik",
+  "a30_reason_dsr": "Sletningsanmodning fra registreret (art. 17)",
+  "m365_filter_all_disp": "Alle dispositioner",
+  "m365_trend_title": "Tendens",
+  "m365_trend_flagged": "Markerede",
+  "m365_trend_overdue": "Forfaldne",
+  "m365_filter_all_transfer": "Alle elementer",
+  "m365_filter_ext_recipient": "Ekstern modtager",
+  "m365_filter_ext_share": "Eksternt delt",
+  "m365_filter_shared": "Delt",
+  "m365_badge_ext_recipient": "Ekstern",
+  "m365_badge_shared": "Delt",
+  "a30_s_special": "Særlige kategorier af personoplysninger (artikel 9)",
+  "a30_special_intro": "element(er) indeholder særlige kategorier af personoplysninger iht. GDPR artikel 9. Kræver eksplicit retsgrundlag og konsekvensanalyse (DPIA).",
+  "a30_special_by_cat": "Opdagede kategorier",
+  "a30_special_items": "Berørte elementer (op til 50)",
+  "a30_col_category": "Kategori",
+  "a30_cat_health": "Helbredsoplysninger (art. 9)",
+  "a30_cat_mental": "Psykisk helbred (art. 9)",
+  "a30_cat_criminal": "Strafbare forhold (art. 10)",
+  "a30_cat_union": "Fagforeningsmedlemskab (art. 9)",
+  "a30_cat_religion": "Religiøs overbevisning (art. 9)",
+  "a30_cat_ethnicity": "Etnisk oprindelse (art. 9)",
+  "a30_cat_political": "Politisk overbevisning (art. 9)",
+  "a30_cat_biometric": "Biometriske oplysninger (art. 9)",
+  "a30_cat_sexual": "Seksuel orientering (art. 9)",
+  "m365_filter_all_special": "Alle risikoniveauer",
+  "m365_filter_special_only": "Art. 9 følsomme data",
+  "m365_badge_special": "Art.9",
+  "m365_phase_scanning": "Scanner…",
+  "a30_special_cat": "Særlige kategorier (artikel 9)",
+  "a30_special_cat_note": "Disse elementer indeholder helbredsoplysninger, straffeoplysninger, biometriske data, religiøse, etniske, fagforeningsmæssige, politiske eller seksuelle oplysninger. Et eksplicit retsgrundlag (art. 9, stk. 2) og eventuelt en konsekvensanalyse (art. 35) er påkrævet.",
+  "a30_col_special": "Art. 9",
+  "a30_pii_phone": "Telefonnumre",
+  "a30_pii_email": "E-mailadresser",
+  "a30_pii_iban": "IBAN-bankkontonumre",
+  "a30_pii_bank": "Bankkontonumre",
+  "a30_pii_name": "Personnavne (NER)",
+  "a30_pii_address": "Adresser (NER)",
+  "a30_pii_org": "Organisationer (NER)",
+  "a30_col_cpr_short": "CPR",
+  "a30_inv_staff": "Personale / Undervisere",
+  "a30_inv_students": "Elever",
+  "a30_student_consent_note": "Bemærk: Elevers konti i dansk folkeskole (elever under 15 år) kræver forældrenes samtykke til behandling af personoplysninger i henhold til Databeskyttelsesloven §6. Elementer i elevers konti må ikke slettes automatisk — enhver handling kræver godkendelse fra skoleledelsen, og for elever under 15 år skal forældre eller værger som rettighedshavere underrettes i henhold til GDPR artikel 8.",
+  "m365_profile_label": "Profil:",
+  "m365_profile_placeholder": "— Vælg profil —",
+  "m365_profile_save_tip": "Gem aktuelle indstillinger som profil",
+  "m365_profile_save_prompt": "Profilnavn:",
+  "m365_profile_applied": "Profil indlæst",
+  "m365_profile_saved": "Profil gemt",
+  "m365_profile_manage_btn": "Profiler",
+  "m365_profile_clear_btn": "Ryd",
+  "m365_profile_save_btn": "Gem",
+  "m365_profile_manage_title": "⚙ Administrer profiler",
+  "m365_profile_no_profiles": "Ingen gemte profiler endnu. Brug 💾 til at gemme de aktuelle sidebjælkeindstillinger som en profil.",
+  "m365_profile_use": "Brug",
+  "m365_profile_edit": "Rediger",
+  "m365_profile_duplicate": "Dupliker",
+  "m365_profile_delete": "Slet",
+  "m365_profile_delete_confirm": "Slet profil",
+  "m365_profile_duplicated": "Profil duplikeret",
+  "m365_profile_deleted": "Profil slettet",
+  "m365_profile_never": "aldrig",
+  "m365_profile_last_run": "Sidst kørt",
+  "m365_profile_name_placeholder": "Profilnavn",
+  "m365_profile_desc_placeholder": "Beskrivelse (valgfri)",
+  "m365_profile_name_required": "Profilnavn er påkrævet.",
+  "m365_db_title": "🗄 Database",
+  "m365_db_export": "Eksporter",
+  "m365_db_import": "Importer",
+  "m365_db_export_error": "Eksport mislykkedes",
+  "m365_db_exported": "Database eksporteret",
+  "m365_db_import_title": "📥 Importer database",
+  "m365_db_import_desc": "Vælg en tidligere eksporteret .zip-fil. Sammenfletning tilføjer dispositioner og slettelog. Erstatning sletter alt og gendanner fuldt ud.",
+  "m365_db_import_file": "ZIP-fil",
+  "m365_db_import_mode": "Tilstand:",
+  "m365_db_import_merge": "Sammenflet (sikker)",
+  "m365_db_import_replace": "Erstat (fuld gendannelse)",
+  "m365_db_import_replace_warn": "⚠ Erstatningstilstand sletter alle eksisterende scanningsdata inden gendannelse. Sørg for at have en sikkerhedskopi af ~/.gdpr_scanner.db først.",
+  "m365_db_import_replace_confirm": "Erstatningstilstand sletter ALLE eksisterende scanningsdata og gendanner fra arkivet.\\n\\nSørg for at have en manuel sikkerhedskopi af ~/.gdpr_scanner.db.\\n\\nFortsæt?",
+  "m365_db_import_no_file": "Vælg venligst en ZIP-fil først.",
+  "m365_db_importing": "Importerer…",
+  "m365_db_imported": "Importeret",
+  "m365_db_import_run": "Importer",
+  "m365_opt_scan_photos": "Søg efter ansigter i billeder",
+  "m365_opt_scan_photos_hint": "Markerer billeder med registrerede ansigter som Art. 9 biometriske data. Langsommere — aktivér efter behov.",
+  "m365_filter_photo_only": "📷 Billeder / biometrisk",
+  "m365_badge_faces": "ansigter",
+  "a30_photo_items": "Billeder med registrerede ansigter (Art. 9 biometrisk)",
+  "a30_photo_note": "Fotografier af identificerbare personer er biometriske data i henhold til Art. 9 GDPR. Opbevaring kræver et dokumenteret retsgrundlag i henhold til Art. 9(2). For skolefotografier af elever under 15 år er forældrenes samtykke påkrævet (Databeskyttelsesloven §6). Se Datatilsynets vejledning om fotografering i skoler.",
+  "a30_s_photos": "Fotografier og biometriske data (artikel 9)",
+  "a30_photo_intro": "billedfil(er) med registrerede ansigt(er) blev fundet i scanningen. Fotografier af identificerbare personer udgør biometriske data i henhold til GDPR artikel 9 og er underlagt den samme forhøjede beskyttelse som sundheds- eller straffeoplysninger.",
+  "a30_photo_guidance": "Vejledning om opbevaringsperiode",
+  "a30_photo_g1": "Billeder må kun opbevares, så længe det oprindelige formål er gyldigt (Art. 5(1)(b) — formålsbegrænsning).",
+  "a30_photo_g2": "Elever under 15 år kræver forældrenes samtykke (Databeskyttelsesloven §6). Samtykket skal være frit givet, specifikt og dokumenteret.",
+  "a30_photo_g3": "Billeder på offentligt tilgængelige hjemmesider skal fjernes straks, når en person forlader organisationen eller trækker sit samtykke tilbage (Art. 17 — retten til sletning).",
+  "a30_photo_g4": "Historisk/arkivbrug kan begrunde længere opbevaring i henhold til Art. 89 kun med specifikke sikkerhedsforanstaltninger og en konkret vurdering.",
+  "a30_photo_col_faces": "Ansigter",
+  "a30_method_5": "Når billedscanning er aktiveret, analyseres billedfiler ved hjælp af OpenCV Haar cascade-ansigtsdetektion for at identificere fotografier af personer (Art. 9 biometriske data).",
+  "m365_role_cycle_tip": "Klik for at ændre rolle (skifter elev → personale → andet → auto)",
+  "m365_role_set": "Rolle sat",
+  "m365_role_cleared": "Rolletilsidesættelse fjernet",
+  "m365_sku_debug_title": "🔍 Lejer-SKU-ID'er",
+  "m365_sku_debug_desc": "Dette er de rå SKU-ID'er tildelt dine brugere. Dem markeret ❓ ukendt er ikke i classification/m365_skus.json — kopiér dem ind under student_ids eller staff_ids og genstart.",
+  "m365_sku_debug_none": "Ingen licensdata returneret — kontrollér at appen har User.Read.All tilladelse.",
+  "m365_file_sources_title": "📁 Filkilder",
+  "m365_file_sources_manage": "Administrer",
+  "m365_file_sources_empty": "Ingen filkilder konfigureret. Tilføj en lokal mappe eller netværksdeling nedenfor.",
+  "m365_file_sources_add": "Tilføj kilde",
+  "m365_fsrc_label": "Betegnelse",
+  "m365_fsrc_path": "Sti",
+  "m365_fsrc_smb_detected": "SMB/CIFS-netværksdeling registreret",
+  "m365_fsrc_smb_host": "SMB-vært",
+  "m365_fsrc_smb_user": "Brugernavn",
+  "m365_fsrc_smb_pw": "Adgangskode",
+  "m365_fsrc_smb_pw_hint": "Adgangskoden gemmes i nøglekæden — aldrig i en fil.",
+  "m365_fsrc_add_btn": "Tilføj",
+  "m365_fsrc_saved": "Kilde gemt",
+  "m365_fsrc_saving": "Gemmer...",
+  "m365_fsrc_path_required": "Sti er påkrævet.",
+  "m365_fsrc_scan_btn": "Scan",
+  "m365_fsrc_scan_start": "Starter filscanning",
+  "m365_src_group_files": "Filkilder",
+  "m365_no_sources": "Ingen kilder valgt — intet at scanne.",
+  "m365_fsrc_name_required": "Navn er påkrævet.",
+  "m365_srcmgmt_title": "⚙ Kildeadministration",
+  "m365_srcmgmt_tab_m365": "Microsoft 365",
+  "m365_srcmgmt_tab_google": "Google Workspace",
+  "m365_srcmgmt_tab_files": "Filkilder",
+  "m365_srcmgmt_connection": "Forbindelse",
+  "m365_srcmgmt_azure_creds": "Azure-legitimationsoplysninger",
+  "m365_srcmgmt_sources_m365": "Kilder der skal scannes",
+  "m365_srcmgmt_connected": "Forbundet",
+  "m365_srcmgmt_not_connected": "Ikke forbundet",
+  "m365_srcmgmt_coming_soon": "Kommer snart",
+  "m365_srcmgmt_google_sub": "Gmail og Google Drev-scanning vises her, når det er implementeret.",
+  "m365_srcmgmt_file_sources": "Filkilder",
+  "m365_sources_manage_btn": "Kilder",
+  "m365_connecting": "Opretter forbindelse...",
+  "m365_err_creds_required": "Klient-ID og lejer-ID er påkrævet",
+  "m365_signout_confirm": "Afbryd forbindelsen og ryd legitimationsoplysninger?",
+  "m365_btn_settings": "Indstillinger",
+  "m365_settings_title": "⚙ Indstillinger",
+  "m365_settings_tab_general": "Generelt",
+  "m365_settings_tab_email": "E-mailrapport",
+  "m365_settings_tab_database": "Database",
+  "m365_settings_appearance": "Udseende",
+  "m365_settings_language": "Sprog",
+  "m365_settings_theme": "Tema",
+  "m365_settings_db_actions": "Handlinger",
+  "m365_db_reset": "Nulstil database",
+  "m365_db_reset_confirm": "Nulstil database? Alle scanresultater slettes.",
+  "m365_db_reset_done": "Database nulstillet",
+  "m365_db_scans": "Scanninger",
+  "m365_smtp_saving": "Gemmer...",
+  "m365_settings_admin_pin": "Admin-PIN",
+  "m365_settings_pin_hint": "Påkrævet ved destruktive handlinger (f.eks. nulstil database). Lad stå tom for at deaktivere.",
+  "m365_settings_current_pin": "Nuværende PIN",
+  "m365_settings_new_pin": "Ny PIN",
+  "m365_settings_confirm_pin": "Bekræft PIN",
+  "m365_settings_pin_set": "Admin-PIN er indstillet",
+  "m365_settings_pin_not_set": "Ingen PIN — nulstil database er ubeskyttet",
+  "m365_settings_pin_required": "PIN er påkrævet.",
+  "m365_settings_pin_mismatch": "PIN-koderne stemmer ikke overens.",
+  "m365_settings_pin_wrong": "Forkert PIN — nulstilling annulleret.",
+  "m365_settings_pin_saved": "PIN gemt",
+  "m365_settings_enter_pin": "Indtast admin-PIN",
+  "m365_settings_enter_pin_reset": "Indtast admin-PIN for at nulstille databasen.",
+  "btn_confirm": "Bekræft",
+  "m365_log_scan_started": "Scanning startet",
+  "m365_preview_local_file": "Lokal fil — ingen cloud-forhåndsvisning tilgængelig",
+  "m365_badge_gps": "GPS-position",
+  "a30_gps_items": "Elementer med GPS-placeringsdata (Art. 4 — placering = persondata)",
+  "a30_exif_pii_items": "Elementer med EXIF-personoplysninger (forfatter, beskrivelse, nøgleord)",
+  "a30_gps_title": "Elementer med GPS-placeringsdata",
+  "a30_gps_intro": "Følgende filer indeholder GPS-koordinater indlejret i EXIF-metadata. Placeringsdata udgør personoplysninger i henhold til GDPR Art. 4.",
+  "a30_gps_col_lat": "Breddegrad",
+  "a30_gps_col_lon": "Længdegrad",
+  "m365_accounts_disabled_tip": "Vælg en Microsoft 365-kilde for at aktivere kontovalg",
+  "m365_smtp_test": "Test",
+  "m365_smtp_testing": "Sender test-e-mail…",
+  "m365_smtp_test_ok": "Test-e-mail sendt",
+  "m365_smtp_test_fail": "Forbindelse mislykkedes",
+  "m365_fsrc_edit_btn": "Rediger",
+  "m365_fsrc_save_changes": "Gem ændringer",
+  "m365_settings_tab_scheduler": "Planlægger",
+  "m365_sched_title": "Planlagte scanninger",
+  "m365_sched_next": "Næste",
+  "m365_sched_hint": "Kør scanninger automatisk på et fast tidspunkt. Kræver en aktiv M365-forbindelse (app-tilstand anbefales).",
+  "m365_sched_no_aps": "⚠ APScheduler ikke installeret. Kør: pip install apscheduler",
+  "m365_sched_enabled": "Aktivér planlægger",
+  "m365_sched_frequency": "Frekvens",
+  "m365_sched_dow": "Ugedag",
+  "m365_sched_dom": "Dag i måneden",
+  "m365_sched_time": "Tidspunkt",
+  "m365_sched_profile": "Profil",
+  "m365_sched_profile_last": "Sidst gemte indstillinger",
+  "m365_sched_after_scan": "Efter scanning",
+  "m365_sched_auto_email": "Send rapport automatisk",
+  "m365_sched_auto_retention": "Håndhæv opbevaringspolitik",
+  "m365_sched_status": "Status",
+  "m365_sched_run_now": "▶ Kør nu",
+  "m365_sched_add": "+ Tilføj planlagt scanning",
+  "m365_sched_name": "Navn",
+  "m365_sched_editor_new": "Ny planlagt scanning",
+  "m365_sched_editor_edit": "Rediger planlagt scanning",
+  "m365_sched_name_required": "Navn er påkrævet",
+  "m365_sched_no_runs": "Ingen planlagte kørsler endnu",
+  "m365_sched_freq_daily": "Dagligt",
+  "m365_sched_freq_weekly": "Ugentligt",
+  "m365_sched_freq_monthly": "Månedligt",
+  "m365_sched_dow_mon": "Mandag",
+  "m365_sched_dow_tue": "Tirsdag",
+  "m365_sched_dow_wed": "Onsdag",
+  "m365_sched_dow_thu": "Torsdag",
+  "m365_sched_dow_fri": "Fredag",
+  "m365_sched_dow_sat": "Lørdag",
+  "m365_sched_dow_sun": "Søndag",
+  "btn_save": "Gem",
+  "m365_settings_about": "Om",
+  "m365_settings_save_pin": "Gem PIN",
+  "m365_sse_reconnecting": "Genopretter forbindelse til kørende scanning…",
+  "m365_sse_replay_note": "Live-log genoptaget — tidligere indlæg afspillet fra igangværende scanning.",
+  "m365_google_sa_creds": "Tjenestekonto-legitimationsoplysninger",
+  "m365_google_sa_key_file": "JSON-nøgle til tjenestekonto",
+  "m365_google_sa_key_hint": "Download fra Google Cloud Console → IAM & Admin → Tjenestekonti → Nøgler → Tilføj nøgle → JSON",
+  "m365_google_admin_email": "Admin-e-mail",
+  "m365_google_admin_email_hint": "Bruges til domæneomfattende delegation — skal være Workspace-superadmin.",
+  "m365_google_libs_missing": "Biblioteker ikke installeret",
+  "m365_google_key_required": "Vælg en JSON-nøglefil til tjenestekontoen",
+  "m365_google_invalid_json": "Ugyldig JSON-fil",
+  "m365_srcmgmt_sources_google": "Kilder der skal scannes",
+  "m365_google_src_gmail": "Gmail",
+  "m365_google_src_drive": "Google Drev",
+  "m365_google_setup_title": "Opsætning kræves i Google Workspace:",
+  "m365_google_setup_step1": "Opret et Google Cloud-projekt og aktiver Gmail API + Drive API + Admin SDK.",
+  "m365_google_setup_step2": "Opret en tjenestekonto, download JSON-nøglen, og aktiver domæneomfattende delegation.",
+  "m365_google_setup_step3": "I Workspace Admin → Sikkerhed → API-kontroller → Domæneomfattende delegation, tilføj tjenestekontoens klient-ID med omfang:",
+  "m365_google_auth_mode": "Godkendelsestilstand",
+  "m365_google_mode_workspace": "Workspace",
+  "m365_google_mode_personal": "Personlig konto",
+  "m365_google_personal_creds": "Personlig konto",
+  "m365_google_personal_client_id": "Klient-ID",
+  "m365_google_personal_client_secret": "Klienthemmelighed",
+  "m365_google_personal_hint": "Opret OAuth 2.0 Desktop-legitimationsoplysninger i Google Cloud Console, og indsæt klient-ID og hemmelighed ovenfor.",
+  "m365_google_personal_sign_in": "Log ind",
+  "m365_google_personal_creds_required": "Klient-ID og hemmelighed er påkrævet",
+  "m365_google_personal_setup_title": "Opsætning kræves:",
+  "m365_google_personal_setup_step1": "I Google Cloud Console skal du oprette et projekt og aktivere Gmail API + Drive API.",
+  "m365_google_personal_setup_step2": "Opret OAuth 2.0-legitimationsoplysninger (typen Desktop-app) og kopiér klient-ID og hemmelighed.",
+  "m365_google_personal_setup_step3": "Tilføj din Google-konto-e-mail til listen over testbrugere på OAuth-samtykkeskærmen.",
+  "m365_auth_waiting": "Venter på login…",
+  "role_staff": "Ansat",
+  "role_student": "Elev",
+  "role_other": "Anden",
+
+  "m365_settings_tab_security": "Sikkerhed",
+
+  "share_modal_title": "Del resultater",
+  "share_modal_desc": "Skrivebeskyttede links lader en DPO eller gennemganger se resultater og tilknytte dispositioner uden adgang til scanningskontroller eller legitimationsoplysninger.",
+  "share_new_link": "Nyt link",
+  "share_label_lbl": "Etiket (valgfrit)",
+  "share_label_placeholder": "f.eks. DPO-gennemgang 2026",
+  "share_expires_in": "Udløber om",
+  "share_expires_never": "Aldrig",
+  "share_expires_7d": "7 dage",
+  "share_expires_30d": "30 dage",
+  "share_expires_90d": "90 dage",
+  "share_expires_1y": "1 år",
+  "share_create": "Opret",
+  "share_copy_link_prompt": "Kopiér link:",
+  "share_active_links": "Aktive links",
+  "share_viewer_pin_label": "Seerens PIN:",
+  "share_pin_configure": "Konfigurér",
+  "share_pin_set": "Angivet",
+  "share_pin_not_set": "Ikke angivet",
+  "share_no_links": "Ingen aktive links.",
+  "share_unlabelled": "Uden etiket",
+  "share_expires_prefix": "Udløber:",
+  "share_last_used": "Sidst brugt:",
+  "share_revoke": "Tilbagekald",
+  "share_copied": "Kopiéret!",
+  "share_load_error": "Kunne ikke indlæse links.",
+  "share_create_error": "Kunne ikke oprette link:",
+  "share_revoke_confirm": "Tilbagekald dette link? Alle der bruger det, mister straks adgang.",
+  "share_revoke_error": "Kunne ikke tilbagekalde:",
+
+  "viewer_pin_group_title": "Seerens PIN",
+  "viewer_pin_desc": "En numerisk PIN (4–8 cifre), der lader alle åbne <code style=\"font-size:10px\">/view</code> i en browser for skrivebeskyttet adgang til resultater uden et token-link.",
+  "viewer_pin_clear": "Ryd PIN",
+  "viewer_pin_is_set": "Seerens PIN er angivet",
+  "viewer_pin_not_set_msg": "Ingen PIN angivet — /view kræver et token-link",
+  "viewer_pin_format": "PIN skal være 4–8 cifre.",
+  "viewer_pin_saving": "Gemmer…",
+  "viewer_pin_saved": "PIN gemt",
+  "viewer_pin_clear_confirm": "Fjern seerens PIN? /view vil igen kræve et token-link.",
+  "viewer_pin_cleared": "PIN ryddet"
+}
\ No newline at end of file
diff --git a/lang/de.json b/lang/de.json
new file mode 100644
index 0000000..70ca97c
--- /dev/null
+++ b/lang/de.json
@@ -0,0 +1,773 @@
+{
+  "app_name": "Document Scanner",
+  "label_root_folder": "Stammordner",
+  "label_older_than": "Dateien markieren, deren Daten älter sind als",
+  "placeholder_folder": "/pfad/zu/dokumenten",
+  "btn_scan": "Scan starten",
+  "btn_stop": "Scan stoppen",
+  "toggle_anonymise": "Anonymisieren",
+  "toggle_mask": "Nur Ausweis-Nr. maskieren",
+  "toggle_blur_faces": "Gesichter unscharf",
+  "toggle_skip_cloud": "Nur-Cloud-Dateien überspringen",
+  "toggle_ocr": "OCR für gescannte PDFs",
+  "label_face_sensitivity": "Gesichtsempfindlichkeit",
+  "face_sensitivity_high": "Hoch",
+  "face_sensitivity_low": "Niedrig",
+  "face_sensitivity_hint": "Höher = weniger Fehlerkennungen",
+  "label_ocr_language": "OCR-Sprache",
+  "label_ocr_dpi": "DPI (Qualität vs. Geschwindigkeit)",
+  "lang_danish": "Dänisch",
+  "lang_danish_english": "Dänisch + Englisch",
+  "lang_english": "Englisch",
+  "lang_norwegian": "Norwegisch",
+  "lang_swedish": "Schwedisch",
+  "lang_german": "Deutsch",
+  "lang_french": "Französisch",
+  "lang_dutch": "Niederländisch",
+  "lang_spanish": "Spanisch",
+  "lang_italian": "Italienisch",
+  "lang_portuguese": "Portugiesisch",
+  "lang_finnish": "Finnisch",
+  "lang_polish": "Polnisch",
+  "lang_czech": "Tschechisch",
+  "lang_russian": "Russisch",
+  "lang_arabic": "Arabisch",
+  "lang_chinese_simplified": "Chinesisch (vereinfacht)",
+  "lang_chinese_traditional": "Chinesisch (traditionell)",
+  "lang_japanese": "Japanisch",
+  "lang_korean": "Koreanisch",
+  "time_any": "Beliebig",
+  "time_1y": "1 Jahr",
+  "time_2y": "2 Jahre",
+  "time_5y": "5 Jahre",
+  "time_10y": "10 Jahre",
+  "stat_scanned": "Gescannte Dateien",
+  "stat_flagged": "Markierte",
+  "stat_high_risk": "Hohes Risiko",
+  "stat_cpr": "Ausweis-Nr. gefunden",
+  "col_file": "Datei",
+  "col_cpr": "Ausweis-Nr.",
+  "col_oldest": "Ältestes Datum",
+  "col_risk": "Risiko",
+  "col_action": "Aktion",
+  "col_detail": "Details",
+  "sort_name_az": "Name A–Z",
+  "sort_name_za": "Name Z–A",
+  "sort_cpr_desc": "Anzahl Ausweis-Nr. ↓",
+  "sort_cpr_asc": "Anzahl Ausweis-Nr. ↑",
+  "sort_oldest_desc": "Ältestes Datum ↓",
+  "sort_risk_desc": "Risiko ↓",
+  "sort_size_desc": "Größe ↓",
+  "filter_all_types": "Alle Typen",
+  "filter_pdf": "PDF",
+  "filter_word": "Word",
+  "filter_excel": "Excel",
+  "filter_image": "Bild",
+  "filter_shared_cpr": "⚠ Geteilte Ausweis-Nr.",
+  "placeholder_search": "Dateiname suchen…",
+  "btn_anonymise": "Anonymisieren",
+  "btn_mask": "Ausweis-Nr. maskieren",
+  "btn_blur": "Unscharf",
+  "btn_preview": "Vorschau",
+  "btn_show_in_folder": "Im Ordner anzeigen",
+  "btn_move_to_trash": "In den Papierkorb",
+  "btn_undo": "Rückgängig",
+  "btn_export_csv": "CSV",
+  "btn_select_all": "Alle auswählen",
+  "btn_anonymise_flagged": "Markierte anonymisieren",
+  "btn_anonymise_all": "Alle markierten anonymisieren",
+  "btn_cancel": "Abbrechen",
+  "btn_close": "Schließen",
+  "btn_clear": "Leeren",
+  "btn_export_excel": "Bericht als Excel exportieren",
+  "btn_audit_log_short": "Protokoll",
+  "btn_delete_selected": "Auswahl löschen",
+  "btn_about": "Über",
+  "preview_original": "Original",
+  "preview_processed": "Verarbeitet",
+  "preview_anonymise": "Anonymisieren → Vorschau",
+  "preview_mask": "Ausweis-Nr. maskieren → Vorschau",
+  "preview_blur_faces": "{n} Gesicht(er) unscharf → Vorschau",
+  "preview_no_faces": "✓ Keine Gesichter erkannt",
+  "preview_scanning_faces": "Gesichter werden gesucht…",
+  "preview_processing_faces": "Gesichter werden verarbeitet…",
+  "preview_rendering": "Wird geladen…",
+  "preview_click_hint": "Auf ein Dokument klicken, um die Vorschau anzuzeigen",
+  "preview_error": "Vorschaufehler",
+  "preview_unavailable": "Vorschau nicht verfügbar",
+  "preview_not_available": "Vorschau für diesen Dateityp nicht verfügbar",
+  "scan_preparing": "Vorbereitung…",
+  "scan_scanning": "Wird gescannt…",
+  "scan_face_scanning": "{total} Datei(en) auf Gesichter prüfen…",
+  "scan_face_progress": "Gesichter: {index} / {total} — {file}",
+  "scan_eta": "{eta} verbleibend",
+  "scan_stopped": "Scan gestoppt.",
+  "empty_flagged": "Keine markierten Dokumente",
+  "empty_flagged_detail": "Scan starten, um markierte Dokumente anzuzeigen",
+  "empty_flagged_found": "Keine markierten Dokumente gefunden.",
+  "empty_filter": "Keine Dateien entsprechen dem Filter",
+  "empty_scan_hint": "Ordnerpfad eingeben und klicken",
+  "no_audit": "Noch keine Protokolleinträge",
+  "dialog_delete_title": "Dateien löschen?",
+  "dialog_delete_confirm": "In den Papierkorb",
+  "dialog_delete_body": "Die ausgewählten Dateien werden dauerhaft von der Festplatte gelöscht. Diese Aktion kann nicht rückgängig gemacht werden.",
+  "all_trashed": "Alle markierten Dokumente wurden in den Papierkorb verschoben.",
+  "confirm_clear_audit": "Gesamtes Protokoll leeren? Dies kann nicht rückgängig gemacht werden.",
+  "btn_audit_log": "Protokoll",
+  "audit_cleared": "Protokoll geleert",
+  "failed_audit": "Protokoll konnte nicht geladen werden",
+  "audit_action_scan": "Scan",
+  "audit_action_redact": "Schwärzung",
+  "audit_action_blur_faces": "Gesichtsunschärfe",
+  "audit_action_delete": "Löschen",
+  "audit_action_restore": "Wiederherstellen",
+  "audit_action_export": "Export",
+  "audit_files": "Dateien",
+  "audit_flagged": "markiert",
+  "audit_high_risk": "hohes Risiko",
+  "audit_regions": "Bereiche",
+  "audit_faces": "Gesichter",
+  "audit_permanent": "dauerhaft",
+  "audit_trash": "Papierkorb",
+  "audit_files_restored": "Datei(en) wiederhergestellt",
+  "about_title": "Über",
+  "label_python": "Python",
+  "label_spacy": "spaCy-Modell",
+  "label_tesseract": "Tesseract",
+  "label_pymupdf": "PyMuPDF",
+  "label_opencv": "OpenCV",
+  "no_model": "kein Modell installiert",
+  "not_installed": "nicht installiert",
+  "risk_high": "HOCH",
+  "risk_medium": "MITTEL",
+  "risk_low": "NIEDRIG",
+  "reason_cpr_number": "Ausweis-Nr.",
+  "reason_cpr_numbers": "Ausweis-Nummern",
+  "reason_cpr_confirmed": "Ausweis-Nr. mit Schlüsselwortkontext",
+  "reason_unique_individuals": "eindeutige Personen",
+  "reason_cpr_shared": "Ausweis-Nr. in {n} Dateien geteilt",
+  "reason_data_10y": "Daten > 10 Jahre alt",
+  "reason_data_5y": "Daten > 5 Jahre alt",
+  "badge_face": "Gesicht",
+  "badge_shared": "geteilt",
+  "badge_archive": "Archiv",
+  "badge_shared_cpr": "Geteilte Ausweis-Nr.",
+  "lbl_also_in": "auch in",
+  "badge_anonymised": "✓ anonymisiert",
+  "badge_masked": "✓ maskiert",
+  "badge_blurred": "✓ unscharf gemacht",
+  "lbl_anonymised": "Anonymisiert",
+  "lbl_masked": "Ausweis-Nr. maskiert",
+  "lbl_processing": "Wird verarbeitet…",
+  "lbl_error": "Fehler",
+  "lbl_no_pii": "Keine Änderungen — keine personenbez. Daten gefunden",
+  "lbl_working": "Wird bearbeitet…",
+  "lbl_stopping": "Wird gestoppt…",
+  "lbl_loading": "Wird geladen…",
+  "lbl_blurred": "Unscharf gemacht",
+  "lbl_none": "Keine",
+  "lbl_size": "Größe",
+  "lbl_time": "Zeit",
+  "lbl_space": "Leerzeichen",
+  "lbl_scanner": "Scanner",
+  "lbl_document": "Dokument",
+  "lbl_folder": "Ordner",
+  "lbl_no_files_selected": "Keine Dateien ausgewählt",
+  "lbl_selected_1": "Datei ausgewählt",
+  "lbl_selected_n": "Dateien ausgewählt",
+  "lbl_root": "Stamm",
+  "lbl_root_folder": "Stammordner",
+  "lbl_scanning": "Scannen:",
+  "banner_all_clean": "Keine Funde",
+  "banner_files_scanned": "Datei(en) gescannt, keine Ausweis-Nr. gefunden",
+  "banner_need_attention": "Datei(en) erfordern Aufmerksamkeit von",
+  "banner_scanned": "gescannt",
+  "summary_face_blur": "Bild(er) zur Gesichtsunschärfe",
+  "kbd_select": "auswählen",
+  "kbd_delete": "löschen",
+  "kbd_close_preview": "Vorschau schließen",
+  "kbd_select_all": "alle auswählen",
+  "lbl_flagged_docs_1": "markiertes Dokument",
+  "lbl_flagged_docs_n": "markierte Dokumente",
+  "btn_deselect_all": "Alle abwählen",
+  "filter_high_risk": "🔴 Hohes Risiko",
+  "filter_in_archive": "📦 Im Archiv",
+  "log_starting_scan": "Scan gestartet von",
+  "log_found_files": "{n} Datei(en) gefunden",
+  "log_cloud_skipped": "Nur-Cloud-Dateien übersprungen",
+  "log_faces_detected": "Gesicht(er) erkannt",
+  "log_ocr_pages": "Seite(n)",
+  "log_pages_skipped": "Bildseite(n) übersprungen (OCR aktivieren)",
+  "log_scan_complete": "Scan abgeschlossen",
+  "log_files_with_cpr": "Datei(en) mit Ausweis-Nr.",
+  "log_no_faces_in": "Keine Gesichter erkannt in",
+  "pii_phone": "Telefon",
+  "pii_email": "E-Mail",
+  "pii_iban": "IBAN",
+  "pii_bank_account": "Bankkonto",
+  "pii_name": "Name",
+  "pii_address": "Adresse",
+  "pii_org": "Org",
+  "lbl_other_pii": "Weitere PII",
+  "lbl_found": "gefunden",
+  "btn_clear_results_cache": "Ergebniscache leeren",
+  "btn_clear_ocr_cache": "OCR-Cache leeren",
+  "confirm_clear_results_cache": "Alle gespeicherten Scan-Ergebnisse löschen? Das Raster wird geleert.",
+  "confirm_clear_ocr_cache": "OCR-Cache leeren? Beim nächsten Scan wird OCR erneut durchgeführt.",
+  "log_cache_cleared": "Ergebniscache geleert",
+  "log_ocr_cache_cleared": "OCR-Cache geleert",
+  "m365_app_name": "GDPRScanner",
+  "m365_sources": "Quellen",
+  "m365_options": "Optionen",
+  "m365_accounts": "Konten",
+  "m365_stats": "Statistik",
+  "m365_src_email": "Outlook",
+  "m365_src_onedrive": "OneDrive",
+  "m365_src_sharepoint": "SharePoint",
+  "m365_src_teams": "Teams",
+  "m365_opt_date_from": "E-Mails/Dateien scannen ab",
+  "m365_opt_date_from_hint": "Leer lassen, um alles zu scannen",
+  "m365_opt_email_body": "E-Mail-Text scannen",
+  "m365_opt_attachments": "Anhänge scannen",
+  "m365_opt_max_attach": "Max. Anhangsgröße (MB)",
+  "m365_opt_max_emails": "Max. E-Mails pro Benutzer",
+  "m365_connect_title": "Mit Microsoft 365 verbinden",
+  "m365_connect_sub": "Geben Sie Ihre Azure-App-Anmeldedaten ein.",
+  "m365_label_client_id": "Client-ID (Anwendungs-ID)",
+  "m365_label_tenant_id": "Mandanten-ID",
+  "m365_label_client_secret": "Client-Geheimnis",
+  "m365_secret_hint": "(optional — ermöglicht organisationsweites Scannen)",
+  "m365_secret_desc_app": "App greift direkt auf Daten aller Benutzer zu (Anwendungsberechtigungen, keine Anmeldung erforderlich).",
+  "m365_secret_desc_delegated": "Sie melden sich als sich selbst an und können nur eigene Daten scannen, sofern Sie kein globaler Administrator sind.",
+  "m365_btn_connect": "Verbinden",
+  "m365_device_code_go": "Gehen Sie zu",
+  "m365_device_code_enter": "und geben Sie diesen Code ein",
+  "m365_btn_cancel_auth": "Abbrechen",
+  "m365_btn_reconfigure": "Neu konfigurieren",
+  "m365_btn_sign_out": "Abmelden",
+  "m365_mode_app": "🔑 App-Modus — organisationsweit",
+  "m365_mode_delegated": "Delegiert",
+  "m365_search_users": "Benutzer suchen…",
+  "m365_add_account_label": "Konto manuell hinzufügen:",
+  "m365_add_account_placeholder": "E-Mail oder UPN",
+  "m365_admin_note": "Es wird nur Ihr Konto angezeigt. Um alle Benutzer aufzulisten, muss ein Administrator <strong>User.Read.All</strong> im Azure Portal genehmigen oder Konten manuell unten hinzufügen.",
+  "m365_btn_scan": "Scannen",
+  "m365_btn_stop": "Stop",
+  "m365_pill_flagged": "markiert",
+  "m365_pill_scanned": "gescannt",
+  "m365_filter_all_sources": "Alle Quellen",
+  "m365_filter_email": "Outlook",
+  "m365_filter_onedrive": "OneDrive",
+  "m365_filter_sharepoint": "SharePoint",
+  "m365_filter_teams": "Teams",
+  "m365_empty_hint": "Quellen auswählen und auf <strong>Scannen</strong> klicken,<br>um Dokumente mit Ausweis-Nr. zu finden",
+  "m365_stat_flagged": "Markiert",
+  "m365_stat_cpr": "Treffer",
+  "m365_preview_open": "In M365 öffnen ↗",
+  "m365_preview_close": "Schließen",
+  "m365_auth_mode_app": "Auth-Modus: Anwendung (Client-Anmeldedaten — organisationsweit)",
+  "m365_auth_mode_delegated": "Auth-Modus: Delegiert (Gerätecode — nur angemeldeter Benutzer)",
+  "m365_phase_teams_index": "Teams-Mitgliedschaftsindex wird erstellt…",
+  "m365_phase_sharepoint": "SharePoint-Dateien werden gesammelt…",
+  "m365_btn_about": "Info",
+  "m365_stat_scanned": "Gescannt",
+  "m365_no_users_found": "Keine Benutzer gefunden",
+  "m365_no_users_match": "Keine Benutzer entsprechen",
+  "m365_no_cpr_found": "Keine Ausweis-Nummern gefunden.",
+  "m365_no_matches": "Keine Treffer",
+  "m365_btn_export_excel": "Excel exportieren",
+  "m365_export_no_data": "Keine Ergebnisse zum Exportieren.",
+  "m365_phase_emails": "Outlook-Nachrichten werden gesammelt",
+  "m365_phase_onedrive": "OneDrive wird gesammelt",
+  "m365_phase_teams": "Teams werden gesammelt",
+  "m365_preset_1yr": "1 J.",
+  "m365_preset_2yr": "2 J.",
+  "m365_preset_5yr": "5 J.",
+  "m365_preset_10yr": "10 J.",
+  "m365_preset_any": "Alle",
+  "m365_auth_mode_app_short": "Anwendungsberechtigungen · Client-Anmeldedaten",
+  "m365_auth_mode_delegated_short": "Delegierte Berechtigungen · Gerätecodefluss",
+  "m365_info_permissions": "Berechtigungen",
+  "m365_info_signin": "Anmeldung erforderlich",
+  "m365_info_scope": "Umfang",
+  "m365_info_scope_org": "Alle Benutzer im Mandanten",
+  "m365_info_scope_user": "Nur angemeldeter Benutzer",
+  "m365_info_consent": "Administratorzustimmung",
+  "m365_info_required": "Erforderlich",
+  "m365_info_admin": "Global Administrator",
+  "m365_info_expands_scope": "Erweitert Umfang auf alle Benutzer",
+  "m365_info_no": "Nein",
+  "m365_info_yes": "Ja",
+  "m365_info_app_desc": "Die App authentifiziert sich mit einem Client Secret und greift direkt über Microsoft Graph auf die Daten aller Benutzer zu — kein interaktives Anmelden erforderlich. Ideal für automatisierte oder geplante Scans.",
+  "m365_info_delegated_desc": "Die App handelt im Namen des angemeldeten Benutzers über den device code flow. Standardmäßig sind nur die Daten dieses Benutzers zugänglich. Ein Global Administrator kann eine umfassendere Zustimmung erteilen.",
+  "m365_filter_search": "Suchen…",
+  "m365_filter_clear": "Löschen",
+  "m365_btn_list_view": "Liste",
+  "m365_btn_grid_view": "Raster",
+  "m365_log_found_items": "Gefunden",
+  "m365_log_items_to_scan": "Element(e) zum Scannen",
+  "m365_log_starting_scan": "Scan starten:",
+  "m365_log_accounts": "Konto/Konten",
+  "m365_btn_bulk_delete": "Löschen",
+  "m365_bulk_delete_title": "Massenlöschung",
+  "m365_bulk_delete_sub": "E-Mails werden in „Gelöschte Elemente“ verschoben · Dateien kommen in den Papierkorb",
+  "m365_bulk_filter_heading": "Filter für zu löschende Elemente",
+  "m365_bulk_filter_source": "Quellentyp",
+  "m365_bulk_filter_min_cpr": "Min. CPR-Treffer",
+  "m365_bulk_filter_older_than": "Älter als Datum",
+  "m365_bulk_no_match": "Keine Elemente entsprechen diesen Kriterien.",
+  "m365_bulk_match_count": "Element(e) werden gelöscht",
+  "m365_bulk_confirm_q": "Element(e) werden dauerhaft gelöscht. Fortfahren?",
+  "m365_bulk_deleting": "Löschen…",
+  "m365_bulk_deleted": "gelöscht",
+  "m365_bulk_failed": "fehlgeschlagen",
+  "m365_bulk_delete_confirm": "Passende Elemente löschen",
+  "m365_delete_confirm": "Löschen",
+  "m365_delete_warning": "Dies kann nicht rückgängig gemacht werden.",
+  "m365_log_deleted": "Gelöscht:",
+  "m365_log_delete_failed": "Löschen fehlgeschlagen:",
+  "m365_log_bulk_done": "Massenlöschung:",
+  "m365_log_older_than": "älter als",
+  "m365_eta_left": "verbleibend",
+  "btn_all": "Alle",
+  "btn_errors": "Fehler",
+  "log_copy": "Kopieren",
+  "btn_none": "Keine",
+  "m365_btn_resume": "Fortsetzen",
+  "m365_btn_start_fresh": "Neu starten",
+  "m365_resume_banner": "Vorheriger Scan unterbrochen — {scanned} gescannt, {flagged} gefunden",
+  "m365_log_resuming": "Scan fortsetzen:",
+  "m365_log_already_scanned": "bereits gescannt — übersprungen",
+  "m365_resuming": "Fortsetzen — bereits gescannte Elemente werden übersprungen…",
+  "m365_opt_delta": "Delta-Scan",
+  "m365_opt_delta_hint": "Nur geänderte Elemente (nach erstem Vollscan)",
+  "m365_delta_tokens_saved": "Tokens gespeichert",
+  "m365_delta_clear": "Tokens löschen",
+  "m365_delta_cleared": "Delta-Tokens gelöscht — nächster Scan wird ein Vollscan.",
+  "m365_delta_mode": "Delta-Modus — nur geänderte Elemente werden abgerufen…",
+  "m365_smtp_title": "✉ Bericht senden",
+  "m365_smtp_desc": "Excel-Bericht nach dem Scan per E-Mail senden.",
+  "m365_smtp_host": "SMTP-Server",
+  "m365_smtp_port": "Port",
+  "m365_smtp_user": "Benutzername",
+  "m365_smtp_pass": "Passwort",
+  "m365_smtp_from": "Absenderadresse",
+  "m365_smtp_tls": "STARTTLS",
+  "m365_smtp_ssl": "SSL",
+  "m365_smtp_recipients": "Empfänger",
+  "m365_smtp_recipients_hint": "Komma- oder semikolongetrennt",
+  "m365_smtp_save": "Speichern",
+  "m365_smtp_send": "Jetzt senden",
+  "m365_smtp_saved": "Einstellungen gespeichert.",
+  "m365_smtp_sending": "Senden…",
+  "m365_smtp_sent": "Bericht gesendet.",
+  "m365_smtp_no_recipients": "Mindestens einen Empfänger eingeben.",
+  "m365_smtp_configure": "Konfigurieren",
+  "m365_smtp_from_hint": "(optional — Standard ist Benutzername)",
+  "m365_subject_title": "🔍 Betroffene Person",
+  "m365_subject_btn": "Nachschlagen",
+  "m365_subject_desc": "Alle markierten Elemente mit einer bestimmten CPR-Nummer finden. Die CPR-Nummer wird vor der Abfrage gehasht und nie im Klartext gespeichert.",
+  "m365_subject_placeholder": "TTMMJJ-XXXX",
+  "m365_subject_search": "Suchen",
+  "m365_subject_searching": "Suche…",
+  "m365_subject_found": "Element(e) gefunden",
+  "m365_subject_not_found": "Keine markierten Elemente für diese CPR-Nummer gefunden.",
+  "m365_subject_delete_all": "Alle für diese Person löschen",
+  "m365_subject_delete_confirm": "Element(e) werden dauerhaft gelöscht. Fortfahren?",
+  "m365_disposition_label": "Disposition",
+  "m365_disp_unreviewed": "Nicht geprüft",
+  "m365_disp_retain_legal": "Aufbewahren — gesetzliche Pflicht",
+  "m365_disp_retain_legit": "Aufbewahren — berechtigtes Interesse",
+  "m365_disp_retain_contract": "Aufbewahren — Vertrag",
+  "m365_disp_delete_sched": "Löschen — geplant",
+  "m365_disp_personal_use": "Privatgebrauch — außerhalb des Geltungsbereichs",
+  "m365_disp_deleted": "Gelöscht",
+  "m365_disp_save": "Speichern",
+  "m365_disp_saved": "✓ Gespeichert",
+  "m365_opt_retention": "Aufbewahrungsrichtlinie",
+  "m365_opt_retention_hint": "Elemente älter als N Jahre markieren und löschen",
+  "m365_ret_years": "Aufbewahrungsjahre",
+  "m365_ret_fy_end": "Geschäftsjahresende",
+  "m365_ret_fy_rolling": "Rollierend (ab heute)",
+  "m365_ret_fy_dec": "31. Dez. (Bogføringsloven)",
+  "m365_ret_fy_jun": "30. Jun.",
+  "m365_ret_fy_mar": "31. Mär.",
+  "m365_ret_mode_rolling": "rollierend",
+  "m365_ret_mode_fiscal": "Geschäftsjahr",
+  "m365_ret_cutoff_hint": "Elemente geändert vor",
+  "m365_ret_cutoff_flagged": "werden als überfällig markiert",
+  "m365_overdue_found": "überfällige(s) Element(e) gefunden",
+  "m365_bulk_overdue_btn": "Überfällige filtern",
+  "m365_bulk_clear_filters": "Filter löschen",
+  "m365_btn_export_article30": "Art.30",
+  "m365_article30_done": "Artikel-30-Bericht bereit.",
+  "a30_title": "DSGVO Artikel 30",
+  "a30_subtitle": "Verzeichnis von Verarbeitungstätigkeiten",
+  "a30_generated": "Erstellt",
+  "a30_confidential": "Vertraulich — DSGVO-Compliance-Dokument",
+  "a30_s1": "1. Zusammenfassung",
+  "a30_scan_date": "Scan-Datum",
+  "a30_items_scanned": "Gescannte Elemente",
+  "a30_flagged": "Markierte Elemente",
+  "a30_cpr_hits": "CPR-Treffer gesamt",
+  "a30_data_subjects": "Geschätzte betroffene Personen",
+  "a30_overdue": "Überfällige Elemente (>5 J.)",
+  "a30_by_source": "Aufschlüsselung nach Quelle",
+  "a30_col_source": "Quelle",
+  "a30_col_items": "Elemente",
+  "a30_col_cpr": "CPR-Treffer",
+  "a30_col_overdue": "Überfällig",
+  "a30_s2": "2. Identifizierte Kategorien personenbezogener Daten",
+  "a30_s2_intro": "Folgende Kategorien personenbezogener Daten wurden beim Scan erkannt.",
+  "a30_col_gdpr_class": "DSGVO-Klassifizierung",
+  "a30_cpr_label": "CPR-Nummern (dän. Personalausweis-Nr.)",
+  "a30_cpr_class": "Art. 9 — nationales Kennzeichen",
+  "a30_pii_class_9": "Art. 9 — Gesundheit/sensibel",
+  "a30_pii_class_4": "Art. 4 — personenbezogene Daten",
+  "a30_s3": "3. Datenbestand",
+  "a30_s3_intro": "Alle markierten Elemente sind unten mit Speicherort, Aufbewahrungsstatus und Compliance-Disposition aufgeführt.",
+  "a30_col_name": "Name / Betreff",
+  "a30_col_account": "Konto",
+  "a30_col_modified": "Geändert",
+  "a30_col_disp": "Disposition",
+  "a30_more_items": "weitere Elemente nicht angezeigt. Für die vollständige Liste den Excel-Bericht exportieren.",
+  "a30_s4": "4. Aufbewahrungsanalyse",
+  "a30_s4_intro": "Folgende Elemente überschreiten den 5-jährigen Aufbewahrungszeitraum und sollten gemäß DSGVO Artikel 5(1)(e) — Speicherbegrenzung — auf Löschung geprüft werden.",
+  "a30_s5": "5. Compliance-Trend",
+  "a30_s5_intro": "Anzahl markierter Elemente über die letzten Scans (neuester zuerst).",
+  "a30_col_scan_date": "Scan-Datum",
+  "a30_col_scan_type": "Scan-Typ",
+  "a30_scan_delta": "Delta",
+  "a30_scan_full": "Vollständig",
+  "a30_s6": "6. Methodik und Rechtsgrundlage",
+  "a30_method_title": "Scan-Methodik",
+  "a30_method_1": "CPR-Nummern werden per Mustererkennung gegen das offizielle dänische CPR-Format (TTMMJJ-XXXX) mit vollständiger Datumsvalidierung erkannt.",
+  "a30_method_2": "Weitere personenbezogene Daten (Telefonnummern, E-Mail-Adressen, IBANs, Bankkontonummern, Namen, Adressen und Organisationen) werden per Regex und spaCy NER erkannt.",
+  "a30_method_3": "CPR-Nummern in der Datenbank dieses Dokuments sind SHA-256-gehasht und werden nie im Klartext gespeichert.",
+  "a30_method_4": "Die Überprüfung umfasst Exchange-Postfächer (alle Ordner einschl. Gesendete Elemente), OneDrive, SharePoint und Microsoft Teams-Kanaldateien über die Microsoft Graph API. Bei Verbindung umfasst das Google Workspace-Scanning Gmail und Google Drive über ein Dienstkonto mit domänenweiter Delegierung. Lokale und netzwerkbasierte Dateifreigaben (SMB) werden direkt gescannt.",
+  "a30_gdpr_title": "Referenzierte DSGVO-Artikel",
+  "a30_gdpr_1": "Artikel 5(1)(c) — Datenminimierung: nur notwendige Daten dürfen aufbewahrt werden",
+  "a30_gdpr_2": "Artikel 5(1)(e) — Speicherbegrenzung: Daten dürfen nicht länger als nötig aufbewahrt werden",
+  "a30_gdpr_3": "Artikel 9 — Besondere Kategorien: Gesundheitsdaten, Strafregister, Gewerkschaftszugehörigkeit usw. erfordern eine ausdrückliche Rechtsgrundlage",
+  "a30_gdpr_4": "Artikel 15 — Auskunftsrecht: betroffene Personen können Auskunft über ihre Daten verlangen",
+  "a30_gdpr_5": "Artikel 17 — Recht auf Löschung: betroffene Personen können Löschung verlangen",
+  "a30_gdpr_6": "Artikel 30 — Verzeichnis von Verarbeitungstätigkeiten: dieses Dokument erfüllt die Pflicht",
+  "a30_disp_unreviewed": "Nicht geprüft",
+  "a30_disp_retain_legal": "Aufbewahren — Gesetzliche Pflicht",
+  "a30_disp_retain_legit": "Aufbewahren — Berechtigtes Interesse",
+  "a30_disp_retain_contract": "Aufbewahren — Vertrag",
+  "a30_disp_delete_sched": "Löschen — Geplant",
+  "a30_disp_personal_use": "Privatgebrauch — Außerhalb des DSGVO-Geltungsbereichs (Art. 2(2)(c))",
+  "a30_disp_deleted": "Gelöscht",
+  "a30_s6_short": "Methodik und Rechtsgrundlage",
+  "m365_role_all": "Alle",
+  "m365_role_staff": "Personal",
+  "m365_role_student": "Schüler",
+  "a30_s_dellog": "Löschprotokoll",
+  "a30_dellog_intro": "Element(e) mit personenbezogenen Daten wurden über GDPRScanner gelöscht. Dieses Protokoll erfüllt die Rechenschaftspflicht gemäß DSGVO Art. 5 Abs. 2.",
+  "a30_dellog_by_reason": "Löschungen nach Grund",
+  "a30_dellog_records": "Löscheinträge",
+  "a30_col_reason": "Grund",
+  "a30_col_count": "Anzahl",
+  "a30_col_deleted_at": "Gelöscht am",
+  "a30_col_deleted_by": "Gelöscht von",
+  "a30_reason_manual": "Manuell (einzelne Karte)",
+  "a30_reason_bulk": "Massenlöschung",
+  "a30_reason_retention": "Aufbewahrungsrichtlinie",
+  "a30_reason_dsr": "Löschanfrage der betroffenen Person (Art. 17)",
+  "m365_filter_all_disp": "Alle Dispositionen",
+  "m365_trend_title": "Trend",
+  "m365_trend_flagged": "Markiert",
+  "m365_trend_overdue": "Überfällig",
+  "m365_filter_all_transfer": "Alle Elemente",
+  "m365_filter_ext_recipient": "Externer Empfänger",
+  "m365_filter_ext_share": "Extern freigegeben",
+  "m365_filter_shared": "Freigegeben",
+  "m365_badge_ext_recipient": "Extern",
+  "m365_badge_shared": "Freigegeben",
+  "a30_s_special": "Besondere Kategorien personenbezogener Daten (Artikel 9)",
+  "a30_special_intro": "Element(e) als besondere Kategorie gemäß DSGVO Artikel 9 erkannt. Erfordert explizite Rechtsgrundlage und Datenschutz-Folgenabschätzung (DSFA).",
+  "a30_special_by_cat": "Erkannte Kategorien",
+  "a30_special_items": "Betroffene Elemente (bis zu 50)",
+  "a30_col_category": "Kategorie",
+  "a30_cat_health": "Gesundheitsdaten (Art. 9)",
+  "a30_cat_mental": "Psychische Gesundheit (Art. 9)",
+  "a30_cat_criminal": "Strafregister (Art. 10)",
+  "a30_cat_union": "Gewerkschaftsmitgliedschaft (Art. 9)",
+  "a30_cat_religion": "Religiöse Überzeugungen (Art. 9)",
+  "a30_cat_ethnicity": "Ethnische Herkunft (Art. 9)",
+  "a30_cat_political": "Politische Meinungen (Art. 9)",
+  "a30_cat_biometric": "Biometrische Daten (Art. 9)",
+  "a30_cat_sexual": "Sexuelle Orientierung (Art. 9)",
+  "m365_filter_all_special": "Alle Risikostufen",
+  "m365_filter_special_only": "Art. 9 besondere Kategorien",
+  "m365_badge_special": "Art.9",
+  "m365_phase_scanning": "Scanne…",
+  "a30_special_cat": "Besondere Kategorien (Artikel 9)",
+  "a30_special_cat_note": "Diese Elemente enthalten Gesundheits-, Straf-, biometrische, religiöse, ethnische, gewerkschaftliche, politische oder sexuelle Daten. Eine ausdrückliche Rechtsgrundlage (Art. 9 Abs. 2) und ggf. eine DSFA (Art. 35) sind erforderlich.",
+  "a30_col_special": "Art. 9",
+  "a30_pii_phone": "Telefonnummern",
+  "a30_pii_email": "E-Mail-Adressen",
+  "a30_pii_iban": "IBAN-Bankkontonummern",
+  "a30_pii_bank": "Bankkontonummern",
+  "a30_pii_name": "Personennamen (NER)",
+  "a30_pii_address": "Adressen (NER)",
+  "a30_pii_org": "Organisationen (NER)",
+  "a30_col_cpr_short": "CPR",
+  "a30_inv_staff": "Personal / Lehrkräfte",
+  "a30_inv_students": "Schüler",
+  "a30_student_consent_note": "Hinweis: Schülerkonten in dänischen Folkeskoler (Schüler unter 15 Jahren) erfordern die elterliche Einwilligung zur Verarbeitung personenbezogener Daten gemäß Databeskyttelsesloven §6. Elemente in Schülerkonten dürfen nicht automatisch gelöscht werden — jede Maßnahme erfordert die Genehmigung der Schulleitung und für Schüler unter 15 Jahren die Benachrichtigung der Eltern oder Erziehungsberechtigten als Rechteinhaber gemäß DSGVO Artikel 8.",
+  "m365_profile_label": "Profil:",
+  "m365_profile_placeholder": "— Profil wählen —",
+  "m365_profile_save_tip": "Aktuelle Einstellungen als Profil speichern",
+  "m365_profile_save_prompt": "Profilname:",
+  "m365_profile_applied": "Profil geladen",
+  "m365_profile_saved": "Profil gespeichert",
+  "m365_profile_manage_btn": "Profile",
+  "m365_profile_clear_btn": "Löschen",
+  "m365_profile_save_btn": "Speichern",
+  "m365_profile_manage_title": "⚙ Profile verwalten",
+  "m365_profile_no_profiles": "Noch keine gespeicherten Profile. Verwenden Sie 💾, um die aktuellen Seitenleisteneinstellungen als Profil zu speichern.",
+  "m365_profile_use": "Verwenden",
+  "m365_profile_edit": "Bearbeiten",
+  "m365_profile_duplicate": "Duplizieren",
+  "m365_profile_delete": "Löschen",
+  "m365_profile_delete_confirm": "Profil löschen",
+  "m365_profile_duplicated": "Profil dupliziert",
+  "m365_profile_deleted": "Profil gelöscht",
+  "m365_profile_never": "nie",
+  "m365_profile_last_run": "Zuletzt ausgeführt",
+  "m365_profile_name_placeholder": "Profilname",
+  "m365_profile_desc_placeholder": "Beschreibung (optional)",
+  "m365_profile_name_required": "Profilname ist erforderlich.",
+  "m365_db_title": "🗄 Datenbank",
+  "m365_db_export": "Exportieren",
+  "m365_db_import": "Importieren",
+  "m365_db_export_error": "Export fehlgeschlagen",
+  "m365_db_exported": "Datenbank exportiert",
+  "m365_db_import_title": "📥 Datenbank importieren",
+  "m365_db_import_desc": "Wählen Sie eine zuvor exportierte .zip-Datei. Zusammenführen fügt Dispositionen und Löschprotokoll hinzu. Ersetzen löscht alles und stellt vollständig wieder her.",
+  "m365_db_import_file": "ZIP-Datei",
+  "m365_db_import_mode": "Modus:",
+  "m365_db_import_merge": "Zusammenführen (sicher)",
+  "m365_db_import_replace": "Ersetzen (vollständige Wiederherstellung)",
+  "m365_db_import_replace_warn": "⚠ Der Ersetzungsmodus löscht alle vorhandenen Scandaten vor der Wiederherstellung. Stellen Sie sicher, dass Sie zuerst eine Sicherungskopie von ~/.gdpr_scanner.db haben.",
+  "m365_db_import_replace_confirm": "Der Ersetzungsmodus löscht ALLE vorhandenen Scandaten und stellt aus dem Archiv wieder her.\\n\\nStellen Sie sicher, dass Sie eine manuelle Sicherungskopie von ~/.gdpr_scanner.db haben.\\n\\nFortfahren?",
+  "m365_db_import_no_file": "Bitte wählen Sie zuerst eine ZIP-Datei aus.",
+  "m365_db_importing": "Importiere…",
+  "m365_db_imported": "Importiert",
+  "m365_db_import_run": "Importieren",
+  "m365_opt_scan_photos": "Fotos nach Gesichtern durchsuchen",
+  "m365_opt_scan_photos_hint": "Markiert Bilder mit erkannten Gesichtern als biometrische Daten gem. Art. 9. Langsamer — bei Bedarf aktivieren.",
+  "m365_filter_photo_only": "📷 Fotos / biometrisch",
+  "m365_badge_faces": "Gesichter",
+  "a30_photo_items": "Fotos mit erkannten Gesichtern (Art. 9 biometrisch)",
+  "a30_photo_note": "Fotografien identifizierbarer Personen sind biometrische Daten gemäß Art. 9 DSGVO. Die Aufbewahrung erfordert eine dokumentierte Rechtsgrundlage gemäß Art. 9(2). Für Schulfotos von Schülern unter 15 Jahren ist die elterliche Einwilligung erforderlich (Databeskyttelsesloven §6). Siehe Leitfaden des Datatilsynet zur Schulfotografie.",
+  "a30_s_photos": "Fotografien und biometrische Daten (Artikel 9)",
+  "a30_photo_intro": "Bilddatei(en) mit erkanntem(n) Gesicht(ern) wurden beim Scan gefunden. Fotografien identifizierbarer Personen stellen biometrische Daten gemäß DSGVO Artikel 9 dar und unterliegen demselben erhöhten Schutz wie Gesundheits- oder Strafregisterdaten.",
+  "a30_photo_guidance": "Aufbewahrungshinweise",
+  "a30_photo_g1": "Fotos dürfen nur aufbewahrt werden, solange der ursprüngliche Zweck gültig ist (Art. 5(1)(b) — Zweckbindung).",
+  "a30_photo_g2": "Schüler unter 15 Jahren benötigen die elterliche Einwilligung (Databeskyttelsesloven §6). Die Einwilligung muss freiwillig, spezifisch und dokumentiert sein.",
+  "a30_photo_g3": "Fotos auf öffentlich zugänglichen Websites müssen umgehend entfernt werden, wenn eine Person die Organisation verlässt oder ihre Einwilligung widerruft (Art. 17 — Recht auf Löschung).",
+  "a30_photo_g4": "Historische/Archivnutzung kann eine längere Aufbewahrung gemäß Art. 89 nur mit spezifischen Schutzmaßnahmen und einer Einzelfallbewertung rechtfertigen.",
+  "a30_photo_col_faces": "Gesichter",
+  "a30_method_5": "Wenn die Fotoerfassung aktiviert ist, werden Bilddateien mithilfe der OpenCV Haar-Cascade-Gesichtserkennung analysiert, um Fotografien von Personen zu identifizieren (Art. 9 biometrische Daten).",
+  "m365_role_cycle_tip": "Klicken zum Ändern der Rolle (wechselt Schüler → Personal → Sonstiges → Auto)",
+  "m365_role_set": "Rolle gesetzt",
+  "m365_role_cleared": "Rollenüberschreibung entfernt",
+  "m365_sku_debug_title": "🔍 Mandanten-SKU-IDs",
+  "m365_sku_debug_desc": "Dies sind die rohen SKU-IDs, die Ihren Benutzern zugewiesen sind. Alle mit ❓ unbekannt markierten sind nicht in classification/m365_skus.json — kopieren Sie sie unter student_ids oder staff_ids und starten Sie neu.",
+  "m365_sku_debug_none": "Keine Lizenzdaten zurückgegeben — überprüfen Sie, ob die App die Berechtigung User.Read.All hat.",
+  "m365_file_sources_title": "📁 Dateiquellen",
+  "m365_file_sources_manage": "Verwalten",
+  "m365_file_sources_empty": "Keine Dateiquellen konfiguriert. Fügen Sie unten einen lokalen Ordner oder eine Netzwerkfreigabe hinzu.",
+  "m365_file_sources_add": "Quelle hinzufügen",
+  "m365_fsrc_label": "Bezeichnung",
+  "m365_fsrc_path": "Pfad",
+  "m365_fsrc_smb_detected": "SMB/CIFS-Netzwerkfreigabe erkannt",
+  "m365_fsrc_smb_host": "SMB-Host",
+  "m365_fsrc_smb_user": "Benutzername",
+  "m365_fsrc_smb_pw": "Passwort",
+  "m365_fsrc_smb_pw_hint": "Das Passwort wird im OS-Schlüsselbund gespeichert — nie in einer Datei.",
+  "m365_fsrc_add_btn": "Hinzufügen",
+  "m365_fsrc_saved": "Quelle gespeichert",
+  "m365_fsrc_saving": "Speichern...",
+  "m365_fsrc_path_required": "Pfad ist erforderlich.",
+  "m365_fsrc_scan_btn": "Scannen",
+  "m365_fsrc_scan_start": "Datei-Scan wird gestartet",
+  "m365_src_group_files": "Dateiquellen",
+  "m365_no_sources": "Keine Quellen ausgewählt — nichts zu scannen.",
+  "m365_fsrc_name_required": "Name ist erforderlich.",
+  "m365_srcmgmt_title": "⚙ Quellenverwaltung",
+  "m365_srcmgmt_tab_m365": "Microsoft 365",
+  "m365_srcmgmt_tab_google": "Google Workspace",
+  "m365_srcmgmt_tab_files": "Dateiquellen",
+  "m365_srcmgmt_connection": "Verbindung",
+  "m365_srcmgmt_azure_creds": "Azure-Anmeldedaten",
+  "m365_srcmgmt_sources_m365": "Zu scannende Quellen",
+  "m365_srcmgmt_connected": "Verbunden",
+  "m365_srcmgmt_not_connected": "Nicht verbunden",
+  "m365_srcmgmt_coming_soon": "Kommt bald",
+  "m365_srcmgmt_google_sub": "Gmail- und Google Drive-Scan erscheint hier, wenn implementiert.",
+  "m365_srcmgmt_file_sources": "Dateiquellen",
+  "m365_sources_manage_btn": "Quellen",
+  "m365_connecting": "Verbinde...",
+  "m365_err_creds_required": "Client-ID und Mandanten-ID erforderlich",
+  "m365_signout_confirm": "Trennen und Anmeldedaten löschen?",
+  "m365_btn_settings": "Einstellungen",
+  "m365_settings_title": "⚙ Einstellungen",
+  "m365_settings_tab_general": "Allgemein",
+  "m365_settings_tab_email": "E-Mail-Bericht",
+  "m365_settings_tab_database": "Datenbank",
+  "m365_settings_appearance": "Erscheinungsbild",
+  "m365_settings_language": "Sprache",
+  "m365_settings_theme": "Design",
+  "m365_settings_db_actions": "Aktionen",
+  "m365_db_reset": "DB zurücksetzen",
+  "m365_db_reset_confirm": "Datenbank zurücksetzen? Alle Scan-Ergebnisse werden gelöscht.",
+  "m365_db_reset_done": "Datenbank zurückgesetzt",
+  "m365_db_scans": "Scans",
+  "m365_smtp_saving": "Speichern...",
+  "m365_settings_admin_pin": "Admin-PIN",
+  "m365_settings_pin_hint": "Erforderlich für destruktive Aktionen (z.B. DB zurücksetzen). Leer lassen zum Deaktivieren.",
+  "m365_settings_current_pin": "Aktueller PIN",
+  "m365_settings_new_pin": "Neuer PIN",
+  "m365_settings_confirm_pin": "PIN bestätigen",
+  "m365_settings_pin_set": "Admin-PIN ist gesetzt",
+  "m365_settings_pin_not_set": "Kein PIN — DB zurücksetzen ist ungeschützt",
+  "m365_settings_pin_required": "PIN ist erforderlich.",
+  "m365_settings_pin_mismatch": "PINs stimmen nicht überein.",
+  "m365_settings_pin_wrong": "Falscher PIN — Zurücksetzen abgebrochen.",
+  "m365_settings_pin_saved": "PIN gespeichert",
+  "m365_settings_enter_pin": "Admin-PIN eingeben",
+  "m365_settings_enter_pin_reset": "Admin-PIN eingeben, um die Datenbank zurückzusetzen.",
+  "btn_confirm": "Bestätigen",
+  "m365_log_scan_started": "Scan gestartet",
+  "m365_preview_local_file": "Lokale Datei — keine Cloud-Vorschau verfügbar",
+  "m365_badge_gps": "GPS-Standort",
+  "a30_gps_items": "Elemente mit GPS-Standortdaten (Art. 4 — Standort = personenbezogene Daten)",
+  "a30_exif_pii_items": "Elemente mit EXIF-PII (Autor, Beschreibung, Schlüsselwörter)",
+  "a30_gps_title": "Elemente mit GPS-Standortdaten",
+  "a30_gps_intro": "Die folgenden Dateien enthalten GPS-Koordinaten in den EXIF-Metadaten. Standortdaten sind personenbezogene Daten gemäß Art. 4 DSGVO.",
+  "a30_gps_col_lat": "Breitengrad",
+  "a30_gps_col_lon": "Längengrad",
+  "m365_accounts_disabled_tip": "Wählen Sie eine Microsoft 365-Quelle aus, um die Kontoauswahl zu aktivieren",
+  "m365_smtp_test": "Testen",
+  "m365_smtp_testing": "Test-E-Mail wird gesendet…",
+  "m365_smtp_test_ok": "Test-E-Mail gesendet",
+  "m365_smtp_test_fail": "Verbindung fehlgeschlagen",
+  "m365_fsrc_edit_btn": "Bearbeiten",
+  "m365_fsrc_save_changes": "Änderungen speichern",
+  "m365_settings_tab_scheduler": "Zeitplaner",
+  "m365_sched_title": "Geplante Scans",
+  "m365_sched_next": "Nächste",
+  "m365_sched_hint": "Scans automatisch zu einem festgelegten Zeitpunkt ausführen. Erfordert eine aktive M365-Verbindung (Anwendungsmodus empfohlen).",
+  "m365_sched_no_aps": "⚠ APScheduler nicht installiert. Ausführen: pip install apscheduler",
+  "m365_sched_enabled": "Zeitplaner aktivieren",
+  "m365_sched_frequency": "Häufigkeit",
+  "m365_sched_dow": "Wochentag",
+  "m365_sched_dom": "Tag im Monat",
+  "m365_sched_time": "Uhrzeit",
+  "m365_sched_profile": "Profil",
+  "m365_sched_profile_last": "Zuletzt gespeicherte Einstellungen",
+  "m365_sched_after_scan": "Nach dem Scan",
+  "m365_sched_auto_email": "Bericht automatisch senden",
+  "m365_sched_auto_retention": "Aufbewahrungsrichtlinie durchsetzen",
+  "m365_sched_status": "Status",
+  "m365_sched_run_now": "▶ Jetzt ausführen",
+  "m365_sched_add": "+ Geplante Suche hinzufügen",
+  "m365_sched_name": "Name",
+  "m365_sched_editor_new": "Neue geplante Suche",
+  "m365_sched_editor_edit": "Geplante Suche bearbeiten",
+  "m365_sched_name_required": "Name ist erforderlich",
+  "m365_sched_no_runs": "Noch keine geplanten Läufe",
+  "m365_sched_freq_daily": "Täglich",
+  "m365_sched_freq_weekly": "Wöchentlich",
+  "m365_sched_freq_monthly": "Monatlich",
+  "m365_sched_dow_mon": "Montag",
+  "m365_sched_dow_tue": "Dienstag",
+  "m365_sched_dow_wed": "Mittwoch",
+  "m365_sched_dow_thu": "Donnerstag",
+  "m365_sched_dow_fri": "Freitag",
+  "m365_sched_dow_sat": "Samstag",
+  "m365_sched_dow_sun": "Sonntag",
+  "btn_save": "Speichern",
+  "m365_settings_about": "Über",
+  "m365_settings_save_pin": "PIN speichern",
+  "m365_sse_reconnecting": "Verbindung zum laufenden Scan wird wiederhergestellt…",
+  "m365_sse_replay_note": "Live-Log fortgesetzt — frühere Einträge vom laufenden Scan wiedergegeben.",
+  "m365_google_sa_creds": "Dienstkonto-Anmeldedaten",
+  "m365_google_sa_key_file": "JSON-Schlüssel des Dienstkontos",
+  "m365_google_sa_key_hint": "Download von Google Cloud Console → IAM & Admin → Dienstkonten → Schlüssel → Schlüssel hinzufügen → JSON",
+  "m365_google_admin_email": "Admin-E-Mail",
+  "m365_google_admin_email_hint": "Wird für domänenweite Delegierung verwendet — muss ein Workspace-Superadmin sein.",
+  "m365_google_libs_missing": "Bibliotheken nicht installiert",
+  "m365_google_key_required": "Wählen Sie eine JSON-Schlüsseldatei für das Dienstkonto aus",
+  "m365_google_invalid_json": "Ungültige JSON-Datei",
+  "m365_srcmgmt_sources_google": "Zu scannende Quellen",
+  "m365_google_src_gmail": "Gmail",
+  "m365_google_src_drive": "Google Drive",
+  "m365_google_setup_title": "Einrichtung in Google Workspace erforderlich:",
+  "m365_google_setup_step1": "Erstellen Sie ein Google Cloud-Projekt und aktivieren Sie Gmail API + Drive API + Admin SDK.",
+  "m365_google_setup_step2": "Erstellen Sie ein Dienstkonto, laden Sie den JSON-Schlüssel herunter und aktivieren Sie die domänenweite Delegierung.",
+  "m365_google_setup_step3": "Fügen Sie in Workspace Admin → Sicherheit → API-Steuerung → Domänenweite Delegierung die Client-ID des Dienstkontos mit folgenden Scopes hinzu:",
+  "m365_google_auth_mode": "Authentifizierungsmodus",
+  "m365_google_mode_workspace": "Workspace",
+  "m365_google_mode_personal": "Persönliches Konto",
+  "m365_google_personal_creds": "Persönliches Konto",
+  "m365_google_personal_client_id": "Client-ID",
+  "m365_google_personal_client_secret": "Client-Secret",
+  "m365_google_personal_hint": "Erstellen Sie OAuth 2.0 Desktop-Anmeldedaten in der Google Cloud Console und fügen Sie Client-ID und Secret oben ein.",
+  "m365_google_personal_sign_in": "Anmelden",
+  "m365_google_personal_creds_required": "Client-ID und Secret erforderlich",
+  "m365_google_personal_setup_title": "Einrichtung erforderlich:",
+  "m365_google_personal_setup_step1": "Erstellen Sie in der Google Cloud Console ein Projekt und aktivieren Sie Gmail API + Drive API.",
+  "m365_google_personal_setup_step2": "Erstellen Sie OAuth 2.0-Anmeldedaten (Typ: Desktop-App) und kopieren Sie Client-ID und Secret.",
+  "m365_google_personal_setup_step3": "Fügen Sie Ihre Google-Konto-E-Mail zur Liste der Testnutzer im OAuth-Einwilligungsbildschirm hinzu.",
+  "m365_auth_waiting": "Warte auf Anmeldung…",
+  "role_staff": "Personal",
+  "role_student": "Schüler",
+  "role_other": "Andere",
+
+  "m365_settings_tab_security": "Sicherheit",
+
+  "share_modal_title": "Ergebnisse teilen",
+  "share_modal_desc": "Schreibgeschützte Links ermöglichen einem Datenschutzbeauftragten oder Prüfer, Ergebnisse einzusehen und Verwendungszwecke zuzuweisen, ohne Zugriff auf Scansteuerung oder Anmeldedaten.",
+  "share_new_link": "Neuer Link",
+  "share_label_lbl": "Bezeichnung (optional)",
+  "share_label_placeholder": "z. B. DSB-Prüfung 2026",
+  "share_expires_in": "Läuft ab in",
+  "share_expires_never": "Nie",
+  "share_expires_7d": "7 Tage",
+  "share_expires_30d": "30 Tage",
+  "share_expires_90d": "90 Tage",
+  "share_expires_1y": "1 Jahr",
+  "share_create": "Erstellen",
+  "share_copy_link_prompt": "Link kopieren:",
+  "share_active_links": "Aktive Links",
+  "share_viewer_pin_label": "Betrachter-PIN:",
+  "share_pin_configure": "Konfigurieren",
+  "share_pin_set": "Festgelegt",
+  "share_pin_not_set": "Nicht festgelegt",
+  "share_no_links": "Keine aktiven Links.",
+  "share_unlabelled": "Ohne Bezeichnung",
+  "share_expires_prefix": "Läuft ab:",
+  "share_last_used": "Zuletzt verwendet:",
+  "share_revoke": "Widerrufen",
+  "share_copied": "Kopiert!",
+  "share_load_error": "Links konnten nicht geladen werden.",
+  "share_create_error": "Link konnte nicht erstellt werden:",
+  "share_revoke_confirm": "Diesen Link widerrufen? Alle Nutzer verlieren sofort den Zugriff.",
+  "share_revoke_error": "Widerrufen fehlgeschlagen:",
+
+  "viewer_pin_group_title": "Betrachter-PIN",
+  "viewer_pin_desc": "Eine numerische PIN (4–8 Stellen), die es jedem ermöglicht, <code style=\"font-size:10px\">/view</code> im Browser zu öffnen und schreibgeschützt auf Ergebnisse zuzugreifen \u2013 ohne Token-Link.",
+  "viewer_pin_clear": "PIN löschen",
+  "viewer_pin_is_set": "Betrachter-PIN ist festgelegt",
+  "viewer_pin_not_set_msg": "Keine PIN festgelegt — /view erfordert einen Token-Link",
+  "viewer_pin_format": "PIN muss 4–8 Stellen haben.",
+  "viewer_pin_saving": "Wird gespeichert…",
+  "viewer_pin_saved": "PIN gespeichert",
+  "viewer_pin_clear_confirm": "Betrachter-PIN entfernen? /view erfordert dann wieder einen Token-Link.",
+  "viewer_pin_cleared": "PIN gelöscht"
+}
\ No newline at end of file
diff --git a/lang/en.json b/lang/en.json
new file mode 100644
index 0000000..4a970c3
--- /dev/null
+++ b/lang/en.json
@@ -0,0 +1,773 @@
+{
+  "app_name": "Document Scanner",
+  "label_root_folder": "Root Folder",
+  "label_older_than": "Flag files with data older than",
+  "placeholder_folder": "/path/to/documents",
+  "btn_scan": "Run Scan",
+  "btn_stop": "Stop scan",
+  "toggle_anonymise": "Anonymise",
+  "toggle_mask": "Mask CPR only",
+  "toggle_blur_faces": "Blur faces",
+  "toggle_skip_cloud": "Skip cloud-only files",
+  "toggle_ocr": "OCR scanned PDFs",
+  "label_face_sensitivity": "Face sensitivity",
+  "face_sensitivity_high": "High",
+  "face_sensitivity_low": "Low",
+  "face_sensitivity_hint": "Higher = fewer false detections",
+  "label_ocr_language": "OCR language",
+  "label_ocr_dpi": "DPI (quality vs speed)",
+  "lang_danish": "Danish",
+  "lang_danish_english": "Danish + English",
+  "lang_english": "English",
+  "lang_norwegian": "Norwegian",
+  "lang_swedish": "Swedish",
+  "lang_german": "German",
+  "lang_french": "French",
+  "lang_dutch": "Dutch",
+  "time_any": "Any",
+  "time_1y": "1 year",
+  "time_2y": "2 years",
+  "time_5y": "5 years",
+  "time_10y": "10 years",
+  "stat_scanned": "Files scanned",
+  "stat_flagged": "Flagged",
+  "stat_high_risk": "High risk",
+  "stat_cpr": "CPR numbers found",
+  "col_file": "File",
+  "col_cpr": "CPR numbers",
+  "col_oldest": "Oldest date",
+  "col_risk": "Risk",
+  "col_action": "Action",
+  "col_detail": "Detail",
+  "sort_name_az": "Name A–Z",
+  "sort_name_za": "Name Z–A",
+  "sort_cpr_desc": "CPR count ↓",
+  "sort_oldest_desc": "Oldest date ↓",
+  "sort_risk_desc": "Risk ↓",
+  "sort_size_desc": "Size ↓",
+  "filter_all_types": "All types",
+  "filter_pdf": "PDF",
+  "filter_word": "Word",
+  "filter_excel": "Excel",
+  "filter_image": "Image",
+  "placeholder_search": "Search filename…",
+  "btn_anonymise": "Anonymise",
+  "btn_mask": "Mask CPR",
+  "btn_blur": "Blur",
+  "btn_preview": "Preview",
+  "btn_show_in_folder": "Show in folder",
+  "btn_move_to_trash": "Move to trash",
+  "btn_undo": "Undo",
+  "btn_export_csv": "CSV",
+  "btn_select_all": "Select all",
+  "btn_anonymise_flagged": "Anonymise flagged",
+  "btn_anonymise_all": "Anonymise all flagged",
+  "btn_cancel": "Cancel",
+  "btn_close": "Close",
+  "btn_clear": "Clear",
+  "preview_original": "Original",
+  "preview_processed": "Processed",
+  "preview_anonymise": "Anonymise → preview",
+  "preview_mask": "Mask CPR → preview",
+  "preview_blur_faces": "Blur {n} face(s) → preview",
+  "preview_no_faces": "✓ No faces detected",
+  "preview_scanning_faces": "Scanning for faces…",
+  "preview_processing_faces": "Processing faces…",
+  "preview_rendering": "Rendering…",
+  "scan_preparing": "Preparing…",
+  "scan_scanning": "Scanning…",
+  "scan_face_scanning": "Scanning {total} file(s) for faces…",
+  "scan_face_progress": "Faces: {index} / {total} — {file}",
+  "scan_eta": "{eta} left",
+  "scan_stopped": "Scan stopped.",
+  "empty_flagged": "No flagged documents",
+  "empty_flagged_detail": "Run a scan to view flagged documents",
+  "empty_filter": "No files match your filter",
+  "no_audit": "No audit entries yet",
+  "dialog_delete_title": "Delete files?",
+  "dialog_delete_confirm": "Move to trash",
+  "all_trashed": "All flagged documents moved to trash.",
+  "btn_audit_log": "Audit Log",
+  "audit_cleared": "Audit log cleared",
+  "failed_audit": "Failed to load audit log",
+  "about_title": "About",
+  "label_python": "Python",
+  "label_spacy": "spaCy model",
+  "label_tesseract": "Tesseract",
+  "label_pymupdf": "PyMuPDF",
+  "label_opencv": "OpenCV",
+  "no_model": "no model installed",
+  "not_installed": "not installed",
+  "btn_about": "About",
+  "lbl_size": "Size",
+  "lbl_time": "Time",
+  "lbl_space": "Space",
+  "lbl_loading": "Loading…",
+  "lbl_blurred": "Blurred",
+  "lbl_none": "None",
+  "lbl_scanner": "Scanner",
+  "lbl_document": "Document",
+  "lbl_folder": "Folder",
+  "empty_scan_hint": "Set a folder path and click",
+  "empty_flagged_found": "No flagged documents found.",
+  "preview_click_hint": "Click a document to preview it",
+  "kbd_select": "select",
+  "kbd_delete": "delete",
+  "kbd_close_preview": "close preview",
+  "kbd_select_all": "select all",
+  "sort_cpr_asc": "CPR count ↑",
+  "preview_error": "Preview error",
+  "preview_unavailable": "Preview unavailable",
+  "preview_not_available": "Preview not available for this file type",
+  "lbl_anonymised": "Anonymised",
+  "lbl_masked": "Masked CPR",
+  "lbl_processing": "Processing…",
+  "lbl_error": "Error",
+  "lbl_no_pii": "No changes — no PII found",
+  "badge_anonymised": "✓ anonymised",
+  "badge_masked": "✓ masked",
+  "badge_blurred": "✓ blurred",
+  "lbl_working": "Working…",
+  "lbl_stopping": "Stopping…",
+  "lbl_no_files_selected": "No files selected",
+  "lbl_selected_1": "file selected",
+  "lbl_selected_n": "files selected",
+  "dialog_delete_body": "This will permanently delete the selected files from disk. This action cannot be undone.",
+  "lbl_flagged_docs_1": "flagged document",
+  "lbl_flagged_docs_n": "flagged documents",
+  "banner_all_clean": "All clean",
+  "banner_files_scanned": "file(s) scanned, no CPR numbers found",
+  "banner_need_attention": "file(s) need attention out of",
+  "banner_scanned": "scanned",
+  "summary_face_blur": "image(s) to face-blur",
+  "badge_face": "face",
+  "badge_shared": "shared",
+  "badge_archive": "archive",
+  "badge_shared_cpr": "Shared CPR",
+  "lbl_also_in": "also in",
+  "filter_shared_cpr": "⚠ Shared CPR",
+  "risk_high": "HIGH",
+  "risk_medium": "MEDIUM",
+  "risk_low": "LOW",
+  "reason_cpr_number": "CPR number",
+  "reason_cpr_numbers": "CPR numbers",
+  "reason_cpr_confirmed": "CPR(s) with keyword context",
+  "reason_unique_individuals": "unique individuals",
+  "reason_cpr_shared": "CPR shared across {n} files",
+  "reason_data_10y": "data > 10 years old",
+  "reason_data_5y": "data > 5 years old",
+  "btn_export_excel": "Export report as Excel",
+  "btn_audit_log_short": "Audit log",
+  "btn_delete_selected": "Delete selected",
+  "audit_action_scan": "Scan",
+  "audit_action_redact": "Redact",
+  "audit_action_blur_faces": "Blur faces",
+  "audit_action_delete": "Delete",
+  "audit_action_restore": "Restore",
+  "audit_action_export": "Export",
+  "audit_files": "files",
+  "audit_flagged": "flagged",
+  "audit_high_risk": "high risk",
+  "audit_regions": "regions",
+  "audit_faces": "faces",
+  "audit_permanent": "permanent",
+  "audit_trash": "trash",
+  "audit_files_restored": "file(s) restored",
+  "confirm_clear_audit": "Clear the entire audit log? This cannot be undone.",
+  "lang_spanish": "Spanish",
+  "lang_italian": "Italian",
+  "lang_portuguese": "Portuguese",
+  "lang_finnish": "Finnish",
+  "lang_polish": "Polish",
+  "lang_czech": "Czech",
+  "lang_russian": "Russian",
+  "lang_arabic": "Arabic",
+  "lang_chinese_simplified": "Chinese (Simplified)",
+  "lang_chinese_traditional": "Chinese (Traditional)",
+  "lang_japanese": "Japanese",
+  "lang_korean": "Korean",
+  "lbl_root": "root",
+  "lbl_root_folder": "root folder",
+  "lbl_scanning": "Scanning:",
+  "btn_deselect_all": "Deselect all",
+  "filter_high_risk": "🔴 High risk",
+  "filter_in_archive": "📦 In archive",
+  "log_starting_scan": "Starting scan of",
+  "log_found_files": "Found {n} file(s) to scan",
+  "log_cloud_skipped": "cloud-only skipped",
+  "log_faces_detected": "face(s) detected",
+  "log_ocr_pages": "page(s)",
+  "log_pages_skipped": "image page(s) skipped (enable OCR)",
+  "log_scan_complete": "Scan complete",
+  "log_files_with_cpr": "file(s) with CPR",
+  "log_no_faces_in": "No faces detected in",
+  "pii_phone": "phone",
+  "pii_email": "email",
+  "pii_iban": "IBAN",
+  "pii_bank_account": "bank account",
+  "pii_name": "name",
+  "pii_address": "address",
+  "pii_org": "org",
+  "lbl_other_pii": "Other PII",
+  "lbl_found": "found",
+  "btn_clear_results_cache": "Clear results cache",
+  "btn_clear_ocr_cache": "Clear OCR cache",
+  "confirm_clear_results_cache": "Clear all cached scan results? The grid will be cleared.",
+  "confirm_clear_ocr_cache": "Clear OCR cache? This will force re-OCR on next scan.",
+  "log_cache_cleared": "Results cache cleared",
+  "log_ocr_cache_cleared": "OCR cache cleared",
+  "m365_app_name": "GDPRScanner",
+  "m365_sources": "Sources",
+  "m365_options": "Options",
+  "m365_accounts": "Accounts",
+  "m365_stats": "Stats",
+  "m365_src_email": "Outlook",
+  "m365_src_onedrive": "OneDrive",
+  "m365_src_sharepoint": "SharePoint",
+  "m365_src_teams": "Teams",
+  "m365_opt_date_from": "Scan emails/files from",
+  "m365_opt_date_from_hint": "Leave blank to scan all",
+  "m365_opt_email_body": "Scan email body",
+  "m365_opt_attachments": "Scan attachments",
+  "m365_opt_max_attach": "Max attachment size (MB)",
+  "m365_opt_max_emails": "Max emails per user",
+  "m365_connect_title": "Connect to Microsoft 365",
+  "m365_connect_sub": "Enter your Azure app credentials to sign in.",
+  "m365_label_client_id": "Client ID (Application ID)",
+  "m365_label_tenant_id": "Tenant ID",
+  "m365_label_client_secret": "Client Secret",
+  "m365_secret_hint": "(optional — enables org-wide scanning)",
+  "m365_secret_desc_app": "app accesses all users' data directly (Application permissions, no sign-in required).",
+  "m365_secret_desc_delegated": "you sign in as yourself and can only scan your own data unless you're a Global Admin.",
+  "m365_btn_connect": "Connect",
+  "m365_device_code_go": "Go to",
+  "m365_device_code_enter": "and enter this code",
+  "m365_btn_cancel_auth": "Cancel",
+  "m365_btn_reconfigure": "Reconfigure",
+  "m365_btn_sign_out": "Sign out",
+  "m365_mode_app": "🔑 App mode — org-wide",
+  "m365_mode_delegated": "Delegated",
+  "m365_search_users": "Search users…",
+  "m365_add_account_label": "Add account manually:",
+  "m365_add_account_placeholder": "email or UPN",
+  "m365_admin_note": "Only showing your account. To list all users, an admin must grant <strong>User.Read.All</strong> consent in Azure Portal, or add accounts manually below.",
+  "m365_btn_scan": "Scan",
+  "m365_btn_stop": "Stop",
+  "m365_pill_flagged": "flagged",
+  "m365_pill_scanned": "scanned",
+  "m365_filter_all_sources": "All sources",
+  "m365_filter_email": "Outlook",
+  "m365_filter_onedrive": "OneDrive",
+  "m365_filter_sharepoint": "SharePoint",
+  "m365_filter_teams": "Teams",
+  "m365_empty_hint": "Select sources and click <strong>Scan</strong><br>to find documents with CPR numbers",
+  "m365_stat_flagged": "Flagged",
+  "m365_stat_cpr": "CPR hits",
+  "m365_preview_open": "Open in M365 ↗",
+  "m365_preview_close": "Close",
+  "m365_auth_mode_app": "Auth mode: Application (client credentials — org-wide)",
+  "m365_auth_mode_delegated": "Auth mode: Delegated (device code — signed-in user only)",
+  "m365_phase_teams_index": "Building Teams membership index…",
+  "m365_phase_sharepoint": "Collecting SharePoint files…",
+  "m365_btn_about": "About",
+  "m365_stat_scanned": "Scanned",
+  "m365_no_users_found": "No users found",
+  "m365_no_users_match": "No users match",
+  "m365_no_cpr_found": "No CPR numbers found.",
+  "m365_no_matches": "No matches",
+  "m365_btn_export_excel": "Export Excel",
+  "m365_export_no_data": "No results to export.",
+  "m365_phase_emails": "Collecting Outlook messages",
+  "m365_phase_onedrive": "Collecting OneDrive",
+  "m365_phase_teams": "Collecting Teams",
+  "m365_preset_1yr": "1 yr",
+  "m365_preset_2yr": "2 yr",
+  "m365_preset_5yr": "5 yr",
+  "m365_preset_10yr": "10 yr",
+  "m365_preset_any": "Any",
+  "m365_auth_mode_app_short": "Application permissions · client credentials",
+  "m365_auth_mode_delegated_short": "Delegated permissions · device code flow",
+  "m365_info_permissions": "Permissions",
+  "m365_info_signin": "Sign-in required",
+  "m365_info_scope": "Scope",
+  "m365_info_scope_org": "All users in tenant",
+  "m365_info_scope_user": "Signed-in user only",
+  "m365_info_consent": "Admin consent",
+  "m365_info_required": "Required",
+  "m365_info_admin": "Global Admin",
+  "m365_info_expands_scope": "Expands scope to all users",
+  "m365_info_no": "No",
+  "m365_info_yes": "Yes",
+  "m365_info_app_desc": "The app authenticates with a Client Secret and accesses all users' data directly via Microsoft Graph — no interactive sign-in needed. Ideal for automated or scheduled scans.",
+  "m365_info_delegated_desc": "The app acts on behalf of the signed-in user via the device code flow. By default only that user's data is accessible. A Global Admin can grant broader consent to scan all users.",
+  "m365_filter_search": "Search…",
+  "m365_filter_clear": "Clear",
+  "m365_btn_list_view": "List",
+  "m365_btn_grid_view": "Grid",
+  "m365_log_found_items": "Found",
+  "m365_log_items_to_scan": "item(s) to scan",
+  "m365_log_starting_scan": "Starting scan:",
+  "m365_log_accounts": "account(s)",
+  "m365_btn_bulk_delete": "Delete",
+  "m365_bulk_delete_title": "Bulk Delete",
+  "m365_bulk_delete_sub": "Emails move to Deleted Items · Files go to the recycle bin",
+  "m365_bulk_filter_heading": "Filter what to delete",
+  "m365_bulk_filter_source": "Source type",
+  "m365_bulk_filter_min_cpr": "Min CPR hits",
+  "m365_bulk_filter_older_than": "Older than date",
+  "m365_bulk_no_match": "No items match these criteria.",
+  "m365_bulk_match_count": "item(s) will be deleted",
+  "m365_bulk_confirm_q": "item(s) will be permanently deleted. Continue?",
+  "m365_bulk_deleting": "Deleting…",
+  "m365_bulk_deleted": "deleted",
+  "m365_bulk_failed": "failed",
+  "m365_bulk_delete_confirm": "Delete matching items",
+  "m365_delete_confirm": "Delete",
+  "m365_delete_warning": "This cannot be undone.",
+  "m365_log_deleted": "Deleted:",
+  "m365_log_delete_failed": "Delete failed:",
+  "m365_log_bulk_done": "Bulk delete:",
+  "m365_log_older_than": "older than",
+  "m365_eta_left": "left",
+  "btn_all": "All",
+  "btn_errors": "Errors",
+  "log_copy": "Copy",
+  "btn_none": "None",
+  "m365_btn_resume": "Resume",
+  "m365_btn_start_fresh": "Start fresh",
+  "m365_resume_banner": "Previous scan interrupted — {scanned} scanned, {flagged} found",
+  "m365_log_resuming": "Resuming scan:",
+  "m365_log_already_scanned": "already scanned — skipped",
+  "m365_resuming": "Resuming — skipping already-scanned items…",
+  "m365_opt_delta": "Delta scan",
+  "m365_opt_delta_hint": "Changed items only (after first full scan)",
+  "m365_delta_tokens_saved": "Tokens saved",
+  "m365_delta_clear": "Clear tokens",
+  "m365_delta_cleared": "Delta tokens cleared — next scan will be a full scan.",
+  "m365_delta_mode": "Delta mode — fetching changed items only…",
+  "m365_smtp_title": "✉ Email report",
+  "m365_smtp_desc": "Send the Excel report by email after scanning.",
+  "m365_smtp_host": "SMTP host",
+  "m365_smtp_port": "Port",
+  "m365_smtp_user": "Username",
+  "m365_smtp_pass": "Password",
+  "m365_smtp_from": "From address",
+  "m365_smtp_tls": "STARTTLS",
+  "m365_smtp_ssl": "SSL",
+  "m365_smtp_recipients": "Recipients",
+  "m365_smtp_recipients_hint": "Comma or semicolon separated",
+  "m365_smtp_save": "Save",
+  "m365_smtp_send": "Send now",
+  "m365_smtp_saved": "Settings saved.",
+  "m365_smtp_sending": "Sending…",
+  "m365_smtp_sent": "Report sent.",
+  "m365_smtp_no_recipients": "Enter at least one recipient.",
+  "m365_smtp_configure": "Configure",
+  "m365_smtp_from_hint": "(optional — defaults to username)",
+  "m365_subject_title": "🔍 Data subject lookup",
+  "m365_subject_btn": "Look up",
+  "m365_subject_desc": "Find all flagged items containing a given CPR number. The CPR is hashed before querying and never stored in plaintext.",
+  "m365_subject_placeholder": "DDMMYY-XXXX",
+  "m365_subject_search": "Search",
+  "m365_subject_searching": "Searching…",
+  "m365_subject_found": "item(s) found",
+  "m365_subject_not_found": "No flagged items found for this CPR number.",
+  "m365_subject_delete_all": "Delete all for this person",
+  "m365_subject_delete_confirm": "item(s) will be permanently deleted. Continue?",
+  "m365_disposition_label": "Disposition",
+  "m365_disp_unreviewed": "Unreviewed",
+  "m365_disp_retain_legal": "Retain — legal obligation",
+  "m365_disp_retain_legit": "Retain — legitimate interest",
+  "m365_disp_retain_contract": "Retain — contract",
+  "m365_disp_delete_sched": "Delete — scheduled",
+  "m365_disp_personal_use": "Personal use — out of scope",
+  "m365_disp_deleted": "Deleted",
+  "m365_disp_save": "Save",
+  "m365_disp_saved": "✓ Saved",
+  "m365_opt_retention": "Retention policy",
+  "m365_opt_retention_hint": "Flag and delete items older than N years",
+  "m365_ret_years": "Retention years",
+  "m365_ret_fy_end": "Fiscal year end",
+  "m365_ret_fy_rolling": "Rolling (from today)",
+  "m365_ret_fy_dec": "31 Dec (Bogføringsloven)",
+  "m365_ret_fy_jun": "30 Jun",
+  "m365_ret_fy_mar": "31 Mar",
+  "m365_ret_mode_rolling": "rolling",
+  "m365_ret_mode_fiscal": "fiscal year",
+  "m365_ret_cutoff_hint": "Items modified before",
+  "m365_ret_cutoff_flagged": "will be flagged as overdue",
+  "m365_overdue_found": "overdue item(s) found",
+  "m365_bulk_overdue_btn": "Filter overdue",
+  "m365_bulk_clear_filters": "Clear filters",
+  "m365_btn_export_article30": "Art.30",
+  "m365_article30_done": "Article 30 report ready.",
+  "a30_title": "GDPR Article 30",
+  "a30_subtitle": "Register of Processing Activities",
+  "a30_generated": "Generated",
+  "a30_confidential": "Confidential — GDPR compliance document",
+  "a30_s1": "1. Summary",
+  "a30_scan_date": "Scan date",
+  "a30_items_scanned": "Items scanned",
+  "a30_flagged": "Flagged items",
+  "a30_cpr_hits": "Total CPR hits",
+  "a30_data_subjects": "Estimated data subjects",
+  "a30_overdue": "Overdue items (>5 yrs)",
+  "a30_by_source": "Breakdown by source",
+  "a30_col_source": "Source",
+  "a30_col_items": "Items",
+  "a30_col_cpr": "CPR hits",
+  "a30_col_overdue": "Overdue",
+  "a30_s2": "2. Personal Data Categories Identified",
+  "a30_s2_intro": "The following categories of personal data were detected during scanning.",
+  "a30_col_gdpr_class": "GDPR classification",
+  "a30_cpr_label": "CPR numbers (Danish personal ID)",
+  "a30_cpr_class": "Art. 9 — national identifier",
+  "a30_pii_class_9": "Art. 9 — health/sensitive",
+  "a30_pii_class_4": "Art. 4 — personal data",
+  "a30_s3": "3. Data Inventory",
+  "a30_s3_intro": "All flagged items are listed below with location, retention status, and compliance disposition.",
+  "a30_col_name": "Name / Subject",
+  "a30_col_account": "Account",
+  "a30_col_modified": "Modified",
+  "a30_col_disp": "Disposition",
+  "a30_more_items": "additional items not shown. Export the Excel report for the complete list.",
+  "a30_s4": "4. Retention Analysis",
+  "a30_s4_intro": "The following items exceed the 5-year retention threshold and should be reviewed for deletion under GDPR Article 5(1)(e) — storage limitation.",
+  "a30_s5": "5. Compliance Trend",
+  "a30_s5_intro": "Flagged item counts over the last scans (most recent first).",
+  "a30_col_scan_date": "Scan date",
+  "a30_col_scan_type": "Scan type",
+  "a30_scan_delta": "Delta",
+  "a30_scan_full": "Full",
+  "a30_s6": "6. Methodology and Legal Basis",
+  "a30_method_title": "Scanning methodology",
+  "a30_method_1": "CPR numbers are detected using pattern matching against the official Danish CPR format (DDMMYY-XXXX) with full date validation and century-digit verification per the CPR register rules.",
+  "a30_method_2": "Additional personal data (phone numbers, email addresses, IBANs, bank accounts, names, addresses, and organisations) is detected using regular expressions and spaCy NER.",
+  "a30_method_3": "CPR numbers stored in the scanner's database are SHA-256 hashed and never stored in plaintext.",
+  "a30_method_4": "Scanning covers Exchange mailboxes (all folders including Sent Items), OneDrive, SharePoint, and Microsoft Teams channel files via the Microsoft Graph API. When connected, Google Workspace scanning covers Gmail and Google Drive via a service account with domain-wide delegation. Local and network (SMB) file shares are scanned directly.",
+  "a30_gdpr_title": "GDPR Articles referenced",
+  "a30_gdpr_1": "Article 5(1)(c) — Data minimisation: only necessary data should be retained",
+  "a30_gdpr_2": "Article 5(1)(e) — Storage limitation: data must not be kept longer than necessary",
+  "a30_gdpr_3": "Article 9 — Special categories: health, criminal, trade union, and similar data require explicit legal basis",
+  "a30_gdpr_4": "Article 15 — Right of access: data subjects may request information about their data",
+  "a30_gdpr_5": "Article 17 — Right to erasure: data subjects may request deletion",
+  "a30_gdpr_6": "Article 30 — Records of processing activities: this document satisfies the obligation",
+  "a30_disp_unreviewed": "Unreviewed",
+  "a30_disp_retain_legal": "Retain — Legal obligation",
+  "a30_disp_retain_legit": "Retain — Legitimate interest",
+  "a30_disp_retain_contract": "Retain — Contract",
+  "a30_disp_delete_sched": "Delete — Scheduled",
+  "a30_disp_personal_use": "Personal use — out of GDPR scope (Art. 2(2)(c))",
+  "a30_disp_deleted": "Deleted",
+  "a30_s6_short": "Methodology and Legal Basis",
+  "m365_role_all": "All",
+  "m365_role_staff": "Staff",
+  "m365_role_student": "Student",
+  "a30_s_dellog": "Deletion Audit Log",
+  "a30_dellog_intro": "item(s) containing personal data have been deleted via GDPRScanner. This log satisfies the accountability obligation under GDPR Article 5(2).",
+  "a30_dellog_by_reason": "Deletions by reason",
+  "a30_dellog_records": "Deletion records",
+  "a30_col_reason": "Reason",
+  "a30_col_count": "Count",
+  "a30_col_deleted_at": "Deleted at",
+  "a30_col_deleted_by": "Deleted by",
+  "a30_reason_manual": "Manual (individual card delete)",
+  "a30_reason_bulk": "Bulk delete",
+  "a30_reason_retention": "Retention policy enforcement",
+  "a30_reason_dsr": "Data subject erasure request (Art. 17)",
+  "m365_filter_all_disp": "All dispositions",
+  "m365_trend_title": "Trend",
+  "m365_trend_flagged": "Flagged",
+  "m365_trend_overdue": "Overdue",
+  "m365_filter_all_transfer": "All items",
+  "m365_filter_ext_recipient": "External recipient",
+  "m365_filter_ext_share": "Externally shared",
+  "m365_filter_shared": "Shared",
+  "m365_badge_ext_recipient": "External",
+  "m365_badge_shared": "Shared",
+  "a30_s_special": "Special Category Data (Article 9)",
+  "a30_special_intro": "item(s) detected as special category data under GDPR Article 9. Requires explicit legal basis and DPIA.",
+  "a30_special_by_cat": "Detected categories",
+  "a30_special_items": "Affected items (up to 50)",
+  "a30_col_category": "Category",
+  "a30_cat_health": "Health data (Art. 9)",
+  "a30_cat_mental": "Mental health (Art. 9)",
+  "a30_cat_criminal": "Criminal records (Art. 10)",
+  "a30_cat_union": "Trade union membership (Art. 9)",
+  "a30_cat_religion": "Religious beliefs (Art. 9)",
+  "a30_cat_ethnicity": "Racial/ethnic origin (Art. 9)",
+  "a30_cat_political": "Political opinions (Art. 9)",
+  "a30_cat_biometric": "Biometric data (Art. 9)",
+  "a30_cat_sexual": "Sexual orientation (Art. 9)",
+  "m365_filter_all_special": "All risk levels",
+  "m365_filter_special_only": "Art. 9 special category",
+  "m365_badge_special": "Art.9",
+  "m365_phase_scanning": "Scanning…",
+  "a30_special_cat": "Art. 9 special category items",
+  "a30_special_cat_note": "These items contain health, criminal, biometric, religious, ethnic, trade union, political, or sexual orientation data. An explicit legal basis (Art. 9(2)) and possibly a DPIA (Art. 35) are required.",
+  "a30_col_special": "Art. 9",
+  "a30_pii_phone": "Phone numbers",
+  "a30_pii_email": "Email addresses",
+  "a30_pii_iban": "IBAN bank numbers",
+  "a30_pii_bank": "Bank account numbers",
+  "a30_pii_name": "Personal names (NER)",
+  "a30_pii_address": "Addresses (NER)",
+  "a30_pii_org": "Organisations (NER)",
+  "a30_col_cpr_short": "CPR",
+  "a30_inv_staff": "Staff / Faculty",
+  "a30_inv_students": "Students",
+  "a30_student_consent_note": "Note: Student accounts in Danish folkeskole (pupils under age 15) require parental consent for processing of personal data under Databeskyttelsesloven §6. Items in student accounts must not be auto-deleted — any action requires review by school administration and, for pupils under 15, notification of parents or guardians as rights holders under GDPR Article 8.",
+  "m365_profile_label": "Profile:",
+  "m365_profile_placeholder": "— Select profile —",
+  "m365_profile_save_tip": "Save current settings as a profile",
+  "m365_profile_save_prompt": "Profile name:",
+  "m365_profile_applied": "Profile loaded",
+  "m365_profile_saved": "Profile saved",
+  "m365_profile_manage_btn": "Profiles",
+  "m365_profile_clear_btn": "Clear",
+  "m365_profile_save_btn": "Save",
+  "m365_profile_manage_title": "⚙ Manage Profiles",
+  "m365_profile_no_profiles": "No saved profiles yet. Use 💾 to save the current sidebar settings as a profile.",
+  "m365_profile_use": "Use",
+  "m365_profile_edit": "Edit",
+  "m365_profile_duplicate": "Duplicate",
+  "m365_profile_delete": "Delete",
+  "m365_profile_delete_confirm": "Delete profile",
+  "m365_profile_duplicated": "Profile duplicated",
+  "m365_profile_deleted": "Profile deleted",
+  "m365_profile_never": "never",
+  "m365_profile_last_run": "Last run",
+  "m365_profile_name_placeholder": "Profile name",
+  "m365_profile_desc_placeholder": "Description (optional)",
+  "m365_profile_name_required": "Profile name is required.",
+  "m365_db_title": "🗄 Database",
+  "m365_db_export": "Export",
+  "m365_db_import": "Import",
+  "m365_db_export_error": "Export failed",
+  "m365_db_exported": "Database exported",
+  "m365_db_import_title": "📥 Import Database",
+  "m365_db_import_desc": "Select a previously exported .zip file. Merge adds dispositions and deletion log. Replace wipes and fully restores.",
+  "m365_db_import_file": "ZIP file",
+  "m365_db_import_mode": "Mode:",
+  "m365_db_import_merge": "Merge (safe)",
+  "m365_db_import_replace": "Replace (full restore)",
+  "m365_db_import_replace_warn": "⚠ Replace mode will erase all existing scan data before restoring. Make sure you have a backup of ~/.gdpr_scanner.db first.",
+  "m365_db_import_replace_confirm": "Replace mode will erase ALL existing scan data and restore from the archive.\\n\\nMake sure you have a manual backup of ~/.gdpr_scanner.db.\\n\\nProceed?",
+  "m365_db_import_no_file": "Please select a ZIP file first.",
+  "m365_db_importing": "Importing…",
+  "m365_db_imported": "Imported",
+  "m365_db_import_run": "Import",
+  "m365_opt_scan_photos": "Scan photos for faces",
+  "m365_opt_scan_photos_hint": "Flags images with detected faces as Art. 9 biometric data. Slower — opt in.",
+  "m365_filter_photo_only": "📷 Photos / biometric",
+  "m365_badge_faces": "faces",
+  "a30_photo_items": "Photos with detected faces (Art. 9 biometric)",
+  "a30_photo_note": "Photographs of identifiable persons are biometric data under Art. 9 GDPR. Retention requires a documented legal basis under Art. 9(2). For school photographs of pupils under 15, parental consent is required (Databeskyttelsesloven §6). See Datatilsynet guidance on school photography.",
+  "a30_s_photos": "Photographs and Biometric Data (Article 9)",
+  "a30_photo_intro": "image file(s) containing detected face(s) were found in the scan. Photographs of identifiable persons constitute biometric data under GDPR Article 9 and are subject to the same heightened protection as health or criminal records data.",
+  "a30_photo_guidance": "Retention guidance",
+  "a30_photo_g1": "Photos may only be retained while the original purpose remains valid (Art. 5(1)(b) — purpose limitation).",
+  "a30_photo_g2": "Pupils under 15 require parental consent (Databeskyttelsesloven §6). Consent must be freely given, specific, and documented.",
+  "a30_photo_g3": "Photos on public-facing websites must be removed promptly after a person leaves the organisation or withdraws consent (Art. 17 — right to erasure).",
+  "a30_photo_g4": "Historical/archive use may justify longer retention under Art. 89 only with specific safeguards and case-by-case assessment.",
+  "a30_photo_col_faces": "Faces",
+  "a30_method_5": "When photo scanning is enabled, image files are analysed using OpenCV Haar cascade face detection to identify photographs of persons (Art. 9 biometric data).",
+  "m365_role_cycle_tip": "Click to override role (cycles student → staff → other → auto)",
+  "m365_role_set": "Role set",
+  "m365_role_cleared": "Role override cleared",
+  "m365_sku_debug_title": "🔍 Tenant SKU IDs",
+  "m365_sku_debug_desc": "These are the raw SKU IDs assigned to your users. Any marked ❓ unknown are not in classification/m365_skus.json — copy them under student_ids or staff_ids and restart.",
+  "m365_sku_debug_none": "No license data returned — check that the app has User.Read.All permission.",
+  "m365_file_sources_title": "📁 File sources",
+  "m365_file_sources_manage": "Manage",
+  "m365_file_sources_empty": "No file sources configured. Add a local folder or network share below.",
+  "m365_file_sources_add": "Add source",
+  "m365_fsrc_label": "Label",
+  "m365_fsrc_path": "Path",
+  "m365_fsrc_smb_detected": "SMB/CIFS network share detected",
+  "m365_fsrc_smb_host": "SMB host",
+  "m365_fsrc_smb_user": "Username",
+  "m365_fsrc_smb_pw": "Password",
+  "m365_fsrc_smb_pw_hint": "Password is saved to the OS keychain — never stored in a file.",
+  "m365_fsrc_add_btn": "Add",
+  "m365_fsrc_saved": "Source saved",
+  "m365_fsrc_saving": "Saving…",
+  "m365_fsrc_path_required": "Path is required.",
+  "m365_fsrc_scan_btn": "Scan",
+  "m365_fsrc_scan_start": "Starting file scan",
+  "m365_src_group_files": "File sources",
+  "m365_no_sources": "No sources selected — nothing to scan.",
+  "m365_fsrc_name_required": "Name is required.",
+  "m365_srcmgmt_title": "⚙ Source management",
+  "m365_srcmgmt_tab_m365": "Microsoft 365",
+  "m365_srcmgmt_tab_google": "Google Workspace",
+  "m365_srcmgmt_tab_files": "File sources",
+  "m365_srcmgmt_connection": "Connection",
+  "m365_srcmgmt_azure_creds": "Azure credentials",
+  "m365_srcmgmt_sources_m365": "Sources to scan",
+  "m365_srcmgmt_connected": "Connected",
+  "m365_srcmgmt_not_connected": "Not connected",
+  "m365_srcmgmt_coming_soon": "Coming soon",
+  "m365_srcmgmt_google_sub": "Gmail and Google Drive scanning will appear here when implemented.",
+  "m365_srcmgmt_file_sources": "File sources",
+  "m365_sources_manage_btn": "Sources",
+  "m365_connecting": "Connecting…",
+  "m365_err_creds_required": "Client ID and Tenant ID required",
+  "m365_signout_confirm": "Disconnect and clear credentials?",
+  "m365_btn_settings": "Settings",
+  "m365_settings_title": "⚙ Settings",
+  "m365_settings_tab_general": "General",
+  "m365_settings_tab_email": "Email report",
+  "m365_settings_tab_database": "Database",
+  "m365_settings_appearance": "Appearance",
+  "m365_settings_language": "Language",
+  "m365_settings_theme": "Theme",
+  "m365_settings_db_actions": "Actions",
+  "m365_db_reset": "Reset DB",
+  "m365_db_reset_confirm": "Reset database? All scan results will be deleted.",
+  "m365_db_reset_done": "Database reset",
+  "m365_db_scans": "Scans",
+  "m365_smtp_saving": "Saving…",
+  "m365_settings_admin_pin": "Admin PIN",
+  "m365_settings_pin_hint": "Required for destructive actions (e.g. Reset DB). Leave blank to disable.",
+  "m365_settings_current_pin": "Current PIN",
+  "m365_settings_new_pin": "New PIN",
+  "m365_settings_confirm_pin": "Confirm PIN",
+  "m365_settings_pin_set": "Admin PIN is set",
+  "m365_settings_pin_not_set": "No PIN set — Reset DB is unprotected",
+  "m365_settings_pin_required": "PIN is required.",
+  "m365_settings_pin_mismatch": "PINs do not match.",
+  "m365_settings_pin_wrong": "Incorrect PIN — reset cancelled.",
+  "m365_settings_pin_saved": "PIN saved",
+  "m365_settings_enter_pin": "Enter admin PIN",
+  "m365_settings_enter_pin_reset": "Enter admin PIN to reset the database.",
+  "btn_confirm": "Confirm",
+  "m365_log_scan_started": "Scan started",
+  "m365_preview_local_file": "Local file — no cloud preview available",
+  "m365_badge_gps": "GPS location",
+  "a30_gps_items": "Items with GPS location data (Art. 4 — location = personal data)",
+  "a30_exif_pii_items": "Items with EXIF PII (author, description, keywords)",
+  "a30_gps_title": "Items with GPS location data",
+  "a30_gps_intro": "The following files contain GPS coordinates embedded in EXIF metadata. Location data constitutes personal data under Art. 4 GDPR.",
+  "a30_gps_col_lat": "Latitude",
+  "a30_gps_col_lon": "Longitude",
+  "m365_accounts_disabled_tip": "Select a Microsoft 365 source to enable account selection",
+  "m365_smtp_test": "Test",
+  "m365_smtp_testing": "Sending test email…",
+  "m365_smtp_test_ok": "Test email sent",
+  "m365_smtp_test_fail": "Connection failed",
+  "m365_fsrc_edit_btn": "Edit",
+  "m365_fsrc_save_changes": "Save changes",
+  "m365_settings_tab_scheduler": "Scheduler",
+  "m365_sched_title": "Scheduled scans",
+  "m365_sched_next": "Next",
+  "m365_sched_hint": "Run scans automatically at a set time. Requires an active M365 connection (application mode recommended).",
+  "m365_sched_no_aps": "⚠ APScheduler not installed. Run: pip install apscheduler",
+  "m365_sched_enabled": "Enable scheduler",
+  "m365_sched_frequency": "Frequency",
+  "m365_sched_dow": "Day of week",
+  "m365_sched_dom": "Day of month",
+  "m365_sched_time": "Time",
+  "m365_sched_profile": "Profile",
+  "m365_sched_profile_last": "Last saved settings",
+  "m365_sched_after_scan": "After scan",
+  "m365_sched_auto_email": "Email report automatically",
+  "m365_sched_auto_retention": "Enforce retention policy",
+  "m365_sched_status": "Status",
+  "m365_sched_run_now": "▶ Run now",
+  "m365_sched_add": "+ Add scheduled scan",
+  "m365_sched_name": "Name",
+  "m365_sched_editor_new": "New scheduled scan",
+  "m365_sched_editor_edit": "Edit scheduled scan",
+  "m365_sched_name_required": "Name is required",
+  "m365_sched_no_runs": "No scheduled runs yet",
+  "m365_sched_freq_daily": "Daily",
+  "m365_sched_freq_weekly": "Weekly",
+  "m365_sched_freq_monthly": "Monthly",
+  "m365_sched_dow_mon": "Monday",
+  "m365_sched_dow_tue": "Tuesday",
+  "m365_sched_dow_wed": "Wednesday",
+  "m365_sched_dow_thu": "Thursday",
+  "m365_sched_dow_fri": "Friday",
+  "m365_sched_dow_sat": "Saturday",
+  "m365_sched_dow_sun": "Sunday",
+  "btn_save": "Save",
+  "m365_settings_about": "About",
+  "m365_settings_save_pin": "Save PIN",
+  "m365_sse_reconnecting": "Reconnecting to running scan…",
+  "m365_sse_replay_note": "Live log resumed — earlier entries replayed from running scan.",
+  "m365_google_sa_creds": "Service account credentials",
+  "m365_google_sa_key_file": "Service Account JSON key",
+  "m365_google_sa_key_hint": "Download from Google Cloud Console → IAM & Admin → Service Accounts → Keys → Add Key → JSON",
+  "m365_google_admin_email": "Admin email",
+  "m365_google_admin_email_hint": "Used for domain-wide delegation — must be a Workspace super-admin.",
+  "m365_google_libs_missing": "Libraries not installed",
+  "m365_google_key_required": "Select a service account JSON key file",
+  "m365_google_invalid_json": "Invalid JSON file",
+  "m365_srcmgmt_sources_google": "Sources to scan",
+  "m365_google_src_gmail": "Gmail",
+  "m365_google_src_drive": "Google Drive",
+  "m365_google_setup_title": "Setup required in Google Workspace:",
+  "m365_google_setup_step1": "Create a Google Cloud project and enable Gmail API + Drive API + Admin SDK.",
+  "m365_google_setup_step2": "Create a service account, download the JSON key, and enable domain-wide delegation.",
+  "m365_google_setup_step3": "In Workspace Admin → Security → API Controls → Domain-wide delegation, add the service account client ID with scopes:",
+  "m365_google_auth_mode": "Auth mode",
+  "m365_google_mode_workspace": "Workspace",
+  "m365_google_mode_personal": "Personal account",
+  "m365_google_personal_creds": "Personal account",
+  "m365_google_personal_client_id": "Client ID",
+  "m365_google_personal_client_secret": "Client secret",
+  "m365_google_personal_hint": "Create OAuth 2.0 Desktop credentials in Google Cloud Console, then paste the client ID and secret above.",
+  "m365_google_personal_sign_in": "Sign in",
+  "m365_google_personal_creds_required": "Client ID and secret required",
+  "m365_google_personal_setup_title": "Setup required:",
+  "m365_google_personal_setup_step1": "In Google Cloud Console, create a project and enable Gmail API + Drive API.",
+  "m365_google_personal_setup_step2": "Create OAuth 2.0 credentials (Desktop app type) and copy the client ID and secret.",
+  "m365_google_personal_setup_step3": "Add your Google account email to the OAuth consent screen test users list.",
+  "m365_auth_waiting": "Waiting for sign-in…",
+  "role_staff": "Staff",
+  "role_student": "Student",
+  "role_other": "Other",
+
+  "m365_settings_tab_security": "Security",
+
+  "share_modal_title": "Share results",
+  "share_modal_desc": "Read-only links let a DPO or reviewer browse results and tag dispositions without access to scan controls or credentials.",
+  "share_new_link": "New link",
+  "share_label_lbl": "Label (optional)",
+  "share_label_placeholder": "e.g. DPO review 2026",
+  "share_expires_in": "Expires in",
+  "share_expires_never": "Never",
+  "share_expires_7d": "7 days",
+  "share_expires_30d": "30 days",
+  "share_expires_90d": "90 days",
+  "share_expires_1y": "1 year",
+  "share_create": "Create",
+  "share_copy_link_prompt": "Copy link:",
+  "share_active_links": "Active links",
+  "share_viewer_pin_label": "Viewer PIN:",
+  "share_pin_configure": "Configure",
+  "share_pin_set": "Set",
+  "share_pin_not_set": "Not set",
+  "share_no_links": "No active links.",
+  "share_unlabelled": "Unlabelled",
+  "share_expires_prefix": "Expires:",
+  "share_last_used": "Last used:",
+  "share_revoke": "Revoke",
+  "share_copied": "Copied!",
+  "share_load_error": "Failed to load links.",
+  "share_create_error": "Failed to create link:",
+  "share_revoke_confirm": "Revoke this link? Anyone using it will immediately lose access.",
+  "share_revoke_error": "Failed to revoke:",
+
+  "viewer_pin_group_title": "Viewer PIN",
+  "viewer_pin_desc": "A numeric PIN (4\u20138 digits) that lets anyone open <code style=\"font-size:10px\">/view</code> in a browser for read-only access to results without a token URL.",
+  "viewer_pin_clear": "Clear PIN",
+  "viewer_pin_is_set": "Viewer PIN is set",
+  "viewer_pin_not_set_msg": "No PIN set \u2014 /view requires a token link",
+  "viewer_pin_format": "PIN must be 4\u20138 digits.",
+  "viewer_pin_saving": "Saving\u2026",
+  "viewer_pin_saved": "PIN saved",
+  "viewer_pin_clear_confirm": "Remove the viewer PIN? /view will require a token link again.",
+  "viewer_pin_cleared": "PIN cleared"
+}
\ No newline at end of file
diff --git a/m365_connector.py b/m365_connector.py
new file mode 100644
index 0000000..275a249
--- /dev/null
+++ b/m365_connector.py
@@ -0,0 +1,1141 @@
+#!/usr/bin/env python3
+"""
+m365_connector.py — Microsoft Graph API connector for M365 Scanner.
+
+Handles OAuth device-code flow via MSAL and exposes iterators for:
+  - Exchange/Outlook mail (body + attachments)
+  - OneDrive personal files
+  - SharePoint site files
+  - Teams channel files (backed by SharePoint)
+
+All file content is yielded as (metadata_dict, bytes_content) tuples so the
+scanner can process them without keeping everything in memory.
+"""
+
+import json
+import logging
+import time
+import tempfile
+import threading
+from pathlib import Path
+from typing import Iterator, Optional
+
+logger = logging.getLogger(__name__)
+
+# ── MSAL ──────────────────────────────────────────────────────────────────────
+try:
+    import msal
+    MSAL_OK = True
+except ImportError:
+    MSAL_OK = False
+
+# ── Requests ──────────────────────────────────────────────────────────────────
+try:
+    import requests as _requests
+    REQUESTS_OK = True
+except ImportError:
+    REQUESTS_OK = False
+
+# Root URL of Microsoft Graph v1.0; relative request paths are appended to it.
+GRAPH_BASE = "https://graph.microsoft.com/v1.0"
+
+# Delegated scopes — used when signing in as a specific user (device code flow)
+SCOPES = [
+    "Mail.Read",
+    "Files.Read.All",
+    "Sites.Read.All",
+    "Team.ReadBasic.All",
+    "ChannelMessage.Read.All",
+    "User.Read",
+    "User.Read.All",
+]
+
+# Application scope — client credentials flow uses a single fixed scope
+APP_SCOPES = ["https://graph.microsoft.com/.default"]
+
+# Per-user data directory. NOTE: it is created as a side effect of importing
+# this module.
+_DATA_DIR         = Path.home() / ".gdprscanner"
+_DATA_DIR.mkdir(exist_ok=True)
+# Serialized MSAL token cache used by delegated mode.
+_TOKEN_CACHE_FILE = _DATA_DIR / "token.json"
+
+
+class M365Error(Exception):
+    pass
+
+
+class M365PermissionError(M365Error):
+    """
+    Raised when the Graph API returns 403 Forbidden.
+
+    With delegated (device-code) permissions the signed-in user can only
+    access their own data via /me/...  Accessing /users/{other}/... requires
+    one of:
+      • The signed-in user is a Global Admin or Exchange Admin
+      • The Azure app has been granted Application permissions (not Delegated)
+        for Mail.Read, Files.Read.All, etc. and an admin has consented
+      • The target user has explicitly shared their mailbox/OneDrive
+    """
+    def __init__(self, path: str, user_hint: str = ""):
+        self.path = path
+        self.user_hint = user_hint
+        who = f" for {user_hint}" if user_hint else ""
+        super().__init__(
+            f"403 Forbidden{who}: the signed-in account does not have permission "
+            f"to access this resource.\n"
+            f"  Path: {path}\n"
+            f"  Fix: the signed-in user must be a Global/Exchange Admin, OR an admin must "
+            f"grant Application permissions (Mail.Read, Files.Read.All, Sites.Read.All) "
+            f"in Azure → App registrations → API permissions → Grant admin consent."
+        )
+
+
+class M365DeltaTokenExpired(M365Error):
+    """Raised when a stored delta token is no longer valid (HTTP 410 Gone).
+    The caller should clear the token and fall back to a full scan."""
+    pass
+
+
+class M365Connector:
+    def __init__(self, client_id: str, tenant_id: str, client_secret: str = ""):
+        if not MSAL_OK:
+            raise M365Error("msal not installed — run: pip install msal")
+        if not REQUESTS_OK:
+            raise M365Error("requests not installed — run: pip install requests")
+
+        self.client_id     = client_id
+        self.tenant_id     = tenant_id
+        self.client_secret = client_secret.strip()
+        self._token: Optional[dict] = None
+        self._lock = threading.Lock()
+
+        authority = f"https://login.microsoftonline.com/{tenant_id}"
+
+        if self.client_secret:
+            # ── Application mode (client credentials) ─────────────────────────
+            self._app = msal.ConfidentialClientApplication(
+                client_id,
+                authority=authority,
+                client_credential=self.client_secret,
+            )
+            self._mode = "application"
+        else:
+            # ── Delegated mode (device code flow) ─────────────────────────────
+            cache = msal.SerializableTokenCache()
+            if _TOKEN_CACHE_FILE.exists():
+                try:
+                    cache.deserialize(_TOKEN_CACHE_FILE.read_text())
+                except Exception:
+                    pass
+            self._app = msal.PublicClientApplication(
+                client_id, authority=authority, token_cache=cache
+            )
+            self._mode = "delegated"
+
+    @property
+    def is_app_mode(self) -> bool:
+        return self._mode == "application"
+
+    # ── Auth ──────────────────────────────────────────────────────────────────
+
+    def _save_cache(self):
+        if self._mode == "delegated" and self._app.token_cache.has_state_changed:
+            try:
+                _TOKEN_CACHE_FILE.write_text(self._app.token_cache.serialize())
+            except Exception:
+                pass
+
+    def get_device_code_flow(self) -> dict:
+        """Start device code flow (delegated mode only)."""
+        if self._mode == "application":
+            raise M365Error("Device code flow is not used in application mode.")
+        flow = self._app.initiate_device_flow(scopes=SCOPES)
+        if "user_code" not in flow:
+            raise M365Error(f"Failed to start device flow: {flow.get('error_description', flow)}")
+        return flow
+
+    def complete_device_code_flow(self, flow: dict) -> bool:
+        """Poll until user completes auth. Returns True on success."""
+        result = self._app.acquire_token_by_device_flow(flow)
+        if "access_token" in result:
+            self._token = result
+            self._save_cache()
+            return True
+        raise M365Error(result.get("error_description", str(result)))
+
+    def try_silent_auth(self) -> bool:
+        """Try to get a token without user interaction."""
+        if self._mode == "application":
+            result = self._app.acquire_token_for_client(scopes=APP_SCOPES)
+            if result and "access_token" in result:
+                result["_acquired_at"] = time.time()
+                self._token = result
+                return True
+            return False
+        else:
+            accounts = self._app.get_accounts()
+            if not accounts:
+                return False
+            result = self._app.acquire_token_silent(SCOPES, account=accounts[0])
+            if result and "access_token" in result:
+                result["_acquired_at"] = time.time()
+                self._token = result
+                self._save_cache()
+                return True
+            return False
+
+    def get_access_token(self) -> str:
+        with self._lock:
+            if self._token and "access_token" in self._token:
+                expires_in = self._token.get("expires_in", 0)
+                acquired   = self._token.get("_acquired_at", time.time())
+                if time.time() < acquired + expires_in - 60:
+                    return self._token["access_token"]
+            if self.try_silent_auth():
+                return self._token["access_token"]
+        raise M365Error("Not authenticated — please sign in first.")
+
+    def authenticate_app_mode(self) -> bool:
+        """Acquire token via client credentials. Call once after init with client_secret."""
+        if self._mode != "application":
+            raise M365Error("authenticate_app_mode() called in delegated mode.")
+        result = self._app.acquire_token_for_client(scopes=APP_SCOPES)
+        if "access_token" in result:
+            result["_acquired_at"] = time.time()
+            self._token = result
+            return True
+        err = result.get("error_description") or result.get("error") or str(result)
+        raise M365Error(f"Client credentials auth failed: {err}")
+
+    def get_user_info(self) -> dict:
+        if self._mode == "application":
+            # /me is not available with app-only tokens — return a placeholder
+            return {"displayName": "App (service account)", "id": "", "mail": ""}
+        return self._get("/me")
+
+    def list_users(self, top: int = 999) -> list:
+        """List licensed domain users in the tenant (requires User.Read.All).
+
+        Tries a filtered query first; falls back to a plain /users call if the
+        tenant's directory index doesn't support $count + ConsistencyLevel
+        (in practice: on any failure of the first attempt other than a 403).
+
+        Args:
+            top: Page size sent as ``$top`` on the first request of each attempt.
+
+        Returns:
+            User dicts for enabled ``Member`` accounts that have a mail address
+            or UPN, are not ``#EXT#`` objects and hold at least one license,
+            sorted case-insensitively by ``displayName``.
+
+        Raises:
+            M365PermissionError: if Graph answers 403; this is re-raised rather
+                than triggering the fallback.
+        """
+        select = "id,displayName,mail,userPrincipalName,accountEnabled,userType,assignedLicenses"
+
+        def _fetch(params: dict, extra_headers: dict = None) -> list:
+            """Paginate through /users with given params, using _get() so 403s
+            are raised as M365PermissionError with the Graph error body."""
+            url = "/users"
+            all_items = []
+            # Query params belong only on the first request; nextLink URLs
+            # already embed them.
+            first = True
+            while url:
+                if extra_headers:
+                    # _get() doesn't support extra headers, so call requests directly
+                    full_url = url if url.startswith("http") else GRAPH_BASE + url
+                    r = _requests.get(full_url,
+                                      headers={**self._headers(), **extra_headers},
+                                      params=(params if first else None),
+                                      timeout=self._TIMEOUT_API)
+                    if r.status_code == 429:
+                        # NOTE(review): unlike _get(), this retry is unbounded and
+                        # int() rejects a non-integer Retry-After value — confirm
+                        # whether a retry cap is wanted here.
+                        time.sleep(int(r.headers.get("Retry-After", 5)))
+                        continue
+                    if r.status_code == 403:
+                        try:
+                            msg = r.json().get("error", {}).get("message", "")
+                        except Exception:
+                            msg = r.text[:200]
+                        raise M365PermissionError(url, msg)
+                    if not r.ok:
+                        try:
+                            err = r.json().get("error", {})
+                            msg = err.get("message") or err.get("code") or r.text[:300]
+                        except Exception:
+                            msg = r.text[:300]
+                        raise M365Error(f"Graph /users error {r.status_code}: {msg}")
+                    data = r.json()
+                else:
+                    data = self._get(url, params if first else None)
+                first = False
+                all_items.extend(data.get("value", []))
+                url = data.get("@odata.nextLink")
+            return all_items
+
+        # Attempt 1: filtered query with ConsistencyLevel (works on most tenants)
+        try:
+            users = _fetch(
+                params={
+                    "$top": str(top),
+                    "$filter": "accountEnabled eq true and userType eq 'Member'",
+                    "$select": select,
+                    "$count": "true",
+                },
+                extra_headers={"ConsistencyLevel": "eventual"},
+            )
+        except M365PermissionError:
+            # A permission problem will not be cured by the fallback query
+            raise
+        except Exception:
+            # Attempt 2: plain /users with no filter (works everywhere)
+            users = _fetch(params={"$top": str(top), "$select": select})
+            # Post-filter guests / disabled accounts
+            users = [u for u in users
+                     if u.get("accountEnabled")
+                     and u.get("userType", "Member") == "Member"]
+
+        # Post-filter: skip accounts with no mail, external sync objects,
+        # or no assigned licenses (service accounts, shared mailboxes, sync objects)
+        users = [
+            u for u in users
+            if (u.get("mail") or u.get("userPrincipalName", ""))
+            and "#EXT#" not in (u.get("userPrincipalName") or "")
+            and u.get("assignedLicenses")  # must have at least one license
+        ]
+        users.sort(key=lambda u: (u.get("displayName") or "").lower())
+        return users
+
+    # ── User-scoped variants (scan other users as admin) ──────────────────────
+
+    def list_mail_folders_for(self, user_id: str) -> list:
+        return list(self._paginate(f"/users/{user_id}/mailFolders", {"$top": "100"}))
+
+
+    def iter_messages_for(self, user_id: str, folder_id: str = "inbox", top: int = 50) -> Iterator[dict]:
+        path = f"/users/{user_id}/mailFolders/{folder_id}/messages"
+        params = {
+            "$top": str(top),
+            "$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,hasAttachments,bodyPreview,body,parentFolderId",
+            "$orderby": "receivedDateTime desc",
+        }
+        yield from self._paginate(path, params)
+
+    def iter_message_attachments_for(self, user_id: str, message_id: str) -> Iterator[dict]:
+        try:
+            yield from self._paginate(
+                f"/users/{user_id}/messages/{message_id}/attachments", {"$top": "100"}
+            )
+        except Exception:
+            return
+
+    def download_attachment_for(self, user_id: str, message_id: str, attachment_id: str) -> bytes:
+        import base64 as _b64
+        data = self._get(f"/users/{user_id}/messages/{message_id}/attachments/{attachment_id}")
+        return _b64.b64decode(data.get("contentBytes", "") or "")
+
+    def iter_onedrive_files_for(self, user_id: str, display_name: str = "") -> Iterator[dict]:
+        label = display_name or user_id
+        yield from self._iter_drive_folder_for(user_id, f"/users/{user_id}/drive/root", f"OneDrive ({label})")
+
+    def _iter_drive_folder_for(self, user_id: str, item_path: str, source: str) -> Iterator[dict]:
+        path = f"{item_path}/children"
+        try:
+            items = list(self._paginate(path, {"$top": "200", "$select": "id,name,file,folder,size,webUrl,lastModifiedDateTime,parentReference,shared"}))
+        except Exception:
+            return
+        for item in items:
+            if item.get("folder"):
+                next_path = f"/users/{user_id}/drive/items/{item['id']}"
+                yield from self._iter_drive_folder_for(user_id, next_path, source)
+            elif item.get("file"):
+                item["_source"] = source
+                item["_user_id"] = user_id
+                yield item
+
+    def download_drive_item_for(self, user_id: str, item_id: str) -> bytes:
+        url = f"{GRAPH_BASE}/users/{user_id}/drive/items/{item_id}/content"
+        return self._get_bytes(url)
+
+    def iter_teams_files_for(self, user_id: str, display_name: str = "") -> Iterator[dict]:
+        """Yield Teams files for a specific user."""
+        try:
+            teams = list(self._paginate(f"/users/{user_id}/joinedTeams", {"$top": "50"}))
+        except Exception:
+            return
+        for team in teams:
+            yield from self.iter_teams_files(team["id"], team.get("displayName", ""))
+
+
+    def is_authenticated(self) -> bool:
+        try:
+            self.get_access_token()
+            return True
+        except M365Error:
+            return False
+
+    def sign_out(self):
+        accounts = self._app.get_accounts()
+        for acc in accounts:
+            self._app.remove_account(acc)
+        self._token = None
+        if _TOKEN_CACHE_FILE.exists():
+            _TOKEN_CACHE_FILE.unlink()
+
+    # ── HTTP helpers ──────────────────────────────────────────────────────────
+
+    def _headers(self) -> dict:
+        return {
+            "Authorization": f"Bearer {self.get_access_token()}",
+            "Accept": "application/json",
+        }
+
+    # ── Timeouts ──────────────────────────────────────────────────────────────
+    # Tuple: (connect_timeout, read_timeout) — tight connect, generous read.
+    # File downloads get extra read time for slow wireless connections.
+    _TIMEOUT_API   = (10, 45)   # JSON API calls
+    _TIMEOUT_BYTES = (10, 120)  # File / attachment downloads
+
+    # Network errors that are safe to retry (transient dropouts, resets)
+    _RETRYABLE_ERRORS = (
+        _requests.exceptions.ConnectionError,
+        _requests.exceptions.Timeout,
+        _requests.exceptions.ChunkedEncodingError,
+        _requests.exceptions.ReadTimeout,
+    )
+    _MAX_RETRIES = 4            # total attempts (1 original + 3 retries)
+    _BACKOFF_BASE = 2           # seconds: 2, 4, 8 between retries
+
+    def _backoff_sleep(self, attempt: int, extra: float = 0) -> None:
+        """Sleep 2^attempt seconds (capped at 30) plus any server-requested delay."""
+        wait = min(self._BACKOFF_BASE ** attempt, 30) + extra
+        time.sleep(wait)
+
+    def _get(self, path: str, params: dict = None, _retry: bool = True) -> dict:
+        """GET a Graph resource and return the decoded JSON body.
+
+        Args:
+            path: Graph-relative path, or an absolute ``http...`` URL (used for
+                ``@odata.nextLink`` / delta links).
+            params: Optional query parameters.
+            _retry: Internal flag — False on the single re-issue that follows a
+                401-triggered silent re-authentication.
+
+        Raises:
+            M365DeltaTokenExpired: on HTTP 410.
+            M365PermissionError: on HTTP 403.
+            requests.exceptions.HTTPError: on any other error status, via
+                ``raise_for_status()``.
+            requests.exceptions.RetryError: when the attempts are used up on
+                429 responses.
+        """
+        url = path if path.startswith("http") else GRAPH_BASE + path
+        for attempt in range(self._MAX_RETRIES):
+            try:
+                r = _requests.get(url, headers=self._headers(),
+                                  params=params, timeout=self._TIMEOUT_API)
+            except self._RETRYABLE_ERRORS:
+                # Transient network failure: re-raise only on the final attempt
+                if attempt == self._MAX_RETRIES - 1:
+                    raise
+                self._backoff_sleep(attempt)
+                continue
+
+            if r.status_code == 429:
+                # Throttled: wait the backoff plus the server's Retry-After
+                self._backoff_sleep(attempt, float(r.headers.get("Retry-After", 5)))
+                continue
+            if r.status_code == 503 or r.status_code == 504:
+                # Gateway timeout / service unavailable — transient, retry
+                # (on the final attempt this falls through to raise_for_status)
+                if attempt < self._MAX_RETRIES - 1:
+                    self._backoff_sleep(attempt)
+                    continue
+            if r.status_code == 410:
+                raise M365DeltaTokenExpired(f"410 Gone — delta token expired: {path}")
+            if r.status_code == 401 and _retry:
+                # Token rejected: drop it and re-issue once with a fresh token.
+                # If silent auth fails, raise_for_status() below reports the 401.
+                self._token = None
+                if self.try_silent_auth():
+                    return self._get(path, params, _retry=False)
+            if r.status_code == 403:
+                # Surface Graph's own explanation when the body is JSON
+                try:
+                    msg = r.json().get("error", {}).get("message", "")
+                except Exception:
+                    msg = r.text[:200]
+                raise M365PermissionError(path, msg)
+            r.raise_for_status()
+            return r.json()
+        raise _requests.exceptions.RetryError(f"Gave up after {self._MAX_RETRIES} attempts: {url}")
+
+    def _post(self, path: str, body: dict, _retry: bool = True) -> dict:
+        """POST a JSON body to a Graph resource and return the decoded JSON reply.
+
+        Retry and error handling mirror ``_get()``, except that HTTP 410 gets
+        no special treatment here.
+
+        Args:
+            path: Graph-relative path, or an absolute ``http...`` URL.
+            body: Payload, sent JSON-encoded.
+            _retry: Internal flag — False on the single re-issue that follows a
+                401-triggered silent re-authentication.
+
+        Raises:
+            M365PermissionError: on HTTP 403.
+            requests.exceptions.HTTPError: on any other error status.
+            requests.exceptions.RetryError: when the attempts are used up on
+                429 responses.
+        """
+        url = path if path.startswith("http") else GRAPH_BASE + path
+        # Headers are built once, so every attempt in this loop reuses the
+        # same bearer token.
+        headers = {**self._headers(), "Content-Type": "application/json"}
+        for attempt in range(self._MAX_RETRIES):
+            try:
+                r = _requests.post(url, headers=headers, json=body,
+                                   timeout=self._TIMEOUT_API)
+            except self._RETRYABLE_ERRORS:
+                # NOTE(review): a POST whose response was lost may already have
+                # been applied server-side — confirm re-sending is acceptable
+                # for every caller.
+                if attempt == self._MAX_RETRIES - 1:
+                    raise
+                self._backoff_sleep(attempt)
+                continue
+
+            if r.status_code == 429:
+                # Throttled: wait the backoff plus the server's Retry-After
+                self._backoff_sleep(attempt, float(r.headers.get("Retry-After", 5)))
+                continue
+            if r.status_code == 503 or r.status_code == 504:
+                # Transient; on the final attempt this falls through to
+                # raise_for_status()
+                if attempt < self._MAX_RETRIES - 1:
+                    self._backoff_sleep(attempt)
+                    continue
+            if r.status_code == 401 and _retry:
+                # Token rejected: drop it and re-issue once with a fresh token
+                self._token = None
+                if self.try_silent_auth():
+                    return self._post(path, body, _retry=False)
+            if r.status_code == 403:
+                try:
+                    msg = r.json().get("error", {}).get("message", "")
+                except Exception:
+                    msg = r.text[:200]
+                raise M365PermissionError(path, msg)
+            r.raise_for_status()
+            # NOTE(review): assumes every successful reply has a JSON body; an
+            # empty reply would make r.json() raise — confirm for the endpoints used.
+            return r.json()
+        raise _requests.exceptions.RetryError(f"Gave up after {self._MAX_RETRIES} attempts: {url}")
+
+    def _get_bytes(self, url: str, _retry: bool = True) -> bytes:
+        """Download binary content (file / attachment) with streaming and retry."""
+        for attempt in range(self._MAX_RETRIES):
+            try:
+                r = _requests.get(url, headers=self._headers(),
+                                  timeout=self._TIMEOUT_BYTES, stream=True)
+            except self._RETRYABLE_ERRORS:
+                if attempt == self._MAX_RETRIES - 1:
+                    raise
+                self._backoff_sleep(attempt)
+                continue
+
+            if r.status_code == 429:
+                self._backoff_sleep(attempt, float(r.headers.get("Retry-After", 5)))
+                continue
+            if r.status_code == 503 or r.status_code == 504:
+                if attempt < self._MAX_RETRIES - 1:
+                    self._backoff_sleep(attempt)
+                    continue
+            if r.status_code == 401 and _retry:
+                self._token = None
+                if self.try_silent_auth():
+                    return self._get_bytes(url, _retry=False)
+            if r.status_code == 403:
+                try:
+                    msg = r.json().get("error", {}).get("message", "")
+                except Exception:
+                    msg = r.text[:200]
+                raise M365PermissionError(url, msg)
+            r.raise_for_status()
+            # Stream in chunks — avoids loading entire file into memory at once
+            # and allows the read timeout to apply per-chunk rather than total
+            chunks = []
+            for chunk in r.iter_content(chunk_size=65536):
+                if chunk:
+                    chunks.append(chunk)
+            return b"".join(chunks)
+        raise _requests.exceptions.RetryError(f"Gave up after {self._MAX_RETRIES} attempts: {url}")
+
+    def _delete(self, path: str, _retry: bool = True) -> bool:
+        url = path if path.startswith("http") else GRAPH_BASE + path
+        for attempt in range(self._MAX_RETRIES):
+            try:
+                r = _requests.delete(url, headers=self._headers(),
+                                     timeout=self._TIMEOUT_API)
+            except self._RETRYABLE_ERRORS:
+                if attempt == self._MAX_RETRIES - 1:
+                    raise
+                self._backoff_sleep(attempt)
+                continue
+
+            if r.status_code == 429:
+                self._backoff_sleep(attempt, float(r.headers.get("Retry-After", 5)))
+                continue
+            if r.status_code == 503 or r.status_code == 504:
+                if attempt < self._MAX_RETRIES - 1:
+                    self._backoff_sleep(attempt)
+                    continue
+            if r.status_code == 401 and _retry:
+                self._token = None
+                if self.try_silent_auth():
+                    return self._delete(path, _retry=False)
+            if r.status_code == 403:
+                try:
+                    msg = r.json().get("error", {}).get("message", "")
+                except Exception:
+                    msg = r.text[:200]
+                raise M365PermissionError(path, msg)
+            if r.status_code == 404:
+                return False  # already deleted
+            r.raise_for_status()
+            return True  # 204 No Content = success
+        raise _requests.exceptions.RetryError(f"Gave up after {self._MAX_RETRIES} attempts: {url}")
+        """Move an email to Deleted Items (soft delete)."""
+        base = "/me" if (not user_id or user_id == "me") else f"/users/{user_id}"
+        try:
+            self._post(f"{base}/messages/{message_id}/move", {"destinationId": "deleteditems"})
+            return True
+        except Exception:
+            return self._delete(f"{base}/messages/{message_id}")
+
+    def delete_drive_item(self, drive_id: str, item_id: str) -> bool:
+        """Delete a OneDrive/SharePoint/Teams file (moves to recycle bin)."""
+        return self._delete(f"/drives/{drive_id}/items/{item_id}")
+
+    def delete_drive_item_for_user(self, user_id: str, item_id: str) -> bool:
+        """Delete a drive item via user path as fallback."""
+        base = "/me" if (not user_id or user_id == "me") else f"/users/{user_id}"
+        return self._delete(f"{base}/drive/items/{item_id}")
+
+    def _paginate(self, path: str, params: dict = None) -> Iterator[dict]:
+        """Yield all items across paginated Graph responses."""
+        url = path if path.startswith("http") else GRAPH_BASE + path
+        while url:
+            data = self._get(url, params=params)
+            params = None  # only on first request
+            yield from data.get("value", [])
+            url = data.get("@odata.nextLink")
+
+    def _paginate_delta(self, path: str, params: dict = None,
+                        delta_url: str = None) -> tuple[list, str | None]:
+        """Exhaust a delta query and return (items, new_delta_link).
+
+        Pass *delta_url* to resume from a previously saved deltaLink token.
+        The returned delta_link should be stored by the caller and passed back
+        on the next run to receive only changed items.
+        """
+        url = delta_url or (path if path.startswith("http") else GRAPH_BASE + path)
+        items: list = []
+        delta_link: str | None = None
+        while url:
+            data = self._get(url, params=params)
+            params = None
+            items.extend(data.get("value", []))
+            delta_link = data.get("@odata.deltaLink") or delta_link
+            url = data.get("@odata.nextLink")
+        return items, delta_link
+
+    # ── Delta iterators ───────────────────────────────────────────────────────
+
+    def iter_onedrive_delta_for(self, user_id: str, display_name: str = "",
+                                delta_url: str = None) -> tuple[list, str | None]:
+        """Return (changed_file_items, new_delta_url) for a user's OneDrive.
+
+        Items with 'deleted' key are removed items — callers should skip them
+        for CPR scanning but may use them to prune result sets.
+        On first call (delta_url=None) returns ALL files plus a token.
+        Subsequent calls with the saved token return only changes.
+        """
+        label = display_name or user_id
+        path  = f"/users/{user_id}/drive/root/delta"
+        params = {"$select": "id,name,size,file,folder,parentReference,lastModifiedDateTime,webUrl,deleted"}
+        try:
+            raw, new_token = self._paginate_delta(path, params=params, delta_url=delta_url)
+        except M365Error as e:
+            if "410" in str(e) or "resync" in str(e).lower() or "deltaToken" in str(e):
+                # Token expired — caller should clear and retry as full scan
+                raise M365DeltaTokenExpired(f"OneDrive delta token expired for {label}")
+            raise
+        items = []
+        for item in raw:
+            if item.get("folder"):
+                continue  # skip folder entries
+            item["_source"]    = f"OneDrive ({label})"
+            item["_user_id"]   = user_id
+            item["_source_type"] = "onedrive"
+            items.append(item)
+        return items, new_token
+
+    def iter_onedrive_delta(self, delta_url: str = None) -> tuple[list, str | None]:
+        """Delegated-mode OneDrive delta for the signed-in user."""
+        path   = "/me/drive/root/delta"
+        params = {"$select": "id,name,size,file,folder,parentReference,lastModifiedDateTime,webUrl,deleted"}
+        try:
+            raw, new_token = self._paginate_delta(path, params=params, delta_url=delta_url)
+        except M365Error as e:
+            if "410" in str(e) or "resync" in str(e).lower():
+                raise M365DeltaTokenExpired("OneDrive delta token expired for /me")
+            raise
+        items = []
+        for item in raw:
+            if item.get("folder"):
+                continue
+            item["_source"]      = "OneDrive"
+            item["_source_type"] = "onedrive"
+            items.append(item)
+        return items, new_token
+
+    def iter_drive_delta(self, drive_id: str, source_label: str,
+                         delta_url: str = None) -> tuple[list, str | None]:
+        """Delta query for any drive (SharePoint document library or Teams channel).
+
+        Returns (changed_file_items, new_delta_url).
+        """
+        path   = f"/drives/{drive_id}/root/delta"
+        params = {"$select": "id,name,size,file,folder,parentReference,lastModifiedDateTime,webUrl,deleted"}
+        try:
+            raw, new_token = self._paginate_delta(path, params=params, delta_url=delta_url)
+        except M365Error as e:
+            if "410" in str(e) or "resync" in str(e).lower():
+                raise M365DeltaTokenExpired(f"Drive delta token expired for {drive_id}")
+            raise
+        items = []
+        for item in raw:
+            if item.get("folder"):
+                continue
+            item["_source"]      = source_label
+            item["_drive_id"]    = drive_id
+            item["_source_type"] = "sharepoint"
+            items.append(item)
+        return items, new_token
+
+    def iter_messages_delta_for(self, user_id: str, folder_id: str,
+                                delta_url: str = None,
+                                top: int = 500) -> tuple[list, str | None]:
+        """Delta query for a mail folder for a specific user.
+
+        Returns (changed_message_items, new_delta_url).
+        """
+        path   = f"/users/{user_id}/mailFolders/{folder_id}/messages/delta"
+        params = {
+            "$top":    str(top),
+            "$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,hasAttachments,bodyPreview,body,parentFolderId",
+        }
+        try:
+            raw, new_token = self._paginate_delta(path, params=params, delta_url=delta_url)
+        except M365Error as e:
+            if "410" in str(e) or "resync" in str(e).lower():
+                raise M365DeltaTokenExpired(f"Email delta token expired for {user_id}/{folder_id}")
+            raise
+        return raw, new_token
+
+    def iter_messages_delta(self, folder_id: str,
+                            delta_url: str = None,
+                            top: int = 500) -> tuple[list, str | None]:
+        """Delegated-mode email delta for the signed-in user."""
+        path   = f"/me/mailFolders/{folder_id}/messages/delta"
+        params = {
+            "$top":    str(top),
+            "$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,hasAttachments,bodyPreview,body,parentFolderId",
+        }
+        try:
+            raw, new_token = self._paginate_delta(path, params=params, delta_url=delta_url)
+        except M365Error as e:
+            if "410" in str(e) or "resync" in str(e).lower():
+                raise M365DeltaTokenExpired(f"Email delta token expired for /me/{folder_id}")
+            raise
+        return raw, new_token
+
+    # ── Exchange / Outlook ────────────────────────────────────────────────────
+
+    def list_mail_folders(self) -> list:
+        folders = list(self._paginate("/me/mailFolders", {"$top": "100"}))
+        return folders
+
+    def list_all_mail_folders(self, errors_out: list = None) -> list:
+        """Return all mail folders recursively (including subfolders)."""
+        def _recurse(folder_id: str, path: str, base: str, depth: int = 0) -> list:
+            if depth > 10:
+                return []
+            result = []
+            try:
+                children = list(self._paginate(
+                    f"{base}/mailFolders/{folder_id}/childFolders",
+                    {"$top": "100", "$select": "id,displayName,totalItemCount,childFolderCount"}
+                ))
+            except Exception as e:
+                if errors_out is not None:
+                    errors_out.append(f"{path}: {e}")
+                return result
+            for child in children:
+                child["_display_path"] = path + " / " + child.get("displayName", "")
+                result.append(child)
+                result.extend(_recurse(child["id"], child["_display_path"], base, depth + 1))
+            return result
+
+        base = "/me"
+        top_folders = list(self._paginate(
+            f"{base}/mailFolders",
+            {"$top": "100", "$select": "id,displayName,totalItemCount,childFolderCount"}
+        ))
+        all_folders = []
+        for f in top_folders:
+            f["_display_path"] = f.get("displayName", "")
+            all_folders.append(f)
+            all_folders.extend(_recurse(f["id"], f["_display_path"], base))
+        return all_folders
+
+    def list_all_mail_folders_for(self, user_id: str, errors_out: list = None) -> list:
+        """Return all mail folders recursively for a specific user."""
+        def _recurse(folder_id: str, path: str, depth: int = 0) -> list:
+            if depth > 10:
+                return []
+            result = []
+            try:
+                children = list(self._paginate(
+                    f"/users/{user_id}/mailFolders/{folder_id}/childFolders",
+                    {"$top": "100", "$select": "id,displayName,totalItemCount,childFolderCount"}
+                ))
+            except Exception as e:
+                if errors_out is not None:
+                    errors_out.append(f"{path}: {e}")
+                return result
+            for child in children:
+                child["_display_path"] = path + " / " + child.get("displayName", "")
+                result.append(child)
+                result.extend(_recurse(child["id"], child["_display_path"], depth + 1))
+            return result
+
+        top_folders = list(self._paginate(
+            f"/users/{user_id}/mailFolders",
+            {"$top": "100", "$select": "id,displayName,totalItemCount,childFolderCount"}
+        ))
+        all_folders = []
+        for f in top_folders:
+            f["_display_path"] = f.get("displayName", "")
+            all_folders.append(f)
+            all_folders.extend(_recurse(f["id"], f["_display_path"]))
+        return all_folders
+
+    def count_messages(self, folder_id: str = "inbox") -> int:
+        try:
+            data = self._get(f"/me/mailFolders/{folder_id}", {"$select": "totalItemCount"})
+            return data.get("totalItemCount", 0)
+        except Exception:
+            return 0
+
+    def iter_messages(self, folder_id: str = "inbox", top: int = 50) -> Iterator[dict]:
+        """Yield message metadata dicts."""
+        path = f"/me/mailFolders/{folder_id}/messages"
+        params = {
+            "$top": str(top),
+            "$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,hasAttachments,bodyPreview,body,parentFolderId",
+            "$orderby": "receivedDateTime desc",
+        }
+        yield from self._paginate(path, params)
+
+    def get_message_body_text(self, msg: dict) -> str:
+        """Extract plain text from message body."""
+        body = msg.get("body", {})
+        content = body.get("content", "")
+        if body.get("contentType", "").lower() == "html":
+            # Strip HTML tags simply
+            import re
+            content = re.sub(r"<[^>]+>", " ", content)
+            content = re.sub(r"&nbsp;", " ", content)
+            content = re.sub(r"&[a-z]+;", "", content)
+        return content
+
+    def iter_message_attachments(self, message_id: str) -> Iterator[dict]:
+        """Yield attachment metadata (with contentBytes for small files)."""
+        path = f"/me/messages/{message_id}/attachments"
+        params = {"$top": "100"}
+        try:
+            yield from self._paginate(path, params)
+        except Exception:
+            return
+
+    def download_attachment(self, message_id: str, attachment_id: str) -> bytes:
+        data = self._get(f"/me/messages/{message_id}/attachments/{attachment_id}")
+        import base64 as _b64
+        content = data.get("contentBytes", "")
+        return _b64.b64decode(content) if content else b""
+
+    # ── OneDrive ──────────────────────────────────────────────────────────────
+
+    def iter_onedrive_files(self, folder_path: str = "root") -> Iterator[dict]:
+        """Recursively yield all files in OneDrive."""
+        yield from self._iter_drive_folder("/me/drive/root", "OneDrive")
+
+    def _iter_drive_folder(self, item_path: str, source: str) -> Iterator[dict]:
+        path = f"{item_path}/children"
+        try:
+            items = list(self._paginate(path, {"$top": "200", "$select": "id,name,file,folder,size,webUrl,lastModifiedDateTime,parentReference,shared"}))
+        except Exception:
+            return
+        for item in items:
+            if item.get("folder"):
+                next_path = f"/me/drive/items/{item['id']}"
+                yield from self._iter_drive_folder(next_path, source)
+            elif item.get("file"):
+                item["_source"] = source
+                yield item
+
+    def download_drive_item(self, item_id: str, drive_id: str = None) -> bytes:
+        if drive_id:
+            url = f"{GRAPH_BASE}/drives/{drive_id}/items/{item_id}/content"
+        else:
+            url = f"{GRAPH_BASE}/me/drive/items/{item_id}/content"
+        return self._get_bytes(url)
+
+    # ── SharePoint ────────────────────────────────────────────────────────────
+
+    def list_sharepoint_sites(self) -> list:
+        try:
+            data = self._get("/sites", {"search": "*", "$top": "50"})
+            return data.get("value", [])
+        except Exception:
+            return []
+
+    def iter_sharepoint_files(self, site_id: str, site_name: str = "") -> Iterator[dict]:
+        """Recursively yield all files in a SharePoint site's default drive."""
+        try:
+            drives = list(self._paginate(f"/sites/{site_id}/drives", {"$top": "20"}))
+        except Exception:
+            return
+        for drive in drives:
+            drive_id = drive["id"]
+            yield from self._iter_sharepoint_drive(drive_id, f"/drives/{drive_id}/root", site_name or drive.get("name", "SharePoint"))
+
+    def _iter_sharepoint_drive(self, drive_id: str, item_path: str, source: str) -> Iterator[dict]:
+        path = f"{item_path}/children"
+        try:
+            items = list(self._paginate(path, {"$top": "200", "$select": "id,name,file,folder,size,webUrl,lastModifiedDateTime,parentReference,shared"}))
+        except Exception:
+            return
+        for item in items:
+            if item.get("folder"):
+                next_path = f"/drives/{drive_id}/items/{item['id']}"
+                yield from self._iter_sharepoint_drive(drive_id, next_path, source)
+            elif item.get("file"):
+                item["_source"] = source
+                item["_drive_id"] = drive_id
+                yield item
+
+    def download_sharepoint_item(self, drive_id: str, item_id: str) -> bytes:
+        url = f"{GRAPH_BASE}/drives/{drive_id}/items/{item_id}/content"
+        return self._get_bytes(url)
+
+    # ── Teams ─────────────────────────────────────────────────────────────────
+
+    def list_all_teams(self) -> list:
+        """List all Teams in the tenant using /groups filter (app-only compatible).
+        Falls back to /teams if the groups endpoint is unavailable."""
+        try:
+            return list(self._paginate(
+                "/groups",
+                {
+                    "$filter": "resourceProvisioningOptions/Any(x:x eq 'Team')",
+                    "$select": "id,displayName",
+                    "$top":    "999",
+                }
+            ))
+        except Exception:
+            try:
+                return list(self._paginate("/teams", {"$top": "999", "$select": "id,displayName"}))
+            except Exception:
+                return []
+
+    def list_teams(self) -> list:
+        """Delegated-mode: return teams the signed-in user is a member of."""
+        try:
+            return list(self._paginate("/me/joinedTeams", {"$top": "50"}))
+        except Exception:
+            return []
+
+    def get_team_members(self, team_id: str) -> list:
+        """Return member user IDs for a team (app-only compatible)."""
+        try:
+            members = list(self._paginate(
+                f"/groups/{team_id}/members",
+                {"$select": "id", "$top": "999"}
+            ))
+            return [m["id"] for m in members if m.get("id")]
+        except Exception:
+            return []
+
+    def iter_teams_files(self, team_id: str, team_name: str = "") -> Iterator[dict]:
+        """Yield files from all channels in a Team (backed by SharePoint)."""
+        try:
+            channels = list(self._paginate(f"/teams/{team_id}/channels", {"$top": "50"}))
+        except Exception:
+            return
+        for ch in channels:
+            ch_id   = ch["id"]
+            ch_name = ch.get("displayName", ch_id)
+            source  = f"Teams / {team_name} / {ch_name}"
+            try:
+                # Get the SharePoint folder for this channel
+                data = self._get(f"/teams/{team_id}/channels/{ch_id}/filesFolder")
+                drive_id    = data.get("parentReference", {}).get("driveId")
+                item_id     = data.get("id")
+                if drive_id and item_id:
+                    yield from self._iter_sharepoint_drive(
+                        drive_id, f"/drives/{drive_id}/items/{item_id}", source
+                    )
+            except Exception:
+                continue
+
+    # ── Convenience: download any item ───────────────────────────────────────
+
+    def download_item(self, item: dict) -> bytes:
+        """Download file bytes for any drive item dict."""
+        drive_id = item.get("_drive_id") or item.get("parentReference", {}).get("driveId")
+        item_id  = item["id"]
+        if drive_id:
+            return self.download_sharepoint_item(drive_id, item_id)
+        return self.download_drive_item(item_id)
+
+    # ── License / role classification ─────────────────────────────────────────
+
+    # SKU IDs and part-number fragments are loaded from classification/m365_skus.json
+    # at import time.  Edit that file to add new SKUs — no code change needed.
+    # The two ID sets must remain disjoint: classify_user_role() checks staff
+    # before student, so an ID listed in both always resolves to 'staff'.
+
+    @classmethod
+    def _sku_file_path(cls) -> Path:
+        """Resolve classification/m365_skus.json correctly both normally and in a PyInstaller bundle."""
+        import sys as _sys
+        if getattr(_sys, "frozen", False) and hasattr(_sys, "_MEIPASS"):
+            return Path(_sys._MEIPASS) / "classification" / "m365_skus.json"
+        return Path(__file__).parent / "classification" / "m365_skus.json"
+
+    @classmethod
+    def _load_sku_data(cls) -> None:
+        """Load SKU IDs and fragments from classification/m365_skus.json.
+
+        Falls back silently to empty sets if the file is missing or malformed —
+        the skuPartNumber fragment fallback in classify_user_role() still works
+        when get_subscribed_skus() succeeds, and manual role overrides are always
+        available as a last resort.
+        """
+        try:
+            import json as _j
+            data = _j.loads(cls._sku_file_path().read_text(encoding="utf-8"))
+            cls._STUDENT_SKU_IDS  = set(data.get("student_ids", {}).keys())
+            cls._STAFF_SKU_IDS    = set(data.get("staff_ids",   {}).keys())
+            cls._STUDENT_SKU_LABELS = dict(data.get("student_ids", {}))
+            cls._STAFF_SKU_LABELS   = dict(data.get("staff_ids",   {}))
+            cls._STUDENT_SKU_FRAGMENTS = tuple(data.get("student_fragments", []))
+            cls._STAFF_SKU_FRAGMENTS   = tuple(data.get("staff_fragments",   []))
+            overlap = cls._STUDENT_SKU_IDS & cls._STAFF_SKU_IDS
+            if overlap:
+                import warnings
+                warnings.warn(
+                    f"[m365_skus.json] SKU ID collision between student_ids and staff_ids "
+                    f"— these will always resolve to 'student': {overlap}",
+                    RuntimeWarning, stacklevel=2,
+                )
+            logger.info("[skus] Loaded %d student + %d staff SKUs from classification/m365_skus.json",
+                        len(cls._STUDENT_SKU_IDS), len(cls._STAFF_SKU_IDS))
+        except FileNotFoundError:
+            logger.warning("[skus] classification/m365_skus.json not found — role classification uses fragment fallback only")
+            cls._STUDENT_SKU_IDS = set()
+            cls._STAFF_SKU_IDS   = set()
+            cls._STUDENT_SKU_LABELS = {}
+            cls._STAFF_SKU_LABELS   = {}
+            cls._STUDENT_SKU_FRAGMENTS = ("STUDENT",)
+            cls._STAFF_SKU_FRAGMENTS   = ("FACULTY", "TEACHER")
+        except Exception as e:
+            logger.error("[skus] Failed to load classification/m365_skus.json: %s", e)
+            cls._STUDENT_SKU_IDS = set()
+            cls._STAFF_SKU_IDS   = set()
+            cls._STUDENT_SKU_LABELS = {}
+            cls._STAFF_SKU_LABELS   = {}
+            cls._STUDENT_SKU_FRAGMENTS = ("STUDENT",)
+            cls._STAFF_SKU_FRAGMENTS   = ("FACULTY", "TEACHER")
+
+    # Class-level defaults for the SKU classification data.  They are overwritten
+    # by _load_sku_data(), which is called once at import time at the end of this
+    # module, and are treated as read-only after that.
+    _STUDENT_SKU_IDS:       set   = set()
+    _STAFF_SKU_IDS:         set   = set()
+    _STUDENT_SKU_LABELS:    dict  = {}
+    _STAFF_SKU_LABELS:      dict  = {}
+    _STUDENT_SKU_FRAGMENTS: tuple = ()
+    _STAFF_SKU_FRAGMENTS:   tuple = ()
+
+    def get_subscribed_skus(self) -> dict:
+        """Return a mapping of {skuId: skuPartNumber} for the tenant.
+
+        Tries three endpoints in order, using whichever the token permits:
+
+        1. /subscribedSkus          — requires Directory.Read.All (admin)
+                                      returns ALL tenant SKUs in one call
+        2. /me/licenseDetails       — requires only User.Read (delegated)
+                                      returns the signed-in user's SKUs only
+        3. /users/{id}/licenseDetails for each user already fetched
+                                      requires User.Read.All; covers all users
+
+        Returns {skuId: skuPartNumber}.  An empty dict means no endpoint
+        succeeded — role classification will fall back to the hardcoded
+        SKU ID sets in m365_skus.json only.
+        """
+        # Attempt 1: tenant-wide (admin)
+        try:
+            data = self._get("/subscribedSkus", {"$select": "skuId,skuPartNumber"})
+            result = {s["skuId"]: s["skuPartNumber"]
+                      for s in data.get("value", []) if s.get("skuId")}
+            if result:
+                logger.info("[skus] sku_map via /subscribedSkus: %d entries", len(result))
+                return result
+        except Exception:
+            pass
+
+        # Attempt 2: signed-in user's own license details (delegated, User.Read only)
+        result = {}
+        try:
+            data = self._get("/me/licenseDetails", {"$select": "skuId,skuPartNumber"})
+            for item in data.get("value", []):
+                if item.get("skuId") and item.get("skuPartNumber"):
+                    result[item["skuId"]] = item["skuPartNumber"]
+        except Exception:
+            pass
+
+        if result:
+            logger.info("[skus] sku_map via /me/licenseDetails: %d entries (partial — add Directory.Read.All for full coverage)", len(result))
+            return result
+
+        logger.warning("[skus] could not fetch skuPartNumber from any endpoint — role classification uses SKU ID matching only")
+        return {}
+
+    def build_sku_map_from_users(self, users: list, max_calls: int = 30) -> dict:
+        """Build a {skuId: skuPartNumber} map by calling /users/{id}/licenseDetails
+        for a spread of users across the full list.  Requires User.Read.All.
+
+        Samples evenly across the entire user list rather than taking the first N,
+        so that both student and staff SKUs are discovered even when users are
+        sorted alphabetically and staff appear only later in the list.
+        """
+        if not users:
+            return {}
+        result = {}
+        # Pick indices spread evenly across the full list
+        n = len(users)
+        step = max(1, n // max_calls)
+        indices = list(range(0, n, step))[:max_calls]
+        # Always include the last few in case staff sort at end
+        for tail_idx in range(max(0, n - 5), n):
+            if tail_idx not in indices:
+                indices.append(tail_idx)
+        for i in indices:
+            u = users[i]
+            uid = u.get("id", "")
+            if not uid:
+                continue
+            try:
+                data = self._get(f"/users/{uid}/licenseDetails",
+                                 {"$select": "skuId,skuPartNumber"})
+                for item in data.get("value", []):
+                    if item.get("skuId") and item.get("skuPartNumber"):
+                        result[item["skuId"]] = item["skuPartNumber"]
+            except Exception:
+                pass
+            # Stop early if we've seen both student and staff SKU types
+            if result and len(result) >= 4:
+                break
+        return result
+
+    def classify_user_role(self, assigned_licenses: list,
+                           sku_map: dict) -> str:
+        """Return 'student', 'staff', or 'other' based on assigned O365 licenses.
+
+        Classification order:
+        1. SKU IDs from classification/m365_skus.json (loaded at startup, no extra permissions)
+        2. skuPartNumber fragment matching via sku_map (requires subscribedSkus)
+        3. Falls back to 'other'
+
+        To add new SKUs: edit classification/m365_skus.json — no code change needed.
+        If auto-classification is still wrong for specific users, use the
+        manual role override in the UI (role badge on each user row).
+        """
+        # ── Helper: resolve skuPartNumber for a licence ─────────────────────
+        def _sku_name(lic: dict) -> str:
+            sid = lic.get("skuId", "").lower()
+            return sku_map.get(sid, sku_map.get(lic.get("skuId", ""), "")).upper()
+
+        # ── Pass 1: skuPartNumber fragment match (preferred) ─────────────────
+        # Fragment matching is done FIRST when sku_map is available because
+        # Microsoft's part-number strings (e.g. STANDARDWOFFPACK_FACULTY) are
+        # stable across all SKU ID generations — EA, A1/A3/A5, new commerce,
+        # CSP, benefit variants — while UUIDs change with every new SKU issuance.
+        # Staff fragments checked across ALL licences before student, so a
+        # STUDENT_BENEFIT add-on cannot mask a FACULTY licence.
+        if sku_map:
+            if any(any(f in _sku_name(lic) for f in self._STAFF_SKU_FRAGMENTS)
+                   for lic in assigned_licenses):
+                return "staff"
+            if any(any(f in _sku_name(lic) for f in self._STUDENT_SKU_FRAGMENTS)
+                   for lic in assigned_licenses):
+                return "student"
+
+        # ── Pass 2: SKU ID fallback (m365_skus.json) ─────────────────────────
+        # Used when sku_map is unavailable or when a licence has no recognisable
+        # fragment (e.g. Power Automate Free assigned to faculty accounts).
+        # Staff checked before student for the same add-on masking reason above.
+        for lic in assigned_licenses:
+            if lic.get("skuId", "").lower() in self._STAFF_SKU_IDS:
+                return "staff"
+        for lic in assigned_licenses:
+            if lic.get("skuId", "").lower() in self._STUDENT_SKU_IDS:
+                return "student"
+
+        return "other"
+
+
+# Load SKU classification data from classification/m365_skus.json at import time.
+# This replaces the class-level SKU defaults on M365Connector once per process.
+M365Connector._load_sku_data()
diff --git a/m365_launcher.py b/m365_launcher.py
new file mode 100644
index 0000000..55a882b
--- /dev/null
+++ b/m365_launcher.py
@@ -0,0 +1,446 @@
+"""
+gdpr_launcher.py — entry point for the packaged GDPRScanner app.
+
+Responsibilities:
+  1. Find a free port (default 5100)
+  2. Start Flask in a background thread
+  3. Open the UI in a native webview window (pywebview)
+     — falls back to the system browser if pywebview is unavailable
+
+Generated by build_gdpr.py — do not edit manually.
+"""
+import os
+os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
+
+import subprocess
+import sys
+import socket
+import threading
+import time
+import webbrowser
+from pathlib import Path
+
+# Directory used to look up bundled resources (icons, tool binaries).
+# When running as a frozen bundle (sys.frozen set) this is sys._MEIPASS;
+# otherwise it is the directory containing this file.
+if getattr(sys, "frozen", False):
+    BASE_DIR = Path(sys._MEIPASS)
+else:
+    BASE_DIR = Path(__file__).parent
+
+
+def _setup_external_tools():
+    """
+    Locate Tesseract and Poppler regardless of how the app was launched.
+    GDPRScanner calls document_scanner for file content extraction, which
+    may need OCR for scanned PDFs — same setup as Document Scanner.
+
+    Everything here is best-effort: a missing tool never raises.
+
+    Side effects:
+      * TESSERACT_CMD, TESSDATA_PREFIX and POPPLER_PATH are set with
+        ``os.environ.setdefault`` — values already in the environment win.
+      * Discovered tool directories are prepended to PATH (each at most once).
+      * If pytesseract is importable, its ``tesseract_cmd`` is pointed at
+        TESSERACT_CMD; if pdf2image is importable, its
+        ``pdf2image.pdf2image.convert_from_path`` is wrapped so that
+        ``poppler_path`` defaults to POPPLER_PATH.
+    """
+    extra_paths = []
+
+    if sys.platform == "darwin":
+        # Ask Homebrew for its prefix; fall back to the well-known locations.
+        brew_prefix = None
+        for brew_candidate in ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]:
+            if Path(brew_candidate).exists():
+                try:
+                    result = subprocess.run(
+                        [brew_candidate, "--prefix"],
+                        capture_output=True, text=True, timeout=5
+                    )
+                    if result.returncode == 0:
+                        brew_prefix = result.stdout.strip()
+                        break
+                except Exception:
+                    # brew hung or is broken — try the next candidate
+                    pass
+
+        brew_candidates = []
+        if brew_prefix:
+            brew_candidates.append(brew_prefix)
+        brew_candidates += ["/opt/homebrew", "/usr/local", "/home/linuxbrew/.linuxbrew"]
+
+        for prefix in brew_candidates:
+            bin_dir = Path(prefix) / "bin"
+            if bin_dir.exists():
+                extra_paths.append(str(bin_dir))
+            tessdata = Path(prefix) / "share" / "tessdata"
+            if tessdata.exists():
+                os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
+
+        for t in ["/opt/homebrew/bin/tesseract", "/usr/local/bin/tesseract"]:
+            if Path(t).exists():
+                os.environ.setdefault("TESSERACT_CMD", t)
+                break
+
+        for p in ["/opt/homebrew/bin", "/usr/local/bin",
+                  "/opt/homebrew/opt/poppler/bin", "/usr/local/opt/poppler/bin"]:
+            if (Path(p) / "pdftoppm").exists():
+                os.environ.setdefault("POPPLER_PATH", p)
+                extra_paths.insert(0, p)
+                break
+
+    elif sys.platform == "win32":
+        import winreg
+        tess_dir = None
+        try:
+            # Context manager closes the key even when InstallDir is missing.
+            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
+                                r"SOFTWARE\Tesseract-OCR") as key:
+                tess_dir, _ = winreg.QueryValueEx(key, "InstallDir")
+        except Exception:
+            # No registry entry — rely on the default install locations below
+            pass
+
+        for d in ([tess_dir] if tess_dir else []) + [
+            r"C:\Program Files\Tesseract-OCR",
+            r"C:\Program Files (x86)\Tesseract-OCR",
+            r"C:\Tesseract-OCR",
+        ]:
+            if d and Path(d, "tesseract.exe").exists():
+                os.environ.setdefault("TESSERACT_CMD", str(Path(d) / "tesseract.exe"))
+                extra_paths.append(d)
+                tessdata = Path(d) / "tessdata"
+                if tessdata.exists():
+                    os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
+                break
+
+        for d in [
+            r"C:\poppler\Library\bin", r"C:\poppler\bin",
+            r"C:\Program Files\poppler\Library\bin",
+            r"C:\Program Files\poppler\bin",
+            r"C:\tools\poppler\Library\bin",
+        ]:
+            if (Path(d) / "pdftoppm.exe").exists():
+                os.environ.setdefault("POPPLER_PATH", d)
+                extra_paths.insert(0, d)
+                break
+
+    if getattr(sys, "frozen", False):
+        # Binaries shipped inside the bundle, if any.
+        tess_bin = BASE_DIR / ("tesseract.exe" if sys.platform == "win32" else "tesseract")
+        if tess_bin.exists():
+            os.environ.setdefault("TESSERACT_CMD", str(tess_bin))
+        for sub in ["poppler/bin", "poppler/Library/bin", "."]:
+            pdftoppm = BASE_DIR / sub / ("pdftoppm.exe" if sys.platform == "win32" else "pdftoppm")
+            if pdftoppm.exists():
+                os.environ.setdefault("POPPLER_PATH", str(pdftoppm.parent))
+                extra_paths.insert(0, str(pdftoppm.parent))
+                break
+        extra_paths.insert(0, str(BASE_DIR))
+
+    if extra_paths:
+        current = os.environ.get("PATH", "")
+        # Compare against whole PATH entries, not substrings of the PATH
+        # string: "/opt/homebrew" must still be added when only
+        # "/opt/homebrew/bin" is present.
+        known = set(current.split(os.pathsep)) if current else set()
+        additions = []
+        for p in extra_paths:
+            if p not in known:
+                known.add(p)
+                additions.append(p)
+        if additions:
+            # Skip the old value when it is empty so PATH does not end with
+            # a dangling separator.
+            os.environ["PATH"] = os.pathsep.join(
+                additions + ([current] if current else [])
+            )
+
+    cmd = os.environ.get("TESSERACT_CMD")
+    if cmd and Path(cmd).exists():
+        try:
+            import pytesseract
+            pytesseract.pytesseract.tesseract_cmd = cmd
+        except ImportError:
+            pass
+
+    poppler = os.environ.get("POPPLER_PATH")
+    if poppler:
+        try:
+            import pdf2image.pdf2image as _p2i
+            _orig = _p2i.convert_from_path
+            # Wrapper: an explicit poppler_path from the caller still wins.
+            def _patched(pdf_path, *a, poppler_path=None, **kw):
+                return _orig(pdf_path, *a, poppler_path=poppler_path or poppler, **kw)
+            _p2i.convert_from_path = _patched
+        except Exception:
+            pass
+
+
+_setup_external_tools()
+
+
+def find_free_port(start: int = 5100) -> int:
+    """
+    Return the first TCP port in ``start .. start + 99`` that can be bound
+    on 127.0.0.1.
+
+    The probe socket is closed before returning, so the port is only known
+    to have been free at the moment of the check.
+
+    Raises:
+        RuntimeError: if none of the 100 candidate ports could be bound.
+    """
+    span = 100
+    for port in range(start, start + span):
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            try:
+                s.bind(("127.0.0.1", port))
+                return port
+            except OSError:
+                continue
+    # Report the range that was actually scanned, not a hard-coded one.
+    raise RuntimeError(
+        f"No free port found in range {start}-{start + span - 1}"
+    )
+
+
+# ── Single-instance lock ──────────────────────────────────────────────────────
+# Open handle on the lock file; kept at module level so the lock is held
+# for the lifetime of the process.
+_LOCK_FH = None
+
+def acquire_instance_lock() -> bool:
+    """
+    Acquire an exclusive process lock so only one instance runs at a time.
+    Returns True if the lock was acquired, False if another instance holds it.
+    The lock is released automatically when the process exits.
+
+    The lock file is ``~/.gdprscanner/app.lock``. It is opened WITHOUT
+    truncation and the PID is written only after the lock has been obtained,
+    so a second instance that loses the race does not erase the PID recorded
+    by the running one.
+    """
+    global _LOCK_FH
+    lock_dir = Path.home() / ".gdprscanner"
+    lock_dir.mkdir(parents=True, exist_ok=True)
+    lock_path = lock_dir / "app.lock"
+    fh = None
+    try:
+        # O_CREAT without O_TRUNC: create if missing, keep contents otherwise.
+        fd = os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o666)
+        fh = os.fdopen(fd, "r+")
+        if sys.platform == "win32":
+            import msvcrt
+            # Locks 1 byte at the current position (offset 0 right after open).
+            msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
+        else:
+            import fcntl
+            fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        # We own the lock — now it is safe to replace the stored PID.
+        fh.seek(0)
+        fh.truncate()
+        fh.write(str(os.getpid()))
+        fh.flush()
+        _LOCK_FH = fh
+        return True
+    except (IOError, OSError):
+        if fh is not None:
+            fh.close()
+        _LOCK_FH = None
+        return False
+
+
+def _activate_venv():
+    """
+    Put a sibling ``venv``'s site-packages at the front of ``sys.path``.
+
+    Does nothing in a frozen build. Otherwise the first candidate venv that
+    has a site-packages directory wins: VIRTUAL_ENV is set to it and
+    PYTHONHOME is removed from the environment.
+    """
+    if getattr(sys, "frozen", False):
+        return
+
+    on_windows = sys.platform == "win32"
+    for venv_root in (BASE_DIR / "venv", Path(__file__).parent / "venv"):
+        if on_windows:
+            packages = venv_root / "Lib" / "site-packages"
+        else:
+            # POSIX layout is lib/pythonX.Y/site-packages — take the first
+            # child of lib/ that actually has one.
+            lib_dir = venv_root / "lib"
+            packages = None
+            if lib_dir.exists():
+                packages = next(
+                    (entry / "site-packages" for entry in lib_dir.iterdir()
+                     if (entry / "site-packages").exists()),
+                    None,
+                )
+        if packages is None or not packages.exists():
+            continue
+
+        sys.path.insert(0, str(packages))
+        os.environ["VIRTUAL_ENV"] = str(venv_root)
+        os.environ.pop("PYTHONHOME", None)
+        return
+
+
+_activate_venv()
+
+
+def start_flask(port: int):
+    """Run the ``gdpr_scanner`` Flask app on 127.0.0.1:``port`` (blocks the calling thread)."""
+    from gdpr_scanner import app as flask_app
+
+    run_options = {
+        "host": "127.0.0.1",
+        "port": port,
+        "debug": False,
+        "threaded": True,
+        "use_reloader": False,
+    }
+    flask_app.run(**run_options)
+
+
+def wait_for_flask(port: int, timeout: float = 20.0) -> bool:
+    """
+    Poll 127.0.0.1:``port`` until a TCP connection succeeds.
+
+    Returns True as soon as a connection is accepted, or False once
+    ``timeout`` seconds have elapsed without one.
+    """
+    address = ("127.0.0.1", port)
+    give_up_at = time.monotonic() + timeout
+    while True:
+        if time.monotonic() >= give_up_at:
+            return False
+        try:
+            probe = socket.create_connection(address, timeout=0.2)
+        except OSError:
+            # Not listening yet — back off briefly and retry
+            time.sleep(0.1)
+            continue
+        probe.close()
+        return True
+
+
+def _load_icon_image():
+    """
+    Build a 64x64 RGBA image for the tray icon.
+
+    Uses the first icon file found in BASE_DIR; if none exists a solid blue
+    square is generated instead. Returns None when anything fails (for
+    example Pillow is not installed).
+    """
+    try:
+        from PIL import Image as PILImage
+
+        icon_names = (
+            "icon_gdpr.ico", "icon_gdpr.icns", "icon_gdpr.png",
+            "icon.ico", "icon.icns", "icon.png",
+            "icon_m365.ico", "icon_m365.icns", "icon_m365.png",  # legacy fallback
+        )
+        found = next(
+            (BASE_DIR / name for name in icon_names if (BASE_DIR / name).exists()),
+            None,
+        )
+        if found is not None:
+            return PILImage.open(found).convert("RGBA").resize((64, 64))
+        # Nothing bundled — fall back to a plain blue square
+        return PILImage.new("RGBA", (64, 64), (0, 114, 206, 255))
+    except Exception:
+        return None
+
+
+def run_webview(port: int):
+    """
+    Open the app in a native webview window.
+    Returns True on success, False if pywebview is unavailable.
+
+    The window is given a ``js_api`` object whose methods are callable from
+    the page: ``quit``, ``save_excel``, ``save_db_export``,
+    ``save_article30`` and ``open_manual``. Closing the main window
+    terminates the whole process via ``os._exit(0)``.
+    """
+    try:
+        import webview
+    except ImportError:
+        return False
+
+    base_url = f"http://127.0.0.1:{port}"
+
+    def _download_and_save(endpoint, stem, ext, type_label):
+        """
+        Fetch ``endpoint`` from the local Flask server and write the bytes to
+        a path chosen in a native save dialog.
+
+        Returns ``{"ok": True, "path": ...}`` on success,
+        ``{"ok": False, "error": "cancelled"}`` when there is no window or
+        the dialog is dismissed, and ``{"ok": False, "error": <message>}``
+        on any exception.
+        """
+        import datetime
+        import urllib.request
+        try:
+            # Download first so a failing export never leaves an empty file.
+            with urllib.request.urlopen(base_url + endpoint) as resp:
+                data = resp.read()
+            stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
+            win = webview.windows[0] if webview.windows else None
+            if not win:
+                return {"ok": False, "error": "cancelled"}
+            paths = win.create_file_dialog(
+                webview.SAVE_DIALOG,
+                save_filename=f"{stem}_{stamp}{ext}",
+                file_types=(type_label,),
+            )
+            if not paths:
+                return {"ok": False, "error": "cancelled"}
+            dest = paths[0] if isinstance(paths, (list, tuple)) else paths
+            # Case-insensitive so "Report.XLSX" is not turned into
+            # "Report.XLSX.xlsx".
+            if not dest.lower().endswith(ext):
+                dest += ext
+            with open(dest, "wb") as f:
+                f.write(data)
+            return {"ok": True, "path": dest}
+        except Exception as e:
+            return {"ok": False, "error": str(e)}
+
+    class Api:
+        def quit(self):
+            """Destroy every open webview window."""
+            for w in webview.windows:
+                w.destroy()
+
+        def save_excel(self):
+            """Fetch the Excel export from Flask and save via native dialog."""
+            return _download_and_save(
+                "/api/export_excel", "gdpr_scan", ".xlsx",
+                "Excel Files (*.xlsx)",
+            )
+
+        def save_db_export(self):
+            """Fetch the DB export ZIP from Flask and save via native dialog."""
+            return _download_and_save(
+                "/api/db/export", "gdpr_export", ".zip",
+                "ZIP Archive (*.zip)",
+            )
+
+        def save_article30(self):
+            """Fetch the Article 30 Word doc from Flask and save via native dialog."""
+            return _download_and_save(
+                "/api/export_article30", "article30", ".docx",
+                "Word Document (*.docx)",
+            )
+
+        def open_manual(self, lang: str):
+            """Open the user manual in a new native webview window."""
+            url = f"{base_url}/manual?lang={lang}"
+            # Reuse the manual window if one is already open.
+            existing = next(
+                (w for w in webview.windows if getattr(w, "_is_manual", False)),
+                None,
+            )
+            if existing:
+                existing.load_url(url)
+            else:
+                mw = webview.create_window(
+                    title="GDPRScanner — Manual",
+                    url=url,
+                    width=960,
+                    height=800,
+                    resizable=True,
+                )
+                mw._is_manual = True
+
+    w = webview.create_window(
+        title="GDPRScanner",
+        url=f"{base_url}/",
+        width=1400,
+        height=900,
+        min_size=(900, 600),
+        js_api=Api(),
+    )
+
+    def _on_closed():
+        # Hard exit: the Flask server runs in a thread that never returns.
+        os._exit(0)
+
+    w.events.closed += _on_closed
+    webview.start(debug=False)
+    return True
+
+
+def _run_browser_fallback(port: int):
+    """
+    Open in system browser + optional tray icon.
+
+    With pystray and a usable icon image, a tray icon with "Open" and "Quit"
+    entries is shown and this call blocks in ``icon.run()``. In every other
+    case (pystray missing, or no icon image could be built) the call blocks
+    in a sleep loop until Ctrl-C, so the process — and the Flask server
+    thread started by the caller — keeps running instead of exiting right
+    after the browser opens.
+    """
+    url = f"http://127.0.0.1:{port}/"
+    webbrowser.open(url)
+
+    try:
+        import pystray
+    except ImportError:
+        pystray = None
+
+    img = _load_icon_image() if pystray is not None else None
+
+    if pystray is not None and img is not None:
+        def _quit(icon, item):
+            icon.stop()
+            os._exit(0)
+
+        def _open(icon, item):
+            webbrowser.open(url)
+
+        menu = pystray.Menu(
+            pystray.MenuItem("Open GDPRScanner", _open, default=True),
+            pystray.MenuItem("Quit", _quit),
+        )
+        icon = pystray.Icon("GDPRScanner", img, "GDPRScanner", menu)
+        icon.run()
+        return
+
+    # No tray available — just keep the process alive
+    try:
+        while True:
+            time.sleep(60)
+    except KeyboardInterrupt:
+        pass
+
+
+# Script entry point: take the single-instance lock, pick a port, start
+# Flask in a background thread, then hand the main thread to the UI.
+if __name__ == "__main__":
+    # Refuse to start a second copy.
+    if not acquire_instance_lock():
+        print("GDPRScanner is already running.", file=sys.stderr)
+        sys.exit(1)
+
+    # On macOS, multiprocessing uses "fork" which is unsafe with some
+    # frameworks — use "spawn" to match PyInstaller's behaviour.
+    if sys.platform == "darwin":
+        import multiprocessing
+        multiprocessing.set_start_method("spawn", force=True)
+
+    port = find_free_port()
+    # Machine-readable port line — stdout pipe for any parent process.
+    print(f"GDPR_PORT={port}", flush=True)
+
+    # Pre-import on main thread so cv2 / numpy initialise safely
+    try:
+        import gdpr_scanner  # noqa: F401 — side effect: loads Flask app
+    except Exception as e:
+        print(f"[!] Failed to import gdpr_scanner: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Daemon thread: it does not keep the process alive once the UI returns.
+    flask_thread = threading.Thread(target=start_flask, args=(port,), daemon=True)
+    flask_thread.start()
+
+    # Do not open any UI until the server accepts connections.
+    if not wait_for_flask(port):
+        print("[!] Flask did not start in time", file=sys.stderr)
+        sys.exit(1)
+
+    # Prefer the native window; run_webview returns False if pywebview is unavailable.
+    webview_ok = run_webview(port)
+    if not webview_ok:
+        _run_browser_fallback(port)
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..9855d94
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,6 @@
+[pytest]
+testpaths = tests
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = -v --tb=short
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c38bd95
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,48 @@
+# M365 GDPR Scanner — Python dependencies
+# Python 3.11+ required (3.13+ not recommended — spaCy compatibility)
+
+# ── Web server ────────────────────────────────────────────────────────────────
+flask>=3.0
+
+# ── Microsoft 365 authentication ─────────────────────────────────────────────
+msal>=1.28             # OAuth device code + client credentials flow
+requests>=2.31         # Microsoft Graph API HTTP client
+
+# ── Document scanning ─────────────────────────────────────────────────────────
+pdfplumber>=0.11       # PDF text extraction
+python-docx>=1.1       # Word document scanning
+openpyxl>=3.1          # Excel scanning + export
+
+# ── Image processing ──────────────────────────────────────────────────────────
+Pillow>=10.0           # Image thumbnails + EXIF extraction (always-on)
+opencv-python>=4.9     # Face detection (opt-in — Scan photos for faces)
+numpy>=1.26            # Required by opencv-python
+
+# ── NER / PII detection ───────────────────────────────────────────────────────
+# spaCy 3.7 supports Python 3.8–3.12. Do NOT upgrade past Python 3.12.
+spacy>=3.7,<4.0
+
+# ── PDF scanning (optional — improves accuracy) ───────────────────────────────
+pymupdf>=1.24          # Physical PDF text layer access (fallback: pdfplumber)
+
+# ── Encryption ───────────────────────────────────────────────────────────────
+cryptography>=42.0     # Fernet — SMTP password encrypted at rest
+
+# ── Packaging / desktop ───────────────────────────────────────────────────────
+pyinstaller>=6.0
+pyinstaller-hooks-contrib>=2024.0
+pywebview>=5.0         # Native app window
+pystray>=0.19          # System tray icon
+
+# ── File system scanning (optional) ──────────────────────────────────────────
+smbprotocol>=1.13      # SMB2/3 network share scanning without mounting
+keyring>=25.0          # OS keychain credential storage for SMB passwords
+python-dotenv>=1.0     # .env file fallback for headless SMB credentials
+
+# ── Scheduler (#19) ──────────────────────────────────────────────────────────
+APScheduler>=3.10      # In-process scheduled scans
+
+# ── Google Workspace scanning (#10) ──────────────────────────────────────────
+google-auth>=2.0                   # Service account + domain-wide delegation
+google-auth-httplib2               # HTTP transport for google-auth
+google-api-python-client>=2.0      # Gmail API + Drive API + Admin Directory API
diff --git a/routes/CLAUDE.md b/routes/CLAUDE.md
new file mode 100644
index 0000000..3e3db7c
--- /dev/null
+++ b/routes/CLAUDE.md
@@ -0,0 +1,21 @@
+# Routes — Architecture Rules
+
+## SSE constraints
+SSE routes must live in `gdpr_scanner.py`, not blueprints — blueprints can't stream.
+
+M365 scan emits `scan_done`; Google emits `google_scan_done`; file scan emits `file_scan_done`. Never mix them up.
+
+## scan_progress source field
+All three scan engines must include `"source": "m365"` / `"google"` / `"file"` in every `scan_progress` SSE event. Never remove this field — the frontend uses it to route progress to the correct segment.
+
+## file_sources
+`file_sources` in profiles are stored as source ID strings by the JS frontend. The scheduler resolves them via `_load_file_sources()` before calling `run_file_scan()`.
+
+## Circular import prohibition
+`scan_engine.py` and `gdpr_scanner.py` must not import each other. `scan_engine` imports from `sse`, `checkpoint`, `app_config`, `cpr_detector`; `gdpr_scanner` imports scan functions from `scan_engine`.
+
+## Gotchas
+
+- **`_load_settings()` return** — does NOT include `file_sources`. Returns only: sources, user_ids, options, retention_years, fiscal_year_end, email_to.
+- **`_save_settings()` clobbers profile fields** — called on every M365 scan start with only M365 sources/user_ids/options. The fix in `app_config.py` preserves `google_sources` and `file_sources` and rebuilds `sources` as `m365_src + google_src + file_src`. Do not simplify away this merge logic.
+- **`loadLastScanSummary()` timing** — must only be called after the first `/api/scan/status` poll resolves (inside `_sseWatchdog` in `results.js`, guarded by `_initialStatusChecked`). Calling it on `DOMContentLoaded` shows a stale "no results" card during a live scan after a hard refresh.
diff --git a/routes/__init__.py b/routes/__init__.py
new file mode 100644
index 0000000..3edbbf9
--- /dev/null
+++ b/routes/__init__.py
@@ -0,0 +1,8 @@
+"""
+GDPR Scanner — Flask route blueprints.
+
+Each module registers one Blueprint and imports shared state from
+gdpr_scanner (the application entry point).  Import order matters:
+blueprints must be registered after `app` and all shared globals
+(flagged_items, _connector, etc.) are defined.
+"""
diff --git a/routes/app_routes.py b/routes/app_routes.py
new file mode 100644
index 0000000..5b429dc
--- /dev/null
+++ b/routes/app_routes.py
@@ -0,0 +1,386 @@
+"""
+App-level routes: about, language, version
+"""
+from __future__ import annotations
+import sys
+from flask import Blueprint, Response, jsonify, request
+from pathlib import Path
+from routes import state
+from app_config import _set_lang_override, _load_lang_forced
+
+# Blueprint that carries every route defined in this module.
+bp = Blueprint("app_routes", __name__)
+
+# Version string, read once at import time from the VERSION file two
+# directories above this module.
+_APP_VERSION = (Path(__file__).parent.parent / "VERSION").read_text().strip()
+# Directory holding the translation files: under sys._MEIPASS when frozen,
+# otherwise next to the package's parent directory.
+_LANG_DIR    = (Path(sys._MEIPASS) if getattr(sys, "frozen", False)
+                else Path(__file__).parent.parent) / "lang"
+
+
+@bp.route("/api/about")
+def about_info():
+    """
+    Return JSON with the Python and app versions plus, for msal, requests
+    and openpyxl, either the package's ``__version__``, "installed" when it
+    has none, or "not installed" when the import fails.
+    """
+    import importlib
+    import platform
+
+    versions = {"python": platform.python_version(), "app": _APP_VERSION}
+    for package in ("msal", "requests", "openpyxl"):
+        try:
+            module = importlib.import_module(package)
+        except ImportError:
+            versions[package] = "not installed"
+        else:
+            versions[package] = getattr(module, "__version__", "installed")
+    return jsonify(versions)
+
+
+@bp.route("/api/langs")
+def get_langs():
+    """
+    List the available UI languages.
+
+    One entry is produced per distinct file stem among the ``*.json`` and
+    ``*.lang`` files in ``_LANG_DIR``, sorted by code. The response also
+    carries the currently active language code (default "en").
+    """
+    display_names = {
+        "da": "Dansk", "en": "English", "de": "Deutsch",
+        "fr": "Français", "nl": "Nederlands", "sv": "Svenska",
+        "no": "Norsk", "fi": "Suomi", "es": "Español",
+        "it": "Italiano", "pl": "Polski", "pt": "Português",
+    }
+
+    codes = []
+    if _LANG_DIR.exists():
+        # A set collapses "xx.json" + "xx.lang" into one language
+        codes = sorted({
+            path.stem
+            for pattern in ("*.json", "*.lang")
+            for path in _LANG_DIR.glob(pattern)
+        })
+
+    langs = [
+        {"code": code, "name": display_names.get(code, code.upper())}
+        for code in codes
+    ]
+    current = state.LANG.get("_lang_code", "en")
+    return jsonify({"langs": langs, "current": current})
+
+
+@bp.route("/api/set_lang", methods=["POST"])
+def set_lang():
+    """
+    Switch the UI language.
+
+    Expects a JSON object such as ``{"lang": "da"}`` (default "en"). The code
+    is trimmed, lower-cased and cut to 10 characters, then must consist of
+    letters/digits optionally joined by "-" or "_" (e.g. "en", "pt-br").
+
+    Returns the new translations on success, or HTTP 400 with
+    ``{"status": "error", ...}`` for a malformed code.
+    """
+    import re
+
+    data = request.get_json(force=True) or {}
+    if not isinstance(data, dict):
+        # A JSON array/string/number has no .get(); treat it as "no options"
+        # instead of failing with a server error.
+        data = {}
+    code = str(data.get("lang", "en")).strip().lower()[:10]
+    # The code comes straight from the request and is handed to the language
+    # loader below. NOTE(review): that loader presumably builds a file name
+    # from it — rejecting separators and dots here keeps it to a plain code.
+    if not re.fullmatch(r"[a-z0-9]+(?:[_-][a-z0-9]+)*", code):
+        return jsonify({"status": "error", "error": "invalid language code"}), 400
+    _set_lang_override(code)
+    state.LANG = _load_lang_forced(code)
+    return jsonify({"status": "ok", "lang": code, "translations": state.LANG})
+
+
+@bp.route("/api/lang")
+def get_lang_json():
+    """Serve the active translation table (``state.LANG``) as a JSON response."""
+    translations = state.LANG
+    return jsonify(translations)
+
+
+@bp.route("/manual")
+def manual():
+    """
+    Render the user manual as a self-contained, printable HTML page.
+
+    The language comes from ``?lang=``; when absent the current UI language
+    is used. Anything other than "da" or "en" falls back to "da". Responds
+    with 404 when the matching Markdown file is missing.
+    """
+    requested = request.args.get("lang", "").strip().lower()
+    lang = requested or state.LANG.get("_lang_code", "da")
+    if lang not in ("da", "en"):
+        lang = "da"
+
+    # Same root resolution as for the lang directory: bundle dir when frozen
+    if getattr(sys, "frozen", False):
+        root = Path(sys._MEIPASS)
+    else:
+        root = Path(__file__).parent.parent
+
+    if lang == "da":
+        fname = "MANUAL-DA.md"
+        title = "GDPR Scanner — Brugermanual"
+        print_label = "Udskriv"
+        other_lang, other_label = "en", "English"
+    else:
+        fname = "MANUAL-EN.md"
+        title = "GDPR Scanner — User Manual"
+        print_label = "Print"
+        other_lang, other_label = "da", "Dansk"
+
+    md_path = root / "docs" / "manuals" / fname
+    if not md_path.exists():
+        return f"Manual file not found: {fname}", 404
+
+    body_html = _md_to_html(md_path.read_text(encoding="utf-8"))
+
+    # Doubled braces below are literal CSS braces inside the f-string.
+    page = f"""<!DOCTYPE html>
+<html lang="{lang}">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+<title>{title}</title>
+<style>
+  :root {{
+    --text:    #1a1a1a;
+    --muted:   #555;
+    --border:  #ddd;
+    --accent:  #0060b0;
+    --bg:      #fff;
+    --surface: #f6f8fa;
+    --code-bg: #f0f0f0;
+  }}
+  *, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }}
+  body {{
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+    font-size: 15px;
+    line-height: 1.7;
+    color: var(--text);
+    background: var(--bg);
+    max-width: 860px;
+    margin: 0 auto;
+    padding: 32px 24px 64px;
+  }}
+  h1 {{ font-size: 1.9em; margin: 0 0 4px; color: var(--text); }}
+  h2 {{ font-size: 1.35em; margin: 2.2em 0 .6em; padding-bottom: .3em;
+        border-bottom: 2px solid var(--border); color: var(--text); }}
+  h3 {{ font-size: 1.1em; margin: 1.6em 0 .4em; color: var(--text); }}
+  h4 {{ font-size: 1em; margin: 1.2em 0 .3em; color: var(--muted); }}
+  p  {{ margin: .6em 0; }}
+  a  {{ color: var(--accent); text-decoration: none; }}
+  a:hover {{ text-decoration: underline; }}
+  strong {{ font-weight: 600; }}
+  em {{ font-style: italic; }}
+  hr {{ border: none; border-top: 1px solid var(--border); margin: 1.8em 0; }}
+  blockquote {{
+    border-left: 3px solid var(--accent);
+    margin: .8em 0;
+    padding: .4em 1em;
+    background: var(--surface);
+    border-radius: 0 4px 4px 0;
+    color: var(--muted);
+  }}
+  code {{
+    font-family: "SF Mono", Consolas, "Liberation Mono", monospace;
+    font-size: .88em;
+    background: var(--code-bg);
+    padding: 1px 5px;
+    border-radius: 3px;
+  }}
+  pre {{
+    background: var(--code-bg);
+    border: 1px solid var(--border);
+    border-radius: 6px;
+    padding: 14px 16px;
+    overflow-x: auto;
+    margin: .8em 0;
+    font-size: .85em;
+    line-height: 1.5;
+  }}
+  pre code {{ background: none; padding: 0; font-size: inherit; }}
+  ul, ol {{ margin: .5em 0 .5em 1.6em; }}
+  li {{ margin: .25em 0; }}
+  table {{
+    border-collapse: collapse;
+    width: 100%;
+    margin: .8em 0;
+    font-size: .93em;
+  }}
+  th, td {{
+    border: 1px solid var(--border);
+    padding: 7px 12px;
+    text-align: left;
+    vertical-align: top;
+  }}
+  th {{
+    background: var(--surface);
+    font-weight: 600;
+  }}
+  tr:nth-child(even) td {{ background: #fafafa; }}
+
+  /* ── Top toolbar ── */
+  .manual-toolbar {{
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    margin-bottom: 28px;
+    padding-bottom: 14px;
+    border-bottom: 1px solid var(--border);
+  }}
+  .manual-toolbar .spacer {{ flex: 1; }}
+  .toolbar-btn {{
+    font-size: 13px;
+    padding: 5px 14px;
+    border-radius: 6px;
+    border: 1px solid var(--border);
+    background: var(--surface);
+    color: var(--text);
+    cursor: pointer;
+    text-decoration: none;
+    display: inline-block;
+  }}
+  .toolbar-btn:hover {{ background: var(--border); }}
+  .toolbar-btn.primary {{
+    background: var(--accent);
+    color: #fff;
+    border-color: var(--accent);
+  }}
+  .toolbar-btn.primary:hover {{ opacity: .88; }}
+
+  /* ── Table of contents ── */
+  .toc {{
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 16px 20px;
+    margin: 1.2em 0 2em;
+    font-size: .93em;
+  }}
+  .toc ol {{ margin: .3em 0 0 1.2em; }}
+  .toc li {{ margin: .3em 0; }}
+
+  /* ── Print ── */
+  @media print {{
+    .manual-toolbar {{ display: none !important; }}
+    body {{ max-width: 100%; padding: 0; font-size: 12pt; }}
+    h2 {{ page-break-before: always; }}
+    h2:first-of-type {{ page-break-before: avoid; }}
+    pre, blockquote, table {{ page-break-inside: avoid; }}
+    a {{ color: var(--text); text-decoration: none; }}
+    a[href^="http"]::after {{ content: " (" attr(href) ")"; font-size: .8em; color: var(--muted); }}
+    tr:nth-child(even) td {{ background: #f5f5f5; }}
+  }}
+</style>
+</head>
+<body>
+<div class="manual-toolbar">
+  <strong style="font-size:14px">{title}</strong>
+  <span class="spacer"></span>
+  <a class="toolbar-btn" href="/manual?lang={other_lang}">{other_label}</a>
+  <button class="toolbar-btn primary" onclick="window.print()">🖨 {print_label}</button>
+</div>
+{body_html}
+</body>
+</html>"""
+    return Response(page, mimetype="text/html")
+
+
+def _md_to_html(md: str) -> str:
+    """Lightweight Markdown → HTML converter (no external dependencies).
+    Handles headings, tables, lists, blockquotes, code blocks, bold/italic,
+    inline code, links, and horizontal rules.
+
+    The input is processed line by line. Text outside code spans and fenced
+    blocks is NOT HTML-escaped, so raw HTML in the Markdown passes through.
+
+    Content is never dropped: a fenced block left open at end of input is
+    still emitted, and a lone table-looking line (no second row) is emitted
+    as a paragraph.
+    """
+    import re, html as _html
+
+    def inline(text: str) -> str:
+        # Pull code spans out FIRST and park them behind NUL-delimited
+        # placeholders, so "*" or "[...](...)" inside backticks is not
+        # treated as emphasis or a link.
+        spans = []
+
+        def _stash(m):
+            spans.append('<code>' + _html.escape(m.group(1)) + '</code>')
+            return f'\x00{len(spans) - 1}\x00'
+
+        text = re.sub(r'`(.+?)`',       _stash,                  text)
+        text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
+        text = re.sub(r'\*(.+?)\*',     r'<em>\1</em>',         text)
+        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
+        return re.sub(r'\x00(\d+)\x00', lambda m: spans[int(m.group(1))], text)
+
+    def make_anchor(text: str) -> str:
+        # Lower-case, drop punctuation, spaces → hyphens
+        return re.sub(r'[^\w\s-]', '', text.lower()).strip().replace(' ', '-')
+
+    result   = []
+    lines    = md.splitlines()
+    i        = 0
+
+    in_code   = False
+    code_buf  = []
+    in_list   = False
+    list_type = None
+    list_buf  = []
+    in_table  = False
+    tbl_buf   = []
+
+    def emit_code():
+        escaped = _html.escape('\n'.join(code_buf))
+        result.append(f'<pre><code>{escaped}</code></pre>')
+
+    def flush_list():
+        nonlocal in_list, list_type, list_buf
+        if not in_list:
+            return
+        tag = list_type
+        result.append(f'<{tag}>')
+        for item in list_buf:
+            result.append(f'  <li>{inline(item)}</li>')
+        result.append(f'</{tag}>')
+        in_list = False; list_buf = []; list_type = None
+
+    def flush_table():
+        nonlocal in_table, tbl_buf
+        rows, was_table = tbl_buf, in_table
+        in_table = False; tbl_buf = []
+        if not was_table:
+            return
+        if len(rows) < 2:
+            # Not a real table (no separator row) — keep the text visible
+            for row in rows:
+                result.append(f'<p>{inline(row)}</p>')
+            return
+        heads = [c.strip() for c in rows[0].strip('|').split('|')]
+        result.append('<table>')
+        result.append('<thead><tr>' + ''.join(f'<th>{inline(h)}</th>' for h in heads) + '</tr></thead>')
+        result.append('<tbody>')
+        # rows[1] is taken to be the |---|---| separator and skipped
+        for row in rows[2:]:
+            cols = [c.strip() for c in row.strip('|').split('|')]
+            result.append('<tr>' + ''.join(f'<td>{inline(c)}</td>' for c in cols) + '</tr>')
+        result.append('</tbody></table>')
+
+    while i < len(lines):
+        line = lines[i]
+        i += 1
+
+        # ── fenced code block ──────────────────────────────────────────
+        if line.startswith('```'):
+            if not in_code:
+                flush_list(); flush_table()
+                in_code = True; code_buf = []
+            else:
+                in_code = False
+                emit_code()
+            continue
+        if in_code:
+            code_buf.append(line)
+            continue
+
+        # ── table row ─────────────────────────────────────────────────
+        if line.strip().startswith('|') and '|' in line[1:]:
+            flush_list()
+            in_table = True
+            tbl_buf.append(line)
+            continue
+        elif in_table:
+            flush_table()
+
+        # ── blank line ────────────────────────────────────────────────
+        if not line.strip():
+            flush_list()
+            result.append('')
+            continue
+
+        # ── heading ───────────────────────────────────────────────────
+        m = re.match(r'^(#{1,6})\s+(.+)$', line)
+        if m:
+            flush_list()
+            lvl  = len(m.group(1))
+            text = m.group(2)
+            anc  = make_anchor(text)
+            result.append(f'<h{lvl} id="{anc}">{inline(text)}</h{lvl}>')
+            continue
+
+        # ── horizontal rule ───────────────────────────────────────────
+        if re.match(r'^-{3,}$', line.strip()):
+            flush_list()
+            result.append('<hr>')
+            continue
+
+        # ── blockquote ────────────────────────────────────────────────
+        if line.startswith('> '):
+            flush_list()
+            result.append(f'<blockquote>{inline(line[2:])}</blockquote>')
+            continue
+
+        # ── unordered list ────────────────────────────────────────────
+        m = re.match(r'^- (.+)$', line)
+        if m:
+            if not in_list or list_type != 'ul':
+                flush_list()
+                in_list = True; list_type = 'ul'; list_buf = []
+            list_buf.append(m.group(1))
+            continue
+
+        # ── ordered list ─────────────────────────────────────────────
+        m = re.match(r'^\d+\. (.+)$', line)
+        if m:
+            if not in_list or list_type != 'ol':
+                flush_list()
+                in_list = True; list_type = 'ol'; list_buf = []
+            list_buf.append(m.group(1))
+            continue
+
+        # ── paragraph ────────────────────────────────────────────────
+        flush_list()
+        result.append(f'<p>{inline(line)}</p>')
+
+    flush_list()
+    flush_table()
+    if in_code:
+        # Fence never closed — emit what was collected rather than lose it
+        emit_code()
+    return '\n'.join(result)
+
+
diff --git a/routes/auth.py b/routes/auth.py
new file mode 100644
index 0000000..52d52f4
--- /dev/null
+++ b/routes/auth.py
@@ -0,0 +1,179 @@
+"""
+Microsoft 365 authentication routes
+"""
+from __future__ import annotations
+import threading
+from flask import Blueprint, jsonify, request
+from routes import state
+from app_config import _load_config, _save_config
+
+# When m365_connector cannot be imported, define stand-ins so this module
+# still loads; the routes below check MSAL_OK before using M365Connector.
+try:
+    from m365_connector import M365Connector, M365Error, MSAL_OK
+except ImportError:
+    MSAL_OK = False
+    M365Connector = None  # type: ignore[assignment,misc]
+    class M365Error(Exception): pass  # type: ignore[no-redef]
+
+bp = Blueprint("auth", __name__)
+
+
+@bp.route("/api/auth/status")
+def auth_status():
+    """Report the current Microsoft 365 authentication state as JSON.
+
+    The shared ``state.connector`` is rebuilt from the saved config when it
+    is missing or when its client id / client secret differ from the saved
+    values.  The response always carries ``authenticated`` and the saved
+    credentials; when the connector is authenticated it also carries the
+    user's display name, email and ``app_mode``.
+
+    NOTE(review): the stored client_secret is echoed back in plaintext here,
+    unlike /api/auth/config which masks it — confirm the frontend needs it.
+    """
+    cfg = _load_config()
+    if not MSAL_OK:
+        return jsonify({"authenticated": False, "error": "msal not installed",
+                        "client_id": cfg.get("client_id",""), "tenant_id": cfg.get("tenant_id","")})
+
+    saved_cid    = cfg.get("client_id", "")
+    saved_tid    = cfg.get("tenant_id", "")
+    saved_secret = cfg.get("client_secret", "")
+
+    # The connector is stale when absent, or when it was built with a
+    # different secret or client id than the ones currently saved.
+    current = state.connector
+    stale = (
+        not current
+        or getattr(current, "client_secret", None) != saved_secret
+        or getattr(current, "client_id", None) != saved_cid
+    )
+
+    if stale and saved_cid and saved_tid:
+        try:
+            state.connector = M365Connector(saved_cid, saved_tid, client_secret=saved_secret)
+            if state.connector.is_app_mode:
+                state.connector.authenticate_app_mode()
+        except Exception:
+            # Any construction/authentication failure leaves us disconnected.
+            state.connector = None
+
+    connector = state.connector
+    if connector and connector.is_authenticated():
+        try:
+            info = connector.get_user_info()
+            email = info.get("mail") or info.get("userPrincipalName","")
+            return jsonify({"authenticated": True,
+                            "display_name": info.get("displayName",""),
+                            "email": email,
+                            "client_id":     saved_cid,
+                            "tenant_id":     saved_tid,
+                            "client_secret": saved_secret,
+                            "app_mode":      connector.is_app_mode})
+        except Exception:
+            # Fall through to the unauthenticated response below.
+            pass
+
+    return jsonify({"authenticated": False,
+                    "client_id":     saved_cid,
+                    "tenant_id":     saved_tid,
+                    "client_secret": saved_secret})
+
+
+@bp.route("/api/auth/start", methods=["POST"])
+def auth_start():
+    """Begin Microsoft 365 sign-in with the credentials in the JSON body.
+
+    Body: {client_id, tenant_id, client_secret?}
+
+    When the new connector reports ``is_app_mode`` a token is acquired
+    immediately and ``{"mode": "application"}`` is returned.  Otherwise a
+    device-code flow is started, a daemon thread waits for it to complete
+    (its outcome is stored in ``state.auth_poll_result`` for /api/auth/poll),
+    and the user code / verification URI / message are returned.
+
+    Any exception is reported as ``{"error": "<message>"}``.
+    """
+    if not MSAL_OK:
+        return jsonify({"error": "msal not installed — run: pip install msal"})
+    data          = request.get_json() or {}
+    client_id     = data.get("client_id","").strip()
+    tenant_id     = data.get("tenant_id","").strip()
+    client_secret = data.get("client_secret","").strip()
+    if not client_id or not tenant_id:
+        return jsonify({"error": "client_id and tenant_id required"})
+    try:
+        # Keep local references: state.connector / state.pending_flow are
+        # shared and may be replaced or cleared by other requests while the
+        # background thread (or the rest of this handler) is still running.
+        connector = M365Connector(client_id, tenant_id, client_secret=client_secret)
+        state.connector = connector
+
+        if connector.is_app_mode:
+            # Application mode — acquire token immediately, no device code
+            connector.authenticate_app_mode()
+            _save_config({"client_id": client_id, "tenant_id": tenant_id,
+                          "client_secret": client_secret})
+            return jsonify({"mode": "application"})
+
+        # Delegated mode — start device code flow
+        flow = connector.get_device_code_flow()
+        state.pending_flow     = flow
+        state.auth_poll_result = None
+        _save_config({"client_id": client_id, "tenant_id": tenant_id, "client_secret": ""})
+
+        def _do_auth():
+            # Complete the flow on the connector that issued the device code,
+            # not on whatever state.connector points to when the thread runs.
+            try:
+                ok = connector.complete_device_code_flow(flow)
+                state.auth_poll_result = "ok" if ok else "Sign-in failed"
+            except (M365Error, Exception) as e:
+                state.auth_poll_result = str(e)
+        threading.Thread(target=_do_auth, daemon=True).start()
+
+        # Read from the local flow: /api/auth/poll may already have reset
+        # state.pending_flow to None if the thread finished quickly.
+        return jsonify({
+            "mode":             "delegated",
+            "user_code":        flow["user_code"],
+            "verification_uri": flow["verification_uri"],
+            "message":          flow["message"],
+        })
+    except Exception as e:
+        return jsonify({"error": str(e)})
+
+
+@bp.route("/api/auth/poll", methods=["POST"])
+def auth_poll():
+    """Report the outcome of the pending device-code sign-in.
+
+    Returns ``pending`` until the background thread has stored a result in
+    ``state.auth_poll_result``; a terminal result (``ok`` or an error
+    message) is returned once and then the pending flow is cleared.
+    """
+    if not (state.connector and state.pending_flow):
+        return jsonify({"status": "error", "error": "No pending flow"})
+
+    outcome = state.auth_poll_result
+    if not outcome or outcome == "pending":
+        return jsonify({"status": "pending"})
+
+    # Terminal outcome: consume it so the next poll starts clean.
+    state.auth_poll_result = None
+    state.pending_flow = None
+    if outcome == "ok":
+        return jsonify({"status": "ok"})
+    return jsonify({"status": "error", "error": outcome})
+
+
+@bp.route("/api/auth/userinfo")
+def auth_userinfo():
+    """Return the connector's user info as JSON.
+
+    Responds 401 when no connector exists and 500 with the error message
+    when fetching the user info raises.
+    """
+    connector = state.connector
+    if not connector:
+        return jsonify({"error": "not connected"}), 401
+    try:
+        user_info = connector.get_user_info()
+        return jsonify(user_info)
+    except Exception as exc:
+        return jsonify({"error": str(exc)}), 500
+
+
+@bp.route("/api/auth/signout", methods=["POST"])
+def auth_signout():
+    """Sign out the current connector and remove the delegated token cache.
+
+    Both steps are best effort: failures are ignored and the response is
+    always ``{"status": "ok"}``.
+    """
+    if state.connector:
+        try:
+            state.connector.sign_out()
+        except Exception:
+            pass  # best effort — the connector is dropped regardless
+        state.connector = None
+    # Also clear the delegated token cache so a fresh sign-in is required.
+    # The import lives inside the try block: this module tolerates a missing
+    # m365_connector at load time, so sign-out must not fail on it either.
+    try:
+        from m365_connector import _TOKEN_CACHE_FILE
+        if _TOKEN_CACHE_FILE.exists():
+            _TOKEN_CACHE_FILE.unlink()
+    except Exception:
+        pass
+    return jsonify({"status": "ok"})
+
+
+@bp.route("/api/auth/config", methods=["GET", "POST"])
+def auth_config():
+    """Read or update the saved Microsoft 365 credentials.
+
+    GET:  return client_id, tenant_id, whether a secret is stored, and a
+          masked preview of that secret.
+    POST: save {client_id, tenant_id, client_secret?} and drop the current
+          connector so it is rebuilt from the new values.
+    """
+    if request.method != "POST":
+        stored = _load_config()
+        secret = stored.get("client_secret", "")
+        # Only secrets longer than 8 characters reveal their first/last 4.
+        if len(secret) > 8:
+            preview = secret[:4] + "…" + secret[-4:]
+        elif secret:
+            preview = "***"
+        else:
+            preview = ""
+        return jsonify({
+            "client_id":     stored.get("client_id", ""),
+            "tenant_id":     stored.get("tenant_id", ""),
+            "has_secret":    bool(secret),
+            "secret_preview": preview,
+        })
+
+    payload       = request.get_json() or {}
+    client_id     = payload.get("client_id", "").strip()
+    tenant_id     = payload.get("tenant_id", "").strip()
+    client_secret = payload.get("client_secret", "").strip()
+    if not (client_id and tenant_id):
+        return jsonify({"error": "client_id and tenant_id required"}), 400
+    _save_config({"client_id": client_id, "tenant_id": tenant_id,
+                  "client_secret": client_secret})
+    # Force connector rebuild on next request
+    state.connector = None
+    return jsonify({"status": "saved", "app_mode": bool(client_secret)})
diff --git a/routes/database.py b/routes/database.py
new file mode 100644
index 0000000..cbefcaf
--- /dev/null
+++ b/routes/database.py
@@ -0,0 +1,591 @@
+"""
+Database stats, disposition, export/import, admin PIN, preview, thumbnail
+"""
+from __future__ import annotations
+import base64
+from pathlib import Path
+from flask import Blueprint, Response, jsonify, request
+from routes import state
+from app_config import _set_admin_pin, _verify_admin_pin, _admin_pin_is_set
+from checkpoint import _clear_checkpoint, _DELTA_PATH
+from cpr_detector import _extract_exif, _html_esc, _placeholder_svg
+
+# gdpr_db is optional: DB_OK records whether it imported. When it did not,
+# _get_db is a stub returning None, so routes check DB_OK before calling it.
+try:
+    from gdpr_db import get_db as _get_db
+    DB_OK = True
+except ImportError:
+    DB_OK = False
+    def _get_db(*a, **kw): return None  # type: ignore[misc]
+
+# document_scanner is imported only to record its availability in SCANNER_OK.
+try:
+    import document_scanner as _ds  # noqa: F401
+    SCANNER_OK = True
+except ImportError:
+    SCANNER_OK = False
+
+bp = Blueprint("database", __name__)
+
+
+@bp.route("/api/db/stats")
+def db_stats():
+    """Return stats for the latest (or specified) scan, plus aggregate counts.
+
+    Query params:
+        scan_id  int (optional) — passed to ``get_stats``
+
+    Responds 503 when the database module is unavailable.  If the aggregate
+    queries fail, the aggregate counters default to 0 instead of erroring.
+    """
+    if not DB_OK: return jsonify({"error": "database not available"}), 503
+    scan_id = request.args.get("scan_id", type=int)
+    db   = _get_db()
+    data = db.get_stats(scan_id) or {}
+    # Add aggregate counts the Settings panel needs — query directly so they
+    # are correct even if no scan has finished_at set yet
+    try:
+        import sqlite3 as _sq
+        from contextlib import closing
+
+        # closing() guarantees the connection is released even when one of
+        # the queries below raises (previously it leaked on error).
+        with closing(_sq.connect(db._path)) as con:
+            def _scalar(sql):
+                # First column of the first row of a single-value query.
+                return con.execute(sql).fetchone()[0]
+
+            data["total_items"]   = _scalar("SELECT COUNT(*) FROM flagged_items")
+            data["flagged_items"] = data["total_items"]
+            data["total_scans"]   = _scalar("SELECT COUNT(*) FROM scans")
+            data["finished_scans"]= _scalar("SELECT COUNT(*) FROM scans WHERE finished_at IS NOT NULL")
+            if not data.get("flagged_count"):
+                data["flagged_count"] = data["total_items"]
+            if not data.get("total_scanned"):
+                data["total_scanned"] = _scalar("SELECT COALESCE(SUM(total_scanned),0) FROM scans")
+    except Exception:
+        # Best effort: keep whatever was gathered and zero-fill the rest so
+        # the response always carries the same aggregate keys.
+        data.setdefault("total_items",  0)
+        data.setdefault("flagged_items", 0)
+        data.setdefault("total_scans",   0)
+        data.setdefault("finished_scans", 0)
+    return jsonify(data)
+
+
+@bp.route("/api/db/trend")
+def db_trend():
+    """Return scan history for the trend chart.
+
+    Query params:
+        n  int, default 20 — passed to ``get_trend``
+    """
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+    count = request.args.get("n", default=20, type=int)
+    history = _get_db().get_trend(count)
+    return jsonify(history)
+
+
+@bp.route("/api/db/scans")
+def db_scans():
+    """Return the database's scan list as JSON (503 if the DB is unavailable)."""
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+    scans = _get_db().scans_list()
+    return jsonify(scans)
+
+
+@bp.route("/api/db/subject", methods=["POST"])
+def db_subject_lookup():
+    """Find all items containing a given CPR number.
+
+    Body: {cpr: "DDMMYY-XXXX"}
+
+    Surrounding whitespace, dashes and spaces are removed before the value
+    is handed to ``lookup_data_subject``.
+    NOTE(review): hashing of the CPR is expected to happen inside that
+    call — it is not visible in this handler; confirm.
+    """
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+    payload = request.get_json() or {}
+    raw_cpr = payload.get("cpr", "")
+    cpr = raw_cpr.strip().replace("-", "").replace(" ", "")
+    if not cpr:
+        return jsonify({"error": "cpr required"}), 400
+    matches = _get_db().lookup_data_subject(cpr)
+    return jsonify({"count": len(matches), "items": matches})
+
+
+@bp.route("/api/db/overdue")
+def db_overdue():
+    """Return items older than the retention threshold.
+
+    Query params:
+        years            int, default 5
+        fiscal_year_end  MM-DD string, e.g. 12-31 (omit for rolling window)
+        scan_id          int (omit for latest scan)
+
+    A ValueError from the cutoff computation or the lookup is reported as a
+    400 with the error message.
+    """
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+
+    args            = request.args
+    years           = args.get("years", default=5, type=int)
+    fiscal_year_end = args.get("fiscal_year_end", default=None)
+    scan_id         = args.get("scan_id", type=int)
+
+    try:
+        from gdpr_db import overdue_cutoff
+        cutoff = overdue_cutoff(years, fiscal_year_end)
+        items  = _get_db().get_overdue_items(years, scan_id, fiscal_year_end)
+    except ValueError as exc:
+        return jsonify({"error": str(exc)}), 400
+
+    # "fiscal" when a fiscal year end was supplied, otherwise a rolling window.
+    if fiscal_year_end:
+        cutoff_mode = "fiscal"
+    else:
+        cutoff_mode = "rolling"
+
+    return jsonify({
+        "count":          len(items),
+        "cutoff_date":    cutoff,
+        "cutoff_mode":    cutoff_mode,
+        "fiscal_year_end": fiscal_year_end,
+        "years":          years,
+        "items":          items,
+    })
+
+
+@bp.route("/api/db/disposition", methods=["POST"])
+def db_set_disposition():
+    """Store a compliance disposition for a flagged item.
+
+    Body: {item_id, status, legal_basis?, notes?, reviewed_by?}
+    Status values: unreviewed | retain-legal | retain-legitimate | retain-contract |
+                   delete-scheduled | deleted | personal-use
+
+    ``status`` defaults to "unreviewed"; the other optional fields default
+    to empty strings.  The status value is not validated in this handler.
+    """
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+    body = request.get_json() or {}
+    item_id = body.get("item_id", "")
+    if not item_id:
+        return jsonify({"error": "item_id required"}), 400
+
+    disposition = {
+        "status":      body.get("status", "unreviewed"),
+        "legal_basis": body.get("legal_basis", ""),
+        "notes":       body.get("notes", ""),
+        "reviewed_by": body.get("reviewed_by", ""),
+    }
+    _get_db().set_disposition(item_id, **disposition)
+    return jsonify({"status": "saved"})
+
+
+@bp.route("/api/db/disposition/<item_id>")
+def db_get_disposition(item_id):
+    """Return the stored disposition for ``item_id``.
+
+    Falls back to ``{"status": "unreviewed"}`` when nothing is stored.
+    """
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+    disposition = _get_db().get_disposition(item_id)
+    if not disposition:
+        disposition = {"status": "unreviewed"}
+    return jsonify(disposition)
+
+
+@bp.route("/api/db/flagged")
+def db_flagged_items():
+    """Return the flagged items of the database's current session as JSON.
+
+    Used by the read-only viewer to load results without an active SSE
+    connection.  Returns an empty list when the database is unavailable.
+    JSON-encoded columns are decoded in place: ``special_category`` becomes
+    a decoded value and ``exif_json`` is replaced by a decoded ``exif`` key.
+    """
+    if not DB_OK:
+        return jsonify([])
+    import json as _json
+
+    normalised = []
+    for row in _get_db().get_session_items():
+        # Decode only when the column is still a JSON string; an empty
+        # string decodes to the empty default.
+        category = row.get("special_category")
+        if isinstance(category, str):
+            row["special_category"] = _json.loads(category or "[]")
+        else:
+            row["special_category"] = row.get("special_category", [])
+
+        exif_raw = row.get("exif_json")
+        if isinstance(exif_raw, str):
+            row["exif"] = _json.loads(exif_raw or "{}")
+        else:
+            row["exif"] = row.get("exif", {})
+        row.pop("exif_json", None)
+
+        normalised.append(row)
+    return jsonify(normalised)
+
+
+@bp.route("/api/db/deletion_log")
+def db_deletion_log():
+    """Return the deletion audit log together with its summary stats.
+
+    Query params:
+        limit   int, default 500
+        reason  str filter (optional)
+    """
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+    max_rows      = request.args.get("limit", default=500, type=int)
+    reason_filter = request.args.get("reason", default=None)
+    entries = _get_db().get_deletion_log(limit=max_rows, reason=reason_filter)
+    summary = _get_db().deletion_log_stats()
+    return jsonify({"stats": summary, "entries": entries})
+
+
+@bp.route("/api/db/reset", methods=["POST"])
+def db_reset():
+    """Reset the database and clear in-memory scan results.
+
+    Body: {confirm: "yes", pin: "<admin_pin>"}
+
+    The PIN is only checked when an admin PIN has been set.  On success the
+    checkpoint is cleared and the delta file is removed if it exists.
+    """
+    body = request.get_json() or {}
+    if body.get("confirm") != "yes":
+        return jsonify({"error": "confirm=yes required"}), 400
+    if _admin_pin_is_set() and not _verify_admin_pin(body.get("pin", "")):
+        return jsonify({"error": "incorrect_pin"}), 403
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+
+    try:
+        _get_db().reset()
+        # Drop everything cached from previous scans.
+        state.flagged_items = []
+        state.scan_meta = {}
+        _clear_checkpoint()
+        if _DELTA_PATH.exists():
+            _DELTA_PATH.unlink()
+    except Exception as exc:
+        return jsonify({"error": str(exc)}), 500
+    return jsonify({"ok": True, "message": "Database reset. All scan results cleared."})
+
+
+@bp.route("/api/admin/pin", methods=["GET"])
+def admin_pin_status():
+    """Tell the client whether an admin PIN is configured."""
+    pin_configured = _admin_pin_is_set()
+    return jsonify({"pin_set": pin_configured})
+
+
+@bp.route("/api/admin/pin", methods=["POST"])
+def admin_pin_set():
+    """Set or change the admin PIN.
+
+    Body: {current_pin: "..", new_pin: ".."}
+
+    ``current_pin`` is verified only when a PIN is already set; a mismatch
+    yields 403.  An empty ``new_pin`` yields 400.
+    """
+    body = request.get_json() or {}
+    new_pin = body.get("new_pin", "").strip()
+    if not new_pin:
+        return jsonify({"error": "new_pin required"}), 400
+    if _admin_pin_is_set() and not _verify_admin_pin(body.get("current_pin", "")):
+        return jsonify({"error": "incorrect_pin"}), 403
+    _set_admin_pin(new_pin)
+    return jsonify({"ok": True})
+
+
+@bp.route("/api/db/export")
+def db_export():
+    """Export the database to a ZIP archive and return it as a download.  (#11)
+
+    The archive is written to a temporary file by ``export_db``, read into
+    memory and the temporary file is removed before responding.  The
+    download is named ``gdpr_export_<timestamp>.zip``.
+
+    NOTE(review): the 500 response includes the full traceback in
+    ``detail`` — confirm this is acceptable to expose to clients.
+    """
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+    import tempfile, datetime as _dt
+    try:
+        stamp = _dt.datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"gdpr_export_{stamp}.zip"
+        # Reserve a path only; the handle is closed before export_db writes.
+        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as handle:
+            zip_path = Path(handle.name)
+        try:
+            _get_db().export_db(zip_path)
+            payload = zip_path.read_bytes()
+        finally:
+            # Best-effort cleanup of the temporary archive.
+            try:
+                zip_path.unlink()
+            except Exception:
+                pass
+        disposition = f'attachment; filename="{filename}"'
+        return Response(
+            payload,
+            mimetype="application/zip",
+            headers={"Content-Disposition": disposition},
+        )
+    except Exception as exc:
+        import traceback
+        return jsonify({"error": str(exc), "detail": traceback.format_exc()}), 500
+
+
+@bp.route("/api/db/import", methods=["POST"])
+def db_import():
+    """Import a previously exported ZIP archive into the database.  (#11)
+
+    Multipart form:
+        file    — the export ZIP
+        mode    — "merge" (default) or "replace"
+        confirm — must be "yes" when mode == "replace"
+
+    Returns ``{"ok": True, "mode": ..., "imported": ...}`` on success,
+    400 for a missing file / missing confirmation / ValueError or
+    FileNotFoundError from the import, and 500 for any other error.
+    """
+    if not DB_OK:
+        return jsonify({"error": "database not available"}), 503
+    import tempfile
+    f = request.files.get("file")
+    if not f:
+        return jsonify({"error": "no file uploaded"}), 400
+    mode    = request.form.get("mode", "merge")
+    confirm = request.form.get("confirm", "")
+    if mode == "replace" and confirm != "yes":
+        return jsonify({"error": "confirm=yes required for replace mode"}), 400
+    tmp = None
+    try:
+        # NamedTemporaryFile creates the file atomically; tempfile.mktemp is
+        # deprecated because another process can claim the name first.
+        with tempfile.NamedTemporaryFile(suffix=".zip", prefix="gdpr_import_",
+                                         delete=False) as tf:
+            tmp = Path(tf.name)
+        f.save(str(tmp))
+        result = _get_db().import_db(tmp, mode=mode)
+        return jsonify({"ok": True, "mode": mode, "imported": result})
+    except (ValueError, FileNotFoundError) as e:
+        return jsonify({"error": str(e)}), 400
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+    finally:
+        # Always remove the uploaded copy, including when the import failed.
+        if tmp is not None:
+            try:
+                tmp.unlink(missing_ok=True)
+            except OSError:
+                pass
+
+
+@bp.route("/api/preview/<item_id>")
+def get_preview(item_id):
+    """Return a preview URL or HTML for a flagged item."""
+    source_type = request.args.get("source_type", "")
+    account_id  = request.args.get("account_id", "me") or "me"
+
+    # Local and SMB file sources — re-read file and render preview
+    if source_type in ("local", "smb"):
+        item_meta = next((x for x in state.flagged_items if x.get("id") == item_id), {})
+        full_path = item_meta.get("full_path", "")
+        name      = item_meta.get("name", "")
+        ext       = Path(name).suffix.lower() if name else ""
+
+        if not full_path:
+            return jsonify({"error": "File path not available — rescan to enable preview"})
+
+        if source_type == "smb":
+            return jsonify({
+                "type":  "info",
+                "html":  f"<p style='color:var(--muted);font-size:12px'>SMB preview requires re-reading the file over the network. Open the file directly: <code>{full_path}</code></p>",
+            })
+
+        try:
+            file_path = Path(full_path).expanduser()
+            if not file_path.exists():
+                return jsonify({"error": f"File not found: {full_path}"})
+
+            size = file_path.stat().st_size
+
+            # Images — return as data URI
+            if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}:
+                import base64 as _b64
+                mime = {"jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
+                        "gif": "image/gif", "webp": "image/webp", "bmp": "image/bmp"}.get(ext.lstrip("."), "image/jpeg")
+                data = _b64.b64encode(file_path.read_bytes()).decode()
+                _exif = item_meta.get("exif") or _extract_exif(file_path.read_bytes(), name)
+                exif_html = ""
+                if _exif:
+                    rows = []
+                    if _exif.get("gps"):
+                        g = _exif["gps"]
+                        rows.append(f'<tr><td>📍 GPS</td><td><a href="{g["maps_url"]}" target="_blank" style="color:#7ec8d0">{g["lat"]}, {g["lon"]}</a></td></tr>')
+                    if _exif.get("author"):
+                        rows.append(f'<tr><td>👤 Author</td><td>{_html_esc(_exif["author"])}</td></tr>')
+                    if _exif.get("datetime"):
+                        rows.append(f'<tr><td>📅 Date</td><td>{_html_esc(_exif["datetime"])}</td></tr>')
+                    if _exif.get("device"):
+                        rows.append(f'<tr><td>📷 Device</td><td>{_html_esc(_exif["device"])}</td></tr>')
+                    for field, val in (_exif.get("pii_fields") or {}).items():
+                        if field not in ("Artist",):
+                            rows.append(f'<tr><td>{_html_esc(field)}</td><td>{_html_esc(str(val)[:200])}</td></tr>')
+                    if rows:
+                        exif_html = ('<details style="margin:8px 12px;font-size:11px">'
+                                     '<summary style="cursor:pointer;color:#888">EXIF data</summary>'
+                                     '<table style="border-collapse:collapse;width:100%;margin-top:6px">'
+                                     + "".join(f'<tr style="border-top:1px solid #333"><td style="padding:4px 8px;color:#888;width:120px;white-space:nowrap">{r.split("</td><td>")[0].replace("<tr><td>","")}</td><td style="padding:4px 8px;word-break:break-all">{r.split("</td><td>")[1].replace("</td></tr>","")}</td></tr>' for r in rows)
+                                     + '</table></details>')
+                html = f'<div style="text-align:center;padding:12px"><img src="data:{mime};base64,{data}" style="max-width:100%;max-height:60vh;border-radius:6px"></div>{exif_html}'
+                return jsonify({"type": "html", "html": html})
+
+            # Text-based files — render with highlighted CPR numbers
+            if ext in {".txt", ".csv", ".eml", ".md", ".log", ".xml", ".json", ".html", ".htm"}:
+                if size > 2 * 1024 * 1024:
+                    return jsonify({"error": "File too large for inline preview (>2 MB)"})
+                raw = file_path.read_bytes().decode("utf-8", errors="replace")
+                import html as _html, re as _re
+                escaped = _html.escape(raw[:50000])
+                escaped = _re.sub(
+                    r"(\d{6}[-\s]?\d{4})",
+                    r'<mark style="background:#ff444455;color:#ff8888;border-radius:2px">\1</mark>',
+                    escaped
+                )
+                html_out = (
+                    '<pre style="font-family:var(--mono);font-size:11px;white-space:pre-wrap;'
+                    'word-break:break-all;padding:12px;color:var(--text);line-height:1.6">'
+                    + escaped + "</pre>"
+                )
+                return jsonify({"type": "html", "html": html_out})
+
+            # PDF — render first 5 pages as text using pdfplumber
+            if ext == ".pdf":
+                if size > 20 * 1024 * 1024:
+                    return jsonify({"error": "File too large for preview (>20 MB)"})
+                if SCANNER_OK:
+                    try:
+                        import pdfplumber as _plumber, io as _io, html as _h
+                        pages_html = []
+                        with _plumber.open(_io.BytesIO(file_path.read_bytes())) as pdf:
+                            total = len(pdf.pages)
+                            for i, page in enumerate(pdf.pages[:5]):
+                                text = page.extract_text() or ""
+                                if not text.strip():
+                                    text = f"[Page {i+1}: image-only or OCR required]"
+                                import re as _re
+                                escaped = _re.sub(
+                                    r"(\d{6}[-\s]?\d{4})",
+                                    r'<mark style="background:#ff444455;color:#ff8888;border-radius:2px">\1</mark>',
+                                    _h.escape(text)
+                                )
+                                pages_html.append(
+                                    f'<div style="border-bottom:1px solid #333;padding:10px 0;margin-bottom:8px">'
+                                    f'<div style="font-size:9px;color:#666;margin-bottom:4px">Page {i+1}</div>'
+                                    f'<pre style="font-size:11px;white-space:pre-wrap;word-break:break-all;margin:0;line-height:1.6">{escaped}</pre>'
+                                    f'</div>'
+                                )
+                        note = f'<div style="font-size:10px;color:#666;padding:6px 0">Showing {min(5,total)} of {total} page(s)</div>' if total > 5 else ""
+                        html_out = f'<div style="padding:10px">{note}{"".join(pages_html)}</div>'
+                        return jsonify({"type": "html", "html": html_out})
+                    except Exception:
+                        pass
+                html_out = (
+                    f'<div style="padding:24px;text-align:center;font-family:sans-serif">'
+                    f'<div style="font-size:40px">📄</div>'
+                    f'<div style="font-size:13px;font-weight:600;margin:8px 0">{_html_esc(name)}</div>'
+                    f'<div style="font-size:11px;color:var(--muted)">{round(size/1024,1)} KB</div>'
+                    f'<div style="margin-top:12px;font-size:11px;color:var(--muted)">{_html_esc(full_path)}</div>'
+                    f'</div>'
+                )
+                return jsonify({"type": "html", "html": html_out})
+
+            # Word/Excel/CSV — render content or show metadata
+            if SCANNER_OK and ext in {".xlsx", ".xlsm", ".csv"}:
+                try:
+                    import html as _hh, re as _re, io as _io
+                    if ext == ".csv":
+                        raw = file_path.read_bytes().decode("utf-8", errors="replace")
+                        rows = [r for r in raw.splitlines()[:50]]
+                        table_rows = ""
+                        for i, row in enumerate(rows):
+                            cols = row.split(",")
+                            style = "background:#2a2a2a" if i % 2 == 0 else ""
+                            cells = "".join(f'<td style="padding:3px 8px;border:1px solid #333;max-width:160px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">{_hh.escape(str(c)[:80])}</td>' for c in cols)
+                            table_rows += f'<tr style="{style}">{cells}</tr>'
+                        html_out = f'<div style="padding:8px;overflow-x:auto"><table style="border-collapse:collapse;font-size:11px;color:var(--text)">{table_rows}</table></div>'
+                    else:
+                        import openpyxl as _xl
+                        wb = _xl.load_workbook(_io.BytesIO(file_path.read_bytes()), read_only=True, data_only=True)
+                        tabs = []
+                        for sheet_name in wb.sheetnames[:3]:
+                            ws = wb[sheet_name]
+                            table_rows = ""
+                            for i, row in enumerate(ws.iter_rows(max_row=50, values_only=True)):
+                                style = "background:#2a2a2a" if i % 2 == 0 else ""
+                                cells = "".join(
+                                    f'<td style="padding:3px 8px;border:1px solid #333;max-width:160px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">'
+                                    f'{_hh.escape(str(c)[:80]) if c is not None else ""}</td>'
+                                    for c in row
+                                )
+                                table_rows += f'<tr style="{style}">{cells}</tr>'
+                            tabs.append(
+                                f'<div style="margin-bottom:12px">'
+                                f'<div style="font-size:10px;color:#888;margin-bottom:4px">📋 {_hh.escape(sheet_name)}</div>'
+                                f'<div style="overflow-x:auto"><table style="border-collapse:collapse;font-size:11px;color:var(--text)">{table_rows}</table></div>'
+                                f'</div>'
+                            )
+                        html_out = '<div style="padding:8px">' + "".join(tabs) + '</div>'
+                    return jsonify({"type": "html", "html": html_out})
+                except Exception:
+                    pass
+
+            if SCANNER_OK and ext in {".docx", ".doc"}:
+                try:
+                    import io as _io, html as _hh, re as _re
+                    from docx import Document as _Doc
+                    doc = _Doc(_io.BytesIO(file_path.read_bytes()))
+                    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()][:80]
+                    text = "\n".join(paragraphs)
+                    escaped = _re.sub(
+                        r"(\d{6}[-\s]?\d{4})",
+                        r'<mark style="background:#ff444455;color:#ff8888;border-radius:2px">\1</mark>',
+                        _hh.escape(text)
+                    )
+                    html_out = f'<div style="padding:12px"><pre style="font-size:11px;white-space:pre-wrap;word-break:break-all;line-height:1.7">{escaped}</pre></div>'
+                    return jsonify({"type": "html", "html": html_out})
+                except Exception:
+                    pass
+
+            html_out = (
+                f'<div style="padding:24px;text-align:center;font-family:sans-serif">'
+                f'<div style="font-size:40px">📄</div>'
+                f'<div style="font-size:13px;font-weight:600;margin:8px 0">{_html_esc(name)}</div>'
+                f'<div style="font-size:11px;color:var(--muted)">{round(size/1024,1)} KB · {ext.upper().lstrip(".")} file</div>'
+                f'<div style="margin-top:12px;font-size:11px;color:var(--muted)">{_html_esc(full_path)}</div>'
+                f'</div>'
+            )
+            return jsonify({"type": "html", "html": html_out})
+
+        except PermissionError:
+            return jsonify({"error": f"Permission denied: {full_path}"})
+        except Exception as e:
+            return jsonify({"error": str(e)})
+
+    if not state.connector:
+        return jsonify({"error": "not authenticated"}), 401
+
+    item_meta = next((x for x in state.flagged_items if x.get("id") == item_id), {})
+    drive_id  = item_meta.get("drive_id", "")
+
+    try:
+        if source_type == "email":
+            uid = account_id
+            try:
+                msg = state.connector._get(
+                    f"/{'me' if uid == 'me' else 'users/' + uid}/messages/{item_id}",
+                    {"$select": "subject,from,receivedDateTime,body"}
+                )
+            except Exception as e:
+                return jsonify({"error": f"Could not load email: {e}"})
+
+            sender   = msg.get("from", {}).get("emailAddress", {})
+            from_str = f"{sender.get('name', '')} &lt;{sender.get('address', '')}&gt;"
+            date_str = (msg.get("receivedDateTime") or "")[:10]
+            body_html = msg.get("body", {}).get("content", "") or ""
+            content_type = msg.get("body", {}).get("contentType", "text")
+            import html as _html
+            if content_type == "text":
+                body_html = "<pre style='white-space:pre-wrap;font-family:sans-serif'>" + _html.escape(body_html) + "</pre>"
+
+            att_list = item_meta.get("attachments", [])
+            att_html = ""
+            if att_list:
+                def _att_row(a):
+                    cpr_badge = f'<span class="att-cpr">{a["cpr_count"]} CPR</span>' if a["cpr_count"] else ''
+                    name_esc  = _html.escape(a["name"])
+                    return f'<div class="att-row"><span class="att-name">{name_esc}</span>{cpr_badge}</div>'
+                rows = "".join(_att_row(a) for a in att_list)
+                att_html = f"""
+<div class="att-section">
+  <div class="att-header">📎 Attachments ({len(att_list)})</div>
+  {rows}
+</div>"""
+
+            page = f"""<!DOCTYPE html><html><head><meta charset="utf-8">
+<style>
+  *, *::before, *::after {{ box-sizing: border-box; max-width: 100%; }}
+  html, body {{ margin: 0; padding: 0; overflow-x: hidden; }}
+  body {{ font-family: -apple-system, sans-serif; font-size: 13px; padding: 12px 16px;
+         background: #fff; color: #111; word-break: break-word; }}
+  img {{ max-width: 100% !important; height: auto !important; }}
+  table {{ max-width: 100% !important; table-layout: fixed; word-break: break-word; }}
+  .hdr {{ border-bottom: 1px solid #eee; margin-bottom: 12px; padding-bottom: 10px; }}
+  .hdr-row {{ color: #555; font-size: 12px; margin-bottom: 3px; }}
+  .hdr-row b {{ color: #111; }}
+  .att-section {{ margin-top: 16px; border-top: 1px solid #eee; padding-top: 10px; }}
+  .att-header {{ font-size: 12px; font-weight: 600; color: #555; margin-bottom: 6px; }}
+  .att-row {{ display: flex; align-items: center; gap: 8px; font-size: 12px;
+              padding: 4px 0; border-bottom: 1px solid #f0f0f0; }}
+  .att-name {{ flex: 1; color: #333; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }}
+  .att-cpr {{ background: #fff0f0; color: #c00; font-size: 11px; padding: 1px 6px;
+              border-radius: 10px; font-weight: 600; white-space: nowrap; }}
+  ::-webkit-scrollbar {{ width: 4px; height: 4px; }}
+  ::-webkit-scrollbar-track {{ background: transparent; }}
+  ::-webkit-scrollbar-thumb {{ background: #aaa; border-radius: 2px; }}
+  * {{ scrollbar-width: thin; scrollbar-color: #aaa transparent; }}
+</style></head><body>
+<div class="hdr">
+  <div class="hdr-row"><b>From:</b> {from_str}</div>
+  <div class="hdr-row"><b>Date:</b> {date_str}</div>
+  <div class="hdr-row"><b>Subject:</b> {_html.escape(msg.get('subject', '(no subject)'))}</div>
+</div>
+{body_html}{att_html}
+</body></html>"""
+            return jsonify({"type": "html", "html": page})
+
+        else:
+            # OneDrive / SharePoint / Teams — use Graph's embed preview API
+            preview_url = None
+            errors = []
+
+            endpoints_to_try = []
+            if drive_id:
+                endpoints_to_try.append(f"/drives/{drive_id}/items/{item_id}/preview")
+            uid = account_id
+            if uid and uid != "me":
+                endpoints_to_try.append(f"/users/{uid}/drive/items/{item_id}/preview")
+            endpoints_to_try.append(f"/me/drive/items/{item_id}/preview")
+
+            for ep in endpoints_to_try:
+                try:
+                    data = state.connector._post(ep, {})
+                    preview_url = data.get("getUrl") or data.get("postUrl")
+                    if preview_url:
+                        break
+                except Exception as e:
+                    errors.append(str(e))
+
+            if preview_url:
+                return jsonify({"type": "iframe", "url": preview_url})
+            return jsonify({"error": "No preview available for this file type. " + "; ".join(errors[:1])})
+
+    except Exception as e:
+        return jsonify({"error": str(e)})
+
+
+@bp.route("/api/thumb")
+def thumb():
+    """Fallback thumbnail for non-image files."""
+    name = request.args.get("name", "file")
+    ext  = Path(name).suffix.lower()
+    svg_b64 = _placeholder_svg(ext, name)
+    data = base64.b64decode(svg_b64)
+    return Response(data, mimetype="image/svg+xml",
+                    headers={"Cache-Control": "public, max-age=3600"})
diff --git a/routes/email.py b/routes/email.py
new file mode 100644
index 0000000..b04c437
--- /dev/null
+++ b/routes/email.py
@@ -0,0 +1,303 @@
+"""
+SMTP configuration, test, and report sending
+"""
+from __future__ import annotations
+from flask import Blueprint, jsonify, request
+from routes import state
+from app_config import _load_smtp_config, _save_smtp_config
+from routes.export import _build_excel_bytes
+
+bp = Blueprint("email", __name__)
+
+
+def _send_report_email(xl_bytes: bytes, fname: str,
+                       smtp_cfg: dict, recipients: list[str]) -> None:
+    """Send the scan report Excel as an email attachment via SMTP."""
+    import smtplib as _smtp
+    import email.mime.text as _mime_text
+    import email.mime.multipart as _mime_mp
+    import email.mime.base as _mime_base
+    import email.encoders as _encoders
+    import datetime as _dt
+
+    host      = smtp_cfg.get("host", "").strip()
+    port      = int(smtp_cfg.get("port", 587))
+    username  = smtp_cfg.get("username", "").strip()
+    password  = smtp_cfg.get("password", "")
+    from_addr = smtp_cfg.get("from_addr", "").strip() or username
+    use_ssl   = bool(smtp_cfg.get("use_ssl", False))
+    use_tls   = bool(smtp_cfg.get("use_tls", True)) and not use_ssl
+
+    if not host:
+        raise ValueError("No SMTP host configured")
+
+    subject = f"GDPR Scanner \u2014 scan report {_dt.datetime.now().strftime('%Y-%m-%d')}"
+    body_html = (
+        "<html><body style='font-family:Arial,sans-serif;color:#333;padding:24px'>"
+        "<h2 style='color:#1F3864'>\u2601\ufe0f GDPR Scanner \u2014 scan report</h2>"
+        f"<p>Please find the latest scan report attached ({fname}).</p>"
+        f"<p style='color:#888;font-size:12px'>Generated: {_dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>"
+        "</body></html>"
+    )
+
+    msg = _mime_mp.MIMEMultipart("mixed")
+    msg["Subject"] = subject
+    msg["From"]    = from_addr
+    msg["To"]      = ", ".join(recipients)
+    msg.attach(_mime_text.MIMEText(body_html, "html"))
+
+    part = _mime_base.MIMEBase(
+        "application",
+        "vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+    )
+    part.set_payload(xl_bytes)
+    _encoders.encode_base64(part)
+    part.add_header("Content-Disposition", f'attachment; filename="{fname}"')
+    msg.attach(part)
+
+    if use_ssl:
+        server = _smtp.SMTP_SSL(host, port, timeout=30)
+    else:
+        server = _smtp.SMTP(host, port, timeout=30)
+    with server:
+        server.ehlo()
+        if use_tls:
+            server.starttls()
+            server.ehlo()
+        if username and password:
+            server.login(username, password)
+        server.sendmail(from_addr, recipients, msg.as_string())
+
+
+def _send_email_graph(subject: str, html_body: str,
+                      recipients: list[str],
+                      attachment_bytes: bytes = None,
+                      attachment_name: str = None) -> None:
+    """Send an email via Microsoft Graph API using the current connector token.
+    Requires Mail.Send permission (delegated or application).
+    Raises on failure."""
+    if not state.connector or not state.connector.is_authenticated():
+        raise RuntimeError("Not connected to Microsoft 365")
+
+    to_list = [{"emailAddress": {"address": r}} for r in recipients]
+    message: dict = {
+        "subject": subject,
+        "body":    {"contentType": "HTML", "content": html_body},
+        "toRecipients": to_list,
+    }
+    if attachment_bytes and attachment_name:
+        import base64 as _b64
+        message["attachments"] = [{
+            "@odata.type":  "#microsoft.graph.fileAttachment",
+            "name":         attachment_name,
+            "contentType":  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            "contentBytes": _b64.b64encode(attachment_bytes).decode(),
+        }]
+
+    if state.connector.is_app_mode:
+        smtp_cfg = _load_smtp_config()
+        sender = smtp_cfg.get("from_addr") or smtp_cfg.get("username") or recipients[0]
+        state.connector._post(f"/users/{sender}/sendMail", {"message": message, "saveToSentItems": False})
+    else:
+        state.connector._post("/me/sendMail", {"message": message, "saveToSentItems": False})
+
+
+@bp.route("/api/smtp/config", methods=["GET"])
+def smtp_config_get():
+    """Return saved SMTP config (password redacted — never sent to client)."""
+    cfg = _load_smtp_config()
+    safe = {k: v for k, v in cfg.items() if k != "password"}
+    safe["has_password"] = bool(cfg.get("password"))
+    return jsonify(safe)
+
+
+@bp.route("/api/smtp/config", methods=["POST"])
+def smtp_config_save():
+    """Save SMTP config. Omitting 'password' preserves any previously saved password."""
+    data = request.get_json() or {}
+    existing = _load_smtp_config()
+    if not data.get("password") and existing.get("password"):
+        data["password"] = existing["password"]
+    _save_smtp_config(data)
+    return jsonify({"status": "saved"})
+
+
+@bp.route("/api/smtp/test", methods=["POST"])
+def smtp_test():
+    """Send a test email. Tries Microsoft Graph API first (no SMTP config needed),
+    falls back to SMTP if Graph is unavailable."""
+    import datetime as _dt
+    saved      = _load_smtp_config()
+    recipients = saved.get("recipients", [])
+    if isinstance(recipients, str):
+        recipients = [r.strip() for r in recipients.replace(";", ",").split(",") if r.strip()]
+    if not recipients:
+        return jsonify({"error": "No recipients configured — add at least one recipient and save first"}), 400
+
+    subject  = f"GDPR Scanner — test email ({_dt.datetime.now().strftime('%Y-%m-%d %H:%M')})"
+    body_html = (
+        "<html><body style='font-family:Arial,sans-serif;color:#333;padding:24px'>"
+        "<h2 style='color:#1F3864'>☁️ GDPR Scanner — test email</h2>"
+        "<p>This is a test email confirming that your email configuration is working correctly.</p>"
+        f"<p style='color:#888;font-size:12px'>Sent: {_dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>"
+        "</body></html>"
+    )
+
+    # Try Graph API first
+    if state.connector and state.connector.is_authenticated():
+        try:
+            _send_email_graph(subject, body_html, recipients)
+            return jsonify({"ok": True,
+                            "message": f"Test email sent via Microsoft Graph to {', '.join(recipients)}"})
+        except Exception as graph_err:
+            graph_error_str = str(graph_err)
+    else:
+        graph_error_str = None
+
+    # Fall back to SMTP
+    host      = saved.get("host", "").strip()
+    port      = int(saved.get("port", 587))
+    username  = saved.get("username", "").strip()
+    password  = saved.get("password", "")
+    from_addr = saved.get("from_addr", "").strip() or username
+    use_ssl   = bool(saved.get("use_ssl", False))
+    use_tls   = bool(saved.get("use_tls", True)) and not use_ssl
+
+    if not host:
+        return jsonify({"error": "No SMTP host configured. To send via Microsoft 365 Graph (no SMTP needed), add Mail.Send to your Azure app registration."}), 400
+
+    try:
+        import smtplib as _smtp
+        import email.mime.text as _mime_text
+        import email.mime.multipart as _mime_mp
+        msg = _mime_mp.MIMEMultipart("alternative")
+        msg["Subject"] = subject
+        msg["From"]    = from_addr
+        msg["To"]      = ", ".join(recipients)
+        msg.attach(_mime_text.MIMEText(body_html, "html"))
+        if use_ssl:
+            server = _smtp.SMTP_SSL(host, port, timeout=15)
+        else:
+            server = _smtp.SMTP(host, port, timeout=15)
+        with server:
+            server.ehlo()
+            if use_tls:
+                server.starttls()
+                server.ehlo()
+            if username and password:
+                server.login(username, password)
+            server.sendmail(from_addr, recipients, msg.as_string())
+        suffix = " (⚠ Graph also failed — Mail.Send permission not granted)" if graph_error_str else ""
+        return jsonify({"ok": True, "message": f"Test email sent via SMTP to {', '.join(recipients)}{suffix}"})
+    except Exception as smtp_err:
+        err_str = str(smtp_err)
+        _h = host.lower()
+        _corp_m365   = "office365" in _h or "microsoft" in _h
+        _personal_ms = not _corp_m365 and any(s in _h for s in ("outlook", "live", "hotmail"))
+        _gmail_host  = "gmail" in _h or "smtp.google" in _h
+        _auth_err    = "5.7.57" in err_str or "530" in err_str or "535" in err_str or \
+                       "534" in err_str or "not authenticated" in err_str.lower() or \
+                       "Username and Password" in err_str
+        _conn_err    = "nodename nor servname" in err_str or "Name or service not known" in err_str or \
+                       "getaddrinfo" in err_str or "Connection refused" in err_str or \
+                       "Errno 8" in err_str or "Errno 111" in err_str or "Errno 61" in err_str or \
+                       "timed out" in err_str.lower()
+        if _conn_err:
+            err_str = (f"Could not connect to SMTP server \"{host}\" on port {port}. "
+                       f"Check that the hostname and port are correct.")
+        elif _corp_m365 and _auth_err:
+            err_str = ("M365 blocked SMTP AUTH. Fix: enable Authenticated SMTP in the M365 admin centre "
+                       "(Users → Active users → [user] → Mail → Manage email apps → Authenticated SMTP), "
+                       "or add Mail.Send to your Azure app to use Graph instead.")
+        elif (_personal_ms or _gmail_host) and _auth_err:
+            provider = "Microsoft" if _personal_ms else "Google"
+            url = "account.microsoft.com/security" if _personal_ms else "myaccount.google.com → Security → 2-Step Verification"
+            err_str = (f"Authentication failed — {provider} blocks regular passwords for SMTP when MFA is enabled.\n\n"
+                       f"Fix: create an App Password at {url} → App passwords "
+                       f"and use that instead of your normal password.")
+        elif graph_error_str:
+            err_str = f"SMTP: {err_str} | Graph also unavailable (Mail.Send not granted)"
+        return jsonify({"error": err_str}), 200
+
+
+@bp.route("/api/send_report", methods=["POST"])
+def send_report():
+    """Build Excel and email it to the requested recipients.
+    Tries Microsoft Graph API first, falls back to SMTP."""
+    if not state.flagged_items:
+        return jsonify({"error": "No results to send — run a scan first"}), 400
+
+    data       = request.get_json() or {}
+    smtp_cfg   = _load_smtp_config()
+    recipients = data.get("recipients", []) or smtp_cfg.get("recipients", [])
+    if isinstance(recipients, str):
+        recipients = [r.strip() for r in recipients.replace(";", ",").split(",") if r.strip()]
+    if data.get("smtp"):
+        smtp_cfg = {**smtp_cfg, **data["smtp"]}
+    if not recipients:
+        return jsonify({"error": "No recipients specified"}), 400
+
+    try:
+        xl_bytes, fname = _build_excel_bytes()
+    except Exception as e:
+        return jsonify({"error": f"Excel build failed: {e}"}), 500
+
+    import datetime as _dt
+    subject   = f"GDPR Scanner — scan report {_dt.datetime.now().strftime('%Y-%m-%d')}"
+    body_html = (
+        "<html><body style='font-family:Arial,sans-serif;color:#333;padding:24px'>"
+        "<h2 style='color:#1F3864'>\u2601\ufe0f GDPR Scanner \u2014 scan report</h2>"
+        f"<p>Please find the latest scan report attached ({fname}).</p>"
+        f"<p style='color:#888;font-size:12px'>Generated: {_dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}<br>"
+        f"Items flagged: {len(state.flagged_items)}</p>"
+        "</body></html>"
+    )
+
+    # Try Graph API first
+    if state.connector and state.connector.is_authenticated():
+        try:
+            _send_email_graph(subject, body_html, recipients,
+                              attachment_bytes=xl_bytes, attachment_name=fname)
+            return jsonify({"status": "sent", "method": "graph",
+                            "recipients": recipients, "filename": fname})
+        except Exception as graph_err:
+            graph_err_str = str(graph_err)
+            if "403" in graph_err_str or "Forbidden" in graph_err_str \
+                    or "Mail.Send" in graph_err_str or "insufficient" in graph_err_str.lower():
+                return jsonify({"error": (
+                    "Mail.Send permission not granted on the Azure app registration. "
+                    "Go to Azure AD → App registrations → [your app] → API permissions → "
+                    "Add → Microsoft Graph → Mail.Send → Grant admin consent."
+                )}), 500
+
+    # Fall back to SMTP
+    try:
+        _send_report_email(xl_bytes, fname, smtp_cfg, recipients)
+        return jsonify({"status": "sent", "method": "smtp",
+                        "recipients": recipients, "filename": fname})
+    except Exception as e:
+        err = str(e)
+        _h2 = smtp_cfg.get("host", "").lower()
+        _p2 = int(smtp_cfg.get("port", 587))
+        _corp_m365_2   = "office365" in _h2 or "microsoft" in _h2
+        _personal_ms_2 = not _corp_m365_2 and any(s in _h2 for s in ("outlook", "live", "hotmail"))
+        _gmail_2       = "gmail" in _h2 or "smtp.google" in _h2
+        _auth_err_2    = "5.7.57" in err or "530" in err or "535" in err or \
+                         "534" in err or "not authenticated" in err.lower()
+        _conn_err_2    = "nodename nor servname" in err or "Name or service not known" in err or \
+                         "getaddrinfo" in err or "Connection refused" in err or \
+                         "Errno 8" in err or "Errno 111" in err or "Errno 61" in err or \
+                         "timed out" in err.lower()
+        if _conn_err_2:
+            err = (f"Could not connect to SMTP server \"{_h2}\" on port {_p2}. "
+                   f"Check that the hostname and port are correct.")
+        elif _corp_m365_2 and _auth_err_2:
+            err = (f"{err}\n\nTip: Enable SMTP AUTH for this mailbox in the Microsoft 365 admin centre, "
+                   "or connect to M365 first so the scanner can send via Microsoft Graph instead.")
+        elif (_personal_ms_2 or _gmail_2) and _auth_err_2:
+            provider2 = "Microsoft" if _personal_ms_2 else "Google"
+            url2 = "account.microsoft.com/security" if _personal_ms_2 else "myaccount.google.com → Security → 2-Step Verification"
+            err = (f"Authentication failed — {provider2} blocks regular passwords for SMTP when MFA is enabled.\n\n"
+                   f"Fix: create an App Password at {url2} → App passwords "
+                   f"and use that instead of your normal password.")
+        return jsonify({"error": err}), 500
diff --git a/routes/export.py b/routes/export.py
new file mode 100644
index 0000000..7b87b7d
--- /dev/null
+++ b/routes/export.py
@@ -0,0 +1,1222 @@
+"""
+Excel and Article 30 export, bulk delete
+"""
+from __future__ import annotations
+import json, io, re, traceback, logging
+from pathlib import Path
+from flask import Blueprint, Response, jsonify, request
+from routes import state
+from app_config import _GUID_RE, _resolve_display_name
+
+try:
+    from gdpr_db import get_db as _get_db
+    DB_OK = True
+except ImportError:
+    DB_OK = False
+    def _get_db(*a, **kw): return None  # type: ignore[misc]
+
+try:
+    from m365_connector import M365PermissionError
+except ImportError:
+    class M365PermissionError(Exception): pass  # type: ignore[no-redef]
+
+bp = Blueprint("export", __name__)
+logger = logging.getLogger(__name__)
+
+
+def _build_excel_bytes() -> tuple[bytes, str]:
+    """Build the M365 scan Excel workbook and return (bytes, filename).
+    Raises on error. Used by export_excel() and send_report()."""
+    from openpyxl import Workbook
+    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
+    from openpyxl.utils import get_column_letter
+
+    HEADER_BG  = "1F3864"
+    HEADER_FG  = "FFFFFF"
+    ALT_BG     = "EEF2FF"
+    SOURCE_MAP = {
+        "email":      ("📧 Outlook",       "D6E4F7"),
+        "onedrive":   ("💾 OneDrive",     "D6F7E4"),
+        "sharepoint": ("🌐 SharePoint",   "FFF0D6"),
+        "teams":      ("💬 Teams",        "F7D6F0"),
+        "gmail":      ("📧 Gmail",        "D6EAF8"),
+        "gdrive":     ("💾 Google Drive", "D5F5E3"),
+        "local":      ("📁 Local",        "E6F7E6"),
+        "smb":        ("🌐 Network",      "E0F0FA"),
+    }
+    COLS = [
+        ("Name / Subject",    45),
+        ("CPR Hits",           9),
+        ("Face count",         9),
+        ("GPS",                6),
+        ("Special category",  22),
+        ("EXIF author",       18),
+        ("Folder",            30),
+        ("Account",           24),
+        ("Role",              10),
+        ("Disposition",       18),
+        ("Date Modified",     14),
+        ("Size (KB)",         10),
+        ("URL",               50),
+    ]
+
+    thin   = Side(style="thin", color="CCCCCC")
+    border = Border(left=thin, right=thin, top=thin, bottom=thin)
+
+    def _fill(hex_col):
+        return PatternFill("solid", fgColor=hex_col)
+
+    def _write_sheet(ws, rows, tab_color):
+        ws.sheet_properties.tabColor = tab_color
+        for col_idx, (col_name, col_w) in enumerate(COLS, 1):
+            cell = ws.cell(row=1, column=col_idx, value=col_name)
+            cell.font      = Font(name="Arial", bold=True, color=HEADER_FG, size=10)
+            cell.fill      = _fill(HEADER_BG)
+            cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
+            cell.border    = border
+            ws.column_dimensions[get_column_letter(col_idx)].width = col_w
+        ws.row_dimensions[1].height = 20
+        ws.freeze_panes = "A2"
+
+        for r_idx, item in enumerate(rows, 2):
+            row_fill = _fill(ALT_BG if r_idx % 2 == 0 else "FFFFFF")
+            _disp = ""
+            if DB_OK:
+                try:
+                    _d = _get_db().get_disposition(item.get("id", ""))
+                    _disp = (_d.get("status", "") if _d else "")
+                except Exception:
+                    pass
+            _sc = item.get("special_category", [])
+            _sc_str = ", ".join(
+                s for s in (_sc if isinstance(_sc, list) else [str(_sc or "")])
+                if s not in ("gps_location", "exif_pii")
+            )
+            _exif   = item.get("exif") or {}
+            _gps    = _exif.get("gps")
+            _author = _exif.get("author") or ""
+            values = [
+                item.get("name", ""),
+                item.get("cpr_count", 0),
+                item.get("face_count", 0),
+                "✔" if _gps else "",
+                _sc_str,
+                _author,
+                item.get("folder", ""),
+                item.get("account_name", "") or item.get("source", ""),
+                item.get("user_role", ""),
+                _disp,
+                item.get("modified", ""),
+                item.get("size_kb", ""),
+                item.get("url", ""),
+            ]
+            for col_idx, val in enumerate(values, 1):
+                is_url = col_idx == 13 and val
+                cell = ws.cell(row=r_idx, column=col_idx, value=val)
+                cell.font      = Font(name="Arial", size=10,
+                                     color="1155CC" if is_url else "000000",
+                                     underline="single" if is_url else None)
+                cell.fill      = row_fill
+                cell.alignment = Alignment(vertical="center", wrap_text=(col_idx == 1))
+                cell.border    = border
+            ws.row_dimensions[r_idx].height = 16
+
+        if rows:
+            tr = len(rows) + 2
+            ws.cell(row=tr, column=1, value="Total").font = Font(name="Arial", bold=True, size=10)
+            ws.cell(row=tr, column=2, value=f"=SUM(B2:B{tr-1})").font = Font(name="Arial", bold=True, size=10)
+            for col_idx in range(1, len(COLS) + 1):
+                ws.cell(row=tr, column=col_idx).fill   = _fill("D0D8F0")
+                ws.cell(row=tr, column=col_idx).border = border
+
+        ws.auto_filter.ref = f"A1:{get_column_letter(len(COLS))}1"
+
+    wb     = Workbook()
+    ws_sum = wb.active
+    ws_sum.title = "Summary"
+    ws_sum.sheet_properties.tabColor = "1F3864"
+    ws_sum["A1"] = "GDPRScanner — Export"
+    ws_sum["A1"].font = Font(name="Arial", bold=True, size=14, color=HEADER_FG)
+    ws_sum["A1"].fill = _fill(HEADER_BG)
+    ws_sum.merge_cells("A1:D1")
+    ws_sum["A1"].alignment = Alignment(horizontal="center", vertical="center")
+    ws_sum.row_dimensions[1].height = 28
+
+    import datetime as _dt
+    ws_sum["A2"] = "Generated:"
+    ws_sum["B2"] = _dt.datetime.now().strftime("%Y-%m-%d %H:%M")
+    ws_sum["A3"] = "Total flagged items:"
+    ws_sum["B3"] = len(state.flagged_items)
+    gps_count = sum(1 for i in state.flagged_items if (i.get("exif") or {}).get("gps"))
+    if gps_count:
+        ws_sum["A4"] = "Items with GPS data:"
+        ws_sum["B4"] = gps_count
+    for cell in (ws_sum["A2"], ws_sum["A3"], ws_sum["A4"]):
+        cell.font = Font(name="Arial", bold=True, size=10)
+    for cell in (ws_sum["B2"], ws_sum["B3"], ws_sum["B4"]):
+        cell.font = Font(name="Arial", size=10)
+    ws_sum.column_dimensions["A"].width = 22
+    ws_sum.column_dimensions["B"].width = 20
+
+    for ci, h in enumerate(["Source", "Items", "Total CPR Hits"], 1):
+        cell = ws_sum.cell(row=6, column=ci, value=h)
+        cell.font      = Font(name="Arial", bold=True, color=HEADER_FG, size=10)
+        cell.fill      = _fill(HEADER_BG)
+        cell.border    = border
+        cell.alignment = Alignment(horizontal="center", vertical="center")
+    ws_sum.row_dimensions[6].height = 18
+    ws_sum.column_dimensions["C"].width = 16
+
+    by_source: dict = {}
+    for item in state.flagged_items:
+        by_source.setdefault(item.get("source_type", "other"), []).append(item)
+
+    sum_row = 7
+    for src_key, (label, tab_bg) in SOURCE_MAP.items():
+        items = by_source.get(src_key, [])
+        if not items:
+            continue
+        ws_sum.cell(row=sum_row, column=1, value=label).font = Font(name="Arial", size=10)
+        ws_sum.cell(row=sum_row, column=2, value=len(items)).font = Font(name="Arial", size=10)
+        ws_sum.cell(row=sum_row, column=3, value=sum(i.get("cpr_count", 0) for i in items)).font = Font(name="Arial", size=10)
+        for ci in range(1, 4):
+            ws_sum.cell(row=sum_row, column=ci).border = border
+            ws_sum.cell(row=sum_row, column=ci).fill = _fill("EEF2FF" if sum_row % 2 == 0 else "FFFFFF")
+        sum_row += 1
+
+    for src_key, (label, tab_bg) in SOURCE_MAP.items():
+        items = by_source.get(src_key, [])
+        if not items:
+            continue
+        clean_label = label.split(" ", 1)[1]
+        _write_sheet(wb.create_sheet(title=clean_label), items, tab_bg)
+
+    # GPS items sheet
+    gps_items = [i for i in state.flagged_items if (i.get("exif") or {}).get("gps")]
+    if gps_items:
+        ws_gps = wb.create_sheet(title="GPS locations")
+        ws_gps.sheet_properties.tabColor = "1A7A6E"
+        GPS_COLS = [
+            ("Name", 40), ("Latitude", 14), ("Longitude", 14),
+            ("Maps link", 50), ("Account", 24), ("Date Modified", 14),
+        ]
+        for col_idx, (col_name, col_w) in enumerate(GPS_COLS, 1):
+            cell = ws_gps.cell(row=1, column=col_idx, value=col_name)
+            cell.font      = Font(name="Arial", bold=True, color=HEADER_FG, size=10)
+            cell.fill      = _fill("1A7A6E")
+            cell.alignment = Alignment(horizontal="center", vertical="center")
+            cell.border    = border
+            ws_gps.column_dimensions[get_column_letter(col_idx)].width = col_w
+        ws_gps.freeze_panes = "A2"
+        for r_idx, item in enumerate(gps_items, 2):
+            _exif = item.get("exif") or {}
+            _gps  = _exif.get("gps") or {}
+            row_fill = _fill("E0F7F4" if r_idx % 2 == 0 else "FFFFFF")
+            for col_idx, val in enumerate([
+                item.get("name", ""),
+                _gps.get("lat", ""),
+                _gps.get("lon", ""),
+                _gps.get("maps_url", ""),
+                item.get("account_name", "") or item.get("source", ""),
+                item.get("modified", ""),
+            ], 1):
+                is_link = col_idx == 4 and val
+                cell = ws_gps.cell(row=r_idx, column=col_idx, value=val)
+                cell.font   = Font(name="Arial", size=10,
+                                   color="1155CC" if is_link else "000000",
+                                   underline="single" if is_link else None)
+                cell.fill   = row_fill
+                cell.border = border
+        ws_gps.auto_filter.ref = f"A1:{get_column_letter(len(GPS_COLS))}1"
+
+    # External transfers sheet
+    ext_items = [i for i in state.flagged_items
+                 if i.get("transfer_risk") in ("external-recipient", "external-share", "shared")]
+    if ext_items:
+        ws_ext = wb.create_sheet(title="External transfers")
+        _write_sheet(ws_ext, ext_items, "E74C3C")
+        ws_ext.sheet_properties.tabColor = "E74C3C"
+        ws_sum.cell(row=sum_row, column=1, value="⚠ External transfers").font = Font(name="Arial", size=10, bold=True, color="E74C3C")
+        ws_sum.cell(row=sum_row, column=2, value=len(ext_items)).font = Font(name="Arial", size=10, bold=True, color="E74C3C")
+        ws_sum.cell(row=sum_row, column=3, value=sum(i.get("cpr_count", 0) for i in ext_items)).font = Font(name="Arial", size=10, bold=True, color="E74C3C")
+        for ci in range(1, 4):
+            ws_sum.cell(row=sum_row, column=ci).border = border
+            ws_sum.cell(row=sum_row, column=ci).fill = _fill("FDE8E8")
+
+    buf = io.BytesIO()
+    wb.save(buf)
+    buf.seek(0)
+    fname = f"m365_scan_{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+    return buf.read(), fname
+@bp.route("/api/export_excel")
+def export_excel():
+    """Export flagged items as an Excel workbook with per-source tabs."""
+    # If in-memory list is empty (e.g. after page reload), try loading from DB.
+    # Use get_session_items() so concurrent M365 + Google + File scans (each with
+    # their own scan_id) are all included, not just the single latest scan_id.
+    if not state.flagged_items and DB_OK:
+        try:
+            db = _get_db()
+            if db:
+                db_items = db.get_session_items()
+                if db_items:
+                    state.flagged_items[:] = db_items
+        except Exception:
+            pass
+    try:
+        xl_bytes, fname = _build_excel_bytes()
+        return Response(
+            xl_bytes,
+            mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            headers={"Content-Disposition": f"attachment; filename={fname}"}
+        )
+    except ImportError:
+        return jsonify({"error": "openpyxl not installed — run: pip install openpyxl"}), 500
+    except Exception as e:
+        import traceback
+        logger.error("export_excel error: %s\n%s", e, traceback.format_exc())
+        return jsonify({"error": str(e)}), 500
+
+
+# ── Article 30 report ─────────────────────────────────────────────────────────
+
+def _build_article30_docx() -> tuple[bytes, str]:
+    """Generate a GDPR Article 30 Register of Processing Activities as .docx.
+    Returns (bytes, filename). Strings are translated using the active state.LANG dict."""
+    try:
+        from docx import Document as _Document
+        from docx.shared import Pt, RGBColor, Inches, Cm
+        from docx.enum.text import WD_ALIGN_PARAGRAPH
+        from docx.oxml.ns import qn
+        from docx.oxml import OxmlElement
+    except ImportError:
+        raise ImportError("python-docx not installed — run: pip install python-docx")
+
+    import datetime as _dt
+
+    def L(key: str, default: str = "") -> str:
+        """Translate helper — look up ``key`` in state.LANG, falling back to ``default`` if missing."""
+        return state.LANG.get(key, default)
+
+    # ── Data ─────────────────────────────────────────────────────────────────
+    db    = _get_db() if DB_OK else None
+    stats   = db.get_stats() if db else {}
+    items   = db.get_session_items() if db else list(state.flagged_items)
+    trend   = db.get_trend(10) if db else []
+    overdue = db.get_overdue_items(5) if db else []
+
+    # Build account_id → display_name map from the scan's stored user_ids
+    # This lets us resolve GUIDs and "Microsoft Konto" placeholders that
+    # were stored in account_name before _resolve_display_name was applied.
+    _acct_map: dict[str, str] = {}
+    if db:
+        try:
+            scan_id = stats.get("scan_id") or db.latest_scan_id()
+            if scan_id:
+                row = db._connect().execute(
+                    "SELECT user_count, options FROM scans WHERE id=?", (scan_id,)
+                ).fetchone()
+                # user_ids are stored in the options JSON column
+                opts_json = json.loads(row["options"] or "{}") if row else {}
+                for u in opts_json.get("user_ids", []):
+                    uid  = u.get("id", "")
+                    name = u.get("displayName", "")
+                    if uid and name:
+                        _acct_map[uid] = name
+        except Exception:
+            pass
+    # Also seed from in-memory state.flagged_items (catches current scan not yet in DB)
+    for item in state.flagged_items:
+        aid  = item.get("account_id", "")
+        name = item.get("account_name", "")
+        if aid and name and not _GUID_RE.match(name.strip()):
+            _acct_map.setdefault(aid, name)
+
+    def _acct_label(item: dict) -> str:
+        """Pick the most readable account label available for ``item``."""
+        account_id = item.get("account_id", "")
+        # Prefer the lookup map (seeded from the scan's stored user_ids and from
+        # in-memory flagged items) whenever it knows this account_id.
+        if account_id and account_id in _acct_map:
+            return _acct_map[account_id]
+        # Otherwise let _resolve_display_name work from the stored account_name.
+        return _resolve_display_name(item.get("account_name", ""), account_id)
+    overdue_ids = {o["id"] for o in overdue}
+
+    now_str   = _dt.datetime.now().strftime("%Y-%m-%d %H:%M")
+    date_str  = _dt.datetime.now().strftime("%Y-%m-%d")
+    fname     = f"article30_{date_str}.docx"
+
+    # Aggregate by source
+    by_source: dict = {}
+    for item in items:
+        st = item.get("source_type", "other")
+        by_source.setdefault(st, []).append(item)
+
+    SOURCE_LABELS = {
+        "email":      "Exchange (Outlook)",
+        "onedrive":   "OneDrive",
+        "sharepoint": "SharePoint",
+        "teams":      "Teams",
+        "gmail":      "Gmail",
+        "gdrive":     "Google Drive",
+        "local":      "Local files",
+        "smb":        "Network / SMB",
+    }
+
+    # ── Colour palette ────────────────────────────────────────────────────────
+    DARK_BLUE  = RGBColor(0x1F, 0x38, 0x64)
+    MID_BLUE   = RGBColor(0x00, 0x78, 0xD4)
+    LIGHT_GREY = RGBColor(0xF2, 0xF2, 0xF2)
+    RED        = RGBColor(0xC0, 0x39, 0x2B)
+    ORANGE     = RGBColor(0xC5, 0x5A, 0x00)
+    WHITE      = RGBColor(0xFF, 0xFF, 0xFF)
+
+    def _hex(c: RGBColor) -> str:  # "RRGGBB" upper-case hex, as passed to w:fill / w:color below
+        return "".join(f"{channel:02X}" for channel in (c[0], c[1], c[2]))
+
+    # ── Document setup ────────────────────────────────────────────────────────
+    doc = _Document()
+    doc.core_properties.title   = "GDPR Article 30 — Register of Processing Activities"
+    doc.core_properties.author  = "GDPRScanner"
+    doc.core_properties.subject = "GDPR Compliance"
+
+    # Page margins — A4 with 2.5 cm margins
+    for section in doc.sections:
+        section.top_margin    = Cm(2.5)
+        section.bottom_margin = Cm(2.5)
+        section.left_margin   = Cm(2.5)
+        section.right_margin  = Cm(2.5)
+
+    # ── Helper: set cell background ──────────────────────────────────────────
+    def _cell_bg(cell, hex_color: str):
+        """Append a ``w:shd`` element that fills ``cell`` with ``hex_color``."""
+        shading = OxmlElement("w:shd")
+        attrs = (("w:val", "clear"), ("w:color", "auto"), ("w:fill", hex_color))
+        for attr_name, attr_value in attrs:
+            shading.set(qn(attr_name), attr_value)
+        # Each call appends a new element; nothing already present is replaced.
+        cell._tc.get_or_add_tcPr().append(shading)
+
+    def _set_cell_border(cell, **kwargs):
+        """Append ``w:tcBorders`` to ``cell``; each of top/left/bottom/right may be
+        given as a dict overriding the val/sz/space/color defaults listed below."""
+        defaults = (("val", "single"), ("sz", "4"), ("space", "0"), ("color", "CCCCCC"))
+        borders = OxmlElement("w:tcBorders")
+        for side in ("top", "left", "bottom", "right"):
+            overrides = kwargs.get(side, {})
+            side_el = OxmlElement(f"w:{side}")
+            for attr, fallback in defaults:
+                side_el.set(qn(f"w:{attr}"), overrides.get(attr, fallback))
+            borders.append(side_el)
+        # All four edges are always emitted, whether or not they were overridden.
+        cell._tc.get_or_add_tcPr().append(borders)
+
+    def _para(text: str = "", bold=False, size=11, color=None,
+              align=WD_ALIGN_PARAGRAPH.LEFT, space_before=0, space_after=6) -> object:
+        """Append a paragraph to ``doc`` (font size and spacing in points) and return it."""
+        para = doc.add_paragraph()
+        para.alignment = align
+        fmt = para.paragraph_format
+        fmt.space_before, fmt.space_after = Pt(space_before), Pt(space_after)
+        if text:  # empty text yields a bare spacer paragraph with no run
+            styled = para.add_run(text)
+            styled.bold, styled.font.size = bold, Pt(size)
+            if color:
+                styled.font.color.rgb = color
+        return para
+
+    def _heading(text: str, level: int = 1):
+        """Add a bold DARK_BLUE heading to ``doc``; level 1 gets larger type and spacing."""
+        heading = doc.add_heading(text, level=level)
+        first = heading.runs[0] if heading.runs else heading.add_run(text)
+        font_pt, before_pt = (16, 14) if level == 1 else (13, 10)
+        first.font.color.rgb, first.font.size, first.bold = DARK_BLUE, Pt(font_pt), True
+        fmt = heading.paragraph_format
+        fmt.space_before, fmt.space_after = Pt(before_pt), Pt(4)
+        return heading
+
+    def _kv(label: str, value: str, label_width=2.5, bold=False, highlight=False):
+        """Add a one-row, two-cell "Table Grid" table showing ``label`` beside ``value``.
+        ``label_width`` is passed to Inches(); ``bold`` applies to the value run only;
+        ``highlight`` tints both cells and colours both runs. Returns the table."""
+        tbl = doc.add_table(rows=1, cols=2)
+        tbl.style = "Table Grid"
+        c1, c2 = tbl.rows[0].cells
+        _cell_bg(c1, "FFF3E0" if highlight else "F2F2F2")
+        _cell_bg(c2, "FFF3E0" if highlight else "FFFFFF")
+        c1.width = Inches(label_width)
+        # NOTE(review): 16.0 looks like a content width in cm (A4 minus 2.5 cm margins) but is used as inches — confirm.
+        c2.width = Inches(16.0 - label_width)
+        p1 = c1.paragraphs[0]; p1.clear()
+        r1 = p1.add_run(label); r1.bold = True; r1.font.size = Pt(10)
+        p2 = c2.paragraphs[0]; p2.clear()
+        r2 = p2.add_run(value); r2.font.size = Pt(10); r2.bold = bold
+        if highlight:
+            r1.font.color.rgb = r2.font.color.rgb = RGBColor(0x6B, 0x00, 0x6B)
+        border = {"color": "E0E0E0"}  # same light-grey edge on all four sides
+        for cell in (c1, c2):
+            _set_cell_border(cell, top=border, bottom=border, left=border, right=border)
+        return tbl
+
+    # ── Cover page ────────────────────────────────────────────────────────────
+    _para()
+    title_p = doc.add_paragraph()
+    title_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    title_p.paragraph_format.space_before = Pt(40)
+    r = title_p.add_run(L("a30_title", "GDPR Article 30"))
+    r.bold = True; r.font.size = Pt(28); r.font.color.rgb = DARK_BLUE
+
+    sub_p = doc.add_paragraph()
+    sub_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    r2 = sub_p.add_run(L("a30_subtitle", "Register of Processing Activities"))
+    r2.font.size = Pt(16); r2.font.color.rgb = MID_BLUE
+
+    _para()
+    meta_p = doc.add_paragraph()
+    meta_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    r3 = meta_p.add_run(f"{L('a30_generated','Generated')}: {now_str}  ·  GDPRScanner")
+    r3.font.size = Pt(10); r3.font.color.rgb = RGBColor(0x88, 0x88, 0x88)
+
+    # Divider line
+    _para()
+    div = doc.add_paragraph()
+    div_fmt = div.paragraph_format
+    div_fmt.space_after = Pt(20)
+    pPr = div._p.get_or_add_pPr()
+    pBdr = OxmlElement("w:pBdr")
+    bot  = OxmlElement("w:bottom")
+    bot.set(qn("w:val"), "single"); bot.set(qn("w:sz"), "6")
+    bot.set(qn("w:color"), _hex(MID_BLUE))
+    pBdr.append(bot); pPr.append(pBdr)
+
+    doc.add_page_break()
+
+    # ── Section 1: Summary ────────────────────────────────────────────────────
+    _heading(L("a30_s1", "1. Summary"))
+
+    total_items    = len(items)
+    total_cpr      = sum(i.get("cpr_count", 0) for i in items)
+    special_items  = [i for i in items if i.get("special_category") and
+                      i["special_category"] not in ("[]", "", None, [])]
+    photo_items    = [i for i in items if i.get("face_count", 0) > 0]
+    gps_items      = [i for i in items if "gps_location" in (i.get("special_category") or [])]
+    exif_pii_items = [i for i in items if "exif_pii" in (i.get("special_category") or [])]
+    unique_subj    = stats.get("unique_subjects", 0)
+    total_scanned  = stats.get("total_scanned", 0)
+    scan_date      = _dt.datetime.fromtimestamp(
+        stats.get("started_at", 0)).strftime("%Y-%m-%d %H:%M") if stats.get("started_at") else "—"
+    special_items  = [i for i in items if i.get("special_category") and
+                      i["special_category"] not in ("[]", "", None, [])]
+
+    _kv(L("a30_scan_date",       "Scan date"),                scan_date)
+    _kv(L("a30_items_scanned",   "Items scanned"),            str(total_scanned))
+    _kv(L("a30_flagged",         "Flagged items"),            str(total_items))
+    _kv(L("a30_cpr_hits",        "Total CPR hits"),           str(total_cpr))
+    _kv(L("a30_data_subjects",   "Estimated data subjects"),  str(unique_subj))
+    _kv(L("a30_overdue",         "Overdue items (>5 yrs)"),   str(len(overdue_ids)))
+    if gps_items:
+        _kv(L("a30_gps_items", "Items with GPS location data (Art. 4 — location = personal data)"),
+            str(len(gps_items)))
+    if exif_pii_items:
+        _kv(L("a30_exif_pii_items", "Items with EXIF PII (author, description, keywords)"),
+            str(len(exif_pii_items)))
+    if photo_items:
+        total_faces = sum(i.get("face_count", 0) for i in photo_items)
+        _kv(L("a30_photo_items", "Photos with detected faces (Art. 9 biometric)"),
+            f"{len(photo_items)} items / {total_faces} faces")
+        _para(L("a30_photo_note",
+                "Photographs of identifiable persons are biometric data under Art. 9 GDPR. "
+                "Retention requires a documented legal basis under Art. 9(2). "
+                "For school photographs of pupils under 15, parental consent is required "
+                "(Databeskyttelsesloven §6). See Datatilsynet guidance on school photography."),
+              size=9, space_after=4)
+    if special_items:
+        _kv(L("a30_special_cat", "Art. 9 special category items"),
+            str(len(special_items)))
+        _para(L("a30_special_cat_note",
+                "These items contain health, criminal, biometric, religious, ethnic, "
+                "trade union, political, or sexual orientation data. "
+                "An explicit legal basis (Art. 9(2)) and possibly a DPIA (Art. 35) is required."),
+              size=9, space_after=4)
+
+    _para()
+
+    # Per-source breakdown table
+    _para(L("a30_by_source", "Breakdown by source"), bold=True, size=11, space_before=10)
+
+    src_tbl = doc.add_table(rows=1, cols=5)
+    src_tbl.style = "Table Grid"
+    hdr_cells = src_tbl.rows[0].cells
+    for cell, txt in zip(hdr_cells, [L("a30_col_source","Source"), L("a30_col_items","Items"),
+                                     L("a30_col_cpr","CPR hits"), L("a30_col_overdue","Overdue"),
+                                     L("a30_col_special","Art. 9")]):
+        _cell_bg(cell, _hex(DARK_BLUE))
+        p = cell.paragraphs[0]; p.clear()
+        r = p.add_run(txt); r.bold = True
+        r.font.size = Pt(10); r.font.color.rgb = WHITE
+
+    for src_key in ("email", "onedrive", "sharepoint", "teams", "gmail", "gdrive", "local", "smb"):
+        src_items = by_source.get(src_key, [])
+        if not src_items:
+            continue
+        row   = src_tbl.add_row().cells
+        n_ov   = sum(1 for i in src_items if i.get("id") in overdue_ids)
+        n_cpr  = sum(i.get("cpr_count", 0) for i in src_items)
+        n_spec = sum(1 for i in src_items if i.get("special_category") and
+                     i["special_category"] not in ("[]", "", None, []))
+        for cell, val in zip(row, [
+            SOURCE_LABELS.get(src_key, src_key),
+            str(len(src_items)), str(n_cpr), str(n_ov),
+            str(n_spec) if n_spec else "—"
+        ]):
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(val); r.font.size = Pt(10)
+            if val != "0" and cell == row[3]:
+                r.font.color.rgb = ORANGE
+            if n_spec and cell == row[4]:
+                r.font.color.rgb = RGBColor(0x7B, 0x00, 0x82)
+                r.bold = True
+
+    # ── Section 2: Data categories ────────────────────────────────────────────
+    doc.add_page_break()
+    _heading(L("a30_s2", "2. Personal Data Categories Identified"))
+
+    _para(L("a30_s2_intro", "The following categories of personal data were detected during scanning."),
+          size=10, space_after=8)
+
+    # Aggregate PII from DB or from items
+    pii_totals: dict = {}
+    if db:
+        rows = db._connect().execute(
+            """SELECT pii_type, SUM(hit_count) FROM pii_hits
+               WHERE scan_id=? GROUP BY pii_type""",
+            (stats.get("scan_id") or db.latest_scan_id() or 0,)
+        ).fetchall()
+        for pii_type, count in rows:
+            pii_totals[pii_type] = count
+
+    PII_LABELS = {
+        "PHONE":        L("a30_pii_phone",        "Phone numbers"),
+        "EMAIL":        L("a30_pii_email",        "Email addresses"),
+        "IBAN":         L("a30_pii_iban",         "IBAN bank numbers"),
+        "BANK_ACCOUNT": L("a30_pii_bank",         "Bank account numbers"),
+        "NAME":         L("a30_pii_name",         "Personal names (NER)"),
+        "ADDRESS":      L("a30_pii_address",      "Addresses (NER)"),
+        "ORG":          L("a30_pii_org",          "Organisations (NER)"),
+    }
+
+    pii_tbl = doc.add_table(rows=1, cols=3)
+    pii_tbl.style = "Table Grid"
+    for cell, txt in zip(pii_tbl.rows[0].cells,
+                          [L("a30_col_category","Data category"), L("a30_col_count","Count"), L("a30_col_gdpr_class","GDPR classification")]):
+        _cell_bg(cell, _hex(DARK_BLUE))
+        p = cell.paragraphs[0]; p.clear()
+        r = p.add_run(txt); r.bold = True
+        r.font.size = Pt(10); r.font.color.rgb = WHITE
+
+    # CPR row first — always
+    cpr_row = pii_tbl.add_row().cells
+    for cell, val in zip(cpr_row, [L("a30_cpr_label", "CPR numbers (Danish personal ID)"), str(total_cpr),
+                                    L("a30_cpr_class", "Art. 9 — national identifier")]):
+        p = cell.paragraphs[0]; p.clear()
+        r = p.add_run(val); r.font.size = Pt(10)
+        _cpr_class = L("a30_cpr_class", "Art. 9 — national identifier")
+        if val == _cpr_class:
+            r.font.color.rgb = RED; r.bold = True
+
+    for pii_type, label in PII_LABELS.items():
+        count = pii_totals.get(pii_type, 0)
+        if not count:
+            continue
+        cls = L("a30_pii_class_9", "Art. 9 — health/sensitive") if pii_type in ("NAME", "ADDRESS") else L("a30_pii_class_4", "Art. 4 — personal data")
+        row = pii_tbl.add_row().cells
+        for cell, val in zip(row, [label, str(count), cls]):
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(val); r.font.size = Pt(10)
+
+    # ── Section 3: Data inventory ─────────────────────────────────────────────
+    doc.add_page_break()
+    _heading(L("a30_s3", "3. Data Inventory"))
+
+    _para(L("a30_s3_intro", "All flagged items are listed below with location, retention status, and compliance disposition."),
+          size=10, space_after=8)
+
+    # Split by user role for separate presentation
+    student_items = [i for i in items if i.get("user_role") == "student"]
+    staff_items   = [i for i in items if i.get("user_role") != "student"]
+
+    _disp_map = {
+        "unreviewed":       L("a30_disp_unreviewed",      "Unreviewed"),
+        "retain-legal":     L("a30_disp_retain_legal",    "Retain — Legal obligation"),
+        "retain-legitimate": L("a30_disp_retain_legit",   "Retain — Legitimate interest"),
+        "retain-contract":  L("a30_disp_retain_contract", "Retain — Contract"),
+        "delete-scheduled": L("a30_disp_delete_sched",    "Delete — Scheduled"),
+        "deleted":          L("a30_disp_deleted",         "Deleted"),
+        "personal-use":     L("a30_disp_personal_use",    "Personal use — out of GDPR scope (Art. 2(2)(c))"),
+    }
+
+    def _inv_table(tbl_items: list):
+        """Add a 6-column inventory table: overdue items first, then by CPR count; rows capped at 500."""
+        tbl = doc.add_table(rows=1, cols=6)
+        tbl.style = "Table Grid"
+        col_hdrs = [L("a30_col_name","Name / Subject"), L("a30_col_source","Source"),
+                    L("a30_col_account","Account"), L("a30_col_modified","Modified"),
+                    L("a30_col_cpr_short","CPR"), L("a30_col_disp","Disposition")]
+        for cell, txt in zip(tbl.rows[0].cells, col_hdrs):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True; r.font.size = Pt(9); r.font.color.rgb = WHITE
+        sorted_tbl = sorted(tbl_items,
+            key=lambda x: (0 if x.get("id") in overdue_ids else 1, -x.get("cpr_count", 0)))
+        for idx, item in enumerate(sorted_tbl[:500]):
+            item_id  = item.get("id")  # .get(): an item without "id" must not abort the report
+            disp_rec = db.get_disposition(item_id) if db and item_id is not None else None
+            raw_disp = disp_rec.get("status", "unreviewed") if disp_rec else "unreviewed"
+            disp_str = _disp_map.get(raw_disp, raw_disp.replace("-", " ").title())
+            is_ov    = item_id in overdue_ids
+            row = tbl.add_row().cells
+            vals = [
+                (item.get("name", "")[:60] + ("…" if len(item.get("name", "")) > 60 else "")),
+                SOURCE_LABELS.get(item.get("source_type", ""), item.get("source_type", "")),
+                _acct_label(item),
+                item.get("modified", ""),
+                str(item.get("cpr_count", 0)), disp_str,
+            ]
+            bg = "FFF8F0" if is_ov else ("FFFFFF" if idx % 2 == 0 else "F8F8F8")
+            for col, (cell, val) in enumerate(zip(row, vals)):
+                _cell_bg(cell, bg)
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(8)
+                if is_ov and col == 3:  # the "Modified" cell of an overdue row
+                    r.font.color.rgb = ORANGE
+        if len(tbl_items) > 500:
+            _para(f"… {len(tbl_items) - 500} {L('a30_more_items', 'additional items not shown.')}",
+                  size=9, color=RGBColor(0x88, 0x88, 0x88), space_before=4)
+
+    if staff_items:
+        if student_items:
+            _para(L("a30_inv_staff", "👔 Staff / Faculty"), bold=True, size=11, space_before=6, space_after=4)
+        _inv_table(staff_items)
+
+    if student_items:
+        _para(L("a30_inv_students", "🎓 Students"), bold=True, size=11, space_before=14, space_after=2)
+        _para(L("a30_student_consent_note",
+                "Note: Student accounts in Danish folkeskole (pupils under age 15) require parental "
+                "consent for processing of personal data under Databeskyttelsesloven §6. "
+                "Items in student accounts must not be auto-deleted — any action requires "
+                "review by school administration and, for pupils under 15, notification of parents "
+                "or guardians as rights holders under GDPR Article 8."),
+              size=9, color=RGBColor(0x88, 0x44, 0x00), space_after=6)
+        _inv_table(student_items)
+
+    # ── Section 4: Retention analysis ────────────────────────────────────────
+    if overdue:
+        doc.add_page_break()
+        _heading(L("a30_s4", "4. Retention Analysis"))
+
+        _para(L("a30_s4_intro", "The following items exceed the 5-year retention threshold and should be reviewed for deletion under GDPR Article 5(1)(e) — storage limitation."),
+              size=10, space_after=8)
+
+        ret_tbl = doc.add_table(rows=1, cols=5)
+        ret_tbl.style = "Table Grid"
+        for cell, txt in zip(ret_tbl.rows[0].cells,
+                              [L("a30_col_name","Name"), L("a30_col_source","Source"), L("a30_col_account","Account"), L("a30_col_modified","Modified"), L("a30_col_cpr","CPR hits")]):
+            _cell_bg(cell, _hex(ORANGE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(9); r.font.color.rgb = WHITE
+
+        for item in overdue[:200]:
+            row = ret_tbl.add_row().cells
+            for cell, val in zip(row, [
+                item.get("name", "")[:55],
+                SOURCE_LABELS.get(item.get("source_type", ""), ""),
+                _acct_label(item),
+                item.get("modified", ""),
+                str(item.get("cpr_count", 0)),
+            ]):
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(8)
+
+    # ── Section 5: Scan history ───────────────────────────────────────────────
+    if trend:
+        sec_num = "5" if overdue else "4"
+        doc.add_page_break()
+        _heading(f"{sec_num}. {L('a30_s5','Compliance Trend').split('. ',1)[-1]}")
+
+        _para(L("a30_s5_intro", "Flagged item counts over the last scans (most recent first)."),
+              size=10, space_after=8)
+
+        trend_tbl = doc.add_table(rows=1, cols=4)
+        trend_tbl.style = "Table Grid"
+        for cell, txt in zip(trend_tbl.rows[0].cells,
+                              [L("a30_col_scan_date","Scan date"), L("a30_col_flagged","Flagged"), L("a30_col_overdue","Overdue"), L("a30_col_scan_type","Scan type")]):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(9); r.font.color.rgb = WHITE
+
+        for t in reversed(trend):
+            row = trend_tbl.add_row().cells
+            for cell, val in zip(row, [
+                t.get("scan_date", ""),
+                str(t.get("flagged_count", 0)),
+                str(t.get("overdue_count", 0)),
+                L("a30_scan_delta", "Delta") if t.get("delta") else L("a30_scan_full", "Full"),
+            ]):
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(9)
+
+    # ── Section: Deletion audit log ───────────────────────────────────────────
+    del_log   = db.get_deletion_log(limit=500) if db else []
+    del_stats = db.deletion_log_stats() if db else {}
+
+    # Running section counter — starts at 3 (summary, categories, inventory always present)
+    last_sec  = 3
+    last_sec += 1 if overdue  else 0   # retention analysis
+    last_sec += 1 if trend    else 0   # compliance trend
+
+    if del_log:
+        del_sec   = last_sec
+        last_sec += 1
+        doc.add_page_break()
+        _heading(f"{del_sec}. {L('a30_s_dellog', 'Deletion Audit Log')}")
+
+        _para(L("a30_dellog_intro",
+                f"A total of {del_stats.get('total', len(del_log))} item(s) containing personal data "
+                f"have been deleted via GDPRScanner. "
+                f"CPR hits removed: {del_stats.get('cpr_hits_deleted', 0)}. "
+                f"This log satisfies the accountability obligation under GDPR Article 5(2)."),
+              size=10, space_after=8)
+
+        # Summary by reason
+        by_reason = del_stats.get("by_reason", {})
+        if by_reason:
+            _para(L("a30_dellog_by_reason", "Deletions by reason"), bold=True, size=10, space_before=4, space_after=4)
+            reason_tbl = doc.add_table(rows=1, cols=2)
+            reason_tbl.style = "Table Grid"
+            for cell, txt in zip(reason_tbl.rows[0].cells,
+                                  [L("a30_col_reason", "Reason"), L("a30_col_count", "Count")]):
+                _cell_bg(cell, _hex(DARK_BLUE))
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(txt); r.bold = True
+                r.font.size = Pt(9); r.font.color.rgb = WHITE
+            REASON_LABELS = {
+                "manual":               L("a30_reason_manual",    "Manual (individual card delete)"),
+                "bulk":                 L("a30_reason_bulk",       "Bulk delete"),
+                "retention":            L("a30_reason_retention",  "Retention policy enforcement"),
+                "data-subject-request": L("a30_reason_dsr",        "Data subject erasure request (Art. 17)"),
+            }
+            for reason, count in sorted(by_reason.items()):
+                row = reason_tbl.add_row().cells
+                for cell, val in zip(row, [REASON_LABELS.get(reason, reason), str(count)]):
+                    p = cell.paragraphs[0]; p.clear()
+                    r = p.add_run(val); r.font.size = Pt(9)
+
+        # Full log table
+        _para(L("a30_dellog_records", "Deletion records"), bold=True, size=10, space_before=10, space_after=4)
+        log_tbl = doc.add_table(rows=1, cols=7)
+        log_tbl.style = "Table Grid"
+        for cell, txt in zip(log_tbl.rows[0].cells, [
+            L("a30_col_deleted_at",  "Deleted at"),
+            L("a30_col_name",        "Name"),
+            L("a30_col_source",      "Source"),
+            L("a30_col_account",     "Account"),
+            L("a30_col_cpr",         "CPR hits"),
+            L("a30_col_reason",      "Reason"),
+            L("a30_col_deleted_by",  "Deleted by"),
+        ]):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(8); r.font.color.rgb = WHITE
+
+        for idx, entry in enumerate(del_log):
+            ts  = _dt.datetime.fromtimestamp(entry.get("deleted_at", 0)).strftime("%Y-%m-%d %H:%M")
+            bg  = "FFFFFF" if idx % 2 == 0 else "F8F8F8"
+            row = log_tbl.add_row().cells
+            for cell, val in zip(row, [
+                ts,
+                entry.get("item_name", "")[:40],
+                SOURCE_LABELS.get(entry.get("source_type", ""), entry.get("source_type", "")),
+                _acct_map.get(entry.get("account_id", "")) or _resolve_display_name(entry.get("account_name", ""), entry.get("account_id", "")),
+                str(entry.get("cpr_count", 0)),
+                REASON_LABELS.get(entry.get("reason", ""), entry.get("reason", "")),
+                entry.get("deleted_by", "") or "—",
+            ]):
+                _cell_bg(cell, bg)
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(7)
+
+    # ── Section: Article 9 special categories ────────────────────────────────
+    if special_items:
+        last_sec += 1
+        doc.add_page_break()
+        _heading(f"{last_sec}. {L('a30_s_special', 'Special Category Data (Article 9)')}")
+
+        _para(L("a30_special_intro",
+                f"{len(special_items)} item(s) were detected as containing special category "
+                f"data under GDPR Article 9. These require an explicit legal basis beyond "
+                f"Article 6, and processing should be covered by a Data Protection Impact "
+                f"Assessment (DPIA) under Article 35."),
+              size=10, space_after=8)
+
+        # Category breakdown table
+        from collections import Counter as _Counter
+        cat_counts: dict = _Counter()
+        for item in special_items:
+            sc = item.get("special_category", [])
+            if isinstance(sc, str):
+                import json as _scjson
+                try:
+                    sc = _scjson.loads(sc)
+                except Exception:
+                    sc = []
+            for c in sc:
+                cat_counts[c] += 1
+
+        if cat_counts:
+            _para(L("a30_special_by_cat", "Detected categories"), bold=True, size=10,
+                  space_before=4, space_after=4)
+            cat_tbl = doc.add_table(rows=1, cols=2)
+            cat_tbl.style = "Table Grid"
+            for cell, txt in zip(cat_tbl.rows[0].cells,
+                                  [L("a30_col_category", "Category"),
+                                   L("a30_col_count", "Items")]):
+                _cell_bg(cell, _hex(DARK_BLUE))
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(txt); r.bold = True
+                r.font.size = Pt(9); r.font.color.rgb = WHITE
+            CAT_LABELS = {
+                "health":           L("a30_cat_health",    "Health data (Art. 9)"),
+                "mental_health":    L("a30_cat_mental",    "Mental health (Art. 9)"),
+                "criminal":         L("a30_cat_criminal",  "Criminal records (Art. 10)"),
+                "trade_union":      L("a30_cat_union",     "Trade union membership (Art. 9)"),
+                "religion":         L("a30_cat_religion",  "Religious beliefs (Art. 9)"),
+                "ethnicity":        L("a30_cat_ethnicity", "Racial/ethnic origin (Art. 9)"),
+                "political":        L("a30_cat_political", "Political opinions (Art. 9)"),
+                "biometric":        L("a30_cat_biometric", "Biometric data (Art. 9)"),
+                "sexual_orientation": L("a30_cat_sexual",  "Sexual orientation (Art. 9)"),
+            }
+            for cat, count in sorted(cat_counts.items(), key=lambda x: -x[1]):
+                row = cat_tbl.add_row().cells
+                for cell, val in zip(row, [CAT_LABELS.get(cat, cat), str(count)]):
+                    p = cell.paragraphs[0]; p.clear()
+                    r = p.add_run(val); r.font.size = Pt(9)
+
+        # Item list (capped at 50)
+        _para(L("a30_special_items", "Affected items (up to 50)"), bold=True, size=10,
+              space_before=10, space_after=4)
+        sc_tbl = doc.add_table(rows=1, cols=5)
+        sc_tbl.style = "Table Grid"
+        for cell, txt in zip(sc_tbl.rows[0].cells, [
+            L("a30_col_name",     "Name"),
+            L("a30_col_account",  "Account"),
+            L("a30_col_source",   "Source"),
+            L("a30_col_category", "Category"),
+            L("a30_col_cpr",      "CPR hits"),
+        ]):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(8); r.font.color.rgb = WHITE
+
+        for idx, item in enumerate(special_items[:50]):
+            bg = "FFFFFF" if idx % 2 == 0 else "FFF0F8"
+            sc = item.get("special_category", [])
+            if isinstance(sc, str):
+                try:
+                    import json as _scj2; sc = _scj2.loads(sc)
+                except Exception:
+                    sc = []
+            row = sc_tbl.add_row().cells
+            for cell, val in zip(row, [
+                item.get("name", "")[:35],
+                _acct_map.get(item.get("account_id", "")) or item.get("account_name", ""),
+                SOURCE_LABELS.get(item.get("source_type", ""), item.get("source_type", "")),
+                ", ".join(CAT_LABELS.get(c, c) for c in sc)[:45],
+                str(item.get("cpr_count", 0)),
+            ]):
+                _cell_bg(cell, bg)
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(7)
+
+    # ── Section: Photographs / biometric data (#9) ───────────────────────────
+    if photo_items:
+        last_sec += 1
+        doc.add_page_break()
+        _heading(f"{last_sec}. {L('a30_s_photos', 'Photographs and Biometric Data (Article 9)')}")
+
+        total_faces = sum(i.get("face_count", 0) for i in photo_items)
+        _para(L("a30_photo_intro",
+                f"{len(photo_items)} image file(s) containing {total_faces} detected face(s) "
+                f"were found in the scan. Photographs of identifiable persons constitute "
+                f"biometric data under GDPR Article 9 and are subject to the same "
+                f"heightened protection as health or criminal records data."),
+              size=10, space_after=8)
+
+        _para(L("a30_photo_guidance", "Retention guidance"), bold=True, size=10,
+              space_before=4, space_after=4)
+        for line in [
+            L("a30_photo_g1",
+              "Photos may only be retained while the original purpose remains valid "
+              "(Art. 5(1)(b) — purpose limitation)."),
+            L("a30_photo_g2",
+              "Pupils under 15 require parental consent (Databeskyttelsesloven §6). "
+              "Consent must be freely given, specific, and documented."),
+            L("a30_photo_g3",
+              "Photos on public-facing websites must be removed promptly after a person "
+              "leaves the organisation or withdraws consent (Art. 17 — right to erasure)."),
+            L("a30_photo_g4",
+              "Historical/archive use may justify longer retention under Art. 89 only "
+              "with specific safeguards and case-by-case assessment."),
+        ]:
+            p = doc.add_paragraph(style="List Bullet")
+            r = p.add_run(line); r.font.size = Pt(9)
+
+        # GPS items sub-section
+        if gps_items:
+            _para(L("a30_gps_title", "Items with GPS location data"), bold=True, size=10,
+                  space_before=10, space_after=4)
+            _para(L("a30_gps_intro",
+                    "The following files contain GPS coordinates embedded in EXIF metadata. "
+                    "Location data constitutes personal data under Art. 4 GDPR. For photos of children "
+                    "or staff, GPS data may reveal sensitive patterns (home address, health institution, "
+                    "religious site). Consider stripping EXIF before sharing or publishing."),
+                  size=9, space_after=6)
+            gps_tbl = doc.add_table(rows=1, cols=4)
+            gps_tbl.style = "Table Grid"
+            for cell, txt in zip(gps_tbl.rows[0].cells, [
+                L("a30_col_name", "Name"),
+                L("a30_gps_col_lat", "Latitude"),
+                L("a30_gps_col_lon", "Longitude"),
+                L("a30_col_date", "Modified"),
+            ]):
+                _cell_bg(cell, _hex(DARK_BLUE))
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(txt); r.bold = True
+                r.font.size = Pt(8); r.font.color.rgb = WHITE
+            for idx, item in enumerate(gps_items[:50]):
+                bg = "FFFFFF" if idx % 2 == 0 else "E8F7FF"
+                row = gps_tbl.add_row().cells
+                exif = item.get("exif") or {}
+                gps  = exif.get("gps") or {}
+                for cell, val in zip(row, [
+                    item.get("name", "")[:40],
+                    str(gps.get("lat", ""))[:12],
+                    str(gps.get("lon", ""))[:12],
+                    item.get("modified", ""),
+                ]):
+                    _cell_bg(cell, bg)
+                    p = cell.paragraphs[0]; p.clear()
+                    r = p.add_run(val); r.font.size = Pt(7)
+
+        # Photo item list (capped at 50)
+        _para(L("a30_photo_items", "Detected photo items (up to 50)"), bold=True, size=10,
+              space_before=10, space_after=4)
+        ph_tbl = doc.add_table(rows=1, cols=6)
+        ph_tbl.style = "Table Grid"
+        for cell, txt in zip(ph_tbl.rows[0].cells, [
+            L("a30_col_name",    "Name"),
+            L("a30_col_account", "Account"),
+            L("a30_col_source",  "Source"),
+            L("a30_photo_col_faces", "Faces"),
+            L("a30_gps_col",     "GPS"),
+            L("a30_col_date",    "Modified"),
+        ]):
+            _cell_bg(cell, _hex(DARK_BLUE))
+            p = cell.paragraphs[0]; p.clear()
+            r = p.add_run(txt); r.bold = True
+            r.font.size = Pt(8); r.font.color.rgb = WHITE
+
+        for idx, item in enumerate(photo_items[:50]):
+            bg = "FFFFFF" if idx % 2 == 0 else "E8F7FF"
+            row = ph_tbl.add_row().cells
+            for cell, val in zip(row, [
+                item.get("name", "")[:40],
+                _acct_map.get(item.get("account_id", "")) or item.get("account_name", ""),
+                SOURCE_LABELS.get(item.get("source_type", ""), item.get("source_type", "")),
+                str(item.get("face_count", 0)),
+                "✔" if (item.get("exif") or {}).get("gps") else "",
+                item.get("modified", ""),
+            ]):
+                _cell_bg(cell, bg)
+                p = cell.paragraphs[0]; p.clear()
+                r = p.add_run(val); r.font.size = Pt(7)
+
+    # ── Section: Methodology ─────────────────────────────────────────────────
+    # last_sec already reflects all optional sections that were added above
+    doc.add_page_break()
+    _heading(f"{last_sec}. {L('a30_s6_short', 'Methodology and Legal Basis')}")
+
+    _para(L("a30_method_title", "Scanning methodology"), bold=True, size=11, space_before=6, space_after=4)
+    for line in [
+        L("a30_method_1", "CPR numbers are detected using pattern matching against the official Danish CPR format (DDMMYY-XXXX)."),
+        L("a30_method_2", "Additional personal data (phone numbers, email addresses, IBANs, bank accounts, names, addresses, and organisations) is detected using regular expressions and spaCy NER."),
+        L("a30_method_3", "CPR numbers stored in this document's database are SHA-256 hashed and never stored in plaintext."),
+        L("a30_method_4", "Scanning covers Exchange mailboxes (all folders including Sent Items), OneDrive, SharePoint, and Microsoft Teams channel files via the Microsoft Graph API. When connected, Google Workspace scanning covers Gmail and Google Drive via a service account with domain-wide delegation. Local and network (SMB) file shares are scanned directly."),
+        L("a30_method_5", "When photo scanning is enabled, image files are analysed using OpenCV Haar cascade face detection to identify photographs of persons (Art. 9 biometric data)."),
+    ]:
+        p = doc.add_paragraph(style="List Bullet")
+        r = p.add_run(line); r.font.size = Pt(10)
+
+    _para(L("a30_gdpr_title", "GDPR Articles referenced"), bold=True, size=11, space_before=10, space_after=4)
+    for line in [
+        L("a30_gdpr_1", "Article 5(1)(c) — Data minimisation: only necessary data should be retained"),
+        L("a30_gdpr_2", "Article 5(1)(e) — Storage limitation: data must not be kept longer than necessary"),
+        L("a30_gdpr_3", "Article 9 — Special categories: health, criminal, trade union, and similar data require explicit legal basis"),
+        L("a30_gdpr_4", "Article 15 — Right of access: data subjects may request information about their data"),
+        L("a30_gdpr_5", "Article 17 — Right to erasure: data subjects may request deletion"),
+        L("a30_gdpr_6", "Article 30 — Records of processing activities: this document satisfies the obligation"),
+    ]:
+        p = doc.add_paragraph(style="List Bullet")
+        r = p.add_run(line); r.font.size = Pt(10)
+
+    _para(f"{L('a30_generated','Generated')}: {now_str}  ·  GDPRScanner  ·  {L('a30_confidential','Confidential — GDPR compliance document')}",
+          size=9, color=RGBColor(0x88, 0x88, 0x88), align=WD_ALIGN_PARAGRAPH.CENTER, space_before=20)
+
+    # ── Serialise ─────────────────────────────────────────────────────────────
+    buf = io.BytesIO()
+    doc.save(buf)
+    buf.seek(0)
+    return buf.read(), fname
+
+
+@bp.route("/api/export_article30")
+def export_article30():
+    """Generate and return an Article 30 Word document."""
+    # Pre-populate in-memory list from DB session so _build_article30_docx()
+    # has state.flagged_items available for the account-name seed (line ~318).
+    if not state.flagged_items and DB_OK:
+        try:
+            db = _get_db()
+            if db:
+                db_items = db.get_session_items()
+                if db_items:
+                    state.flagged_items[:] = db_items
+        except Exception:
+            pass
+    if not state.flagged_items:
+        return jsonify({"error": "No results to export — run a scan first"}), 400
+    try:
+        docx_bytes, fname = _build_article30_docx()
+        return Response(
+            docx_bytes,
+            mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            headers={"Content-Disposition": f"attachment; filename={fname}"}
+        )
+    except ImportError as e:
+        return jsonify({"error": str(e)}), 500
+    except Exception as e:
+        import traceback
+        logger.error("export_article30 error: %s\n%s", e, traceback.format_exc())
+        return jsonify({"error": str(e)}), 500
+
+
+def delete_item():
+    """Delete a single flagged item. Returns {ok, error}."""
+    if not state.connector:
+        return jsonify({"ok": False, "error": "not authenticated"}), 401
+    data        = request.get_json() or {}
+    item_id     = data.get("id", "")
+    source_type = data.get("source_type", "")
+    account_id  = data.get("account_id", "") or "me"
+    drive_id    = data.get("drive_id", "")
+
+    if not item_id:
+        return jsonify({"ok": False, "error": "id required"}), 400
+
+    try:
+        if source_type == "email":
+            ok = state.connector.delete_message(account_id, item_id)
+        elif drive_id:
+            ok = state.connector.delete_drive_item(drive_id, item_id)
+        else:
+            ok = state.connector.delete_drive_item_for_user(account_id, item_id)
+
+        if ok or ok is False:  # False = already gone, treat as success
+            # Retrieve full item for audit log before removing it
+            item_meta = next((x for x in state.flagged_items if x.get("id") == item_id), {})
+            state.flagged_items = [x for x in state.flagged_items if x.get("id") != item_id]
+            _db = _get_db() if DB_OK else None
+            if _db:
+                try:
+                    _db.log_deletion(item_meta or {"id": item_id, "source_type": source_type},
+                                     reason="manual")
+                    _db.delete_item_record(item_id)
+                except Exception: pass
+            return jsonify({"ok": True})
+        return jsonify({"ok": False, "error": "Delete returned unexpected result"})
+    except M365PermissionError:
+        return jsonify({"ok": False, "error":
+            "Permission denied (403) — deletion requires Mail.ReadWrite / Files.ReadWrite.All / Sites.ReadWrite.All. "
+            "Go to Azure → App registrations → API permissions → add these and Grant admin consent."})
+    except Exception as e:
+        return jsonify({"ok": False, "error": str(e)})
+
+
+@bp.route("/api/delete_bulk", methods=["POST"])
+def delete_bulk():
+    """Delete multiple items matching criteria. Streams progress as SSE."""
+    if not state.connector:
+        return jsonify({"ok": False, "error": "not authenticated"}), 401
+    data    = request.get_json() or {}
+    item_ids = data.get("ids", [])   # explicit list of ids, or empty = use filters
+    filters  = data.get("filters", {})
+    del_reason = data.get("reason", "bulk")  # manual/bulk/retention/data-subject-request
+
+    # Build target list
+    if item_ids:
+        targets = [x for x in state.flagged_items if x.get("id") in set(item_ids)]
+    else:
+        targets = list(state.flagged_items)
+        # Apply filters
+        if filters.get("source_type"):
+            targets = [x for x in targets if x.get("source_type") == filters["source_type"]]
+        if filters.get("min_cpr"):
+            targets = [x for x in targets if x.get("cpr_count", 0) >= int(filters["min_cpr"])]
+        if filters.get("older_than_date"):
+            targets = [x for x in targets if x.get("modified", "9999") <= filters["older_than_date"]]
+
+    deleted_ids  = []
+    failed_items = []
+
+    for item in targets:
+        iid         = item.get("id", "")
+        source_type = item.get("source_type", "")
+        account_id  = item.get("account_id", "") or "me"
+        drive_id    = item.get("drive_id", "")
+        try:
+            if source_type == "email":
+                state.connector.delete_message(account_id, iid)
+            elif drive_id:
+                state.connector.delete_drive_item(drive_id, iid)
+            else:
+                state.connector.delete_drive_item_for_user(account_id, iid)
+            deleted_ids.append(iid)
+        except M365PermissionError:
+            failed_items.append({"id": iid, "name": item.get("name", ""), "error":
+                "403 — requires Mail.ReadWrite / Files.ReadWrite.All / Sites.ReadWrite.All (Azure admin consent)"})
+        except Exception as e:
+            failed_items.append({"id": iid, "name": item.get("name", ""), "error": str(e)})
+
+    # Build id->item map for audit log
+    _deleted_meta = {x.get("id"): x for x in targets if x.get("id") in set(deleted_ids)}
+    state.flagged_items = [x for x in state.flagged_items if x.get("id") not in set(deleted_ids)]
+    _db = _get_db() if DB_OK else None
+    if _db:
+        for _did in deleted_ids:
+            try:
+                _db.log_deletion(_deleted_meta.get(_did, {"id": _did}), reason=del_reason)
+                _db.delete_item_record(_did)
+            except Exception: pass
+
+    return jsonify({
+        "ok":      True,
+        "deleted": len(deleted_ids),
+        "failed":  len(failed_items),
+        "errors":  failed_items[:10],  # cap error list
+    })
+
+
diff --git a/routes/google_auth.py b/routes/google_auth.py
new file mode 100644
index 0000000..9682e1e
--- /dev/null
+++ b/routes/google_auth.py
@@ -0,0 +1,246 @@
+"""
+Google Workspace authentication routes.
+
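+Endpoints:
+  GET  /api/google/auth/status    — is a service account loaded?
+  POST /api/google/auth/connect   — save key JSON + optional admin_email
+  POST /api/google/auth/disconnect — remove saved key + clear connector
+  GET  /api/google/personal/status  — is a personal OAuth token present and valid?
+  POST /api/google/personal/start   — begin the device-code sign-in flow
+  POST /api/google/personal/poll    — check whether the sign-in has completed
+  POST /api/google/personal/signout — delete stored token + clear connector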
+"""
+from __future__ import annotations
+from flask import Blueprint, jsonify, request
+import json
+import threading
+
+from routes import state
+
+bp = Blueprint("google_auth", __name__)
+
+
+def __getattr__(name):
+    import gdpr_scanner as _m
+    if hasattr(_m, name):
+        return getattr(_m, name)
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+@bp.route("/api/google/auth/status")
+def google_auth_status():
+    """Return current Google connection state."""
+    from google_connector import GOOGLE_AUTH_OK, load_saved_key
+    if not GOOGLE_AUTH_OK:
+        return jsonify({
+            "connected": False,
+            "error": "google-auth not installed — run: pip install google-auth google-auth-httplib2 google-api-python-client",
+            "libs_ok": False,
+        })
+
+    key = load_saved_key()
+    if not key:
+        return jsonify({"connected": False, "libs_ok": True})
+
+    sa_email   = key.get("client_email", "")
+    project_id = key.get("project_id", "")
+    admin_email = ""
+
+    # Read persisted admin_email from config
+    cfg = _load_google_config()
+    admin_email = cfg.get("admin_email", "")
+
+    # Rebuild connector in state if not present
+    if not state.google_connector:
+        try:
+            from google_connector import GoogleConnector
+            state.google_connector = GoogleConnector(key, admin_email=admin_email)
+        except Exception as e:
+            return jsonify({"connected": False, "libs_ok": True,
+                            "error": str(e), "sa_email": sa_email})
+
+    return jsonify({
+        "connected":    True,
+        "libs_ok":      True,
+        "sa_email":     sa_email,
+        "project_id":   project_id,
+        "admin_email":  admin_email,
+    })
+
+
+@bp.route("/api/google/auth/connect", methods=["POST"])
+def google_auth_connect():
+    """
+    Accept a service account key JSON + optional admin_email.
+    Body: { "key_json": "<raw JSON string or object>", "admin_email": "admin@domain.com" }
+    """
+    from google_connector import GOOGLE_AUTH_OK, save_key, GoogleConnector
+    if not GOOGLE_AUTH_OK:
+        return jsonify({"error": "google-auth not installed"}), 503
+
+    data = request.get_json() or {}
+    raw_key  = data.get("key_json", "")
+    admin_email = data.get("admin_email", "").strip()
+
+    # Accept both a JSON string and an already-parsed object
+    if isinstance(raw_key, str):
+        try:
+            key_dict = json.loads(raw_key)
+        except json.JSONDecodeError as e:
+            return jsonify({"error": f"Invalid JSON: {e}"}), 400
+    elif isinstance(raw_key, dict):
+        key_dict = raw_key
+    else:
+        return jsonify({"error": "key_json must be a JSON string or object"}), 400
+
+    if key_dict.get("type") != "service_account":
+        return jsonify({"error": "File must be a service_account JSON key (type != service_account)"}), 400
+
+    # Validate by building a connector
+    try:
+        conn = GoogleConnector(key_dict, admin_email=admin_email)
+        if not conn.is_authenticated():
+            return jsonify({"error": "Credentials did not validate — check the key file"}), 400
+    except Exception as e:
+        return jsonify({"error": str(e)}), 400
+
+    save_key(key_dict)
+    _save_google_config({"admin_email": admin_email})
+
+    state.google_connector = conn
+
+    return jsonify({
+        "ok":         True,
+        "sa_email":   key_dict.get("client_email", ""),
+        "project_id": key_dict.get("project_id", ""),
+    })
+
+
+@bp.route("/api/google/auth/disconnect", methods=["POST"])
+def google_auth_disconnect():
+    """Remove saved service account key and clear the connector."""
+    from google_connector import delete_key
+    delete_key()
+    _save_google_config({})
+    state.google_connector = None
+    return jsonify({"ok": True})
+
+
+# ── Personal Google account (device-code OAuth) ───────────────────────────────
+
+@bp.route("/api/google/personal/status")
+def google_personal_status():
+    """Check whether a personal Google OAuth token is present and valid."""
+    from google_connector import GOOGLE_AUTH_OK, load_personal_token, PersonalGoogleConnector
+    if not GOOGLE_AUTH_OK:
+        return jsonify({"connected": False, "libs_ok": False, "auth_mode": "personal"})
+
+    token_data = load_personal_token()
+    if not token_data:
+        return jsonify({"connected": False, "libs_ok": True, "auth_mode": "personal"})
+
+    if not isinstance(state.google_connector, PersonalGoogleConnector):
+        try:
+            conn = PersonalGoogleConnector(token_data)
+            if conn.is_authenticated():
+                state.google_connector = conn
+            else:
+                return jsonify({"connected": False, "libs_ok": True, "auth_mode": "personal"})
+        except Exception as e:
+            return jsonify({"connected": False, "libs_ok": True, "auth_mode": "personal",
+                            "error": str(e)})
+
+    try:
+        info = state.google_connector.get_user_info()
+        return jsonify({
+            "connected":   True,
+            "libs_ok":     True,
+            "auth_mode":   "personal",
+            "email":       info.get("email", ""),
+            "displayName": info.get("displayName", ""),
+        })
+    except Exception as e:
+        return jsonify({"connected": False, "libs_ok": True, "auth_mode": "personal",
+                        "error": str(e)})
+
+
+@bp.route("/api/google/personal/start", methods=["POST"])
+def google_personal_start():
+    """Initiate a Google device-code flow for a personal account."""
+    from google_connector import GOOGLE_AUTH_OK, PersonalGoogleConnector
+    if not GOOGLE_AUTH_OK:
+        return jsonify({"error": "google-auth not installed"}), 503
+
+    data          = request.get_json() or {}
+    client_id     = data.get("client_id", "").strip()
+    client_secret = data.get("client_secret", "").strip()
+    if not client_id or not client_secret:
+        return jsonify({"error": "client_id and client_secret required"}), 400
+
+    try:
+        flow = PersonalGoogleConnector.get_device_code_flow(client_id, client_secret)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 400
+
+    state.google_pending_flow = flow
+    state.google_poll_result  = None
+
+    def _do_auth():
+        try:
+            conn = PersonalGoogleConnector.complete_device_code_flow(flow)
+            state.google_connector  = conn
+            state.google_poll_result = "ok"
+        except Exception as e:
+            state.google_poll_result = str(e)
+
+    threading.Thread(target=_do_auth, daemon=True).start()
+
+    return jsonify({
+        "user_code":        flow["user_code"],
+        "verification_url": flow["verification_url"],
+    })
+
+
+@bp.route("/api/google/personal/poll", methods=["POST"])
+def google_personal_poll():
+    """Check whether the device-code sign-in has completed."""
+    result = state.google_poll_result
+    if result == "ok":
+        state.google_poll_result  = None
+        state.google_pending_flow = None
+        return jsonify({"status": "ok"})
+    if result and result != "pending":
+        state.google_poll_result  = None
+        state.google_pending_flow = None
+        return jsonify({"status": "error", "error": result})
+    return jsonify({"status": "pending"})
+
+
+@bp.route("/api/google/personal/signout", methods=["POST"])
+def google_personal_signout():
+    """Delete the stored personal OAuth token and clear the connector."""
+    from google_connector import delete_personal_token, PersonalGoogleConnector
+    delete_personal_token()
+    if isinstance(state.google_connector, PersonalGoogleConnector):
+        state.google_connector = None
+    return jsonify({"ok": True})
+
+
+# ── Config helpers ────────────────────────────────────────────────────────────
+
+from pathlib import Path as _Path
+# Data directory under the current user's home; created when this module is
+# imported (mkdir raises if the parent directory does not exist).
+_DATA_DIR      = _Path.home() / ".gdprscanner"
+_DATA_DIR.mkdir(exist_ok=True)
+# JSON file read and written by the config helpers below; google_auth_connect
+# stores the admin_email in it.
+_GOOGLE_CONFIG = _DATA_DIR / "google.json"
+
+
+def _load_google_config() -> dict:
+    if _GOOGLE_CONFIG.exists():
+        try:
+            return json.loads(_GOOGLE_CONFIG.read_text())
+        except Exception:
+            pass
+    return {}
+
+
+def _save_google_config(cfg: dict) -> None:
+    try:
+        _GOOGLE_CONFIG.write_text(json.dumps(cfg, indent=2))
+    except Exception:
+        pass
diff --git a/routes/google_scan.py b/routes/google_scan.py
new file mode 100644
index 0000000..221167a
--- /dev/null
+++ b/routes/google_scan.py
@@ -0,0 +1,328 @@
+"""
+Google Workspace scan routes.
+
+Endpoints:
+  POST /api/google/scan/start   — kick off a Gmail + Drive scan
+  POST /api/google/scan/cancel  — abort running Google scan
+  GET  /api/google/scan/users   — list workspace users via Admin SDK
+"""
+from __future__ import annotations
+from flask import Blueprint, jsonify, request
+import logging
+import threading
+
+logger = logging.getLogger(__name__)
+
+from routes import state
+from routes.state import _google_scan_lock as _scan_lock, _google_scan_abort as _scan_abort
+
+bp = Blueprint("google_scan", __name__)
+
+
+def __getattr__(name):
+    import gdpr_scanner as _m
+    if hasattr(_m, name):
+        return getattr(_m, name)
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+# ── Scan lock shared with M365 scan so both can't run simultaneously ──────────
+# _scan_lock / _scan_abort are aliases for _google_scan_lock / _google_scan_abort,
+# imported directly from routes/state.py at the top of this module.
+
+
+@bp.route("/api/google/scan/users")
+def google_scan_users():
+    """Return list of workspace users available via Admin SDK."""
+    conn = state.google_connector
+    if not conn:
+        return jsonify({"error": "not connected"}), 401
+    try:
+        users = conn.list_users()
+        return jsonify({"users": users})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+
+@bp.route("/api/google/scan/start", methods=["POST"])
+def google_scan_start():
+    """
+    Start a Google Workspace scan.
+
+    Body (all optional):
+    {
+      "sources":       ["gmail", "gdrive"],   // default: both
+      "user_emails":   ["a@dom.com"],         // default: all users via Admin SDK
+      "options": {
+        "max_messages":    2000,
+        "max_files":       5000,
+        "max_attach_mb":   20,
+        "scan_body":       true,
+        "scan_attachments":true,
+        "max_file_mb":     50
+      }
+    }
+    """
+    conn = state.google_connector
+    if not conn:
+        return jsonify({"error": "not connected to Google Workspace"}), 401
+
+    if not _scan_lock.acquire(blocking=False):
+        return jsonify({"error": "scan already running"}), 409
+
+    options = request.get_json() or {}
+    _scan_abort.clear()
+
+    def _run():
+        try:
+            _run_google_scan(options)
+        finally:
+            _scan_lock.release()
+
+    threading.Thread(target=_run, daemon=True).start()
+    return jsonify({"status": "started"})
+
+
+@bp.route("/api/google/scan/cancel", methods=["POST"])
+def google_scan_cancel():
+    _scan_abort.set()
+    return jsonify({"status": "cancelling"})
+
+
+# ── Scan engine ───────────────────────────────────────────────────────────────
+
+def _run_google_scan(options: dict):
+    """
+    Core Google Workspace scan loop.
+
+    Mirrors the M365 scan structure:
+      broadcast("scan_start")
+      for each user:
+        for each source (gmail / gdrive):
+          for each item:
+            scan bytes → broadcast card
+      broadcast("scan_done")
+    """
+    import gdpr_scanner as _m
+
+    broadcast  = _m.broadcast
+    _scan_bytes = _m._scan_bytes
+    flagged_items = _m.flagged_items
+    LANG = _m.LANG
+
+    # Import DB helpers
+    try:
+        from gdpr_db import get_db as _get_db
+        DB_OK = True
+    except ImportError:
+        DB_OK = False
+        def _get_db(*a, **kw): return None
+
+    from scan_engine import _with_disposition
+
+    conn = state.google_connector
+    if not conn:
+        broadcast("scan_error", {"file": "auth", "error": "Not connected to Google Workspace"})
+        broadcast("google_scan_done", {"flagged_count": 0, "total_scanned": 0})
+        return
+
+    import time as _time
+    _sse_buffer_clear = getattr(_m, '_sse_buffer', None)
+    if _sse_buffer_clear is not None:
+        _sse_buffer_clear.clear()
+
+    sources       = options.get("sources", ["gmail", "gdrive"])
+    # user_emails may come at top level or inside options
+    user_emails   = options.get("user_emails", [])
+    scan_opts     = options.get("options", {})
+    max_messages  = int(scan_opts.get("max_messages",  2000))
+    max_files     = int(scan_opts.get("max_files",     5000))
+    max_attach_mb = float(scan_opts.get("max_attach_mb", 20.0))
+    max_file_mb   = float(scan_opts.get("max_file_mb",   50.0))
+    scan_body     = bool(scan_opts.get("scan_body",        True))
+    scan_att      = bool(scan_opts.get("scan_attachments", True))
+
+    # Resolve users: explicit list → Admin SDK → fall back to SA email itself
+    _user_role_map:    dict = {}  # email → role
+    _user_display_map: dict = {}  # email → display name
+    if not user_emails:
+        try:
+            ws_users    = conn.list_users()
+            user_emails = [u["email"] for u in ws_users if u.get("email")]
+            _user_role_map        = {u["email"]: u.get("userRole",    "other") for u in ws_users}
+            _user_display_map     = {u["email"]: u.get("displayName", u["email"]) for u in ws_users}
+        except Exception as e:
+            # Admin SDK unavailable — scan only the delegated admin account
+            broadcast("scan_phase", {"phase": f"Admin SDK unavailable ({e}) — scanning service account email only"})
+            user_emails = [conn.get_service_account_email()]
+            # SA email itself is not a mailbox; use admin_email if set
+            if conn._admin_email:
+                user_emails = [conn._admin_email]
+
+    # If user_emails came from the request, try to get display names and roles
+    if user_emails and not _user_role_map:
+        try:
+            ws_users = conn.list_users()
+            _user_role_map    = {u["email"]: u.get("userRole",    "other") for u in ws_users}
+            _user_display_map = {u["email"]: u.get("displayName", u["email"]) for u in ws_users}
+        except Exception:
+            _user_display_map = {}
+
+    if not user_emails:
+        broadcast("scan_error", {"file": "users", "error": "No users to scan — set admin email or provide user_emails"})
+        broadcast("google_scan_done", {"flagged_count": 0, "total_scanned": 0})
+        return
+
+    source_labels = []
+    if "gmail" in sources: source_labels.append("Gmail")
+    if "gdrive" in sources: source_labels.append("Google Drive")
+
+    broadcast("scan_start", {"sources": source_labels})
+    broadcast("scan_phase", {"phase": f"Google Workspace scan · {len(user_emails)} user(s) · " + ", ".join(source_labels)})
+
+    # Open DB
+    _db = _get_db() if DB_OK else None
+    _db_scan_id = None
+    if _db:
+        try:
+            _db_scan_id = _db.begin_scan(options)
+        except Exception as e:
+            logger.error("[google_scan] begin_scan failed: %s", e)
+
+    total_flagged = 0
+    total_scanned = 0
+    t_start = _time.monotonic()
+
+    def _check_abort():
+        from gdpr_scanner import _scan_abort as _sa
+        if _sa.is_set():
+            broadcast("scan_cancelled", {"completed": total_scanned})
+            return True
+        return False
+
+    def _broadcast_card(item_meta: dict, cprs: list, pii_counts=None):
+        nonlocal total_flagged
+        card = {
+            "id":           item_meta.get("id", ""),
+            "name":         item_meta.get("name", ""),
+            "source":       item_meta.get("_source", ""),
+            "source_type":  item_meta.get("_source_type", ""),
+            "cpr_count":    len(cprs),
+            "url":          item_meta.get("_url", ""),
+            "size_kb":      round(item_meta.get("size", 0) / 1024, 1),
+            "modified":     (item_meta.get("lastModifiedDateTime") or item_meta.get("receivedDateTime") or "")[:10],
+            "thumb_b64":    "",
+            "thumb_mime":   "image/svg+xml",
+            "risk":         None,
+            "account_id":   item_meta.get("_account_id", ""),
+            "account_name": item_meta.get("_account", ""),
+            "user_role":    _user_role_map.get(user_email, "other"),
+            "drive_id":     "",
+            "attachments":  [],
+            "folder":       "",
+            "transfer_risk":    "",
+            "special_category": [],
+            "face_count":       0,
+            "exif":             {},
+        }
+        flagged_items.append(card)
+        broadcast("scan_file_flagged", _with_disposition(card, _db))
+        total_flagged += 1
+        if _db and _db_scan_id:
+            try:
+                _db.save_item(_db_scan_id, card, cprs, pii_counts=pii_counts)
+            except Exception as e:
+                logger.error("[google_scan] save_item failed: %s", e)
+
+    # ── Per-user scan loop ────────────────────────────────────────────────────
+    from google_connector import GoogleError
+
+    for user_email in user_emails:
+        _display_name = _user_display_map.get(user_email, user_email)
+        if _check_abort():
+            return
+
+        broadcast("scan_phase", {"phase": f"Google Workspace \u2014 {user_email}"})
+
+        # ── Gmail ─────────────────────────────────────────────────────────────
+        if "gmail" in sources:
+            try:
+                broadcast("scan_phase", {"phase": f"{user_email} — Gmail"})
+                for meta, data in conn.iter_gmail_messages(
+                    user_email,
+                    max_messages=max_messages,
+                    scan_body=scan_body,
+                    scan_attachments=scan_att,
+                    max_attach_mb=max_attach_mb,
+                ):
+                    if _check_abort():
+                        return
+                    total_scanned += 1
+                    broadcast("scan_file", {"file": meta.get("name", "")})
+                    broadcast("scan_progress", {
+                        "scanned": total_scanned,
+                        "flagged": total_flagged,
+                        "file":    meta.get("name", ""),
+                        "pct":     min(90, 10 + total_scanned // 10),
+                        "source":  "google",
+                    })
+                    try:
+                        meta["_account"] = _display_name
+                        result = _scan_bytes(data, meta.get("name", "msg.txt"))
+                    except Exception as e:
+                        broadcast("scan_error", {"file": meta.get("name", ""), "error": str(e)})
+                        continue
+                    cprs      = result.get("cprs", [])
+                    pii_counts = result.get("pii_counts")
+                    if cprs or (pii_counts and any(pii_counts.values())):
+                        _broadcast_card(meta, cprs, pii_counts)
+            except GoogleError as e:
+                broadcast("scan_error", {"file": f"Gmail/{user_email}", "error": str(e)})
+            except Exception as e:
+                broadcast("scan_error", {"file": f"Gmail/{user_email}", "error": str(e)})
+
+        # ── Google Drive ──────────────────────────────────────────────────────
+        if "gdrive" in sources:
+            try:
+                broadcast("scan_phase", {"phase": f"{user_email} — Google Drive"})
+                for meta, data in conn.iter_drive_files(
+                    user_email,
+                    max_files=max_files,
+                    max_file_mb=max_file_mb,
+                ):
+                    if _check_abort():
+                        return
+                    total_scanned += 1
+                    broadcast("scan_file", {"file": meta.get("name", "")})
+                    broadcast("scan_progress", {
+                        "scanned": total_scanned,
+                        "flagged": total_flagged,
+                        "file":    meta.get("name", ""),
+                        "pct":     min(90, 10 + total_scanned // 10),
+                        "source":  "google",
+                    })
+                    try:
+                        meta["_account"] = _display_name
+                        result = _scan_bytes(data, meta.get("name", "file"))
+                    except Exception as e:
+                        broadcast("scan_error", {"file": meta.get("name", ""), "error": str(e)})
+                        continue
+                    cprs      = result.get("cprs", [])
+                    pii_counts = result.get("pii_counts")
+                    if cprs or (pii_counts and any(pii_counts.values())):
+                        _broadcast_card(meta, cprs, pii_counts)
+            except GoogleError as e:
+                broadcast("scan_error", {"file": f"Drive/{user_email}", "error": str(e)})
+            except Exception as e:
+                broadcast("scan_error", {"file": f"Drive/{user_email}", "error": str(e)})
+
+    elapsed = _time.monotonic() - t_start
+    broadcast("scan_done", {
+        "flagged_count":  total_flagged,
+        "total_scanned":  total_scanned,
+        "elapsed_seconds": round(elapsed, 1),
+    })
+    if _db and _db_scan_id:
+        try:
+            _db.end_scan(_db_scan_id, total_scanned, total_flagged)
+        except Exception:
+            pass
diff --git a/routes/profiles.py b/routes/profiles.py
new file mode 100644
index 0000000..643f1ac
--- /dev/null
+++ b/routes/profiles.py
@@ -0,0 +1,47 @@
+"""
+Scan profiles
+"""
+from __future__ import annotations
+from flask import Blueprint, jsonify, request
+from app_config import _profiles_load, _profile_save, _profile_delete, _profile_get
+
+bp = Blueprint("profiles", __name__)
+
+
+@bp.route("/api/profiles", methods=["GET"])
+def profiles_list():
+    """Respond with whatever ``_profiles_load()`` yields, under the "profiles" key."""
+    return jsonify(profiles=_profiles_load())
+
+
+@bp.route("/api/profiles/save", methods=["POST"])
+def profiles_save():
+    """Create or update a profile; 400 unless the body is an object with a name."""
+    profile = request.get_json() or {}
+    # A JSON array/string body has no .get(); reject it instead of raising a 500.
+    if not isinstance(profile, dict) or not profile.get("name"):
+        return jsonify({"error": "name required"}), 400
+    return jsonify({"status": "saved", "profile": _profile_save(profile)})
+
+
+@bp.route("/api/profiles/delete", methods=["POST"])
+def profiles_delete():
+    """Remove the profile identified by ``name`` (preferred) or ``id`` in the JSON body."""
+    body   = request.get_json() or {}
+    target = body.get("name") or body.get("id", "")
+    if target:
+        status = "deleted" if _profile_delete(target) else "not_found"
+        return jsonify({"status": status})
+    return jsonify({"error": "name or id required"}), 400
+
+
+@bp.route("/api/profiles/get")
+def profiles_get():
+    """Look up one profile via the ``name`` (preferred) or ``id`` query parameter."""
+    args    = request.args
+    profile = _profile_get(args.get("name") or args.get("id", ""))
+    if profile:
+        return jsonify({"profile": profile})
+    return jsonify({"error": "not found"}), 404
+
+
diff --git a/routes/scan.py b/routes/scan.py
new file mode 100644
index 0000000..3b1f6e1
--- /dev/null
+++ b/routes/scan.py
@@ -0,0 +1,137 @@
+"""
+Scan stream, start/stop, checkpoint, settings, delta
+"""
+from __future__ import annotations
+import threading
+from flask import Blueprint, jsonify, request
+from routes import state
+from app_config import (
+    _save_settings, _load_settings,
+    _load_src_toggles, _save_src_toggles,
+)
+from checkpoint import (
+    _checkpoint_key, _load_checkpoint, _clear_checkpoint,
+    _load_delta_tokens, _DELTA_PATH,
+)
+
+bp = Blueprint("scan", __name__)
+
+
+@bp.route("/api/scan/status")
+def scan_status():
+    """Lightweight status check — is a scan running? What scan_id?"""
+    import sse as _sse_mod
+    # Ask the lock for its state instead of acquiring and releasing it: even a
+    # momentary acquire could make a concurrent /api/scan/start answer 409
+    # although no scan is actually running.
+    return jsonify({
+        "running":  state._scan_lock.locked(),
+        "scan_id":  _sse_mod._current_scan_id or None,
+    })
+
+
+@bp.route("/api/src_toggles", methods=["GET", "POST"])
+def src_toggles():
+    """Read (GET) or persist (POST) the source toggle state."""
+    if request.method != "POST":
+        return jsonify(_load_src_toggles())
+    _save_src_toggles(request.get_json() or {})
+    return jsonify({"ok": True})
+
+
+@bp.route("/api/scan/start", methods=["POST"])
+def scan_start():  # 401 without a connector, 409 while a scan holds the lock
+    if not state.connector:
+        return jsonify({"error": "not authenticated"}), 401
+    options = request.get_json() or {}  # parse first: a bad body must raise before the lock is held
+    if not state._scan_lock.acquire(blocking=False):
+        return jsonify({"error": "scan already running"}), 409
+    def _run():
+        from scan_engine import run_scan
+        try:
+            run_scan(options)
+        finally:
+            state._scan_lock.release()
+    try:
+        state._scan_abort.clear()
+        settings = {k: options.get(k, d) for k, d in (("sources", []), ("user_ids", []), ("options", {}))}
+        _save_settings(settings, profile_id=options.pop("profile_id", None))
+        threading.Thread(target=_run, daemon=True).start()
+    except Exception:
+        state._scan_lock.release()  # no scan thread exists to release it in _run's finally
+        raise
+    return jsonify({"status": "started"})
+
+
+@bp.route("/api/scan/stop", methods=["POST"])
+def scan_stop():
+    state._scan_abort.set()  # only raises the abort flag; returns without waiting for the scan
+    return jsonify(status="stopping")
+
+
+@bp.route("/api/scan/checkpoint", methods=["POST"])
+def scan_checkpoint_info():
+    """Return info about any saved checkpoint for the given scan options.
+    If check_only=true, just reports whether a scan is currently running."""
+    options = request.get_json() or {}
+    if options.get("check_only"):
+        # Read the lock state instead of acquiring and releasing it: a probe
+        # that briefly holds the lock could make a concurrent scan start
+        # answer 409 although no scan is running.
+        return jsonify({"running": state._scan_lock.locked()})
+    key = _checkpoint_key(options)
+    cp  = _load_checkpoint(key)
+    if not cp:
+        return jsonify({"exists": False})
+    return jsonify({
+        "exists":        True,
+        "scanned_count": len(cp.get("scanned_ids", [])),
+        "flagged_count": len(cp.get("flagged", [])),
+        "started_at":    cp.get("meta", {}).get("started_at"),
+    })
+
+
+@bp.route("/api/scan/clear_checkpoint", methods=["POST"])
+def scan_clear_checkpoint():
+    """Drop the saved checkpoint (via ``_clear_checkpoint``) so the next scan starts fresh."""
+    _clear_checkpoint()
+    return jsonify(status="cleared")
+
+
+@bp.route("/api/settings/save", methods=["POST"])
+def settings_save():
+    """Store the posted scan settings for later reuse by --headless mode."""
+    # A falsy JSON body (null, {}, [] ...) is stored as an empty dict.
+    _save_settings(request.get_json() or {})
+    return jsonify(status="saved")
+
+
+@bp.route("/api/settings/load")
+def settings_load():
+    """Report the previously saved scan settings, or ``exists: false`` when there are none."""
+    saved = _load_settings()
+    if saved:
+        return jsonify({"exists": True, "settings": saved})
+    return jsonify({"exists": False})
+
+
+@bp.route("/api/delta/status")
+def delta_status():
+    """Summarise the stored delta tokens: how many, their keys, and whether any exist."""
+    token_keys = list(_load_delta_tokens().keys())
+    return jsonify({
+        "count":  len(token_keys),
+        "keys":   token_keys,
+        "exists": bool(token_keys),
+    })
+
+
+@bp.route("/api/delta/clear", methods=["POST"])
+def delta_clear():
+    """Discard all stored delta tokens (next scan will be a full scan)."""
+    try:
+        # missing_ok closes the exists()/unlink() race: a file that vanishes in
+        # between no longer surfaces as a 500; an absent file is simply "cleared".
+        _DELTA_PATH.unlink(missing_ok=True)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
diff --git a/routes/scheduler.py b/routes/scheduler.py
new file mode 100644
index 0000000..3e1f4cc
--- /dev/null
+++ b/routes/scheduler.py
@@ -0,0 +1,156 @@
+"""
+Scheduler API routes — multi-job CRUD, status, history, run-now.
+"""
+from __future__ import annotations
+from flask import Blueprint, jsonify, request
+import sys, os, threading
+
+bp = Blueprint("scheduler", __name__)
+
+@bp.errorhandler(Exception)
+def _handle_error(e):
+    """Print the traceback to stderr and answer any unhandled blueprint exception as JSON 500."""
+    import traceback; traceback.print_exc()
+    return jsonify(error=str(e)), 500
+
+# Lazy accessors: `scan_scheduler` is imported at call time inside each helper
+# below, not at module import time. (Nothing in this file modifies sys.path.)
+def _sm():
+    import scan_scheduler  # deferred to call time
+    return scan_scheduler
+
+
+def _sched():
+    """Return the ``scan_scheduler`` attribute of the scan_scheduler module."""
+    return _sm().scan_scheduler
+
+def _db():
+    import gdpr_scanner as scanner  # deferred to call time
+    return None if not scanner.DB_OK else scanner._get_db()
+
+
+# ── Job list ──────────────────────────────────────────────────────────────────
+
+@bp.route("/api/scheduler/jobs", methods=["GET"])
+def scheduler_jobs_list():
+    return jsonify(jobs=_sm().load_jobs())  # whatever load_jobs() returns, under "jobs"
+
+
+@bp.route("/api/scheduler/jobs/save", methods=["POST"])
+def scheduler_jobs_save():
+    """Create or update a scheduler job from the JSON body.
+
+    A payload ``id`` that matches a stored job updates that job in place; any
+    other payload is passed to ``_new_job`` and appended as a new job.
+    """
+    try:
+        sm     = _sm()
+        data   = request.get_json() or {}
+        jobs   = sm.load_jobs()
+        job_id = (data.get("id") or "").strip()
+        index  = next((i for i, j in enumerate(jobs) if job_id and j["id"] == job_id), None)
+        if index is None:
+            job = sm._new_job(data)
+            jobs.append(job)
+        else:
+            # Pin the stored id last: the raw payload id may carry the whitespace
+            # that strip() removed for matching and must not replace the real id.
+            job = jobs[index] = {**sm._DEFAULT_JOB, **jobs[index], **data, "id": job_id}
+        sm.save_jobs(jobs)
+        try:
+            _sched().reload()
+        except Exception:
+            pass  # best-effort: the job is already saved even if the reload fails
+        return jsonify({"ok": True, "job": job})
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        return jsonify({"error": str(e)}), 500
+
+
+@bp.route("/api/scheduler/jobs/delete", methods=["POST"])
+def scheduler_jobs_delete():
+    """Drop the job with the posted ``id``, save the list, and ask the scheduler to reload."""
+    try:
+        module = _sm()
+        target = (request.get_json() or {}).get("id", "")
+        if not target:
+            return jsonify({"error": "id required"}), 400
+        module.save_jobs([job for job in module.load_jobs() if job["id"] != target])
+        try:
+            _sched().reload()
+        except Exception:
+            pass  # reload is best-effort; the job list is already saved
+        return jsonify({"ok": True})
+    except Exception as exc:
+        import traceback
+        traceback.print_exc()
+        return jsonify({"error": str(exc)}), 500
+
+
+# ── Run now ───────────────────────────────────────────────────────────────────
+
+@bp.route("/api/scheduler/jobs/run_now", methods=["POST"])
+def scheduler_jobs_run_now():
+    job_id    = (request.get_json() or {}).get("id", "")
+    scheduler = _sched()
+    if job_id in scheduler._running_jobs:  # this very job is listed as running
+        return jsonify(error="Job already running"), 409
+    if scheduler.is_running:  # the scheduler reports some scan in progress
+        return jsonify(error="Another scan is already running"), 409
+    threading.Thread(target=scheduler._execute_scan, args=(job_id,), daemon=True).start()
+    return jsonify(status="started")
+
+
+# ── Status ────────────────────────────────────────────────────────────────────
+
+@bp.route("/api/scheduler/status")
+def scheduler_status():
+    return jsonify(_sched().get_status())  # payload comes straight from the scheduler's get_status()
+
+
+# ── History ───────────────────────────────────────────────────────────────────
+
+@bp.route("/api/scheduler/history")
+def scheduler_history():
+    store = _db()
+    if not store:
+        return jsonify(runs=[])  # no database handle available
+    try:
+        query = {"limit": int(request.args.get("limit", 20)), "job_id": request.args.get("job_id")}
+        try:
+            runs = store.get_schedule_runs(**query)
+        except TypeError:
+            # presumably a get_schedule_runs() without job_id support — retry unfiltered
+            runs = store.get_schedule_runs(limit=query["limit"])
+        return jsonify(runs=runs)
+    except Exception as exc:
+        return jsonify(runs=[], error=str(exc))
+
+
+# ── Backward-compat single-job endpoints ─────────────────────────────────────
+
+@bp.route("/api/scheduler/config", methods=["GET"])
+def scheduler_config_get():
+    return jsonify(_sm().load_schedule_config())  # backward-compat: the single-job schedule config
+
+
+@bp.route("/api/scheduler/config", methods=["POST"])
+def scheduler_config_save():
+    """Merge the posted keys over the stored schedule config, save it, and reload the scheduler."""
+    module  = _sm()
+    updates = request.get_json() or {}
+    merged  = {**module.load_schedule_config(), **updates}
+    module.save_schedule_config(merged)
+    scheduler = _sched()
+    scheduler.reload()
+    return jsonify(status="saved", config=merged, next_run=scheduler.next_run_time())
+
+
+@bp.route("/api/scheduler/run_now", methods=["POST"])
+def scheduler_run_now():
+    scheduler = _sched()
+    if not scheduler.is_running:
+        threading.Thread(target=scheduler._execute_scan, args=(None,), daemon=True).start()
+        return jsonify(status="started")  # legacy endpoint: no job id is passed (None)
+    return jsonify(error="Scheduled scan already running"), 409
diff --git a/routes/sources.py b/routes/sources.py
new file mode 100644
index 0000000..5477448
--- /dev/null
+++ b/routes/sources.py
@@ -0,0 +1,100 @@
+"""
+File sources and file scan
+"""
+from __future__ import annotations
+import threading
+from flask import Blueprint, jsonify, request
+from routes import state
+from app_config import _load_file_sources, _save_file_sources
+
+try:
+    from file_scanner import store_smb_password, SMB_OK as _SMB_OK
+    _FILE_SCANNER_OK = True
+except ImportError:
+    _FILE_SCANNER_OK = False
+    _SMB_OK = False
+    def store_smb_password(*a, **kw): return False  # type: ignore[misc]
+
+bp = Blueprint("sources", __name__)
+
+
+@bp.route("/api/file_sources", methods=["GET"])
+def file_sources_list():
+    """List the saved file sources together with two availability flags."""
+    # Both flags are fixed at import time by the file_scanner import attempt above.
+    return jsonify(
+        sources=_load_file_sources(),
+        smb_available=_SMB_OK,
+        scanner_ok=_FILE_SCANNER_OK,
+    )
+
+
+@bp.route("/api/file_sources/save", methods=["POST"])
+def file_sources_save():
+    """Add or update a file source.  Assigns a UUID if id is missing."""
+    import uuid as _uuid
+    data = request.get_json() or {}
+    path = data["path"] = str(data.get("path") or "").strip()  # store the validated, trimmed path
+    if not path:
+        return jsonify({"error": "path required"}), 400
+    sources = _load_file_sources()
+    uid = data.get("id") or ""
+    for i, s in enumerate(sources):
+        if uid and s.get("id") == uid:  # an empty id must never match a stored source
+            sources[i] = {**s, **data}
+            _save_file_sources(sources)
+            return jsonify({"ok": True, "source": sources[i]})
+    data["id"] = uid or str(_uuid.uuid4())
+    sources.append(data)
+    _save_file_sources(sources)
+    return jsonify({"ok": True, "source": data})
+
+
+@bp.route("/api/file_sources/delete", methods=["POST"])
+def file_sources_delete():
+    """Drop every saved file source whose ``id`` equals the posted ``id``."""
+    uid = (request.get_json() or {}).get("id", "")
+    if uid:
+        remaining = [src for src in _load_file_sources() if src.get("id") != uid]
+        _save_file_sources(remaining)
+        return jsonify(ok=True)
+    return jsonify(error="id required"), 400
+
+
+@bp.route("/api/file_sources/store_creds", methods=["POST"])
+def file_sources_store_creds():
+    """Hand an SMB password to ``store_smb_password`` (OS keychain storage).
+
+    Needs ``smb_user`` and ``password``; ``keychain_key`` defaults to ``smb_user``.
+    """
+    if not _FILE_SCANNER_OK:
+        return jsonify(error="file_scanner not available"), 503
+    body = request.get_json() or {}
+    host, user, secret = (body.get(field, "") for field in ("smb_host", "smb_user", "password"))
+    if not (user and secret):
+        return jsonify(error="smb_user and password required"), 400
+    key = body.get("keychain_key") or user
+    if store_smb_password(host, user, secret, key):
+        return jsonify(ok=True, keychain_key=key)
+    return jsonify(error="keyring not available — install: pip install keyring"), 500
+
+
+@bp.route("/api/file_scan/start", methods=["POST"])
+def file_scan_start():
+    """Start a file system scan for a single file source."""
+    if not _FILE_SCANNER_OK:
+        return jsonify({"error": "file_scanner not available"}), 503
+    # Parse before locking: get_json() may raise, and nothing would release the lock.
+    source = request.get_json() or {}
+    if not state._scan_lock.acquire(blocking=False):
+        return jsonify({"error": "scan already running"}), 409
+    state._scan_abort.clear()
+
+    def _run():
+        from scan_engine import run_file_scan
+        try:
+            run_file_scan(source)
+        finally:
+            state._scan_lock.release()
+    threading.Thread(target=_run, daemon=True).start()
+    return jsonify({"status": "started"})
diff --git a/routes/state.py b/routes/state.py
new file mode 100644
index 0000000..12b40f8
--- /dev/null
+++ b/routes/state.py
@@ -0,0 +1,41 @@
+"""
+Shared mutable state for GDPR Scanner.
+
+All modules (gdpr_scanner.py and route blueprints) import from here.
+This avoids circular imports while keeping a single source of truth
+for every global that routes need to read or write.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from m365_connector import M365Connector
+
+# ── Auth ──────────────────────────────────────────────────────────────────────
+connector: "M365Connector | None" = None
+pending_flow: "dict | None" = None
+auth_poll_result: "dict | None" = None
+
+# ── Google Workspace ──────────────────────────────────────────────────────────
+google_connector  = None   # GoogleConnector | PersonalGoogleConnector | None
+google_pending_flow: "dict | None" = None
+google_poll_result: "str | None"   = None
+
+# ── Scan concurrency ──────────────────────────────────────────────────────────
+import threading as _threading
+_scan_lock        = _threading.Lock()   # taken by /api/scan/start and /api/file_scan/start; released when the scan thread ends
+_scan_abort       = _threading.Event()  # set by /api/scan/stop; cleared when a new scan starts
+_google_scan_lock  = _threading.Lock()
+_google_scan_abort = _threading.Event()
+
+# ── Scan results (in-memory session cache) ────────────────────────────────────
+flagged_items: list = []
+scan_meta:     dict = {}
+
+# ── i18n ─────────────────────────────────────────────────────────────────────
+LANG: dict = {}
+
+# ── Art. 9 keyword data ───────────────────────────────────────────────────────
+compiled_keywords: list = []  # list of compiled re.Pattern
+keyword_data:      dict = {}  # raw keyword dict from JSON
+keyword_flat:      list = []  # flat list of keyword strings
diff --git a/routes/users.py b/routes/users.py
new file mode 100644
index 0000000..19f4306
--- /dev/null
+++ b/routes/users.py
@@ -0,0 +1,217 @@
+"""
+User listing, role overrides, license debug
+"""
+from __future__ import annotations
+import logging
+import traceback
+from flask import Blueprint, jsonify, request
+from routes import state
+from app_config import (
+    _load_role_overrides, _save_role_overrides, _resolve_display_name,
+)
+
+bp = Blueprint("users", __name__)
+logger = logging.getLogger(__name__)
+
+
+@bp.route("/api/users")
+def get_users():
+    """List tenant users (id, display name, email, role) for account selection."""
+    if not state.connector:
+        return jsonify({"error": "not authenticated"}), 401
+    try:
+        users   = state.connector.list_users()
+        out     = []
+        seen    = set()
+
+        # Build SKU map for role classification.
+        # get_subscribed_skus() tries /subscribedSkus → /me/licenseDetails.
+        # Then always merge per-user licenseDetails on top — this ensures we
+        # have skuPartNumbers for every distinct SKU in the tenant, not just
+        # the admin's own license (which is all /me/licenseDetails returns).
+        try:
+            sku_map = state.connector.get_subscribed_skus()
+        except Exception:
+            sku_map = {}
+
+        try:
+            per_user = state.connector.build_sku_map_from_users(users)
+            if per_user:
+                added = len(set(per_user) - set(sku_map))
+                sku_map.update(per_user)
+                if added:
+                    logger.info("[skus] merged %d additional SKU(s) from per-user licenseDetails", added)
+        except Exception:
+            logger.warning("[skus] per-user licenseDetails merge failed", exc_info=True)
+
+        # Load any manual role overrides set by the admin
+        _role_overrides = _load_role_overrides()
+
+        def _build_user(u: dict, is_me: bool = False) -> dict:
+            _em   = u.get("mail") or u.get("userPrincipalName", "")
+            _auto = state.connector.classify_user_role(
+                u.get("assignedLicenses", []), sku_map
+            )
+            # Manual override takes precedence over auto-classification
+            _role = _role_overrides.get(u["id"], _auto)
+            return {
+                "id":           u["id"],
+                "displayName":  _resolve_display_name(u.get("displayName", ""), _em),
+                "email":        _em,
+                "isMe":         is_me,
+                "userRole":     _role,
+                "roleOverride": u["id"] in _role_overrides,
+            }
+
+        if state.connector.is_app_mode:
+            for u in users:
+                uid = u.get("id")
+                if uid and uid not in seen:
+                    seen.add(uid)
+                    out.append(_build_user(u))
+        else:
+            me    = state.connector.get_user_info()
+            me_id = me.get("id")
+            for u in ([me] + users):
+                uid = u.get("id")
+                if uid and uid not in seen:
+                    seen.add(uid)
+                    out.append(_build_user(u, is_me=(uid == me_id)))
+
+        # Log a warning when no users were classified — helps diagnose
+        # tenants with SKUs not yet in m365_skus.json
+        classified = [u for u in out if u["userRole"] in ("student", "staff")]
+        if out and not classified:
+            unknown_skus: set = set()
+            for u in users[:20]:  # sample first 20 to keep it brief
+                for lic in u.get("assignedLicenses", []):
+                    sid = lic.get("skuId", "")
+                    if sid:
+                        unknown_skus.add(sid)
+            logger.warning(
+                "[role] 0/%d users classified — no SKUs in m365_skus.json matched. "
+                "Unrecognised SKU IDs (sample): %s. "
+                "Add them to classification/m365_skus.json or use /api/users/license_debug.",
+                len(out), sorted(unknown_skus)[:10],
+            )
+
+        return jsonify({
+            "users":             out,
+            "sku_map_available": bool(sku_map),
+            "unclassified":      len(out) - len(classified),
+        })
+    except Exception as e:
+        return jsonify({"error": str(e), "detail": traceback.format_exc()}), 500
+
+
+@bp.route("/api/users/license_debug")
+def license_debug():
+    """Full diagnostic: runtime SKU sets, sku_map, per-user trace, and step-by-step
+    classification walk for every user — enough to diagnose any remaining issue."""
+    if not state.connector:
+        return jsonify({"error": "not authenticated"}), 401
+    try:
+        users   = state.connector.list_users()
+        sku_map = state.connector.get_subscribed_skus()
+        try:
+            sku_map.update(state.connector.build_sku_map_from_users(users))
+        except Exception:
+            logger.warning("[skus] per-user licenseDetails merge failed", exc_info=True)
+
+        # Per-user trace with step-by-step classification walk
+        out = []
+        for u in users[:100]:
+            lics     = u.get("assignedLicenses", [])
+            role     = state.connector.classify_user_role(lics, sku_map)
+
+            # Walk each licence exactly as classify_user_role does
+            lic_trace = []
+            for lic in lics:
+                raw_id  = lic.get("skuId", "")
+                low_id  = raw_id.lower()
+                name    = sku_map.get(low_id) or sku_map.get(raw_id) or "?"
+                lic_trace.append({
+                    "skuId":       raw_id,
+                    "skuName":     name,
+                    "in_staff":    low_id in state.connector._STAFF_SKU_IDS,
+                    "in_student":  low_id in state.connector._STUDENT_SKU_IDS,
+                    "frag_staff":  next((f for f in state.connector._STAFF_SKU_FRAGMENTS
+                                        if f in name.upper()), None),
+                    "frag_student": next((f for f in state.connector._STUDENT_SKU_FRAGMENTS
+                                         if f in name.upper()), None),
+                })
+
+            out.append({
+                "displayName": u.get("displayName", ""),
+                "email":       u.get("mail") or u.get("userPrincipalName", ""),
+                "role":        role,
+                "licences":    lic_trace,
+            })
+
+        return jsonify({
+            # Runtime state — proves whether m365_skus.json loaded correctly
+            "runtime": {
+                "student_ids_count": len(state.connector._STUDENT_SKU_IDS),
+                "staff_ids_count":   len(state.connector._STAFF_SKU_IDS),
+                "student_fragments": list(state.connector._STUDENT_SKU_FRAGMENTS),
+                "staff_fragments":   list(state.connector._STAFF_SKU_FRAGMENTS),
+                "sku_map_entries":   len(sku_map),
+                "sku_file_path":     str(state.connector._sku_file_path()),
+            },
+            "student_ids": sorted(state.connector._STUDENT_SKU_IDS),
+            "staff_ids":   sorted(state.connector._STAFF_SKU_IDS),
+            "sku_map":     sku_map,
+            "users":       out,
+        })
+    except Exception as e:
+        return jsonify({"error": str(e), "detail": traceback.format_exc()}), 500
+
+
+@bp.route("/api/users/lookup")
+def lookup_user():
+    """Look up a single user by UPN or email."""
+    if not state.connector:
+        return jsonify({"error": "not authenticated"}), 401
+    upn = request.args.get("upn", "").strip()
+    if not upn:
+        return jsonify({"error": "upn required"}), 400
+    try:
+        # Percent-encode the caller-supplied value before it becomes a URL path
+        # segment, so '/', '?' or '#' (e.g. guest "#EXT#" UPNs) cannot alter the path.
+        from urllib.parse import quote
+        data   = state.connector._get(f"/users/{quote(upn, safe='@')}",
+                                      {"$select": "id,displayName,mail,userPrincipalName"})
+        _email = data.get("mail") or data.get("userPrincipalName", upn)
+        _name  = _resolve_display_name(data.get("displayName", ""), _email, upn)
+        return jsonify({"id": data["id"], "displayName": _name, "email": _email, "isMe": False})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 404
+
+
+@bp.route("/api/users/role_override", methods=["GET"])
+def role_override_get():
+    """Return every stored manual role override as a JSON object {user_id: role}."""
+    return jsonify(_load_role_overrides())
+
+
+@bp.route("/api/users/role_override", methods=["POST"])
+def role_override_set():
+    """Set or clear a manual role override for one user.
+
+    Body: {user_id, role}  — role is 'student' | 'staff' | 'other' | '' or null (clear).
+    """
+    data    = request.get_json() or {}
+    uid     = str(data.get("user_id") or "").strip()
+    role    = str(data.get("role") or "").strip().lower()  # null/absent role means "clear", not a crash
+    if not uid:
+        return jsonify({"error": "user_id required"}), 400
+    if role and role not in ("student", "staff", "other"):
+        return jsonify({"error": "role must be student | staff | other | '' (clear)"}), 400
+    overrides = _load_role_overrides()
+    if role:
+        overrides[uid] = role
+    else:
+        overrides.pop(uid, None)
+    _save_role_overrides(overrides)
+    return jsonify({"ok": True, "user_id": uid, "role": role or None,
+                    "total_overrides": len(overrides)})
diff --git a/routes/viewer.py b/routes/viewer.py
new file mode 100644
index 0000000..e9d6845
--- /dev/null
+++ b/routes/viewer.py
@@ -0,0 +1,152 @@
+"""
+Read-only viewer token + PIN management routes (#33).
+"""
+from __future__ import annotations
+import time
+from flask import Blueprint, jsonify, request, session
+from app_config import (
+    create_viewer_token,
+    validate_viewer_token,
+    revoke_viewer_token,
+    cleanup_expired_viewer_tokens,
+    _load_viewer_tokens,
+    get_viewer_pin_hash,
+    set_viewer_pin,
+    verify_viewer_pin,
+    clear_viewer_pin,
+)
+
+bp = Blueprint("viewer", __name__)
+
+# Simple brute-force guard: keyed by remote IP.
+_pin_attempts: dict[str, list[float]] = {}
+_MAX_ATTEMPTS = 5
+_WINDOW_S     = 300   # 5 minutes
+
+
+def _pin_rate_limit(ip: str) -> bool:
+    """Tell whether ``ip`` is currently locked out of PIN checks.
+
+    Side effect: the failure timestamps stored for ``ip`` are replaced by the
+    subset that is still younger than _WINDOW_S seconds.
+
+    Returns True once at least _MAX_ATTEMPTS such failures remain.
+    """
+    current = time.time()
+    recent  = list(filter(lambda stamp: current - stamp < _WINDOW_S,
+                          _pin_attempts.get(ip, [])))
+    _pin_attempts[ip] = recent
+    return not len(recent) < _MAX_ATTEMPTS
+
+
+def _pin_record_failure(ip: str) -> None:
+    """Append the current time to the failure history kept for ``ip``."""
+    history = _pin_attempts.setdefault(ip, [])
+    history.append(time.time())
+
+
+def _pin_clear_failures(ip: str) -> None:
+    """Forget every recorded failure for ``ip`` (no-op if none are stored)."""
+    _pin_attempts.pop(ip, None)
+
+
+# ── Token endpoints ───────────────────────────────────────────────────────────
+
+@bp.route("/api/viewer/tokens", methods=["GET"])
+def list_tokens():
+    """List the stored viewer tokens as a JSON array.
+
+    Expired tokens are purged first.  Each element carries token_hint (first
+    eight characters plus an ellipsis), token, label, created_at, expires_at
+    and last_used_at.
+
+    NOTE(review): the full ``token`` is returned next to ``token_hint`` —
+    confirm that exposing it here is intended.
+    """
+    cleanup_expired_viewer_tokens()
+    listing = []
+    for entry in _load_viewer_tokens():
+        secret = entry["token"]
+        listing.append({
+            "token_hint":  secret[:8] + "…",
+            "token":       secret,
+            "label":       entry.get("label", ""),
+            "created_at":  entry.get("created_at"),
+            "expires_at":  entry.get("expires_at"),
+            "last_used_at": entry.get("last_used_at"),
+        })
+    return jsonify(listing)
+
+
+@bp.route("/api/viewer/tokens", methods=["POST"])
+def create_token():
+    """Create a viewer token from the JSON body {label, expires_days}.
+
+    expires_days is optional; when present it must convert via int() to a
+    value greater than zero, otherwise the request is rejected with 400.
+    On success the entry returned by create_viewer_token is sent back with 201.
+    """
+    payload = request.get_json(silent=True) or {}
+    label   = str(payload.get("label", "")).strip()
+    days    = payload.get("expires_days")
+    if days is not None:
+        try:
+            days = int(days)
+        except (TypeError, ValueError):
+            # Unparsable input takes the same rejection path as a
+            # non-positive number.
+            days = 0
+        if days <= 0:
+            return jsonify({"error": "expires_days must be a positive integer"}), 400
+    return jsonify(create_viewer_token(label=label, expires_days=days)), 201
+
+
+@bp.route("/api/viewer/tokens/<token>", methods=["DELETE"])
+def delete_token(token: str):
+    """Revoke the viewer token given in the URL.
+
+    Responds 400 for an empty token, 404 when revoke_viewer_token reports
+    that nothing was removed, and {ok: true} otherwise.
+    """
+    if not token:
+        return jsonify({"error": "token required"}), 400
+    if revoke_viewer_token(token):
+        return jsonify({"ok": True})
+    return jsonify({"error": "token not found"}), 404
+
+
+@bp.route("/api/viewer/tokens/validate", methods=["POST"])
+def validate_token():
+    """Check the ``token`` field of the JSON body.
+
+    Responds 401 {valid: false} when validate_viewer_token returns None;
+    otherwise {valid: true, label, expires_at} taken from the returned entry.
+    """
+    payload   = request.get_json(silent=True) or {}
+    candidate = str(payload.get("token", "")).strip()
+    entry     = validate_viewer_token(candidate)
+    if entry is not None:
+        return jsonify({"valid": True, "label": entry.get("label", ""), "expires_at": entry.get("expires_at")})
+    return jsonify({"valid": False}), 401
+
+
+# ── PIN endpoints ─────────────────────────────────────────────────────────────
+
+@bp.route("/api/viewer/pin", methods=["GET"])
+def pin_status():
+    """Return whether a viewer PIN is currently set.
+
+    Response body: {"pin_set": bool} — true when get_viewer_pin_hash()
+    yields a truthy value.
+    """
+    return jsonify({"pin_set": bool(get_viewer_pin_hash())})
+
+
+@bp.route("/api/viewer/pin", methods=["POST"])
+def pin_set():
+    """Set or change the viewer PIN.
+    Body: {pin: "...", current_pin: "..."}
+    current_pin required only when a PIN is already set.
+
+    Responses: 200 {ok: true}; 400 for a missing or malformed pin; 403 for a
+    wrong current_pin; 429 when this client IP is locked out after repeated
+    wrong PINs.
+    """
+    body = request.get_json(silent=True) or {}
+    new_pin = str(body.get("pin", "")).strip()
+    if not new_pin:
+        return jsonify({"error": "pin required"}), 400
+    # str.isdigit() alone also accepts non-ASCII digit characters (full-width,
+    # superscript, …); require the plain ASCII 0-9 the error message promises.
+    if not (new_pin.isascii() and new_pin.isdigit()) or not (4 <= len(new_pin) <= 8):
+        return jsonify({"error": "PIN must be 4–8 digits"}), 400
+    if get_viewer_pin_hash():
+        # The current_pin check reveals whether a guess is right, so it goes
+        # through the same per-IP brute-force guard as /api/viewer/pin/verify.
+        ip = request.remote_addr or "unknown"
+        if _pin_rate_limit(ip):
+            return jsonify({"error": "Too many failed attempts. Try again later."}), 429
+        if not verify_viewer_pin(str(body.get("current_pin", "")).strip()):
+            _pin_record_failure(ip)
+            return jsonify({"error": "current PIN is incorrect"}), 403
+        _pin_clear_failures(ip)
+    set_viewer_pin(new_pin)
+    return jsonify({"ok": True})
+
+
+@bp.route("/api/viewer/pin", methods=["DELETE"])
+def pin_clear():
+    """Remove the viewer PIN.  Requires current PIN if one is set.
+
+    Body: {current_pin: "..."}.  Responses: 200 {ok: true}; 403 for a wrong
+    current_pin; 429 when this client IP is locked out after repeated wrong
+    PINs.
+    """
+    body = request.get_json(silent=True) or {}
+    if get_viewer_pin_hash():
+        # The current_pin check reveals whether a guess is right, so it goes
+        # through the same per-IP brute-force guard as /api/viewer/pin/verify.
+        ip = request.remote_addr or "unknown"
+        if _pin_rate_limit(ip):
+            return jsonify({"error": "Too many failed attempts. Try again later."}), 429
+        if not verify_viewer_pin(str(body.get("current_pin", "")).strip()):
+            _pin_record_failure(ip)
+            return jsonify({"error": "current PIN is incorrect"}), 403
+        _pin_clear_failures(ip)
+    clear_viewer_pin()
+    return jsonify({"ok": True})
+
+
+@bp.route("/api/viewer/pin/verify", methods=["POST"])
+def pin_verify():
+    """Check a submitted PIN and flag the session as a viewer on success.
+
+    Body: {pin: "..."}.  Responses: 200 {ok: true} (session["viewer_ok"] is
+    set); 401 with the number of attempts remaining after a wrong PIN; 429
+    while the client IP is locked out.
+    """
+    client = request.remote_addr or "unknown"
+    if _pin_rate_limit(client):
+        return jsonify({"error": "Too many failed attempts. Try again later."}), 429
+    payload   = request.get_json(silent=True) or {}
+    submitted = str(payload.get("pin", "")).strip()
+    if verify_viewer_pin(submitted):
+        _pin_clear_failures(client)
+        session["viewer_ok"] = True
+        return jsonify({"ok": True})
+    _pin_record_failure(client)
+    left = max(0, _MAX_ATTEMPTS - len(_pin_attempts.get(client, [])))
+    return jsonify({"error": "Incorrect PIN", "remaining": left}), 401
diff --git a/run_tests.sh b/run_tests.sh
new file mode 100755
index 0000000..ea273d1
--- /dev/null
+++ b/run_tests.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+# Run the pytest suite inside the repo-local virtualenv (./venv).
+# All arguments are forwarded to pytest unchanged.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+# Activate venv
+if [[ ! -f venv/bin/activate ]]; then
+  # The hint installs through the venv's own pip: a bare `pip` would target
+  # whichever interpreter is first on PATH, because the new venv is not
+  # activated at that point.
+  echo "ERROR: venv not found. Run: python -m venv venv && venv/bin/pip install -r requirements.txt" >&2
+  exit 1
+fi
+source venv/bin/activate
+
+exec python -m pytest "$@"
diff --git a/scan_engine.py b/scan_engine.py
new file mode 100644
index 0000000..d8be012
--- /dev/null
+++ b/scan_engine.py
@@ -0,0 +1,1161 @@
+"""
+scan_engine.py — M365 and file-system scan orchestration for GDPRScanner.
+
+Provides:
+  run_scan(options)        — full M365 scan (Exchange, OneDrive, SharePoint, Teams)
+  run_file_scan(source)    — local / SMB file system scan
+
+Both functions use sse.broadcast() for progress events and gdpr_db for persistence.
+"""
+from __future__ import annotations
+import concurrent.futures
+import gc
+import hashlib
+import logging
+import json
+import re
+import sys
+import time
+import tempfile
+import threading
+from collections import deque
+from datetime import datetime, timezone, timedelta
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# ── Runtime dependencies — resolved at startup by gdpr_scanner.py ─────────────
+# Fallback stubs allow isolated import (e.g. for tests).
+try:
+    from sse import broadcast, _sse_buffer
+except ImportError:
+    def broadcast(event, data): pass  # type: ignore
+    _sse_buffer = None
+
+try:
+    from gdpr_db import get_db as _get_db
+    DB_OK = True
+except ImportError:
+    DB_OK = False
+    def _get_db(*a, **kw): return None  # type: ignore
+
+from routes import state as _state
+
+def _get_scan_abort():
+    """Return the abort flag object held in routes.state, looked up at call time."""
+    return _state._scan_abort
+
+def _get_flagged_items():
+    """Return the current routes.state.flagged_items, looked up at call time.
+
+    The attribute is rebound by run_scan, so it is read on every call rather
+    than captured once.
+    """
+    return _state.flagged_items
+
+def _get_scan_meta():
+    """Return the current routes.state.scan_meta, looked up at call time.
+
+    The attribute is rebound at the start of every scan, so it is read on
+    every call rather than captured once.
+    """
+    return _state.scan_meta
+
+# ── Connector classes — imported at module level ──────────────────────────────
+try:
+    from m365_connector import (
+        M365Connector, M365Error, M365PermissionError, M365DeltaTokenExpired,
+        MSAL_OK, REQUESTS_OK,
+    )
+    CONNECTOR_OK = True
+except ImportError:
+    M365Connector = None        # type: ignore[assignment,misc]
+    M365Error = Exception
+    M365PermissionError = Exception
+    M365DeltaTokenExpired = Exception
+    MSAL_OK = False
+    REQUESTS_OK = False
+    CONNECTOR_OK = False
+
+try:
+    from file_scanner import FileScanner, store_smb_password, SMB_OK as _SMB_OK
+    FILE_SCANNER_OK = True
+except ImportError:
+    FileScanner = None          # type: ignore[assignment,misc]
+    FILE_SCANNER_OK = False
+
+try:
+    import document_scanner as ds
+    SCANNER_OK = True
+except ImportError:
+    ds = None                   # type: ignore[assignment]
+    SCANNER_OK = False
+
+try:
+    from PIL import Image as PILImage
+    PIL_OK = True
+except ImportError:
+    PILImage = None             # type: ignore[assignment]
+    PIL_OK = False
+
+# gdpr_db (get_db as _get_db, plus the DB_OK flag and its fallback stub) is
+# already imported near the top of this module; the identical second
+# try/except that used to sit here was redundant.
+
+# Stubs for standalone import — overwritten by gdpr_scanner.py injections
+LANG: dict = {}
+PHOTO_EXTS: set = set()
+SUPPORTED_EXTS: set = set()
+
+# cpr_detector helpers — injected by gdpr_scanner.py
+# Until they are replaced, the stubs below return an empty result of the
+# matching shape (no CPRs/dates, zero faces, empty EXIF dict, empty thumbnail
+# string, empty category list, empty PII counts); _html_esc returns str(s)
+# without any escaping.
+def _scan_bytes(content, filename, poppler_path=None): return {"cprs": [], "dates": []}  # type: ignore[misc]
+def _scan_bytes_timeout(content, filename, timeout=60): return {"cprs": [], "dates": []}  # type: ignore[misc]
+def _detect_photo_faces(content, filename): return 0  # type: ignore[misc]
+def _extract_exif(content, filename): return {}  # type: ignore[misc]
+def _make_thumb(content, filename): return ""  # type: ignore[misc]
+def _placeholder_svg(ext, name): return ""  # type: ignore[misc]
+def _check_special_category(text, cprs): return []  # type: ignore[misc]
+def _get_pii_counts(text): return {}  # type: ignore[misc]
+def _html_esc(s): return str(s)  # type: ignore[misc]
+
+# checkpoint helpers — injected by gdpr_scanner.py
+# Until they are replaced, the stubs below do nothing: the key is "", loading
+# a checkpoint yields None (run_scan then starts from scratch), delta tokens
+# load as an empty dict, and the save/clear functions are no-ops.
+def _checkpoint_key(opts): return ""  # type: ignore[misc]
+def _save_checkpoint(*a, **kw): pass  # type: ignore[misc]
+def _load_checkpoint(key): return None  # type: ignore[misc]
+def _clear_checkpoint(): pass  # type: ignore[misc]
+def _load_delta_tokens(): return {}  # type: ignore[misc]
+def _save_delta_tokens(t): pass  # type: ignore[misc]
+
+# app_config helpers — imported directly
+try:
+    from app_config import _load_role_overrides, _resolve_display_name
+except ImportError:
+    def _load_role_overrides(): return {}  # type: ignore[misc]
+    def _resolve_display_name(dn, email="", upn=""): return dn or email or upn  # type: ignore[misc]
+
+# cpr_detector helpers — imported directly
+try:
+    from cpr_detector import _scan_text_direct
+except ImportError:
+    def _scan_text_direct(text): return {"cprs": [], "dates": []}  # type: ignore[misc]
+
+def _with_disposition(card: dict, db) -> dict:
+    """Inject prior disposition into a scan card if one exists.
+
+    Args:
+        card: scan card; its "id" (default "") is the lookup key.
+        db:   object exposing get_prior_disposition(item_id), or a falsy
+              value when no database is available.
+
+    Returns:
+        ``card`` itself when there is no db, no prior disposition, or the
+        lookup raised; otherwise a shallow copy with a "disposition" key
+        added (the input card is not mutated).
+    """
+    if not db:
+        return card
+    try:
+        prior = db.get_prior_disposition(card.get("id", ""))
+        if prior:
+            return {**card, "disposition": prior}
+    except Exception as e:
+        # Best-effort decoration: a failed lookup must not break the scan,
+        # but leave a trace instead of swallowing the error silently.
+        logger.debug("[db] get_prior_disposition failed: %s", e)
+    return card
+
+
+def run_file_scan(source: dict):
+    """Scan a single local or SMB file source for CPR numbers and PII.
+
+    Every file yielded by FileScanner.iter_files() is text-scanned with
+    _scan_bytes_timeout; files whose extension is in PHOTO_EXTS are instead
+    checked with _extract_exif and, when scan_photos is set,
+    _detect_photo_faces.  Each hit becomes a card that is appended to
+    _state.flagged_items, broadcast as "scan_file_flagged" and, when a
+    database scan record exists, saved through it.
+
+    If file_scanner is unavailable a single "scan_error" is broadcast and the
+    function returns.  Otherwise "file_scan_done" is broadcast at the end,
+    also after an abort or an error.
+
+    Args:
+        source: file source dict with keys:
+            path, label, smb_host, smb_user, smb_domain, keychain_key,
+            smb_password, source_type,
+            scan_photos (bool), max_file_mb (int)
+    """
+    # state vars accessed via _state module
+
+    path        = source.get("path", "")
+    label       = source.get("label") or path
+    smb_host    = source.get("smb_host") or None
+    smb_user    = source.get("smb_user") or None
+    smb_domain  = source.get("smb_domain") or ""
+    keychain_key= source.get("keychain_key") or None
+    smb_password= source.get("smb_password") or None
+    scan_photos = bool(source.get("scan_photos", False))
+    max_mb      = int(source.get("max_file_mb", 50))
+
+    if not FILE_SCANNER_OK:
+        broadcast("scan_error", {"file": label, "error": "file_scanner.py not found"})
+        return
+
+    # Copy of the options without the SMB password.  This copy is what gets
+    # kept in _state.scan_meta and handed to the database, so the plaintext
+    # secret must not be part of it; FileScanner still receives the password
+    # directly below.
+    safe_options = {k: v for k, v in source.items() if k != "smb_password"}
+
+    import sse as _sse; _sse._current_scan_id = f"filescan_{int(time.time()*1000)}"
+    _state.scan_meta = {"started_at": time.time(), "options": safe_options}
+
+    _db = _get_db() if DB_OK else None
+    _db_scan_id: int | None = None
+    if _db:
+        try:
+            _db_scan_id = _db.begin_scan(
+                sources=[source.get("source_type", "local")],
+                user_count=0,
+                options=safe_options,
+            )
+        except Exception as e:
+            logger.error("[db] begin_scan failed: %s", e)
+
+    total_scanned = 0
+    total_flagged = 0
+
+    broadcast("scan_start", {"sources": [label]})
+    broadcast("scan_phase", {"phase": f"Files \u2014 {label}"})
+
+    try:
+        fs = FileScanner(
+            path=path,
+            smb_host=smb_host,
+            smb_user=smb_user,
+            smb_password=smb_password,
+            smb_domain=smb_domain,
+            keychain_key=keychain_key,
+            max_file_bytes=max_mb * 1_048_576,
+        )
+
+        def _progress(rel_path: str):
+            broadcast("scan_file", {"file": rel_path})
+
+        for rel_path, content, meta in fs.iter_files(progress_cb=_progress):
+            if _state._scan_abort.is_set():
+                break
+
+            total_scanned += 1
+            # The percentage is a rough indicator only: it starts at 10, grows
+            # by one per ten files and is capped at 90.
+            broadcast("scan_progress", {"scanned": total_scanned, "flagged": total_flagged, "file": rel_path, "pct": min(90, 10 + total_scanned // 10), "source": "file"})
+
+            # Skip sentinel (too large or error)
+            if content is None:
+                if meta.get("skip_reason"):
+                    broadcast("scan_error", {
+                        "file": rel_path,
+                        "error": meta["skip_reason"],
+                    })
+                continue
+
+            ext = Path(rel_path).suffix.lower()
+
+            # CPR scan — skip for images (no text layer; EXIF/face detection handles them)
+            result: dict = {"cprs": [], "dates": []}
+            if ext not in PHOTO_EXTS:
+                try:
+                    result = _scan_bytes_timeout(content, rel_path)
+                except Exception as e:
+                    broadcast("scan_error", {"file": rel_path, "error": str(e)})
+                    continue
+
+            cprs = result.get("cprs", [])
+
+            # Photo / biometric scan + EXIF extraction
+            _face_count = 0
+            _exif       = {}
+            if ext in PHOTO_EXTS:
+                if scan_photos:
+                    _face_count = _detect_photo_faces(content, rel_path)
+                _exif = _extract_exif(content, rel_path)
+
+            # Nothing found in this file — no card is produced.
+            if not cprs and _face_count == 0 and not _exif.get("has_pii"):
+                continue
+
+            # Build card metadata
+            try:
+                _file_text = content.decode("utf-8", errors="replace")
+            except Exception:
+                _file_text = ""
+
+            _pii = _get_pii_counts(_file_text)
+            _sc  = _check_special_category(_file_text, cprs)
+            if _face_count > 0 and "biometric" not in _sc:
+                _sc = sorted(_sc + ["biometric"])
+            if _exif.get("gps") and "gps_location" not in _sc:
+                _sc = sorted(_sc + ["gps_location"])
+            if _exif.get("has_pii") and "exif_pii" not in _sc:
+                _sc = sorted(_sc + ["exif_pii"])
+
+            # Thumbnail for images
+            if ext in {".jpg", ".jpeg", ".png"} and PIL_OK:
+                _thumb      = _make_thumb(content, rel_path)
+                _thumb_mime = True
+            else:
+                _thumb      = _placeholder_svg(ext, rel_path)
+                _thumb_mime = False
+            del content  # raw bytes no longer needed — free before card build and next iteration
+
+            source_type = meta["source_type"]  # "local" or "smb"
+            source_root = meta["source_root"]
+
+            card = {
+                "id":           hashlib.sha256(meta["full_path"].encode()).hexdigest()[:24],
+                "name":         rel_path,
+                "source":       label,
+                "source_type":  source_type,
+                "cpr_count":    len(cprs),
+                "url":          "",
+                "size_kb":      meta["size_kb"],
+                "modified":     meta["modified"],
+                "thumb_b64":    _thumb,
+                "thumb_mime":   "image/jpeg" if _thumb_mime else "image/svg+xml",
+                "risk":         None,
+                "account_id":   "",
+                "account_name": source_root,
+                "user_role":    "other",
+                "drive_id":     "",
+                "attachments":  [],
+                "folder":       str(Path(rel_path).parent) if "/" in rel_path or "\\" in rel_path else "",
+                "transfer_risk": "",
+                "special_category": _sc,
+                "face_count":   _face_count,
+                "exif":         _exif,
+                "full_path":    meta["full_path"],
+            }
+
+            _state.flagged_items.append(card)
+            total_flagged += 1
+            broadcast("scan_file_flagged", _with_disposition(card, _db))
+
+            if _db and _db_scan_id:
+                try:
+                    _db.save_item(_db_scan_id, card, cprs, pii_counts=_pii)
+                except Exception as e:
+                    logger.error("[db] save_item failed: %s", e)
+
+    except Exception as e:
+        import traceback
+        broadcast("scan_error", {"file": label, "error": str(e)})
+        logger.error("[file_scan] error:\n%s", traceback.format_exc())
+    finally:
+        if _db and _db_scan_id:
+            try:
+                _db.finish_scan(_db_scan_id, total_scanned)
+            except Exception as e:
+                # Still best-effort, but no longer silent.
+                logger.error("[db] finish_scan failed: %s", e)
+        _state.scan_meta["finished_at"] = time.time()
+        broadcast("file_scan_done", {
+            "total_scanned": total_scanned,
+            "flagged_count": total_flagged,
+        })
+
+
+def run_scan(options: dict):
+    # state vars accessed via _state module
+    import sse as _sse; _sse._current_scan_id = f"scan_{int(time.time()*1000)}"
+    _state.scan_meta = {"started_at": time.time(), "options": options}
+    _sse_buffer.clear()  # fresh buffer for each scan
+
+    # Open DB and start a scan record (runs alongside JSON cache)
+    _db = _get_db() if DB_OK else None
+    _db_scan_id: int | None = None
+    if _db:
+        try:
+            _db_scan_id = _db.begin_scan(options)
+        except Exception as _e:
+            logger.error("[db] begin_scan failed: %s", _e)
+
+    conn: M365Connector = _state.connector  # type: ignore[assignment]
+    if not conn:
+        broadcast("scan_error", {"file": "auth", "error": "Not connected to M365"})
+        broadcast("scan_done", {"flagged_count": 0, "total_scanned": 0})
+        return
+
+    # ── Checkpoint: resume from a previous interrupted scan ──────────────────
+    ck_key        = _checkpoint_key(options)
+    checkpoint    = _load_checkpoint(ck_key)
+    scanned_ids:  set  = set(checkpoint["scanned_ids"]) if checkpoint else set()
+    resumed_count = len(scanned_ids)
+
+    if checkpoint:
+        # Restore previously found cards; new finds will be appended
+        _state.flagged_items = list(checkpoint.get("flagged", []))
+        broadcast("scan_phase", {
+            "phase": LANG.get("m365_resuming", f"Resuming — skipping {resumed_count} already-scanned items…")
+        })
+        # Re-emit previously found cards so the UI grid is populated
+        for card in _state.flagged_items:
+            broadcast("scan_file_flagged", _with_disposition(card, _db))
+    else:
+        _state.flagged_items = []
+
+    # Save checkpoint every N items so progress isn't lost mid-scan
+    _CHECKPOINT_SAVE_EVERY = 25
+    _items_since_save = 0
+
+    conn: M365Connector = _state.connector  # type: ignore[assignment]
+    if not conn:
+        broadcast("scan_error", {"file": "auth", "error": "Not connected to M365"})
+        broadcast("scan_done", {"flagged_count": 0, "total_scanned": 0})
+        return
+
+    # Log which auth mode is active — helps diagnose 403 issues
+    mode_label = LANG.get("m365_auth_mode_app", "Auth mode: Application (client credentials — org-wide)") if conn.is_app_mode else LANG.get("m365_auth_mode_delegated", "Auth mode: Delegated (device code — signed-in user only)")
+    broadcast("scan_phase", {"phase": mode_label})
+    logger.info("[run_scan] sources=%s, users=%d, app_mode=%s",
+                options.get("sources", []), len(options.get("user_ids", [])), conn.is_app_mode)
+
+    sources        = options.get("sources", [])
+    scan_opts      = options.get("options", {})
+    older_than_days= int(scan_opts.get("older_than_days", 0))
+    scan_email_body= scan_opts.get("email_body", True)
+    scan_attachments= scan_opts.get("attachments", True)
+    max_attach_mb  = float(scan_opts.get("max_attach_mb", 20))
+    max_emails     = int(scan_opts.get("max_emails", 2000))
+    delta_enabled  = bool(scan_opts.get("delta", False))
+    scan_photos    = bool(scan_opts.get("scan_photos", False))  # biometric photo scan (#9)
+
+    # Delta token state — loaded once, updated per-source, saved on completion
+    delta_tokens:     dict = _load_delta_tokens() if delta_enabled else {}
+    new_delta_tokens: dict = {}  # keys written after a successful delta query
+
+    if delta_enabled:
+        broadcast("scan_phase", {"phase": LANG.get("m365_delta_mode", "Delta mode — fetching changed items only…")})
+
+    # Compute cutoff date if requested
+    from datetime import datetime, timezone, timedelta
+    cutoff_dt = None
+    if older_than_days > 0:
+        cutoff_dt = datetime.now(timezone.utc) - timedelta(days=older_than_days)
+
+    def _after_cutoff(date_str: str) -> bool:
+        """Return True if item is NEWER than cutoff (should be skipped)."""
+        if not cutoff_dt or not date_str:
+            return False
+        try:
+            dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
+            if dt.tzinfo is None:
+                dt = dt.replace(tzinfo=timezone.utc)
+            return dt > cutoff_dt
+        except Exception:
+            return False
+
+    total     = 0
+    completed = 0
+    t_start   = time.monotonic()
+
+    def _eta(done, tot):
+        if done < 2 or tot == 0:
+            return ""
+        elapsed = time.monotonic() - t_start
+        rate    = done / elapsed
+        rem     = (tot - done) / rate
+        if rem < 60:   return f"{int(rem)}s"
+        if rem < 3600: return f"{int(rem/60)}m"
+        return f"{int(rem/3600)}h"
+
+    def _check_abort():
+        if _state._scan_abort.is_set():
+            broadcast("scan_cancelled", {"completed": completed})
+            return True
+        return False
+
+    def _broadcast_card(item_meta: dict, cprs: list, pii_counts: dict | None = None):
+        card = {
+            "id":           item_meta.get("id", ""),
+            "name":         item_meta.get("name", ""),
+            "source":       item_meta.get("_source", ""),
+            "source_type":  item_meta.get("_source_type", ""),
+            "cpr_count":    len(cprs),
+            "url":          item_meta.get("webUrl", "") or item_meta.get("_url", ""),
+            "size_kb":      round(item_meta.get("size", 0) / 1024, 1),
+            "modified":     (item_meta.get("lastModifiedDateTime") or item_meta.get("receivedDateTime") or "")[:10],
+            "thumb_b64":    item_meta.get("_thumb", ""),
+            "thumb_mime":   "image/jpeg" if item_meta.get("_thumb_is_jpeg") else "image/svg+xml",
+            "risk":         None,
+            "account_id":   item_meta.get("_account_id", "") or item_meta.get("_user_id", ""),
+            "account_name": item_meta.get("_account", ""),
+            "user_role":    item_meta.get("_user_role", "other"),
+            "drive_id":     item_meta.get("_drive_id", "") or item_meta.get("parentReference", {}).get("driveId", ""),
+            "attachments":  item_meta.get("_attachments", []),
+            "folder":       item_meta.get("_folder", ""),
+            "transfer_risk":    item_meta.get("_transfer_risk", ""),
+            "special_category": item_meta.get("_special_category", []),
+            "face_count":       item_meta.get("_face_count", 0),
+            "exif":             item_meta.get("_exif", {}),
+        }
+        _state.flagged_items.append(card)
+        broadcast("scan_file_flagged", _with_disposition(card, _db))
+        # Persist to SQLite alongside JSON
+        if _db and _db_scan_id:
+            try:
+                _db.save_item(_db_scan_id, card, cprs, pii_counts=pii_counts)
+            except Exception as _e:
+                logger.error("[db] save_item failed: %s", _e)
+
+    # ── External transfer detection (#5) ─────────────────────────────────────
+    def _tenant_domain() -> str:
+        """Best-effort: extract the primary domain from the tenant's user list."""
+        try:
+            me = conn.get_user_info()
+            addr = me.get("mail") or me.get("userPrincipalName", "")
+            return addr.split("@")[-1].lower() if "@" in addr else ""
+        except Exception:
+            return ""
+
+    _tenant_dom = _tenant_domain()
+
+    def _check_transfer_risk(meta: dict) -> str:
+        """Return transfer risk tag or empty string.
+
+        Email: external recipient detected (domain outside tenant).
+        File:  external sharing link present on the drive item.
+        """
+        src_type = meta.get("_source_type", "")
+        if src_type == "email":
+            if not _tenant_dom:
+                return ""
+            recipients = []
+            for field in ("toRecipients", "ccRecipients"):
+                for r in meta.get(field, []):
+                    addr = (r.get("emailAddress") or {}).get("address", "")
+                    if addr:
+                        recipients.append(addr.lower())
+            external = [a for a in recipients
+                        if "@" in a and not a.endswith("@" + _tenant_dom)]
+            if external:
+                return "external-recipient"
+        elif src_type in ("onedrive", "sharepoint", "teams"):
+            if meta.get("shared"):
+                scope = ""
+                try:
+                    scope = meta["shared"].get("scope", "").lower()
+                except Exception:
+                    pass
+                if scope in ("anonymous", "organization"):
+                    return "external-share"
+                return "shared"
+        return ""
+
+    # ── Collect work items ────────────────────────────────────────────────────
+    work_items = []  # list of (type, meta, fetch_fn)
+
+    try:
+        # Determine which user accounts to scan
+        # Normalise user_ids — may be list of dicts OR list of plain ID strings (legacy)
+        _raw_uids = options.get("user_ids", [])
+        user_ids = [
+            u if isinstance(u, dict) else {"id": u, "displayName": u, "userRole": "other"}
+            for u in _raw_uids
+        ]
+
+        # Resolve the signed-in user so we can use /me/... for them (avoids
+        # needing admin delegation just to read your own mailbox/drive)
+        try:
+            me_info = conn.get_user_info()
+            me_id   = me_info.get("id", "")
+        except Exception:
+            me_id   = ""
+            me_info = {}
+
+        if not user_ids:
+            if conn.is_app_mode:
+                # App mode with no users selected — scan everyone
+                logger.info("[run_scan] user_ids empty — fetching all tenant users")
+                all_users = conn.list_users()
+                user_ids = [{"id": u["id"],
+                             "displayName": _resolve_display_name(
+                                 u.get("displayName", ""),
+                                 u.get("mail") or u.get("userPrincipalName", ""))}
+                            for u in all_users if u.get("id")]
+            else:
+                user_ids = [{"id": me_id or "me",
+                             "displayName": _resolve_display_name(
+                                 me_info.get("displayName", ""),
+                                 me_info.get("mail") or me_info.get("userPrincipalName", "me"))}]
+        else:
+            sample = user_ids[0] if user_ids else None
+            logger.info("[run_scan] user_ids: %d entries, type=%s, sample=%s",
+                        len(user_ids), type(user_ids).__name__, sample)
+
+        # Build uid → userRole map for use during scanning
+        # Manual overrides (set by admin in UI) take precedence over auto-classification
+        _scan_role_overrides = _load_role_overrides()
+        _user_role_map: dict[str, str] = {
+            u["id"]: _scan_role_overrides.get(u["id"], u.get("userRole", "other"))
+            for u in user_ids if u.get("id")
+        }
+
+        def _uid_path(uid: str) -> str:
+            """In delegated mode, return 'me' when uid is the signed-in user
+            so /me/... endpoints are used. In app mode, always use /users/{id}
+            since there is no signed-in user context."""
+            if conn.is_app_mode:
+                return uid  # app mode: always explicit user ID
+            return "me" if (uid == me_id or uid == "me") else uid
+
+        def _permission_msg(resource: str, uname: str) -> str:
+            return (
+                f"Permission denied (403) — cannot access {resource} for {uname}. "
+                f"The signed-in account needs Global Admin or Exchange Admin rights, "
+                f"OR an admin must grant Application permissions in Azure "
+                f"(Mail.ReadWrite / Files.ReadWrite.All / Sites.ReadWrite.All for delete; "
+                f"Mail.Read / Files.Read.All / Sites.Read.All for scan-only) under "
+                f"App registrations → API permissions → Grant admin consent."
+            )
+
+        def _scan_user_email(uid, uname):
+            effective = _uid_path(uid)
+            broadcast("scan_phase", {"phase": LANG.get("m365_phase_emails", "Collecting emails") + f" — {uname}\u2026"})
+            try:
+                folder_errors = []
+                if effective != "me":
+                    all_folders = conn.list_all_mail_folders_for(effective, errors_out=folder_errors)
+                else:
+                    all_folders = conn.list_all_mail_folders(errors_out=folder_errors)
+
+                for ferr in folder_errors:
+                    broadcast("scan_error", {"file": f"mail folders ({uname})", "error": ferr})
+
+                broadcast("scan_phase", {"phase": LANG.get("m365_phase_emails", "Collecting emails") + f" — {uname}: {len(all_folders)} folders…"})
+
+                # Skip system folders. Use wellKnownName (language-independent) when
+                # Graph returns it; fall back to localised display names otherwise.
+                SKIP_WELL_KNOWN = {
+                    "deleteditems", "junkemail", "drafts",
+                    # "sentitems" and "outbox" intentionally NOT skipped — may contain CPR numbers
+                    "syncissues", "recoverableitemsdeletions",
+                    "recoverableitemsroot", "recoverableitemspurges",
+                    "recoverableitemsversions",
+                }
+                SKIP_DISPLAY = {
+                    # English
+                    "deleted items", "junk email", "drafts",
+                    # "sent items" and "outbox" intentionally NOT skipped
+                    "sync issues", "recoverable items",
+                    "purges", "versions", "conflicts", "local failures",
+                    "server failures",
+                    # Danish
+                    "slettet post", "uønsket post", "kladder",
+                    "synkroniseringsproblemer",
+                    # German
+                    "gelöschte elemente", "junk-e-mail", "entwürfe",
+                }
+
+                def _should_skip(f):
+                    wkn = f.get("wellKnownName", "").lower()
+                    if wkn:
+                        return wkn in SKIP_WELL_KNOWN
+                    return f.get("displayName", "").lower() in SKIP_DISPLAY
+
+                scan_folders = [f for f in all_folders if not _should_skip(f)]
+
+                # Prioritise subfolders (depth > 0) before Inbox so the cap
+                # doesn't get exhausted by Inbox alone.
+                def _folder_sort_key(f):
+                    path = f.get("_display_path", "")
+                    depth = path.count(" / ")
+                    is_inbox_root = path.lower() in ("inbox", "indbakke")
+                    return (is_inbox_root, -depth)  # subfolders first, then inbox last
+
+                scan_folders.sort(key=_folder_sort_key)
+
+                msgs_added = 0
+                for folder in scan_folders:
+                    if _state._scan_abort.is_set():
+                        return
+                    if msgs_added >= max_emails:
+                        break
+                    remaining    = max_emails - msgs_added
+                    folder_limit = remaining   # each folder gets whatever budget is left
+                    folder_id    = folder["id"]
+                    folder_path  = folder.get("_display_path", folder.get("displayName", ""))
+                    delta_key    = f"email:{uid}:{folder_id}"
+
+                    if delta_enabled:
+                        saved_link = delta_tokens.get(delta_key)
+                        try:
+                            if effective != "me":
+                                folder_msgs, new_link = conn.iter_messages_delta_for(
+                                    effective, folder_id, delta_url=saved_link, top=folder_limit)
+                            else:
+                                folder_msgs, new_link = conn.iter_messages_delta(
+                                    folder_id, delta_url=saved_link, top=folder_limit)
+                            if new_link:
+                                new_delta_tokens[delta_key] = new_link
+                        except M365DeltaTokenExpired:
+                            broadcast("scan_phase", {"phase": f"📂 {folder_path}: delta token expired — full fetch"})
+                            if delta_key in delta_tokens:
+                                del delta_tokens[delta_key]
+                            folder_msgs = list(
+                                conn.iter_messages_for(effective, folder_id, top=folder_limit)
+                                if effective != "me"
+                                else conn.iter_messages(folder_id, top=folder_limit)
+                            )
+                    else:
+                        folder_msgs = list(
+                            conn.iter_messages_for(effective, folder_id, top=folder_limit) if effective != "me"
+                            else conn.iter_messages(folder_id, top=folder_limit)
+                        )
+
+                    # Filter deleted items returned by delta (have @removed key)
+                    folder_msgs = [m for m in folder_msgs if "@removed" not in m]
+
+                    if folder_msgs:
+                        delta_badge = " Δ" if delta_enabled else ""
+                        broadcast("scan_phase", {"phase": f"📂 {folder_path}{delta_badge}: {len(folder_msgs)} msg(s)"})
+                    for msg in folder_msgs:
+                        if _after_cutoff(msg.get("receivedDateTime", "")):
+                            continue
+                        msg["_account"]    = uname
+                        msg["_account_id"] = effective
+                        msg["_user_role"]  = _user_role_map.get(uid, "other")
+                        msg["_folder"]     = folder_path
+                        # Pre-extract body text and discard raw HTML to avoid storing
+                        # potentially hundreds of KB of HTML per message in work_items.
+                        # For a large org this is the primary driver of multi-GB RAM usage.
+                        if scan_email_body:
+                            msg["_precomputed_body"] = conn.get_message_body_text(msg)
+                        msg.pop("body", None)       # free raw HTML (can be 100 KB+)
+                        msg.pop("bodyPreview", None) # 255-char preview, not needed
+                        work_items.append(("email", msg, None))
+                        msgs_added += 1
+                        if msgs_added >= max_emails:
+                            break
+            except M365PermissionError:
+                broadcast("scan_error", {"file": f"mail ({uname})", "error": _permission_msg("email", uname)})
+            except Exception as e:
+                broadcast("scan_error", {"file": f"mail ({uname})", "error": str(e)})
+
+        def _scan_user_onedrive(uid, uname):
+            """Queue the supported OneDrive files of one user onto work_items.
+
+            With delta enabled, any stored delta link is handed to the connector
+            and the link it returns is recorded in new_delta_tokens; an expired
+            link falls back to a full listing.  Permission and other failures
+            are broadcast as "scan_error" events rather than raised.
+            """
+            user_path = _uid_path(uid)
+            is_self   = user_path == "me"
+            token_key = f"onedrive:{uid}"
+            prev_link = delta_tokens.get(token_key) if delta_enabled else None
+            badge     = " Δ" if (delta_enabled and prev_link) else ""
+            broadcast("scan_phase", {"phase": LANG.get("m365_phase_onedrive", "Collecting OneDrive") + f" — {uname}{badge}…"})
+
+            def _full_listing():
+                # Non-delta enumeration of the drive ("me" vs. a named user).
+                return conn.iter_onedrive_files() if is_self else conn.iter_onedrive_files_for(user_path, uname)
+
+            try:
+                if not delta_enabled:
+                    drive_items = _full_listing()
+                else:
+                    try:
+                        if is_self:
+                            drive_items, fresh_link = conn.iter_onedrive_delta(delta_url=prev_link)
+                        else:
+                            drive_items, fresh_link = conn.iter_onedrive_delta_for(user_path, uname, delta_url=prev_link)
+                        if fresh_link:
+                            new_delta_tokens[token_key] = fresh_link
+                    except M365DeltaTokenExpired:
+                        broadcast("scan_phase", {"phase": f"OneDrive ({uname}): delta token expired — falling back to full scan"})
+                        delta_tokens.pop(token_key, None)
+                        drive_items = list(_full_listing())
+
+                for entry in drive_items:
+                    if _state._scan_abort.is_set():
+                        return
+                    # The "deleted" marker is only checked when delta is enabled.
+                    if delta_enabled and entry.get("deleted"):
+                        continue
+                    if Path(entry.get("name", "")).suffix.lower() not in SUPPORTED_EXTS:
+                        continue
+                    if _after_cutoff(entry.get("lastModifiedDateTime", "")):
+                        continue
+                    # Tag the item with its origin and owning account before it
+                    # is queued as a ("file", item, None) work entry.
+                    entry["_source_type"] = "onedrive"
+                    entry["_account"]     = uname
+                    entry["_user_id"]     = user_path
+                    entry["_user_role"]   = _user_role_map.get(uid, "other")
+                    work_items.append(("file", entry, None))
+            except M365PermissionError:
+                broadcast("scan_error", {"file": f"OneDrive ({uname})", "error": _permission_msg("OneDrive", uname)})
+            except Exception as e:
+                broadcast("scan_error", {"file": f"OneDrive ({uname})", "error": str(e)})
+            else:
+                queued = sum(1 for _kind, meta, _extra in work_items
+                             if meta.get("_source_type") == "onedrive" and meta.get("_account") == uname)
+                if queued:
+                    broadcast("scan_phase", {"phase": f"📁 OneDrive — {uname}: {queued} file(s)"})
+        def _scan_user_teams(uid, uname):
+            """Queue supported files from the Teams that one user belongs to.
+
+            Teams are resolved via conn.list_teams() for "me", the prebuilt
+            _app_user_teams index in app mode, or /users/{id}/joinedTeams.
+            In delta mode a failing channel is logged and skipped so that it
+            cannot abort the rest of the user's Teams collection; permission
+            and other team-level errors are broadcast as "scan_error" events.
+            """
+            effective = _uid_path(uid)
+            phase_sfx = " Δ" if delta_enabled else ""
+            broadcast("scan_phase", {"phase": LANG.get("m365_phase_teams", "Collecting Teams") + f" — {uname}{phase_sfx}…"})
+            def _queue(item):
+                # Keep only supported files inside the date cutoff; tag for this user.
+                ext = Path(item.get("name", "")).suffix.lower()
+                if ext not in SUPPORTED_EXTS:
+                    return
+                if _after_cutoff(item.get("lastModifiedDateTime", "")):
+                    return
+                item["_source_type"] = "teams"
+                item["_account"]     = uname
+                item["_user_role"]   = _user_role_map.get(uid, "other")
+                work_items.append(("file", item, None))
+            try:
+                if effective == "me":
+                    teams = conn.list_teams()
+                elif conn.is_app_mode:
+                    teams = _app_user_teams.get(uid, [])
+                else:
+                    teams = list(conn._paginate(f"/users/{effective}/joinedTeams", {"$top": "50"}))
+                for team in teams:
+                    if _state._scan_abort.is_set():
+                        return
+                    team_id   = team["id"]
+                    team_name = team.get("displayName", team_id)
+                    if not delta_enabled:
+                        for item in conn.iter_teams_files(team_id, team_name):
+                            _queue(item)
+                        continue
+                    # Each Teams channel is a SharePoint drive — use per-drive delta
+                    try:
+                        channels = list(conn._paginate(f"/teams/{team_id}/channels", {"$top": "50"}))
+                    except Exception as ch_err:
+                        logger.warning("[run_scan] Teams %s: channel listing failed: %s", team_name, ch_err)
+                        channels = []
+                    for ch in channels:
+                        if _state._scan_abort.is_set():
+                            return
+                        ch_id   = ch["id"]
+                        ch_name = ch.get("displayName", ch_id)
+                        source  = f"Teams / {team_name} / {ch_name}"
+                        try:
+                            data = conn._get(f"/teams/{team_id}/channels/{ch_id}/filesFolder")
+                            drive_id = data.get("parentReference", {}).get("driveId")
+                            if not drive_id:
+                                continue
+                            delta_key  = f"teams:{drive_id}"
+                            saved_link = delta_tokens.get(delta_key)
+                            try:
+                                items, new_link = conn.iter_drive_delta(drive_id, source, delta_url=saved_link)
+                            except M365DeltaTokenExpired:
+                                broadcast("scan_phase", {"phase": f"Teams {source}: token expired — full scan"})
+                                delta_tokens.pop(delta_key, None)
+                                items, new_link = conn.iter_drive_delta(drive_id, source, delta_url=None)
+                            if new_link:
+                                new_delta_tokens[delta_key] = new_link
+                            for item in items:
+                                if not item.get("deleted"):
+                                    _queue(item)
+                        except Exception as drv_err:
+                            # Best effort: one unreadable channel must not stop the scan, but leave a trace.
+                            logger.warning("[run_scan] %s skipped: %s", source, drv_err)
+            except M365PermissionError:
+                broadcast("scan_error", {"file": f"Teams ({uname})", "error": _permission_msg("Teams", uname)})
+            except Exception as e:
+                broadcast("scan_error", {"file": f"Teams ({uname})", "error": str(e)})
+            else:
+                tm_count = sum(1 for k, m, _ in work_items if m.get("_source_type") == "teams" and m.get("_account") == uname)
+                if tm_count:
+                    broadcast("scan_phase", {"phase": f"💬 Teams — {uname}: {tm_count} file(s)"})
+        if "email" in sources:
+            for u in user_ids:
+                if _state._scan_abort.is_set():
+                    break
+                _scan_user_email(u["id"], u["displayName"])
+
+        if "onedrive" in sources:
+            for u in user_ids:
+                if _state._scan_abort.is_set():
+                    break
+                _scan_user_onedrive(u["id"], u["displayName"])
+
+        if "sharepoint" in sources:
+            phase_sfx = " Δ" if delta_enabled else ""
+            broadcast("scan_phase", {"phase": LANG.get("m365_phase_sharepoint", "Collecting SharePoint files…") + phase_sfx})
+            try:
+                sites = conn.list_sharepoint_sites()
+                for site in sites:
+                    if _state._scan_abort.is_set():
+                        break
+                    site_id   = site["id"]
+                    site_name = site.get("displayName", site.get("name", site_id))
+                    if delta_enabled:
+                        # Collect per-drive delta for this site
+                        try:
+                            drives = list(conn._paginate(f"/sites/{site_id}/drives", {"$top": "20"}))
+                        except Exception:
+                            drives = []
+                        for drive in drives:
+                            drive_id    = drive["id"]
+                            drive_label = f"{site_name} / {drive.get('name', 'Documents')}"
+                            delta_key   = f"sharepoint:{drive_id}"
+                            saved_link  = delta_tokens.get(delta_key)
+                            try:
+                                items, new_link = conn.iter_drive_delta(drive_id, drive_label, delta_url=saved_link)
+                                if new_link:
+                                    new_delta_tokens[delta_key] = new_link
+                            except M365DeltaTokenExpired:
+                                broadcast("scan_phase", {"phase": f"SharePoint {drive_label}: token expired — full scan"})
+                                if delta_key in delta_tokens:
+                                    del delta_tokens[delta_key]
+                                items, new_link = conn.iter_drive_delta(drive_id, drive_label, delta_url=None)
+                                if new_link:
+                                    new_delta_tokens[delta_key] = new_link
+                            for item in items:
+                                if item.get("deleted"):
+                                    continue
+                                ext = Path(item.get("name", "")).suffix.lower()
+                                if ext not in SUPPORTED_EXTS:
+                                    continue
+                                if _after_cutoff(item.get("lastModifiedDateTime", "")):
+                                    continue
+                                item["_source_type"] = "sharepoint"
+                                work_items.append(("file", item, None))
+                    else:
+                        for item in conn.iter_sharepoint_files(site_id, site_name):
+                            ext = Path(item.get("name", "")).suffix.lower()
+                            if ext not in SUPPORTED_EXTS:
+                                continue
+                            if _after_cutoff(item.get("lastModifiedDateTime", "")):
+                                continue
+                            item["_source_type"] = "sharepoint"
+                            work_items.append(("file", item, None))
+            except Exception as e:
+                broadcast("scan_error", {"file": "SharePoint", "error": str(e)})
+            else:
+                sp_count = sum(1 for k, m, _ in work_items if m.get("_source_type") == "sharepoint")
+                if sp_count:
+                    broadcast("scan_phase", {"phase": f"🌐 SharePoint: {sp_count} file(s)"})
+        if "teams" in sources:
+            # App mode: /users/{id}/joinedTeams is delegated-only.
+            # Build a user→teams index by listing all tenant teams once,
+            # then fetching each team's member list.
+            _app_user_teams: dict = {}  # uid -> [team_dict, ...]
+            if conn.is_app_mode:
+                broadcast("scan_phase", {"phase": LANG.get("m365_phase_teams_index", "Building Teams membership index…")})
+                try:
+                    all_teams = conn.list_all_teams()
+                    scan_uid_set = {u["id"] for u in user_ids}
+                    for team in all_teams:
+                        tid   = team["id"]
+                        tname = team.get("displayName", tid)
+                        member_ids = conn.get_team_members(tid)
+                        for mid in member_ids:
+                            if mid in scan_uid_set:
+                                _app_user_teams.setdefault(mid, []).append(
+                                    {"id": tid, "displayName": tname}
+                                )
+                except Exception as e:
+                    broadcast("scan_error", {"file": "Teams index", "error": str(e)})
+
+            for u in user_ids:
+                if _state._scan_abort.is_set():
+                    break
+                _scan_user_teams(u["id"], u["displayName"])
+            # Deduplicate: same file may appear in multiple users' Teams
+            seen_ids: set = set()
+            deduped = []
+            for entry in work_items:
+                fid = entry[1].get("id", "")
+                if fid and fid in seen_ids:
+                    continue
+                if fid:
+                    seen_ids.add(fid)
+                deduped.append(entry)
+            work_items[:] = deduped
+
+    except Exception as e:
+        broadcast("scan_error", {"file": "collection", "error": str(e)})
+
+    # ── Filter work items already covered by checkpoint ─────────────────────
+    if scanned_ids:
+        work_items = [(k, m, f) for k, m, f in work_items if m.get("id", "") not in scanned_ids]
+
+    total = len(work_items)
+    broadcast("scan_start", {
+        "total": total + resumed_count,
+        "resumed": resumed_count,
+    })
+    # Clear the "Collecting…" phase text now that we're actually scanning items
+    broadcast("scan_phase", {"phase": LANG.get("m365_phase_scanning", "Scanning…")})
+
+    # ── Process items ─────────────────────────────────────────────────────────
+    # Convert to a deque so each item is released from memory as soon as it's
+    # processed (popleft is O(1) and drops the reference immediately).
+    _work_q: deque = deque(work_items)
+    work_items = None  # type: ignore[assignment]  # release the list; items live in _work_q
+    gc.collect()       # run GC now to reclaim body strings freed during collection
+
+    _items_since_save = 0
+    idx = -1
+    while _work_q:
+        if _check_abort():
+            # Save checkpoint so scan can be resumed later
+            _save_checkpoint(ck_key, scanned_ids, _state.flagged_items, _state.scan_meta)
+            return
+        idx += 1
+        kind, meta, _ = _work_q.popleft()  # releases this item from the deque immediately
+        completed = idx + 1
+        grand_total = total + resumed_count
+        grand_done  = resumed_count + completed
+        pct = int((grand_done / grand_total) * 100) if grand_total else 100
+        name = meta.get("name", "") or meta.get("subject", f"email-{idx}")
+
+        broadcast("scan_progress", {
+            "index": grand_done, "total": grand_total,
+            "file": name, "pct": pct, "eta": _eta(completed, total),
+            "source": "m365",
+        })
+
+        try:
+            if kind == "email":
+                msg_id  = meta["id"]
+                subject = meta.get("subject", "(no subject)")
+                meta["name"] = subject
+                meta["_source"] = "Exchange"
+                meta["_source_type"] = "email"
+                meta["_url"] = meta.get("webLink", "")
+
+                # Scan body — use pre-extracted text (body HTML was stripped at
+                # collection time to keep work_items memory footprint small)
+                all_cprs = []
+                body_text = ""
+                if scan_email_body:
+                    body_text = meta.pop("_precomputed_body", "")
+                    body_result = _scan_text_direct(body_text)
+                    all_cprs = list(body_result.get("cprs", []))
+
+                # Scan attachments
+                uid = meta.get("_account_id", "me")
+                att_results = []  # list of {name, cpr_count}
+                if scan_attachments and meta.get("hasAttachments"):
+                    att_iter = (conn.iter_message_attachments_for(uid, msg_id)
+                                if uid != "me" else conn.iter_message_attachments(msg_id))
+                    for att in att_iter:
+                        att_name = att.get("name", "attachment")
+                        att_ext  = Path(att_name).suffix.lower()
+                        if att_ext not in SUPPORTED_EXTS:
+                            continue
+                        att_size_mb = att.get("size", 0) / 1_048_576
+                        if att_size_mb > max_attach_mb:
+                            broadcast("scan_error", {"file": att_name, "error": f"Skipped — {att_size_mb:.1f} MB exceeds {max_attach_mb} MB limit"})
+                            continue
+                        try:
+                            att_bytes = (conn.download_attachment_for(uid, msg_id, att["id"])
+                                         if uid != "me" else conn.download_attachment(msg_id, att["id"]))
+                            att_result = _scan_bytes(att_bytes, att_name)
+                            att_cprs   = att_result.get("cprs", [])
+                            all_cprs.extend(att_cprs)
+                            att_results.append({"name": att_name, "cpr_count": len(att_cprs)})
+                        except Exception as att_err:
+                            broadcast("scan_error", {"file": att_name, "error": str(att_err)})
+
+                if all_cprs:
+                    meta["_thumb"]         = _placeholder_svg(".eml", subject)
+                    meta["_thumb_is_jpeg"] = False
+                    meta["_attachments"]   = att_results
+                    _email_pii = _get_pii_counts(body_text) if scan_email_body else {}
+                    meta["_transfer_risk"]    = _check_transfer_risk(meta)
+                    meta["_special_category"] = _check_special_category(
+                        body_text if scan_email_body else "", all_cprs)
+                    _broadcast_card(meta, all_cprs, pii_counts=_email_pii)
+                del body_text  # free email text — may be large for HTML-rich emails
+
+            else:  # file
+                drive_id = meta.get("_drive_id") or meta.get("parentReference", {}).get("driveId")
+                item_id  = meta["id"]
+                ext      = Path(name).suffix.lower()
+
+                # Memory guard — skip file download if available RAM is critically low
+                try:
+                    import psutil as _psutil
+                    _avail_mb = _psutil.virtual_memory().available // 1_048_576
+                    if _avail_mb < 300:
+                        broadcast("scan_error", {"file": name, "error": f"Skipped — low memory ({_avail_mb} MB free)"})
+                        logger.warning("[run_scan] low memory (%d MB free), skipping %s", _avail_mb, name)
+                        continue
+                except ImportError:
+                    pass  # psutil not installed — skip guard
+
+                uid = meta.get("_user_id") or meta.get("_account_id", "me")
+                if uid and uid != "me" and not meta.get("_drive_id"):
+                    content = conn.download_drive_item_for(uid, item_id)
+                else:
+                    content = conn.download_item(meta)
+                result  = _scan_bytes(content, name)
+                cprs    = result.get("cprs", [])
+
+                # ── Biometric photo scan (#9) + EXIF (#18) ───────────────
+                _face_count = 0
+                _exif       = {}
+                if ext in PHOTO_EXTS:
+                    if scan_photos:
+                        _face_count = _detect_photo_faces(content, name)
+                    _exif = _extract_exif(content, name)
+
+                # Flag item if CPRs found, faces detected, or EXIF PII found
+                if cprs or _face_count > 0 or _exif.get("has_pii"):
+                    # Make thumbnail
+                    if ext in {".jpg", ".jpeg", ".png"} and PIL_OK:
+                        thumb = _make_thumb(content, name)
+                        meta["_thumb"]         = thumb
+                        meta["_thumb_is_jpeg"] = True
+                    else:
+                        meta["_thumb"]         = _placeholder_svg(ext, name)
+                        meta["_thumb_is_jpeg"] = False
+                    # Widen thumbnail support to HEIC/TIFF for photo items
+                    if _face_count > 0 and meta.get("_thumb", "").startswith("<svg") and PIL_OK:
+                        try:
+                            meta["_thumb"]         = _make_thumb(content, name)
+                            meta["_thumb_is_jpeg"] = True
+                        except Exception:
+                            pass
+                    # Extract text for PII counting (lightweight -- no CPR re-scan)
+                    try:
+                        _file_text = content.decode("utf-8", errors="replace")
+                    except Exception:
+                        _file_text = ""
+                    del content  # raw bytes no longer needed — free before NER/PII counting
+                    _file_pii = _get_pii_counts(_file_text)
+                    meta["_transfer_risk"]    = _check_transfer_risk(meta)
+                    _sc = _check_special_category(_file_text, cprs)
+                    # Photos with detected faces are biometric data (Art. 9) — add
+                    # the category even when no CPR is present in the file.
+                    if _face_count > 0 and "biometric" not in _sc:
+                        _sc = sorted(_sc + ["biometric"])
+                    if _exif.get("gps") and "gps_location" not in _sc:
+                        _sc = sorted(_sc + ["gps_location"])
+                    if _exif.get("has_pii") and "exif_pii" not in _sc:
+                        _sc = sorted(_sc + ["exif_pii"])
+                    meta["_special_category"] = _sc
+                    meta["_face_count"]        = _face_count
+                    meta["_exif"]              = _exif
+                    _broadcast_card(meta, cprs, pii_counts=_file_pii)
+                else:
+                    del content  # no hits — free raw bytes immediately
+
+        except M365PermissionError:
+            uname = meta.get("_account", meta.get("_account_id", ""))
+            broadcast("scan_error", {"file": name, "error": _permission_msg("file", uname or name)})
+        except Exception as e:
+            broadcast("scan_error", {"file": name, "error": str(e)})
+
+        # Mark item as scanned regardless of whether it had CPR hits
+        item_id = meta.get("id", "")
+        if item_id:
+            scanned_ids.add(item_id)
+
+        # Periodic checkpoint save so progress survives crashes / forced quits
+        _items_since_save += 1
+        if _items_since_save >= _CHECKPOINT_SAVE_EVERY:
+            _save_checkpoint(ck_key, scanned_ids, _state.flagged_items, _state.scan_meta)
+            _items_since_save = 0
+            gc.collect()  # periodic GC to reclaim memory from processed items
+
+    grand_total = total + resumed_count
+    _state.scan_meta["total_scanned"] = grand_total
+    _state.scan_meta["flagged_count"] = len(_state.flagged_items)
+    _clear_checkpoint()  # scan completed — checkpoint is no longer needed
+
+    # Finalise DB scan record
+    if _db and _db_scan_id:
+        try:
+            _db.finish_scan(_db_scan_id, grand_total)
+        except Exception as _e:
+            logger.error("[db] finish_scan failed: %s", _e)
+
+    # Persist updated delta tokens so the next scan only fetches changes
+    if delta_enabled and new_delta_tokens:
+        merged = {**delta_tokens, **new_delta_tokens}
+        _save_delta_tokens(merged)
+        broadcast("scan_phase", {"phase": f"Delta tokens saved ({len(new_delta_tokens)} source(s) — next scan will be incremental)"})
+
+    broadcast("scan_done", {"total_scanned": grand_total, "flagged_count": len(_state.flagged_items),
+                             "delta": delta_enabled, "delta_sources": len(new_delta_tokens)})
+
diff --git a/scan_scheduler.py b/scan_scheduler.py
new file mode 100644
index 0000000..c452ee9
--- /dev/null
+++ b/scan_scheduler.py
@@ -0,0 +1,489 @@
+"""
+Scheduler — in-process APScheduler wrapper for automated GDPR scans.
+
+Supports multiple independent named scan jobs.
+Config stored in ~/.gdprscanner/schedule.json as {"jobs": [...]}.
+Old single-job format is migrated automatically on first load.
+Run history persisted in the SQLite DB (schedule_runs table).
+"""
+from __future__ import annotations
+
+import json
+import logging
+import time
+import uuid
+import threading
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+try:  # APScheduler is optional — without it ScanScheduler.start() returns False
+    from apscheduler.schedulers.background import BackgroundScheduler
+    from apscheduler.triggers.cron import CronTrigger
+    APSCHEDULER_OK = True
+except ImportError:
+    APSCHEDULER_OK = False
+
+# ── Config file ───────────────────────────────────────────────────────────────
+_DATA_DIR      = Path.home() / ".gdprscanner"
+_DATA_DIR.mkdir(exist_ok=True)  # runs at import time
+_SCHEDULE_PATH = _DATA_DIR / "schedule.json"  # holds {"jobs": [...]}
+
+_DEFAULT_JOB: dict[str, Any] = {  # template merged under every job loaded or created
+    "id":              "",  # replaced by a UUID in _new_job() / load_jobs()
+    "name":            "Scheduled scan",
+    "enabled":         False,  # only enabled jobs are registered by reload()
+    "frequency":       "daily",  # "weekly" / "monthly" are special-cased; anything else runs daily
+    "day_of_week":     "mon",  # used when frequency == "weekly"
+    "day_of_month":    1,  # used when frequency == "monthly"
+    "hour":            2,
+    "minute":          0,
+    "profile_id":      "",  # empty → _build_options() falls back to saved settings
+    "auto_email":      False,
+    "auto_retention":  False,
+    "retention_years": None,  # retention only runs when this is set and auto_retention is true
+    "fiscal_year_end": None,
+}
+
+_DEFAULT_CONFIG = _DEFAULT_JOB  # backward-compat alias (same dict object, not a copy)
+
+
+def _new_job(overrides: dict | None = None) -> dict:
+    """Return a copy of _DEFAULT_JOB with *overrides* applied and a non-empty id."""
+    job = {**_DEFAULT_JOB, **(overrides or {})}
+    if not job.get("id"):  # an empty/missing id in overrides must not wipe the UUID
+        job["id"] = str(uuid.uuid4())
+    return job
+
+
+def load_jobs() -> list[dict]:
+    """Return list of job dicts (migrates the old single-job format, fills in missing
+    UUIDs, ignores non-dict entries). Missing/unreadable file → [] and a log line."""
+    try:
+        if _SCHEDULE_PATH.exists():
+            data = json.loads(_SCHEDULE_PATH.read_text(encoding="utf-8"))
+            if isinstance(data, dict) and "jobs" in data:
+                jobs = [{**_DEFAULT_JOB, **j} for j in data["jobs"] if isinstance(j, dict)]
+                # Ensure every job has a non-empty id
+                changed = False
+                for j in jobs:
+                    if not j.get("id"):
+                        j["id"] = str(uuid.uuid4())
+                        changed = True
+                if changed:
+                    _save_jobs_file(jobs)
+                return jobs
+            # Old format: migrate to single-job list
+            if isinstance(data, dict):
+                job = _new_job({**data, "name": "Scheduled scan"})
+                _save_jobs_file([job])
+                return [job]
+    except Exception as exc:  # still best-effort, but no longer silent
+        logger.warning("[scheduler] could not load %s: %s", _SCHEDULE_PATH, exc)
+    return []
+
+
+def save_jobs(jobs: list[dict]) -> None:
+    _save_jobs_file(jobs)  # public name for the writer; the file is stored as {"jobs": jobs}
+
+
+def _save_jobs_file(jobs: list[dict]) -> None:  # persist {"jobs": jobs} via temp file + rename
+    tmp = _SCHEDULE_PATH.with_suffix(".tmp")
+    tmp.write_text(json.dumps({"jobs": jobs}, indent=2), encoding="utf-8")
+    try:
+        tmp.chmod(0o600)  # tighten BEFORE the rename so the live file never has wider permissions
+    except OSError:
+        pass  # best-effort, as before: a failed chmod must not block saving
+    tmp.replace(_SCHEDULE_PATH)
+
+
+# Backward-compat shims
+def load_schedule_config() -> dict:
+    """Legacy single-job accessor: the first saved job, else a copy of the defaults."""
+    return next(iter(load_jobs()), None) or dict(_DEFAULT_JOB)
+
+
+def save_schedule_config(cfg: dict) -> None:
+    """Legacy single-job writer: replace the first job, or create it if none exist.
+    The job id comes from cfg, else the existing first job, else a fresh UUID."""
+    jobs = load_jobs()
+    job_id = cfg.get("id") or (jobs[0]["id"] if jobs else str(uuid.uuid4()))
+    # Jobs at index 1+ are written back untouched
+    save_jobs([{**_DEFAULT_JOB, **cfg, "id": job_id}, *jobs[1:]])
+
+
+def _build_trigger(job: dict) -> "CronTrigger":
+    """Translate a job's frequency/day/hour/minute fields into a CronTrigger.
+    Any frequency other than "weekly" or "monthly" fires every day."""
+    frequency = job.get("frequency", "daily")
+    # hour/minute apply to every frequency; the day field depends on it
+    fields = {"hour": int(job.get("hour", 2)), "minute": int(job.get("minute", 0))}
+    if frequency == "weekly":
+        fields["day_of_week"] = job.get("day_of_week", "mon")
+    elif frequency == "monthly":
+        fields["day"] = int(job.get("day_of_month", 1))
+    return CronTrigger(**fields)
+
+
+def _ap_id(job_id: str) -> str:
+    return "gdpr_scan_{}".format(job_id)  # APScheduler job id; reload() matches on this prefix
+
+
+# ── Scheduler class ───────────────────────────────────────────────────────────
+
+class ScanScheduler:
+
+    def __init__(self):
+        self._lock = threading.Lock()                       # guards the check-and-add on _running_jobs
+        self._running_jobs: set[str] = set()                # ids of jobs executing right now
+        self._last_runs: dict[str, dict] = {}               # job id → most recent run record
+        self._scheduler: BackgroundScheduler | None = None  # created by start(), cleared by stop()
+
+    # ── Lifecycle ─────────────────────────────────────────────────────────
+
+    def start(self) -> bool:
+        """Start the background scheduler (once) and (re)load the enabled jobs.
+        Returns False when APScheduler is not installed, True otherwise."""
+        if not APSCHEDULER_OK:
+            return False
+        if self._scheduler is None:  # a second live scheduler would fire every job twice
+            self._scheduler = BackgroundScheduler(daemon=True, job_defaults={
+                "coalesce": True, "max_instances": 1, "misfire_grace_time": 3600})
+            self._scheduler.start()
+        self.reload()
+        return True
+
+    def stop(self):
+        if self._scheduler:  # no-op when start() was never called or stop() already ran
+            self._scheduler.shutdown(wait=False)  # do not block on jobs that are still executing
+            self._scheduler = None  # reload() / next_run_time() treat None as "not started"
+
+    def reload(self):
+        """Replace all gdpr_scan_* jobs with the enabled jobs currently on disk.
+        A job whose config cannot be scheduled is logged and skipped."""
+        sched = self._scheduler
+        if not sched:
+            return
+        # Only touch jobs this module registered (their ids carry the gdpr_scan_ prefix)
+        for stale in [j.id for j in sched.get_jobs() if j.id.startswith("gdpr_scan_")]:
+            sched.remove_job(stale)
+        for cfg in (c for c in load_jobs() if c.get("enabled")):
+            try:  # one malformed job must not leave the remaining jobs unscheduled
+                sched.add_job(self._execute_scan, trigger=_build_trigger(cfg),
+                              id=_ap_id(cfg["id"]), args=[cfg["id"]], replace_existing=True,
+                              name=cfg.get("name", "GDPR scheduled scan"))
+            except Exception as exc:
+                logger.error("[scheduler] cannot schedule job %r: %s", cfg.get("name"), exc)
+
+    def next_run_time(self, job_id: str | None = None) -> str | None:
+        """ISO timestamp of the next run of job_id, or the earliest one across all
+        gdpr_scan_* jobs when job_id is empty; None when nothing is scheduled."""
+        if not self._scheduler:
+            return None
+        if job_id:
+            candidates = [self._scheduler.get_job(_ap_id(job_id))]
+        else:
+            candidates = [j for j in self._scheduler.get_jobs() if j.id.startswith("gdpr_scan_")]
+        upcoming = [j.next_run_time for j in candidates if j and j.next_run_time]
+        return min(upcoming).isoformat() if upcoming else None
+
+    @property
+    def is_running(self) -> bool:  # True while at least one scheduled job is executing
+        return len(self._running_jobs) > 0
+
+    def get_status(self) -> dict:
+        """Return scheduler availability, one status row per saved job, and the
+        aggregate enabled / next_run / is_running values across all jobs."""
+        jobs = load_jobs()
+        rows = [{
+            "id":         j["id"],
+            "name":       j.get("name", ""),
+            "enabled":    j.get("enabled", False),
+            "next_run":   self.next_run_time(j["id"]),
+            "is_running": j["id"] in self._running_jobs,
+            "last_run":   self._last_runs.get(j["id"]),
+        } for j in jobs]
+        return {
+            "available":  APSCHEDULER_OK,
+            "jobs":       rows,
+            "enabled":    any(j.get("enabled") for j in jobs),
+            "next_run":   self.next_run_time(),
+            "is_running": bool(self._running_jobs),
+        }
+
+    # ── Execute scan ──────────────────────────────────────────────────────
+
+    def _execute_scan(self, job_id: str | None = None):
+        """Run one scheduled job end-to-end: M365 scan, file scans, Google scan,
+        then the optional e-mail report and retention pass.
+
+        job_id selects the job; when omitted the first saved job is used. The outcome
+        is kept in self._last_runs and passed to db.finish_schedule_run when recorded."""
+        jobs = load_jobs()
+        if not jobs:
+            return
+        if job_id:
+            job_cfg = next((j for j in jobs if j["id"] == job_id), None)
+            if not job_cfg:
+                return
+        else:
+            job_cfg = jobs[0]
+            job_id  = job_cfg["id"]
+
+        with self._lock:  # check-and-add under the lock so a job never runs twice at once
+            if job_id in self._running_jobs:
+                return
+            self._running_jobs.add(job_id)
+
+        run = {
+            "started_at": time.time(), "finished_at": None,
+            "status": "running",
+            "job_id": job_id, "job_name": job_cfg.get("name", ""),
+            "profile_id": job_cfg.get("profile_id", ""),
+            "flagged": 0, "scanned": 0, "emailed": 0, "error": "",
+        }
+        self._last_runs[job_id] = run
+        db_run_id: int | None = None
+        _m = None
+        logger.info("[scheduler] Starting job '%s'", job_cfg.get("name", ""))
+
+        try:
+            import gdpr_scanner as _m
+            try:
+                db = _m._get_db()
+                if db:
+                    try:
+                        db_run_id = db.begin_schedule_run(
+                            profile_id=job_cfg.get("profile_id", ""),
+                            job_id=job_id,
+                            job_name=job_cfg.get("name", ""),
+                        )
+                    except TypeError:  # presumably an older DB layer without job_id/job_name — TODO confirm
+                        db_run_id = db.begin_schedule_run(profile_id=job_cfg.get("profile_id", ""))
+            except Exception as _dbe:  # run history is best-effort; the scan itself continues
+                logger.warning("[scheduler] could not record run start: %s", _dbe)
+
+            _m.broadcast("scheduler_started", {
+                "time": datetime.now(timezone.utc).isoformat(),
+                "job_name": job_cfg.get("name", ""),
+            })
+
+            from routes import state
+            # If connector not set, attempt to restore from saved config
+            if not state.connector or not state.connector.is_authenticated():
+                try:
+                    cfg_saved = _m._load_config()
+                    cid    = cfg_saved.get("client_id", "")
+                    tid    = cfg_saved.get("tenant_id", "")
+                    secret = cfg_saved.get("client_secret", "")
+                    if cid and tid:
+                        from m365_connector import M365Connector
+                        conn = M365Connector(cid, tid, client_secret=secret)
+                        if conn.is_app_mode:
+                            conn.authenticate_app_mode()
+                        if conn.is_authenticated():
+                            state.connector = conn
+                except Exception as _e:  # the "Not authenticated" error below still fires; keep the cause
+                    logger.warning("[scheduler] could not restore M365 connector: %s", _e)
+            if not state.connector or not state.connector.is_authenticated():
+                raise RuntimeError("Not authenticated")
+
+            if not _m._scan_lock.acquire(blocking=False):
+                logger.info("[scheduler] Scan already running — skipping job '%s'", job_cfg.get("name", job_id))
+                run["status"] = "skipped"  # otherwise the finally block would store this run as "running"
+                _m.broadcast("scheduler_debug", {"msg": "Skipped — a scan is already running"})
+                return
+
+            try:
+                # Sync connector into gdpr_scanner's module global —
+                # run_scan() reads _connector directly, not state.connector
+                _m._connector = state.connector
+                _m._scan_abort.clear()
+                options = self._build_options(job_cfg)
+                options.setdefault("options", {})["_scheduled"] = True
+                # Fire M365 scan if M365 sources are included
+                m365_sources = [s for s in options.get("sources", [])
+                                if s in ("email","onedrive","sharepoint","teams")]
+                if m365_sources:
+                    opts_m365 = dict(options, sources=m365_sources)
+                    _m.run_scan(opts_m365)
+                # Fire file scan for each file source in the profile
+                # file_sources may be IDs (strings) or full dicts — resolve either
+                _all_file_sources = {s["id"]: s for s in (_m._load_file_sources() or []) if isinstance(s, dict)}
+                for fs in options.get("file_sources", []):
+                    # Resolve string IDs to full source dicts
+                    if isinstance(fs, str):
+                        fs = _all_file_sources.get(fs, {"path": fs, "label": fs})
+                    if not isinstance(fs, dict) or not fs.get("path"):
+                        logger.warning("[scheduler] skipping invalid file source: %r", fs)
+                        continue
+                    try:
+                        _m.run_file_scan(fs)
+                    except Exception as _fse:
+                        import traceback as _tb2
+                        _label = fs.get('label', fs.get('path', str(fs)))
+                        logger.error("[scheduler] file scan error (%s): %s\n%s", _label, _fse, _tb2.format_exc())
+            finally:
+                _m._scan_lock.release()
+
+            # Fire Google scan if Google sources are in the profile and
+            # a Google connector is available.
+            google_sources = options.get("google_sources", [])
+            if not google_sources:
+                # Legacy profiles store everything in sources[]
+                google_sources = [s for s in options.get("sources", [])
+                                  if s in ("gmail", "gdrive")]
+            if google_sources and state.google_connector:
+                from routes.google_scan import (
+                    _run_google_scan as _rgs, _scan_lock as _gsl, _scan_abort as _gsa)
+                if _gsl.acquire(blocking=False):
+                    try:
+                        _gsa.clear()
+                        logger.info("[scheduler] Starting Google scan — sources=%s", google_sources)
+                        _rgs({
+                            "sources":     google_sources,
+                            "user_emails": [],  # empty → scan all workspace users
+                            "options":     options.get("options", {}),
+                        })
+                    except Exception as _ge:
+                        import traceback as _tb3
+                        logger.error("[scheduler] Google scan error: %s\n%s", _ge, _tb3.format_exc())
+                    finally:
+                        _gsl.release()
+                else:
+                    logger.info("[scheduler] Google scan already running — skipping")
+
+            run["flagged"] = len(_m.flagged_items)
+            run["scanned"] = _m.scan_meta.get("total_scanned", 0)
+            run["status"]  = "completed"
+            logger.info("[scheduler] Job '%s' completed — %d flagged, %d scanned",
+                        job_cfg.get("name", ""), run["flagged"], run["scanned"])
+
+            if job_cfg.get("auto_email") and state.flagged_items:
+                try:
+                    self._send_email_report(job_cfg)
+                    run["emailed"] = 1
+                except Exception as e:
+                    run["error"] = f"Scan OK, email failed: {e}"
+
+            if job_cfg.get("auto_retention") and job_cfg.get("retention_years"):
+                try:
+                    self._run_retention(job_cfg)
+                except Exception as e:
+                    err = f"Retention failed: {e}"
+                    run["error"] = f"{run['error']} | {err}" if run["error"] else err
+
+            _m.broadcast("scheduler_done", {
+                "flagged": run["flagged"], "scanned": run["scanned"],
+                "emailed": run["emailed"], "job_name": job_cfg.get("name", ""),
+            })
+
+        except Exception as e:
+            import traceback as _tb
+            _tb_str = _tb.format_exc()
+            logger.error("[scheduler] Job failed:\n%s", _tb_str)
+            run["status"] = "failed"
+            run["error"]  = str(e)
+            try:
+                if _m:
+                    # Include last 4 lines of traceback in UI for diagnosis
+                    _tb_lines = _tb_str.strip().splitlines()
+                    _tb_short = ' | '.join(_tb_lines[-4:]) if len(_tb_lines) >= 4 else _tb_str
+                    _m.broadcast("scheduler_error", {"error": str(e) + ' | ' + _tb_short})
+            except Exception:
+                pass
+
+        finally:
+            run["finished_at"] = time.time()
+            self._last_runs[job_id] = run
+            self._running_jobs.discard(job_id)
+            if db_run_id and _m:
+                try:
+                    db = _m._get_db()
+                    if db:
+                        db.finish_schedule_run(db_run_id, **{
+                            k: run[k] for k in
+                            ("status", "flagged", "scanned", "emailed", "error")
+                        })
+                except Exception as _dbe:
+                    logger.error("[db] finish_schedule_run failed: %s", _dbe)
+
+    # ── Helpers ───────────────────────────────────────────────────────────
+
+    def _build_options(self, job_cfg: dict) -> dict:
+        """Build scan options for a job: its profile if found, else the saved
+        settings, else a default of email + onedrive with no explicit users."""
+        import gdpr_scanner as _m
+        pid = job_cfg.get("profile_id", "")
+        logger.info("[scheduler] Job '%s' — profile_id='%s'", job_cfg.get("name", ""), pid)
+        if pid:
+            p = _m._profile_get(pid)
+            if p:
+                # google_sources: dedicated field, else gmail/gdrive filtered out of sources[] (legacy)
+                _all_src = p.get("sources", [])
+                _gs_fallback = [s for s in _all_src if s in ("gmail", "gdrive")]
+                opts = {"sources":        _all_src,
+                        "user_ids":       p.get("user_ids", []),
+                        "options":        p.get("options", {}),
+                        "file_sources":   p.get("file_sources", []),
+                        "google_sources": p.get("google_sources", _gs_fallback)}
+                logger.info("[scheduler]   Profile '%s': sources=%s, users=%d",
+                            p.get("name", pid), opts["sources"], len(opts.get("user_ids", [])))
+                _m.broadcast("scheduler_debug", {  # single quotes inside the f-string: valid before Python 3.12 too
+                    "msg": f"Using profile '{p.get('name',pid)}': sources={opts['sources']}, users={len(opts.get('user_ids',[]))}"})
+                return opts
+            logger.info("[scheduler]   Profile '%s' not found — using saved settings", pid)
+            _m.broadcast("scheduler_debug", {"msg": f"Profile id '{pid}' not found — falling back to saved settings"})
+        saved = _m._load_settings()
+        if saved:
+            logger.info("[scheduler]   Saved settings: sources=%s, users=%d", saved.get("sources"), len(saved.get("user_ids", [])))
+            _m.broadcast("scheduler_debug", {
+                "msg": f"Using saved settings: sources={saved.get('sources')}, users={len(saved.get('user_ids',[]))}"})
+        return saved or {"sources": ["email", "onedrive"], "user_ids": [], "options": {}}
+
+    def _send_email_report(self, job_cfg: dict):
+        """E-mail the Excel report (Graph first, SMTP fallback); RuntimeError if no recipients."""
+        import html
+        import gdpr_scanner as _m
+        xl_bytes, fname = _m._build_excel_bytes()
+        smtp_cfg   = _m._load_smtp_config()
+        recipients = smtp_cfg.get("recipients", [])
+        if isinstance(recipients, str):
+            recipients = [r.strip() for r in recipients.replace(";", ",").split(",") if r.strip()]
+        if not recipients:
+            raise RuntimeError("No email recipients configured")
+        job_name = job_cfg.get("name", "scheduled scan")
+        subject  = f"GDPR Scanner — {job_name} {datetime.now().strftime('%Y-%m-%d %H:%M')}"
+        body = ("<html><body style='font-family:Arial,sans-serif;color:#333;padding:24px'>"
+                "<h2 style='color:#1F3864'>&#128336; GDPR Scanner — scheduled scan report</h2>"
+                f"<p>Job: <strong>{html.escape(str(job_name))}</strong></p>"  # free text → escape for HTML
+                f"<p>Scan completed. {len(_m.flagged_items)} item(s) flagged.</p>"
+                f"<p>Report attached: {html.escape(str(fname))}</p></body></html>")
+        from routes.email import _send_email_graph
+        from routes import state
+        if state.connector and state.connector.is_authenticated():
+            try:
+                _send_email_graph(subject, body, recipients, attachment_bytes=xl_bytes, attachment_name=fname)
+                return
+            except Exception as exc:  # fall through to SMTP, but leave a trace of why
+                logger.warning("[scheduler] Graph send failed, falling back to SMTP: %s", exc)
+        _m._send_report_email(xl_bytes, fname, smtp_cfg, recipients)
+
+    def _run_retention(self, job_cfg: dict):
+        """Fetch the items db.get_overdue_items() reports for the job's retention_years /
+        fiscal_year_end and hand them to _do_retention_delete(); no-op without a DB."""
+        import gdpr_scanner as _m
+        db = _m._get_db() if _m.DB_OK else None
+        if not db:
+            return
+        years = int(job_cfg["retention_years"])
+        overdue = db.get_overdue_items(years, fiscal_year_end=job_cfg.get("fiscal_year_end"))
+        if overdue:
+            _m._do_retention_delete(overdue)
+
+
+# ── Module-level singleton ────────────────────────────────────────────────────
+scan_scheduler = ScanScheduler()
diff --git a/skus/education.json b/skus/education.json
new file mode 100644
index 0000000..279420d
--- /dev/null
+++ b/skus/education.json
@@ -0,0 +1,50 @@
+{
+  "_description": "Microsoft Education SKU classification for GDPR Scanner role detection.",
+  "_source": "https://learn.microsoft.com/en-us/entra/identity/users/licensing-service-plan-reference",
+  "_note": "student_ids and staff_ids MUST be disjoint. student is checked first — any overlap causes Faculty users to be misclassified as students. Add new SKUs here; no code change required.",
+  "student_ids": {
+    "314c4481-f395-4525-be8b-2ec4bb1e9d91": "Microsoft 365 A1 for students (STANDARDWOFFPACK_STUDENT)",
+    "c32f9321-a627-406d-a114-1f9c81aaafac": "Microsoft 365 A1 for students (OFFICESUBSCRIPTION_STUDENT / new commerce CSP)",
+    "e82ae690-a2d5-4d76-8d30-7c6e01e6022e": "Microsoft 365 A3 for students",
+    "98b6e773-24d4-4c0d-a968-6e787a1f8204": "Microsoft 365 A5 for students",
+    "46c119d4-0379-4a9d-85e4-97c66d3f909e": "Microsoft 365 A1 for students (student use benefit)",
+    "e960f18a-dd80-4a07-82aa-1744b52d22ba": "Office 365 A1 for students",
+    "78e66a63-337a-4a9a-8959-41c6654dfb56": "Office 365 A3 for students",
+    "8fc2205d-4e51-4401-97f0-8c895b11bed4": "Office 365 A5 for students",
+    "12b8c807-2e20-48fc-b453-542b6ee9d171": "Microsoft 365 A1 for students (device)",
+    "d37cc85e-b4c5-4e39-b1d3-e54fb6dd5d63": "Office 365 A1 for students (device)",
+    "160d616a-4b30-4c5a-9a0b-e06b31a82b4b": "Office 365 A3 for students (device)",
+    "8a89b70c-9c52-4e4a-ab05-5a5e14c6c4f4": "Microsoft Teams Essentials (EDU Student)",
+    "a4e376bd-c61c-4517-878d-55e43f5fc13b": "Microsoft 365 A1 for students (new commerce)"
+  },
+  "staff_ids": {
+    "94763226-9b3c-4e75-a931-5c89701abe66": "Microsoft 365 A1 for faculty",
+    "f30db892-07e9-47e9-837c-80727f46fd3d": "Microsoft Power Automate Free (assigned to faculty)",
+    "4b590615-0888-425a-a965-b3bf7789848d": "Microsoft 365 A3 for faculty",
+    "e578b273-6db4-4691-bba0-8d691f4da603": "Microsoft 365 A5 for faculty",
+    "2d61d025-d6aa-49aa-b8f9-ca2ebb63e3ab": "Microsoft 365 A1 for faculty (faculty use benefit)",
+    "a4585165-0533-458a-97e3-c400570268c4": "Office 365 A1 for faculty",
+    "0c266dff-15dd-4b49-8397-2bb16070ed52": "Office 365 A3 for faculty",
+    "1e7e1070-8ccb-4aca-b470-d7cb538cb70e": "Office 365 A5 for faculty",
+    "15b1d32e-5f65-4a21-a4c4-d1a0e2ee3f8e": "Office 365 A3 for faculty (device)",
+    "ba04c29e-5b81-4f69-a5f6-c4c7d6bade97": "Microsoft 365 A1 for faculty (new commerce)",
+    "c2273bd0-dff7-4215-9ef5-2c7bcfb06425": "Microsoft 365 Apps for Faculty"
+  },
+  "student_fragments": [
+    "STUDENT",
+    "STU_",
+    "_STU",
+    "STANDARDWOFFPACK_STUDENT",
+    "STANDARDWOFFPACK_IW_STUDENT",
+    "OFFICESUBSCRIPTION_STUDENT"
+  ],
+  "staff_fragments": [
+    "FACULTY",
+    "FAC_",
+    "_FAC",
+    "TEACHER",
+    "STANDARDWOFFPACK_FACULTY",
+    "STANDARDWOFFPACK_IW_FACULTY",
+    "OFFICESUBSCRIPTION_FACULTY"
+  ]
+}
\ No newline at end of file
diff --git a/skus/google_ou_roles.json b/skus/google_ou_roles.json
new file mode 100644
index 0000000..9f6565d
--- /dev/null
+++ b/skus/google_ou_roles.json
@@ -0,0 +1,26 @@
+{
+    "_description": "Google Workspace Organizational Unit (OU) path → role mapping for GDPRScanner.",
+    "_note": "orgUnitPath values from Google Admin Console → Directory → Organisational units. Matching is prefix-based and case-insensitive — '/Elever/Indskoling' matches the '/Elever' student rule. Rules are evaluated top-to-bottom; first match wins. Edit this file to match your school's OU structure — no code change required.",
+    "_source": "Google Admin Console → Directory → Administrer organisationsenheder",
+    "student_ou_prefixes": [
+        "/Elever",
+        "/Students",
+        "/Elev",
+        "/Pupils"
+    ],
+    "staff_ou_prefixes": [
+        "/Personale",
+        "/Staff",
+        "/Lærere",
+        "/Ansatte",
+        "/Teachers",
+        "/Admin"
+    ],
+    "_examples": {
+        "gudenaaskolen.dk example": {
+            "student": "/Elever  →  student",
+            "staff":   "/Personale  →  staff",
+            "admin":   "/Admin  →  staff"
+        }
+    }
+}
diff --git a/sse.py b/sse.py
new file mode 100644
index 0000000..8ab08a7
--- /dev/null
+++ b/sse.py
@@ -0,0 +1,54 @@
+"""
+sse.py — Server-Sent Events for GDPRScanner.
+
+Provides:
+  broadcast(event, data)  — push an event to all connected browsers
+  _sse_queues             — list of per-connection Queue objects
+  _sse_buffer             — deque replay buffer for late-connecting browsers
+  _current_scan_id        — injected into every broadcast message
+"""
+from __future__ import annotations
+import json
+import logging
+import queue
+from collections import deque
+
+logger = logging.getLogger(__name__)
+
+# ── SSE state ─────────────────────────────────────────────────────────────────
+_sse_queues: list      = []
+_sse_buffer: deque     = deque(maxlen=500)
+_current_scan_id: str  = ""
+
+def broadcast(event: str, data: dict):
+    """Format one SSE message, add it to the replay buffer and to every connection queue."""
+    global _current_scan_id
+    if _current_scan_id:
+        data = {**data, "scan_id": _current_scan_id}
+    msg = f"event: {event}\ndata: {json.dumps(data, default=str)}\n\n"  # non-JSON values → str, never raise
+    _sse_buffer.append(msg)  # buffer for SSE replay on reconnect
+    for q in list(_sse_queues):
+        try:
+            q.put_nowait(msg)
+        except queue.Full:
+            pass
+    # Clear scan_id after scan_done so replay knows the scan is finished
+    if event == "scan_done" and _current_scan_id:
+        _current_scan_id = ""
+    # When no browser is watching (e.g. scheduled scan), log key events
+    if not _sse_queues:
+        if event == "scan_phase":
+            logger.info("[scan] %s", data.get("phase", ""))
+        elif event == "scan_progress":
+            file = data.get("file") or data.get("name", "")
+            if file:
+                logger.info("[scan] %s/%s — %s", data.get("completed", ""), data.get("total", ""), file)
+        elif event in ("scan_error", "scheduler_error"):
+            logger.error("[scan] %s", data.get("error", "") or data.get("file", ""))
+        elif event == "scan_done":
+            logger.info("[scan] Done — %d flagged, %d scanned", data.get("flagged_count", 0), data.get("total_scanned", 0))
+        elif event == "scheduler_started":
+            logger.info("[scan] Scheduler started — %s", data.get("job_name", ""))
+        elif event == "scheduler_done":
+            logger.info("[scan] Scheduler done — %d flagged", data.get("flagged", 0))
+
diff --git a/start_gdpr.sh b/start_gdpr.sh
new file mode 100755
index 0000000..a113466
--- /dev/null
+++ b/start_gdpr.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+# GDPRScanner — launch script (uses ./venv); aborts instead of running without the venv
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/venv/bin/activate" || { echo "start_gdpr.sh: cannot activate $SCRIPT_DIR/venv" >&2; exit 1; }
+exec python3 "$SCRIPT_DIR/gdpr_scanner.py" "$@"
diff --git a/static/js/CLAUDE.md b/static/js/CLAUDE.md
new file mode 100644
index 0000000..ab86e84
--- /dev/null
+++ b/static/js/CLAUDE.md
@@ -0,0 +1,28 @@
+# static/js — JS Rules
+
+## Profile dropdown — loader model
+Profiles are **loaders**, not persistent modes. Selecting one pushes settings into the sidebar; the sidebar is always the live state.
+
+- `_setProfileClearBtn(visible)` must be called alongside every assignment to `S._activeProfileId`.
+- **Do not re-add a selectable `value=""` option to `#profileSelect`** — deliberately removed in v1.6.6.
+
+## Profile editor source panel race condition
+`_pmgmtSaveFullEdit` detects whether Google/file checkboxes have rendered by querying the DOM directly:
+```javascript
+const googleRendered = !!document.querySelector('#peSourcesPanel input[data-source-type="google"]');
+const fileRendered   = !!document.querySelector('#peSourcesPanel input[data-source-type="file"]');
+```
+Never revert to `!!window._googleConnected` / `_fileSources.length > 0` — those async proxies can be `true` before the panel has rendered, silently clearing the user's source selection on save.
+
+## Progress bar phase parsing
+`_setProgressPhase(phase)` in `scan.js` parses the phase string against `_PHASE_SOURCE_MAP`:
+1. Source found **and** ` — ` (em-dash) present → split, resolve via `_resolveDisplayName()`, update `S._progressCurrentUser`.
+2. Source found **but no dash** → show pill + `S._progressCurrentUser` (handles sub-phases like folder counts).
+3. No source match → plain text fallback.
+
+`_PHASE_SOURCE_MAP` ordering matters — `Google Workspace` must appear before `Gmail` in the map. The email regex uses `/iu` flags — do not drop the `i`.
+
+## Gotchas
+
+- **Profile editor accounts** — default to unchecked. Only explicitly saved `user_ids` are checked.
+- **Date presets** — stored as `years * 365` (integer days). Do not use `* 365.25`.
diff --git a/static/js/auth.js b/static/js/auth.js
new file mode 100644
index 0000000..732986c
--- /dev/null
+++ b/static/js/auth.js
@@ -0,0 +1,198 @@
+import { S } from './state.js';
+// ── Auth ─────────────────────────────────────────────────────────────────────
+function handleSignIn() {
+  try {
+    startAuth().catch(function(e) {
+      alert('Sign-in error: ' + (e.message || String(e)));
+    });
+  } catch(e) {
+    alert('Sign-in error: ' + (e.message || String(e)));
+  }
+}
+
+async function startAuth() {
+  const clientId     = document.getElementById('clientId').value.trim();
+  const tenantId     = document.getElementById('tenantId').value.trim();
+  const clientSecret = document.getElementById('clientSecret').value.trim();
+  if (!clientId || !tenantId) { alert('Enter Client ID and Tenant ID'); return; }
+
+  // Persist credentials first so they survive restarts regardless of auth outcome
+  await fetch('/api/auth/config', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify({client_id: clientId, tenant_id: tenantId, client_secret: clientSecret})
+  });
+
+  const r = await fetch('/api/auth/start', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify({client_id: clientId, tenant_id: tenantId, client_secret: clientSecret})
+  });
+  const d = await r.json();
+  if (d.error) { alert(d.error); return; }
+
+  if (d.mode === 'application') {
+    // App mode — token acquired immediately, no device code step needed
+    document.getElementById('configForm').style.display = 'none';
+    document.getElementById('deviceCodeBackdrop').classList.add('open');
+    document.getElementById('deviceCode').textContent = '—';
+    document.getElementById('authStatus').className = 'auth-status success';
+    document.getElementById('authStatus').textContent = '✓ Connected (Application mode — org-wide access)';
+    setTimeout(onAuthenticated, 900);
+    return;
+  }
+
+  document.getElementById('configForm').style.display = 'none';
+  document.getElementById('deviceCodeBackdrop').classList.add('open');
+  document.getElementById('deviceCode').textContent = d.user_code;
+
+  pollAuth();
+}
+
+async function pollAuth() {
+  const r = await fetch('/api/auth/poll', {method: 'POST'});
+  const d = await r.json();
+  if (d.status === 'pending') {
+    setTimeout(pollAuth, 3000);
+  } else if (d.status === 'ok') {
+    document.getElementById('authStatus').className = 'auth-status success';
+    document.getElementById('authStatus').textContent = '✓ Signed in!';
+    setTimeout(onAuthenticated, 800);
+  } else {
+    document.getElementById('authStatus').className = 'auth-status error';
+    document.getElementById('authStatus').textContent = '✗ ' + (d.error || 'Sign-in failed');
+    document.getElementById('configForm').style.display = 'block';
+    document.getElementById('deviceCodeBackdrop').classList.remove('open');
+  }
+}
+
+function cancelAuth() {
+  document.getElementById('configForm').style.display = 'block';
+  document.getElementById('deviceCodeBackdrop').classList.remove('open');
+}
+
+let _currentDisplayName = '';
+
+function _setModeBadge(isAppMode, displayName) {
+  S._currentAppMode    = isAppMode;
+  _currentDisplayName = displayName || '';
+  // Keep Sources modal status dot in sync if it's open
+  const dot = document.getElementById('srcM365StatusDot');
+  if (dot) dot.className = 'srcmgmt-status ' + (isAppMode !== null && isAppMode !== undefined ? 'green' : 'grey');
+}
+
+async function onAuthenticated() {
+  const r = await fetch('/api/auth/status');
+  const d = await r.json();
+  if (d.display_name || d.displayName || d.email) {
+      _setModeBadge(d.app_mode, d.display_name || d.displayName || d.email);
+  }
+  document.getElementById('authScreen').style.display = 'none';
+  document.getElementById('scannerScreen').style.display = 'flex';
+  loadUsers();
+  loadTrend();  // show existing trend if DB has history
+  loadProfiles();  // populate profile dropdown (15c)
+}
+
+function reconfigure() {
+  // Show the auth screen with current credentials pre-filled so user can
+  // update the client secret without losing client_id / tenant_id.
+  document.getElementById('scannerScreen').style.display = 'none';
+  document.getElementById('authScreen').style.display    = 'flex';
+  document.getElementById('configForm').style.display    = 'block';
+  document.getElementById('deviceCodeBackdrop').classList.remove('open');
+}
+
+async function signOut() {
+  await fetch('/api/auth/signout', {method: 'POST'});
+  document.getElementById('scannerScreen').style.display = 'none';
+  document.getElementById('authScreen').style.display = 'flex';
+  document.getElementById('configForm').style.display = 'block';
+  document.getElementById('deviceCodeBackdrop').classList.remove('open');
+  S.flaggedData = []; S.filteredData = [];
+  document.getElementById('grid').innerHTML = '';
+  document.getElementById('grid').style.display = 'none';
+  const _lss2 = document.getElementById('lastScanSummary'); if (_lss2) _lss2.style.display = 'none';
+  document.getElementById('emptyState').style.display = 'flex';
+}
+
+// ── Scan option controls ─────────────────────────────────────────────────────
+
+// Date presets and attachment-size toggle (the auth check on load is in the bootstrap block below)
+(function() {
+  const presets = document.querySelectorAll('.date-preset');
+  const hidden  = document.getElementById('olderThan');
+  const dateIn  = document.getElementById('olderThanDate');
+  function setPreset(btn) {
+    presets.forEach(p => p.classList.remove('selected'));
+    btn.classList.add('selected');
+    const years = parseInt(btn.dataset.years);
+    if (years === 0) {
+      hidden.value = '0';
+      dateIn.value = new Date().toISOString().slice(0, 10);
+    } else {
+      const d = new Date();
+      d.setFullYear(d.getFullYear() - years);
+      hidden.value = Math.round(years * 365.25).toString();
+      dateIn.value = d.toISOString().slice(0, 10);
+    }
+  }
+  presets.forEach(btn => btn.addEventListener('click', () => setPreset(btn)));
+  dateIn.addEventListener('change', () => {
+    presets.forEach(p => p.classList.remove('selected'));
+    if (dateIn.value) {
+      const diffDays = Math.round((Date.now() - new Date(dateIn.value)) / 86400000);
+      hidden.value = diffDays.toString();
+    } else {
+      hidden.value = '0';
+    }
+  });
+  // Trigger default (2yr selected)
+  const def = document.querySelector('.date-preset.selected');
+  if (def) setPreset(def);
+  // Toggle attach size row visibility
+  document.getElementById('optAttachments').addEventListener('change', function() {
+    document.getElementById('attachSizeRow').style.opacity = this.checked ? '1' : '0.4';
+  });
+})();
+
+// ── Viewer mode bootstrap ─────────────────────────────────────────────────────
+if (window.VIEWER_MODE) {
+  document.body.classList.add('viewer-mode');
+  document.getElementById('authScreen').style.display    = 'none';
+  document.getElementById('scannerScreen').style.display = 'flex';
+  try { loadTrend(); } catch(e) {}
+} else {
+(async function() {
+  try {
+    const r = await fetch('/api/auth/status');
+    const d = await r.json();
+  if (d.authenticated) {
+    // Load saved credentials into fields
+    if (d.client_id) document.getElementById('clientId').value = d.client_id;
+    if (d.tenant_id) document.getElementById('tenantId').value = d.tenant_id;
+    if (d.client_secret) document.getElementById('clientSecret').value = d.client_secret;
+      _setModeBadge(d.app_mode, d.display_name || d.email || '');
+    document.getElementById('authScreen').style.display = 'none';
+    document.getElementById('scannerScreen').style.display = 'flex';
+    try { loadUsers(); } catch(e) {}
+    try { loadProfiles(); } catch(e) {}
+    try { loadTrend(); } catch(e) {}
+  } else {
+    // Pre-fill saved credentials
+    if (d.client_id) document.getElementById('clientId').value = d.client_id;
+    if (d.tenant_id) document.getElementById('tenantId').value = d.tenant_id;
+    if (d.client_secret) document.getElementById('clientSecret').value = d.client_secret;
+  }
+  } catch(e) { console.error('Auth status check failed:', e); }
+})();
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+window.handleSignIn = handleSignIn;
+window.startAuth = startAuth;
+window.pollAuth = pollAuth;
+window.cancelAuth = cancelAuth;
+window._setModeBadge = _setModeBadge;
+window.onAuthenticated = onAuthenticated;
+window.reconfigure = reconfigure;
+window.signOut = signOut;
+window._currentDisplayName = _currentDisplayName;
diff --git a/static/js/connector.js b/static/js/connector.js
new file mode 100644
index 0000000..057d09b
--- /dev/null
+++ b/static/js/connector.js
@@ -0,0 +1,684 @@
+import { S } from './state.js';
+// ── Unified Source Management (#17) ──────────────────────────────────────────
+
+function openSourcesMgmt(tab) {
+  document.getElementById('srcMgmtBackdrop').classList.add('open');
+  switchSrcTab(tab || 'm365');
+  smRefreshStatus();
+  smGoogleRefreshStatus();
+  srcFileRenderList();
+}
+
+function closeSourcesMgmt() {
+  document.getElementById('srcMgmtBackdrop').classList.remove('open');
+}
+
+function switchSrcTab(tab) {
+  ['m365','google','files'].forEach(function(t) {
+    document.getElementById('srcPane'  + t.charAt(0).toUpperCase() + t.slice(1))
+             .classList.toggle('active', t === tab);
+    const btn = document.getElementById('srcTab' + t.charAt(0).toUpperCase() + t.slice(1));
+    if (btn) btn.classList.toggle('active', t === tab);
+  });
+  // Capitalise pane ids correctly: srcPaneM365, srcPaneGoogle, srcPaneFiles
+  const paneMap = {m365:'M365', google:'Google', files:'Files'};
+  ['m365','google','files'].forEach(function(t) {
+    const pane = document.getElementById('srcPane' + paneMap[t]);
+    if (pane) pane.classList.toggle('active', t === tab);
+    const btn  = document.getElementById('srcTab'  + paneMap[t]);
+    if (btn)  btn.classList.toggle('active', t === tab);
+  });
+}
+
+// ── M365 pane ─────────────────────────────────────────────────────────────────
+
+function smRefreshStatus() {
+  const dot   = document.getElementById('srcM365StatusDot');
+  const label = document.getElementById('srcM365StatusLabel');
+  const sub   = document.getElementById('srcM365StatusSub');
+  const disc  = document.getElementById('smDisconnectBtn');
+  const st    = document.getElementById('smConnStatus');
+  if (!dot) return;
+
+  // Load saved credentials and auth status from the correct endpoints
+  fetch('/api/auth/status').then(function(r){ return r.json(); }).then(function(d) {
+    // Pre-fill credential fields
+    const cidEl = document.getElementById('smClientId');
+    const tidEl = document.getElementById('smTenantId');
+    const secEl = document.getElementById('smClientSecret');
+    if (cidEl && d.client_id)  cidEl.value = d.client_id;
+    if (tidEl && d.tenant_id)  tidEl.value = d.tenant_id;
+    if (secEl && d.client_secret) secEl.value = d.client_secret.length > 4 ? '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022' : '';
+
+    if (d.authenticated) {
+      dot.className = 'srcmgmt-status green';
+      const who = d.display_name || d.email || '';
+      const mode = d.app_mode ? t('m365_mode_app_short','App mode') : t('m365_mode_delegated_short','Delegated');
+      label.textContent = who || t('m365_srcmgmt_connected','Connected');
+      sub.textContent = mode + (d.email && d.display_name ? '  \u00b7  ' + d.email : '');
+      if (disc) disc.style.display = '';
+      if (st)   st.textContent = '';
+    } else {
+      dot.className = 'srcmgmt-status grey';
+      label.textContent = t('m365_srcmgmt_not_connected','Not connected');
+      sub.textContent = '';
+      if (disc) disc.style.display = 'none';
+      if (st)   st.textContent = '';
+    }
+  }).catch(function(){
+    if (dot) dot.className = 'srcmgmt-status grey';
+  });
+}
+
+async function smConnect() {
+  const cid = document.getElementById('smClientId').value.trim();
+  const tid = document.getElementById('smTenantId').value.trim();
+  const rawSec = document.getElementById('smClientSecret').value;
+  // If field shows placeholder dots and user hasn't changed it, use saved secret (send empty to keep it)
+  const sec = (rawSec === '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022') ? '' : rawSec.trim();
+  const st  = document.getElementById('smConnStatus');
+  if (!cid || !tid) { st.style.color='var(--danger)'; st.textContent=t('m365_err_creds_required','Client ID and Tenant ID required'); return; }
+  st.style.color='var(--muted)'; st.textContent=t('m365_connecting','Connecting...');
+
+  // Persist credentials
+  await fetch('/api/auth/config', {
+    method:'POST', headers:{'Content-Type':'application/json'},
+    body: JSON.stringify({client_id:cid, tenant_id:tid, client_secret:sec})
+  });
+
+  // Start auth — same as the auth screen flow
+  try {
+    const r = await fetch('/api/auth/start', {
+      method:'POST', headers:{'Content-Type':'application/json'},
+      body: JSON.stringify({client_id:cid, tenant_id:tid, client_secret:sec})
+    });
+    const d = await r.json();
+    if (d.error) { st.style.color='var(--danger)'; st.textContent=d.error; return; }
+
+    if (d.mode === 'application') {
+      // App mode — no device code needed
+      st.style.color='var(--accent)'; st.textContent='\u2714 '+t('m365_connected','Connected');
+      closeSourcesMgmt();
+      setTimeout(onAuthenticated, 400);
+    } else {
+      // Delegated — show device code flow, close modal
+      closeSourcesMgmt();
+      document.getElementById('clientId').value = cid;
+      document.getElementById('tenantId').value = tid;
+      document.getElementById('clientSecret').value = sec;
+      document.getElementById('configForm').style.display = 'none';
+      document.getElementById('authScreen').style.display = 'flex';
+      document.getElementById('deviceCodeBackdrop').classList.add('open');
+      document.getElementById('deviceCode').textContent = d.user_code || '\u2014';
+      pollAuth();
+    }
+  } catch(e) { st.style.color='var(--danger)'; st.textContent=e.message; }
+}
+
+function smDisconnect() {
+  if (!confirm(t('m365_signout_confirm','Disconnect and clear credentials?'))) return;
+  fetch('/api/auth/signout', {method:'POST'}).then(function(){
+    closeSourcesMgmt();
+    signOut();
+  });
+}
+
+// ── Google Workspace pane ─────────────────────────────────────────────────────
+
+// Parsed key dict held in memory while the pane is open — cleared on disconnect
+var _googleKeyDict  = null;
+var _googleAuthMode = 'workspace';
+
+function smGoogleSetMode(mode) {
+  _googleAuthMode = mode;
+  var saSection       = document.getElementById('smGoogleSaSection');
+  var personalSection = document.getElementById('smGooglePersonalSection');
+  var wsSetup         = document.getElementById('smGoogleWorkspaceSetup');
+  var btnWs           = document.getElementById('smGoogleModeWorkspace');
+  var btnPl           = document.getElementById('smGoogleModePersonal');
+  var isPersonal      = (mode === 'personal');
+  if (saSection)       saSection.style.display       = isPersonal ? 'none' : '';
+  if (personalSection) personalSection.style.display  = isPersonal ? '' : 'none';
+  if (wsSetup)         wsSetup.style.display          = isPersonal ? 'none' : '';
+  if (btnWs) { btnWs.style.background = isPersonal ? 'var(--surface)' : 'var(--accent)'; btnWs.style.color = isPersonal ? 'var(--text)' : '#fff'; }
+  if (btnPl) { btnPl.style.background = isPersonal ? 'var(--accent)' : 'var(--surface)'; btnPl.style.color = isPersonal ? '#fff' : 'var(--text)'; }
+}
+
+/**
+ * Refreshes the Google pane of the Sources modal.
+ *
+ * Fetches the Workspace and personal-account status in parallel, then:
+ *  - updates the status dot, labels and the connect/disconnect controls;
+ *  - sets window._googleConnected;
+ *  - re-renders the sources panel and, when the profile editor is open
+ *    without Google checkboxes, its source list too;
+ *  - merges Google users into the account list, or strips them out when
+ *    Google is not connected.
+ *
+ * A personal connection takes precedence over a Workspace connection.
+ */
+function smGoogleRefreshStatus() {
+  // A failed request resolves to a fallback object so Promise.all still settles.
+  var wsPromise = fetch('/api/google/auth/status').then(function(r){ return r.json(); }).catch(function(){ return {}; });
+  var personalPromise = fetch('/api/google/personal/status').then(function(r){ return r.json(); }).catch(function(){ return {connected: false}; });
+
+  Promise.all([wsPromise, personalPromise]).then(function(results) {
+    var ws = results[0];
+    var personal = results[1];
+    var dot        = document.getElementById('srcGoogleStatusDot');
+    var label      = document.getElementById('srcGoogleStatusLabel');
+    var sub        = document.getElementById('srcGoogleStatusSub');
+    var disc       = document.getElementById('smGoogleDisconnectBtn');
+    var srcs       = document.getElementById('smGoogleSourcesGroup');
+    var signOutBtn = document.getElementById('smGooglePersonalSignOutBtn');
+    var signInBtn  = document.getElementById('smGooglePersonalSignInBtn');
+    if (!dot) return;
+
+    // Server reports the Google client libraries are missing: show the
+    // install hint and stop. Nothing below this branch runs in that case.
+    if (ws.libs_ok === false) {
+      dot.className = 'srcmgmt-status amber';
+      label.textContent = t('m365_google_libs_missing', 'Libraries not installed');
+      sub.textContent   = 'pip install google-auth google-auth-httplib2 google-api-python-client';
+      if (disc) disc.style.display = 'none';
+      if (srcs) srcs.style.display = 'none';
+      return;
+    }
+
+    if (personal.connected) {
+      // Personal account connected: offer sign-out, hide the Workspace disconnect button.
+      smGoogleSetMode('personal');
+      window._googleConnected = true;
+      dot.className = 'srcmgmt-status green';
+      label.textContent = personal.email || personal.displayName || t('m365_srcmgmt_connected', 'Connected');
+      sub.textContent   = t('m365_google_mode_personal', 'Personal account');
+      if (disc)       disc.style.display       = 'none';
+      if (srcs)       srcs.style.display       = '';
+      if (signOutBtn) signOutBtn.style.display  = '';
+      if (signInBtn)  signInBtn.style.display   = 'none';
+    } else if (ws.connected) {
+      // Workspace service account connected.
+      smGoogleSetMode('workspace');
+      window._googleConnected = true;
+      dot.className = 'srcmgmt-status green';
+      label.textContent = ws.sa_email || t('m365_srcmgmt_connected', 'Connected');
+      sub.textContent   = (ws.project_id ? ws.project_id + '  ·  ' : '') + (ws.admin_email || '');
+      if (disc)       disc.style.display       = '';
+      if (srcs)       srcs.style.display       = '';
+      if (signOutBtn) signOutBtn.style.display  = 'none';
+      if (signInBtn)  signInBtn.style.display   = '';
+      // Fill the admin email only when the field is still empty, so a value
+      // the user is typing is not overwritten.
+      var ae = document.getElementById('smGoogleAdminEmail');
+      if (ae && ws.admin_email && !ae.value) ae.value = ws.admin_email;
+      // Source checkboxes are synced only when the server sends a value.
+      var gm = document.getElementById('smGoogleSrcGmail');
+      var gd = document.getElementById('smGoogleSrcDrive');
+      if (gm && ws.src_gmail !== undefined) gm.checked = !!ws.src_gmail;
+      if (gd && ws.src_drive !== undefined) gd.checked = !!ws.src_drive;
+    } else {
+      // Neither connection is active.
+      window._googleConnected = false;
+      dot.className = 'srcmgmt-status grey';
+      label.textContent = t('m365_srcmgmt_not_connected', 'Not connected');
+      sub.textContent   = ws.error || personal.error || '';
+      if (disc)       disc.style.display       = 'none';
+      if (srcs)       srcs.style.display       = 'none';
+      if (signOutBtn) signOutBtn.style.display  = 'none';
+      if (signInBtn)  signInBtn.style.display   = '';
+    }
+    renderSourcesPanel();
+    // If the profile editor is open and its source panel has no Google checkboxes yet,
+    // re-render it now that connection status is known.
+    if (document.getElementById('pmgmtEditor')?.classList.contains('open') &&
+        !document.querySelector('#peSourcesPanel input[data-source-type="google"]')) {
+      // Combine what is currently ticked in the editor with the profile's
+      // saved sources and pass both to the re-render.
+      var _peCheckedIds = Array.from(document.querySelectorAll('#peSourcesPanel input[type=checkbox]'))
+        .filter(function(cb) { return cb.checked; }).map(function(cb) { return cb.dataset.sourceId; });
+      var _peProfile = window._pmgmtEditId ? (S._profiles.find(function(p) { return p.id === window._pmgmtEditId; }) || window._pmgmtNewDraft) : window._pmgmtNewDraft;
+      if (_peProfile) {
+        var _peSavedIds = (_peProfile.sources||[]).concat(_peProfile.google_sources||[]).concat(_peProfile.file_sources||[]);
+        _renderEditorSources(_peCheckedIds.concat(_peSavedIds));
+      }
+    }
+    if (window._googleConnected) {
+      _mergeGoogleUsers();
+    } else {
+      // Remove standalone Google users; reset merged 'both' users back to M365
+      S._allUsers = S._allUsers.filter(function(u){ return (u.platform||'m365') !== 'google'; });
+      S._allUsers.forEach(function(u) {
+        if (u.platform === 'both') { u.platform = 'm365'; delete u.googleEmail; }
+      });
+      renderAccountList();
+    }
+  }).catch(function() {
+    // NOTE(review): this also catches exceptions thrown inside the handler
+    // above (e.g. by renderSourcesPanel), not only fetch failures. The dot
+    // then turns grey even though a connection may exist. Confirm intent.
+    var dot = document.getElementById('srcGoogleStatusDot');
+    if (dot) dot.className = 'srcmgmt-status grey';
+  });
+}
+
+// Wire up file input to read + validate JSON immediately
+(function() {
+  document.addEventListener('DOMContentLoaded', function() {
+    var fi = document.getElementById('smGoogleKeyFile');
+    if (!fi) return;
+    fi.addEventListener('change', function() {
+      var f = fi.files && fi.files[0];
+      if (!f) { _googleKeyDict = null; return; }
+      var reader = new FileReader();
+      reader.onload = function(e) {
+        try {
+          _googleKeyDict = JSON.parse(e.target.result);
+          var nameEl = document.getElementById('smGoogleKeyName');
+          if (nameEl) nameEl.textContent = _googleKeyDict.client_email ? '✔ ' + _googleKeyDict.client_email.split('@')[0] : '✔ loaded';
+        } catch(err) {
+          _googleKeyDict = null;
+          var st = document.getElementById('smGoogleConnStatus');
+          if (st) { st.style.color='var(--danger)'; st.textContent = t('m365_google_invalid_json','Invalid JSON file'); }
+        }
+      };
+      reader.readAsText(f);
+    });
+  });
+})();
+
+async function smGoogleConnect() {
+  var st = document.getElementById('smGoogleConnStatus');
+  var adminEmail = (document.getElementById('smGoogleAdminEmail') || {}).value || '';
+
+  if (!_googleKeyDict) {
+    if (st) { st.style.color='var(--danger)'; st.textContent = t('m365_google_key_required','Select a service account JSON key file'); }
+    return;
+  }
+  if (st) { st.style.color='var(--muted)'; st.textContent = t('m365_connecting','Connecting...'); }
+
+  try {
+    var r = await fetch('/api/google/auth/connect', {
+      method: 'POST',
+      headers: {'Content-Type':'application/json'},
+      body: JSON.stringify({key_json: _googleKeyDict, admin_email: adminEmail})
+    });
+    var d = await r.json();
+    if (d.error) {
+      if (st) { st.style.color='var(--danger)'; st.textContent = d.error; }
+      return;
+    }
+    if (st) { st.style.color='var(--accent)'; st.textContent = '✔ ' + t('m365_connected','Connected'); }
+    smGoogleRefreshStatus();
+  } catch(e) {
+    if (st) { st.style.color='var(--danger)'; st.textContent = e.message; }
+  }
+}
+
+function smGoogleDisconnect() {
+  if (!confirm(t('m365_signout_confirm','Disconnect and clear credentials?'))) return;
+  fetch('/api/google/auth/disconnect', {method:'POST'}).then(function() {
+    _googleKeyDict = null;
+    var fi = document.getElementById('smGoogleKeyFile');
+    if (fi) fi.value = '';
+    var nameEl = document.getElementById('smGoogleKeyName');
+    if (nameEl) nameEl.textContent = '';
+    var st = document.getElementById('smGoogleConnStatus');
+    if (st) st.textContent = '';
+    smGoogleRefreshStatus();
+  });
+}
+
+async function smGooglePersonalStart() {
+  var clientId     = (document.getElementById('smGooglePersonalClientId')     || {}).value || '';
+  var clientSecret = (document.getElementById('smGooglePersonalClientSecret') || {}).value || '';
+  var st = document.getElementById('smGooglePersonalConnStatus');
+  if (!clientId || !clientSecret) {
+    if (st) { st.style.color = 'var(--danger)'; st.textContent = t('m365_google_personal_creds_required', 'Client ID and secret required'); }
+    return;
+  }
+  if (st) { st.style.color = 'var(--muted)'; st.textContent = t('m365_connecting', 'Connecting...'); }
+  try {
+    var r = await fetch('/api/google/personal/start', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({client_id: clientId, client_secret: clientSecret})
+    });
+    var d = await r.json();
+    if (d.error) {
+      if (st) { st.style.color = 'var(--danger)'; st.textContent = d.error; }
+      return;
+    }
+    var box    = document.getElementById('smGoogleDeviceBox');
+    var codeEl = document.getElementById('smGoogleDeviceCode');
+    var urlEl  = document.getElementById('smGoogleDeviceUrl');
+    var pollSt = document.getElementById('smGooglePollStatus');
+    if (box)    box.style.display  = '';
+    if (codeEl) codeEl.textContent = d.user_code || '—';
+    if (urlEl)  { urlEl.href = d.verification_url || 'https://google.com/device'; urlEl.textContent = (d.verification_url || 'https://google.com/device').replace('https://', ''); }
+    if (pollSt) { pollSt.style.color = 'var(--muted)'; pollSt.textContent = '⏳ ' + t('m365_auth_waiting', 'Waiting for sign-in…'); }
+    if (st)     st.textContent = '';
+    smGooglePersonalPoll();
+  } catch(e) {
+    if (st) { st.style.color = 'var(--danger)'; st.textContent = e.message; }
+  }
+}
+
+function smGooglePersonalPoll() {
+  fetch('/api/google/personal/poll', {method: 'POST'})
+    .then(function(r) { return r.json(); })
+    .then(function(d) {
+      var pollSt = document.getElementById('smGooglePollStatus');
+      if (d.status === 'pending') {
+        setTimeout(smGooglePersonalPoll, 3000);
+      } else if (d.status === 'ok') {
+        if (pollSt) { pollSt.style.color = 'var(--success)'; pollSt.textContent = '✓ ' + t('m365_connected', 'Connected'); }
+        setTimeout(function() {
+          var box = document.getElementById('smGoogleDeviceBox');
+          if (box) box.style.display = 'none';
+          smGoogleRefreshStatus();
+        }, 1000);
+      } else {
+        if (pollSt) { pollSt.style.color = 'var(--danger)'; pollSt.textContent = '✗ ' + (d.error || 'Sign-in failed'); }
+        setTimeout(function() {
+          var box = document.getElementById('smGoogleDeviceBox');
+          if (box) box.style.display = 'none';
+        }, 3000);
+      }
+    })
+    .catch(function() { setTimeout(smGooglePersonalPoll, 5000); });
+}
+
+function smGooglePersonalSignOut() {
+  if (!confirm(t('m365_signout_confirm', 'Disconnect and clear credentials?'))) return;
+  fetch('/api/google/personal/signout', {method: 'POST'}).then(function() {
+    smGoogleRefreshStatus();
+  });
+}
+
+// Returns {sources, options} reflecting current Google pane state — used by scan launcher
+function getGoogleScanOptions() {
+  var sources = [];
+  if (document.getElementById('smGoogleSrcGmail') && document.getElementById('smGoogleSrcGmail').checked) sources.push('gmail');
+  if (document.getElementById('smGoogleSrcDrive') && document.getElementById('smGoogleSrcDrive').checked) sources.push('gdrive');
+  return {sources: sources, options: {}};
+}
+
+// ── File sources pane ─────────────────────────────────────────────────────────
+
+function srcFileRenderList() {
+  const list = document.getElementById('srcFileList');
+  if (!list) return;
+  if (!S._fileSources.length) {
+    list.innerHTML = '<div class="fsrc-empty">'+t('m365_file_sources_empty','No file sources yet.')+'</div>';
+    return;
+  }
+  list.innerHTML = S._fileSources.map(function(s) {
+    const isSmb = s.path && (s.path.startsWith('//') || s.path.startsWith('\\\\'));
+    const icon  = isSmb ? '\uD83C\uDF10' : '\uD83D\uDCC1';
+    const sid   = _esc(s.id||'');
+    const slabel = _esc(s.label||s.path||'');
+    return '<div class="fsrc-row">'
+      +'<div class="fsrc-row-head">'
+      +'<span class="fsrc-row-label">'+icon+' '+slabel+'</span>'
+      +'<div class="fsrc-actions">'
+      +'<button class="btn-scan" onclick="srcFileScan(\''+sid+'\')">&#9654; '+t('m365_fsrc_scan_btn','Scan')+'</button>'
+      +'<button class="btn-edit" onclick="srcFileEdit(\''+sid+'\')" style="background:none;border:1px solid var(--border);color:var(--muted);padding:2px 7px;border-radius:4px;font-size:10px;cursor:pointer">'+t('m365_fsrc_edit_btn','Edit')+'</button>'
+      +'<button class="btn-del" onclick="srcFileDelete(\''+sid+'\',\''+slabel+'\')">'+t('m365_profile_delete','Delete')+'</button>'
+      +'</div></div>'
+      +'<div class="fsrc-row-path">'+_esc(s.path||'')+(s.smb_user?'  \u00b7  \uD83D\uDC64 '+_esc(s.smb_user):'')+'</div>'
+      +'</div>';
+  }).join('');
+}
+
+function srcFileDetectSmb() {
+  const p = document.getElementById('srcFilePath').value;
+  const isSmb = p.startsWith('//') || p.startsWith('\\\\');
+  document.getElementById('srcFileSmbFields').style.display = isSmb ? 'flex' : 'none';
+  if (isSmb && !document.getElementById('srcFileSmbHost').value) {
+    document.getElementById('srcFileSmbHost').value = p.replace(/^[\/\\]+/,'').split(/[\/\\]/)[0];
+  }
+}
+
+function srcFileAutoName() {
+  const labelEl = document.getElementById('srcFileLabel');
+  if (labelEl._userEdited) return;
+  const p = document.getElementById('srcFilePath').value.trim();
+  if (!p) { labelEl.value=''; return; }
+  const parts = p.replace(/[\/\\]+$/,'').split(/[\/\\]/);
+  if ((p.startsWith('//')||p.startsWith('\\\\')) && parts.filter(function(x){return x;}).length>=2) {
+    const segs = parts.filter(function(x){return x;});
+    labelEl.value = segs[0]+(segs[1]?' / '+segs[1]:'');
+  } else {
+    labelEl.value = parts[parts.length-1]||p;
+  }
+}
+
+async function srcFileAdd() {
+  const label   = document.getElementById('srcFileLabel').value.trim();
+  const path    = document.getElementById('srcFilePath').value.trim();
+  const smbHost = document.getElementById('srcFileSmbHost').value.trim();
+  const smbUser = document.getElementById('srcFileSmbUser').value.trim();
+  const smbPw   = document.getElementById('srcFileSmbPw').value;
+  const stat    = document.getElementById('srcFileStatus');
+  if (!label) { stat.style.color='var(--danger)'; stat.textContent=t('m365_fsrc_name_required','Name is required.'); document.getElementById('srcFileLabel').focus(); return; }
+  if (!path)  { stat.style.color='var(--danger)'; stat.textContent=t('m365_fsrc_path_required','Path is required.'); return; }
+  stat.style.color='var(--muted)'; stat.textContent=t('m365_fsrc_saving','Saving...');
+  if (smbPw && smbUser) {
+    try { await fetch('/api/file_sources/store_creds',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({smb_host:smbHost,smb_user:smbUser,password:smbPw})}); } catch(e){}
+  }
+  try {
+    const editId = document.getElementById('srcFileEditId');
+    const existingId = editId ? editId.value : '';
+    const body = {label, path, smb_host:smbHost, smb_user:smbUser};
+    if (existingId) body.id = existingId;
+    const r = await fetch('/api/file_sources/save',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)});
+    const d = await r.json();
+    if (d.error) { stat.style.color='var(--danger)'; stat.textContent=d.error; return; }
+    ['srcFileLabel','srcFilePath','srcFileSmbHost','srcFileSmbUser','srcFileSmbPw'].forEach(function(id){const el=document.getElementById(id);if(el){el.value='';el._userEdited=false;}});
+    if (editId) editId.value='';
+    const addBtn=document.getElementById('srcFileAddBtn'); if(addBtn) addBtn.textContent=t('m365_fsrc_add_btn','Add');
+    document.getElementById('srcFileSmbFields').style.display='none';
+    stat.style.color='var(--accent)'; stat.textContent='\u2714 '+t('m365_fsrc_saved','Source saved');
+    await _loadFileSources();
+    srcFileRenderList();
+    log(t('m365_fsrc_saved','Source saved')+': '+label);
+  } catch(e){ stat.style.color='var(--danger)'; stat.textContent=e.message; }
+}
+
+// Load an existing file source into the file-source form so it can be edited.
+//
+// Looks the source up in S._fileSources by id and copies its label, path and
+// SMB host/user into the form inputs, stores the id in #srcFileEditId, shows
+// the SMB fields only for network paths, relabels the submit button to
+// "Save changes" and writes an "Editing: …" status line.
+// Returns without touching the form when no source has the given id.
+function srcFileEdit(id) {
+  const s = S._fileSources.find(function(x){return x.id===id;});
+  if (!s) return;
+  const labelEl = document.getElementById('srcFileLabel');
+  const pathEl  = document.getElementById('srcFilePath');
+  const hostEl  = document.getElementById('srcFileSmbHost');
+  const userEl  = document.getElementById('srcFileSmbUser');
+  const pwEl    = document.getElementById('srcFileSmbPw');
+  const editId  = document.getElementById('srcFileEditId');
+  // Mark the label as user-edited so the loaded name is treated like a typed one.
+  if (labelEl) { labelEl.value = s.label||''; labelEl._userEdited = true; }
+  if (pathEl)  pathEl.value  = s.path||'';
+  if (hostEl)  hostEl.value  = s.smb_host||'';
+  if (userEl)  userEl.value  = s.smb_user||'';
+  // NOTE(review): the password box is pre-filled with eight bullet characters
+  // whenever an SMB user exists. Confirm srcFileAdd() does not submit this
+  // mask as the real password when the user saves without retyping it.
+  if (pwEl)    pwEl.value    = s.smb_user ? '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022' : '';
+  if (editId)  editId.value  = id;
+  // Paths starting with // or \\ are treated as SMB shares.
+  const isSmb = (s.path||'').startsWith('//') || (s.path||'').startsWith('\\\\');
+  const smbFields = document.getElementById('srcFileSmbFields');
+  if (smbFields) smbFields.style.display = isSmb ? 'flex' : 'none';
+  const btn = document.getElementById('srcFileAddBtn');
+  if (btn) btn.textContent = t('m365_fsrc_save_changes','Save changes');
+  const stat = document.getElementById('srcFileStatus');
+  // textContent renders its value literally, so the label must not be
+  // HTML-escaped here (escaping would show entities such as "&amp;").
+  if (stat) { stat.style.color='var(--muted)'; stat.textContent='Editing: '+(s.label||s.path||''); }
+}
+
+// Delete a saved file source after the user confirms, then reload and
+// re-render the list.
+//
+// id    - id of the source, sent to /api/file_sources/delete.
+// label - text shown inside the confirmation prompt.
+//
+// Failures of the request or the reload are shown in #srcFileStatus instead of
+// surfacing as an unhandled promise rejection.
+async function srcFileDelete(id, label) {
+  if (!confirm(t('m365_profile_delete_confirm','Delete')+' "'+label+'"?')) return;
+  try {
+    await fetch('/api/file_sources/delete',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({id})});
+    await _loadFileSources();
+    srcFileRenderList();
+  } catch(e) {
+    const stat = document.getElementById('srcFileStatus');
+    if (stat) { stat.style.color='var(--danger)'; stat.textContent=e.message; }
+  }
+}
+
+// Start a scan of one saved file source.
+//
+// Closes the sources modal, logs the start, then POSTs the whole source object
+// to /api/file_scan/start. An error reported by the server or a failed request
+// is written to the log with the 'err' class. Unknown ids are ignored.
+async function srcFileScan(id) {
+  const target = S._fileSources.find((entry) => entry.id === id);
+  if (!target) return;
+  closeSourcesMgmt();
+  const shownName = target.label || target.path;
+  log(t('m365_fsrc_scan_start','Starting file scan') + ': ' + shownName);
+  try {
+    const response = await fetch('/api/file_scan/start', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify(target),
+    });
+    const result = await response.json();
+    if (result.error) log('File scan error: ' + result.error, 'err');
+  } catch (err) {
+    log('File scan error: ' + err.message, 'err');
+  }
+}
+
+// Legacy entry points kept for older callers: both simply forward to the
+// unified sources-management modal (opened with the 'files' argument).
+function openFileSourcesModal() {
+  openSourcesMgmt('files');
+}
+function closeFileSourcesModal() {
+  closeSourcesMgmt();
+}
+
+// ── File Sources (#8) ─────────────────────────────────────────────────────────
+
+// Fetch the saved file sources from /api/file_sources and refresh every view
+// that displays them.
+//
+// On success: stores the list in S._fileSources, re-renders the file-source
+// list and the sidebar sources panel, applies any source selection a profile
+// left pending in S._pendingProfileSources, and re-renders the profile
+// editor's source list when it is open but has no file checkboxes yet.
+// On failure: writes "Error: …" to #fsrcStatus (if present) and leaves the
+// previously loaded list untouched.
+async function _loadFileSources() {
+  try {
+    const r = await fetch('/api/file_sources');
+    // Treat HTTP failures as errors; otherwise an error body without a
+    // "sources" key would silently replace the list with an empty one.
+    if (!r.ok) throw new Error('HTTP ' + r.status);
+    const d = await r.json();
+    S._fileSources = d.sources || [];
+    // The argument is currently ignored by _renderFileSources().
+    _renderFileSources(d.smb_available);
+    renderSourcesPanel();
+    // Re-apply any pending profile source selection (file sources render after load)
+    const pending = S._pendingProfileSources || [];
+    if (pending.length) {
+      document.querySelectorAll('#sourcesPanel input[data-source-type="file"]').forEach(function(cb) {
+        cb.checked = pending.includes(cb.dataset.sourceId);
+      });
+      S._pendingProfileSources = [];
+    }
+    // If the profile editor is open and has no file checkboxes yet, re-render it now.
+    if (document.getElementById('pmgmtEditor')?.classList.contains('open') &&
+        !document.querySelector('#peSourcesPanel input[data-source-type="file"]') &&
+        S._fileSources.length > 0) {
+      // Keep whatever the user has ticked so far …
+      var _peCheckedIds = Array.from(document.querySelectorAll('#peSourcesPanel input[type=checkbox]'))
+        .filter(function(cb) { return cb.checked; }).map(function(cb) { return cb.dataset.sourceId; });
+      // … and add the ids saved on the profile being edited (or the new draft).
+      var _peProfile = window._pmgmtEditId ? (S._profiles.find(function(p) { return p.id === window._pmgmtEditId; }) || window._pmgmtNewDraft) : window._pmgmtNewDraft;
+      if (_peProfile) {
+        var _peSavedIds = (_peProfile.sources||[]).concat(_peProfile.google_sources||[]).concat(_peProfile.file_sources||[]);
+        _renderEditorSources(_peCheckedIds.concat(_peSavedIds));
+      }
+    }
+  } catch(e) {
+    const s = document.getElementById('fsrcStatus');
+    if (s) { s.style.color = 'var(--danger)'; s.textContent = 'Error: ' + e.message; }
+  }
+}
+
+// Render S._fileSources into #fsrcList as one row per source, each with a
+// Scan and a Delete button. Shows an "empty" placeholder when there are no
+// sources and does nothing when #fsrcList is not in the DOM.
+//
+// The id and label are handed to the click handlers through data-* attributes
+// rather than being spliced into the inline JavaScript: HTML entities inside an
+// onclick attribute are decoded before the script is parsed, so a label
+// containing an apostrophe or backslash would otherwise break the handler (or
+// inject code).
+function _renderFileSources() {
+  const list = document.getElementById('fsrcList');
+  if (!list) return;
+  if (!S._fileSources.length) {
+    list.innerHTML = '<div class="fsrc-empty">' + t('m365_file_sources_empty','No file sources yet.') + '</div>';
+    return;
+  }
+  // Escape for a double-quoted attribute value. The extra replace is a no-op
+  // when _esc() already encodes double quotes.
+  const attr = function(v) { return String(_esc(v)).replace(/"/g, '&quot;'); };
+  list.innerHTML = S._fileSources.map(function(s) {
+    const isSmb = s.path && (s.path.startsWith('//') || s.path.startsWith('\\\\'));
+    const icon  = isSmb ? '\uD83C\uDF10' : '\uD83D\uDCC1';
+    const userPart = s.smb_user ? '  \u00b7  \uD83D\uDC64 ' + _esc(s.smb_user) : '';
+    const slabel = _esc(s.label || s.path || '');
+    const idAttr    = attr(s.id || '');
+    const labelAttr = attr(s.label || s.path || '');
+    return '<div class="fsrc-row">'
+      + '<div class="fsrc-row-head">'
+      + '<span class="fsrc-row-label">' + icon + ' ' + slabel + '</span>'
+      + '<div class="fsrc-actions">'
+      + '<button class="btn-scan" data-id="' + idAttr + '" onclick="fsrcScan(this.dataset.id)">&#9654; ' + t('m365_fsrc_scan_btn','Scan') + '</button>'
+      + '<button class="btn-del" data-id="' + idAttr + '" data-label="' + labelAttr + '" onclick="fsrcDelete(this.dataset.id,this.dataset.label)">' + t('m365_profile_delete','Delete') + '</button>'
+      + '</div></div>'
+      + '<div class="fsrc-row-path">' + _esc(s.path || '') + userPart + '</div>'
+      + '</div>';
+  }).join('');
+}
+
+// Show the SMB credential fields when the entered path looks like a network
+// share (starts with // or \\), hide them otherwise. For a share path the host
+// input is pre-filled with the first path segment unless it already has a value.
+function fsrcDetectSmb() {
+  const pathValue = document.getElementById('fsrcPath').value;
+  const looksLikeSmb = pathValue.startsWith('//') || pathValue.startsWith('\\\\');
+  document.getElementById('fsrcSmbFields').style.display = looksLikeSmb ? 'flex' : 'none';
+  if (!looksLikeSmb) return;
+  if (document.getElementById('fsrcSmbHost').value) return;
+  const firstSegment = pathValue.replace(/^[\/\\]+/,'').split(/[\/\\]/)[0];
+  document.getElementById('fsrcSmbHost').value = firstSegment;
+}
+
+// Suggest a source name from the entered path, unless the user has already
+// typed a name themselves (label input flagged with _userEdited).
+//   - empty path            -> name is cleared
+//   - SMB path //host/share -> "host / share" (or just "host")
+//   - anything else         -> last non-empty path segment
+function fsrcAutoName() {
+  const nameInput = document.getElementById('fsrcLabel');
+  if (nameInput._userEdited) return;
+  const rawPath = document.getElementById('fsrcPath').value.trim();
+  if (!rawPath) { nameInput.value = ''; return; }
+  // Drop trailing separators, then split on either slash flavour
+  const segments = rawPath.replace(/[/\\]+$/, '').split(/[/\\]/);
+  const isSmbPath = rawPath.startsWith('//') || rawPath.startsWith('\\\\');
+  if (isSmbPath && segments.length >= 3) {
+    // The leading separators yield empty segments; skip them
+    const named = segments.filter(function(seg){ return seg.length > 0; });
+    const host  = named[0] || '';
+    const share = named[1] || '';
+    nameInput.value = share ? host + ' / ' + share : host;
+    return;
+  }
+  nameInput.value = segments[segments.length - 1] || segments[segments.length - 2] || rawPath;
+}
+
+// Once the DOM is ready, track manual edits on both label inputs: typing a
+// non-empty value sets _userEdited, clearing the field resets it.
+document.addEventListener('DOMContentLoaded', () => {
+  ['fsrcLabel', 'srcFileLabel'].forEach((inputId) => {
+    const input = document.getElementById(inputId);
+    if (!input) return;
+    input.addEventListener('input', () => {
+      input._userEdited = Boolean(input.value);
+    });
+  });
+});
+
+// Save a new file source from the file-sources form.
+//
+// Reads path, label (falls back to the path) and the SMB host/user/password
+// inputs, validates that name and path are present, optionally stores the SMB
+// credentials, then POSTs the source to /api/file_sources/save. On success the
+// form is cleared, the list reloaded and a log line written; errors are shown
+// in #fsrcStatus.
+async function fsrcAddSource() {
+  const path    = document.getElementById('fsrcPath').value.trim();
+  const label   = document.getElementById('fsrcLabel').value.trim() || path;
+  const smbHost = document.getElementById('fsrcSmbHost').value.trim();
+  const smbUser = document.getElementById('fsrcSmbUser').value.trim();
+  const smbPw   = document.getElementById('fsrcSmbPw').value;
+  const stat    = document.getElementById('fsrcStatus');
+  if (!label) { stat.style.color='var(--danger)'; stat.textContent=t('m365_fsrc_name_required','Name is required.'); document.getElementById('fsrcLabel').focus(); return; }
+  if (!path) { stat.style.color='var(--danger)'; stat.textContent=t('m365_fsrc_path_required','Path is required.'); return; }
+  stat.style.color='var(--muted)'; stat.textContent=t('m365_fsrc_saving','Saving...');
+  if (smbPw && smbUser) {
+    // Storing credentials stays best-effort (the source is saved regardless),
+    // but a failure is logged so it does not go unnoticed.
+    try {
+      const cr = await fetch('/api/file_sources/store_creds',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({smb_host:smbHost,smb_user:smbUser,password:smbPw})});
+      if (!cr.ok) log('SMB credential store failed: HTTP '+cr.status,'warn');
+    } catch(e){ log('SMB credential store failed: '+e.message,'warn'); }
+  }
+  try {
+    const r = await fetch('/api/file_sources/save',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({label,path,smb_host:smbHost,smb_user:smbUser})});
+    const d = await r.json();
+    if (d.error) { stat.style.color='var(--danger)'; stat.textContent=d.error; return; }
+    // Reset the form, including the "user typed a name" flag
+    ['fsrcLabel','fsrcPath','fsrcSmbHost','fsrcSmbUser','fsrcSmbPw'].forEach(function(id){const el=document.getElementById(id);if(el){el.value='';el._userEdited=false;}});
+    document.getElementById('fsrcSmbFields').style.display='none';
+    stat.style.color='var(--accent)'; stat.textContent='\u2714 '+t('m365_fsrc_saved','Source saved');
+    await _loadFileSources();
+    log(t('m365_fsrc_saved','Source saved')+': '+label);
+  } catch(e){ stat.style.color='var(--danger)'; stat.textContent=e.message; }
+}
+
+// Ask for confirmation, delete the file source with the given id, reload the
+// list and log the deletion. A failure is shown in #fsrcStatus when present.
+async function fsrcDelete(id, label) {
+  const question = t('m365_profile_delete_confirm','Delete') + ' "' + label + '"?';
+  if (!confirm(question)) return;
+  try {
+    await fetch('/api/file_sources/delete', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({id}),
+    });
+    await _loadFileSources();
+    log(t('m365_profile_deleted','Deleted') + ': ' + label);
+  } catch (err) {
+    const statusEl = document.getElementById('fsrcStatus');
+    if (statusEl) statusEl.textContent = err.message;
+  }
+}
+
+// Start a scan of one saved file source.
+//
+// Closes the file-sources modal, logs the start, then POSTs the whole source
+// object to /api/file_scan/start. Server-reported errors and request failures
+// are logged with the 'err' class. Unknown ids are ignored.
+async function fsrcScan(id) {
+  const target = S._fileSources.find((entry) => entry.id === id);
+  if (!target) return;
+  closeFileSourcesModal();
+  const shownName = target.label || target.path;
+  log(t('m365_fsrc_scan_start','Starting file scan') + ': ' + shownName);
+  try {
+    const response = await fetch('/api/file_scan/start', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify(target),
+    });
+    const result = await response.json();
+    if (result.error) log('File scan error: ' + result.error, 'err');
+  } catch (err) {
+    log('File scan error: ' + err.message, 'err');
+  }
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+// Publish this module's functions and values on window, in the same order as
+// before, so inline HTML handlers and other modules can reach them.
+// NOTE(review): _googleKeyDict / _googleAuthMode are copied by value here; if
+// they are reassigned later the window copies will not follow — confirm.
+Object.assign(window, {
+  openSourcesMgmt,
+  closeSourcesMgmt,
+  switchSrcTab,
+  smRefreshStatus,
+  smConnect,
+  smDisconnect,
+  smGoogleSetMode,
+  smGoogleRefreshStatus,
+  smGoogleConnect,
+  smGoogleDisconnect,
+  smGooglePersonalStart,
+  smGooglePersonalPoll,
+  smGooglePersonalSignOut,
+  getGoogleScanOptions,
+  srcFileRenderList,
+  srcFileDetectSmb,
+  srcFileAutoName,
+  srcFileAdd,
+  srcFileEdit,
+  srcFileDelete,
+  srcFileScan,
+  openFileSourcesModal,
+  closeFileSourcesModal,
+  _loadFileSources,
+  _renderFileSources,
+  fsrcDetectSmb,
+  fsrcAutoName,
+  fsrcAddSource,
+  fsrcDelete,
+  fsrcScan,
+  _googleKeyDict,
+  _googleAuthMode,
+});
diff --git a/static/js/log.js b/static/js/log.js
new file mode 100644
index 0000000..310b7b8
--- /dev/null
+++ b/static/js/log.js
@@ -0,0 +1,341 @@
+import { S } from './state.js';
+// ── Log ──────────────────────────────────────────────────────────────────────
+// sessionStorage key under which log() persists the log lines
+const _LOG_SESSION_KEY = 'gdpr_log_session';
+// Upper bound on the number of persisted log lines (oldest are dropped first)
+const _LOG_MAX_LINES = 300;
+let _logFilter = 'all'; // 'all' | 'err'
+
+// Maps keywords found in phase strings → {label, pillClass}
+// Emoji patterns cover phases that have no source keyword in text
+// (e.g. "📂 skolehaver: 1 msg(s)" — 📂 is only used for mail folders)
+// Entries are tested in order and the first match wins.
+const _PHASE_SOURCE_MAP = [
+  { re: /OneDrive/i,                       label: 'OneDrive',   cls: 'progress-src-m365'   },
+  { re: /SharePoint/i,                     label: 'SharePoint', cls: 'progress-src-m365'   },
+  { re: /\bTeams\b/i,                      label: 'Teams',      cls: 'progress-src-m365'   },
+  { re: /E-?mail|emails?|msg\(s\)|\uD83D\uDCC2/iu, label: 'Outlook',  cls: 'progress-src-m365' },
+  { re: /Google Workspace/i,               label: 'Gmail',      cls: 'progress-src-google' },
+  { re: /Google Drive/i,                   label: 'GDrive',     cls: 'progress-src-google' },
+  { re: /Gmail/i,                          label: 'Gmail',      cls: 'progress-src-google' },
+  // Matches the whole words "fil", "filer", "file" and "files". The previous
+  // alternative "S.es" could never match "file"/"files" and looks like
+  // search-and-replace damage.
+  { re: /\bfil(er|es?)?\b/i,                label: 'Local',      cls: 'progress-src-file'   },
+];
+
+// Escape a value for safe insertion into HTML.
+// The value is converted with String() first. Besides & < > this also encodes
+// double and single quotes, so the result is safe inside quoted attribute
+// values as well as in element content.
+function _escHtml(s) {
+  return String(s)
+    .replace(/&/g,'&amp;')   // must run first so later entities are not re-escaped
+    .replace(/</g,'&lt;')
+    .replace(/>/g,'&gt;')
+    .replace(/"/g,'&quot;')
+    .replace(/'/g,'&#39;');
+}
+
+// Resolve an email address to a display name using S._allUsers, and strip
+// trailing count suffixes like ": 3 file(s)" or ": 5 msg(s)".
+//
+// Returns the input unchanged when it is empty. When the (stripped) text
+// contains "@" it is compared case-insensitively against each user's email and
+// googleEmail; a match returns that user's displayName. If there is no match,
+// or the matched user has no display name, the stripped text is returned.
+function _resolveDisplayName(text) {
+  if (!text) return text;
+  const stripped = text.replace(/:\s*\d+\s*(file\(s\)|files?|filer|msg\(s\)|folders?)[\u2026\.]*\s*$/iu, '').trim();
+  const check = stripped || text;
+  if (check.includes('@')) {
+    const email = check.toLowerCase();
+    // Tolerate the user list not being loaded yet
+    const user = (S._allUsers || []).find(function(u) {
+      return (u.email || '').toLowerCase() === email ||
+             (u.googleEmail || '').toLowerCase() === email;
+    });
+    // Only use the directory name when there is one; otherwise callers would
+    // end up rendering the string "undefined".
+    if (user && user.displayName) return user.displayName;
+  }
+  return stripped || text;
+}
+
+// S._progressCurrentUser tracks the most recent user name shown — it is reused for
+// sub-phases (e.g. mail folder counts) that don't repeat the username in their phase string.
+
+// Render the current scan phase into #progressWho.
+//
+// The phase text is matched against _PHASE_SOURCE_MAP (first hit wins):
+//   - source found and the text has a "Left — Right" shape: show the source
+//     pill plus the resolved name from the side that does not contain the
+//     source keyword, and remember that name in S._progressCurrentUser;
+//   - source found without a dash: show the pill plus the remembered user, or
+//     the phase text with leading emoji/whitespace removed;
+//   - no source: show the phase as plain informational text.
+function _setProgressPhase(phase) {
+  const target = document.getElementById('progressWho');
+  if (!target) return;
+
+  // Pill + user markup shared by both "source known" cases
+  const pillMarkup = function(entry, shownName) {
+    return '<span class="progress-src-pill ' + entry.cls + '">' + entry.label + '</span>' +
+      '<span class="progress-user">' + _escHtml(shownName) + '</span>';
+  };
+
+  // First map entry whose pattern occurs anywhere in the phase string
+  const matched = _PHASE_SOURCE_MAP.find(function(entry) { return entry.re.test(phase); }) || null;
+
+  // "Left — Right" split on em-dash / en-dash only (plain hyphens cause false splits)
+  const parts = phase.match(/^(.+?)\s+[\u2014\u2013]\s+(.+?)[\u2026\.]*\s*$/u);
+
+  if (!matched) {
+    // Informational phase (Auth mode, Delta mode, Resuming, …) — no pill
+    target.innerHTML = '<span class="progress-phase">' + _escHtml(phase) + '</span>';
+    return;
+  }
+
+  if (parts) {
+    const leftSide  = parts[1].trim();
+    const rightSide = parts[2].trim();
+    // The name is on whichever side lacks the source keyword
+    const nameSide = matched.re.test(leftSide) ? rightSide : leftSide;
+    const resolved = _resolveDisplayName(nameSide);
+    S._progressCurrentUser = resolved;
+    target.innerHTML = pillMarkup(matched, resolved);
+    return;
+  }
+
+  // Source identified but no dash split (e.g. "📂 Indbakke: 3 msg(s)"):
+  // prefer the last known user over showing a folder path.
+  const fallbackName = S._progressCurrentUser ||
+    phase.replace(/^[\u{1F000}-\u{1FFFF}\u{2600}-\u{27FF}\s]+/u, '').trim();
+  target.innerHTML = pillMarkup(matched, fallbackName);
+}
+
+// Reset the progress header: empty phase plus blank stats, ETA and file fields.
+function _clearProgressBar() {
+  _setProgressPhase('');
+  ['progressStats', 'progressEta', 'progressFile'].forEach(function(elId) {
+    document.getElementById(elId).textContent = '';
+  });
+}
+
+// Rebuild the segmented progress track: one segment per scan that is currently
+// running (M365, Google, files), each with a fill element whose id is
+// "progressFill_<key>" and whose width is taken from S._srcPct[key] (0 when
+// unset). Clears the track when no scan is running.
+function _renderProgressSegments() {
+  const track = document.getElementById('progressTrack');
+  if (!track) return;
+  const sources = [
+    { key: 'm365',   active: S._m365ScanRunning,   color: 'var(--accent)', label: 'M365'  },
+    { key: 'google', active: S._googleScanRunning,  color: '#3a7d44',       label: 'GWS'   },
+    { key: 'file',   active: S._fileScanRunning,    color: '#7a6a9e',       label: 'Files' },
+  ].filter(function(s) { return s.active; });
+  if (!sources.length) { track.innerHTML = ''; return; }
+  // A former conditional here appended '' in both branches; it was dead code
+  // and is dropped without changing the generated markup.
+  track.innerHTML = sources.map(function(s) {
+    return '<div class="progress-seg">' +
+           '<div class="progress-seg-fill" id="progressFill_' + s.key + '" style="background:' + s.color + ';width:' + (S._srcPct[s.key] || 0) + '%"></div>' +
+           '</div>';
+  }).join('');
+}
+
+// True when the scrollable element p is scrolled to within 24px of its bottom.
+function _logAtBottom(p) {
+  const distanceFromBottom = p.scrollHeight - p.scrollTop - p.clientHeight;
+  return distanceFromBottom < 24;
+}
+
+// Append a timestamped line to the log panel and persist it for this session.
+//
+// msg - text of the log line.
+// cls - optional severity suffix; the line gets the CSS class "log-<cls>".
+//
+// The line is inserted before the #logLive indicator so that stays last. The
+// panel only auto-scrolls when it was already at the bottom. While the
+// "errors only" filter is active, lines that are neither 'err' nor 'warn' are
+// hidden — the same rule setLogFilter() applies to existing lines. When
+// #logPanel is missing the DOM part is skipped but the line is still persisted.
+function log(msg, cls='') {
+  const p = document.getElementById('logPanel');
+  const timestamp = new Date().toLocaleTimeString();
+  if (p) {
+    const live = document.getElementById('logLive');
+    const atBottom = _logAtBottom(p);
+    const d = document.createElement('div');
+    d.className = 'log-line' + (cls ? ' log-' + cls : '');
+    d.textContent = timestamp + '  ' + msg;
+    // Insert before live indicator (always last)
+    if (live) p.insertBefore(d, live); else p.appendChild(d);
+    // Apply filter
+    if (_logFilter === 'err' && cls !== 'err' && cls !== 'warn') d.classList.add('log-err-hidden');
+    if (atBottom) p.scrollTop = p.scrollHeight;
+  }
+  // Persist to sessionStorage (best effort: storage may be full or disabled)
+  try {
+    const lines = JSON.parse(sessionStorage.getItem(_LOG_SESSION_KEY) || '[]');
+    lines.push({ t: timestamp, msg, cls });
+    if (lines.length > _LOG_MAX_LINES) lines.splice(0, lines.length - _LOG_MAX_LINES);
+    sessionStorage.setItem(_LOG_SESSION_KEY, JSON.stringify(lines));
+  } catch(e) {}
+}
+
+// Show or hide the "live" indicator line of the log panel.
+// A non-empty msg displays "▶ <msg>" and keeps the panel pinned to the bottom
+// if it was already there; an empty msg hides and clears the indicator.
+function setLogLive(msg) {
+  const indicator = document.getElementById('logLive');
+  if (!indicator) return;
+  if (!msg) {
+    indicator.style.display = 'none';
+    indicator.textContent = '';
+    return;
+  }
+  indicator.style.display = 'block';
+  indicator.textContent = '▶ ' + msg;
+  const panel = document.getElementById('logPanel');
+  if (_logAtBottom(panel)) panel.scrollTop = panel.scrollHeight;
+}
+
+// Switch the log filter ('all' or 'err'), update the two filter buttons and
+// hide every existing line that is neither an error nor a warning while the
+// 'err' filter is active.
+function setLogFilter(filter) {
+  _logFilter = filter;
+  const errorsOnly = filter === 'err';
+  document.getElementById('logFilterAll').classList.toggle('active', filter === 'all');
+  document.getElementById('logFilterErr').classList.toggle('active', errorsOnly);
+  const rows = document.querySelectorAll('#logPanel .log-line:not(#logLive)');
+  rows.forEach((row) => {
+    const important = row.classList.contains('log-err') || row.classList.contains('log-warn');
+    row.classList.toggle('log-err-hidden', errorsOnly && !important);
+  });
+}
+
+// Copy every log line (excluding the live indicator) to the clipboard, one per
+// line, and briefly flash "✓ Copied" on the copy button.
+// Does nothing when the async Clipboard API is unavailable — navigator.clipboard
+// is undefined outside secure contexts, which previously raised a TypeError.
+function copyLog() {
+  if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') return;
+  const lines = [];
+  document.querySelectorAll('#logPanel .log-line:not(#logLive)').forEach(function(d) {
+    lines.push(d.textContent);
+  });
+  navigator.clipboard.writeText(lines.join('\n')).then(function() {
+    const btn = document.querySelector('.log-copy-btn');
+    if (btn) { btn.textContent = '✓ Copied'; setTimeout(function(){ btn.textContent = '⎘ Copy'; }, 1500); }
+  }).catch(function() {});
+}
+
+// Rebuild the log panel from the lines persisted in sessionStorage and scroll
+// to the bottom. Any failure (missing panel, bad JSON, storage unavailable) is
+// ignored.
+function _restoreLog() {
+  try {
+    const saved = JSON.parse(sessionStorage.getItem(_LOG_SESSION_KEY) || '[]');
+    if (!saved.length) return;
+    const panel = document.getElementById('logPanel');
+    const liveRow = document.getElementById('logLive');
+    saved.forEach((entry) => {
+      const row = document.createElement('div');
+      row.className = entry.cls ? 'log-line log-' + entry.cls : 'log-line';
+      row.textContent = entry.t + '  ' + entry.msg;
+      // Keep the live indicator as the last child
+      if (liveRow) {
+        panel.insertBefore(row, liveRow);
+      } else {
+        panel.appendChild(row);
+      }
+    });
+    panel.scrollTop = panel.scrollHeight;
+  } catch (err) {}
+}
+
+// Wire up the drag handle that resizes the log panel vertically.
+// Dragging up makes the panel taller; the height snaps to whole log rows and
+// is limited to 2–30 rows. Does nothing unless the handle, the wrapper and the
+// panel all exist.
+function _initLogResize() {
+  const grip     = document.getElementById('logResizeHandle');
+  const wrapper  = document.getElementById('logWrap');
+  const logPanel = document.getElementById('logPanel');
+  if (!(grip && wrapper && logPanel)) return;
+
+  const ROW = 18; // 16px line-height + 2px margin-bottom
+  const PAD = 10; // 6px padding-top + 6px padding-bottom - 2px (no margin on last line)
+  const MIN_ROWS = 2;
+  const MAX_ROWS = 30;
+  let originY, originHeight;
+
+  const handleMove = (ev) => {
+    const grownBy = originY - ev.clientY; // drag up = taller
+    const boundedHeight = Math.max(60, Math.min(600, originHeight + grownBy));
+    const rowCount = Math.round((boundedHeight - PAD) / ROW);
+    const boundedRows = Math.max(MIN_ROWS, Math.min(MAX_ROWS, rowCount));
+    logPanel.style.height = (boundedRows * ROW + PAD) + 'px';
+  };
+
+  const handleEnd = (ev) => {
+    document.body.style.cursor = '';
+    document.body.style.userSelect = '';
+    grip.releasePointerCapture(ev.pointerId);
+    grip.removeEventListener('pointermove', handleMove);
+    grip.removeEventListener('pointerup',   handleEnd);
+    grip.removeEventListener('pointercancel', handleEnd);
+  };
+
+  grip.addEventListener('pointerdown', (ev) => {
+    originY = ev.clientY;
+    originHeight = logPanel.getBoundingClientRect().height;
+    document.body.style.cursor = 'ns-resize';
+    document.body.style.userSelect = 'none';
+    // Capture the pointer so the drag keeps tracking outside the handle
+    grip.setPointerCapture(ev.pointerId);
+    grip.addEventListener('pointermove', handleMove);
+    grip.addEventListener('pointerup',   handleEnd);
+    grip.addEventListener('pointercancel', handleEnd);
+    ev.preventDefault();
+  });
+}
+
+// Wire up the drag handle that resizes the preview panel horizontally.
+// Dragging left makes the panel wider. The width is kept between 280px and 70%
+// of the window width and is saved to sessionStorage ('gdpr_preview_width')
+// when the drag ends. Drags are ignored while the panel is hidden.
+function _initPreviewResize() {
+  const handle = document.getElementById('previewResizeHandle');
+  const panel  = document.getElementById('previewPanel');
+  if (!handle || !panel) return;
+  const MIN_W = 280;
+  let startX, startW, maxW;
+  handle.addEventListener('pointerdown', function(e) {
+    if (panel.classList.contains('hidden')) return;
+    startX = e.clientX;
+    startW = panel.getBoundingClientRect().width;
+    // Measure the limit at drag start: a value computed once at init goes
+    // stale as soon as the browser window is resized.
+    maxW = Math.round(window.innerWidth * 0.7);
+    document.body.style.cursor = 'col-resize';
+    document.body.style.userSelect = 'none';
+    handle.setPointerCapture(e.pointerId);
+    handle.addEventListener('pointermove', onDrag);
+    handle.addEventListener('pointerup',   onUp);
+    handle.addEventListener('pointercancel', onUp);
+    e.preventDefault();
+  });
+  function onDrag(e) {
+    const delta = startX - e.clientX; // drag left = wider
+    const w = Math.max(MIN_W, Math.min(maxW, startW + delta));
+    panel.style.width = w + 'px';
+  }
+  function onUp(e) {
+    document.body.style.cursor = '';
+    document.body.style.userSelect = '';
+    handle.releasePointerCapture(e.pointerId);
+    handle.removeEventListener('pointermove', onDrag);
+    handle.removeEventListener('pointerup',   onUp);
+    handle.removeEventListener('pointercancel', onUp);
+    // Remembering the width is best effort: storage can be disabled or full
+    try {
+      sessionStorage.setItem('gdpr_preview_width', parseInt(panel.style.width, 10));
+    } catch(err) {}
+  }
+}
+
+// Called by renderSourcesPanel() after every re-render.
+// Sizes the sources panel to its natural content height (all sources visible).
+// If the user previously dragged it smaller, that height — saved in
+// localStorage under 'gdpr_sources_h' — is used instead, but only while it is
+// still smaller than the current content height.
+function _fitSourcesPanel() {
+  const sourcesEl = document.getElementById('sourcesPanel');
+  if (!sourcesEl) return;
+  sourcesEl.style.height = '';            // reset so scrollHeight reports content height
+  const contentHeight = sourcesEl.scrollHeight;
+  let targetHeight = contentHeight;       // default: show everything
+  try {
+    const remembered = parseInt(localStorage.getItem('gdpr_sources_h'));
+    if (remembered && remembered < contentHeight) {
+      targetHeight = remembered;          // honour the user's smaller preference
+    }
+  } catch (err) {}
+  sourcesEl.style.height = targetHeight + 'px';
+}
+
+// Wire up the drag handle that resizes the sources panel vertically.
+// Dragging down makes the panel taller, up to its natural content height;
+// the height snaps to 22px rows with a minimum of two rows. When the drag ends
+// the height is saved to localStorage ('gdpr_sources_h'), or the saved value is
+// removed if the panel is back at full height.
+function _initSourcesResize() {
+  const grip      = document.getElementById('sourcesResizeHandle');
+  const sourcesEl = document.getElementById('sourcesPanel');
+  if (!grip || !sourcesEl) return;
+
+  const ROW   = 22;   // ~21px per .source-check row (padding:3px 0 + ~15px content)
+  const MIN_H = ROW * 2;
+  let originY, originHeight, ceiling;
+
+  // Keep a height between the two-row minimum and the content height
+  const bound = (h) => Math.max(MIN_H, Math.min(ceiling, h));
+
+  const handleMove = (ev) => {
+    const grownBy = ev.clientY - originY;  // drag down = taller, drag up = shorter
+    const snapped = Math.round(bound(originHeight + grownBy) / ROW) * ROW;
+    sourcesEl.style.height = bound(snapped) + 'px';
+  };
+
+  const handleEnd = (ev) => {
+    document.body.style.cursor = '';
+    document.body.style.userSelect = '';
+    grip.releasePointerCapture(ev.pointerId);
+    grip.removeEventListener('pointermove', handleMove);
+    grip.removeEventListener('pointerup',   handleEnd);
+    grip.removeEventListener('pointercancel', handleEnd);
+    const finalHeight = parseInt(sourcesEl.style.height);
+    try {
+      if (finalHeight >= ceiling) {
+        localStorage.removeItem('gdpr_sources_h'); // back to full — forget preference
+      } else {
+        localStorage.setItem('gdpr_sources_h', finalHeight);
+      }
+    } catch (storageErr) {}
+  };
+
+  grip.addEventListener('pointerdown', (ev) => {
+    originY = ev.clientY;
+    originHeight = sourcesEl.getBoundingClientRect().height;
+    // Max = natural scroll height (enough to show all sources — no more);
+    // clear the inline height to measure it, then restore the current height.
+    sourcesEl.style.height = '';
+    ceiling = sourcesEl.scrollHeight;
+    sourcesEl.style.height = originHeight + 'px';
+    document.body.style.cursor = 'ns-resize';
+    document.body.style.userSelect = 'none';
+    grip.setPointerCapture(ev.pointerId);
+    grip.addEventListener('pointermove', handleMove);
+    grip.addEventListener('pointerup',   handleEnd);
+    grip.addEventListener('pointercancel', handleEnd);
+    ev.preventDefault();
+  });
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+window._escHtml = _escHtml;
+window._resolveDisplayName = _resolveDisplayName;
+window._setProgressPhase = _setProgressPhase;
+window._clearProgressBar = _clearProgressBar;
+window._renderProgressSegments = _renderProgressSegments;
+window._logAtBottom = _logAtBottom;
+window.log = log;
+window.setLogLive = setLogLive;
+window.setLogFilter = setLogFilter;
+window.copyLog = copyLog;
+window._restoreLog = _restoreLog;
+window._initLogResize = _initLogResize;
+window._initPreviewResize = _initPreviewResize;
+window._initSourcesResize = _initSourcesResize;
+window._fitSourcesPanel   = _fitSourcesPanel;
+window._LOG_SESSION_KEY = _LOG_SESSION_KEY;
+window._LOG_MAX_LINES = _LOG_MAX_LINES;
+// _logFilter is reassigned by setLogFilter(), so a plain assignment would
+// export a stale snapshot. Expose it as a live accessor instead; writes through
+// window update the module variable too.
+Object.defineProperty(window, '_logFilter', {
+  configurable: true,
+  enumerable: true,
+  get: function() { return _logFilter; },
+  set: function(value) { _logFilter = value; },
+});
+window._PHASE_SOURCE_MAP = _PHASE_SOURCE_MAP;
diff --git a/static/js/profiles.js b/static/js/profiles.js
new file mode 100644
index 0000000..1e1d25b
--- /dev/null
+++ b/static/js/profiles.js
@@ -0,0 +1,709 @@
+import { S } from './state.js';
+// ── Profiles (15c) ───────────────────────────────────────────────────────────
+
+
+// Fetch the saved profiles from /api/profiles into S._profiles and refresh the
+// profile dropdown. Non-OK responses and errors are ignored because profiles
+// are not critical.
+async function loadProfiles() {
+  try {
+    const response = await fetch('/api/profiles');
+    if (response.ok) {
+      const payload = await response.json();
+      S._profiles = payload.profiles || [];
+      _renderProfileSelect();
+    }
+  } catch (err) {
+    /* profiles not critical */
+  }
+}
+
+// Rebuild the options of #profileSelect from S._profiles, keeping the first
+// (placeholder) option. Each option shows the profile name plus the date part
+// of last_run when present. The previous selection is restored if that profile
+// still exists; otherwise the placeholder is selected, the active profile is
+// cleared and the clear button hidden.
+function _renderProfileSelect() {
+  const dropdown = document.getElementById('profileSelect');
+  if (!dropdown) return;
+  const previousValue = dropdown.value;
+  // Remove everything after the placeholder option
+  while (dropdown.options.length > 1) dropdown.remove(1);
+  S._profiles.forEach((profile) => {
+    const option = document.createElement('option');
+    option.value = profile.id;
+    const runSuffix = profile.last_run ? ' — ' + profile.last_run.slice(0, 10) : '';
+    option.textContent = profile.name + runSuffix;
+    option.title = profile.description || '';
+    dropdown.appendChild(option);
+  });
+  const stillListed = previousValue &&
+    Array.from(dropdown.options).some((option) => option.value === previousValue);
+  if (stillListed) {
+    dropdown.value = previousValue;
+    return;
+  }
+  // The selected profile is gone: fall back to the placeholder
+  dropdown.value = '';
+  S._activeProfileId = null;
+  const clearButton = document.getElementById('profileClearBtn');
+  if (clearButton) clearButton.style.display = 'none';
+}
+
+// Show (inline-block) or hide the "clear profile" button, if it exists.
+function _setProfileClearBtn(visible) {
+  const clearButton = document.getElementById('profileClearBtn');
+  if (!clearButton) return;
+  clearButton.style.display = visible ? 'inline-block' : 'none';
+}
+
+// Change handler of the profile dropdown: marks the chosen profile as active,
+// shows the clear button and applies the profile to the sidebar. Ignores the
+// placeholder and ids that are not in S._profiles.
+function onProfileChange() {
+  const chosenId = document.getElementById('profileSelect').value;
+  // placeholder can't be selected (disabled), guard anyway
+  if (!chosenId) return;
+  const chosen = S._profiles.find((candidate) => candidate.id === chosenId);
+  if (!chosen) return;
+  S._activeProfileId = chosenId;
+  _setProfileClearBtn(true);
+  _applyProfile(chosen);
+}
+
+// Forget which profile is active: reset the dropdown to its placeholder and
+// hide the clear button. Sidebar settings are deliberately left alone — they
+// already reflect the loaded (or manually adjusted) state.
+function clearActiveProfile() {
+  S._activeProfileId = null;
+  const dropdown = document.getElementById('profileSelect');
+  if (dropdown) {
+    dropdown.value = '';
+  }
+  _setProfileClearBtn(false);
+}
+
+
+// Apply a saved profile to the sidebar controls.
+//
+// profile - profile object; the fields read here are sources, file_sources,
+//           google_sources, options, retention_years, fiscal_year_end,
+//           user_ids and name.
+//
+// In order: ticks the source checkboxes, queues file and Google source ids for
+// later application, sets the scan options and date filter, sets the retention
+// controls, applies the user selection, and logs that the profile was loaded.
+// Option fields that are undefined leave the corresponding control untouched.
+function _applyProfile(profile) {
+  // ── Sources ──────────────────────────────────────────────────────────────
+  // Restore source selections from profile — works for both M365 and file sources.
+  // File sources may not be rendered yet (they load async), so store their IDs
+  // in S._pendingProfileSources for renderSourcesPanel() to apply after re-render.
+  const profileSources = profile.sources || [];
+  document.querySelectorAll('#sourcesPanel input[data-source-id]').forEach(function(cb) {
+    cb.checked = profileSources.includes(cb.dataset.sourceId);
+  });
+  _updateAccountsVisibility();
+  // Deferred file sources — store IDs now, apply when _loadFileSources() resolves.
+  // Don't filter against S._fileSources here — it may be empty at this point.
+  const _knownSourceIds = new Set(['email', 'onedrive', 'sharepoint', 'teams', 'gmail', 'gdrive']);
+  S._pendingProfileSources = (profile.file_sources && profile.file_sources.length)
+    ? profile.file_sources.slice()
+    : profileSources.filter(function(id) { return !_knownSourceIds.has(id); });
+  // Deferred Google sources — store IDs now, apply when smGoogleRefreshStatus() resolves.
+  const googleIds = profile.google_sources
+    || profileSources.filter(function(id) { return id === 'gmail' || id === 'gdrive'; });
+  S._pendingGoogleSources = googleIds.slice();
+
+  // ── Options ───────────────────────────────────────────────────────────────
+  const opts = profile.options || {};
+
+  if (opts.email_body !== undefined) {
+    const el = document.getElementById('optEmailBody');
+    if (el) el.checked = opts.email_body;
+  }
+
+  if (opts.attachments !== undefined) {
+    const el = document.getElementById('optAttachments');
+    if (el) {
+      el.checked = opts.attachments;
+      // Update the size row opacity directly
+      const sizeRow = document.getElementById('attachSizeRow');
+      if (sizeRow) sizeRow.style.opacity = opts.attachments ? '1' : '0.4';
+    }
+  }
+
+  if (opts.max_attach_mb !== undefined) {
+    const el = document.getElementById('optMaxAttachMB');
+    if (el) el.value = opts.max_attach_mb;
+  }
+
+  if (opts.max_emails !== undefined) {
+    const el = document.getElementById('optMaxEmails');
+    if (el) el.value = opts.max_emails;
+  }
+
+  if (opts.delta !== undefined) {
+    const el = document.getElementById('optDelta');
+    if (el) el.checked = opts.delta;
+  }
+
+  if (opts.scan_photos !== undefined) {
+    const el = document.getElementById('optScanPhotos');
+    if (el) el.checked = opts.scan_photos;
+  }
+
+  // ── Date filter ───────────────────────────────────────────────────────────
+  const days = opts.older_than_days;
+  if (days !== undefined) {
+    const hidden  = document.getElementById('olderThan');
+    const dateIn  = document.getElementById('olderThanDate');
+    const presets = document.querySelectorAll('.date-preset');
+    if (hidden) hidden.value = days;
+    if (dateIn) {
+      if (!days) {
+        dateIn.value = '';
+      } else {
+        // Show the cutoff as a date: today minus the configured number of days
+        const d = new Date();
+        d.setDate(d.getDate() - days);
+        dateIn.value = d.toISOString().slice(0, 10);
+      }
+    }
+    // Highlight matching preset button
+    presets.forEach(p => {
+      const y = parseInt(p.dataset.years || '0');
+      const presetDays = y === 0 ? 0 : y * 365;
+      if (y === 0) {
+        p.classList.toggle('selected', !days);
+      } else {
+        p.classList.toggle('selected', days > 0 && presetDays === days);
+      }
+    });
+  }
+
+  // ── Retention ─────────────────────────────────────────────────────────────
+  const retEnabled = !!(opts.retention_enabled || profile.retention_years);
+  const retEl = document.getElementById('optRetention');
+  if (retEl) {
+    retEl.checked = retEnabled;
+    // Show/hide panel directly
+    const panel = document.getElementById('retentionPanel');
+    if (panel) panel.style.display = retEnabled ? 'block' : 'none';
+  }
+  if (profile.retention_years) {
+    const el = document.getElementById('optRetentionYears');
+    if (el) el.value = profile.retention_years;
+  }
+  if (profile.fiscal_year_end) {
+    const el = document.getElementById('optFiscalYearEnd');
+    if (el) el.value = profile.fiscal_year_end;
+  }
+  // typeof is the only safe existence test for a global that may be missing:
+  // evaluating the bare identifier would throw a ReferenceError.
+  if (typeof updateRetentionCutoffHint === 'function') updateRetentionCutoffHint();
+
+  // ── User selection ────────────────────────────────────────────────────────
+  if (profile.user_ids === 'all') {
+    S._allUsers.forEach(u => { u.selected = true; });
+    if (S._allUsers.length) renderAccountList();
+  } else if (Array.isArray(profile.user_ids) && profile.user_ids.length) {
+    // Entries may be user objects or plain ids; normalise to ids
+    window._pendingProfileUserIds = profile.user_ids.map(u => u.id || u);
+    _applyPendingProfileUsers();
+  } else if (Array.isArray(profile.user_ids) && profile.user_ids.length === 0) {
+    // Explicitly empty list — deselect everyone so previous sidebar state doesn't persist
+    S._allUsers.forEach(u => { u.selected = false; });
+    if (S._allUsers.length) renderAccountList();
+  }
+
+  log(t('m365_profile_applied', 'Profile loaded') + ': ' + profile.name);
+}
+
+function _applyPendingProfileUsers() {
+  const ids = window._pendingProfileUserIds;
+  if (!ids || !ids.length || !S._allUsers.length) return;
+  // Select only the users listed in the profile
+  S._allUsers.forEach(u => { u.selected = ids.includes(u.id); });
+  renderAccountList();
+  window._pendingProfileUserIds = null;
+}
+
+async function saveCurrentAsProfile() {
+  const name = prompt(t('m365_profile_save_prompt', 'Profile name:'),
+                      S._activeProfileId
+                        ? (S._profiles.find(p => p.id === S._activeProfileId) || {}).name || ''
+                        : '');
+  if (!name) return;
+  const { sources, fileSources, googleSources, allSources, user_ids, options } = buildScanPayload();
+  const existing = S._profiles.find(p => p.name.toLowerCase() === name.toLowerCase());
+  const profile = {
+    id:               existing?.id || '',
+    name,
+    description:      existing?.description || '',
+    sources:          allSources,
+    google_sources:   googleSources,
+    user_ids,
+    options,
+    retention_years:  parseInt(document.getElementById('optRetentionYears')?.value) || null,
+    fiscal_year_end:  document.getElementById('optFiscalYearEnd')?.value || '',
+    email_to:         '',
+    file_sources:     fileSources,
+  };
+  try {
+    const r = await fetch('/api/profiles/save', {
+      method: 'POST', headers: {'Content-Type':'application/json'},
+      body: JSON.stringify(profile)
+    });
+    const d = await r.json();
+    if (d.error) { alert(d.error); return; }
+    await loadProfiles();
+    // Select the newly saved profile
+    const sel = document.getElementById('profileSelect');
+    if (sel) { sel.value = d.profile.id; S._activeProfileId = d.profile.id; _setProfileClearBtn(true); }
+    log(t('m365_profile_saved', 'Profile saved') + ': ' + name);
+  } catch(e) {
+    alert('Save failed: ' + e.message);
+  }
+}
+
+// ── Profile management modal (#15d) ──────────────────────────────────────────
+
+function openProfileMgmtModal() {
+  try { _renderProfileMgmt(); } catch(e) { console.error('[profiles] _renderProfileMgmt threw:', e); }
+  document.getElementById('pmgmtBackdrop').classList.add('open');
+  // Auto-open editor for the first profile
+  if (S._profiles.length > 0) {
+    try { _pmgmtOpenEditor(S._profiles[0].id); } catch(e) { console.error('[profiles] _pmgmtOpenEditor threw:', e); }
+  }
+}
+
+function closeProfileMgmt() {
+  document.getElementById('pmgmtBackdrop').classList.remove('open');
+}
+
+function _sourceLabel(id) {
+  const known = {email:'Outlook', onedrive:'OneDrive', sharepoint:'SharePoint', teams:'Teams', gmail:'Gmail', gdrive:'Google Drive'};
+  if (known[id]) return known[id];
+  const fs = S._fileSources.find(s => s.id === id);
+  return fs ? (fs.label || fs.path || id) : id;
+}
+
+function _renderProfileMgmt() {
+  const list = document.getElementById('pmgmtList');
+  if (!list) return;
+  const saved = S._profiles.filter(p => p.name !== 'Default' || S._profiles.length === 1);
+  if (!saved.length) {
+    list.innerHTML = `<div class="pmgmt-empty">${t('m365_profile_no_profiles','No saved profiles yet. Use 💾 to save the current sidebar settings as a profile.')}</div>`;
+    return;
+  }
+  list.innerHTML = '';
+  for (const p of S._profiles) {
+    const sources   = (p.sources || []).map(_sourceLabel).join(', ') || '—';
+    const lastRun   = p.last_run ? p.last_run.slice(0,16).replace('T',' ') : t('m365_profile_never','never');
+    const isActive  = p.id === S._activeProfileId;
+    const row = document.createElement('div');
+    row.className = 'pmgmt-row';
+    row.dataset.id = p.id;
+    row.onclick = function() { _pmgmtOpenEditor(p.id); };
+    row.innerHTML = `
+      <div class="pmgmt-row-head">
+        <span class="pmgmt-name">${_esc(p.name)}${isActive ? ' <span style="color:var(--accent);font-weight:400;font-size:10px">● activ</span>' : ''}</span>
+        <div class="pmgmt-actions">
+          <div style="display:flex;border:1px solid var(--border);border-radius:5px;overflow:hidden">
+            <button class="btn-use" onclick="event.stopPropagation();_pmgmtUse('${p.id}')" style="border-radius:0;border:none;border-right:1px solid var(--border)" data-i18n="m365_profile_use">Brug</button>
+            <button onclick="event.stopPropagation();_pmgmtDuplicate('${p.id}')" style="border-radius:0;border:none" data-i18n="m365_profile_duplicate">Kopier</button>
+          </div>
+          <button class="btn-del" onclick="event.stopPropagation();_pmgmtDelete('${p.id}','${_esc(p.name)}')" data-i18n="m365_profile_delete">Slet</button>
+        </div>
+      </div>
+      <div class="pmgmt-sources">${_esc(sources)}</div>
+      ${p.description ? `<div class="pmgmt-desc">${_esc(p.description)}</div>` : ''}
+      <div class="pmgmt-meta">${t('m365_profile_last_run','Last run')}: ${lastRun}</div>
+    `;
+    list.appendChild(row);
+  }
+}
+
+function _esc(s) {
+  return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
+}
+
+function _pmgmtUse(id) {
+  const profile = S._profiles.find(p => p.id === id);
+  if (!profile) return;
+  S._activeProfileId = id;
+  _setProfileClearBtn(true);
+  _applyProfile(profile);
+  // Sync the topbar dropdown
+  const sel = document.getElementById('profileSelect');
+  if (sel) sel.value = id;
+  closeProfileMgmt();
+}
+
+function _pmgmtOpenEditor(id) {
+  const profile = S._profiles.find(p => p.id === id);
+  if (!profile) return;
+  _openEditorForProfile(profile);
+}
+
+function _openEditorForProfile(profile) {
+  const id = profile.id || '';
+  window._pmgmtEditId = id;
+  _pmgmtRoleActive = '';
+  // Highlight active row
+  document.querySelectorAll('.pmgmt-row').forEach(r => r.classList.toggle('active', id && r.dataset.id === id));
+  document.getElementById('pmgmtEditorTitle').textContent = profile.name;
+  const body = document.getElementById('pmgmtEditorBody');
+  const allSources = profile.sources || [];
+  const opts = profile.options || {};
+  const srcCheck = (id) => allSources.includes(id) ? 'checked' : '';
+
+  // Build account list from S._allUsers
+  const savedIds = new Set((profile.user_ids || []).map(u => u.id || u));
+  // If no saved IDs match current users, treat as all-selected (new profile or users changed)
+  const anyMatch = savedIds.size > 0 && S._allUsers.some(u => savedIds.has(u.id));
+  const accountRows = S._allUsers.map(u => {
+    // Only check if the user was explicitly saved — default to unchecked like the main window
+    const checked = anyMatch && savedIds.has(u.id) ? 'checked' : '';
+    const platBadge = u.platform === 'both' ? '<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:linear-gradient(90deg,#E6F1FB 50%,#EAF3DE 50%);color:#1a4a1a;font-weight:500;border:0.5px solid #b5d4b5">M365+GWS</span>'
+      : (u.platform || 'm365') === 'google' ? '<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#EAF3DE;color:#3B6D11;font-weight:500">GWS</span>'
+      : '<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#E6F1FB;color:#185FA5;font-weight:500">M365</span>';
+    const roleBadge = u.userRole === 'student' ? t('role_student','Elev') : u.userRole === 'staff' ? t('role_staff','Ansat') : t('role_other','Anden');
+    return `<label class="pmgmt-acct-row" data-uid="${_esc(u.id)}"><input type="checkbox" ${checked} data-uid="${_esc(u.id)}"><span style="flex:1;color:var(--color-text-primary);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${_esc(u.displayName)}</span>${platBadge}<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#D3D1C7;color:#444441">${roleBadge}</span></label>`;
+  }).join('');
+
+  body.innerHTML = `
+    <div>
+      <div class="pmgmt-editor-section-title">Navn</div>
+      <input id="pmgmtEditName" type="text" value="${_esc(profile.name)}" style="width:100%;margin-bottom:6px">
+      <textarea id="pmgmtEditDesc" style="width:100%;font-size:12px;height:44px;resize:none" placeholder="Beskrivelse (valgfri)">${_esc(profile.description || '')}</textarea>
+    </div>
+    <div style="display:flex;gap:0;flex:1;min-height:0">
+      <div style="flex:1;display:flex;flex-direction:column;gap:14px;overflow-y:auto;padding-right:16px">
+        <div>
+          <div class="pmgmt-editor-section-title">Kilder</div>
+          <div id="peSourcesPanel"></div>
+        </div>
+        <div>
+          <div class="pmgmt-editor-section-title">
+            <span>Konti</span>
+            <div style="display:flex;gap:4px;align-items:center">
+              <div style="display:flex;background:var(--bg);border:1px solid var(--border);border-radius:6px;overflow:hidden">
+                <button type="button" id="peRoleAll" onclick="_pmgmtRoleFilter('')" style="font-size:10px;height:22px;padding:0 7px;border:none;border-right:1px solid var(--border);background:var(--accent);color:#fff;cursor:pointer;box-sizing:border-box">${t('m365_filter_all','Alle')}</button>
+                <button type="button" id="peRoleStaff" onclick="_pmgmtRoleFilter('staff')" style="font-size:10px;height:22px;padding:0 7px;border:none;border-right:1px solid var(--border);background:none;color:var(--muted);cursor:pointer;box-sizing:border-box">${t('role_staff','Ansat')}</button>
+                <button type="button" id="peRoleStudent" onclick="_pmgmtRoleFilter('student')" style="font-size:10px;height:22px;padding:0 7px;border:none;background:none;color:var(--muted);cursor:pointer;box-sizing:border-box">${t('role_student','Elev')}</button>
+              </div>
+              <div style="display:flex;background:var(--bg);border:1px solid var(--border);border-radius:6px;overflow:hidden">
+                <button type="button" onclick="_pmgmtSelectAllAccounts(true)" style="font-size:10px;height:22px;padding:0 7px;border:none;border-right:1px solid var(--border);background:none;color:var(--muted);cursor:pointer;box-sizing:border-box">${t('btn_all','Alle')}</button>
+                <button type="button" onclick="_pmgmtSelectAllAccounts(false)" style="font-size:10px;height:22px;padding:0 7px;border:none;background:none;color:var(--muted);cursor:pointer;box-sizing:border-box">${t('btn_none','Ingen')}</button>
+              </div>
+            </div>
+          </div>
+          <div style="display:flex;gap:6px;margin-bottom:4px">
+            <input type="text" id="pmgmtAcctSearch" placeholder="Søg konti…" style="flex:1;font-size:12px" oninput="_pmgmtFilterAccounts(this.value)">
+            <button type="button" onclick="_pmgmtAddManual()" style="font-size:11px;padding:3px 10px;border-radius:5px;border:1px solid var(--border);background:none;color:var(--muted);cursor:pointer;white-space:nowrap">+ Tilføj konto</button>
+          </div>
+          <div class="pmgmt-account-list" id="pmgmtAcctList">${accountRows}</div>
+        </div>
+      </div>
+      <div class="pmgmt-settings-col" style="overflow-y:auto">
+        <div class="pmgmt-editor-section-title">Indstillinger</div>
+        <div style="display:flex;flex-direction:column;gap:6px;font-size:12px">
+          <label style="font-size:11px;color:var(--muted)">${t('m365_opt_date_from','Scan e-mails/filer fra')}</label>
+          <div class="datepicker-wrap">
+            <input type="date" id="peOptDate" autocomplete="off" value="${(function(){ if(!opts.older_than_days) return ''; var d=new Date(); d.setDate(d.getDate()-opts.older_than_days); return d.toISOString().slice(0,10); }())}" onchange="_peSetDate(this.value)">
+          <div class="date-presets">
+            <button type="button" class="date-preset peYearBtn ${(opts.older_than_days||0)===365   ? 'selected' : ''}" data-years="1"  onclick="_peSetYear(1)">${t('m365_preset_1yr','1 år')}</button>
+            <button type="button" class="date-preset peYearBtn ${(opts.older_than_days||0)===730   ? 'selected' : ''}" data-years="2"  onclick="_peSetYear(2)">${t('m365_preset_2yr','2 år')}</button>
+            <button type="button" class="date-preset peYearBtn ${(opts.older_than_days||0)===1825  ? 'selected' : ''}" data-years="5"  onclick="_peSetYear(5)">${t('m365_preset_5yr','5 år')}</button>
+            <button type="button" class="date-preset peYearBtn ${(opts.older_than_days||0)===3650  ? 'selected' : ''}" data-years="10" onclick="_peSetYear(10)">${t('m365_preset_10yr','10 år')}</button>
+            <button type="button" class="date-preset peYearBtn ${!(opts.older_than_days)           ? 'selected' : ''}" data-years="0"  onclick="_peSetYear(0)">${t('m365_preset_any','Alle')}</button>
+          </div>
+          </div>
+          <input type="hidden" id="peOptDays" value="${opts.older_than_days || 0}">
+          <hr style="border:none;border-top:1px solid var(--pmgmt-divider);margin:2px 0">
+          <div class="pmgmt-opt-row"><span>${t('m365_opt_email_body','Scan e-mailindhold')}</span><label class="toggle"><input type="checkbox" id="peOptBody" ${opts.email_body !== false ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
+          <div class="pmgmt-opt-row"><span>${t('m365_opt_attachments','Scan vedhæftede filer')}</span><label class="toggle"><input type="checkbox" id="peOptAtt" ${opts.attachments !== false ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
+          <div class="pmgmt-opt-row"><span style="color:var(--muted)">${t('m365_opt_max_attach','Maks. vedhæftet filstørrelse (MB)')}</span><input type="number" id="peOptMaxAttach" value="${opts.max_attach_mb || 20}" min="1" max="100" style="width:46px;padding:3px 6px;font-size:11px;text-align:right"></div>
+          <div class="pmgmt-opt-row"><span>${t('m365_opt_max_emails','Maks. e-mails pr. bruger')}</span><input type="number" id="peOptMaxEmails" value="${opts.max_emails || 2000}" min="10" max="50000" style="width:56px;padding:3px 6px;font-size:11px;text-align:right"></div>
+          <div class="pmgmt-opt-row"><span>${t('m365_opt_delta','Delta-scanning')}</span><label class="toggle"><input type="checkbox" id="peOptDelta" ${opts.delta ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
+          <div class="pmgmt-opt-row"><span>${t('m365_opt_scan_photos','Søg efter ansigter i billeder')}</span><label class="toggle"><input type="checkbox" id="peOptPhotos" ${opts.scan_photos ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
+          <hr style="border:none;border-top:1px solid var(--pmgmt-divider);margin:2px 0">
+          <div class="pmgmt-opt-row"><span>${t('m365_opt_retention','Opbevaringspolitik')}</span><label class="toggle"><input type="checkbox" id="peOptRetention" ${profile.retention_years ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
+          <div style="padding:7px 8px;background:var(--bg);border-radius:6px">
+            <div class="pmgmt-opt-row" style="margin-bottom:5px"><span style="color:var(--muted)">${t('m365_ret_years','Opbevaringsår')}</span><input type="number" id="peOptRetYears" value="${profile.retention_years || 5}" min="1" max="30" style="width:46px;padding:3px 6px;font-size:11px;text-align:right"></div>
+            <div style="display:flex;flex-direction:column;gap:3px">
+              <label style="font-size:11px;color:var(--muted)">${t('m365_ret_fy_end','Regnskabsår slut')}</label>
+              <select id="peOptFiscalYearEnd" style="font-size:11px;padding:3px 6px;width:100%">
+                <option value="" ${!profile.fiscal_year_end ? 'selected' : ''}>${t('m365_ret_fy_rolling','Rullende (i dag)')}</option>
+                <option value="12-31" ${profile.fiscal_year_end==='12-31' ? 'selected' : ''}>${t('m365_ret_fy_dec','31 dec (Bogføringsloven)')}</option>
+                <option value="06-30" ${profile.fiscal_year_end==='06-30' ? 'selected' : ''}>${t('m365_ret_fy_jun','30 jun')}</option>
+                <option value="03-31" ${profile.fiscal_year_end==='03-31' ? 'selected' : ''}>${t('m365_ret_fy_mar','31 mar')}</option>
+              </select>
+            </div>
+          </div>
+        </div>
+      </div>
+    </div>
+  `;;
+  document.getElementById('pmgmtEditorPlaceholder')?.remove();
+  document.getElementById('pmgmtEditor').classList.add('open');
+  _renderEditorSources((profile.sources || []).concat(profile.google_sources || []).concat(profile.file_sources || []));
+}
+
+function _peSetDate(val) {
+  if (!val) return;
+  const ms   = new Date() - new Date(val);
+  const days = Math.round(ms / 86400000);
+  const hidden = document.getElementById('peOptDays');
+  if (hidden) hidden.value = days;
+  // Clear selected year buttons since user picked a custom date
+  document.querySelectorAll('.peYearBtn').forEach(b => b.classList.remove('selected'));
+}
+
+function _peSetYear(years) {
+  const days = years === 0 ? 0 : years * 365;
+  const hidden = document.getElementById('peOptDays');
+  if (hidden) hidden.value = days;
+  document.querySelectorAll('.peYearBtn').forEach(function(btn) {
+    const y = parseInt(btn.dataset.years);
+    const active = (years === 0 && y === 0) || (years > 0 && y === years);
+    btn.classList.toggle('selected', active);
+  });
+  // Sync the date input
+  var dateInput = document.getElementById('peOptDate');
+  if (dateInput) {
+    if (days === 0) { dateInput.value = ''; }
+    else { var d = new Date(); d.setDate(d.getDate()-days); dateInput.value = d.toISOString().slice(0,10); }
+  }
+}
+
+function _renderEditorSources(checkedIds) {
+  const panel = document.getElementById('peSourcesPanel');
+  if (!panel) return;
+  let html = '';
+  _M365_SOURCES.forEach(function(s) {
+    const toggle = s.toggleId ? document.getElementById(s.toggleId) : null;
+    if (toggle && !toggle.checked) return;
+    const isChecked = checkedIds.includes(s.id);
+    html += '<label class="source-check">'
+      + '<input type="checkbox" data-source-id="' + s.id + '" data-source-type="m365"' + (isChecked ? ' checked' : '') + '>'
+      + '<span class="source-icon">' + s.icon + '</span>'
+      + '<span class="source-label">' + t(s.labelKey, s.labelDefault) + '</span>'
+      + '</label>';
+  });
+  if (window._googleConnected) {
+    var gmailOn = !document.getElementById('smGoogleSrcGmail') || document.getElementById('smGoogleSrcGmail').checked;
+    var driveOn = !document.getElementById('smGoogleSrcDrive') || document.getElementById('smGoogleSrcDrive').checked;
+    if (gmailOn || driveOn) html += '<hr style="border:none;border-top:1px solid var(--border);margin:4px 0">';
+    if (gmailOn) {
+      html += '<label class="source-check"><input type="checkbox" data-source-id="gmail" data-source-type="google"' + (checkedIds.includes('gmail') ? ' checked' : '') + '><span class="source-icon">📧</span><span class="source-label">Gmail</span></label>';
+    }
+    if (driveOn) {
+      html += '<label class="source-check"><input type="checkbox" data-source-id="gdrive" data-source-type="google"' + (checkedIds.includes('gdrive') ? ' checked' : '') + '><span class="source-icon">📁</span><span class="source-label">Google Drive</span></label>';
+    }
+  }
+  if (S._fileSources.length > 0) {
+    html += '<hr style="border:none;border-top:1px solid var(--border);margin:4px 0">';
+    S._fileSources.forEach(function(s) {
+      const isSmb = s.path && (s.path.startsWith('//') || s.path.startsWith('\\\\'));
+      html += '<label class="source-check"><input type="checkbox" data-source-id="' + _esc(s.id) + '" data-source-type="file"' + (checkedIds.includes(s.id) ? ' checked' : '') + '><span class="source-icon">' + (isSmb ? '🌐' : '📁') + '</span><span class="source-label" title="' + _esc(s.path||'') + '">' + _esc(s.label||s.path||s.id) + '</span></label>';
+    });
+  }
+  panel.innerHTML = html;
+}
+
+function _pmgmtNewProfile() {
+  // Create a blank profile shell and open the editor
+  const blank = {
+    id:          '',
+    name:        '',
+    description: '',
+    sources:     [],
+    google_sources: [],
+    user_ids:    [],
+    options:     {},
+    file_sources: [],
+  };
+  // Temporarily add to S._profiles so the editor can find it
+  window._pmgmtNewDraft = blank;
+  _openEditorForProfile(blank);
+}
+
+function _pmgmtCloseEditor() {
+  document.getElementById('pmgmtEditor').classList.remove('open');
+  document.querySelectorAll('.pmgmt-row').forEach(r => r.classList.remove('active'));
+  window._pmgmtEditId = null;
+  closeProfileMgmt();
+}
+
+function _pmgmtSelectAllAccounts(checked) {
+  document.querySelectorAll('#pmgmtAcctList label input[type=checkbox]').forEach(function(cb) {
+    if (cb.closest('label').style.display !== 'none') cb.checked = checked;
+  });
+}
+
+let _pmgmtRoleActive = '';
+function _pmgmtRoleFilter(role) {
+  _pmgmtRoleActive = role;
+  // Update button styles
+  ['peRoleAll','peRoleStaff','peRoleStudent'].forEach(function(id) {
+    const btn = document.getElementById(id);
+    if (!btn) return;
+    const isActive = (id === 'peRoleAll' && role === '') || (id === 'peRoleStaff' && role === 'staff') || (id === 'peRoleStudent' && role === 'student');
+    btn.style.background = isActive ? 'var(--accent)' : 'none';
+    btn.style.color      = isActive ? '#fff' : 'var(--muted)';
+    btn.style.border     = isActive ? '1px solid var(--accent)' : '1px solid var(--border)';
+  });
+  // Apply filter combined with any active text search
+  _pmgmtFilterAccounts(document.getElementById('pmgmtAcctSearch')?.value || '');
+}
+
+function _pmgmtAddManual() {
+  const email = prompt('E-mail adresse:');
+  if (!email || !email.trim()) return;
+  const list = document.getElementById('pmgmtAcctList');
+  if (!list) return;
+  const id = 'manual:' + email.trim().toLowerCase();
+  if (list.querySelector(`input[data-uid="${id}"]`)) return;  // already exists
+  const lbl = document.createElement('label');
+  lbl.className = 'pmgmt-acct-row';
+  lbl.innerHTML = `<input type="checkbox" checked data-uid="${_esc(id)}"><span style="flex:1;color:var(--text);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${_esc(email.trim())}</span><span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#D3D1C7;color:#444441">Manuel</span>`;
+  list.appendChild(lbl);
+}
+
+function _pmgmtFilterAccounts(q) {
+  q = (q || '').toLowerCase();
+  document.querySelectorAll('#pmgmtAcctList label').forEach(function(lbl) {
+    var name = (lbl.querySelector('span') || {}).textContent || '';
+    var uid  = lbl.querySelector('input')?.dataset?.uid || '';
+    var user = S._allUsers.find(u => u.id === uid);
+    var roleOk = !_pmgmtRoleActive || (user && user.userRole === _pmgmtRoleActive);
+    var nameOk = !q || name.toLowerCase().includes(q);
+    lbl.style.display = (roleOk && nameOk) ? '' : 'none';
+  });
+}
+
+async function _pmgmtSaveFullEdit() {
+  const id = window._pmgmtEditId;
+  const profile = (id ? S._profiles.find(p => p.id === id) : null) || window._pmgmtNewDraft || {};
+  const name = document.getElementById('pmgmtEditName')?.value?.trim();
+  if (!name) { alert(t('m365_profile_name_required','Profile name is required.')); return; }
+  const peSources     = Array.from(document.querySelectorAll('#peSourcesPanel input[type=checkbox]:checked'));
+  const m365Sources   = peSources.filter(cb => cb.dataset.sourceType === 'm365').map(cb => cb.dataset.sourceId);
+  const googleSources = peSources.filter(cb => cb.dataset.sourceType === 'google').map(cb => cb.dataset.sourceId);
+  const fileSources   = peSources.filter(cb => cb.dataset.sourceType === 'file').map(cb => cb.dataset.sourceId);
+  // Check whether the checkboxes were actually rendered in the editor DOM —
+  // NOT whether Google is connected or file sources are loaded. Those are async
+  // and may not have resolved when the editor first opened, leaving the panel
+  // without checkboxes even though the connection exists. Using the DOM as the
+  // source of truth avoids a race-condition that silently cleared google/file sources.
+  const googleRendered = !!document.querySelector('#peSourcesPanel input[data-source-type="google"]');
+  const fileRendered   = !!document.querySelector('#peSourcesPanel input[data-source-type="file"]');
+  const effectiveGoogleSources = googleRendered ? googleSources : (profile.google_sources || []);
+  const effectiveFileSources   = fileRendered   ? fileSources   : (profile.file_sources   || []);
+  const allSources    = m365Sources.concat(effectiveGoogleSources).concat(effectiveFileSources);
+  const user_ids = Array.from(document.querySelectorAll('#pmgmtAcctList input[type=checkbox]:checked'))
+    .map(cb => cb.dataset.uid)
+    .filter(Boolean);
+  const updated = {
+    ...profile,
+    name,
+    description: document.getElementById('pmgmtEditDesc')?.value?.trim() || '',
+    sources:        allSources,
+    google_sources: effectiveGoogleSources,
+    file_sources:   effectiveFileSources,
+    user_ids,
+    options: {
+      ...(profile.options || {}),
+      older_than_days: parseInt(document.getElementById('peOptDays')?.value) || 0,
+      email_body:      document.getElementById('peOptBody')?.checked ?? true,
+      attachments:     document.getElementById('peOptAtt')?.checked ?? true,
+      max_attach_mb:   parseInt(document.getElementById('peOptMaxAttach')?.value) || 20,
+      max_emails:      parseInt(document.getElementById('peOptMaxEmails')?.value) || 2000,
+      delta:           document.getElementById('peOptDelta')?.checked ?? false,
+      scan_photos:     document.getElementById('peOptPhotos')?.checked ?? false,
+    },
+    retention_years:  document.getElementById('peOptRetention')?.checked ? (parseInt(document.getElementById('peOptRetYears')?.value) || 5) : null,
+    fiscal_year_end:  document.getElementById('peOptRetention')?.checked ? (document.getElementById('peOptFiscalYearEnd')?.value || '') : '',
+  };
+  try {
+    const r = await fetch('/api/profiles/save', {
+      method: 'POST', headers: {'Content-Type':'application/json'},
+      body: JSON.stringify(updated)
+    });
+    const d = await r.json();
+    if (d.error) { alert(d.error); return; }
+    await loadProfiles();
+    window._pmgmtNewDraft = null;
+    log(t('m365_profile_saved','Profile saved') + ': ' + name);
+    // Show inline saved feedback without closing the modal
+    const footer = document.querySelector('#pmgmtEditor > div:last-child');
+    if (footer) {
+      const fb = document.createElement('span');
+      fb.textContent = '✓ ' + t('m365_profile_saved', 'Saved');
+      fb.style.cssText = 'font-size:11px;color:var(--success);margin-right:auto';
+      footer.prepend(fb);
+      setTimeout(function() { fb.remove(); }, 2000);
+    }
+    // Re-open the editor for the saved profile so it reflects the saved state
+    const saved = S._profiles.find(function(p) { return p.name === name; });
+    if (saved) { window._pmgmtEditId = saved.id; }
+  } catch(e) { alert('Save failed: ' + e.message); }
+}
+
+
+async function _pmgmtSaveEdit(id) {
+  const name = document.getElementById(`pmgmt-edit-name-${id}`)?.value?.trim();
+  const desc = document.getElementById(`pmgmt-edit-desc-${id}`)?.value?.trim();
+  if (!name) { alert(t('m365_profile_name_required','Profile name is required.')); return; }
+  const profile = S._profiles.find(p => p.id === id);
+  if (!profile) return;
+  const updated = { ...profile, name, description: desc || '' };
+  try {
+    const r = await fetch('/api/profiles/save', {
+      method: 'POST', headers: {'Content-Type':'application/json'},
+      body: JSON.stringify(updated)
+    });
+    const d = await r.json();
+    if (d.error) { alert(d.error); return; }
+    await loadProfiles();
+    _renderProfileMgmt();
+    log(t('m365_profile_saved','Profile saved') + ': ' + name);
+  } catch(e) { alert('Save failed: ' + e.message); }
+}
+
+async function _pmgmtDuplicate(id) {
+  const profile = S._profiles.find(p => p.id === id);
+  if (!profile) return;
+  const base = profile.name.replace(/ \(copy( \d+)?\)$/, '');
+  // Find a unique name
+  let n = 1, name = base + ' (copy)';
+  while (S._profiles.some(p => p.name === name)) { n++; name = `${base} (copy ${n})`; }
+  const copy = { ...profile, id: '', name, last_run: null, last_scan_id: null };
+  try {
+    const r = await fetch('/api/profiles/save', {
+      method: 'POST', headers: {'Content-Type':'application/json'},
+      body: JSON.stringify(copy)
+    });
+    const d = await r.json();
+    if (d.error) { alert(d.error); return; }
+    await loadProfiles();
+    _renderProfileMgmt();
+    log(t('m365_profile_duplicated','Profile duplicated') + ': ' + name);
+  } catch(e) { alert('Duplicate failed: ' + e.message); }
+}
+
+async function _pmgmtDelete(id, name) {
+  if (!confirm(t('m365_profile_delete_confirm','Delete profile') + ' "' + name + '"?')) return;
+  try {
+    const r = await fetch('/api/profiles/delete', {
+      method: 'POST', headers: {'Content-Type':'application/json'},
+      body: JSON.stringify({ id })
+    });
+    const d = await r.json();
+    if (d.error) { alert(d.error); return; }
+    if (S._activeProfileId === id) { S._activeProfileId = null; _setProfileClearBtn(false); }
+    await loadProfiles();
+    _renderProfileMgmt();
+    log(t('m365_profile_deleted','Profile deleted') + ': ' + name);
+  } catch(e) { alert('Delete failed: ' + e.message); }
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+window.loadProfiles = loadProfiles;
+window._renderProfileSelect = _renderProfileSelect;
+window._setProfileClearBtn = _setProfileClearBtn;
+window.onProfileChange = onProfileChange;
+window.clearActiveProfile = clearActiveProfile;
+window._applyProfile = _applyProfile;
+window._applyPendingProfileUsers = _applyPendingProfileUsers;
+window.saveCurrentAsProfile = saveCurrentAsProfile;
+window.openProfileMgmtModal = openProfileMgmtModal;
+window.closeProfileMgmt = closeProfileMgmt;
+window._sourceLabel = _sourceLabel;
+window._renderProfileMgmt = _renderProfileMgmt;
+window._esc = _esc;
+window._pmgmtUse = _pmgmtUse;
+window._pmgmtOpenEditor = _pmgmtOpenEditor;
+window._openEditorForProfile = _openEditorForProfile;
+window._peSetDate = _peSetDate;
+window._peSetYear = _peSetYear;
+window._renderEditorSources = _renderEditorSources;
+window._pmgmtNewProfile = _pmgmtNewProfile;
+window._pmgmtCloseEditor = _pmgmtCloseEditor;
+window._pmgmtSelectAllAccounts = _pmgmtSelectAllAccounts;
+window._pmgmtRoleFilter = _pmgmtRoleFilter;
+window._pmgmtAddManual = _pmgmtAddManual;
+window._pmgmtFilterAccounts = _pmgmtFilterAccounts;
+window._pmgmtSaveFullEdit = _pmgmtSaveFullEdit;
+window._pmgmtSaveEdit = _pmgmtSaveEdit;
+window._pmgmtDuplicate = _pmgmtDuplicate;
+window._pmgmtDelete = _pmgmtDelete;
+window._pmgmtRoleActive = _pmgmtRoleActive;
diff --git a/static/js/results.js b/static/js/results.js
new file mode 100644
index 0000000..655da00
--- /dev/null
+++ b/static/js/results.js
@@ -0,0 +1,886 @@
+import { S } from './state.js';
+// ── Cards ─────────────────────────────────────────────────────────────────────
+// Lookup table keyed by an item's `source_type`. Each value is a three-element
+// array: [icon, CSS class for the source badge, human-readable label].
+// appendCard() falls back to ['📄', '', source_type] for keys not listed here.
+const SOURCE_BADGES = {
+  email:      ['📧', 'badge-email',      'Outlook'],
+  gmail:      ['📧', 'badge-gmail',      'Gmail'],
+  gdrive:     ['📁', 'badge-gdrive',     'GDrive'],
+  onedrive:   ['💾', 'badge-onedrive',   'OneDrive'],
+  sharepoint: ['🌐', 'badge-sharepoint', 'SharePoint'],
+  teams:      ['💬', 'badge-teams',      'Teams'],
+  local:      ['📁', 'badge-local',      'Local'],
+  smb:        ['🌐', 'badge-smb',        'Network'],
+};
+
+function appendCard(f) {
+  const search = document.getElementById('filterSearch').value.trim().toLowerCase();
+  const srcVal = document.getElementById('filterSource').value;
+  if (search && !f.name.toLowerCase().includes(search)) return;
+  if (srcVal  && f.source_type !== srcVal) return;
+
+  const grid = document.getElementById('grid');
+  const [icon, badgeCls, label] = SOURCE_BADGES[f.source_type] || ['📄', '', f.source_type];
+  const src  = f.thumb_b64
+    ? 'data:' + f.thumb_mime + ';base64,' + f.thumb_b64
+    : '/api/thumb?name=' + encodeURIComponent(f.name) + '&type=' + encodeURIComponent(f.source_type);
+
+  const card = document.createElement('div');
+  card.className = 'card' + (S.isListView ? ' list-view' : '');
+  card.dataset.id = f.id;
+  card.onclick = () => openPreview(f);
+
+  const delBtn = window.VIEWER_MODE ? '' : `<button class="card-delete-btn" title="${t('m365_delete_confirm','Delete')}" onclick="event.stopPropagation();deleteItem(${JSON.stringify(f).replace(/"/g,'&quot;')},this.closest('.card'))">🗑</button>`;
+
+  if (S.isListView) {
+    card.innerHTML = `
+      <div style="font-size:24px; flex-shrink:0">${icon}</div>
+      <div class="card-info list-info">
+        <div class="card-name" title="${f.name}">${f.name}</div>
+        <div class="card-meta">${f.size_kb} KB · ${f.modified || ''}${f.folder ? ' · 📂 ' + f.folder : ''}</div>
+        <div class="card-source"><span class="source-badge ${badgeCls}">${label}</span> ${f.source || ''}${f.account_name ? ' · <span class="account-pill" title="' + f.account_name + '">' + (f.user_role === 'student' ? '<span class="role-badge">' + t('role_student','Elev') + '</span>' : f.user_role === 'staff' ? '<span class="role-badge">' + t('role_staff','Ansat') + '</span>' : '') + f.account_name + '</span>' : ''}${f.transfer_risk === 'external-recipient' ? ' <span class="role-pill" style="background:#7B2D00;color:#FFD0B0">⚠ Ext.</span>' : f.transfer_risk ? ' <span class="role-pill" style="background:#003D7B;color:#B0D4FF">🔗</span>' : ''}</div>
+      </div>
+      <span class="cpr-badge">${f.cpr_count} CPR</span>
+      ${f.face_count > 0 ? '<span class="photo-face-badge">' + f.face_count + ' ' + t('m365_badge_faces', f.face_count === 1 ? 'face' : 'faces') + '</span> ' : ''}
+      ${f.exif && f.exif.gps ? '<span class="photo-face-badge" style="background:#0a3a5a;color:#7ec8d0">🌍 GPS</span> ' : ''}
+      ${f.special_category && f.special_category.length ? '<span class="special-cat-badge">⚠ Art.9 — ' + f.special_category.filter(function(s){return s !== 'gps_location' && s !== 'exif_pii';}).join(', ') + '</span> ' : ''}${f.overdue ? '<span class="overdue-badge">🗓 Overdue</span>' : ''}
+      ${delBtn}`;
+  } else {
+    card.innerHTML = `
+      <div class="thumb-wrap"><img src="${src}" alt="${f.name}" loading="lazy"></div>
+      <div class="card-info">
+        <div class="card-name" title="${f.name}">${f.name}</div>
+        <div class="card-meta">${f.size_kb} KB · ${f.modified || ''}</div>
+        ${f.folder ? `<div class="card-meta" style="font-size:10px" title="${f.folder}">📂 ${f.folder}</div>` : ''}
+        <div class="card-source"><span class="source-badge ${badgeCls}">${label}</span>${f.account_name ? ' <span class="account-pill" title="' + f.account_name + '">' + (f.user_role === "student" ? '<span class="role-badge">' + t("role_student","Elev") + "</span>" : f.user_role === "staff" ? '<span class="role-badge">' + t("role_staff","Ansat") + "</span>" : "") + f.account_name + '</span>' : ''}${f.transfer_risk === "external-recipient" ? ' <span class="role-pill" style="background:#7B2D00;color:#FFD0B0">⚠ Ext.</span>' : f.transfer_risk ? ' <span class="role-pill" style="background:#003D7B;color:#B0D4FF">🔗</span>' : ''}</div>
+        <span class="cpr-badge">${f.cpr_count} CPR</span>${f.face_count > 0 ? ' <span class="photo-face-badge">' + f.face_count + ' ' + t('m365_badge_faces', f.face_count === 1 ? 'face' : 'faces') + '</span>' : ''}${f.exif && f.exif.gps ? ' <span class="photo-face-badge" style="background:#0a3a5a;color:#7ec8d0">🌍 GPS</span>' : ''}${f.overdue ? ' <span class="overdue-badge">🗓 Overdue</span>' : ''}
+      </div>
+      ${delBtn}`;
+  }
+  grid.appendChild(card);
+}
+
+function renderGrid(files) {
+  const grid = document.getElementById('grid');
+  grid.innerHTML = '';
+  files.forEach(f => appendCard(f));
+}
+
+// ── Preview panel ─────────────────────────────────────────────────────────────
+let _previewItemId = null;
+
+async function openPreview(f) {
+  // Highlight selected card
+  document.querySelectorAll('.card.selected').forEach(c => c.classList.remove('selected'));
+  const cardEl = document.querySelector(`.card[data-id="${CSS.escape(f.id)}"]`);
+  if (cardEl) cardEl.classList.add('selected');
+
+  const panel   = document.getElementById('previewPanel');
+  const frame   = document.getElementById('previewFrame');
+  const loading = document.getElementById('previewLoading');
+  const title   = document.getElementById('previewTitle');
+  const meta    = document.getElementById('previewMeta');
+
+  panel.classList.remove('hidden');
+  const _savedW = sessionStorage.getItem('gdpr_preview_width');
+  if (_savedW) panel.style.width = _savedW + 'px';
+  title.textContent = f.name;
+  frame.style.display = 'none';
+  loading.style.display = 'flex';
+  loading.textContent = 'Loading preview…';
+
+  meta.innerHTML = [
+    f.account_name ? `<span style="font-weight:500">👤 ${f.account_name}</span>` : '',
+    f.source   ? `<span>${f.source}</span>` : '',
+    f.size_kb  ? `<span>${f.size_kb} KB</span>` : '',
+    f.modified ? `<span>${f.modified}</span>` : '',
+    f.cpr_count ? `<span style="color:var(--danger)">${f.cpr_count} CPR</span>` : '',
+    f.url ? `<button class="preview-open-btn" onclick="window.open('${f.url}','_blank')">${t("m365_preview_open","Open in M365 ↗")}</button>` : '',
+  ].filter(Boolean).join('');
+
+  _previewItemId = f.id;
+  loadDisposition(f.id);  // load disposition for this item (#6)
+
+  try {
+    const r = await fetch('/api/preview/' + encodeURIComponent(f.id)
+      + '?source_type=' + encodeURIComponent(f.source_type || '')
+      + '&account_id='  + encodeURIComponent(f.account_id  || ''));
+    const d = await r.json();
+
+    if (_previewItemId !== f.id) return; // stale — user clicked another card
+
+    if (d.error) {
+      loading.textContent = d.error;
+      return;
+    }
+
+    if (d.type === 'local') {
+      loading.style.display = 'none';
+      frame.style.display = 'block';
+      frame.srcdoc = `<html><body style="font-family:sans-serif;color:#ccc;background:#1e1e1e;padding:24px;display:flex;flex-direction:column;align-items:center;justify-content:center;height:80vh;gap:12px">
+        <div style="font-size:40px">📁</div>
+        <div style="font-size:14px;font-weight:600">${d.name || f.name}</div>
+        <div style="font-size:11px;color:#888">${t('m365_preview_local_file','Local file — no cloud preview available')}</div>
+        <div style="font-size:10px;color:#666;word-break:break-all;max-width:400px;text-align:center">${d.path || ''}</div>
+      </body></html>`;
+      return;
+    }
+
+    if (d.type === 'html' && d.html) {
+      loading.style.display = 'none';
+      frame.style.display = 'block';
+      const theme = document.body.dataset.theme === 'dark' ? '#1e1e1e' : '#ffffff';
+      const textColor = document.body.dataset.theme === 'dark' ? '#e0e0e0' : '#111111';
+      const mutedColor = document.body.dataset.theme === 'dark' ? '#888' : '#666';
+      frame.srcdoc = `<html><body style="margin:0;background:${theme};color:${textColor};font-family:sans-serif;--muted:${mutedColor};--text:${textColor};--mono:monospace">${d.html}</body></html>`;
+      return;
+    }
+
+    if (d.type === 'info' && d.html) {
+      loading.style.display = 'none';
+      frame.style.display = 'block';
+      const theme = document.body.dataset.theme === 'dark' ? '#1e1e1e' : '#ffffff';
+      frame.srcdoc = `<html><body style="margin:0;padding:20px;background:${theme};color:#888;font-family:sans-serif">${d.html}</body></html>`;
+      return;
+    }
+
+    if (d.type === 'iframe' && d.url) {
+      frame.src = d.url;
+      frame.onload = () => {
+        loading.style.display = 'none';
+        frame.style.display = 'block';
+      };
+    } else if (d.type === 'html') {
+      const blob = new Blob([d.html], {type: 'text/html'});
+      frame.src = URL.createObjectURL(blob);
+      frame.onload = () => {
+        loading.style.display = 'none';
+        frame.style.display = 'block';
+      };
+    } else {
+      loading.textContent = t('m365_preview_open','Open in M365') + ' — No preview available.';
+    }
+  } catch(e) {
+    loading.textContent = 'Preview failed: ' + e.message;
+  }
+}
+
+// ── Retention policy (#1) ────────────────────────────────────────────────────
+
+function toggleRetentionPanel() {
+  const enabled = document.getElementById('optRetention').checked;
+  document.getElementById('retentionPanel').style.display = enabled ? 'block' : 'none';
+  if (enabled) updateRetentionCutoffHint();
+}
+
+function updateRetentionCutoffHint() {
+  const years  = parseInt(document.getElementById('optRetentionYears')?.value) || 5;
+  const fyEnd  = document.getElementById('optFiscalYearEnd')?.value || '';
+  const hint   = document.getElementById('retentionCutoffHint');
+  if (!hint) return;
+  // Compute cutoff client-side for instant feedback
+  const today = new Date();
+  let cutoff;
+  if (fyEnd) {
+    const [mm, dd] = fyEnd.split('-').map(Number);
+    let fyEndDate = new Date(today.getFullYear(), mm - 1, dd);
+    if (fyEndDate >= today) fyEndDate = new Date(today.getFullYear() - 1, mm - 1, dd);
+    cutoff = new Date(fyEndDate); cutoff.setFullYear(cutoff.getFullYear() - years);
+  } else {
+    cutoff = new Date(today); cutoff.setFullYear(cutoff.getFullYear() - years);
+  }
+  const iso = cutoff.toISOString().split('T')[0];
+  const mode = fyEnd ? t('m365_ret_mode_fiscal', 'fiscal year') : t('m365_ret_mode_rolling', 'rolling');
+  hint.textContent = t('m365_ret_cutoff_hint', 'Items modified before') + ' ' + iso + ' (' + mode + ') ' + t('m365_ret_cutoff_flagged', 'will be flagged');
+}
+
+// Mark cards as overdue after scan completes or on load
+async function markOverdueCards() {
+  const retentionEnabled = document.getElementById('optRetention')?.checked;
+  if (!retentionEnabled) return;
+  const years  = parseInt(document.getElementById('optRetentionYears')?.value) || 5;
+  const fyEnd  = document.getElementById('optFiscalYearEnd')?.value || '';
+  try {
+    const params = new URLSearchParams({years});
+    if (fyEnd) params.set('fiscal_year_end', fyEnd);
+    const r = await fetch('/api/db/overdue?' + params);
+    const d = await r.json();
+    if (!d.items) return;
+    const overdueIds = new Set(d.items.map(i => i.id));
+    // Mark S.flaggedData entries
+    S.flaggedData.forEach(f => { f.overdue = overdueIds.has(f.id); });
+    // Re-render to show badges
+    renderGrid(S.filteredData.length ? S.filteredData : S.flaggedData);
+    if (d.count > 0) {
+      log('🗓 ' + d.count + ' ' + t('m365_overdue_found', 'overdue item(s) found') + ' (cutoff: ' + d.cutoff_date + ')', 'warn');
+    }
+  } catch(e) { /* DB not available -- skip */ }
+}
+
+// Pre-filter bulk delete to overdue items
+async function preFilterOverdue() {
+  const years  = parseInt(document.getElementById('optRetentionYears')?.value) || 5;
+  const fyEnd  = document.getElementById('optFiscalYearEnd')?.value || '';
+  try {
+    const params = new URLSearchParams({years});
+    if (fyEnd) params.set('fiscal_year_end', fyEnd);
+    const r = await fetch('/api/db/overdue?' + params);
+    const d = await r.json();
+    if (d.cutoff_date) {
+      document.getElementById('bdOlderThan').value = d.cutoff_date;
+      updateBdPreview();
+    }
+  } catch(e) {
+    // Fallback: compute client-side
+    const today = new Date();
+    const cutoff = new Date(today); cutoff.setFullYear(cutoff.getFullYear() - years);
+    document.getElementById('bdOlderThan').value = cutoff.toISOString().split('T')[0];
+    updateBdPreview();
+  }
+}
+
+function clearBdFilters() {
+  document.getElementById('bdSource').value = '';
+  document.getElementById('bdMinCpr').value = '1';
+  document.getElementById('bdOlderThan').value = '';
+  updateBdPreview();
+}
+
+// ── Data subject lookup (#4) ──────────────────────────────────────────────
+
+let _dsubItems = [];  // items from last lookup, for bulk delete
+
+function openSubjectModal() {
+  document.getElementById("dsubBackdrop").classList.add("open");
+  document.getElementById("dsubInput").value = "";
+  document.getElementById("dsubStatus").textContent = "";
+  document.getElementById("dsubResults").innerHTML = "";
+  document.getElementById("dsubDeleteBtn").style.display = "none";
+  _dsubItems = [];
+  setTimeout(() => document.getElementById("dsubInput").focus(), 80);
+}
+
+function closeDsubModal() {
+  document.getElementById("dsubBackdrop").classList.remove("open");
+}
+
+async function runSubjectLookup() {
+  const cpr = document.getElementById("dsubInput").value.trim();
+  if (!cpr) return;
+  const statusEl  = document.getElementById("dsubStatus");
+  const resultsEl = document.getElementById("dsubResults");
+  const deleteBtn = document.getElementById("dsubDeleteBtn");
+  statusEl.textContent = t("m365_subject_searching", "Searching…");
+  resultsEl.innerHTML  = "";
+  deleteBtn.style.display = "none";
+  _dsubItems = [];
+  try {
+    const r = await fetch("/api/db/subject", {
+      method: "POST", headers: {"Content-Type":"application/json"},
+      body: JSON.stringify({cpr})
+    });
+    const d = await r.json();
+    if (d.error) { statusEl.textContent = d.error; return; }
+    if (!d.count) {
+      statusEl.textContent = t("m365_subject_not_found", "No flagged items found for this CPR number.");
+      return;
+    }
+    statusEl.textContent = d.count + " " + t("m365_subject_found", "item(s) found");
+    _dsubItems = d.items;
+    resultsEl.innerHTML = d.items.map(item => `
+      <div class="dsub-result-row">
+        <div class="dsub-result-name" title="${item.name}">${item.name}</div>
+        <div class="dsub-result-meta">${item.source_type || ""}</div>
+        <div class="dsub-result-meta">${item.modified || ""}</div>
+        <div class="dsub-result-meta" style="color:var(--danger)">${item.cpr_count} CPR</div>
+      </div>
+    `).join("");
+    if (d.count > 0) deleteBtn.style.display = "block";
+  } catch(e) {
+    statusEl.textContent = "Error: " + e.message;
+  }
+}
+
+async function deleteSubjectItems() {
+  if (!_dsubItems.length) return;
+  const count = _dsubItems.length;
+  if (!confirm(`${count} ${t("m365_subject_delete_confirm", "item(s) will be permanently deleted. Continue?")}`))
+    return;
+  const ids = _dsubItems.map(i => i.id);
+  const statusEl = document.getElementById("dsubStatus");
+  statusEl.textContent = t("m365_bulk_deleting", "Deleting…");
+  try {
+    const r = await fetch("/api/delete_bulk", {
+      method: "POST", headers: {"Content-Type":"application/json"},
+      body: JSON.stringify({ids, reason: "data-subject-request"})
+    });
+    const d = await r.json();
+    statusEl.textContent = `${d.deleted || 0} ${t("m365_bulk_deleted","deleted")}`;
+    document.getElementById("dsubDeleteBtn").style.display = "none";
+    document.getElementById("dsubResults").innerHTML = "";
+    _dsubItems = [];
+    // Refresh grid
+    S.flaggedData = S.flaggedData.filter(f => !ids.includes(f.id));
+    S.filteredData = S.filteredData.filter(f => !ids.includes(f.id));
+    renderGrid();
+    updateStats();
+  } catch(e) {
+    statusEl.textContent = "Delete failed: " + e.message;
+  }
+}
+
+// ── Disposition tagging (#6) ───────────────────────────────────────────────
+
+let _dispositionItemId = null;
+
+async function loadDisposition(itemId) {
+  _dispositionItemId = itemId;
+  const row = document.getElementById("dispositionRow");
+  const sel = document.getElementById("dispositionSelect");
+  const saved = document.getElementById("dispositionSaved");
+  row.style.display = "flex";
+  saved.textContent = "";
+  try {
+    const r = await fetch("/api/db/disposition/" + encodeURIComponent(itemId));
+    const d = await r.json();
+    if (d.error) return;  // DB not available -- hide row
+    const status = d.status || "unreviewed";
+    sel.value = status;
+    // Cache on S.flaggedData item so the filter bar works without extra API calls
+    const item = S.flaggedData.find(f => f.id === itemId);
+    if (item) item.disposition = status;
+  } catch(e) {
+    row.style.display = "none";
+  }
+}
+
+async function saveDisposition() {
+  if (!_dispositionItemId) return;
+  const status  = document.getElementById("dispositionSelect").value;
+  const savedEl = document.getElementById("dispositionSaved");
+  savedEl.textContent = "";
+  try {
+    await fetch("/api/db/disposition", {
+      method: "POST", headers: {"Content-Type":"application/json"},
+      body: JSON.stringify({item_id: _dispositionItemId, status})
+    });
+    savedEl.textContent = t("m365_disp_saved", "✓ Saved");
+    setTimeout(() => { savedEl.textContent = ""; }, 2000);
+    // Update cached value on the S.flaggedData item
+    const item = S.flaggedData.find(f => f.id === _dispositionItemId);
+    if (item) item.disposition = status;
+    // Refresh card badge if a disposition filter is active
+    const dispFilter = document.getElementById("filterDisposition")?.value;
+    if (dispFilter) applyFilters();
+  } catch(e) {
+    savedEl.textContent = "Error";
+  }
+}
+
+function closePreview() {
+  const panel = document.getElementById('previewPanel');
+  panel.style.width = '';   // clear inline width so CSS .hidden { width:0 } takes effect
+  panel.classList.add('hidden');
+  document.getElementById('previewFrame').src = '';
+  document.querySelectorAll('.card.selected').forEach(c => c.classList.remove('selected'));
+  _previewItemId = null;
+}
+
+document.addEventListener('keydown', e => {
+  if (e.key === 'Escape') { closeAbout(); closeModeInfo(); closeBulkDelete(); closePreview(); closeDsubModal(); closeSmtpModal(); closeProfileMgmt(); closeImportDBModal(); closeFileSourcesModal(); closeSourcesMgmt(); closeSettings(); closePinPrompt(); }
+});
+
+// ── Delete ────────────────────────────────────────────────────────────────────
+
+async function deleteItem(f, cardEl) {
+  if (!confirm(t('m365_delete_confirm', 'Delete') + ' "' + f.name + '"?\n\n' + t('m365_delete_warning', 'This cannot be undone.'))) return;
+  try {
+    const r = await fetch('/api/delete_item', {
+      method: 'POST', headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({id: f.id, source_type: f.source_type, account_id: f.account_id, drive_id: f.drive_id})
+    });
+    const d = await r.json();
+    if (d.ok) {
+      S.flaggedData   = S.flaggedData.filter(x => x.id !== f.id);
+      S.filteredData  = S.filteredData.filter(x => x.id !== f.id);
+      if (cardEl) cardEl.remove();
+      updateStats();
+      log(t('m365_log_deleted', 'Deleted:') + ' ' + f.name, 'ok');
+      if (_previewItemId === f.id) closePreview();
+    } else {
+      log(t('m365_log_delete_failed', 'Delete failed:') + ' ' + (d.error || '?'), 'err');
+    }
+  } catch(e) {
+    log(t('m365_log_delete_failed', 'Delete failed:') + ' ' + e.message, 'err');
+  }
+}
+
+// ── Bulk delete modal ─────────────────────────────────────────────────────────
+
+function openBulkDelete() {
+  applyI18n();
+  updateBdPreview();
+  document.getElementById('bulkDeleteBackdrop').classList.add('open');
+}
+function closeBulkDelete() {
+  document.getElementById('bulkDeleteBackdrop').classList.remove('open');
+  document.getElementById('bdProgress').textContent = '';
+}
+
+function _bdFilters() {
+  return {
+    source_type:     document.getElementById('bdSource').value,
+    min_cpr:         parseInt(document.getElementById('bdMinCpr').value) || 1,
+    older_than_date: document.getElementById('bdOlderThan').value,
+  };
+}
+
+function _bdMatches() {
+  const f = _bdFilters();
+  return S.flaggedData.filter(x => {
+    if (f.source_type && x.source_type !== f.source_type) return false;
+    if (x.cpr_count < f.min_cpr) return false;
+    if (f.older_than_date && x.modified > f.older_than_date) return false;
+    return true;
+  });
+}
+
+function updateBdPreview() {
+  const matches = _bdMatches();
+  const prev = document.getElementById('bdPreview');
+  if (!prev) return;
+  if (matches.length === 0) {
+    prev.textContent = t('m365_bulk_no_match', 'No items match these criteria.');
+    document.getElementById('bdConfirmBtn').disabled = true;
+  } else {
+    prev.innerHTML = `<strong style="color:var(--danger)">${matches.length}</strong> ${t('m365_bulk_match_count', 'item(s) will be deleted')}`;
+    document.getElementById('bdConfirmBtn').disabled = false;
+  }
+}
+
+
+// ── Auto-connect SSE on page load (#21) ──────────────────────────────────────
+// ── SSE connection management ────────────────────────────────────────────────
+// The browser keeps an SSE connection to /api/scan/stream for live scan events.
+// Problem: idle SSE connections silently die (Flask/Werkzeug threading, proxies,
+// OS TCP keepalive). EventSource auto-reconnects, but during the reconnect
+// window a scheduled scan's events are lost.
+//
+// Solution: a polling watchdog checks /api/scan/status every few seconds.
+// When it detects a running scan (manual or scheduled), it ensures the SSE
+// connection is alive and the progress UI is visible.
+
+// Handle returned by setInterval for the status poll; null until the watchdog is started.
+let _sseWatchdogTimer = null;
+// Set to true after the first successful status poll has been processed.
+let _initialStatusChecked = false;
+const _SSE_POLL_INTERVAL = 4000;  // ms between status polls
+
+function _ensureSSE() {
+  // Open SSE if not already open or if the existing connection is dead
+  if (S.es && S.es.readyState !== EventSource.CLOSED) return;
+  if (S.es) { try { S.es.close(); } catch(_){} }
+  console.log('[SSE] Opening connection to /api/scan/stream');
+  S.es = new EventSource('/api/scan/stream');
+  S.es.onopen = function() { console.log('[SSE] Connection established'); };
+  S.es.onerror = function(e) {
+    console.warn('[SSE] Connection error (will auto-reconnect)', e);
+  };
+  _attachScanListeners(S.es);
+  _attachSchedulerListeners(S.es);
+}
+
+/**
+ * One watchdog tick: poll /api/scan/status and react to the result.
+ *
+ * - If a scan is running, make sure the SSE connection exists and, when no
+ *   scan is tracked locally yet, switch the UI into "scan running" state.
+ * - On the first successful poll, load the last-scan summary if no scan is
+ *   running.
+ * - A failed poll is only logged to the console.
+ *
+ * The fetch is fire-and-forget; the function itself returns nothing.
+ */
+function _sseWatchdog() {
+  fetch('/api/scan/status').then(function(r) { return r.json(); }).then(function(status) {
+    if (status.running) {
+      // A scan is in progress — make sure SSE is connected and progress UI is visible
+      _ensureSSE();
+      // Only take over the UI when none of the local "scan running" flags is set.
+      if (!S._m365ScanRunning && !S._googleScanRunning && !S._fileScanRunning) {
+        document.getElementById('scanBtn').disabled = true;
+        document.getElementById('stopBtn').style.display = 'inline-block';
+        // /api/scan/status checks the M365 lock — if running=true it's an M365 scan
+        S._m365ScanRunning = true; _renderProgressSegments();
+        document.getElementById('progressFile').textContent = t('m365_sse_reconnecting', 'Reconnecting to running scan…');
+        log(t('m365_sse_reconnecting', 'Reconnecting to running scan…'));
+      }
+    }
+    // Runs once, on the first poll that returns a status.
+    if (!_initialStatusChecked) {
+      _initialStatusChecked = true;
+      if (!status.running) loadLastScanSummary();
+    }
+    // When no scan is running, we still keep polling — the SSE connection
+    // may have died and we need to detect the *next* scheduled scan.
+    // The SSE itself is only opened/reopened when a scan is detected.
+  }).catch(function(err) {
+    // Status endpoint unavailable — server might be restarting
+    console.warn('[SSE] status poll failed:', err);
+  });
+}
+
+function _autoConnectSSEIfRunning() {
+  // Open initial SSE connection
+  _ensureSSE();
+  // Check if a scan is already running (e.g. scheduled scan started before page load)
+  _sseWatchdog();
+  // Start polling watchdog — catches scheduled scans that start later
+  if (!_sseWatchdogTimer) {
+    _sseWatchdogTimer = setInterval(_sseWatchdog, _SSE_POLL_INTERVAL);
+  }
+}
+
+// ── Viewer mode result loader ─────────────────────────────────────────────────
+/**
+ * Load stored results from /api/db/flagged and render them.
+ *
+ * When the response is not a non-empty array, /api/db/stats is fetched
+ * instead and — if it reports a scan_id — a summary card (hits, unique CPR
+ * numbers, scanned count, date, sources) replaces the empty state.
+ * Otherwise the items become S.flaggedData, the grid is shown and rendered,
+ * and the trend chart is loaded on a best-effort basis.
+ * Any error is logged to the console and otherwise ignored.
+ */
+async function _loadViewerResults() {
+  try {
+    const r = await fetch('/api/db/flagged');
+    const items = await r.json();
+    if (!Array.isArray(items) || items.length === 0) {
+      // Show last-scan summary card (stats only, no items yet)
+      const panel = document.getElementById('lastScanSummary');
+      const empty = document.getElementById('emptyState');
+      const r2 = await fetch('/api/db/stats');
+      const stats = await r2.json();
+      if (stats.scan_id && panel && empty) {
+        // finished_at is multiplied by 1000 before being passed to Date
+        // (presumably a Unix timestamp in seconds — TODO confirm against the API).
+        const dateStr = stats.finished_at
+          ? new Date(stats.finished_at * 1000).toLocaleDateString('da-DK', {day:'numeric', month:'short', year:'numeric'})
+          : '—';
+        // Display names for the keys of stats.by_source; unknown keys are shown as-is.
+        const srcLabels = {email:'Outlook',onedrive:'OneDrive',sharepoint:'SharePoint',teams:'Teams',
+                          gmail:'Gmail',gdrive:'Drive',local:'Lokale filer',smb:'SMB'};
+        const srcStr = Object.keys(stats.by_source || {}).map(s => srcLabels[s] || s).join(' · ') || '—';
+        panel.innerHTML =
+          '<div class="last-scan-card">' +
+            '<h3>' + t('last_scan_title', 'Seneste scanning') + '</h3>' +
+            '<div class="last-scan-stats">' +
+              '<div class="last-scan-stat"><span class="val">' + (stats.flagged_count || 0) + '</span><span class="lbl">' + t('last_scan_hits', 'Fund') + '</span></div>' +
+              '<div class="last-scan-stat"><span class="val">' + (stats.unique_subjects || 0) + '</span><span class="lbl">' + t('last_scan_subjects', 'Unikke CPR') + '</span></div>' +
+              '<div class="last-scan-stat"><span class="val">' + (stats.total_scanned || 0) + '</span><span class="lbl">' + t('last_scan_scanned', 'Scannet') + '</span></div>' +
+            '</div>' +
+            '<div style="margin-top:12px;font-size:11px;color:var(--muted)">' + dateStr + ' &nbsp;·&nbsp; ' + srcStr + '</div>' +
+          '</div>';
+        empty.style.display = 'none';
+        panel.style.display = 'flex';
+      }
+      return;
+    }
+    // Items available: make them the current data set with no filter applied.
+    S.flaggedData = items;
+    S.filteredData = [];
+    const grid = document.getElementById('grid');
+    const emptyState = document.getElementById('emptyState');
+    const lastScan   = document.getElementById('lastScanSummary');
+    if (emptyState) emptyState.style.display = 'none';
+    if (lastScan)   lastScan.style.display   = 'none';
+    if (grid)       grid.style.display       = 'grid';
+    renderGrid(items);
+    // Trend chart is optional; a synchronous failure must not break the result list.
+    try { loadTrend(); } catch(_) {}
+  } catch(e) {
+    console.error('[viewer] failed to load results:', e);
+  }
+}
+
+// Page start-up. Restores persisted UI state first; in viewer mode only the
+// stored results are loaded and the rest of the initialisation is skipped.
+document.addEventListener('DOMContentLoaded', () => {
+  _restoreLog();
+  _initLogResize();
+  _initPreviewResize();
+  _initSourcesResize();
+  restoreSectionStates();
+  if (window.VIEWER_MODE) {
+    _loadViewerResults();
+    return;
+  }
+  _loadFileSources();
+  // NOTE(review): the trailing comment on the next line mentions S._fileSources
+  // and renderSourcesPanel(); it looks like it describes _loadFileSources()
+  // above rather than the SSE call it is attached to — confirm.
+  _autoConnectSSEIfRunning();  // populates S._fileSources then calls renderSourcesPanel()
+  smGoogleRefreshStatus();    // sets _googleConnected and re-renders sources panel
+  // Restore all source toggle states
+  fetch('/api/src_toggles').then(function(r){ return r.json(); }).then(function(d) {
+    _restoreM365SourceToggles(d);
+    var gm = document.getElementById('smGoogleSrcGmail');
+    var gd = document.getElementById('smGoogleSrcDrive');
+    // Only override a checkbox when the server actually sent a value for it.
+    if (gm && d.src_gmail !== undefined) { gm.checked = !!d.src_gmail; }
+    if (gd && d.src_drive !== undefined) { gd.checked = !!d.src_drive; }
+  }).catch(function(){});
+
+  // ── macOS pywebview: push content below traffic-light buttons ─────────────
+  // In frameless pywebview windows on macOS the content starts at y=0, behind
+  // the system close/minimise/maximise buttons (~28px). Apply a padding only
+  // when running inside pywebview AND on macOS (navigator.platform contains Mac).
+  if (window.pywebview && navigator.platform.toLowerCase().includes('mac')) {
+    document.body.style.paddingTop = '30px';
+  }
+
+  // Keep the bulk-delete preview in sync with its filter inputs.
+  ['bdSource','bdMinCpr','bdOlderThan'].forEach(id => {
+    const el = document.getElementById(id);
+    if (el) el.addEventListener('input', updateBdPreview);
+  });
+  // Recompute the retention cutoff hint when either retention setting changes.
+  ['optRetentionYears','optFiscalYearEnd'].forEach(id => {
+    const el = document.getElementById(id);
+    if (el) el.addEventListener('change', updateRetentionCutoffHint);
+  });
+  // Reload the trend chart on window resize, but only while its panel is shown.
+  window.addEventListener('resize', () => {
+    const tp = document.getElementById('trendPanel');
+    if (tp && tp.style.display !== 'none') loadTrend();
+  });
+  // Delta-scan checkbox: check status when enabled, hide the status row when disabled.
+  const deltaCb = document.getElementById('optDelta');
+  if (deltaCb) {
+    deltaCb.addEventListener('change', () => {
+      if (deltaCb.checked) checkDeltaStatus();
+      else document.getElementById('deltaStatusRow').style.display = 'none';
+    });
+  }
+});
+
+async function executeBulkDelete() {
+  const matches = _bdMatches();
+  if (!matches.length) return;
+  const confirmMsg = matches.length + ' ' + t('m365_bulk_confirm_q', 'item(s) will be permanently deleted. Continue?');
+  if (!confirm(confirmMsg)) return;
+
+  const btn = document.getElementById('bdConfirmBtn');
+  const prog = document.getElementById('bdProgress');
+  btn.disabled = true;
+  prog.textContent = t('m365_bulk_deleting', 'Deleting…');
+
+  try {
+    const r = await fetch('/api/delete_bulk', {
+      method: 'POST', headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({ ids: matches.map(x => x.id), filters: {} })
+    });
+    const d = await r.json();
+    if (d.ok) {
+      const deletedSet = new Set(matches.map(x => x.id));
+      S.flaggedData  = S.flaggedData.filter(x => !deletedSet.has(x.id));
+      S.filteredData = S.filteredData.filter(x => !deletedSet.has(x.id));
+      renderGrid(S.filteredData.length ? S.filteredData : S.flaggedData);
+      updateStats();
+      prog.innerHTML = `<span style="color:var(--ok,#4c4)">✓ ${d.deleted} ${t('m365_bulk_deleted', 'deleted')}</span>` +
+        (d.failed ? ` · <span style="color:var(--danger)">${d.failed} ${t('m365_bulk_failed', 'failed')}</span>` : '');
+      if (d.errors && d.errors.length) {
+        d.errors.forEach(err => log('✗ ' + err.name + ': ' + err.error, 'err'));
+      }
+      log(t('m365_log_bulk_done', 'Bulk delete:') + ' ' + d.deleted + ' deleted, ' + d.failed + ' failed', d.failed ? 'err' : 'ok');
+      if (d.failed === 0) setTimeout(closeBulkDelete, 1800);
+    } else {
+      prog.textContent = d.error || 'Error';
+    }
+  } catch(e) {
+    prog.textContent = e.message;
+  } finally {
+    btn.disabled = false;
+  }
+}
+
+function applyFilters() {
+  const search  = document.getElementById('filterSearch').value.trim().toLowerCase();
+  const srcVal  = document.getElementById('filterSource').value;
+  const dispVal     = document.getElementById('filterDisposition')?.value || '';
+  const transferVal = document.getElementById('filterTransfer')?.value || '';
+  const specialVal  = document.getElementById('filterSpecial')?.value || '';
+  S.filteredData = S.flaggedData.filter(f => {
+    if (search && !f.name.toLowerCase().includes(search)) return false;
+    if (srcVal       && f.source_type !== srcVal) return false;
+    if (dispVal      && (f.disposition || 'unreviewed') !== dispVal) return false;
+    if (transferVal  && (f.transfer_risk || '') !== transferVal) return false;
+    if (specialVal === '1' && !(f.special_category && f.special_category.length)) return false;
+    if (specialVal === 'photo' && !(f.face_count > 0)) return false;
+    return true;
+  });
+  const grid = document.getElementById('grid');
+  if (S.filteredData.length === 0 && S.flaggedData.length > 0) {
+    grid.style.display = 'none';
+    document.getElementById('emptyState').innerHTML =
+      `<div class="empty-icon">🔍</div><div class="empty-text">${t('m365_no_matches','No matches')}</div>`;
+    document.getElementById('emptyState').style.display = 'flex';
+  } else {
+    document.getElementById('emptyState').style.display = 'none';
+    grid.style.display = S.isListView ? 'block' : 'grid';
+    renderGrid(S.filteredData);
+  }
+}
+
+async function exportExcel() {
+  if (!S.flaggedData || S.flaggedData.length === 0) {
+    log(t('m365_export_no_data', 'No results to export.'));
+    return;
+  }
+  if (window.pywebview && window.pywebview.api && window.pywebview.api.save_excel) {
+    try {
+      const r = await window.pywebview.api.save_excel();
+      if (r && r.ok) { log('Excel exported: ' + r.path); }
+      else if (r && r.error && r.error !== 'cancelled') { alert('Export failed: ' + r.error); }
+    } catch(e) { alert('Export failed: ' + e.message); }
+    return;
+  }
+  const btn = document.getElementById('exportBtn');
+  if (btn) { btn.disabled = true; btn.textContent = '⏳'; }
+  try {
+    // In pywebview (macOS/Windows app), blob URL downloads don't work —
+    // use the native save dialog exposed via the JS API instead.
+    if (window.pywebview && window.pywebview.api && window.pywebview.api.save_excel) {
+      const result = await window.pywebview.api.save_excel();
+      if (result && result.ok) {
+        log(t('m365_export_done', 'Excel export ready.'), 'ok');
+      } else {
+        if (result && result.error && result.error !== 'cancelled') {
+          log('Export error: ' + result.error, 'err');
+        }
+      }
+      return;
+    }
+    // Browser / localhost fallback: fetch as blob and trigger download
+    const r = await fetch('/api/export_excel');
+    if (!r.ok) {
+      const err = await r.json().catch(() => ({error: 'Export failed'}));
+      log('Export error: ' + (err.error || r.status), 'err');
+      return;
+    }
+    const blob = await r.blob();
+    const url  = URL.createObjectURL(blob);
+    const a    = document.createElement('a');
+    const disp = r.headers.get('Content-Disposition') || '';
+    const match = disp.match(/filename=([^\s;]+)/);
+    a.href     = url;
+    a.download = match ? match[1] : 'export.xlsx';
+    document.body.appendChild(a);
+    a.click();
+    document.body.removeChild(a);
+    URL.revokeObjectURL(url);
+    log(t('m365_export_done', 'Excel export ready.'), 'ok');
+  } catch(e) {
+    log('Export error: ' + e.message, 'err');
+  } finally {
+    if (btn) { btn.disabled = false; btn.innerHTML = '⬇ Excel'; }
+  }
+}
+
+async function exportArticle30() {
+  if (!S.flaggedData || S.flaggedData.length === 0) {
+    log(t('m365_export_no_data', 'No results to export.'));
+    return;
+  }
+  if (window.pywebview && window.pywebview.api && window.pywebview.api.save_article30) {
+    try {
+      const r = await window.pywebview.api.save_article30();
+      if (r && r.ok) { log('Article 30 exported: ' + r.path); }
+      else if (r && r.error && r.error !== 'cancelled') { alert('Export failed: ' + r.error); }
+    } catch(e) { alert('Export failed: ' + e.message); }
+    return;
+  }
+  const btn = document.getElementById('exportA30Btn');
+  if (btn) { btn.disabled = true; btn.textContent = '⏳'; }
+  try {
+    const r = await fetch('/api/export_article30');
+    if (!r.ok) {
+      const err = await r.json().catch(() => ({error: 'Export failed'}));
+      log('Article 30 export error: ' + (err.error || r.status), 'err');
+      return;
+    }
+    const blob = await r.blob();
+    const url  = URL.createObjectURL(blob);
+    const a    = document.createElement('a');
+    const disp = r.headers.get('Content-Disposition') || '';
+    const match = disp.match(/filename=([^\s;]+)/);
+    a.href     = url;
+    a.download = match ? match[1] : 'article30.docx';
+    document.body.appendChild(a);
+    a.click();
+    document.body.removeChild(a);
+    URL.revokeObjectURL(url);
+    log(t('m365_article30_done', 'Article 30 report ready.'), 'ok');
+  } catch(e) {
+    log('Article 30 export error: ' + e.message, 'err');
+  } finally {
+    if (btn) { btn.disabled = false; btn.innerHTML = '📋 Art.30'; }
+  }
+}
+
+function clearFilters() {
+  document.getElementById('filterSearch').value = '';
+  document.getElementById('filterSource').value = '';
+  const fd = document.getElementById('filterDisposition');
+  if (fd) fd.value = '';
+  const ft = document.getElementById('filterTransfer');
+  if (ft) ft.value = '';
+  const fs = document.getElementById('filterSpecial');
+  if (fs) fs.value = '';
+  applyFilters();
+}
+
+function toggleView() {
+  S.isListView = !S.isListView;
+  document.getElementById('listViewBtn').textContent = S.isListView ? t('m365_btn_grid_view', '⊞ Grid') : t('m365_btn_list_view', '☰ List');
+  document.getElementById('grid').className = S.isListView ? '' : 'grid';
+  document.getElementById('grid').style.display = S.isListView ? 'block' : 'grid';
+  renderGrid(S.filteredData.length ? S.filteredData : S.flaggedData);
+}
+
+// ── Hint tooltips ─────────────────────────────────────────────────────────────
+
+function toggleHint(icon) {
+  const isActive = icon.classList.contains('active');
+  // Close all open hints first
+  document.querySelectorAll('.hint-icon.active').forEach(function(el) {
+    el.classList.remove('active');
+    const b = el.nextElementSibling;
+    if (b && b.classList.contains('hint-bubble')) b.style.display = '';
+  });
+  if (!isActive) {
+    icon.classList.add('active');
+    // Position bubble using fixed coords so it escapes sidebar stacking context
+    const bubble = icon.nextElementSibling;
+    if (bubble && bubble.classList.contains('hint-bubble')) {
+      bubble.style.display = 'block';
+      const rect = icon.getBoundingClientRect();
+      bubble.style.top  = Math.round(rect.top + rect.height / 2 - bubble.offsetHeight / 2) + 'px';
+      bubble.style.left = Math.round(rect.right + 8) + 'px';
+    }
+    // Close when clicking anywhere else
+    setTimeout(function() {
+      document.addEventListener('click', function closeHint(e) {
+        if (!e.target.classList.contains('hint-icon')) {
+          document.querySelectorAll('.hint-icon.active').forEach(function(el) {
+            el.classList.remove('active');
+          });
+          document.querySelectorAll('.hint-bubble').forEach(function(el) {
+            el.style.display = '';
+          });
+          document.removeEventListener('click', closeHint);
+        }
+      });
+    }, 0);
+  }
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+window.appendCard = appendCard;
+window.renderGrid = renderGrid;
+window.openPreview = openPreview;
+window.toggleRetentionPanel = toggleRetentionPanel;
+window.updateRetentionCutoffHint = updateRetentionCutoffHint;
+window.markOverdueCards = markOverdueCards;
+window.preFilterOverdue = preFilterOverdue;
+window.clearBdFilters = clearBdFilters;
+window.openSubjectModal = openSubjectModal;
+window.closeDsubModal = closeDsubModal;
+window.runSubjectLookup = runSubjectLookup;
+window.deleteSubjectItems = deleteSubjectItems;
+window.loadDisposition = loadDisposition;
+window.saveDisposition = saveDisposition;
+window.closePreview = closePreview;
+window.deleteItem = deleteItem;
+window.openBulkDelete = openBulkDelete;
+window.closeBulkDelete = closeBulkDelete;
+window._bdFilters = _bdFilters;
+window._bdMatches = _bdMatches;
+window.updateBdPreview = updateBdPreview;
+window._ensureSSE = _ensureSSE;
+window._sseWatchdog = _sseWatchdog;
+window._autoConnectSSEIfRunning = _autoConnectSSEIfRunning;
+window._loadViewerResults = _loadViewerResults;
+window.executeBulkDelete = executeBulkDelete;
+window.applyFilters = applyFilters;
+window.exportExcel = exportExcel;
+window.exportArticle30 = exportArticle30;
+window.clearFilters = clearFilters;
+window.toggleView = toggleView;
+window.toggleHint = toggleHint;
+window.SOURCE_BADGES = SOURCE_BADGES;
+window._previewItemId = _previewItemId;
+window._dsubItems = _dsubItems;
+window._dispositionItemId = _dispositionItemId;
+window._sseWatchdogTimer = _sseWatchdogTimer;
+window._initialStatusChecked = _initialStatusChecked;
+window._SSE_POLL_INTERVAL = _SSE_POLL_INTERVAL;
diff --git a/static/js/scan.js b/static/js/scan.js
new file mode 100644
index 0000000..496e562
--- /dev/null
+++ b/static/js/scan.js
@@ -0,0 +1,730 @@
+import { S } from './state.js';
+// ── DB Export / Import (#11) ──────────────────────────────────────────────────
+
+async function exportDB() {
+  // In pywebview app, use native save dialog; in browser, use blob download
+  if (window.pywebview && window.pywebview.api && window.pywebview.api.save_db_export) {
+    try {
+      const r = await window.pywebview.api.save_db_export();
+      if (r && r.ok) { log(t('m365_db_exported','Database exported') + ': ' + r.path); }
+      else if (r && r.error && r.error !== 'cancelled') { alert(t('m365_db_export_error','Export failed') + ': ' + r.error); }
+    } catch(e) { alert(t('m365_db_export_error','Export failed') + ': ' + e.message); }
+    return;
+  }
+  // Browser fallback
+  try {
+    const res = await fetch('/api/db/export');
+    if (!res.ok) {
+      const d = await res.json().catch(() => ({}));
+      alert(t('m365_db_export_error','Export failed') + ': ' + (d.error || res.statusText));
+      return;
+    }
+    const blob = await res.blob();
+    const cd   = res.headers.get('Content-Disposition') || '';
+    const m    = cd.match(/filename="([^"]+)"/);
+    const name = m ? m[1] : 'gdpr_export.zip';
+    const url  = URL.createObjectURL(blob);
+    const a    = document.createElement('a');
+    a.href = url; a.download = name; a.click();
+    URL.revokeObjectURL(url);
+    log(t('m365_db_exported','Database exported') + ': ' + name);
+  } catch(e) {
+    alert(t('m365_db_export_error','Export failed') + ': ' + e.message);
+  }
+}
+
+function openImportDBModal() {
+  const fi = document.getElementById('importDbFile');
+  if (fi) fi.value = '';
+  const mode = document.getElementById('importDbMode');
+  if (mode) mode.value = 'merge';
+  document.getElementById('importDbReplaceWarn').style.display = 'none';
+  document.getElementById('importDbStatus').textContent = '';
+  document.getElementById('importDbBackdrop').classList.add('open');
+}
+
+function closeImportDBModal() {
+  document.getElementById('importDbBackdrop').classList.remove('open');
+}
+
+// Show/hide the replace warning when mode changes
+document.addEventListener('DOMContentLoaded', () => {
+  document.getElementById('importDbMode')?.addEventListener('change', function() {
+    document.getElementById('importDbReplaceWarn').style.display =
+      this.value === 'replace' ? 'block' : 'none';
+  });
+});
+
+async function doImportDB() {
+  const fi   = document.getElementById('importDbFile');
+  const mode = document.getElementById('importDbMode')?.value || 'merge';
+  const stat = document.getElementById('importDbStatus');
+  const btn  = document.getElementById('importDbBtn');
+  if (!fi?.files?.length) {
+    stat.textContent = t('m365_db_import_no_file','Please select a ZIP file first.');
+    stat.style.color = 'var(--danger)';
+    return;
+  }
+  if (mode === 'replace') {
+    if (!confirm(t('m365_db_import_replace_confirm',
+      'Replace mode will erase ALL existing scan data and restore from the archive.\n\nMake sure you have a manual backup of ~/.gdpr_scanner.db.\n\nProceed?'))) return;
+  }
+  btn.disabled = true;
+  stat.style.color = 'var(--muted)';
+  stat.textContent = t('m365_db_importing','Importing…');
+  const fd = new FormData();
+  fd.append('file', fi.files[0]);
+  fd.append('mode', mode);
+  if (mode === 'replace') fd.append('confirm', 'yes');
+  try {
+    const r = await fetch('/api/db/import', { method: 'POST', body: fd });
+    const d = await r.json();
+    if (!r.ok || d.error) {
+      stat.style.color = 'var(--danger)';
+      stat.textContent = '✖ ' + (d.error || r.statusText);
+    } else {
+      const counts = Object.entries(d.imported || {}).map(([k,v]) => `${k}: ${v}`).join(', ');
+      stat.style.color = 'var(--accent)';
+      stat.textContent = '✔ ' + t('m365_db_imported','Imported') + (counts ? ' (' + counts + ')' : '');
+      log(t('m365_db_imported','Imported') + ' [' + mode + '] ' + fi.files[0].name);
+    }
+  } catch(e) {
+    stat.style.color = 'var(--danger)';
+    stat.textContent = '✖ ' + e.message;
+  } finally {
+    btn.disabled = false;
+  }
+}
+
+// ── Scan ─────────────────────────────────────────────────────────────────────
+function buildScanPayload() {
+  // Collect checked M365 sources from dynamic panel
+  const sources = [];
+  document.querySelectorAll('#sourcesPanel input[data-source-type="m365"]:checked').forEach(function(cb) {
+    sources.push(cb.dataset.sourceId);
+  });
+  // Collect checked file sources (local/smb) — handled separately in startScan()
+  // but included here so profiles and checkpoint checks are aware of them
+  const fileSources = [];
+  document.querySelectorAll('#sourcesPanel input[data-source-type="file"]:checked').forEach(function(cb) {
+    fileSources.push(cb.dataset.sourceId);
+  });
+  // Collect checked Google sources
+  const googleSources = [];
+  document.querySelectorAll('#sourcesPanel input[data-source-type="google"]:checked').forEach(function(cb) {
+    googleSources.push(cb.dataset.sourceId);
+  });
+  const user_ids = getSelectedUsers();
+  // Merge all source types into a single array for profiles
+  const allSources = sources.concat(fileSources).concat(googleSources);
+  const options = {
+    older_than_days:  parseInt(document.getElementById('olderThan').value) || 0,
+    email_body:       document.getElementById('optEmailBody').checked,
+    attachments:      document.getElementById('optAttachments').checked,
+    max_attach_mb:    parseInt(document.getElementById('optMaxAttachMB').value) || 20,
+    max_emails:       parseInt(document.getElementById('optMaxEmails').value) || 200,
+    delta:            document.getElementById('optDelta') ? document.getElementById('optDelta').checked : false,
+    scan_photos:      document.getElementById('optScanPhotos') ? document.getElementById('optScanPhotos').checked : false,
+    retention_enabled: document.getElementById('optRetention') ? document.getElementById('optRetention').checked : false,
+    retention_years:  parseInt(document.getElementById('optRetentionYears')?.value) || 5,
+    fiscal_year_end:  document.getElementById('optFiscalYearEnd')?.value || '',
+  };
+  return { sources, fileSources, allSources, googleSources, user_ids, options };
+}
+
+async function checkCheckpoint() {
+  const payload = buildScanPayload();
+  if (!payload.sources.length && !payload.fileSources.length) return;
+  if (payload.sources.length && !payload.user_ids.length) return;
+  try {
+    const r = await fetch('/api/scan/checkpoint', {
+      method: 'POST', headers: {'Content-Type':'application/json'},
+      body: JSON.stringify(payload)
+    });
+    const d = await r.json();
+    const banner = document.getElementById('resumeBanner');
+    if (d.exists) {
+      const ts = d.started_at ? new Date(d.started_at * 1000).toLocaleString([], {dateStyle:'short', timeStyle:'short'}) : '';
+      document.getElementById('resumeBannerText').textContent =
+        t('m365_resume_banner', `Previous scan interrupted (${d.scanned_count} scanned, ${d.flagged_count} found${ts ? ' — ' + ts : ''})`);
+      banner.style.display = 'flex';
+    } else {
+      banner.style.display = 'none';
+    }
+  } catch(e) { /* ignore */ }
+}
+
+async function clearCheckpointAndScan() { // Discard the saved checkpoint, hide the resume banner, then start a non-resume scan.
+  await fetch('/api/scan/clear_checkpoint', {method:'POST'}); // the response is not inspected
+  document.getElementById('resumeBanner').style.display = 'none';
+  startScan(false); // false is passed as startScan's `resume` argument
+}
+
+async function checkDeltaStatus() {
+  const cb = document.getElementById('optDelta');
+  if (!cb) return;
+  try {
+    const r = await fetch('/api/delta/status');
+    const d = await r.json();
+    const row = document.getElementById('deltaStatusRow');
+    const txt = document.getElementById('deltaStatusText');
+    if (d.exists) {
+      const src = d.count === 1 ? '1 source' : `${d.count} sources`;
+      txt.textContent = t('m365_delta_tokens_saved', `Tokens saved for ${src}`);
+      row.style.display = 'flex';
+      row.style.alignItems = 'center';
+    } else {
+      row.style.display = 'none';
+    }
+  } catch(e) { /* ignore */ }
+}
+
+async function clearDeltaTokens() {
+  await fetch('/api/delta/clear', {method:'POST'});
+  document.getElementById('deltaStatusRow').style.display = 'none';
+  log(t('m365_delta_cleared', 'Delta tokens cleared — next scan will be a full scan.'));
+}
+
+// ── SMTP / Email report modal ─────────────────────────────────────────────────
+
+function openSmtpModal(focusSend) {
+  document.getElementById('smtpBackdrop').classList.add('open');
+  document.getElementById('smtpStatus').textContent = '';
+  loadSmtpConfig();
+  if (focusSend) {
+    setTimeout(() => document.getElementById('smtpRecipients').focus(), 120);
+  }
+}
+
+function closeSmtpModal() {
+  document.getElementById('smtpBackdrop').classList.remove('open');
+}
+
+async function loadSmtpConfig() {
+  try {
+    const r = await fetch('/api/smtp/config');
+    const d = await r.json();
+    if (d.host)       document.getElementById('smtpHost').value        = d.host;
+    if (d.port)       document.getElementById('smtpPort').value        = d.port;
+    if (d.username)   document.getElementById('smtpUser').value        = d.username;
+    if (d.from_addr)  document.getElementById('smtpFrom').value        = d.from_addr;
+    if (d.recipients) document.getElementById('smtpRecipients').value  = Array.isArray(d.recipients) ? d.recipients.join(', ') : d.recipients;
+    if (d.password_saved) document.getElementById('smtpPass').placeholder = '(password saved)';
+    if (d.use_tls !== undefined) document.getElementById('smtpTLS').checked = d.use_tls;
+    if (d.use_ssl !== undefined) document.getElementById('smtpSSL').checked = d.use_ssl;
+  } catch(e) { /* ignore */ }
+}
+
+async function saveSmtpConfig() {
+  const cfg = _smtpFields();
+  const r = await fetch('/api/smtp/config', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(cfg)
+  });
+  const d = await r.json();
+  const el = document.getElementById('smtpStatus');
+  if (d.status === 'saved') {
+    el.style.color = 'var(--success)';
+    el.textContent = t('m365_smtp_saved', 'Settings saved.');
+    if (cfg.password) document.getElementById('smtpPass').placeholder = '(password saved)';
+  } else {
+    el.style.color = 'var(--danger)';
+    el.textContent = d.error || 'Error saving';
+  }
+}
+
+async function sendReport() {
+  const cfg = _smtpFields();
+  const recipStr = document.getElementById('smtpRecipients').value.trim();
+  if (!recipStr) {
+    document.getElementById('smtpStatus').style.color = 'var(--danger)';
+    document.getElementById('smtpStatus').textContent = t('m365_smtp_no_recipients', 'Enter at least one recipient.');
+    document.getElementById('smtpRecipients').focus();
+    return;
+  }
+  const recipients = recipStr.split(/[,;]/).map(s => s.trim()).filter(Boolean);
+  const statusEl = document.getElementById('smtpStatus');
+  statusEl.style.color = 'var(--muted)';
+  statusEl.textContent = t('m365_smtp_sending', 'Sending…');
+
+  const r = await fetch('/api/send_report', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify({recipients, smtp: cfg})
+  });
+  const d = await r.json();
+  if (d.status === 'sent') {
+    statusEl.style.color = 'var(--success)';
+    statusEl.textContent = t('m365_smtp_sent', 'Sent to ' + recipients.join(', '));
+    log('Report emailed to ' + recipients.join(', '), 'ok');
+  } else {
+    statusEl.style.color = 'var(--danger)';
+    statusEl.textContent = d.error || 'Send failed';
+    log('Email send failed: ' + (d.error || ''), 'err');
+  }
+}
+
+function _smtpFields() {
+  return {
+    host:       document.getElementById('smtpHost').value.trim(),
+    port:       parseInt(document.getElementById('smtpPort').value) || 587,
+    username:   document.getElementById('smtpUser').value.trim(),
+    password:   document.getElementById('smtpPass').value,
+    from_addr:  document.getElementById('smtpFrom').value.trim(),
+    use_tls:    document.getElementById('smtpTLS').checked,
+    use_ssl:    document.getElementById('smtpSSL').checked,
+    recipients: document.getElementById('smtpRecipients').value,
+  };
+}
+
+
+
+// ── Shared SSE event listeners (#21) ─────────────────────────────────────────
+// Extracted so both startScan() and _autoConnectSSEIfRunning() share identical
+// handlers — fixes the bug where replayed events from a scheduled scan were
+// silently ignored because the page-load SSE only had scheduler_* listeners.
+
+function _attachScanListeners(source) {
+  source.addEventListener('scan_phase', function(e) {
+    var d = JSON.parse(e.data);
+    console.log('[SSE] scan_phase:', d.phase);
+    // Ensure a progress segment exists before rendering phase text.
+    // scan_phase can arrive before scan_progress (or before scan_start on replay
+    // if scan_start has been pushed out of the 500-event SSE buffer).
+    if (!S._m365ScanRunning && !S._googleScanRunning && !S._fileScanRunning) {
+      var ph = (d.phase || '').toLowerCase();
+      var phaseSrc = /google|gmail|gdrive/.test(ph) ? 'google'
+                   : /^files\s*[—\-–]/.test(ph)    ? 'file'
+                   : 'm365';
+      if (phaseSrc === 'google')    { S._googleScanRunning = true; }
+      else if (phaseSrc === 'file') { S._fileScanRunning   = true; }
+      else                          { S._m365ScanRunning   = true; }
+      document.getElementById('scanBtn').disabled = true;
+      document.getElementById('stopBtn').style.display = 'inline-block';
+      _renderProgressSegments();
+    }
+    _setProgressPhase(d.phase);
+    log(d.phase);
+  });
+  source.addEventListener('scan_progress', function(e) {
+    var d = JSON.parse(e.data);
+    var src = d.source || 'm365';
+    var pct = d.pct !== undefined ? d.pct
+            : (d.total > 0 ? Math.round((d.index || d.completed || 0) / d.total * 100) : 0);
+    S._srcPct[src] = pct;
+    // If reconnecting mid-scan the running flag may not be set yet — ensure segment exists
+    if (src === 'm365'    && !S._m365ScanRunning)   { S._m365ScanRunning   = true; document.getElementById('scanBtn').disabled = true; document.getElementById('stopBtn').style.display = 'inline-block'; _renderProgressSegments(); }
+    if (src === 'google'  && !S._googleScanRunning) { S._googleScanRunning = true; document.getElementById('scanBtn').disabled = true; document.getElementById('stopBtn').style.display = 'inline-block'; _renderProgressSegments(); }
+    if (src === 'file'    && !S._fileScanRunning)   { S._fileScanRunning   = true; document.getElementById('scanBtn').disabled = true; document.getElementById('stopBtn').style.display = 'inline-block'; _renderProgressSegments(); }
+    var fill = document.getElementById('progressFill_' + src);
+    if (fill) fill.style.width = pct + '%';
+    document.getElementById('progressFile').textContent = d.file || '';
+    // Only update stats/ETA from M365 (has meaningful totals and ETA)
+    if (src === 'm365') {
+      var statsEl = document.getElementById('progressStats');
+      if (statsEl && d.total) {
+        statsEl.textContent = (d.index || 0) + ' / ' + d.total;
+      }
+      var etaEl = document.getElementById('progressEta');
+      if (etaEl && d.eta !== undefined) {
+        etaEl.textContent = d.eta ? ('ETA ' + d.eta) : '';
+      }
+    }
+  });
+  source.addEventListener('scan_file', function(e) {
+    var d = JSON.parse(e.data);
+    setLogLive(d.file || '');
+  });
+  source.addEventListener('scan_file_flagged', function(e) {
+    var card = JSON.parse(e.data);
+    console.log('[SSE] scan_file_flagged:', card.name || card.id);
+    if (!S.flaggedData.find(function(x){ return x.id === card.id; })) {
+      S.flaggedData.push(card);
+      S.totalCPR += (card.cpr_count || 0);
+      document.getElementById('filterBar').style.display = 'flex';
+      document.getElementById('grid').style.display = S.isListView ? 'block' : 'grid';
+      applyFilters();
+    }
+  });
+  source.addEventListener('scan_error', function(e) {
+    var d = JSON.parse(e.data);
+    log((d.file ? d.file + ': ' : '') + d.error, 'err');
+  });
+  source.addEventListener('scan_cancelled', function() {
+    if (S._userStartedScan) {
+      S._userStartedScan = false;
+      if (S.es) { S.es.close(); S.es = null; }
+    }
+    document.getElementById('scanBtn').disabled = false;
+    document.getElementById('stopBtn').style.display = 'none';
+    _clearProgressBar();
+    setLogLive('');
+    log('Scan stopped.', 'warn');
+  });
+  source.addEventListener('scan_done', function(e) {
+    var d = JSON.parse(e.data);
+    console.log('[SSE] scan_done:', d);
+    // Only close SSE if the user started this scan via the Scan button.
+    // For scheduled scans, keep the SSE connection alive so future
+    // scheduler events are still received.
+    if (S._userStartedScan) {
+      S._userStartedScan = false;
+      if (S.es) { S.es.close(); S.es = null; }
+    }
+    S._srcPct.m365 = 100;
+    S._m365ScanRunning = false;
+    _renderProgressSegments();
+    var _anyRunning = S._googleScanRunning || S._fileScanRunning;
+    if (!_anyRunning) setLogLive('');
+    document.getElementById('scanBtn').disabled = _anyRunning;
+    document.getElementById('stopBtn').style.display = _anyRunning ? 'inline-block' : 'none';
+    if (!_anyRunning) _clearProgressBar();
+    document.getElementById('statsSection').style.display = 'block';
+    document.getElementById('statScanned').textContent = d.total_scanned;
+    document.getElementById('statFlagged').textContent = d.flagged_count;
+    document.getElementById('statCPR').textContent = S.totalCPR;
+    document.getElementById('statsPill').style.display = 'block';
+    updateStats();
+    if (S.flaggedData.length) {
+      document.getElementById('filterBar').style.display = 'flex';
+      document.getElementById('grid').style.display = S.isListView ? 'block' : 'grid';
+      applyFilters();
+    } else {
+      document.getElementById('emptyState').style.display = 'flex';
+      document.getElementById('emptyState').innerHTML = '<div class="empty-icon">\u2705</div><div class="empty-text">' + t('m365_no_cpr_found','No CPR numbers found.') + '</div>';
+    }
+    var deltaNote = d.delta ? ' (\u0394 delta \u2014 ' + (d.delta_sources||0) + ' source(s) indexed)' : '';
+    log('Scan complete \u2014 ' + d.flagged_count + ' flagged of ' + d.total_scanned + deltaNote, 'ok');
+    if (d.delta) checkDeltaStatus();
+    markOverdueCards();
+    loadTrend();
+  });
+  source.addEventListener('google_scan_done', function(e) {
+    var d = JSON.parse(e.data);
+    console.log('[SSE] google_scan_done:', d);
+    S._srcPct.google = 100;
+    S._googleScanRunning = false;
+    _renderProgressSegments();
+    if (!S._m365ScanRunning && !S._fileScanRunning) {
+      setLogLive('');
+      document.getElementById('scanBtn').disabled = false;
+      document.getElementById('stopBtn').style.display = 'none';
+      _clearProgressBar();
+      document.getElementById('statsSection').style.display = 'block';
+      document.getElementById('statsPill').style.display = 'block';
+      updateStats();
+      if (S.flaggedData.length) {
+        document.getElementById('filterBar').style.display = 'flex';
+        document.getElementById('grid').style.display = S.isListView ? 'block' : 'grid';
+        applyFilters();
+      }
+    }
+    log('Google scan complete \u2014 ' + d.flagged_count + ' flagged of ' + d.total_scanned, 'ok');
+    markOverdueCards();
+    loadTrend();
+  });
+  source.addEventListener('file_scan_done', function(e) {
+    var d = JSON.parse(e.data);
+    console.log('[SSE] file_scan_done:', d);
+    S._srcPct.file = 100;
+    S._fileScanRunning = false;
+    _renderProgressSegments();
+    if (!S._m365ScanRunning && !S._googleScanRunning) {
+      setLogLive('');
+      document.getElementById('scanBtn').disabled = false;
+      document.getElementById('stopBtn').style.display = 'none';
+      _clearProgressBar();
+      document.getElementById('statsSection').style.display = 'block';
+      document.getElementById('statsPill').style.display = 'block';
+      updateStats();
+      if (S.flaggedData.length) {
+        document.getElementById('filterBar').style.display = 'flex';
+        document.getElementById('grid').style.display = S.isListView ? 'block' : 'grid';
+        applyFilters();
+      }
+    }
+    log('Bestandsscan fuldfÃ¸rt \u2014 ' + d.flagged_count + ' flagget af ' + d.total_scanned, 'ok');
+    markOverdueCards();
+    loadTrend();
+  });
+  // sse_replay_done marks end of buffer replay — log a note so the user knows
+  // earlier events above were replayed from an already-running scan
+  source.addEventListener('sse_replay_done', function() {
+    log(t('m365_sse_replay_note', 'Live log resumed \u2014 earlier entries replayed from running scan.'));
+  });
+}
+
+function _attachSchedulerListeners(source) {
+  source.addEventListener('scheduler_started', function(e) {
+    var d = JSON.parse(e.data);
+    console.log('[SSE] scheduler_started received:', d);
+    log('\uD83D\uDD50 ' + t('m365_sched_title','Scheduled scan') + ': ' + (d.job_name||'') + '\u2026');
+    // Show progress UI so scan_phase / scan_progress events are visible
+    document.getElementById('scanBtn').disabled = true;
+    document.getElementById('stopBtn').style.display = 'inline-block';
+    S._srcPct = { m365: 0, google: 0, file: 0 }; S._m365ScanRunning = true; _renderProgressSegments();
+    _setProgressPhase((d.job_name||'') + '\u2026');
+    document.getElementById('progressFile').textContent = '';
+  });
+  source.addEventListener('scan_start', function(e) {
+    // Scheduled scans also emit scan_start — show progress UI in case
+    // scheduler_started was missed (e.g. browser reconnected mid-scan)
+    console.log('[SSE] scan_start received');
+    document.getElementById('scanBtn').disabled = true;
+    document.getElementById('stopBtn').style.display = 'inline-block';
+    // Ensure at least the M365 segment is rendered (scan_start is M365-only)
+    if (!S._m365ScanRunning) { S._m365ScanRunning = true; _renderProgressSegments(); }
+  });
+  source.addEventListener('scheduler_done', function(e) {
+    var d = JSON.parse(e.data);
+    console.log('[SSE] scheduler_done received:', d);
+    document.getElementById('scanBtn').disabled = false;
+    document.getElementById('stopBtn').style.display = 'none';
+    _clearProgressBar();
+    log('\u2713 ' + t('m365_sched_title','Scheduled scan') + ' ' + (d.job_name||'') + ' \u2014 ' + (d.flagged||0) + ' flagged', 'ok');
+    markOverdueCards();
+    loadTrend();
+  });
+  source.addEventListener('scheduler_error', function(e) {
+    var d = JSON.parse(e.data);
+    console.log('[SSE] scheduler_error received:', d);
+    document.getElementById('scanBtn').disabled = false;
+    document.getElementById('stopBtn').style.display = 'none';
+    _clearProgressBar();
+    log('\u26A0 ' + t('m365_sched_title','Scheduled scan') + ' failed: ' + (d.error||''), 'err');
+  });
+}
+
+
+/**
+ * Start (or resume) a scan across every selected source type.
+ *
+ * Reads the current selection via buildScanPayload(), resets the result and
+ * progress UI, reopens the SSE stream and then, after a 300 ms delay, POSTs to
+ * /api/scan/start (M365), /api/file_scan/start (once per checked file source)
+ * and /api/google/scan/start (Google) as applicable.
+ *
+ * @param {boolean} [resume] Truthy keeps the existing result cards and sends
+ *   `resume: true` to the M365 endpoint; falsy clears previous results first.
+ */
+function startScan(resume) {
+  const { sources, fileSources, googleSources, user_ids, options } = buildScanPayload();
+  // Nothing selected at all — bail out before touching the UI.
+  if (!sources.length && !fileSources.length && !googleSources.length) { alert(t('m365_no_sources','No sources selected — nothing to scan.')); return; }
+  // M365 sources need at least one account, unless a Google source is selected too.
+  if (sources.length && !user_ids.length && !googleSources.length) { alert('Select at least one account to scan.'); return; }
+
+  // When resuming, keep existing cards; otherwise clear everything
+  if (!resume) {
+    S.flaggedData = []; S.filteredData = []; S.totalCPR = 0;
+    document.getElementById('grid').innerHTML = '';
+    document.getElementById('grid').style.display = 'none';
+    document.getElementById('emptyState').style.display = 'none';
+    const _lss = document.getElementById('lastScanSummary'); if (_lss) _lss.style.display = 'none';
+    document.getElementById('statsSection').style.display = 'none';
+    document.getElementById('statsPill').style.display = 'none';
+  }
+  document.getElementById('resumeBanner').style.display = 'none';
+  // Reset the log panel to just the (hidden) live line and drop the persisted session log.
+  document.getElementById('logPanel').innerHTML = '<div class="log-line log-live" id="logLive" style="display:none"></div>';
+  try { sessionStorage.removeItem(_LOG_SESSION_KEY); } catch(e) {}
+  // One "running" flag per source family; set before _renderProgressSegments() is called.
+  S._m365ScanRunning   = sources.length > 0;
+  S._googleScanRunning = googleSources.length > 0;
+  S._fileScanRunning   = fileSources.length > 0;
+  S._srcPct = { m365: 0, google: 0, file: 0 };
+  S._progressCurrentUser = '';
+  _renderProgressSegments();
+  document.getElementById('scanBtn').disabled = true;
+  document.getElementById('stopBtn').style.display = 'inline-block';
+  // progress segments rendered by _renderProgressSegments() called above
+  document.getElementById('progressFile').textContent = '';
+  _setProgressPhase(t('scan_preparing', 'Preparing…'));
+
+  // Build the one-line "Starting scan: …" log entry.
+  const dateLabel = options.older_than_days > 0 ? ', ' + t('m365_log_older_than', 'older than') + ' ' + document.getElementById('olderThanDate').value : '';
+  const modeLabel = resume ? t('m365_log_resuming', 'Resuming scan:') : t('m365_log_starting_scan', 'Starting scan:');
+  // Google accounts are only counted when a Google source is selected.
+  var googleCount = googleSources.length > 0 ? S._allUsers.filter(function(u) {
+    return u.selected !== false && (u.platform === 'google' || u.platform === 'both');
+  }).length : 0;
+  // With M365 sources the count is user_ids.length; with Google only it is googleCount.
+  var totalAccounts = (sources.length > 0 ? user_ids.length : 0) + (googleSources.length > 0 && sources.length === 0 ? googleCount : 0);
+  var allSourceLabels = sources.concat(googleSources);
+  log(modeLabel + ' ' + allSourceLabels.join(', ') + ' — ' + (totalAccounts || googleCount) + ' ' + t('m365_log_accounts', 'account(s)') + dateLabel + '…');
+
+  // Always close and reopen SSE — ensures a fresh queue is registered
+  // before the scan fires events (prevents missed events on the server side)
+  if (S.es) { S.es.close(); S.es = null; }
+  S._userStartedScan = true;
+  _ensureSSE();
+
+  // NOTE(review): the 300 ms delay presumably gives the new SSE connection time
+  // to register before the start requests go out — confirm against the server.
+  setTimeout(() => {
+    // Fire M365 scan if any M365 sources are selected
+    if (sources.length > 0) {
+      fetch('/api/scan/start', {
+        method: 'POST', headers: {'Content-Type':'application/json'},
+        body: JSON.stringify({sources, user_ids, options, resume: !!resume,
+                              profile_id: S._activeProfileId || null})
+      }).then(r => {
+        // 409 is only logged; the progress UI is not reset here.
+        if (r.status === 409) { log('Scan already running', 'err'); }
+      }).catch(e => { log('Scan start failed: ' + e, 'err'); });
+    }
+
+    // Fire file scans for each checked file source (local/smb)
+    const checkedFileIds = [];
+    document.querySelectorAll('#sourcesPanel input[data-source-type="file"]:checked').forEach(function(cb) {
+      checkedFileIds.push(cb.dataset.sourceId);
+    });
+    checkedFileIds.forEach(function(id) {
+      // Checked ids without a matching entry in S._fileSources are skipped.
+      const source = S._fileSources.find(function(s) { return s.id === id; });
+      if (!source) return;
+      fetch('/api/file_scan/start', {
+        method: 'POST', headers: {'Content-Type':'application/json'},
+        body: JSON.stringify(Object.assign({}, source, {scan_photos: options.scan_photos || false}))
+      }).catch(e => { log('File scan error: ' + e, 'err'); });
+    });
+
+    // Fire Google Workspace scan if any Google sources are selected
+    const checkedGoogleIds = [];
+    document.querySelectorAll('#sourcesPanel input[data-source-type="google"]:checked').forEach(function(cb) {
+      checkedGoogleIds.push(cb.dataset.sourceId);
+    });
+    if (checkedGoogleIds.length > 0) {
+      // Collect selected Google user emails from the account list
+      // ('both'-platform users contribute googleEmail; empty values are dropped)
+      var selectedGoogleEmails = S._allUsers
+        .filter(function(u) { return u.selected !== false && (u.platform === 'google' || u.platform === 'both'); })
+        .map(function(u) { return u.platform === 'both' ? u.googleEmail : u.email; })
+        .filter(Boolean);
+      fetch('/api/google/scan/start', {
+        method: 'POST', headers: {'Content-Type':'application/json'},
+        body: JSON.stringify({
+          sources:     checkedGoogleIds,
+          user_emails: selectedGoogleEmails,
+          options:     options
+        })
+      }).then(r => {
+        if (r.status === 409) { log('Google scan already running', 'err'); }
+      }).catch(e => { log('Google scan error: ' + e, 'err'); });
+    }
+
+    // All scan types fired above — no fallback error needed
+  }, 300);
+
+}
+
+function stopScan() {
+  fetch('/api/scan/stop', {method:'POST'});
+}
+
+// ── Trend sparkline (#7) ──────────────────────────────────────────────────────
+
+function drawSparkline(data) {
+  const canvas = document.getElementById('sparkCanvas');
+  if (!canvas) return;
+  const dpr = window.devicePixelRatio || 1;
+  const W   = canvas.offsetWidth || 220;
+  const H   = 60;
+  canvas.width  = W * dpr;
+  canvas.height = H * dpr;
+  const ctx = canvas.getContext('2d');
+  ctx.scale(dpr, dpr);
+
+  const flagged = data.map(d => d.flagged_count);
+  const overdue = data.map(d => d.overdue_count);
+  const maxVal  = Math.max(...flagged, 1) * 1.2;
+  const n       = data.length;
+  const xPos    = i => (i / (n - 1)) * (W - 8) + 4;
+  const yPos    = v => H - 4 - (v / maxVal) * (H - 10);
+
+  const isDark   = document.body.getAttribute('data-theme') !== 'light';
+  const cBlue    = '#378ADD';
+  const cAmber   = '#BA7517';
+  const cFill    = isDark ? 'rgba(55,138,221,0.12)' : 'rgba(55,138,221,0.08)';
+
+  // Fill under flagged line
+  ctx.beginPath();
+  ctx.moveTo(xPos(0), yPos(flagged[0]));
+  for (let i = 1; i < n; i++) ctx.lineTo(xPos(i), yPos(flagged[i]));
+  ctx.lineTo(xPos(n - 1), H);
+  ctx.lineTo(xPos(0), H);
+  ctx.closePath();
+  ctx.fillStyle = cFill;
+  ctx.fill();
+
+  // Flagged line
+  ctx.beginPath();
+  ctx.moveTo(xPos(0), yPos(flagged[0]));
+  for (let i = 1; i < n; i++) ctx.lineTo(xPos(i), yPos(flagged[i]));
+  ctx.strokeStyle = cBlue; ctx.lineWidth = 1.5; ctx.lineJoin = 'round';
+  ctx.stroke();
+
+  // Overdue dashed line
+  ctx.beginPath();
+  ctx.moveTo(xPos(0), yPos(overdue[0]));
+  for (let i = 1; i < n; i++) ctx.lineTo(xPos(i), yPos(overdue[i]));
+  ctx.strokeStyle = cAmber; ctx.lineWidth = 1;
+  ctx.setLineDash([3, 3]); ctx.stroke(); ctx.setLineDash([]);
+
+  // Dot on latest point
+  ctx.beginPath();
+  ctx.arc(xPos(n - 1), yPos(flagged[n - 1]), 3, 0, Math.PI * 2);
+  ctx.fillStyle = cBlue; ctx.fill();
+
+  // Labels: first, middle, last date (MM-DD only)
+  const lblEl = document.getElementById('sparkLabels');
+  if (lblEl) {
+    const fmt = d => d.scan_date.slice(5);
+    lblEl.innerHTML = `<span>${fmt(data[0])}</span><span>${fmt(data[Math.floor(n/2)])}</span><span>${fmt(data[n-1])}</span>`;
+  }
+
+  // Trend change label
+  const last = flagged[n - 1], prev = flagged[n - 2] || last;
+  const diff = last - prev;
+  const pct  = prev ? Math.round(Math.abs(diff / prev) * 100) : 0;
+  const arrow = diff < 0 ? '↓' : diff > 0 ? '↑' : '→';
+  const color = diff < 0 ? 'var(--success)' : diff > 0 ? 'var(--danger)' : 'var(--muted)';
+  const chEl = document.getElementById('trendChange');
+  if (chEl) chEl.innerHTML = `<span style="color:${color}">${arrow} ${pct}%</span>`;
+
+  // Hover tooltip
+  canvas.onmousemove = e => {
+    const rect = canvas.getBoundingClientRect();
+    const mx  = e.clientX - rect.left;
+    const idx = Math.round(((mx - 4) / (W - 8)) * (n - 1));
+    if (idx < 0 || idx >= n) return;
+    const d   = data[idx];
+    const tip = document.getElementById('sparkTip');
+    if (!tip) return;
+    tip.style.display = 'block';
+    tip.textContent = `${d.scan_date}  ${d.flagged_count} / ${d.overdue_count} overdue`;
+    tip.style.left = Math.min(mx, W - tip.offsetWidth - 4) + 'px';
+  };
+  canvas.onmouseleave = () => {
+    const tip = document.getElementById('sparkTip');
+    if (tip) tip.style.display = 'none';
+  };
+}
+
+async function loadTrend() {
+  try {
+    const r = await fetch('/api/db/trend?n=10');
+    if (!r.ok) return;
+    const data = await r.json();
+    if (!Array.isArray(data) || data.length < 2) return;
+    document.getElementById('trendPanel').style.display = 'block';
+    // Defer draw until canvas has layout width
+    setTimeout(() => drawSparkline(data), 60);
+  } catch(e) { /* DB not available */ }
+}
+
+function updateStats() {
+  document.getElementById('pillFlagged').textContent = S.flaggedData.length;
+  document.getElementById('pillScanned').textContent =
+    parseInt(document.getElementById('progressStats').textContent.split('/')[1] || '0') || 0;
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+window.exportDB = exportDB;
+window.openImportDBModal = openImportDBModal;
+window.closeImportDBModal = closeImportDBModal;
+window.doImportDB = doImportDB;
+window.buildScanPayload = buildScanPayload;
+window.checkCheckpoint = checkCheckpoint;
+window.clearCheckpointAndScan = clearCheckpointAndScan;
+window.checkDeltaStatus = checkDeltaStatus;
+window.clearDeltaTokens = clearDeltaTokens;
+window.openSmtpModal = openSmtpModal;
+window.closeSmtpModal = closeSmtpModal;
+window.loadSmtpConfig = loadSmtpConfig;
+window.saveSmtpConfig = saveSmtpConfig;
+window.sendReport = sendReport;
+window._smtpFields = _smtpFields;
+window._attachScanListeners = _attachScanListeners;
+window._attachSchedulerListeners = _attachSchedulerListeners;
+window.startScan = startScan;
+window.stopScan = stopScan;
+window.drawSparkline = drawSparkline;
+window.loadTrend = loadTrend;
+window.updateStats = updateStats;
diff --git a/static/js/scheduler.js b/static/js/scheduler.js
new file mode 100644
index 0000000..2de9614
--- /dev/null
+++ b/static/js/scheduler.js
@@ -0,0 +1,439 @@
+// ── Scheduler — multi-job (#19) ─────────────────────────────────────────────
+
+// Cached job definitions; schedLoad() replaces this array with the `jobs`
+// list returned by /api/scheduler/jobs.
+var _schedJobs = [];
+
+function schedLoad() {
+  fetch('/api/scheduler/jobs').then(function(r){ return r.json(); }).then(function(d) {
+    _schedJobs = d.jobs || [];
+    schedRenderJobs();
+    schedLoadHistory();
+    // Fetch status AFTER rendering so run buttons exist in the DOM
+    return fetch('/api/scheduler/status').then(function(r){ return r.json(); });
+  }).then(function(d) {
+    if (!d) return;
+    var noAps = document.getElementById('schedNoAps');
+    if (noAps) noAps.style.display = d.available ? 'none' : 'block';
+    schedUpdateSidebarIndicator(d);
+    (d.jobs || []).forEach(function(js) {
+      var descEl = document.getElementById('schedDesc_' + js.id);
+      if (!descEl) return;
+      var j2 = _schedJobs.find(function(x){ return x.id === js.id; });
+      var freqLabel = !j2 ? '' : (j2.frequency === 'weekly' ? 'Weekly' : j2.frequency === 'monthly' ? 'Monthly' : 'Daily');
+      var timeStr = !j2 ? '' : String(j2.hour||0).padStart(2,'0') + ':' + String(j2.minute||0).padStart(2,'0');
+      var base = freqLabel + ' ' + timeStr;
+      var runBtn = document.getElementById('schedRunBtn_' + js.id);
+      if (js.is_running) {
+        descEl.textContent = base + ' \u00b7 Running...';
+        if (runBtn) { runBtn.style.borderColor='#22c55e'; runBtn.style.color='#22c55e'; }
+      } else if (js.next_run) {
+        var dt = new Date(js.next_run);
+        descEl.textContent = base + ' \u00b7 Next: ' + dt.toLocaleString(undefined,{month:'short',day:'numeric',hour:'2-digit',minute:'2-digit'});
+        if (runBtn) { runBtn.style.borderColor='var(--border)'; runBtn.style.color='var(--muted)'; }
+      } else {
+        descEl.textContent = base + (js.enabled ? '' : ' \u00b7 Disabled');
+        if (runBtn) { runBtn.style.borderColor='var(--border)'; runBtn.style.color='var(--muted)'; }
+      }
+    });
+  }).catch(function(e){ console.warn('schedLoad:', e); });
+}
+
+/**
+ * Render the cached `_schedJobs` into #schedJobList.
+ *
+ * Each job becomes one row with an enable toggle, name, "<frequency> HH:MM"
+ * description (element id `schedDesc_<id>`), and run / edit / delete buttons
+ * (run button id `schedRunBtn_<id>`). An empty list shows a placeholder.
+ */
+function schedRenderJobs() {
+  var list = document.getElementById('schedJobList');
+  if (!list) return;
+  if (!_schedJobs.length) {
+    list.innerHTML = '<div style="font-size:11px;color:var(--muted);padding:4px 0">No scheduled scans yet.</div>';
+    return;
+  }
+  list.innerHTML = _schedJobs.map(function(j) {
+    // NOTE(review): sid is also placed inside inline JS string literals in the
+    // on* attributes below; _esc is defined elsewhere — confirm it makes the
+    // value safe in that context, or that ids are always server-generated.
+    var sid  = _esc(j.id);
+    var sname = _esc(j.name || 'Unnamed');
+    // Any frequency other than weekly/monthly is labelled "Daily".
+    var freqLabel = j.frequency === 'weekly' ? 'Weekly' : j.frequency === 'monthly' ? 'Monthly' : 'Daily';
+    var timeStr = String(j.hour||0).padStart(2,'0') + ':' + String(j.minute||0).padStart(2,'0');
+    var desc = freqLabel + ' ' + timeStr;
+    var chk = j.enabled ? ' checked' : '';
+    return '<div style="display:flex;align-items:center;gap:6px;padding:5px 6px;border:1px solid var(--border);border-radius:6px;background:var(--surface)">'
+      + '<label class="toggle" style="flex:unset;margin:0"><input type="checkbox"'+chk+' onchange="schedToggleEnabled(\''+sid+'\',this.checked)"><span class="toggle-slider"></span></label>'
+      + '<div style="flex:1;min-width:0">'
+      + '<div style="font-size:12px;font-weight:600;white-space:nowrap;overflow:hidden;text-overflow:ellipsis">'+sname+'</div>'
+      + '<div id="schedDesc_'+sid+'" style="font-size:10px;color:var(--muted)">'+desc+'</div>'
+      + '</div>'
+      + '<button onclick="schedRunJob(\''+sid+'\')" id="schedRunBtn_'+sid+'" style="background:none;border:1px solid var(--border);color:var(--muted);padding:2px 7px;border-radius:4px;font-size:10px;cursor:pointer" title="Run now">&#9654;</button>'
+      + '<button onclick="schedEditJob(\''+sid+'\')" style="background:none;border:1px solid var(--border);color:var(--muted);padding:2px 7px;border-radius:4px;font-size:10px;cursor:pointer" title="Edit">&#9998;</button>'
+      + '<button onclick="schedDeleteJob(\''+sid+'\')" style="background:none;border:1px solid var(--danger);color:var(--danger);padding:2px 7px;border-radius:4px;font-size:10px;cursor:pointer" title="Delete">&#10005;</button>'
+      + '</div>';
+  }).join('');
+}
+
+function schedToggleEnabled(id, enabled) {
+  var j = _schedJobs.find(function(x){ return x.id === id; });
+  if (!j) return;
+  var updated = Object.assign({}, j, {enabled: enabled});
+  fetch('/api/scheduler/jobs/save', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(updated)
+  }).then(function(r){ return r.json(); }).then(function(d) {
+    if (d.error) { alert('Error: ' + d.error); return; }
+    j.enabled = enabled;
+    schedLoad();
+  }).catch(function(e){ alert('Error: ' + e); });
+}
+
+function schedAddJob() {
+  document.getElementById('schedEditId').value = '';
+  document.getElementById('schedName').value = '';
+  document.getElementById('schedEnabled').checked = true;
+  document.getElementById('schedFrequency').value = 'daily';
+  document.getElementById('schedDow').value = 'mon';
+  document.getElementById('schedDom').value = 1;
+  document.getElementById('schedHour').value = 2;
+  document.getElementById('schedMinute').value = 0;
+  document.getElementById('schedAutoEmail').checked = false;
+  document.getElementById('schedAutoRetention').checked = false;
+  var titleEl = document.getElementById('schedEditorTitle');
+  if (titleEl) titleEl.textContent = t('m365_sched_editor_new', 'New scheduled scan');
+  schedPopulateProfiles('');
+  schedToggleFreqRows();
+  document.getElementById('schedJobEditor').style.display = 'block';
+  document.getElementById('schedSaveStatus').textContent = '';
+  document.getElementById('schedName').focus();
+}
+
+function schedEditJob(id) {
+  var j = _schedJobs.find(function(x){ return x.id === id; });
+  if (!j) return;
+  document.getElementById('schedEditId').value = j.id;
+  document.getElementById('schedName').value = j.name || '';
+  document.getElementById('schedEnabled').checked = !!j.enabled;
+  document.getElementById('schedFrequency').value = j.frequency || 'daily';
+  document.getElementById('schedDow').value = j.day_of_week || 'mon';
+  document.getElementById('schedDom').value = j.day_of_month || 1;
+  document.getElementById('schedHour').value = j.hour != null ? j.hour : 2;
+  document.getElementById('schedMinute').value = j.minute != null ? j.minute : 0;
+  document.getElementById('schedAutoEmail').checked = !!j.auto_email;
+  document.getElementById('schedAutoRetention').checked = !!j.auto_retention;
+  var titleEl = document.getElementById('schedEditorTitle');
+  if (titleEl) titleEl.textContent = t('m365_sched_editor_edit', 'Edit scheduled scan');
+  schedPopulateProfiles(j.profile_id || '');
+  schedToggleFreqRows();
+  document.getElementById('schedJobEditor').style.display = 'block';
+  document.getElementById('schedSaveStatus').textContent = '';
+}
+
+function schedCancelEdit() {
+  document.getElementById('schedJobEditor').style.display = 'none';
+}
+
+function schedSaveJob() {
+  var name = document.getElementById('schedName').value.trim();
+  if (!name) {
+    var st = document.getElementById('schedSaveStatus');
+    st.textContent = t('m365_sched_name_required', 'Name is required');
+    st.style.color = 'var(--danger)';
+    document.getElementById('schedName').focus();
+    return;
+  }
+  var job = {
+    id:             document.getElementById('schedEditId').value || '',
+    name:           name,
+    enabled:        document.getElementById('schedEnabled').checked,
+    frequency:      document.getElementById('schedFrequency').value,
+    day_of_week:    document.getElementById('schedDow').value,
+    day_of_month:   parseInt(document.getElementById('schedDom').value) || 1,
+    hour:           parseInt(document.getElementById('schedHour').value) || 0,
+    minute:         parseInt(document.getElementById('schedMinute').value) || 0,
+    profile_id:     document.getElementById('schedProfile').value,
+    auto_email:     document.getElementById('schedAutoEmail').checked,
+    auto_retention: document.getElementById('schedAutoRetention').checked,
+  };
+  var st = document.getElementById('schedSaveStatus');
+  st.style.color = 'var(--muted)'; st.textContent = 'Saving...';
+  fetch('/api/scheduler/jobs/save', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(job)
+  }).then(function(r){ return r.json(); }).then(function(d) {
+    if (d.error) { st.style.color='var(--danger)'; st.textContent=d.error; return; }
+    st.style.color = 'var(--accent)'; st.textContent = '\u2713 Saved';
+    setTimeout(function(){ st.textContent=''; }, 1500);
+    document.getElementById('schedJobEditor').style.display = 'none';
+    schedLoad();
+  }).catch(function(e){ st.style.color='var(--danger)'; st.textContent=e.message; });
+}
+
+function schedDeleteJob(id) {
+  var j = _schedJobs.find(function(x){ return x.id === id; });
+  var name = j ? j.name : id;
+  if (!confirm('Delete "' + name + '"?')) return;
+  fetch('/api/scheduler/jobs/delete', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify({id: id})
+  }).then(function(r){ return r.json(); }).then(function(d) {
+    if (d.error) { alert('Delete failed: ' + d.error); return; }
+    schedLoad();
+  }).catch(function(e){ alert('Delete error: ' + e); });
+}
+
+function schedRunJob(id) {
+  var j = _schedJobs.find(function(x){ return x.id === id; });
+  var name = j ? j.name : 'this scan';
+  if (!confirm('Run "' + name + '" now?')) return;
+  fetch('/api/scheduler/jobs/run_now', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify({id: id})
+  }).then(function(r){ return r.json(); }).then(function(d) {
+    if (d.error) alert(d.error);
+    else schedLoad();
+  });
+}
+
+function schedToggleFreqRows() {
+  var freq = document.getElementById('schedFrequency');
+  if (!freq) return;
+  var val = freq.value;
+  var dowRow = document.getElementById('schedDowRow');
+  var domRow = document.getElementById('schedDomRow');
+  if (dowRow) dowRow.style.display = val === 'weekly'  ? 'flex' : 'none';
+  if (domRow) domRow.style.display = val === 'monthly' ? 'flex' : 'none';
+}
+
+function schedPopulateProfiles(selectedId) {
+  fetch('/api/profiles').then(function(r){ return r.json(); }).then(function(d) {
+    var sel = document.getElementById('schedProfile');
+    if (!sel) return;
+    var firstOpt = sel.options[0];
+    sel.innerHTML = '';
+    sel.appendChild(firstOpt);
+    (d.profiles || []).forEach(function(p) {
+      var o = document.createElement('option');
+      o.value = p.id || p.name;
+      o.textContent = p.name;
+      if ((p.id || p.name) === selectedId) o.selected = true;
+      sel.appendChild(o);
+    });
+  });
+}
+
+function schedLoadHistory() {
+  var el = document.getElementById('schedHistory');
+  if (!el) return;
+  fetch('/api/scheduler/history?limit=10').then(function(r){ return r.json(); }).then(function(d) {
+    var runs = d.runs || [];
+    if (!runs.length) { el.innerHTML = '<em>No scheduled runs yet</em>'; return; }
+    var html = '';
+    runs.forEach(function(r) {
+      var ts = r.started_at ? new Date(r.started_at * 1000).toLocaleString() : '-';
+      var icon = r.status === 'completed' ? '\u2713' : r.status === 'failed' ? '\u2716' : '\u23f3';
+      var jname = r.job_name ? '<strong>' + _esc(r.job_name) + '</strong> - ' : '';
+      html += icon + ' ' + jname + ts + ' - ' + (r.flagged||0) + ' flagged';
+      if (r.emailed) html += ' \u2709';
+      if (r.error) html += ' <span style="color:var(--danger)">' + _esc(r.error.substring(0,60)) + '</span>';
+      html += '<br>';
+    });
+    el.innerHTML = html;
+  });
+}
+
+function schedUpdateSidebarIndicator(d) {
+  var wrap = document.getElementById('schedNextIndicator');
+  var txt  = document.getElementById('schedNextText');
+  if (!wrap || !txt) return;
+  if (d && d.enabled && d.next_run) {
+    try {
+      var dt = new Date(d.next_run);
+      txt.textContent = t('m365_sched_next', 'Next') + ': ' + dt.toLocaleString(undefined, {month:'short',day:'numeric',hour:'2-digit',minute:'2-digit'});
+      wrap.style.display = 'inline-flex';
+    } catch(e) { wrap.style.display = 'none'; }
+  } else {
+    wrap.style.display = 'none';
+  }
+}
+
+// Poll scheduler status every 60s
+setInterval(function() {
+  fetch('/api/scheduler/status').then(function(r){ return r.json(); }).then(function(d) {
+    schedUpdateSidebarIndicator(d);
+  }).catch(function(){});
+}, 60000);
+document.addEventListener('DOMContentLoaded', function() {
+  fetch('/api/scheduler/status').then(function(r){ return r.json(); }).then(function(d) {
+    schedUpdateSidebarIndicator(d);
+  }).catch(function(){});
+});
+
+// ── General tab ───────────────────────────────────────────────────────────────
+
+function stPopulateGeneral() {
+  stLoadPinStatus();
+  // Populate language selector (mirrors the hidden langSelect)
+  const src = document.getElementById('langSelect');
+  const dst = document.getElementById('langSelectSettings');
+  if (src && dst && dst.options.length === 0) {
+    Array.from(src.options).forEach(function(opt) {
+      const o = document.createElement('option');
+      o.value = opt.value; o.textContent = opt.textContent;
+      if (opt.selected) o.selected = true;
+      dst.appendChild(o);
+    });
+  } else if (src && dst) {
+    dst.value = src.value;
+  }
+  // Populate About rows
+  fetch('/api/about').then(function(r){ return r.json(); }).then(function(d) {
+    const set = function(id, val) { const el=document.getElementById(id); if(el) el.textContent=val||'\u2014'; };
+    set('st-about-python',  d.python);
+    set('st-about-msal',    d.msal);
+    set('st-about-requests',d.requests);
+    set('st-about-openpyxl',d.openpyxl);
+  }).catch(function(){});
+}
+
+// ── Email tab ─────────────────────────────────────────────────────────────────
+
+function stLoadSmtp() {
+  fetch('/api/smtp/config').then(function(r){ return r.json(); }).then(function(d) {
+    const set = function(id, val) { const el=document.getElementById(id); if(el) el.value=val||''; };
+    set('st-smtpHost', d.host);
+    set('st-smtpPort', d.port || 587);
+    set('st-smtpUser', d.user);
+    set('st-smtpFrom', d.from_addr);
+    set('st-smtpTo',   Array.isArray(d.recipients) ? d.recipients.join(', ') : (d.recipients||''));
+    const tls = document.getElementById('st-smtpTls');
+    if (tls) tls.checked = d.starttls !== false;
+    const pw = document.getElementById('st-smtpPw');
+    if (pw) pw.value = d.has_password ? '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022' : '';
+  }).catch(function(){});
+}
+
+async function stSmtpSave() {
+  const st = document.getElementById('st-smtpStatus');
+  const rawPw = document.getElementById('st-smtpPw').value;
+  const pw = rawPw === '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022' ? null : rawPw;
+  const body = {
+    host:       document.getElementById('st-smtpHost').value.trim(),
+    port:       parseInt(document.getElementById('st-smtpPort').value) || 587,
+    user:       document.getElementById('st-smtpUser').value.trim(),
+    from_addr:  document.getElementById('st-smtpFrom').value.trim(),
+    recipients: document.getElementById('st-smtpTo').value.split(/[,;]/).map(function(s){return s.trim();}).filter(Boolean),
+    starttls:   document.getElementById('st-smtpTls').checked,
+  };
+  if (pw !== null) body.password = pw;
+  st.style.color = 'var(--muted)'; st.textContent = t('m365_smtp_saving','Saving...');
+  try {
+    const r = await fetch('/api/smtp/config', {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(body)});
+    const d = await r.json();
+    if (d.error) { st.style.color='var(--danger)'; st.textContent=d.error; return; }
+    st.style.color='var(--accent)'; st.textContent='\u2714 '+t('m365_smtp_saved','Saved');
+  } catch(e){ st.style.color='var(--danger)'; st.textContent=e.message; }
+}
+
+async function stSmtpTest() {
+  const st = document.getElementById('st-smtpStatus');
+  await stSmtpSave();
+  if (st) { st.style.color='var(--muted)'; st.textContent=t('m365_smtp_testing','Testing connection\u2026'); }
+  try {
+    const r = await fetch('/api/smtp/test', {method:'POST', headers:{'Content-Type':'application/json'},
+      body:JSON.stringify({})});
+    const d = await r.json();
+    if (d.ok) {
+      if (st) { st.style.color='var(--accent)'; st.textContent='\u2714 ' + (d.message || t('m365_smtp_test_ok','Connection successful')); }
+    } else {
+      if (st) { st.style.color='var(--danger)'; st.textContent='\u2717 ' + (d.error || t('m365_smtp_test_fail','Connection failed')); }
+    }
+  } catch(e) {
+    if (st) { st.style.color='var(--danger)'; st.textContent='\u2717 ' + e.message; }
+  }
+}
+
+async function stSmtpSend() {
+  const st = document.getElementById('st-smtpStatus');
+  // First save current field values
+  await stSmtpSave();
+  // Check we have recipients
+  const recipStr = document.getElementById('st-smtpTo').value.trim();
+  if (!recipStr) {
+    if (st) { st.style.color='var(--danger)'; st.textContent=t('m365_smtp_no_recipients','Enter at least one recipient.'); }
+    return;
+  }
+  const recipients = recipStr.split(/[,;]/).map(function(s){return s.trim();}).filter(Boolean);
+  const rawPw = document.getElementById('st-smtpPw').value;
+  const cfg = {
+    host:      document.getElementById('st-smtpHost').value.trim(),
+    port:      parseInt(document.getElementById('st-smtpPort').value) || 587,
+    username:  document.getElementById('st-smtpUser').value.trim(),
+    password:  rawPw === '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022' ? null : rawPw,
+    from_addr: document.getElementById('st-smtpFrom').value.trim(),
+    use_tls:   document.getElementById('st-smtpTls').checked,
+    use_ssl:   false,
+  };
+  if (st) { st.style.color='var(--muted)'; st.textContent=t('m365_smtp_sending','Sending\u2026'); }
+  try {
+    const r = await fetch('/api/send_report', {method:'POST', headers:{'Content-Type':'application/json'},
+      body:JSON.stringify({recipients, smtp:cfg})});
+    const d = await r.json();
+    if (d.status === 'sent') {
+      if (st) { st.style.color='var(--accent)'; st.textContent=t('m365_smtp_sent','\u2714 Sent'); }
+      log(t('m365_smtp_sent','Report sent to') + ' ' + recipients.join(', '), 'ok');
+    } else {
+      if (st) { st.style.color='var(--danger)'; st.textContent=d.error||'Send failed'; }
+      log('Email send failed: '+(d.error||''),'err');
+    }
+  } catch(e){
+    if (st) { st.style.color='var(--danger)'; st.textContent=e.message; }
+  }
+}
+
+// ── Database tab ──────────────────────────────────────────────────────────────
+
+function stLoadDbStats() {
+  fetch('/api/db/stats').then(function(r){ return r.json(); }).then(function(d) {
+    const el = document.getElementById('st-dbStats');
+    if (!el) return;
+    if (d.error) { el.textContent = d.error; return; }
+    el.innerHTML =
+      '<span>' + t('m365_stat_scanned','Scanned items') + '</span>: <strong>' + (d.total_items||0) + '</strong><br>' +
+      '<span>' + t('m365_stat_flagged','Flagged items') + '</span>: <strong>' + (d.flagged_items||0) + '</strong><br>' +
+      '<span>' + t('m365_db_scans','Scans') + '</span>: <strong>' + (d.total_scans||0) + '</strong>';
+  }).catch(function(){ });
+}
+
+function stResetDB() {
+  if (!confirm(t('m365_db_reset_confirm','Reset database? All scan results will be deleted.'))) return;
+  requirePin(t('m365_settings_enter_pin_reset','Enter admin PIN to reset the database.'), function(pin) {
+    fetch('/api/db/reset', {method:'POST', headers:{'Content-Type':'application/json'},
+      body:JSON.stringify({confirm:'yes', pin:pin})
+    }).then(function(r){ return r.json(); }).then(function(d) {
+      if (d.error === 'incorrect_pin') { log(t('m365_settings_pin_wrong','Incorrect PIN \u2014 reset cancelled.'), 'err'); return; }
+      if (d.error) { log('Reset failed: '+d.error, 'err'); return; }
+      stLoadDbStats();
+      log(t('m365_db_reset_done','Database reset'));
+    }).catch(function(e){ log('Reset failed: '+e,'err'); });
+  });
+}
+
+// Redirect old openSmtpModal to Settings email tab.
+// The `send` parameter is accepted but not used by this function.
+function openSmtpModal(send) {
+  openSettings('email');
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+window.schedLoad = schedLoad;
+window.schedRenderJobs = schedRenderJobs;
+window.schedToggleEnabled = schedToggleEnabled;
+window.schedAddJob = schedAddJob;
+window.schedEditJob = schedEditJob;
+window.schedCancelEdit = schedCancelEdit;
+window.schedSaveJob = schedSaveJob;
+window.schedDeleteJob = schedDeleteJob;
+window.schedRunJob = schedRunJob;
+window.schedToggleFreqRows = schedToggleFreqRows;
+window.schedPopulateProfiles = schedPopulateProfiles;
+window.schedLoadHistory = schedLoadHistory;
+window.schedUpdateSidebarIndicator = schedUpdateSidebarIndicator;
+window.stPopulateGeneral = stPopulateGeneral;
+window.stLoadSmtp = stLoadSmtp;
+window.stSmtpSave = stSmtpSave;
+window.stSmtpTest = stSmtpTest;
+window.stSmtpSend = stSmtpSend;
+window.stLoadDbStats = stLoadDbStats;
+window.stResetDB = stResetDB;
+window.openSmtpModal = openSmtpModal;
+window._schedJobs = _schedJobs;
diff --git a/static/js/sources.js b/static/js/sources.js
new file mode 100644
index 0000000..b8d237a
--- /dev/null
+++ b/static/js/sources.js
@@ -0,0 +1,269 @@
+import { S } from './state.js';
+// ── Dynamic sources panel ─────────────────────────────────────────────────────
+
+// Fixed M365 sources — always present when authenticated
+const _M365_SOURCES = [
+  { id: 'email',      icon: '\uD83D\uDCE7', labelKey: 'm365_src_email',      labelDefault: 'Exchange / Outlook', toggleId: 'smSrcEmail' },
+  { id: 'onedrive',   icon: '\uD83D\uDCBE', labelKey: 'm365_src_onedrive',   labelDefault: 'OneDrive',           toggleId: 'smSrcOneDrive' },
+  { id: 'sharepoint', icon: '\uD83C\uDF10', labelKey: 'm365_src_sharepoint', labelDefault: 'SharePoint',         toggleId: 'smSrcSharePoint' },
+  { id: 'teams',      icon: '\uD83D\uDCAC', labelKey: 'm365_src_teams',      labelDefault: 'Teams',              toggleId: 'smSrcTeams' },
+];
+
+// Future connector stubs — uncomment when implemented
+// const _GMAIL_SOURCE  = { id: 'gmail',        icon: '\uD83D\uDCE7', labelKey: 'm365_src_gmail',       labelDefault: 'Gmail',        type: 'm365' };
+// const _GDRIVE_SOURCE = { id: 'googledrive',  icon: '\uD83D\uDCC1', labelKey: 'm365_src_googledrive', labelDefault: 'Google Drive', type: 'm365' };
+
+function renderSourcesPanel() {
+  const panel = document.getElementById('sourcesPanel');
+  if (!panel) return;
+
+  // Remember currently checked state before re-render
+  const checked = {};
+  panel.querySelectorAll('input[data-source-id]').forEach(function(cb) {
+    checked[cb.dataset.sourceId] = cb.checked;
+  });
+
+  let html = '';
+
+  // M365 fixed sources — only show if their toggle in Source Management is on
+  _M365_SOURCES.forEach(function(s) {
+    const toggle = s.toggleId ? document.getElementById(s.toggleId) : null;
+    if (toggle && !toggle.checked) return;  // hidden by user in Source Management
+    const isChecked = (s.id in checked) ? checked[s.id] : true;
+    html += '<label class="source-check">'
+      + '<input type="checkbox" data-source-id="' + s.id + '" data-source-type="m365"' + (isChecked ? ' checked' : '') + ' onchange="_onSourceChange()">'
+      + '<span class="source-icon">' + s.icon + '</span>'
+      + '<span class="source-label" data-i18n="' + s.labelKey + '">' + t(s.labelKey, s.labelDefault) + '</span>'
+      + '</label>';
+  });
+
+  // Google Workspace sources — only show if connected
+  if (window._googleConnected) {
+    var gmailToggle = document.getElementById('smGoogleSrcGmail');
+    var driveToggle = document.getElementById('smGoogleSrcDrive');
+    var showGmail = !gmailToggle || gmailToggle.checked;
+    var showDrive = !driveToggle || driveToggle.checked;
+    if (showGmail || showDrive) {
+      html += '<div style="margin:6px 0 2px"><hr style="border:none;border-top:1px solid var(--border);margin:1px 0 2px"></div>';
+    }
+    if (showGmail) {
+      var isCheckedG = ('gmail' in checked) ? checked['gmail']
+        : S._pendingGoogleSources !== null ? S._pendingGoogleSources.includes('gmail')
+        : true;
+      html += '<label class="source-check"><input type="checkbox" data-source-id="gmail" data-source-type="google"' + (isCheckedG ? ' checked' : '') + ' onchange="_onSourceChange()"><span class="source-icon">📧</span><span class="source-label">Gmail</span></label>';
+    }
+    if (showDrive) {
+      var isCheckedD = ('gdrive' in checked) ? checked['gdrive']
+        : S._pendingGoogleSources !== null ? S._pendingGoogleSources.includes('gdrive')
+        : true;
+      html += '<label class="source-check"><input type="checkbox" data-source-id="gdrive" data-source-type="google"' + (isCheckedD ? ' checked' : '') + ' onchange="_onSourceChange()"><span class="source-icon">📁</span><span class="source-label">Google Drive</span></label>';
+    }
+    // Pending has been applied — clear it
+    S._pendingGoogleSources = null;
+  }
+
+  // File sources (local / SMB) — one entry per saved source
+  if (S._fileSources.length > 0) {
+    html += '<div style="margin:6px 0 2px;font-size:10px;color:var(--muted);text-transform:uppercase;letter-spacing:.04em">'
+      + '<hr style="border:none;border-top:1px solid var(--border);margin:1px 0 2px">';
+    S._fileSources.forEach(function(s) {
+      const isSmb = s.path && (s.path.startsWith('//') || s.path.startsWith('\\\\'));
+      const icon  = isSmb ? '\uD83C\uDF10' : '\uD83D\uDCC1';
+      const label = s.label || s.path || s.id;
+      const isChecked = (s.id in checked) ? checked[s.id] : true;
+      html += '<label class="source-check">'
+        + '<input type="checkbox" data-source-id="' + _esc(s.id) + '" data-source-type="file"' + (isChecked ? ' checked' : '') + '>'
+        + '<span class="source-icon">' + icon + '</span>'
+        + '<span class="source-label" title="' + _esc(s.path || '') + '">' + _esc(label) + '</span>'
+        + '</label>';
+    });
+  }
+
+  panel.innerHTML = html;
+
+  // Resize panel to fit all rendered sources (respects user's saved smaller preference)
+  if (typeof _fitSourcesPanel === 'function') _fitSourcesPanel();
+
+  // Grey out the accounts section when no M365 sources are selected
+  _updateAccountsVisibility();
+}
+
+function _onSourceChange() {
+  _updateAccountsVisibility();
+  renderAccountList();
+}
+
+function _onGoogleSourceToggle() {
+  // Re-render sources panel (hides/shows Gmail+Drive checkboxes in KILDER)
+  renderSourcesPanel();
+  // Re-render accounts — 'both' users show as M365-only when Google sources disabled
+  renderAccountList();
+  // Persist toggle state
+  var gm = document.getElementById('smGoogleSrcGmail');
+  var gd = document.getElementById('smGoogleSrcDrive');
+  fetch('/api/src_toggles', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify({
+      src_gmail: gm ? gm.checked : true,
+      src_drive: gd ? gd.checked : true
+    })
+  }).catch(function(){});
+}
+function _saveM365SourceToggles() {
+  var state = {};
+  _M365_SOURCES.forEach(function(s) {
+    var el = s.toggleId ? document.getElementById(s.toggleId) : null;
+    if (el) state['src_toggle_' + s.id] = el.checked;
+  });
+  fetch('/api/src_toggles', {
+    method: 'POST', headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(state)
+  }).catch(function(){});
+}
+
+function _restoreM365SourceToggles(settings) {
+  _M365_SOURCES.forEach(function(s) {
+    var el = s.toggleId ? document.getElementById(s.toggleId) : null;
+    var key = 'src_toggle_' + s.id;
+    if (el && settings[key] !== undefined) el.checked = !!settings[key];
+  });
+  renderSourcesPanel();
+}
+
+function _googleSourcesEnabled() {
+  return !!(document.getElementById('smGoogleSrcGmail') && document.getElementById('smGoogleSrcGmail').checked)
+      || !!(document.getElementById('smGoogleSrcDrive') && document.getElementById('smGoogleSrcDrive').checked);
+}
+
+
+function _updateAccountsVisibility() {
+  const panel = document.getElementById('sourcesPanel');
+  const anyActive = panel
+    ? Array.from(panel.querySelectorAll('input[data-source-type]')).some(cb => cb.checked)
+    : false;
+  const sec = document.getElementById('accountsSection');
+  if (!sec) return;
+  sec.style.opacity       = anyActive ? '1' : '0.35';
+  sec.style.pointerEvents = anyActive ? '' : 'none';
+  sec.title               = anyActive ? '' : t('m365_accounts_disabled_tip', 'Select a source to enable account selection');
+}
+
+// ── Admin PIN ─────────────────────────────────────────────────────────────────
+
+// Callback waiting on the PIN prompt: set by requirePin() when the prompt opens,
+// cleared by closePinPrompt(), and invoked with the entered PIN by confirmPinPrompt().
+let _pinCallback = null;
+
+async function stLoadPinStatus() {
+  const r = await fetch('/api/admin/pin');
+  const d = await r.json();
+  const statusEl = document.getElementById('stPinStatus');
+  const currentRow = document.getElementById('stCurrentPinRow');
+  if (d.pin_set) {
+    if (statusEl) statusEl.textContent = '\u2714 ' + t('m365_settings_pin_set', 'Admin PIN is set');
+    if (currentRow) currentRow.style.display = '';
+  } else {
+    if (statusEl) statusEl.textContent = t('m365_settings_pin_not_set', 'No PIN set \u2014 Reset DB is unprotected');
+    if (currentRow) currentRow.style.display = 'none';
+  }
+}
+
+async function stSavePin() {
+  const newPin     = document.getElementById('stNewPin').value;
+  const confirmPin = document.getElementById('stConfirmPin').value;
+  const currentPin = document.getElementById('stCurrentPin')?.value || '';
+  const st         = document.getElementById('stPinSaveStatus');
+  if (!newPin) { st.style.color='var(--danger)'; st.textContent=t('m365_settings_pin_required','New PIN is required.'); return; }
+  if (newPin !== confirmPin) { st.style.color='var(--danger)'; st.textContent=t('m365_settings_pin_mismatch','PINs do not match.'); return; }
+  st.style.color='var(--muted)'; st.textContent=t('m365_fsrc_saving','Saving...');
+  try {
+    const r = await fetch('/api/admin/pin', {method:'POST', headers:{'Content-Type':'application/json'},
+      body: JSON.stringify({current_pin: currentPin, new_pin: newPin})});
+    const d = await r.json();
+    if (d.error === 'incorrect_pin') { st.style.color='var(--danger)'; st.textContent=t('m365_settings_pin_wrong','Current PIN is incorrect.'); return; }
+    if (d.error) { st.style.color='var(--danger)'; st.textContent=d.error; return; }
+    st.style.color='var(--accent)'; st.textContent='\u2714 '+t('m365_settings_pin_saved','PIN saved');
+    ['stNewPin','stConfirmPin','stCurrentPin'].forEach(function(id){const el=document.getElementById(id);if(el)el.value='';});
+    stLoadPinStatus();
+  } catch(e){ st.style.color='var(--danger)'; st.textContent=e.message; }
+}
+
+// PIN prompt — used for destructive actions
+function requirePin(message, callback) {
+  fetch('/api/admin/pin').then(function(r){return r.json();}).then(function(d) {
+    if (!d.pin_set) {
+      // No PIN set — proceed directly
+      callback('');
+      return;
+    }
+    _pinCallback = callback;
+    const msg = document.getElementById('pinPromptMsg');
+    const inp = document.getElementById('pinPromptInput');
+    const err = document.getElementById('pinPromptError');
+    if (msg) msg.textContent = message || t('m365_settings_enter_pin','Enter admin PIN to continue.');
+    if (inp) inp.value = '';
+    if (err) err.textContent = '';
+    document.getElementById('pinPromptBackdrop').classList.add('open');
+    setTimeout(function(){ if(inp) inp.focus(); }, 100);
+  });
+}
+
+function closePinPrompt() {
+  document.getElementById('pinPromptBackdrop').classList.remove('open');
+  _pinCallback = null;
+}
+
+function confirmPinPrompt() {
+  const pin = document.getElementById('pinPromptInput').value;
+  const err = document.getElementById('pinPromptError');
+  if (!pin) { if(err) err.textContent = t('m365_settings_pin_required','PIN is required.'); return; }
+  const cb = _pinCallback;   // save before closePinPrompt nulls it
+  closePinPrompt();
+  if (cb) cb(pin);
+}
+
+// ── Settings modal ────────────────────────────────────────────────────────────
+
+function openSettings(tab) {
+  document.getElementById('settingsBackdrop').classList.add('open');
+  switchSettingsTab(tab || 'general');
+  stPopulateGeneral();
+  if (tab === 'email')    stLoadSmtp();
+  if (tab === 'database') stLoadDbStats();
+  if (tab === 'scheduler') schedLoad();
+}
+
+function closeSettings() {
+  document.getElementById('settingsBackdrop').classList.remove('open');
+}
+
+function switchSettingsTab(tab) {
+  ['general','security','scheduler','email','database'].forEach(function(t) {
+    var cap = t.charAt(0).toUpperCase() + t.slice(1);
+    var pane = document.getElementById('stPane' + cap);
+    var btn  = document.getElementById('stTab'  + cap);
+    if (pane) pane.classList.toggle('active', t === tab);
+    if (btn)  btn.classList.toggle('active', t === tab);
+  });
+  if (tab === 'security')  { stLoadPinStatus(); if (typeof stLoadViewerPinStatus === 'function') stLoadViewerPinStatus(); }
+  if (tab === 'email')     stLoadSmtp();
+  if (tab === 'database')  stLoadDbStats();
+  if (tab === 'scheduler') schedLoad();
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+window.renderSourcesPanel = renderSourcesPanel;
+window._onSourceChange = _onSourceChange;
+window._onGoogleSourceToggle = _onGoogleSourceToggle;
+window._saveM365SourceToggles = _saveM365SourceToggles;
+window._restoreM365SourceToggles = _restoreM365SourceToggles;
+window._googleSourcesEnabled = _googleSourcesEnabled;
+window._updateAccountsVisibility = _updateAccountsVisibility;
+window.stLoadPinStatus = stLoadPinStatus;
+window.stSavePin = stSavePin;
+window.requirePin = requirePin;
+window.closePinPrompt = closePinPrompt;
+window.confirmPinPrompt = confirmPinPrompt;
+window.openSettings = openSettings;
+window.closeSettings = closeSettings;
+window.switchSettingsTab = switchSettingsTab;
+window._M365_SOURCES = _M365_SOURCES;
+window._pinCallback = _pinCallback;
diff --git a/static/js/state.js b/static/js/state.js
new file mode 100644
index 0000000..d89869b
--- /dev/null
+++ b/static/js/state.js
@@ -0,0 +1,31 @@
+// state.js — shared mutable state for GDPRScanner
+// Imported by every module that needs cross-module state.
+// Use S.varName everywhere instead of bare varName.
+
+// Single shared state object. It is exported by reference, so every importing
+// module reads and mutates the same fields.
+export const S = {
+  // Scan results
+  // setLang() re-renders the grid with filteredData when it is non-empty,
+  // otherwise with flaggedData.
+  flaggedData:          [],
+  filteredData:         [],
+  totalCPR:             0,
+  isListView:           false,
+  // SSE connection
+  es:                   null,
+  _userStartedScan:     false,
+  // Scan running flags + progress
+  _m365ScanRunning:     false,
+  _googleScanRunning:   false,
+  _fileScanRunning:     false,
+  _srcPct:              { m365: 0, google: 0, file: 0 },
+  _progressCurrentUser: '',
+  // Users
+  // Account objects shown in the accounts list; users.js reads/writes fields such
+  // as id, displayName, email, userRole, platform, selected and manual on them.
+  _allUsers:            [],
+  // Auth
+  // openModeInfo() treats exactly `true` as app mode; any other value (including
+  // the initial null) is shown as delegated mode.
+  _currentAppMode:      null,
+  // Profiles
+  _profiles:            [],
+  _activeProfileId:     null,
+  _pendingProfileSources: [],
+  // null = nothing pending. When non-null, renderSourcesPanel() uses it (via
+  // .includes('gmail' / 'gdrive')) to pre-check Google sources, then resets it to null.
+  _pendingGoogleSources:  null,
+  // Sources
+  // Saved file sources; renderSourcesPanel() reads id, path and label from each entry.
+  _fileSources:         [],
+};
diff --git a/static/js/ui.js b/static/js/ui.js
new file mode 100644
index 0000000..61c58fa
--- /dev/null
+++ b/static/js/ui.js
@@ -0,0 +1,120 @@
+import { S } from './state.js';
+// Global error trap — logs JS errors to console without blocking the page
+window.onerror = function(msg, src, line, col, err) {
+  console.error('JS Error [' + (src||'').split('/').pop() + ':' + line + '] ' + msg, err);
+  return false;
+};
+window.addEventListener('unhandledrejection', function(e) {
+  console.error('Unhandled promise rejection:', e.reason);
+});
+
+// ── Theme ────────────────────────────────────────────────────────────────────
+function openAbout() {
+  document.getElementById('aboutBackdrop').classList.add('open');
+  fetch('/api/about').then(r => r.json()).then(d => {
+    document.getElementById('about-python').textContent   = d.python   || '—';
+    document.getElementById('about-msal').textContent     = d.msal     || '—';
+    document.getElementById('about-requests').textContent = d.requests || '—';
+    document.getElementById('about-openpyxl').textContent = d.openpyxl || '—';
+  }).catch(() => {});
+}
+function closeAbout() {
+  document.getElementById('aboutBackdrop').classList.remove('open');
+}
+
+// ── Mode info modal ───────────────────────────────────────────────────────────
+/**
+ * Open the authentication-mode info dialog.
+ *
+ * Fills the title, subtitle and detail rows with the description of either
+ * app mode or delegated mode, then shows the dialog. App mode is used only
+ * when `S._currentAppMode` is exactly `true`; every other value (including
+ * null) produces the delegated description.
+ */
+function openModeInfo() {
+  const isApp = S._currentAppMode === true;
+  const title   = document.getElementById('modeInfoTitle');
+  const sub     = document.getElementById('modeInfoSubtitle');
+  const rows    = document.getElementById('modeInfoRows');
+
+  if (isApp) {
+    // Application permissions (client credentials)
+    title.textContent = t('m365_mode_app', '🔑 App mode — org-wide');
+    sub.textContent   = t('m365_auth_mode_app_short', 'Application permissions · client credentials');
+    // Translated strings are interpolated into the markup as-is (not escaped)
+    rows.innerHTML = `
+      <div class="about-row"><span>${t('m365_info_permissions','Permissions')}</span><span>Application</span></div>
+      <div class="about-row"><span>${t('m365_info_signin','Sign-in required')}</span><span>${t('m365_info_no','No')}</span></div>
+      <div class="about-row"><span>${t('m365_info_scope','Scope')}</span><span>${t('m365_info_scope_org','All users in tenant')}</span></div>
+      <div class="about-row"><span>${t('m365_info_consent','Admin consent')}</span><span>${t('m365_info_required','Required')}</span></div>
+      <div style="margin-top:12px;font-size:11px;color:var(--muted);line-height:1.6">
+        ${t('m365_info_app_desc','The app authenticates with a Client Secret and accesses all users\' data directly via Microsoft Graph — no interactive sign-in needed. Ideal for automated or scheduled scans.')}
+      </div>`;
+  } else {
+    // Delegated permissions (device code flow)
+    title.textContent = t('m365_mode_delegated', '👤 Delegated');
+    sub.textContent   = t('m365_auth_mode_delegated_short', 'Delegated permissions · device code flow');
+    rows.innerHTML = `
+      <div class="about-row"><span>${t('m365_info_permissions','Permissions')}</span><span>Delegated</span></div>
+      <div class="about-row"><span>${t('m365_info_signin','Sign-in required')}</span><span>${t('m365_info_yes','Yes')}</span></div>
+      <div class="about-row"><span>${t('m365_info_scope','Scope')}</span><span>${t('m365_info_scope_user','Signed-in user only')}</span></div>
+      <div class="about-row"><span>${t('m365_info_admin','Global Admin')}</span><span>${t('m365_info_expands_scope','Expands scope to all users')}</span></div>
+      <div style="margin-top:12px;font-size:11px;color:var(--muted);line-height:1.6">
+        ${t('m365_info_delegated_desc','The app acts on behalf of the signed-in user via the device code flow. By default only that user\'s data is accessible. A Global Admin can grant broader consent to scan all users.')}
+      </div>`;
+  }
+  document.getElementById('modeInfoBackdrop').classList.add('open');
+}
+function closeModeInfo() {
+  document.getElementById('modeInfoBackdrop').classList.remove('open');
+}
+
+function toggleTheme() {
+  const t = document.body.dataset.theme === 'dark' ? 'light' : 'dark';
+  document.body.dataset.theme = t;
+  document.getElementById('themeBtn').textContent = t === 'dark' ? '🌙' : '☀️';
+  try { localStorage.setItem('m365_theme', t); } catch(e) {}
+}
+(function() {
+  try {
+    const t = localStorage.getItem('m365_theme');
+    if (t) {
+      document.body.dataset.theme = t;
+      const btn = document.getElementById('themeBtn');
+      if (btn) btn.textContent = t === 'dark' ? '🌙' : '☀️';
+    }
+  } catch(e) {}
+})();
+
+// ── Language selector ─────────────────────────────────────────────────────────
+fetch('/api/langs').then(r => r.json()).then(d => {
+  const sel = document.getElementById('langSelect');
+  if (!sel || !d.langs || d.langs.length < 2) {
+    if (sel) sel.style.display = 'none';
+    return;
+  }
+  d.langs.forEach(l => {
+    const opt = document.createElement('option');
+    opt.value = l.code;
+    opt.textContent = l.name;
+    if (l.code === d.current) opt.selected = true;
+    sel.appendChild(opt);
+  });
+}).catch(() => {
+  const sel = document.getElementById('langSelect');
+  if (sel) sel.style.display = 'none';
+});
+
+async function setLang(code) {
+  const r = await fetch('/api/set_lang', {
+    method: 'POST',
+    headers: {'Content-Type': 'application/json'},
+    body: JSON.stringify({lang: code})
+  });
+  const d = await r.json();
+  if (d.translations) {
+    // Update the in-memory LANG dict and re-apply all translations in place.
+    // This keeps all scan results, cards, and state intact.
+    Object.assign(LANG, d.translations);
+    applyI18n();
+    // Re-render the grid so card text (source badges etc.) picks up new strings
+    if (S.flaggedData.length) renderGrid(S.filteredData.length ? S.filteredData : S.flaggedData);
+  }
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+window.openAbout = openAbout;
+window.closeAbout = closeAbout;
+window.openModeInfo = openModeInfo;
+window.closeModeInfo = closeModeInfo;
+window.toggleTheme = toggleTheme;
+window.setLang = setLang;
diff --git a/static/js/users.js b/static/js/users.js
new file mode 100644
index 0000000..82258f6
--- /dev/null
+++ b/static/js/users.js
@@ -0,0 +1,475 @@
+import { S } from './state.js';
+// ── Accounts ──────────────────────────────────────────────────────────────────
+
+async function loadUsers() {
+  const list    = document.getElementById('accountsList');
+  const loading = document.getElementById('accountsLoading');
+  if (!list) return;
+  if (loading) loading.textContent = t('lbl_loading', 'Loading…');
+  // Ensure source panel checkboxes exist before we render the account list
+  if (!document.querySelector('#sourcesPanel input') && typeof renderSourcesPanel === 'function') {
+    renderSourcesPanel();
+  }
+  try {
+    const r = await fetch('/api/users');
+    if (!r.ok) { if (loading) loading.textContent = 'Could not load users'; return; }
+    const d = await r.json();
+    if (d.error) { if (loading) loading.textContent = d.error; return; }
+    // Merge with any manually-added users, preserving them
+    const fetched = d.users || [];
+    fetched.forEach(u => { u.platform = 'm365'; });
+    const existingManual = S._allUsers.filter(u => u.manual);
+    const fetchedIds = new Set(fetched.map(u => u.id));
+    const toAdd = existingManual.filter(u => !fetchedIds.has(u.id));
+    // Preserve existing selected state for users already in S._allUsers;
+    // new users default to selected=true
+    const prevSelected = new Map(S._allUsers.map(u => [u.id, u.selected]));
+    fetched.forEach(u => {
+      u.selected = prevSelected.has(u.id) ? prevSelected.get(u.id) : false;
+    });
+    S._allUsers = [...fetched, ...toAdd];
+    renderAccountList(fetched.length <= 1);
+    // Merge Google users separately so they're not blocked by M365 auth timing
+    _mergeGoogleUsers();
+    checkCheckpoint();
+    checkDeltaStatus();
+    _applyPendingProfileUsers();
+
+    // Show warning banner when no users could be classified
+    const warn = document.getElementById('skuWarnBanner');
+    if (warn) {
+      const allOther = fetched.length > 0 && fetched.every(u => u.userRole === 'other');
+      warn.style.display = allOther ? 'block' : 'none';
+    }
+  } catch(e) {
+    if (loading) loading.textContent = 'Could not load users';
+  }
+}
+
+async function _mergeGoogleUsers() {
+  if (!window._googleConnected) return;
+  try {
+    var gr = await fetch('/api/google/scan/users');
+    if (!gr.ok) return;
+    var gd = await gr.json();
+    if (gd.error) return;
+    var prevSelected = new Map(S._allUsers.map(function(u){ return [u.id, u.selected]; }));
+
+    // Build displayName → Google user map for cross-platform matching
+    // Both M365 and GWS are maintained from AD — full name is identical
+    var googleByName = {};
+    (gd.users || []).forEach(function(gu) {
+      var name = (gu.displayName || '').trim().toLowerCase();
+      if (name) googleByName[name] = gu;
+    });
+
+    // Merge onto M365 users where display name matches
+    var matchedNames = new Set();
+    S._allUsers.forEach(function(u) {
+      if ((u.platform || 'm365') !== 'm365') return;
+      var name = (u.displayName || '').trim().toLowerCase();
+      var gu = googleByName[name];
+      if (gu) {
+        u.platform    = 'both';
+        u.googleEmail = gu.email;
+        // Keep M365 displayName (from AD, authoritative)
+        matchedNames.add(name);
+      } else {
+        // Clear previous merge if Google disconnected
+        delete u.googleEmail;
+        u.platform = 'm365';
+      }
+    });
+
+    // Add unmatched Google users as standalone entries
+    var googleUsers = [];
+    (gd.users || []).forEach(function(gu) {
+      var name = (gu.displayName || '').trim().toLowerCase();
+      if (matchedNames.has(name)) return;  // already merged
+      var uid = 'google:' + gu.email;
+      googleUsers.push({
+        id:          uid,
+        displayName: gu.displayName || gu.email,
+        email:       gu.email,
+        userRole:    gu.userRole || 'other',
+        platform:    'google',
+        selected:    prevSelected.has(uid) ? prevSelected.get(uid) : false,
+      });
+    });
+
+    // Remove stale standalone Google users, add fresh unmatched ones
+    S._allUsers = S._allUsers.filter(function(u){ return (u.platform||'m365') !== 'google'; });
+    S._allUsers = S._allUsers.concat(googleUsers);
+    renderAccountList();
+  } catch(e) { /* Google users unavailable */ }
+}
+
+// Role filter currently applied to the account list; changed by setRoleFilter().
+let _activeRoleFilter = '';  // '' = all, 'staff', 'student'
+
+// ── Sidebar section collapse ──────────────────────────────────────────────────
+// Ids of the sidebar sections whose collapsed state restoreSectionStates()
+// re-applies from localStorage (key 'sc_' + id, value '1' = collapsed).
+const _COLLAPSE_SECTIONS = ['sourcesPanelSection', 'optionsSection', 'accountsSection', 'logSection'];
+
+function toggleSection(id) {
+  const body = document.getElementById(id + 'Body');
+  if (!body) return;
+  const collapsing = body.style.display !== 'none';
+  body.style.display = collapsing ? 'none' : '';
+  const btn = document.getElementById(id + '-btn');
+  if (btn) btn.textContent = collapsing ? '▸' : '▾';
+  if (id === 'accountsSection') {
+    const sec = document.getElementById('accountsSection');
+    if (sec) sec.style.flex = collapsing ? '0 0 auto' : '1';
+  }
+  try { localStorage.setItem('sc_' + id, collapsing ? '1' : '0'); } catch(e) {}
+}
+
+function restoreSectionStates() {
+  _COLLAPSE_SECTIONS.forEach(function(id) {
+    try {
+      if (localStorage.getItem('sc_' + id) === '1') {
+        const body = document.getElementById(id + 'Body');
+        if (body) body.style.display = 'none';
+        const btn = document.getElementById(id + '-btn');
+        if (btn) btn.textContent = '▸';
+        if (id === 'accountsSection') {
+          const sec = document.getElementById('accountsSection');
+          if (sec) sec.style.flex = '0 0 auto';
+        }
+      }
+    } catch(e) {}
+  });
+}
+
+// ── Role filter with counts ───────────────────────────────────────────────────
+function updateRoleFilterCounts() {
+  const total   = S._allUsers.filter(function(u){ return !u.manual; }).length;
+  const staff   = S._allUsers.filter(function(u){ return !u.manual && u.userRole === 'staff'; }).length;
+  const student = S._allUsers.filter(function(u){ return !u.manual && u.userRole === 'student'; }).length;
+  const btnAll  = document.getElementById('rfAll');
+  const btnStaff   = document.getElementById('rfStaff');
+  const btnStudent = document.getElementById('rfStudent');
+  if (btnAll)     btnAll.textContent     = t('m365_role_all','All') + (total   ? ' (' + total   + ')' : '');
+  if (btnStaff)   btnStaff.textContent   = t('role_staff','Ansat') + (staff   ? ' (' + staff   + ')' : '');
+  if (btnStudent) btnStudent.textContent = t('role_student','Elev') + (student ? ' (' + student + ')' : '');
+}
+
+function setRoleFilter(role) {
+  _activeRoleFilter = role;
+  [['rfAll',''],['rfStaff','staff'],['rfStudent','student']].forEach(function(pair) {
+    const btn = document.getElementById(pair[0]);
+    if (!btn) return;
+    const active = role === pair[1];
+    btn.style.background = active ? 'var(--accent)' : 'none';
+    btn.style.color      = active ? '#fff' : 'var(--muted)';
+  });
+  updateRoleFilterCounts();
+  filterUsers();
+}
+
+// ── Last scan summary (empty state) ──────────────────────────────────────────
+async function loadLastScanSummary() {
+  try {
+    const r = await fetch('/api/db/stats');
+    const d = await r.json();
+    if (!d.scan_id || S.flaggedData.length > 0) return;
+    const panel = document.getElementById('lastScanSummary');
+    const empty = document.getElementById('emptyState');
+    if (!panel || !empty) return;
+
+    const dateStr = d.finished_at
+      ? new Date(d.finished_at * 1000).toLocaleDateString('da-DK', {day:'numeric', month:'short', year:'numeric'})
+      : '—';
+    const sources = Object.keys(d.by_source || {});
+    const srcLabels = {'email':'Outlook','onedrive':'OneDrive','sharepoint':'SharePoint','teams':'Teams',
+                       'gmail':'Gmail','gdrive':'Drive','local':'Lokale filer','smb':'SMB'};
+    const srcStr = sources.map(function(s){ return srcLabels[s] || s; }).join(' · ') || '—';
+
+    panel.innerHTML =
+      '<div class="last-scan-card">' +
+        '<h3>' + t('last_scan_title', 'Seneste scanning') + '</h3>' +
+        '<div class="last-scan-stats">' +
+          '<div class="last-scan-stat"><span class="val">' + (d.flagged_count || 0) + '</span><span class="lbl">' + t('last_scan_hits', 'Fund') + '</span></div>' +
+          '<div class="last-scan-stat"><span class="val">' + (d.unique_subjects || 0) + '</span><span class="lbl">' + t('last_scan_subjects', 'Unikke CPR') + '</span></div>' +
+          '<div class="last-scan-stat"><span class="val">' + (d.total_scanned || 0) + '</span><span class="lbl">' + t('last_scan_scanned', 'Scannet') + '</span></div>' +
+        '</div>' +
+        '<div style="margin-top:12px;font-size:11px;color:var(--muted)">' + dateStr + ' &nbsp;·&nbsp; ' + srcStr + '</div>' +
+      '</div>' +
+      '<div class="empty-text" style="font-size:12px">' + t('m365_empty_hint', 'Vælg kilder og klik på <strong>Scan</strong><br>for at starte en ny scanning') + '</div>';
+
+    empty.style.display = 'none';
+    panel.style.display = 'flex';
+  } catch(e) {}
+}
+
+function renderAccountList(showAdminNote = false) {
+  updateRoleFilterCounts();
+  const list = document.getElementById('accountsList');
+  if (!list) return;
+  const q = (document.getElementById('userSearch')?.value || '').toLowerCase().trim();
+
+  let visible = S._allUsers;
+
+  // Filter by platform: only show accounts relevant to checked sources
+  // If the sources panel hasn't been rendered yet (no checkboxes at all), treat M365 as active
+  var panelHasAny = !!document.querySelector('#sourcesPanel input[data-source-type]');
+  var hasM365Src   = panelHasAny
+    ? !!document.querySelector('#sourcesPanel input[data-source-type="m365"]:checked')
+    : S._allUsers.some(function(u){ return !u.platform || u.platform === 'm365' || u.platform === 'both'; });
+  var hasGoogleSrc = !!document.querySelector('#sourcesPanel input[data-source-type="google"]:checked');
+  // Always filter — if neither is active, show nothing
+  // Check if Google is enabled in Source Management (not just selected in KILDER)
+  var googleEnabled = !!(document.getElementById('smGoogleSrcGmail') && document.getElementById('smGoogleSrcGmail').checked)
+                   || !!(document.getElementById('smGoogleSrcDrive') && document.getElementById('smGoogleSrcDrive').checked);
+  var effectiveGws = hasGoogleSrc && googleEnabled;
+  visible = visible.filter(function(u) {
+    var plat = u.platform || 'm365';
+    if (plat === 'both') return hasM365Src || effectiveGws;
+    return (plat === 'm365' && hasM365Src) || (plat === 'google' && effectiveGws);
+  });
+
+  // Apply role filter first
+  if (_activeRoleFilter) {
+    visible = visible.filter(u => (u.userRole || 'other') === _activeRoleFilter);
+  }
+
+  // Then apply text search
+  if (q) {
+    visible = visible.filter(u =>
+      (u.displayName || '').toLowerCase().includes(q) ||
+      (u.email || '').toLowerCase().includes(q));
+  }
+
+  _updateUserCountBadge(visible.length, S._allUsers.length);
+
+  const note = (!q && !_activeRoleFilter && showAdminNote)
+    ? `<div style="font-size:10px;color:var(--muted);padding:4px 0 6px;line-height:1.4">${t('m365_admin_note','Only showing your account. To list all users, an admin must grant <strong>User.Read.All</strong> consent.')}</div>`
+    : '';
+
+  const noMatch = (q || _activeRoleFilter) && !visible.length
+    ? `<div style="padding:4px 0;color:var(--muted);font-size:11px">${t('m365_no_users_match','No users match')} "${q || _activeRoleFilter}"</div>`
+    : '';
+
+  list.innerHTML = note + noMatch + visible.map(u => `
+    <label style="display:flex;align-items:center;gap:7px;padding:2px 0;cursor:pointer">
+      <input type="checkbox" class="account-check" data-id="${u.id}" data-name="${u.displayName}" data-role="${u.userRole || 'other'}"
+             ${u.selected !== false ? 'checked' : ''}
+             onchange="onAccountCheckChange('${u.id}', this.checked)">
+      <span style="flex:1;overflow:hidden">
+        <span style="display:block;font-weight:500;white-space:nowrap;overflow:hidden;text-overflow:ellipsis">${u.displayName}${u.isMe ? ' <span style=\'color:var(--accent);font-size:10px\'>(you)</span>' : ''}${u.manual ? ' <span style=\'color:var(--muted);font-size:10px\'>(manual)</span>' : ''}</span>
+        <span style="color:var(--muted);font-size:10px;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;display:block">${u.email}</span>
+      </span>
+      <span style="font-size:9px;padding:1px 5px;border-radius:10px;flex-shrink:0;"
+            class="${u.platform==='both' ? (hasM365Src && effectiveGws ? 'plat-badge-both' : effectiveGws ? 'plat-badge-google' : 'plat-badge-m365') : (u.platform||'m365')==='google' ? 'plat-badge-google' : 'plat-badge-m365'}">
+        ${u.platform==='both' ? (hasM365Src && effectiveGws ? 'M365 + GWS' : effectiveGws ? 'GWS' : 'M365') : (u.platform||'m365')==='google' ? 'GWS' : 'M365'}
+      </span>
+      <button type="button" onclick="cycleUserRole(this.getAttribute('data-uid'))"
+              data-uid="${u.id.replace(/&/g,'&amp;').replace(/'/g,'&#39;').replace(/"/g,'&quot;')}"
+              title="${t('m365_role_cycle_tip','Click to change role')}"
+              class="role-badge" style="font-size:9px;padding:1px 5px;cursor:pointer;flex-shrink:0;white-space:nowrap;border:none;${u.roleOverride ? 'color:var(--color-text-info);outline:1px solid var(--color-border-info)' : ''}">
+        ${u.userRole === 'student' ? t('role_student','Elev') : u.userRole === 'staff' ? t('role_staff','Ansat') : t('role_other','Anden')}${u.roleOverride ? ' ✎' : ''}
+      </button>
+      ${u.manual ? `<button onclick="removeUser(this.getAttribute('data-uid'))" data-uid="${u.id.replace(/&/g,'&amp;').replace(/'/g,'&#39;').replace(/"/g,'&quot;')}" style="background:none;border:none;color:var(--muted);cursor:pointer;font-size:13px;padding:0;flex-shrink:0" title="Remove">×</button>` : ''}
+    </label>`).join('');
+}
+
+function _updateUserCountBadge(visible, total) {
+  const badge = document.getElementById('userCountBadge');
+  if (!badge) return;
+  if (total === 0) { badge.textContent = ''; return; }
+  badge.textContent = visible < total ? `(${visible} / ${total})` : `(${total})`;
+}
+
+// ── SKU debug — surface unknown tenant SKU IDs so they can be added to m365_skus.json ──
+async function showSkuDebug() {
+  let modal = document.getElementById('skuDebugModal');
+  if (!modal) {
+    modal = document.createElement('div');
+    modal.id = 'skuDebugModal';
+    modal.style.cssText = 'position:fixed;inset:0;background:rgba(0,0,0,.55);z-index:1000;display:flex;align-items:center;justify-content:center';
+    modal.onclick = e => { if (e.target === modal) modal.remove(); };
+    document.body.appendChild(modal);
+  }
+  modal.innerHTML = `<div style="background:var(--surface);border:1px solid var(--border);border-radius:10px;padding:22px 26px;width:min(520px,95vw);max-height:80vh;display:flex;flex-direction:column;gap:12px;font-size:12px">
+    <div style="display:flex;align-items:center;justify-content:space-between">
+      <strong style="font-size:13px">${t('m365_sku_debug_title','🔍 Tenant SKU IDs')}</strong>
+      <button onclick="document.getElementById('skuDebugModal').remove()" style="background:none;border:none;color:var(--muted);cursor:pointer;font-size:16px">×</button>
+    </div>
+    <div style="color:var(--muted);font-size:11px;line-height:1.5">${t('m365_sku_debug_desc','These are the raw SKU IDs assigned to your users. Any marked <b>❓ unknown</b> are not in <code>classification/m365_skus.json</code> — copy them in under <code>student_ids</code> or <code>staff_ids</code> and restart.')}</div>
+    <div id="skuDebugList" style="overflow-y:auto;flex:1;font-family:var(--mono);font-size:11px">Loading…</div>
+    <div style="display:flex;justify-content:flex-end;gap:8px;padding-top:4px;border-top:1px solid var(--border)">
+      <button onclick="document.getElementById('skuDebugModal').remove()" style="background:none;border:1px solid var(--border);color:var(--muted);padding:4px 14px;border-radius:6px;cursor:pointer">${t('btn_close','Close')}</button>
+    </div>
+  </div>`;
+
+  const listEl = document.getElementById('skuDebugList');
+  try {
+    const r = await fetch('/api/users/license_debug');
+    const d = await r.json();
+    if (d.error) { listEl.textContent = 'Error: ' + d.error; return; }
+
+    // Collect unique SKUs across all users
+    const skuSeen = {};  // skuId → {name, role, count, known}
+    for (const u of (d.users || [])) {
+      for (let i = 0; i < (u.skuIds || []).length; i++) {
+        const id  = u.skuIds[i];
+        const nm  = (u.skuNames || [])[i] || '';
+        if (!skuSeen[id]) skuSeen[id] = { name: nm, role: u.role, count: 0 };
+        skuSeen[id].count++;
+      }
+    }
+
+    const rows = Object.entries(skuSeen).sort((a,b) => b[1].count - a[1].count);
+    if (!rows.length) { listEl.textContent = t('m365_sku_debug_none','No license data returned — check that the app has User.Read.All permission.'); return; }
+
+    const knownStudent = new Set((d.student_ids || []));
+    const knownStaff   = new Set((d.staff_ids   || []));
+
+    listEl.innerHTML = rows.map(([id, info]) => {
+      const known = knownStudent.has(id) ? '🎓 student'
+                  : knownStaff.has(id)   ? '👔 staff'
+                  : '❓ unknown';
+      const color = known.startsWith('❓') ? 'var(--danger)' : 'var(--accent)';
+      return `<div style="display:flex;align-items:baseline;gap:8px;padding:3px 0;border-bottom:1px solid var(--border)">
+        <code style="flex:1;color:var(--text);user-select:all">${id}</code>
+        <span style="color:var(--muted);font-size:10px;white-space:nowrap">${info.name || '—'}</span>
+        <span style="color:${color};font-size:10px;white-space:nowrap;flex-shrink:0">${known} (${info.count})</span>
+      </div>`;
+    }).join('');
+  } catch(e) {
+    listEl.textContent = 'Error: ' + e.message;
+  }
+}
+
+function filterUsers() {
+  const showAdminNote = S._allUsers.filter(u => !u.manual).length <= 1;
+  renderAccountList(showAdminNote);
+}
+
+async function cycleUserRole(id) {
+  // Cycle: student → staff → other → (clear override, back to auto)
+  if (!id) { console.warn('cycleUserRole: no id'); return; }
+  const u = S._allUsers.find(u => u.id === id);
+  if (!u) { console.warn('cycleUserRole: user not found for id', id); return; }
+  const cycle = ['student', 'staff', 'other'];
+  let next;
+  if (!u.roleOverride) {
+    // First click: remember auto role, pin to next in cycle
+    u._autoRole   = u.userRole;
+    u._cycleSteps = 0;
+    const cur = cycle.indexOf(u.userRole);
+    next = cycle[(cur + 1) % cycle.length];
+  } else {
+    u._cycleSteps = (u._cycleSteps || 0) + 1;
+    if (u._cycleSteps >= cycle.length) {
+      next = '';  // full cycle completed — clear override
+    } else {
+      const cur = cycle.indexOf(u.userRole);
+      next = cycle[(cur + 1) % cycle.length];
+    }
+  }
+  try {
+    const r = await fetch('/api/users/role_override', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({user_id: id, role: next})
+    });
+    const d = await r.json();
+    if (d.error) { log('Role override failed: ' + d.error, 'err'); return; }
+    // Update local state
+    if (next) {
+      if (!u.roleOverride) u._autoRole = u.userRole;  // remember original for clear
+      u.userRole = next;
+      u.roleOverride = true;
+    } else {
+      u.userRole = u._autoRole || u.userRole;
+      u.roleOverride = false;
+      u._autoRole = undefined;
+    }
+    // Update the role filter count badges and re-render
+    renderAccountList(S._allUsers.filter(u => !u.manual).length <= 1);
+    log((next ? t('m365_role_set', 'Role set') + ': ' + next : t('m365_role_cleared', 'Role override cleared')) + ' — ' + (u.displayName || id));
+  } catch(e) {
+    log('Role override error: ' + e.message, 'err');
+  }
+}
+
+function removeUser(id) {
+  S._allUsers = S._allUsers.filter(u => u.id !== id);
+  renderAccountList(S._allUsers.filter(u => !u.manual).length <= 1);
+}
+
+async function addUserManually() {
+  const input = document.getElementById('addUserInput');
+  const upn = input.value.trim();
+  if (!upn) return;
+  // Look up the user via server
+  const btn = input.nextElementSibling;
+  btn.disabled = true; btn.textContent = '…';
+  try {
+    const r = await fetch('/api/users/lookup?upn=' + encodeURIComponent(upn));
+    const d = await r.json();
+    if (d.error) { alert('User not found: ' + d.error); return; }
+    if (S._allUsers.find(u => u.id === d.id)) { alert('User already in list.'); return; }
+    S._allUsers.push({...d, manual: true});
+    input.value = '';
+    renderAccountList(S._allUsers.filter(u => !u.manual).length <= 1);
+  } catch(e) {
+    alert('Lookup failed: ' + e.message);
+  } finally {
+    btn.disabled = false; btn.textContent = '+';
+  }
+}
+
+function onAccountCheckChange(id, checked) {
+  const user = S._allUsers.find(u => u.id === id);
+  if (user) user.selected = checked;
+}
+
+function selectAllAccounts(checked) {
+  // Toggle all visible users (respects search + role filter)
+  const visible = new Set(
+    Array.from(document.querySelectorAll('#accountsList .account-check')).map(cb => cb.dataset.id)
+  );
+  S._allUsers.forEach(u => { if (visible.has(u.id)) u.selected = checked; });
+  document.querySelectorAll('#accountsList .account-check').forEach(cb => cb.checked = checked);
+}
+
+function getSelectedUsers() {
+  // Only return M365 users — Google users are handled separately via selectedGoogleEmails
+  let selected = S._allUsers.filter(u => u.selected !== false && (u.platform === 'm365' || u.platform === 'both'));
+  // Respect the active role filter — hidden users must not sneak into the scan
+  // even if they were checked before the filter was applied.
+  if (_activeRoleFilter) {
+    selected = selected.filter(u => (u.userRole || 'other') === _activeRoleFilter);
+  }
+  if (selected.length) {
+    return selected.map(u => ({
+      id: u.id, displayName: u.displayName, userRole: u.userRole || 'other'
+    }));
+  }
+  // Fallback to DOM if S._allUsers not yet populated
+  return Array.from(document.querySelectorAll('.account-check:checked')).map(cb => ({
+    id: cb.dataset.id, displayName: cb.dataset.name, userRole: cb.dataset.role || 'other'
+  }));
+}
+
+// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
+// Each name below is published as a property of `window` under the same name.
+window.loadUsers = loadUsers;
+window._mergeGoogleUsers = _mergeGoogleUsers;
+window.toggleSection = toggleSection;
+window.restoreSectionStates = restoreSectionStates;
+window.updateRoleFilterCounts = updateRoleFilterCounts;
+window.setRoleFilter = setRoleFilter;
+window.loadLastScanSummary = loadLastScanSummary;
+window.renderAccountList = renderAccountList;
+window._updateUserCountBadge = _updateUserCountBadge;
+window.showSkuDebug = showSkuDebug;
+window.filterUsers = filterUsers;
+window.cycleUserRole = cycleUserRole;
+window.removeUser = removeUser;
+window.addUserManually = addUserManually;
+window.onAccountCheckChange = onAccountCheckChange;
+window.selectAllAccounts = selectAllAccounts;
+window.getSelectedUsers = getSelectedUsers;
+// NOTE(review): the next line copies the value `_activeRoleFilter` has when this
+// statement runs. If that variable holds a primitive and is reassigned later,
+// `window._activeRoleFilter` will not follow it — confirm against its declaration.
+window._activeRoleFilter = _activeRoleFilter;
+window._COLLAPSE_SECTIONS = _COLLAPSE_SECTIONS;
diff --git a/static/js/viewer.js b/static/js/viewer.js
new file mode 100644
index 0000000..ce19203
--- /dev/null
+++ b/static/js/viewer.js
@@ -0,0 +1,225 @@
+// ── Viewer token management (#33) ─────────────────────────────────────────────
+// Share button → modal to create, copy, and revoke read-only viewer links.
+
+function openShareModal() {
+  document.getElementById('shareBackdrop').classList.add('open');
+  document.getElementById('shareNewLinkRow').style.display = 'none';
+  document.getElementById('shareLabel').value = '';
+  document.getElementById('shareExpiry').value = '30';
+  _renderTokenList();
+  fetch('/api/viewer/pin').then(function(r){ return r.json(); }).then(function(d) {
+    const el = document.getElementById('sharePinStatus');
+    if (el) el.textContent = d.pin_set ? t('share_pin_set', 'Set') : t('share_pin_not_set', 'Not set');
+  }).catch(function(){});
+}
+
+function closeShareModal() {
+  document.getElementById('shareBackdrop').classList.remove('open');
+}
+
+async function _renderTokenList() {
+  const list = document.getElementById('shareTokenList');
+  list.innerHTML = '<div style="font-size:12px;color:var(--muted);padding:4px 0">' + t('lbl_loading', 'Loading…') + '</div>';
+  try {
+    const r = await fetch('/api/viewer/tokens');
+    const tokens = await r.json();
+    if (!tokens.length) {
+      list.innerHTML = '<div style="font-size:12px;color:var(--muted);padding:4px 0">' + t('share_no_links', 'No active links.') + '</div>';
+      return;
+    }
+    list.innerHTML = '';
+    tokens.forEach(tok => {
+      const expires = tok.expires_at
+        ? new Date(tok.expires_at * 1000).toLocaleDateString(undefined, {day:'numeric', month:'short', year:'numeric'})
+        : t('share_expires_never', 'Never');
+      const lastUsed = tok.last_used_at
+        ? new Date(tok.last_used_at * 1000).toLocaleDateString(undefined, {day:'numeric', month:'short'})
+        : '—';
+      const row = document.createElement('div');
+      row.style.cssText = 'display:flex;align-items:center;gap:8px;padding:6px 10px;background:var(--bg);border:1px solid var(--border);border-radius:6px;font-size:12px';
+      row.innerHTML =
+        '<div style="flex:1;min-width:0">' +
+          '<div style="font-weight:500;color:var(--text);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">' +
+            (tok.label || '<span style="color:var(--muted);font-style:italic">' + t('share_unlabelled', 'Unlabelled') + '</span>') +
+          '</div>' +
+          '<div style="font-size:10px;color:var(--muted);margin-top:1px">' +
+            t('share_expires_prefix', 'Expires:') + ' ' + expires + ' &nbsp;·&nbsp; ' + t('share_last_used', 'Last used:') + ' ' + lastUsed +
+          '</div>' +
+        '</div>' +
+        '<button title="' + t('share_copy_link_prompt', 'Copy link:') + '" onclick="copyTokenLink(\'' + tok.token + '\',this)" ' +
+          'style="height:24px;padding:0 8px;background:none;border:1px solid var(--border);color:var(--muted);border-radius:4px;font-size:11px;cursor:pointer;flex-shrink:0">' + t('log_copy', 'Copy') + '</button>' +
+        '<button title="' + t('share_revoke', 'Revoke') + '" onclick="revokeToken(\'' + tok.token + '\',this.closest(\'div[style]\'))" ' +
+          'style="height:24px;padding:0 8px;background:none;border:1px solid var(--danger);color:var(--danger);border-radius:4px;font-size:11px;cursor:pointer;flex-shrink:0">' + t('share_revoke', 'Revoke') + '</button>';
+      list.appendChild(row);
+    });
+  } catch(e) {
+    list.innerHTML = '<div style="font-size:12px;color:var(--danger);padding:4px 0">' + t('share_load_error', 'Failed to load links.') + '</div>';
+  }
+}
+
+async function createShareLink() {
+  const label   = document.getElementById('shareLabel').value.trim();
+  const expiry  = document.getElementById('shareExpiry').value;
+  const body    = {label};
+  if (expiry) body.expires_days = parseInt(expiry);
+  try {
+    const r = await fetch('/api/viewer/tokens', {
+      method: 'POST', headers: {'Content-Type':'application/json'},
+      body: JSON.stringify(body),
+    });
+    if (!r.ok) throw new Error('Server error ' + r.status);
+    const entry = await r.json();
+    const url = window.location.origin + '/view?token=' + encodeURIComponent(entry.token);
+    const urlInput = document.getElementById('shareNewLinkUrl');
+    urlInput.value = url;
+    document.getElementById('shareNewLinkRow').style.display = 'block';
+    document.getElementById('shareCopyBtn').textContent = t('log_copy', 'Copy');
+    document.getElementById('shareLabel').value = '';
+    _renderTokenList();
+  } catch(e) {
+    alert(t('share_create_error', 'Failed to create link:') + ' ' + e.message);
+  }
+}
+
+function copyShareLink() {
+  const url = document.getElementById('shareNewLinkUrl').value;
+  _copyText(url, document.getElementById('shareCopyBtn'));
+}
+
+function copyTokenLink(token, btn) {
+  const url = window.location.origin + '/view?token=' + encodeURIComponent(token);
+  _copyText(url, btn);
+}
+
+function _copyText(text, btn) {
+  navigator.clipboard.writeText(text).then(() => {
+    const orig = btn.textContent;
+    btn.textContent = t('share_copied', 'Copied!');
+    setTimeout(() => { btn.textContent = orig; }, 1800);
+  }).catch(() => {
+    // Fallback for HTTP contexts
+    try {
+      const ta = document.createElement('textarea');
+      ta.value = text;
+      ta.style.position = 'fixed'; ta.style.opacity = '0';
+      document.body.appendChild(ta);
+      ta.select();
+      document.execCommand('copy');
+      document.body.removeChild(ta);
+      const orig = btn.textContent;
+      btn.textContent = t('share_copied', 'Copied!');
+      setTimeout(() => { btn.textContent = orig; }, 1800);
+    } catch(_) {}
+  });
+}
+
+async function revokeToken(token, rowEl) {
+  if (!confirm(t('share_revoke_confirm', 'Revoke this link? Anyone using it will immediately lose access.'))) return;
+  try {
+    const r = await fetch('/api/viewer/tokens/' + encodeURIComponent(token), {method: 'DELETE'});
+    if (!r.ok) throw new Error('Server error ' + r.status);
+    rowEl.remove();
+    const list = document.getElementById('shareTokenList');
+    if (!list.children.length) {
+      list.innerHTML = '<div style="font-size:12px;color:var(--muted);padding:4px 0">' + t('share_no_links', 'No active links.') + '</div>';
+    }
+    // Hide the copy row if the just-revoked token was the last created
+    const newRow = document.getElementById('shareNewLinkRow');
+    if (newRow) {
+      const shownUrl = document.getElementById('shareNewLinkUrl')?.value || '';
+      if (shownUrl.includes(token)) newRow.style.display = 'none';
+    }
+  } catch(e) {
+    alert(t('share_revoke_error', 'Failed to revoke:') + ' ' + e.message);
+  }
+}
+
+// ── Viewer PIN — Settings UI ──────────────────────────────────────────────────
+
+async function stLoadViewerPinStatus() {
+  try {
+    const r = await fetch('/api/viewer/pin');
+    const d = await r.json();
+    const statusEl     = document.getElementById('stViewerPinStatus');
+    const currentRow   = document.getElementById('stViewerCurrentPinRow');
+    const clearBtn     = document.getElementById('stViewerPinClearBtn');
+    if (d.pin_set) {
+      if (statusEl)   statusEl.textContent   = '\u2714 ' + t('viewer_pin_is_set', 'Viewer PIN is set');
+      if (currentRow) currentRow.style.display = '';
+      if (clearBtn)   clearBtn.style.display   = '';
+    } else {
+      if (statusEl)   statusEl.textContent   = t('viewer_pin_not_set_msg', 'No PIN set \u2014 /view requires a token link');
+      if (currentRow) currentRow.style.display = 'none';
+      if (clearBtn)   clearBtn.style.display   = 'none';
+    }
+  } catch(e) {}
+}
+
+async function stSaveViewerPin() {
+  const newPin     = (document.getElementById('stViewerNewPin')?.value    || '').trim();
+  const currentPin = (document.getElementById('stViewerCurrentPin')?.value || '').trim();
+  const st         = document.getElementById('stViewerPinSaveStatus');
+  if (!newPin) {
+    if (st) { st.style.color = 'var(--danger)'; st.textContent = t('m365_settings_pin_required', 'PIN is required.'); }
+    return;
+  }
+  if (!/^\d{4,8}$/.test(newPin)) {
+    if (st) { st.style.color = 'var(--danger)'; st.textContent = t('viewer_pin_format', 'PIN must be 4\u20138 digits.'); }
+    return;
+  }
+  if (st) { st.style.color = 'var(--muted)'; st.textContent = t('viewer_pin_saving', 'Saving\u2026'); }
+  try {
+    const r = await fetch('/api/viewer/pin', {
+      method: 'POST', headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({pin: newPin, current_pin: currentPin}),
+    });
+    const d = await r.json();
+    if (!r.ok) {
+      if (st) { st.style.color = 'var(--danger)'; st.textContent = d.error || 'Error.'; }
+      return;
+    }
+    if (st) { st.style.color = 'var(--accent)'; st.textContent = '\u2714 ' + t('viewer_pin_saved', 'PIN saved'); }
+    if (document.getElementById('stViewerNewPin'))    document.getElementById('stViewerNewPin').value    = '';
+    if (document.getElementById('stViewerCurrentPin')) document.getElementById('stViewerCurrentPin').value = '';
+    stLoadViewerPinStatus();
+  } catch(e) {
+    if (st) { st.style.color = 'var(--danger)'; st.textContent = e.message; }
+  }
+}
+
+async function stClearViewerPin() {
+  const currentPin = (document.getElementById('stViewerCurrentPin')?.value || '').trim();
+  const st         = document.getElementById('stViewerPinSaveStatus');
+  if (!currentPin) {
+    if (st) { st.style.color = 'var(--danger)'; st.textContent = t('m365_settings_pin_required', 'PIN is required.'); }
+    document.getElementById('stViewerCurrentPin')?.focus();
+    return;
+  }
+  if (!confirm(t('viewer_pin_clear_confirm', 'Remove the viewer PIN? /view will require a token link again.'))) return;
+  try {
+    const r = await fetch('/api/viewer/pin', {
+      method: 'DELETE', headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({current_pin: currentPin}),
+    });
+    const d = await r.json();
+    if (!r.ok) {
+      if (st) { st.style.color = 'var(--danger)'; st.textContent = d.error || 'Error.'; }
+      return;
+    }
+    if (st) { st.style.color = 'var(--muted)'; st.textContent = t('viewer_pin_cleared', 'PIN cleared'); }
+    stLoadViewerPinStatus();
+  } catch(e) {
+    if (st) { st.style.color = 'var(--danger)'; st.textContent = e.message; }
+  }
+}
+
+// ── Window exports ────────────────────────────────────────────────────────────
+// Publishes the viewer functions below as properties of `window`.
+// `_renderTokenList` and `_copyText` are not exported.
+window.openShareModal       = openShareModal;
+window.closeShareModal      = closeShareModal;
+window.createShareLink      = createShareLink;
+window.copyShareLink        = copyShareLink;
+window.copyTokenLink        = copyTokenLink;
+window.revokeToken          = revokeToken;
+window.stLoadViewerPinStatus = stLoadViewerPinStatus;
+window.stSaveViewerPin      = stSaveViewerPin;
+window.stClearViewerPin     = stClearViewerPin;
diff --git a/static/style.css b/static/style.css
new file mode 100644
index 0000000..05a38dd
--- /dev/null
+++ b/static/style.css
@@ -0,0 +1,616 @@
+  /* Default design tokens: colours (dark background, light text) and the
+     monospace / sans-serif font stacks, consumed elsewhere via var(--…). */
+  :root {
+    --pmgmt-divider: #484850;
+    --bg:      #0f0f11;
+    --surface: #18181c;
+    --border:  #2a2a30;
+    --accent:  #0078d4;
+    --accent2: #f5a623;
+    --text:    #e8e6e1;
+    --muted:   #6b6970;
+    --success: #2ecc71;
+    --danger:  #e74c3c;
+    --mono:    'IBM Plex Mono', monospace;
+    --sans:    'IBM Plex Sans', sans-serif;
+  }
+  /* Colour overrides for elements carrying data-theme="light" (and their
+     descendants). Tokens not redefined here (--accent2, --mono, --sans)
+     keep their :root values. */
+  [data-theme="light"] {
+    --pmgmt-divider: #b0b0bc;
+    --bg:      #f5f5f7;
+    --surface: #ffffff;
+    --border:  #d8d8df;
+    --accent:  #0060b0;
+    --text:    #1a1a1f;
+    --muted:   #888891;
+    --success: #1a9952;
+    --danger:  #c0392b;
+  }
+  *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; transition: background-color .15s, border-color .15s, color .1s; }
+  html, body { height: 100%; margin: 0; background: var(--bg); color: var(--text); font-family: var(--sans); box-sizing: border-box; }
+  input, select, textarea { background: var(--surface); color: var(--text); border: 1px solid var(--border); border-radius: 6px; padding: 7px 10px; font-family: var(--sans); font-size: 13px; }
+  input:focus, select:focus { outline: none; border-color: var(--accent); }
+  button { cursor: pointer; font-family: var(--sans); font-size: 13px; }
+
+  /* Layout */
+  .layout { display: flex; height: 100%; overflow: hidden; }
+  #sourcesPanel::-webkit-scrollbar,
+  #accountsList::-webkit-scrollbar { width: 4px; }
+  #sourcesPanel::-webkit-scrollbar-track,
+  #accountsList::-webkit-scrollbar-track { background: transparent; }
+  #sourcesPanel::-webkit-scrollbar-thumb,
+  #accountsList::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
+  #sourcesPanel { scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
+  #accountsList { scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
+  .sidebar { width: 260px; min-width: 260px; display: flex; flex-direction: column;
+             background: var(--surface); border-right: 1px solid var(--border); overflow: hidden; }
+  .sidebar-header { padding: 10px 12px 8px; border-bottom: 1px solid var(--border); }
+  .sidebar-title { font-size: 15px; font-weight: 600; }
+  .sidebar-section { padding: 7px 12px; border-bottom: 1px solid var(--border); }
+  .section-label { font-size: 10px; font-weight: 600; color: var(--muted); text-transform: uppercase; letter-spacing: .05em; margin-bottom: 4px; }
+  .section-collapse-btn { background: none; border: none; color: var(--muted); cursor: pointer; font-size: 10px; padding: 0; line-height: 1; transition: transform .15s; }
+  .section-collapsed > .section-collapse-btn { transform: rotate(-90deg); }
+  .last-scan-summary { display: flex; flex-direction: column; align-items: center; gap: 14px; padding: 24px 16px; }
+  .last-scan-card { background: var(--surface); border: 1px solid var(--border); border-radius: 10px; padding: 16px 24px; min-width: 280px; text-align: center; }
+  .last-scan-card h3 { font-size: 12px; font-weight: 600; color: var(--muted); text-transform: uppercase; letter-spacing: .05em; margin: 0 0 12px; }
+  .last-scan-stats { display: flex; gap: 24px; justify-content: center; }
+  .last-scan-stat { display: flex; flex-direction: column; align-items: center; gap: 2px; }
+  .last-scan-stat .val { font-size: 22px; font-weight: 700; color: var(--text); }
+  .last-scan-stat .lbl { font-size: 10px; color: var(--muted); }
+  input[type="text"], input[type="number"], input[type="date"] {
+    width: 100%; background: var(--bg); border: 1px solid var(--border); border-radius: 6px;
+    color: var(--text); font-family: var(--mono); font-size: 12px; padding: 0 10px;
+    height: 26px; box-sizing: border-box; outline: none;
+  }
+  [data-theme="dark"]  input[type="date"] { color-scheme: dark; }
+  [data-theme="light"] input[type="date"] { color-scheme: light; }
+  input[type="text"]:focus, input[type="number"]:focus, input[type="date"]:focus {
+    border-color: var(--accent);
+  }
+  .datepicker-wrap { margin-bottom: 4px; }
+  .date-presets { display: flex; margin-top: 6px; background: var(--bg); border: 1px solid var(--border); border-radius: 6px; overflow: hidden; }
+  .date-preset {
+    flex: 1; min-width: 0; background: none; border: none; border-right: 1px solid var(--border);
+    color: var(--muted); font-family: var(--mono); font-size: 10px; padding: 0 2px; height: 26px;
+    cursor: pointer; transition: background .15s, color .15s; letter-spacing: 0.05em;
+  }
+  .date-preset:last-child { border-right: none; }
+  .date-preset:hover { background: var(--surface); color: var(--text); }
+  .date-preset.selected { background: var(--accent); color: #fff; }
+  .role-filter-btn { flex: 1; font-size: 10px; height: 26px; padding: 0 4px; cursor: pointer; border: none; background: none; color: var(--muted); }
+  .role-filter-btn:hover { background: var(--surface); color: var(--text); }
+  .role-filter-btn.rf-sep { border-right: 1px solid var(--border); }
+  .toggle-row { display: flex; align-items: center; justify-content: space-between; margin-bottom: 5px; }
+  .toggle-label { font-size: 12px; color: var(--text); }
+  /* Hint icon + speech bubble tooltip */
+  .hint-wrap { position:relative; display:inline-flex; align-items:center; margin-left:5px; }
+  .hint-icon {
+    display:inline-flex; align-items:center; justify-content:center;
+    width:14px; height:14px; border-radius:50%;
+    border:1px solid var(--muted); color:var(--muted);
+    font-size:9px; font-weight:700; cursor:pointer;
+    flex-shrink:0; user-select:none; line-height:1;
+    transition:border-color .15s, color .15s;
+  }
+  .hint-icon:hover, .hint-icon.active { border-color:var(--accent); color:var(--accent); }
+  .hint-bubble {
+    display:none; position:fixed;
+    background:var(--surface); border:1px solid var(--border);
+    border-radius:8px; padding:7px 10px;
+    font-size:10px; color:var(--muted); line-height:1.5;
+    width:200px; z-index:9999; box-shadow:0 4px 16px rgba(0,0,0,.35);
+    pointer-events:none;
+  }
+  .hint-bubble::before {
+    content:''; position:absolute; right:100%; top:50%; transform:translateY(-50%);
+    border:5px solid transparent; border-right-color:var(--border);
+  }
+  .hint-bubble::after {
+    content:''; position:absolute; right:calc(100% - 1px); top:50%; transform:translateY(-50%);
+    border:5px solid transparent; border-right-color:var(--surface);
+  }
+  /* bubble display controlled by toggleHint() JS */
+  .toggle { position: relative; width: 32px; height: 18px; flex-shrink: 0; }
+  .toggle input { opacity: 0; width: 0; height: 0; }
+  .toggle-slider {
+    position: absolute; inset: 0; background: var(--border); border-radius: 18px;
+    cursor: pointer; transition: 0.2s;
+  }
+  .toggle-slider::before {
+    content: ''; position: absolute; width: 14px; height: 14px; left: 2px; top: 2px;
+    background: var(--muted); border-radius: 50%; transition: 0.2s;
+  }
+  .toggle input:checked + .toggle-slider { background: var(--accent); }
+  .toggle input:checked + .toggle-slider::before { transform: translateX(14px); background: #fff; }
+  .main { flex: 1; display: flex; flex-direction: column; overflow: hidden; }
+
+  /* Auth panel */
+  .auth-panel { flex: 1; display: flex; align-items: center; justify-content: center; padding: 40px; }
+  .auth-card { background: var(--surface); border: 1px solid var(--border); border-radius: 12px; padding: 32px; width: 100%; max-width: 480px; }
+  .auth-title { font-size: 20px; font-weight: 600; margin-bottom: 6px; }
+  .auth-sub { font-size: 13px; color: var(--muted); margin-bottom: 24px; line-height: 1.5; }
+  .form-row { margin-bottom: 14px; }
+  .form-label { font-size: 12px; color: var(--muted); margin-bottom: 4px; display: block; }
+  .form-row input { width: 100%; }
+  .btn-primary { background: var(--accent); color: #fff; border: none; padding: 9px 20px; border-radius: 7px; font-weight: 500; }
+  .btn-primary:hover { filter: brightness(1.1); }
+
+  /* Device code flow */
+  .device-code-box { background: var(--bg); border: 1px solid var(--border); border-radius: 8px; padding: 20px; margin: 16px 0; text-align: center; }
+  .device-code { font-family: var(--mono); font-size: 28px; font-weight: 600; letter-spacing: .15em; color: var(--accent); margin: 10px 0; }
+  .device-url  { font-size: 13px; color: var(--muted); }
+  .device-url a { color: var(--accent); }
+  .auth-status { font-size: 13px; margin-top: 12px; padding: 8px 12px; border-radius: 6px; }
+  .auth-status.waiting  { background: rgba(0,120,212,.1); color: var(--accent); }
+  .auth-status.success  { background: rgba(46,204,113,.1); color: var(--success); }
+  .auth-status.error    { background: rgba(231,76,60,.1);  color: var(--danger); }
+
+  /* Source selector */
+  .source-check { display: flex; align-items: center; gap: 6px; padding: 3px 0; cursor: pointer; }
+  .source-check input[type=checkbox] { width: 15px; height: 15px; accent-color: var(--accent); cursor: pointer; }
+  .account-check { width: 14px; height: 14px; accent-color: var(--accent); cursor: pointer; flex-shrink: 0; margin: 0; }
+  .source-icon { font-size: 13px; }
+  .source-label { font-size: 12px; }
+
+  /* Topbar */
+  .topbar { display: flex; align-items: center; gap: 10px; padding: 10px 16px;
+    position: sticky; top: 0; z-index: 20;
+    border-bottom: 1px solid var(--border); background: var(--surface); flex-shrink: 0; }
+  .scan-btn { background: var(--accent); color: #fff; border: none; height: 26px; padding: 0 16px; border-radius: 7px; font-weight: 500; font-size: 13px; cursor: pointer; }
+  .scan-btn:hover:not(:disabled) { filter: brightness(1.1); }
+  .scan-btn:disabled { opacity: .5; cursor: default; }
+  .stop-btn { background: transparent; color: var(--danger); border: 1px solid var(--danger); height: 26px; padding: 0 12px; border-radius: 7px; font-size: 13px; cursor: pointer; }
+  .stats-pill { background: var(--bg); border: 1px solid var(--border); border-radius: 20px; height: 26px; padding: 0 12px; font-size: 12px; color: var(--muted); display: flex; align-items: center; }
+  .stats-pill span { color: var(--text); font-weight: 600; }
+  .spacer { flex: 1; }
+  .theme-btn { background: none; border: 1px solid var(--border); color: var(--muted); height: 26px; padding: 0 10px; border-radius: 7px; font-size: 14px; cursor: pointer; }
+  .theme-btn:hover { border-color: var(--accent); color: var(--accent); }
+  .topbar-sep { width: 1px; height: 20px; background: var(--border); flex-shrink: 0; margin: 0 2px; }
+  .config-group { display: flex; align-items: center; background: var(--bg); border: 1px solid var(--border); border-radius: 7px; overflow: hidden; }
+  .config-group button { background: none; border: none; border-right: 1px solid var(--border); color: var(--muted); padding: 0 11px; height: 26px; font-size: 11px; cursor: pointer; white-space: nowrap; }
+  .config-group button:last-child { border-right: none; }
+  .config-group button:hover { background: var(--surface); color: var(--text); }
+
+  /* Progress bar */
+  .progress-bar { display: flex; align-items: center; gap: 10px; padding: 0 16px;
+                  height: 32px; min-height: 32px; max-height: 32px;
+                  background: var(--bg); border-top: 1px solid var(--border); font-size: 12px; color: var(--muted); flex-shrink: 0;
+                  line-height: 1; overflow: hidden; }
+  .progress-who { display:flex; align-items:center; gap:5px; flex-shrink:0; min-width:0; max-width:45%; overflow:hidden; }
+  .progress-src-pill { font-size:9px; font-weight:500; padding:1px 5px; border-radius:10px; flex-shrink:0; white-space:nowrap; }
+  .progress-src-m365   { background:#E6F1FB; color:#185FA5; }
+  .progress-src-google { background:#EAF3DE; color:#3B6D11; }
+  .progress-src-file   { background:#EDE8F5; color:#5a4080; }
+  .progress-user { font-size:11px; color:var(--muted); white-space:nowrap; overflow:hidden; text-overflow:ellipsis; }
+  .progress-file { flex:1; font-size:11px; color:var(--muted); white-space:nowrap; overflow:hidden; text-overflow:ellipsis; opacity:0.7; text-align:right; }
+  .progress-track { width: 180px; height: 6px; background: var(--border); border-radius: 3px; flex-shrink: 0; display: flex; overflow: hidden; }
+  .progress-seg   { height: 100%; flex: 1; background: var(--border); position: relative; }
+  .progress-seg + .progress-seg { border-left: 1px solid var(--bg); }
+  .progress-seg-fill { height: 100%; width: 0; transition: width .3s ease; }
+  .progress-phase { font-size: 11px; color: var(--accent); flex-shrink:0; }
+
+  /* Filter bar */
+  .filter-bar { display: flex; align-items: center; gap: 8px; padding: 6px 16px;
+                border-bottom: 1px solid var(--border); background: var(--surface); flex-shrink: 0; }
+  .filter-bar input, .filter-bar select { height: 26px; box-sizing: border-box; padding: 0 8px; font-size: 12px; }
+  .filter-bar input  { width: 180px; }
+  .filter-bar select { width: 130px; }
+  .filter-bar button { height: 26px; padding: 0 10px; border-radius: 5px; font-size: 12px; cursor: pointer; box-sizing: border-box; }
+  .filter-clear { background: none; border: 1px solid var(--border); color: var(--muted); font-size: 12px; height: 26px; padding: 0 10px; border-radius: 5px; cursor: pointer; box-sizing: border-box; }
+  .filter-clear:hover { border-color: var(--danger); color: var(--danger); }
+
+  /* Grid */
+  .grid-area { flex: 1; overflow-y: auto; padding: 24px; min-width: 0; scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
+  .grid-area::-webkit-scrollbar { width: 4px; }
+  .grid-area::-webkit-scrollbar-track { background: transparent; }
+  .grid-area::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
+  .grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(200px,1fr)); gap: 14px; }
+  /* Preview panel */
+  .content-area { flex: 1; display: flex; overflow: hidden; min-height: 0; }
+  .preview-panel {
+    width: 420px; flex-shrink: 0;
+    display: flex; flex-direction: row;
+    background: var(--surface);
+    overflow: hidden;
+    transition: none;
+  }
+  .preview-panel.hidden { width: 0; }
+  .preview-resize-handle {
+    width: 8px; flex-shrink: 0; cursor: col-resize; position: relative;
+    background: transparent; border-left: 1px solid var(--border);
+  }
+  .preview-resize-handle::after {
+    content: ''; position: absolute; inset: 0 -4px; /* extend hit area 4px each side */
+  }
+  .preview-resize-handle:hover { background: var(--accent); opacity: 0.35; }
+  .preview-inner { flex: 1; display: flex; flex-direction: column; overflow: hidden; }
+  .preview-header {
+    display: flex; align-items: center; justify-content: space-between;
+    padding: 10px 14px; border-bottom: 1px solid var(--border); flex-shrink: 0;
+  }
+  .preview-title { font-size: 12px; font-weight: 600; color: var(--text); overflow: hidden; text-overflow: ellipsis; white-space: nowrap; flex: 1; margin-right: 8px; }
+  .preview-close { background: none; border: none; color: var(--muted); font-size: 18px; cursor: pointer; padding: 0; line-height: 1; }
+  .preview-close:hover { color: var(--text); }
+  .preview-body { flex: 1; overflow: hidden; position: relative; }
+  .preview-body iframe { width: 100%; height: 100%; border: none; display: block; overflow-x: hidden; }
+  .preview-loading { position: absolute; inset: 0; display: flex; align-items: center; justify-content: center; color: var(--muted); font-size: 12px; }
+  .preview-meta { padding: 10px 14px; border-top: 1px solid var(--border); font-size: 11px; color: var(--muted); display: flex; gap: 10px; flex-wrap: wrap; flex-shrink: 0; }
+  .preview-open-btn { margin-left: auto; background: var(--accent); color: #fff; border: none; border-radius: 5px; padding: 4px 10px; font-size: 11px; cursor: pointer; white-space: nowrap; }
+  .card.selected { outline: 2px solid var(--accent); outline-offset: 2px; }
+  .card { position: relative; /* containing block for the absolutely positioned .card-delete-btn */ background: var(--surface); border: 1px solid var(--border); border-radius: 10px; overflow: hidden; cursor: pointer; transition: border-color .15s, box-shadow .15s; }
+  .card:hover { border-color: var(--accent); box-shadow: 0 0 0 1px var(--accent); }
+  .card.list-view { display: flex; align-items: center; gap: 12px; padding: 10px 14px; border-radius: 8px; }
+  .thumb-wrap { aspect-ratio: 7/9; overflow: hidden; background: var(--bg); }
+  .thumb-wrap img { width: 100%; height: 100%; object-fit: cover; }
+  .card-info { padding: 10px 12px; }
+  .card-info.list-info { flex: 1; padding: 0; }
+  .card-name { font-size: 12px; font-weight: 500; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; margin-bottom: 3px; }
+  .card-meta { font-size: 11px; color: var(--muted); }
+  .plat-badge-m365  { background:#E6F1FB; color:#185FA5; font-weight:500; }
+  .plat-badge-google { background:#EAF3DE; color:#3B6D11; font-weight:500; }
+  .plat-badge-both   { background:linear-gradient(90deg,#E6F1FB 50%,#EAF3DE 50%); color:#1a4a1a; font-weight:500; border:0.5px solid #b5d4b5; }
+  .role-badge { font-size:9px; padding:1px 5px; border-radius:10px; background:#D3D1C7; color:#444441; font-weight:500; margin-right:3px; } /* sizing follows the badge standard: 9px / 1px 5px / 10px */
+  .card-source { font-size: 10px; color: var(--muted); margin-top: 2px; display: flex; align-items: center; gap: 4px; flex-wrap: wrap; }
+  .source-badge { font-size: 9px; padding: 1px 5px; border-radius: 10px; font-weight: 500; white-space: nowrap; }
+  .cpr-badge { background: var(--danger); color: #fff; font-size: 9px; font-weight: 600; padding: 1px 5px; border-radius: 10px; display: inline-block; }
+  .card-delete-btn { position:absolute; top:6px; right:6px; background:rgba(0,0,0,0.45); color:#fff; border:none; border-radius:50%; width:22px; height:22px; font-size:13px; line-height:22px; text-align:center; cursor:pointer; opacity:0.35; transition:opacity .15s; padding:0; z-index:1; }
+  .card:hover .card-delete-btn { opacity:1; }
+  .card.list-view .card-delete-btn { position:static; opacity:1; background:transparent; color:var(--muted); flex-shrink:0; }
+  .bulk-delete-modal { max-width:460px; }
+  .bulk-criteria-row { display:flex; align-items:center; gap:8px; margin-bottom:8px; font-size:12px; }
+  .bulk-criteria-row label { flex:0 0 130px; color:var(--muted); }
+  .bulk-criteria-row select, .bulk-criteria-row input { flex:1; font-size:12px; padding:4px 7px; background:var(--bg2); border:1px solid var(--border); border-radius:5px; color:var(--text); }
+  .delete-progress { font-size:12px; color:var(--muted); margin-top:10px; min-height:18px; }
+  .btn-danger { background:var(--danger); color:#fff; border:none; padding:7px 16px; border-radius:6px; font-size:12px; cursor:pointer; font-weight:600; }
+  .btn-danger:disabled { opacity:.5; cursor:not-allowed; }
+
+  /* Profile management modal (#15d) */
+  .pmgmt-backdrop {
+    display: none; position: fixed; inset: 0;
+    background: rgba(0,0,0,0.55); z-index: 1000;
+    align-items: center; justify-content: center;
+  }
+  .pmgmt-backdrop.open { display: flex; }
+  .pmgmt-modal {
+    display: flex; gap: 0; width: min(820px,96vw); max-height: 80vh; overflow: hidden;
+    background: var(--surface); border-radius: 10px;
+    border: 1px solid var(--border); box-shadow: 0 8px 32px rgba(0,0,0,.4); font-size: 12px; color: var(--text);
+
+  }
+  .pmgmt-panel-list { width: 260px; min-width: 260px; display: flex; flex-direction: column; border-right: 1px solid var(--pmgmt-divider); }
+  .pmgmt-panel-editor { flex: 1; display: none; flex-direction: column; overflow: hidden; }
+  .pmgmt-panel-editor.open { display: flex; }
+  .pmgmt-list { overflow-y: auto; flex: 1; display: flex; flex-direction: column; }
+  .pmgmt-row { cursor: pointer; padding: 8px 12px; border-bottom: 1px solid var(--pmgmt-divider); }
+  .pmgmt-row:hover { background: var(--bg); }
+  .pmgmt-row.active { background: rgba(99,126,210,.15); }
+  .pmgmt-row.active .pmgmt-name { color: var(--accent); }
+  .pmgmt-row.active .pmgmt-sources, .pmgmt-row.active .pmgmt-meta { opacity: .7; }
+  .pmgmt-row-head { display: flex; align-items: center; gap: 8px; }
+  .pmgmt-name { font-weight: 500; flex: 1; font-size: 13px; color: var(--text); }
+  .pmgmt-meta { font-size: 10px; color: var(--muted); margin-top: 1px; }
+  .pmgmt-desc { font-size: 11px; color: var(--muted); margin-top: 2px; font-style: italic; }
+  .pmgmt-sources { font-size: 11px; color: var(--muted); margin-top: 1px; }
+  .pmgmt-actions { display: flex; gap: 5px; flex-shrink: 0; }
+  .pmgmt-actions button { border: 1px solid var(--border); background: none; color: var(--muted); border-radius: 5px; height: 26px; padding: 0 8px; font-size: 11px; cursor: pointer; box-sizing: border-box; }
+  .pmgmt-actions button:hover { color: var(--text); border-color: var(--text); }
+  .pmgmt-actions button.btn-use { background: var(--accent); color: #fff; border-color: var(--accent); }
+  .pmgmt-actions button.btn-del { color: var(--danger); border-color: var(--danger); }
+  .pmgmt-empty { color: var(--muted); font-size: 12px; text-align: center; padding: 24px 12px; }
+  .pmgmt-editor-body { flex: 1; overflow: hidden; padding: 14px 16px; display: flex; flex-direction: column; gap: 14px; }
+  .pmgmt-editor-section-title { font-size: 10px; font-weight: 600; color: var(--text); text-transform: uppercase; letter-spacing: .07em; margin-bottom: 8px; padding-bottom: 5px; border-bottom: 1px solid var(--pmgmt-divider); display: flex; align-items: center; justify-content: space-between; opacity: .9; }
+  .pmgmt-account-list { display: flex; flex-direction: column; gap: 3px; max-height: 160px; overflow-y: auto; margin-top: 4px; scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
+  .pmgmt-account-list::-webkit-scrollbar { width: 4px; }
+  .pmgmt-account-list::-webkit-scrollbar-track { background: transparent; }
+  .pmgmt-account-list::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
+  #peSourcesPanel { max-height: 130px; overflow-y: auto; scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
+  #peSourcesPanel::-webkit-scrollbar { width: 4px; }
+  #peSourcesPanel::-webkit-scrollbar-track { background: transparent; }
+  #peSourcesPanel::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
+  .pmgmt-acct-row { display: flex; align-items: center; gap: 6px; font-size: 12px; cursor: pointer; padding: 2px 0; }
+  .pmgmt-opt-row { display: flex; align-items: center; justify-content: space-between; font-size: 12px; color: var(--text); }
+  .pmgmt-settings-col { flex: 1; padding-left: 16px; border-left: 1px solid var(--pmgmt-divider); }
+  .pmgmt-opt-row input[type=number] { width: 60px; font-size: 12px; height: 26px; box-sizing: border-box; }
+  .pmgmt-acct-row span:first-of-type { flex:1; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
+  /* Account rows rendered as plain <label> elements get the same layout as
+     .pmgmt-acct-row. Every other .pmgmt-* rule is defined once, in the block
+     above, using the app's own variables (--bg, --border, --text, --accent,
+     --pmgmt-divider). Do not re-declare those rules here with claude.ai
+     system variables (--color-*): they are not part of this app's theme, and
+     a later duplicate would override the working rules above. */
+  .pmgmt-account-list label {
+    display: flex;
+    align-items: center;
+    gap: 6px;
+    font-size: 12px;
+    cursor: pointer;
+    padding: 2px 0;
+  }
+
+
+
+
+  /* Settings modal */
+  .settings-backdrop {
+    display:none; position:fixed; inset:0;
+    background:rgba(0,0,0,.55); z-index:1200;
+    align-items:center; justify-content:center;
+  }
+  .settings-backdrop.open { display:flex; }
+  .settings-modal {
+    background:var(--surface); border:1px solid var(--border);
+    border-radius:10px; width:min(540px,96vw);
+    display:flex; flex-direction:column; overflow:hidden;
+    font-size:12px; color:var(--text);
+  }
+  .settings-header { padding:16px 20px 0; display:flex; align-items:center; justify-content:space-between; }
+  .settings-header h2 { font-size:14px; font-weight:700; margin:0; }
+  .settings-tabs { display:flex; border-bottom:1px solid var(--border); padding:0 20px; margin-top:12px; }
+  .settings-tab {
+    height:36px; padding:0 14px; font-size:12px; cursor:pointer; border:none;
+    background:none; color:var(--muted); border-bottom:2px solid transparent;
+    margin-bottom:-1px; font-weight:500;
+  }
+  .settings-tab.active { color:var(--accent); border-bottom-color:var(--accent); font-weight:600; }
+  .settings-body { padding:16px 20px; overflow-y:auto; max-height:65vh; display:flex; flex-direction:column; gap:14px; }
+  .settings-pane { display:none; flex-direction:column; gap:12px; }
+  .settings-pane.active { display:flex; }
+  .settings-group { display:flex; flex-direction:column; gap:6px; }
+  .settings-group-title { font-size:10px; font-weight:700; color:var(--muted); text-transform:uppercase; letter-spacing:.05em; }
+  .settings-row { display:flex; align-items:center; gap:10px; }
+  .settings-row label { flex:0 0 110px; font-size:11px; color:var(--muted); }
+  .settings-row input, .settings-row select { flex:1; font-size:12px; height:26px; padding:0 8px; background:var(--bg); border:1px solid var(--border); border-radius:5px; color:var(--text); box-sizing:border-box; }
+  .settings-footer { padding:10px 20px; border-top:1px solid var(--border); display:flex; justify-content:flex-end; gap:8px; }
+  .settings-about-row { display:flex; justify-content:space-between; font-size:11px; padding:3px 0; border-bottom:1px solid var(--border); }
+  .settings-about-row:last-child { border-bottom:none; }
+
+  /* Unified Source Management modal (#17) */
+  .srcmgmt-backdrop {
+    display: none; position: fixed; inset: 0;
+    background: rgba(0,0,0,0.55); z-index: 1100;
+    align-items: center; justify-content: center;
+  }
+  .srcmgmt-backdrop.open { display: flex; }
+  .srcmgmt-modal {
+    background: var(--surface); border: 1px solid var(--border);
+    border-radius: 10px; width: min(620px, 96vw);
+    display: flex; flex-direction: column;
+    font-size: 12px; color: var(--text); overflow: hidden;
+  }
+  .srcmgmt-header { padding: 18px 22px 0; display: flex; align-items: center; justify-content: space-between; }
+  .srcmgmt-header h2 { font-size: 14px; font-weight: 700; margin: 0; }
+  .srcmgmt-tabs { display: flex; gap: 0; border-bottom: 1px solid var(--border); padding: 0 22px; margin-top: 14px; }
+  .srcmgmt-tab {
+    height: 36px; padding: 0 16px; font-size: 12px; cursor: pointer; border: none;
+    background: none; color: var(--muted); border-bottom: 2px solid transparent;
+    margin-bottom: -1px; font-weight: 500; transition: color .15s;
+  }
+  .srcmgmt-tab:hover { color: var(--text); }
+  .srcmgmt-tab.active { color: var(--accent); border-bottom-color: var(--accent); font-weight: 600; }
+  .srcmgmt-tab.stub { opacity: .45; cursor: default; }
+  .srcmgmt-body { padding: 18px 22px; overflow-y: auto; max-height: 65vh; display: flex; flex-direction: column; gap: 14px; }
+  .srcmgmt-pane { display: none; flex-direction: column; gap: 14px; }
+  .srcmgmt-pane.active { display: flex; }
+  .srcmgmt-group { display: flex; flex-direction: column; gap: 6px; }
+  .srcmgmt-group-title { font-size: 10px; font-weight: 700; color: var(--muted); text-transform: uppercase; letter-spacing: .05em; }
+  .srcmgmt-row { display: flex; align-items: center; gap: 10px; padding: 7px 10px; border-radius: 7px; background: var(--bg2); border: 1px solid var(--border); }
+  .srcmgmt-row-icon { font-size: 16px; flex-shrink: 0; width: 22px; text-align: center; }
+  .srcmgmt-row-label { flex: 1; font-size: 12px; font-weight: 500; }
+  .srcmgmt-row-sub { font-size: 10px; color: var(--muted); }
+  .srcmgmt-status { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
+  .srcmgmt-status.green  { background: #3fb950; }
+  .srcmgmt-status.amber  { background: #d29922; }
+  .srcmgmt-status.grey   { background: var(--border); }
+  .srcmgmt-cred-form { display: flex; flex-direction: column; gap: 8px; padding: 10px; border: 1px solid var(--border); border-radius: 8px; background: var(--bg); }
+  .srcmgmt-cred-row { display: flex; align-items: center; gap: 8px; }
+  .srcmgmt-cred-row label { flex: 0 0 110px; font-size: 11px; color: var(--muted); }
+  .srcmgmt-cred-row input { flex: 1; font-size: 12px; height: 26px; padding: 0 8px; background: var(--surface); border: 1px solid var(--border); border-radius: 5px; color: var(--text); box-sizing: border-box; }
+  .srcmgmt-footer { padding: 12px 22px; border-top: 1px solid var(--border); display: flex; justify-content: flex-end; gap: 8px; }
+
+  /* File Sources modal (#8) */
+  .fsrc-backdrop {
+    display: none; position: fixed; inset: 0;
+    background: rgba(0,0,0,0.55); z-index: 1000;
+    align-items: center; justify-content: center;
+  }
+  .fsrc-backdrop.open { display: flex; }
+  .fsrc-modal {
+    background: var(--surface); border: 1px solid var(--border);
+    border-radius: 10px; padding: 22px 26px;
+    width: min(560px, 95vw);
+    display: flex; flex-direction: column; gap: 12px;
+    font-size: 12px; color: var(--text);
+  }
+  .fsrc-modal h2 { font-size: 14px; font-weight: 700; margin: 0; }
+  .fsrc-list { overflow-y: auto; flex-shrink: 0; display: flex; flex-direction: column; gap: 8px; height: calc(5 * 58px); min-height: 58px; border: 1px solid var(--border); border-radius: 7px; padding: 6px; background: var(--bg); }
+  .fsrc-row { border: 1px solid var(--border); border-radius: 8px; padding: 10px 12px; background: var(--bg2); display: flex; flex-direction: column; gap: 4px; }
+  .fsrc-row-head { display: flex; align-items: center; gap: 8px; }
+  .fsrc-row-label { font-weight: 600; flex: 1; font-size: 12px; }
+  .fsrc-row-path { font-size: 10px; color: var(--muted); font-family: var(--mono); }
+  .fsrc-actions { display: flex; gap: 5px; flex-shrink: 0; }
+  .fsrc-actions button { border: 1px solid var(--border); background: none; color: var(--muted); border-radius: 5px; padding: 2px 8px; font-size: 11px; cursor: pointer; }
+  .fsrc-actions button:hover { color: var(--text); border-color: var(--text); }
+  .fsrc-actions button.btn-scan { background: var(--accent); color: #fff; border-color: var(--accent); }
+  .fsrc-actions button.btn-del { color: var(--danger); border-color: var(--danger); }
+  .fsrc-form { display: flex; flex-direction: column; gap: 8px; padding: 10px; border: 1px dashed var(--border); border-radius: 8px; }
+  .fsrc-form-row { display: flex; align-items: center; gap: 8px; }
+  .fsrc-form-row label { flex: 0 0 120px; font-size: 11px; color: var(--muted); }
+  .fsrc-form-row input { flex: 1; font-size: 12px; padding: 4px 8px; background: var(--bg); border: 1px solid var(--border); border-radius: 5px; color: var(--text); }
+  .fsrc-smb-fields { display: none; }
+  .fsrc-empty { color: var(--muted); font-size: 12px; text-align: center; padding: 20px 0; }
+  .fsrc-footer { display: flex; justify-content: flex-end; gap: 8px; padding-top: 4px; border-top: 1px solid var(--border); }
+
+  /* Import DB modal (#11) */
+  .import-db-backdrop {
+    display: none; position: fixed; inset: 0;
+    background: rgba(0,0,0,0.55); z-index: 1000;
+    align-items: center; justify-content: center;
+  }
+  .import-db-backdrop.open { display: flex; }
+  .import-db-modal {
+    background: var(--surface); border: 1px solid var(--border);
+    border-radius: 10px; padding: 24px 28px;
+    width: min(420px, 95vw);
+    display: flex; flex-direction: column; gap: 14px;
+    font-size: 12px; color: var(--text);
+  }
+  .import-db-modal h2 { font-size: 14px; font-weight: 700; margin: 0; }
+  .import-db-modal p { margin: 0; color: var(--muted); line-height: 1.5; }
+  .account-pill { font-size: 10px; color: var(--muted); white-space: nowrap; overflow: hidden;
+    text-overflow: ellipsis; max-width: 140px; display: inline-block; vertical-align: middle; }
+  .role-pill { font-size: 9px; padding: 1px 5px; border-radius: 10px; font-weight: 500; white-space: nowrap; }
+  .role-pill.student { background: #0f3d6e; color: #7ec8ff; }
+  .role-pill.staff   { background: #1a3a1a; color: #7ed07e; }
+  [data-theme="light"] .role-pill.student { background: #dbeeff; color: #0054a6; }
+  [data-theme="light"] .role-pill.staff   { background: #dff0df; color: #1a6e1a; }
+  .special-cat-badge { font-size: 9px; padding: 1px 5px; border-radius: 10px;
+    background: #4B0082; color: #E0B0FF; font-weight: 500; white-space: nowrap; }
+  [data-theme="light"] .special-cat-badge { background: #EDE0FF; color: #5A007A; }
+  .photo-face-badge { font-size: 9px; padding: 1px 5px; border-radius: 10px;
+    background: #005060; color: #80E8FF; font-weight: 500; white-space: nowrap; }
+  [data-theme="light"] .photo-face-badge { background: #D0F4FF; color: #00505F; }
+  .overdue-badge { font-size: 9px; padding: 1px 5px; border-radius: 10px;
+    background: #7c3200; color: #ffb347; font-weight: 600; white-space: nowrap; }
+  [data-theme="light"] .overdue-badge { background: #fff3e0; color: #c55a00; }
+  .badge-email { background: rgba(139,68,173,.2); color: #b87fd8; }
+  .badge-onedrive { background: rgba(0,120,212,.2); color: #5ba4e8; }
+  .badge-sharepoint { background: rgba(0,160,100,.2); color: #2ecc71; }
+  .badge-teams { background: rgba(88,101,242,.2); color: #9ba4ff; }
+  .badge-local { background: rgba(40,120,40,.2); color: #7ec87e; }
+  .badge-smb   { background: rgba(20,100,140,.2); color: #7ec8d0; }
+  .badge-gmail  { background: rgba(234,67,53,.18); color: #ea4335; }
+  .badge-gdrive { background: rgba(15,117,210,.18); color: #0f75d2; }
+
+  /* Empty state: centred column filling the full height of its container */
+  .empty-state { height: 100%; display: flex; flex-direction: column; justify-content: center; align-items: center;
+                 gap: 12px; text-align: center; color: var(--muted); }
+  .empty-icon { opacity: .3; font-size: 48px; }
+  .empty-text { line-height: 1.6; font-size: 14px; }
+
+  /* Log panel */
+  .log-wrap { display: flex; flex-direction: column; flex-shrink: 0; border-top: 1px solid var(--border); }
+  .sources-resize-handle { height: 5px; cursor: ns-resize; background: transparent; flex-shrink: 0; }
+  .sources-resize-handle:hover { background: var(--border); }
+  .log-resize-handle { height: 5px; cursor: ns-resize; background: transparent; flex-shrink: 0; }
+  .log-resize-handle:hover { background: var(--border); }
+  .log-header { display: flex; align-items: center; gap: 6px; padding: 3px 10px; background: var(--bg); border-bottom: 1px solid var(--border); flex-shrink: 0; }
+  .log-header-title { font-size: 10px; font-weight: 600; color: var(--muted); letter-spacing: 0.04em; text-transform: uppercase; flex: 1; }
+  .log-filter-btn { font-size: 10px; height: 18px; padding: 0 6px; border: 1px solid var(--border); border-radius: 4px; background: none; color: var(--muted); cursor: pointer; }
+  .log-filter-btn.active { background: var(--accent); color: #fff; border-color: var(--accent); }
+  .log-copy-btn { font-size: 10px; height: 18px; padding: 0 6px; border: 1px solid var(--border); border-radius: 4px; background: none; color: var(--muted); cursor: pointer; }
+  .log-copy-btn:hover { color: var(--text); }
+  .log-panel { height: 154px; min-height: 60px; overflow-y: auto; background: var(--bg); padding: 6px 14px; font-family: var(--mono); font-size: 11px; line-height: 16px; color: var(--muted); flex: none; scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
+  .log-panel::-webkit-scrollbar { width: 4px; }
+  .log-panel::-webkit-scrollbar-track { background: transparent; }
+  .log-panel::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
+  .log-line { margin-bottom: 2px; white-space: pre-wrap; word-break: break-all; }
+  .log-err  { color: var(--danger); }
+  .log-ok   { color: var(--success); }
+  .log-warn { color: #e0922a; }
+  .log-live { color: var(--muted); opacity: 0.7; font-style: italic; }
+  .log-line.log-err-hidden { display: none; }
+  .sidebar-footer { padding: 6px 12px; border-top: 1px solid var(--border); display: flex; align-items: center; justify-content: space-between; margin-top: auto; flex-shrink: 0; }
+  .sidebar-footer select { background: var(--surface); border: 1px solid var(--border); border-radius: 4px; color: var(--muted); font-size: 10px; padding: 2px 4px; cursor: pointer; }
+  /* Data subject lookup modal */
+  .dsub-modal-backdrop {
+    display: none; position: fixed; inset: 0;
+    background: rgba(0,0,0,0.45); z-index: 1000;
+    align-items: center; justify-content: center;
+  }
+  .dsub-modal-backdrop.open { display: flex; }
+  .dsub-modal {
+    background: var(--surface); border: 1px solid var(--border);
+    border-radius: 10px; padding: 22px 26px;
+    width: 500px; max-width: 95vw; max-height: 80vh;
+    display: flex; flex-direction: column; gap: 12px;
+    font-family: var(--sans); color: var(--text);
+  }
+  .dsub-modal h2 { font-size: 14px; font-weight: 600; margin: 0; }
+  .dsub-input-row { display: flex; gap: 8px; }
+  .dsub-input-row input { flex: 1; font-size: 13px; letter-spacing: .05em; }
+  .dsub-results { flex: 1; overflow-y: auto; min-height: 0; }
+  .dsub-result-row { display: flex; align-items: center; gap: 8px; padding: 7px 0;
+    border-bottom: 1px solid var(--border); font-size: 12px; }
+  .dsub-result-row:last-child { border-bottom: none; }
+  .dsub-result-name { flex: 1; font-weight: 500; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
+  .dsub-result-meta { color: var(--muted); font-size: 11px; white-space: nowrap; }
+  .dsub-footer { display: flex; gap: 8px; padding-top: 4px; }
+  .dsub-footer button { flex: 1; padding: 7px; border-radius: 7px; font-size: 12px; cursor: pointer; font-family: var(--sans); }
+  /* Disposition widget: label, select, save button and the "saved" note in one row */
+  .disposition-row { flex-shrink: 0; display: flex; gap: 8px; align-items: center;
+    padding: 8px 14px; border-top: 1px solid var(--border); }
+  .disposition-label { white-space: nowrap; color: var(--muted); font-size: 11px; }
+  .disposition-select { flex: 1; padding: 4px 6px; font-size: 11px; }
+  .disposition-save { white-space: nowrap; cursor: pointer; border: none; border-radius: 6px;
+    padding: 4px 10px; font-size: 11px; color: #fff; background: var(--accent); }
+  .disposition-saved { color: var(--success); font-size: 10px; }
+
+  /* Trend sparkline: 60px canvas with an absolutely positioned tooltip, axis labels and a legend */
+  .spark-wrap { margin: 6px 0 2px; height: 60px; position: relative; }
+  .spark-wrap canvas { height: 60px; width: 100%; }
+  .spark-tip { position:absolute; top:0; left:0; z-index:10; display:none;
+    pointer-events:none; white-space:nowrap; padding:3px 7px;
+    font-size:10px; color:var(--text); background:var(--surface);
+    border:1px solid var(--border); border-radius:5px; }
+  .spark-labels { margin-bottom:4px; color:var(--muted); font-size:9px;
+    display:flex; justify-content:space-between; }
+  .spark-legend { margin-top:3px; color:var(--muted);
+    font-size:9px; display:flex; gap:10px; }
+  .spark-legend span { gap:3px; display:flex; align-items:center; }
+  .spark-dot { border-radius:1px; height:2px; width:8px; }
+
+  /* About modal — fixed full-viewport backdrop, hidden by default; .open turns it into a centring flex box */
+  .about-modal-backdrop {
+    position: fixed; inset: 0; z-index: 1000; display: none;
+    align-items: center; justify-content: center; background: rgba(0,0,0,0.45);
+  }
+  .about-modal-backdrop.open { display: flex; }
+  .about-modal {
+    width: 90%; max-width: 380px;
+    padding: 32px 36px; border-radius: 8px;
+    border: 1px solid var(--border); background: var(--surface);
+    color: var(--text); font-family: var(--mono); font-size: 12px;
+  }
+  .about-modal h2 { margin: 0 0 4px; color: var(--text); font-family: var(--mono); font-size: 16px; font-weight: 700; }
+  .about-modal .about-version { margin-bottom: 20px; font-size: 11px; color: var(--accent); }
+  .about-modal .about-row { padding: 5px 0; border-bottom: 1px solid var(--border); color: var(--muted); display: flex; justify-content: space-between; }
+  .about-modal .about-row span:last-child { color: var(--text); }
+  .about-close { width: 100%; margin-top: 20px; padding: 8px; border: none; border-radius: 6px; cursor: pointer; color: #fff; background: var(--accent); font-family: var(--mono); font-size: 13px; }
+
+  /* SMTP modal */
+  .smtp-modal-backdrop {
+    display: none; position: fixed; inset: 0;
+    background: rgba(0,0,0,0.45); z-index: 1000;
+    align-items: center; justify-content: center;
+  }
+  .smtp-modal-backdrop.open { display: flex; }
+  .smtp-modal {
+    background: var(--surface); border: 1px solid var(--border);
+    border-radius: 10px; padding: 24px 28px;
+    width: 460px; max-width: 95vw; max-height: 90vh; overflow-y: auto;
+    font-family: var(--sans); font-size: 12px; color: var(--text);
+  }
+  .smtp-modal h2 { font-size: 15px; font-weight: 600; margin: 0 0 4px; }
+  .smtp-modal .smtp-subtitle { color: var(--muted); font-size: 11px; margin-bottom: 18px; }
+  .smtp-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 10px 16px; }
+  .smtp-grid .full { grid-column: 1 / -1; }
+  .smtp-field label { display: block; font-size: 11px; color: var(--muted); margin-bottom: 3px; }
+  .smtp-field input { width: 100%; padding: 6px 8px; font-size: 12px; }
+  .smtp-toggle-row { display: flex; align-items: center; gap: 8px; margin-top: 4px; font-size: 11px; color: var(--muted); }
+  .smtp-divider { grid-column: 1 / -1; border: none; border-top: 1px solid var(--border); margin: 4px 0; }
+  .smtp-footer { display: flex; gap: 8px; margin-top: 18px; }
+  .smtp-footer button { flex: 1; padding: 8px; border-radius: 7px; font-size: 12px; cursor: pointer; font-family: var(--sans); }
+  .smtp-status { font-size: 11px; margin-top: 8px; min-height: 16px; text-align: center; }
+
+  /* ── Viewer mode (body.viewer-mode) — hide scan/delete/share controls, sidebar, log and progress bar; show #viewerBrand ── */
+  body.viewer-mode #scanBtn,
+  body.viewer-mode #stopBtn,
+  body.viewer-mode #profileBar,
+  body.viewer-mode .topbar-sep,
+  body.viewer-mode .config-group,
+  body.viewer-mode #resumeBanner,
+  body.viewer-mode #bulkDeleteBtn,
+  body.viewer-mode .card-delete-btn,
+  body.viewer-mode #dsubDeleteBtn,
+  body.viewer-mode #shareBtn,
+  body.viewer-mode .sidebar,
+  body.viewer-mode #logWrap,
+  body.viewer-mode #progressBar { display: none !important; }
+  body.viewer-mode #viewerBrand { display: inline !important; }
diff --git a/templates/CLAUDE.md b/templates/CLAUDE.md
new file mode 100644
index 0000000..b57f674
--- /dev/null
+++ b/templates/CLAUDE.md
@@ -0,0 +1,29 @@
+# templates/ — CSS & HTML Rules
+
+## CSS variables
+Use the app's own variables (defined in `static/style.css`). Never use claude.ai system variables like `var(--color-background-primary)` — the app uses `var(--bg)`, `var(--surface)`, `var(--border)`, `var(--text)`, `var(--muted)`, `var(--accent)`, `var(--danger)`, `var(--success)`.
+
+Theme is switched via `[data-theme="light"]` attribute on `<body>` — not `prefers-color-scheme`.
+
+## Standard control height: 26px
+Every interactive element in the topbar and sidebar is 26px tall. Exception: `.toggle` is `32×18px` — do not change to 26px.
+
+## Pill cluster container pattern
+```css
+display: flex; background: var(--bg); border: 1px solid var(--border);
+border-radius: 6px; overflow: hidden;
+```
+Buttons inside: `border-right: 1px solid var(--border)` as dividers; last child has none. Selected: `background: var(--accent); color: #fff`.
+
+## Danger buttons
+Never place destructive actions (delete, reset, disconnect, sign out) inside a pill cluster. Standalone button with `border: 1px solid var(--danger); color: var(--danger)`, separated by a gap. Applies everywhere — topbar, sidebar, modals, list rows.
+
+## Badge sizing standard
+All badges — platform, role, source, CPR, faces, Art.9, overdue, risk — use: `font-size: 9px; padding: 1px 5px; border-radius: 10px`. Never override with larger inline styles. New badge classes always start from this standard.
+
+## No emojis in button labels
+All buttons use plain text — topbar, filter bar, modals, settings, and lang file values. No `▶ ■ 💾 ⚙ 🕐 ⬇ ⬆ 🗑 📋 ☰ ⊞`.
+
+## Gotchas
+
+- **Label click forwarding** — clickable non-control elements inside a `<label>` (e.g. a `<span onclick>`) have their clicks forwarded to the label's checkbox. Use `<button type="button">` instead; clicks on a real button are not forwarded.
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..6b48a52
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,1276 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover">
+  <title>GDPRScanner</title>
+  <link rel="preconnect" href="https://fonts.googleapis.com">
+  <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@400;500;600&display=swap" rel="stylesheet">
+  <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+
+<script>
+// ── i18n ─────────────────────────────────────────────────────────────────────
+var LANG = {{ lang_json | safe }};  // key → translated text, read by t() and applyI18n(); NOTE(review): rendered unescaped — assumes lang_json is trusted, pre-serialized JSON; confirm
+// ── Viewer mode ───────────────────────────────────────────────────────────────
+window.VIEWER_MODE = {{ 'true' if viewer_mode else 'false' }};  // JS boolean literal rendered from the template's viewer_mode value
+function t(key, fallback) {
+  return LANG[key] !== undefined ? LANG[key] : (fallback !== undefined ? fallback : key);
+}
+function applyI18n() {
+  document.querySelectorAll('[data-i18n]').forEach(el => {
+    const v = LANG[el.getAttribute('data-i18n')];
+    if (v !== undefined) {
+      // Use textContent for <option> elements — innerHTML can break select rendering
+      if (el.tagName === 'OPTION') el.textContent = v;
+      else el.innerHTML = v;
+    }
+  });
+  document.querySelectorAll('[data-i18n-placeholder]').forEach(el => {
+    const v = LANG[el.getAttribute('data-i18n-placeholder')];
+    if (v !== undefined) el.placeholder = v;
+  });
+  document.querySelectorAll('[data-i18n-title]').forEach(el => {
+    const v = LANG[el.getAttribute('data-i18n-title')];
+    if (v !== undefined) el.title = v;
+  });
+}
+document.addEventListener('DOMContentLoaded', applyI18n);
+</script>
+</head>
+<body data-theme="dark">
+<div class="layout" id="layout">
+
+  <!-- Sidebar -->
+  <div class="sidebar">
+    <div class="sidebar-header">
+      <div class="sidebar-title">🔍 GDPRScanner</div>
+    </div>
+
+    <!-- Sources — rendered dynamically by renderSourcesPanel() -->
+    <div class="sidebar-section" id="sourcesPanelSection">
+      <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
+        <div class="section-label" style="margin-bottom:0" data-i18n="m365_sources">Sources</div>
+        <button class="section-collapse-btn" onclick="toggleSection('sourcesPanelSection')" id="sourcesPanelSection-btn">▾</button>
+      </div>
+      <div id="sourcesPanelSectionBody">
+        <div id="sourcesPanel" style="overflow-y:auto;display:flex;flex-direction:column;gap:0"></div>
+        <div class="sources-resize-handle" id="sourcesResizeHandle"></div>
+      </div>
+    </div>
+
+    <!-- Options -->
+    <div class="sidebar-section" id="optionsSection">
+      <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
+        <div class="section-label" style="margin-bottom:0" data-i18n="m365_options">Options</div>
+        <button class="section-collapse-btn" onclick="toggleSection('optionsSection')" id="optionsSection-btn">▾</button>
+      </div>
+      <div id="optionsSectionBody">
+
+      <label style="font-size:11px;color:var(--muted);display:block;margin-bottom:4px" data-i18n="m365_opt_date_from">Flag items older than</label>
+      <div class="datepicker-wrap">
+        <input type="date" id="olderThanDate" autocomplete="off">
+        <div class="date-presets">
+          <button class="date-preset" data-years="1" data-i18n="m365_preset_1yr">1 yr</button>
+          <button class="date-preset selected" data-years="2" data-i18n="m365_preset_2yr">2 yr</button>
+          <button class="date-preset" data-years="5" data-i18n="m365_preset_5yr">5 yr</button>
+          <button class="date-preset" data-years="10" data-i18n="m365_preset_10yr">10 yr</button>
+          <button class="date-preset" data-years="0" data-i18n="m365_preset_any">Any</button>
+        </div>
+      </div>
+      <input type="hidden" id="olderThan" value="730">
+
+      <div style="margin-top:4px">
+        <div class="toggle-row">
+          <span class="toggle-label"><span data-i18n="m365_opt_email_body">Scan email body</span></span>
+          <label class="toggle"><input type="checkbox" id="optEmailBody" checked><span class="toggle-slider"></span></label>
+        </div>
+        <div class="toggle-row">
+          <span class="toggle-label" data-i18n="m365_opt_attachments">Scan attachments</span>
+          <label class="toggle"><input type="checkbox" id="optAttachments" checked><span class="toggle-slider"></span></label>
+        </div>
+        <div class="toggle-row" id="attachSizeRow">
+          <span class="toggle-label" style="color:var(--muted)" data-i18n="m365_opt_max_attach">Max attachment size</span>
+          <div style="display:flex;align-items:center;gap:4px">
+            <input type="number" id="optMaxAttachMB" value="20" min="1" max="100"
+                   style="width:46px;padding:3px 6px;font-size:11px;text-align:right">
+          </div>
+        </div>
+        <div class="toggle-row">
+          <span class="toggle-label"><span data-i18n="m365_opt_max_emails">Max emails per user</span></span>
+          <input type="number" id="optMaxEmails" value="2000" min="10" max="50000"
+                 style="width:56px;padding:3px 6px;font-size:11px;text-align:right">
+        </div>
+        <div class="toggle-row">
+          <span class="toggle-label" style="flex:1">
+            <span data-i18n="m365_opt_delta">Delta scan</span><span class="hint-wrap"><span class="hint-icon" onclick="toggleHint(this)">?</span><span class="hint-bubble" data-i18n="m365_opt_delta_hint">Changed items only (after first full scan)</span></span>
+          </span>
+          <label class="toggle"><input type="checkbox" id="optDelta"><span class="toggle-slider"></span></label>
+        </div>
+        <div id="deltaStatusRow" style="display:none;font-size:10px;padding:3px 0 2px;color:var(--muted)">
+          <span id="deltaStatusText"></span>
+          <button onclick="clearDeltaTokens()" style="background:none;border:none;color:var(--danger);font-size:10px;cursor:pointer;padding:0 0 0 6px" data-i18n="m365_delta_clear">Clear tokens</button>
+        </div>
+
+        <!-- Photo / biometric scan (#9) -->
+        <div class="toggle-row">
+          <span class="toggle-label" style="flex:1">
+            <span data-i18n="m365_opt_scan_photos">Scan photos for faces</span><span class="hint-wrap"><span class="hint-icon" onclick="toggleHint(this)">?</span><span class="hint-bubble" data-i18n="m365_opt_scan_photos_hint">Flags images with detected faces as Art. 9 biometric data. Slower — opt in.</span></span>
+          </span>
+          <label class="toggle"><input type="checkbox" id="optScanPhotos"><span class="toggle-slider"></span></label>
+        </div>
+
+        <!-- Retention policy (suggestion #1) -->
+        <div class="toggle-row">
+          <span class="toggle-label" style="flex:1">
+            <span data-i18n="m365_opt_retention">Retention policy</span><span class="hint-wrap"><span class="hint-icon" onclick="toggleHint(this)">?</span><span class="hint-bubble" data-i18n="m365_opt_retention_hint">Flag and delete items older than N years</span></span>
+          </span>
+          <label class="toggle"><input type="checkbox" id="optRetention" onchange="toggleRetentionPanel()"><span class="toggle-slider"></span></label>
+        </div>
+        <div id="retentionPanel" style="display:none;margin-top:5px;padding:7px 8px;background:var(--bg);border-radius:6px;font-size:11px">
+          <div style="display:flex;align-items:center;gap:8px;margin-bottom:5px">
+            <label style="color:var(--muted);flex:1" data-i18n="m365_ret_years">Retention years</label>
+            <input type="number" id="optRetentionYears" value="5" min="1" max="30"
+                   style="width:46px;padding:3px 6px;font-size:11px;text-align:right">
+          </div>
+          <div style="display:flex;flex-direction:column;gap:3px">
+            <label style="color:var(--muted)" data-i18n="m365_ret_fy_end">Fiscal year end</label>
+            <select id="optFiscalYearEnd" style="font-size:11px;padding:3px 6px;width:100%">
+              <option value="" data-i18n="m365_ret_fy_rolling">Rolling (today)</option>
+              <option value="12-31" data-i18n="m365_ret_fy_dec">31 Dec (Bogføringsloven)</option>
+              <option value="06-30" data-i18n="m365_ret_fy_jun">30 Jun</option>
+              <option value="03-31" data-i18n="m365_ret_fy_mar">31 Mar</option>
+            </select>
+          </div>
+          <div id="retentionCutoffHint" style="font-size:10px;color:var(--muted);margin-top:4px"></div>
+        </div>
+      </div>
+      </div><!-- /optionsSectionBody -->
+    </div>
+
+    <!-- Accounts -->
+    <div class="sidebar-section" id="accountsSection" style="flex:1;display:flex;flex-direction:column;overflow:hidden;padding:7px 12px;border-bottom:1px solid var(--border)">
+      <div class="section-label" style="display:flex;align-items:center;justify-content:space-between">
+        <span style="display:flex;align-items:center;gap:5px">
+          <span data-i18n="m365_accounts">Accounts</span>
+          <span id="userCountBadge" style="font-size:10px;color:var(--muted);font-weight:400"></span>
+        </span>
+        <span style="display:flex;align-items:center;gap:6px">
+          <div style="display:flex;border:1px solid var(--border);border-radius:5px;overflow:hidden">
+            <button onclick="selectAllAccounts(true)" style="background:none;border:none;border-right:1px solid var(--border);color:var(--accent);font-size:11px;cursor:pointer;padding:0 7px;height:22px" data-i18n="btn_all">Alle</button>
+            <button onclick="selectAllAccounts(false)" style="background:none;border:none;border-right:1px solid var(--border);color:var(--muted);font-size:11px;cursor:pointer;padding:0 7px;height:22px" data-i18n="btn_none">Ingen</button>
+            <button onclick="loadUsers()" style="background:none;border:none;color:var(--muted);font-size:11px;cursor:pointer;padding:0 7px;height:22px" title="Refresh">↻</button>
+          </div>
+          <button class="section-collapse-btn" onclick="toggleSection('accountsSection')" id="accountsSection-btn">▾</button>
+        </span>
+      </div>
+      <div id="accountsSectionBody" style="display:flex;flex-direction:column;flex:1;overflow:hidden;min-height:0">
+      <div style="margin:4px 0 3px">
+        <input id="userSearch" type="text" data-i18n-placeholder="m365_search_users" placeholder="Search users…"
+               oninput="filterUsers()"
+               style="width:100%;box-sizing:border-box;font-size:11px;padding:5px 8px;background:var(--bg2);border:1px solid var(--border);border-radius:5px;color:var(--text);outline:none">
+      </div>
+      <div style="display:flex;align-items:center;gap:4px;margin-bottom:4px">
+        <div style="flex:1;display:flex;background:var(--bg);border:1px solid var(--border);border-radius:6px;overflow:hidden">
+          <button onclick="setRoleFilter('')" id="rfAll" class="role-filter-btn rf-sep" style="background:var(--accent);color:#fff" data-i18n="m365_role_all">All</button>
+          <button onclick="setRoleFilter('staff')" id="rfStaff" class="role-filter-btn rf-sep" data-i18n="role_staff">Ansat</button>
+          <button onclick="setRoleFilter('student')" id="rfStudent" class="role-filter-btn" data-i18n="role_student">Elev</button>
+        </div>
+        <button onclick="showSkuDebug()" title="Show tenant SKU IDs" style="font-size:13px;height:26px;padding:0 7px;border-radius:5px;cursor:pointer;border:1px solid var(--border);background:none;color:var(--muted);flex-shrink:0;box-sizing:border-box">&#128269;</button>
+      </div>
+      <div id="skuWarnBanner" style="display:none;background:#7c1a0030;border:1px solid var(--danger);border-radius:5px;padding:6px 8px;font-size:10px;color:#ff9090;line-height:1.5;margin-bottom:4px">&#9888; No users classified. SKU IDs unknown &#8212; click &#128269; to diagnose.</div>
+      <div id="accountsList" style="font-size:12px;color:var(--muted);flex:1;overflow-y:auto;min-height:0">
+        <div id="accountsLoading" style="padding:4px 0">Loading…</div>
+      </div>
+      <div style="margin-top:5px">
+        <div style="font-size:10px;color:var(--muted);margin-bottom:3px" data-i18n="m365_add_account_label">Add account manually:</div>
+        <div style="display:flex;gap:4px">
+          <input id="addUserInput" type="text" data-i18n-placeholder="m365_add_account_placeholder" placeholder="email or UPN" style="flex:1;font-size:11px;padding:4px 7px;min-width:0">
+          <button onclick="addUserManually()" style="background:var(--accent);color:#fff;border:none;padding:4px 8px;border-radius:5px;font-size:11px;cursor:pointer;flex-shrink:0">+</button>
+        </div>
+      </div>
+      </div><!-- /accountsSectionBody -->
+    </div>
+
+    <!-- Stats -->
+    <div class="sidebar-section" id="statsSection" style="display:none">
+      <div class="section-label" data-i18n="m365_stats">Stats</div>
+      <div style="font-size:12px; color:var(--muted); line-height:1.8">
+        <span data-i18n="m365_stat_scanned">Scanned</span>: <strong id="statScanned">0</strong><br>
+        <span data-i18n="m365_stat_flagged">Flagged</span>: <strong id="statFlagged" style="color:var(--danger)">0</strong><br>
+        <span data-i18n="m365_stat_cpr">CPR hits</span>: <strong id="statCPR" style="color:var(--accent2)">0</strong>
+      </div>
+
+      <!-- Trend sparkline (#7) -->
+      <div id="trendPanel" style="display:none;margin-top:8px;padding-top:8px;border-top:1px solid var(--border)">
+        <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
+          <span style="font-size:10px;color:var(--muted)" data-i18n="m365_trend_title">Trend</span>
+          <span style="font-size:10px;color:var(--muted)" id="trendChange"></span>
+        </div>
+        <div class="spark-wrap">
+          <canvas id="sparkCanvas"></canvas>
+          <div class="spark-tip" id="sparkTip"></div>
+        </div>
+        <div class="spark-labels" id="sparkLabels"></div>
+        <div class="spark-legend">
+          <span><span class="spark-dot" style="background:#378ADD"></span><span data-i18n="m365_trend_flagged">Flagged</span></span>
+          <span><span class="spark-dot" style="background:#BA7517;opacity:.7"></span><span data-i18n="m365_trend_overdue">Overdue</span></span>
+        </div>
+      </div>
+    </div>
+
+    <!-- SMTP / Email Report -->
+
+    <!-- Sidebar footer: hidden lang select (still used by setLang) -->
+    <div class="sidebar-footer">
+      <select id="langSelect" onchange="setLang(this.value)" style="display:none"></select>
+    </div>
+  </div>
+
+  <!-- Main area -->
+  <div class="main" id="mainArea">
+
+    <!-- Auth screen (shown when not connected) -->
+    <div id="authScreen" class="auth-panel" style="padding-top: env(safe-area-inset-top, 0px)">
+      <div class="auth-card">
+        <div class="auth-title" data-i18n="m365_connect_title">Connect to Microsoft 365</div>
+        <div class="auth-sub" data-i18n="m365_connect_sub">Enter your Azure app credentials to sign in.</div>
+
+        <div id="configForm">
+          <div class="form-row">
+            <label class="form-label" data-i18n="m365_label_client_id">Client ID (Application ID)</label>
+            <input type="text" id="clientId" placeholder="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" value="">
+          </div>
+          <div class="form-row">
+            <label class="form-label" data-i18n="m365_label_tenant_id">Tenant ID</label>
+            <input type="text" id="tenantId" placeholder="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" value="">
+          </div>
+          <div class="form-row">
+            <label class="form-label" style="display:flex;align-items:center;gap:6px">
+              <span data-i18n="m365_label_client_secret">Client Secret</span>
+              <span style="font-size:10px;color:var(--muted);font-weight:400" data-i18n="m365_secret_hint">(optional — enables org-wide scanning)</span>
+            </label>
+            <input type="password" id="clientSecret" placeholder="Leave blank for personal sign-in" value="" autocomplete="off">
+          </div>
+          <div style="font-size:11px;color:var(--muted);margin-bottom:10px;line-height:1.5">
+            <strong data-i18n="m365_label_client_secret">Client Secret</strong>: <span data-i18n="m365_secret_desc_app">app accesses all users' data directly (Application permissions, no sign-in required).</span><br>
+            <strong data-i18n="m365_btn_sign_out" style="display:none"></strong><span data-i18n="m365_secret_desc_delegated">you sign in as yourself and can only scan your own data unless you're a Global Admin.</span>
+          </div>
+          <div style="display:flex; gap:8px; margin-top:8px">
+            <button class="btn-primary" onclick="handleSignIn()" data-i18n="m365_btn_connect">Connect</button>
+          </div>
+        </div>
+      </div>
+    </div>
+
+    <!-- Scanner screen (shown when connected) -->
+    <div id="scannerScreen" style="display:none; flex-direction:column; height:100%; padding-top: env(safe-area-inset-top, 0px)">
+
+      <!-- Topbar -->
+      <div class="topbar">
+        <span id="viewerBrand" style="display:none;font-size:15px;font-weight:600;color:var(--text);white-space:nowrap;margin-right:6px">🔍 GDPRScanner</span>
+        <button class="scan-btn" id="scanBtn" onclick="startScan()" data-i18n="m365_btn_scan">Scan</button>
+        <button class="stop-btn" id="stopBtn" style="display:none" onclick="stopScan()" data-i18n="m365_btn_stop">Stop</button>
+
+        <!-- Profile selector (15c) -->
+        <div id="profileBar" style="display:flex;align-items:center;gap:6px;margin-left:10px">
+          <span style="font-size:10px;color:var(--muted)" data-i18n="m365_profile_label">Profil:</span>
+          <select id="profileSelect" onchange="onProfileChange()"
+                  style="font-size:11px;height:26px;padding:0 6px;border-radius:6px;border:1px solid var(--border);background:var(--surface);color:var(--text);max-width:160px;cursor:pointer;box-sizing:border-box">
+            <option value="" disabled selected data-i18n="m365_profile_placeholder">— Vælg profil —</option>
+          </select>
+          <button id="profileClearBtn" onclick="clearActiveProfile()"
+                  style="display:none;background:none;border:1px solid var(--border);color:var(--muted);border-radius:5px;font-size:11px;height:26px;padding:0 7px;cursor:pointer;box-sizing:border-box" data-i18n="m365_profile_clear_btn" title="Ryd aktiv profil">Ryd</button>
+          <button onclick="saveCurrentAsProfile()" data-i18n="m365_profile_save_btn"
+                  style="background:none;border:1px solid var(--border);color:var(--muted);border-radius:5px;font-size:11px;height:26px;padding:0 7px;cursor:pointer;box-sizing:border-box" data-i18n-title="m365_profile_save_tip">Gem</button>
+          <div id="schedNextIndicator" style="display:none;height:26px;align-items:center;font-size:10px;color:var(--muted);cursor:pointer;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;border:1px solid var(--border);border-radius:5px;padding:0 7px;box-sizing:border-box" onclick="openSettings('scheduler')" title="Click to configure scheduler"><span id="schedNextText"></span></div>
+        </div>
+        <div class="topbar-sep"></div>
+        <div class="config-group">
+          <button onclick="openProfileMgmtModal()" data-i18n="m365_profile_manage_btn" title="Manage profiles">Profiler</button>
+          <button onclick="openSourcesMgmt('m365')" data-i18n="m365_sources_manage_btn" title="Manage sources">Kilder</button>
+          <button onclick="openSettings('general')" data-i18n="m365_btn_settings" title="Settings">Indstillinger</button>
+        </div>
+
+        <div class="spacer"></div>
+        <div class="stats-pill" id="statsPill" style="display:none"><!-- N flagged · N scanned; each label is its own data-i18n span -->
+          <span id="pillFlagged">0</span> <span data-i18n="m365_pill_flagged">flagged</span> · <span id="pillScanned">0</span> <span data-i18n="m365_pill_scanned">scanned</span>
+        </div><!-- /statsPill -->
+        <button class="theme-btn" onclick="openSubjectModal()" data-i18n-title="m365_subject_title" title="Data subject lookup" style="font-size:13px">🔍</button>
+        <button class="theme-btn" id="shareBtn" onclick="openShareModal()" data-i18n-title="share_modal_title" title="Share results" style="font-size:13px">&#128279;</button>
+        <button class="theme-btn" onclick="(function(){var l=(document.getElementById('langSelect')?.value||'da');if(window.pywebview&&pywebview.api&&pywebview.api.open_manual){pywebview.api.open_manual(l);}else{window.open('/manual?lang='+l,'gdpr_manual','width=960,height=800,resizable=yes,scrollbars=yes');}})();" title="Hjælp / Help" style="font-size:13px;font-weight:600">?</button>
+        <button class="theme-btn" id="themeBtn" onclick="toggleTheme()" title="Toggle dark/light mode">🌙</button>
+      </div>
+
+      <!-- Resume checkpoint banner -->
+      <div id="resumeBanner" style="display:none;align-items:center;gap:10px;padding:8px 14px;background:var(--surface);border-bottom:1px solid var(--border);font-size:12px;color:var(--text)">
+        <span>⏸</span>
+        <span id="resumeBannerText"></span>
+        <button onclick="startScan(true)"  style="padding:3px 10px;border-radius:5px;background:var(--accent);color:#fff;border:none;cursor:pointer;font-size:12px" data-i18n="m365_btn_resume">Resume</button>
+        <button onclick="clearCheckpointAndScan()" style="padding:3px 10px;border-radius:5px;background:none;border:1px solid var(--border);color:var(--muted);cursor:pointer;font-size:12px" data-i18n="m365_btn_start_fresh">Start fresh</button>
+      </div>
+
+      <!-- Filter bar — full width, above grid + preview -->
+      <div class="filter-bar" id="filterBar">
+          <input type="text" id="filterSearch" data-i18n-placeholder="m365_filter_search" placeholder="Search…" oninput="applyFilters()">
+          <select id="filterSource" onchange="applyFilters()">
+            <option value="" data-i18n="m365_filter_all_sources">All sources</option>
+            <option value="email" data-i18n="m365_filter_email">Outlook</option>
+            <option value="onedrive" data-i18n="m365_filter_onedrive">OneDrive</option>
+            <option value="sharepoint" data-i18n="m365_filter_sharepoint">SharePoint</option>
+            <option value="teams" data-i18n="m365_filter_teams">Teams</option>
+            <option value="gmail">Gmail</option>
+            <option value="gdrive">Google Drive</option>
+            <option value="local" data-i18n="m365_filter_local">Lokal</option>
+            <option value="smb" data-i18n="m365_filter_smb">Netværk (SMB)</option>
+          </select>
+          <select id="filterDisposition" onchange="applyFilters()" style="width:150px">
+            <option value="" data-i18n="m365_filter_all_disp">All dispositions</option>
+            <option value="unreviewed"       data-i18n="m365_disp_unreviewed">Unreviewed</option>
+            <option value="retain-legal"     data-i18n="m365_disp_retain_legal">Retain — legal</option>
+            <option value="retain-legitimate" data-i18n="m365_disp_retain_legit">Retain — legitimate</option>
+            <option value="retain-contract"  data-i18n="m365_disp_retain_contract">Retain — contract</option>
+            <option value="delete-scheduled" data-i18n="m365_disp_delete_sched">Delete — scheduled</option>
+            <option value="deleted"          data-i18n="m365_disp_deleted">Deleted</option>
+            <option value="personal-use"      data-i18n="m365_disp_personal_use">Personal use — out of scope</option>
+          </select>
+          <select id="filterTransfer" onchange="applyFilters()" style="width:160px">
+            <option value="" data-i18n="m365_filter_all_transfer">All items</option>
+            <option value="external-recipient" data-i18n="m365_filter_ext_recipient">⚠ External recipient</option>
+            <option value="external-share" data-i18n="m365_filter_ext_share">🔗 External share</option>
+            <option value="shared" data-i18n="m365_filter_shared">🔗 Shared</option>
+          </select>
+          <select id="filterSpecial" onchange="applyFilters()" style="width:150px">
+            <option value="" data-i18n="m365_filter_all_special">All risk levels</option>
+            <option value="1" data-i18n="m365_filter_special_only">⚠ Art. 9 only</option>
+            <option value="photo" data-i18n="m365_filter_photo_only">📷 Photos / biometric</option>
+          </select>
+          <button class="filter-clear" onclick="clearFilters()" data-i18n="m365_filter_clear">Ryd</button>
+          <div class="spacer"></div>
+          <button id="exportBtn" onclick="exportExcel()" style="background:none;border:1px solid var(--border);color:var(--muted)" data-i18n="m365_btn_export_excel" title="Export results as Excel">Excel</button>
+          <button id="exportA30Btn" onclick="exportArticle30()" style="background:none;border:1px solid var(--accent);color:var(--accent)" data-i18n="m365_btn_export_article30" title="Export GDPR Article 30 report as Word document">Art.30</button>
+          <button id="bulkDeleteBtn" onclick="openBulkDelete()" style="background:none;border:1px solid var(--danger);color:var(--danger)" data-i18n="m365_btn_bulk_delete" title="Bulk delete">Slet</button>
+          <button id="listViewBtn" style="background:none;border:1px solid var(--border);color:var(--muted)" onclick="toggleView()" data-i18n="m365_btn_list_view">Liste</button>
+      </div>
+
+      <!-- Content area: grid + preview panel -->
+      <div class="content-area">
+        <div style="flex:1; display:flex; flex-direction:column; overflow:hidden; min-width:220px">
+
+        <!-- Grid -->
+        <div class="grid-area" id="gridArea">
+          <div class="empty-state" id="emptyState">
+            <div class="empty-icon">☁️</div>
+            <div class="empty-text" data-i18n="m365_empty_hint">Select sources and click <strong>Scan</strong><br>to find documents with CPR numbers</div>
+          </div>
+          <div id="lastScanSummary" style="display:none" class="empty-state last-scan-summary"></div>
+          <div class="grid" id="grid" style="display:none"></div>
+        </div>
+
+        <!-- Progress bar -->
+        <div class="progress-bar" id="progressBar">
+          <div class="progress-who" id="progressWho"></div>
+          <span class="progress-file" id="progressFile"></span>
+          <span id="progressStats" style="flex-shrink:0"></span>
+          <span id="progressEta"   style="flex-shrink:0; margin-left:6px"></span>
+          <div class="progress-track" id="progressTrack"></div>
+        </div>
+
+        <!-- Log -->
+        <div class="log-wrap" id="logWrap">
+          <div class="log-header">
+            <button class="section-collapse-btn" onclick="toggleSection('logSection')" id="logSection-btn">▾</button>
+            <span class="log-header-title">Log</span>
+            <button class="log-filter-btn active" id="logFilterAll" onclick="setLogFilter('all')" title="Show all log entries" data-i18n="btn_all">All</button>
+            <button class="log-filter-btn" id="logFilterErr" onclick="setLogFilter('err')" title="Show errors only" data-i18n="btn_errors">Errors</button>
+            <button class="log-copy-btn" onclick="copyLog()" title="Copy log to clipboard" data-i18n="log_copy">Copy</button>
+          </div>
+          <div id="logSectionBody">
+            <div class="log-resize-handle" id="logResizeHandle"></div>
+            <div class="log-panel" id="logPanel"><div class="log-line log-live" id="logLive" style="display:none"></div></div>
+          </div>
+        </div>
+
+        </div><!-- end flex col -->
+
+        <!-- Preview panel -->
+        <div class="preview-panel hidden" id="previewPanel">
+          <div class="preview-resize-handle" id="previewResizeHandle"></div>
+          <div class="preview-inner">
+            <div class="preview-header">
+              <div class="preview-title" id="previewTitle">—</div>
+              <button class="preview-close" onclick="closePreview()" data-i18n-title="m365_preview_close" title="Close">×</button>
+            </div>
+            <div class="preview-body" id="previewBody">
+              <div class="preview-loading" id="previewLoading">Loading preview…</div>
+              <iframe id="previewFrame" sandbox="allow-scripts allow-same-origin allow-forms allow-popups" style="display:none"></iframe>{# NOTE(review): the sandbox grants both allow-scripts and allow-same-origin; if the framed preview is served from this app's own origin, its scripts can reach the parent page and remove the sandbox — confirm the preview content is trusted, or drop one of the two flags. #}
+            </div>
+            <div class="preview-meta" id="previewMeta"></div>
+            <!-- Disposition widget (#6) -->
+            <div class="disposition-row" id="dispositionRow" style="display:none">
+              <span class="disposition-label" data-i18n="m365_disposition_label">Disposition</span>
+              <select class="disposition-select" id="dispositionSelect">
+                <option value="unreviewed"      data-i18n="m365_disp_unreviewed">Unreviewed</option>
+                <option value="retain-legal"    data-i18n="m365_disp_retain_legal">Retain — legal obligation</option>
+                <option value="retain-legitimate" data-i18n="m365_disp_retain_legit">Retain — legitimate interest</option>
+                <option value="retain-contract" data-i18n="m365_disp_retain_contract">Retain — contract</option>
+                <option value="delete-scheduled" data-i18n="m365_disp_delete_sched">Delete — scheduled</option>
+                <option value="deleted"         data-i18n="m365_disp_deleted">Deleted</option>
+                <option value="personal-use"    data-i18n="m365_disp_personal_use">Personal use — out of scope</option>
+              </select>
+              <button class="disposition-save" onclick="saveDisposition()" data-i18n="m365_disp_save">Save</button>
+              <span class="disposition-saved" id="dispositionSaved"></span>
+            </div>
+          </div>
+        </div>
+
+      </div><!-- end content-area -->
+    </div>
+
+  </div>
+</div>
+
+<!-- Device code modal: backdrop has no click-to-dismiss handler (unlike the other modals here); the only control is Cancel → cancelAuth(). The external sign-in link opens in a new tab with rel="noopener noreferrer". -->
+<div class="about-modal-backdrop" id="deviceCodeBackdrop">
+  <div class="about-modal" style="max-width:400px;text-align:center">
+    <h2 data-i18n="m365_connect_title">Connect to Microsoft 365</h2>
+    <div class="device-code-box" style="margin:16px 0">
+      <div class="device-url"><span data-i18n="m365_device_code_go">Go to</span> <a href="https://microsoft.com/devicelogin" target="_blank" rel="noopener noreferrer">microsoft.com/devicelogin</a></div>
+      <div class="device-code" id="deviceCode">—</div>
+      <div class="device-url" data-i18n="m365_device_code_enter">and enter this code</div>
+    </div>
+    <div class="auth-status waiting" id="authStatus" style="margin-bottom:12px">⏳ Waiting for sign-in…</div>
+    <button class="about-close" style="background:transparent;border:1px solid var(--border);color:var(--muted)" onclick="cancelAuth()" data-i18n="m365_btn_cancel_auth">Cancel</button>
+  </div>
+</div>
+
+<!-- Mode info modal: modeInfoTitle / modeInfoSubtitle / modeInfoRows are empty placeholders in the markup (presumably filled by script — confirm); a click on the backdrop itself calls closeModeInfo(); Reconfigure and Sign out close the modal before acting -->
+<div class="about-modal-backdrop" id="modeInfoBackdrop" onclick="if(event.target===this)closeModeInfo()">
+  <div class="about-modal" style="max-width:440px">
+    <h2 id="modeInfoTitle"></h2>
+    <div class="about-version" id="modeInfoSubtitle"></div>
+    <div id="modeInfoRows"></div>
+    <div style="border-top:1px solid var(--border);margin-top:14px;padding-top:12px;display:flex;flex-direction:column;gap:6px">
+      <button class="btn" style="width:100%;font-size:11px;padding:7px 12px;background:transparent;border:1px solid var(--border);color:var(--muted)"
+        onclick="closeModeInfo();reconfigure()" data-i18n="m365_btn_reconfigure">Reconfigure</button>
+      <button class="btn" style="width:100%;font-size:11px;padding:7px 12px;background:transparent;border:1px solid var(--danger);color:var(--danger)"
+        onclick="closeModeInfo();signOut()" data-i18n="m365_btn_sign_out">Sign out</button>
+    </div>
+    <button class="about-close" onclick="closeModeInfo()" data-i18n="btn_close">Close</button>
+  </div>
+</div>
+
+<!-- Bulk delete modal: filter controls bdSource / bdMinCpr / bdOlderThan (each tied to its label via for=), preview line bdPreview, progress bdProgress; a click on the backdrop itself calls closeBulkDelete(). Fallback texts are English; data-i18n keys supply translations. -->
+<div class="about-modal-backdrop" id="bulkDeleteBackdrop" onclick="if(event.target===this)closeBulkDelete()">
+  <div class="about-modal bulk-delete-modal">
+    <h2><span data-i18n="m365_bulk_delete_title">Bulk Delete</span></h2>
+    <div class="about-version" data-i18n="m365_bulk_delete_sub">Permanently removes items from Microsoft 365. Emails go to Deleted Items; files go to the recycle bin.</div>
+
+    <div style="margin:14px 0 6px;font-size:11px;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em" data-i18n="m365_bulk_filter_heading">Filter what to delete</div>
+    <div style="display:flex;gap:6px;margin-bottom:10px">
+      <button onclick="preFilterOverdue()" style="flex:1;background:none;border:1px solid var(--accent2);color:var(--accent2);padding:4px 8px;border-radius:6px;font-size:11px;cursor:pointer" data-i18n="m365_bulk_overdue_btn">🗓 Filter overdue</button>
+      <button onclick="clearBdFilters()" style="background:none;border:1px solid var(--border);color:var(--muted);padding:4px 8px;border-radius:6px;font-size:11px;cursor:pointer" data-i18n="m365_bulk_clear_filters">Clear filters</button>
+    </div>
+
+    <div class="bulk-criteria-row">
+      <label for="bdSource" data-i18n="m365_bulk_filter_source">Source type</label>
+      <select id="bdSource">
+        <option value="" data-i18n="m365_filter_all_sources">All sources</option>
+        <option value="email" data-i18n="m365_filter_email">Email</option>
+        <option value="onedrive" data-i18n="m365_filter_onedrive">OneDrive</option>
+        <option value="sharepoint" data-i18n="m365_filter_sharepoint">SharePoint</option>
+        <option value="teams" data-i18n="m365_filter_teams">Teams</option>
+        <option value="gmail">Gmail</option>
+        <option value="gdrive">Google Drive</option>
+        <option value="local" data-i18n="m365_filter_local">Local</option>
+        <option value="smb" data-i18n="m365_filter_smb">Network (SMB)</option>
+      </select>
+    </div>
+    <div class="bulk-criteria-row">
+      <label for="bdMinCpr" data-i18n="m365_bulk_filter_min_cpr">Min CPR hits</label>
+      <input type="number" id="bdMinCpr" min="1" value="1" style="width:80px;flex:none">
+    </div>
+    <div class="bulk-criteria-row">
+      <label for="bdOlderThan" data-i18n="m365_bulk_filter_older_than">Older than date</label>
+      <input type="date" id="bdOlderThan">
+    </div>
+
+    <div id="bdPreview" style="font-size:12px;color:var(--muted);margin:10px 0 4px"></div>
+
+    <div style="border-top:1px solid var(--border);margin-top:10px;padding-top:12px;display:flex;gap:8px;align-items:center">
+      <button class="btn-danger" id="bdConfirmBtn" onclick="executeBulkDelete()" data-i18n="m365_bulk_delete_confirm">Delete matching items</button>
+      <button class="btn" style="background:transparent;border:1px solid var(--border);color:var(--muted);padding:7px 14px;border-radius:6px;font-size:12px;cursor:pointer" onclick="closeBulkDelete()" data-i18n="btn_close">Close</button>
+      <div class="delete-progress" id="bdProgress"></div>
+    </div>
+  </div>
+</div>
+
+<!-- Settings modal -->
+<div class="settings-backdrop" id="settingsBackdrop" onclick="if(event.target===this)closeSettings()">
+  <div class="settings-modal">
+    <div class="settings-header">
+      <h2 data-i18n="m365_settings_title">⚙ Settings</h2>
+      <button onclick="closeSettings()" data-i18n-title="btn_close" title="Close" style="background:none;border:none;color:var(--muted);font-size:18px;cursor:pointer;padding:0 4px;line-height:1">&times;</button><!-- icon-only button: title supplies its accessible name -->
+    </div>
+    <div class="settings-tabs">
+      <button class="settings-tab" id="stTabGeneral"  onclick="switchSettingsTab('general')"  data-i18n="m365_settings_tab_general">General</button>
+      <button class="settings-tab" id="stTabSecurity" onclick="switchSettingsTab('security')" data-i18n="m365_settings_tab_security">Security</button>
+      <button class="settings-tab" id="stTabScheduler" onclick="switchSettingsTab('scheduler')" data-i18n="m365_settings_tab_scheduler">Scheduler</button>
+      <button class="settings-tab" id="stTabEmail"    onclick="switchSettingsTab('email')"    data-i18n="m365_settings_tab_email">Email report</button>
+      <button class="settings-tab" id="stTabDatabase" onclick="switchSettingsTab('database')" data-i18n="m365_settings_tab_database">Database</button>
+    </div>
+    <div class="settings-body">
+
+      <!-- ── General pane ──────────────────────────────────────────────────── -->
+      <div class="settings-pane" id="stPaneGeneral">
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="m365_settings_appearance">Appearance</div>
+          <div class="settings-row">
+            <label data-i18n="m365_settings_language">Language</label>
+            <select id="langSelectSettings" onchange="setLang(this.value); document.getElementById('langSelect').value=this.value;"></select>
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_settings_theme">Theme</label>
+            <label class="toggle" style="flex:unset"><input type="checkbox" id="themeToggle" onchange="toggleTheme()"><span class="toggle-slider"></span></label>
+          </div>
+        </div>
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="m365_settings_about">About</div>
+          <div class="settings-about-row"><span>🔍 GDPRScanner</span><span style="color:var(--muted)">v{{ app_version }}</span></div>
+          <div class="settings-about-row"><span data-i18n="label_python">Python</span><span id="st-about-python" style="color:var(--muted)">—</span></div>
+          <div class="settings-about-row"><span>MSAL</span><span id="st-about-msal" style="color:var(--muted)">—</span></div>
+          <div class="settings-about-row"><span>Requests</span><span id="st-about-requests" style="color:var(--muted)">—</span></div>
+          <div class="settings-about-row"><span>openpyxl</span><span id="st-about-openpyxl" style="color:var(--muted)">—</span></div>
+        </div>
+      </div>
+
+      <!-- ── Security pane ─────────────────────────────────────────────────── -->
+      <div class="settings-pane" id="stPaneSecurity">
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="m365_settings_admin_pin">Admin PIN</div>
+          <div style="font-size:10px;color:var(--muted);line-height:1.5;margin-bottom:4px" data-i18n="m365_settings_pin_hint">Required for destructive actions (e.g. Reset DB). Leave blank to disable.</div>
+          <div id="stPinStatus" style="font-size:10px;color:var(--muted);margin-bottom:6px"></div>
+          <div class="settings-row" id="stCurrentPinRow" style="display:none">
+            <label data-i18n="m365_settings_current_pin">Current PIN</label>
+            <input id="stCurrentPin" type="password" autocomplete="off" placeholder="••••">
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_settings_new_pin">New PIN</label>
+            <input id="stNewPin" type="password" autocomplete="off" placeholder="••••">
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_settings_confirm_pin">Confirm PIN</label>
+            <input id="stConfirmPin" type="password" autocomplete="off" placeholder="••••">
+          </div>
+          <div style="display:flex;justify-content:flex-end;gap:8px;margin-top:4px">
+            <div id="stPinSaveStatus" style="flex:1;font-size:11px;color:var(--muted);align-self:center"></div>
+            <button onclick="stSavePin()" style="background:var(--accent);color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="m365_settings_save_pin">Save PIN</button>
+          </div>
+        </div>
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="viewer_pin_group_title">Viewer PIN</div>
+          <div style="font-size:10px;color:var(--muted);line-height:1.5;margin-bottom:4px" data-i18n="viewer_pin_desc">A numeric PIN (4–8 digits) that lets anyone open <code style="font-size:10px">/view</code> in a browser for read-only access to results without a token URL.</div>
+          <div id="stViewerPinStatus" style="font-size:10px;color:var(--muted);margin-bottom:6px"></div>
+          <div class="settings-row" id="stViewerCurrentPinRow" style="display:none">
+            <label data-i18n="m365_settings_current_pin">Current PIN</label>
+            <input id="stViewerCurrentPin" type="password" autocomplete="off" placeholder="••••">
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_settings_new_pin">New PIN</label>
+            <input id="stViewerNewPin" type="password" inputmode="numeric" maxlength="8" autocomplete="off" placeholder="4–8 digits">
+          </div>
+          <div style="display:flex;justify-content:flex-end;gap:8px;margin-top:4px">
+            <div id="stViewerPinSaveStatus" style="flex:1;font-size:11px;color:var(--muted);align-self:center"></div>
+            <button type="button" onclick="stClearViewerPin()" id="stViewerPinClearBtn" style="display:none;background:none;border:1px solid var(--danger);color:var(--danger);height:26px;padding:0 12px;border-radius:6px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="viewer_pin_clear">Clear PIN</button>
+            <button type="button" onclick="stSaveViewerPin()" style="background:var(--accent);color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="m365_settings_save_pin">Save PIN</button>
+          </div>
+        </div>
+      </div>
+
+      <!-- ── Scheduler pane (#19) ──────────────────────────────────────────── -->
+      <div class="settings-pane" id="stPaneScheduler">
+
+        <!-- ── Job list ───────────────────────────────────────────────────── -->
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="m365_sched_title">🕐 Scheduled scans</div>
+          <div style="font-size:10px;color:var(--muted);line-height:1.5;margin-bottom:6px" data-i18n="m365_sched_hint">Run scans automatically at a set time. Requires an active M365 connection (application mode recommended).</div>
+          <div id="schedNoAps" style="display:none;font-size:11px;color:var(--danger);margin-bottom:8px" data-i18n="m365_sched_no_aps">⚠ APScheduler not installed. Run: pip install apscheduler</div>
+          <div id="schedJobList" style="display:flex;flex-direction:column;gap:4px;margin-bottom:8px"></div>
+          <button onclick="schedAddJob()" style="background:none;border:1px dashed var(--border);color:var(--muted);height:26px;padding:0 12px;border-radius:6px;font-size:12px;cursor:pointer;width:100%;text-align:left;box-sizing:border-box" data-i18n="m365_sched_add">+ Add scheduled scan</button>
+        </div>
+
+        <!-- ── Job editor (shown when adding / editing) ───────────────────── -->
+        <div id="schedJobEditor" style="display:none">
+          <div class="settings-group" style="border:1px solid var(--border);border-radius:8px;padding:10px">
+            <div class="settings-group-title" id="schedEditorTitle" data-i18n="m365_sched_editor_new">New scheduled scan</div>
+            <input type="hidden" id="schedEditId" value="">
+            <div class="settings-row">
+              <label data-i18n="m365_sched_name">Name</label>
+              <input id="schedName" type="text" placeholder="e.g. Nightly tenant scan" style="flex:1">
+            </div>
+            <div class="settings-row">
+              <label data-i18n="m365_sched_enabled">Enabled</label>
+              <label class="toggle" style="flex:unset"><input type="checkbox" id="schedEnabled"><span class="toggle-slider"></span></label>
+            </div>
+            <div class="settings-row">
+              <label data-i18n="m365_sched_frequency">Frequency</label>
+              <select id="schedFrequency" onchange="schedToggleFreqRows()" style="flex:1;height:26px;padding:0 8px;border:1px solid var(--border);border-radius:5px;background:var(--surface);color:var(--text);font-size:12px;box-sizing:border-box">
+                <option value="daily" data-i18n="m365_sched_freq_daily">Daily</option>
+                <option value="weekly" data-i18n="m365_sched_freq_weekly">Weekly</option>
+                <option value="monthly" data-i18n="m365_sched_freq_monthly">Monthly</option>
+              </select>
+            </div>
+            <div class="settings-row" id="schedDowRow" style="display:none">
+              <label data-i18n="m365_sched_dow">Day of week</label>
+              <select id="schedDow" style="flex:1;height:26px;padding:0 8px;border:1px solid var(--border);border-radius:5px;background:var(--surface);color:var(--text);font-size:12px;box-sizing:border-box">
+                <option value="mon" data-i18n="m365_sched_dow_mon">Monday</option><option value="tue" data-i18n="m365_sched_dow_tue">Tuesday</option>
+                <option value="wed" data-i18n="m365_sched_dow_wed">Wednesday</option><option value="thu" data-i18n="m365_sched_dow_thu">Thursday</option>
+                <option value="fri" data-i18n="m365_sched_dow_fri">Friday</option><option value="sat" data-i18n="m365_sched_dow_sat">Saturday</option>
+                <option value="sun" data-i18n="m365_sched_dow_sun">Sunday</option>
+              </select>
+            </div>
+            <div class="settings-row" id="schedDomRow" style="display:none">
+              <label data-i18n="m365_sched_dom">Day of month</label>
+              <input id="schedDom" type="number" min="1" max="28" value="1" style="max-width:70px">
+            </div>
+            <div class="settings-row">
+              <label data-i18n="m365_sched_time">Time</label>
+              <div style="display:flex;gap:4px;align-items:center">
+                <input id="schedHour" type="number" min="0" max="23" value="2" style="width:50px">
+                <span>:</span>
+                <input id="schedMinute" type="number" min="0" max="59" value="0" style="width:50px">
+              </div>
+            </div>
+            <div class="settings-row">
+              <label data-i18n="m365_sched_profile">Profile</label>
+              <select id="schedProfile" style="flex:1;height:26px;padding:0 8px;border:1px solid var(--border);border-radius:5px;background:var(--surface);color:var(--text);font-size:12px;box-sizing:border-box">
+                <option value="" data-i18n="m365_sched_profile_last">Last saved settings</option>
+              </select>
+            </div>
+            <div class="settings-row">
+              <label data-i18n="m365_sched_auto_email">Email report automatically</label>
+              <label class="toggle" style="flex:unset"><input type="checkbox" id="schedAutoEmail"><span class="toggle-slider"></span></label>
+            </div>
+            <div class="settings-row">
+              <label data-i18n="m365_sched_auto_retention">Enforce retention policy</label>
+              <label class="toggle" style="flex:unset"><input type="checkbox" id="schedAutoRetention"><span class="toggle-slider"></span></label>
+            </div>
+            <div style="display:flex;justify-content:flex-end;gap:8px;margin-top:8px">
+              <div id="schedSaveStatus" style="flex:1;font-size:11px;color:var(--muted);align-self:center"></div>
+              <button onclick="schedCancelEdit()" style="background:none;border:1px solid var(--border);color:var(--muted);height:26px;padding:0 12px;border-radius:6px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="btn_cancel">Cancel</button>
+              <button onclick="schedSaveJob()" style="background:var(--accent);color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="btn_save">Save</button>
+            </div>
+          </div>
+        </div>
+
+        <!-- ── History ────────────────────────────────────────────────────── -->
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="m365_sched_status">Recent runs</div>
+          <div id="schedHistory" style="font-size:10px;color:var(--muted);line-height:1.6;height:72px;overflow-y:auto;border:1px solid var(--border);border-radius:4px;padding:4px 6px"></div>
+        </div>
+
+      </div>
+
+      <!-- ── Email report pane: inputs use st-smtp* ids, distinct from the standalone SMTP modal's smtp* ids ── -->
+      <div class="settings-pane" id="stPaneEmail">
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="m365_smtp_title">Email report (SMTP)</div>
+          <div class="settings-row">
+            <label data-i18n="m365_smtp_host">SMTP host</label>
+            <input id="st-smtpHost" type="text" placeholder="smtp.office365.com">
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_smtp_port">Port</label>
+            <input id="st-smtpPort" type="number" value="587" style="max-width:80px">
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_smtp_user">Username</label>
+            <input id="st-smtpUser" type="text" autocomplete="off">
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_smtp_pw">Password</label>
+            <input id="st-smtpPw" type="password" autocomplete="off">
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_smtp_from">From</label>
+            <input id="st-smtpFrom" type="text">
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_smtp_tls">STARTTLS</label>
+            <label class="toggle" style="flex:unset"><input type="checkbox" id="st-smtpTls" checked><span class="toggle-slider"></span></label>
+          </div>
+          <div class="settings-row">
+            <label data-i18n="m365_smtp_recipients">Recipients</label>
+            <input id="st-smtpTo" type="text" placeholder="a@school.dk, b@school.dk">
+          </div>
+          <div style="display:flex;justify-content:flex-end;gap:8px;margin-top:4px">
+            <div id="st-smtpStatus" style="flex:1;font-size:11px;color:var(--muted);align-self:center"></div>
+            <button onclick="stSmtpSave()" style="background:none;border:1px solid var(--border);color:var(--muted);height:26px;padding:0 12px;border-radius:6px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="btn_save">Save</button>
+            <button onclick="stSmtpTest()" style="background:none;border:1px solid var(--border);color:var(--muted);height:26px;padding:0 12px;border-radius:6px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="m365_smtp_test">Test</button>
+            <button onclick="stSmtpSend()" style="background:var(--accent);color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="m365_smtp_send">Send now</button>
+          </div>
+        </div>
+      </div>
+
+      <!-- ── Database pane ─────────────────────────────────────────────────── -->
+      <div class="settings-pane" id="stPaneDatabase">
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="m365_db_title">Database</div>
+          <div id="st-dbStats" style="font-size:11px;color:var(--muted);line-height:1.8"></div>
+        </div>
+        <div class="settings-group">
+          <div class="settings-group-title" data-i18n="m365_settings_db_actions">Actions</div>
+          <div style="display:flex;align-items:center;gap:8px">
+            <div style="display:flex;background:var(--bg);border:1px solid var(--border);border-radius:6px;overflow:hidden">
+              <button onclick="exportDB()" style="background:none;border:none;border-right:1px solid var(--border);color:var(--muted);height:26px;padding:0 14px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="m365_db_export">Export</button>
+              <button onclick="openImportDBModal()" style="background:none;border:none;color:var(--muted);height:26px;padding:0 14px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="m365_db_import">Import</button>
+            </div>
+            <button onclick="stResetDB()" style="background:none;border:1px solid var(--danger);color:var(--danger);height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="m365_db_reset">Reset DB</button>
+          </div>
+        </div>
+      </div>
+
+    </div><!-- /.settings-body -->
+    <div class="settings-footer">
+      <button onclick="closeSettings()" style="background:none;border:1px solid var(--border);color:var(--muted);height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="btn_close">Close</button>
+    </div>
+  </div>
+</div>
+
+<!-- Admin PIN prompt (used for Reset DB): stacked above the settings modal via z-index:1300; Enter in the input calls confirmPinPrompt(); a click on the backdrop itself calls closePinPrompt(). The input opts out of autofill like the other PIN fields. -->
+<div class="settings-backdrop" id="pinPromptBackdrop" style="z-index:1300" onclick="if(event.target===this)closePinPrompt()">
+  <div class="settings-modal" style="width:min(340px,94vw)">
+    <div class="settings-header">
+      <h2 data-i18n="m365_settings_enter_pin">Enter admin PIN</h2>
+      <button onclick="closePinPrompt()" data-i18n-title="btn_close" title="Close" style="background:none;border:none;color:var(--muted);font-size:18px;cursor:pointer;padding:0 4px;line-height:1">&times;</button>
+    </div>
+    <div class="settings-body" style="gap:10px">
+      <div style="font-size:12px;color:var(--muted)" id="pinPromptMsg"></div>
+      <input id="pinPromptInput" type="password" autocomplete="off" placeholder="••••"
+             style="width:100%;font-size:16px;padding:8px 12px;border:1px solid var(--border);border-radius:6px;background:var(--bg);color:var(--text);letter-spacing:.2em"
+             onkeydown="if(event.key==='Enter')confirmPinPrompt()">
+      <div id="pinPromptError" style="font-size:11px;color:var(--danger);min-height:14px"></div>
+    </div>
+    <div class="settings-footer">
+      <button onclick="closePinPrompt()" style="background:none;border:1px solid var(--border);color:var(--muted);height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="btn_cancel">Cancel</button>
+      <button onclick="confirmPinPrompt()" style="background:var(--danger);color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="btn_confirm">Confirm</button>
+    </div>
+  </div>
+</div>
+
+<!-- Data Subject Lookup Modal (#4): CPR input (dsubInput), status line (dsubStatus) and results list (dsubResults). -->
+<!-- Enter in the input or the Search button calls runSubjectLookup(); dsubDeleteBtn starts hidden (display:none). -->
+<div class="dsub-modal-backdrop" id="dsubBackdrop" onclick="if(event.target===this)closeDsubModal()">
+  <div class="dsub-modal">
+    <h2 data-i18n="m365_subject_title">🔍 Data subject lookup</h2>
+    <div style="font-size:11px;color:var(--muted)" data-i18n="m365_subject_desc">Find all flagged items containing a given CPR number. The CPR is hashed before querying and is never stored in plaintext.</div>
+    <div class="dsub-input-row">
+      <input id="dsubInput" type="text" placeholder="DDMMYY-XXXX" data-i18n-placeholder="m365_subject_placeholder"
+             onkeydown="if(event.key==='Enter')runSubjectLookup()">
+      <button onclick="runSubjectLookup()" style="padding:6px 14px;border-radius:7px;background:var(--accent);color:#fff;border:none;font-size:12px;cursor:pointer" data-i18n="m365_subject_search">Search</button>
+    </div>
+    <div id="dsubStatus" style="font-size:11px;color:var(--muted);min-height:16px"></div>
+    <div class="dsub-results" id="dsubResults"></div>
+    <div class="dsub-footer">
+      <button onclick="closeDsubModal()" style="background:none;border:1px solid var(--border);color:var(--muted)" data-i18n="btn_close">Close</button>
+      <button id="dsubDeleteBtn" onclick="deleteSubjectItems()" style="display:none;background:var(--danger);color:#fff;border:none;font-weight:500" data-i18n="m365_subject_delete_all">Delete all for this person</button>
+    </div>
+  </div>
+</div>
+
+<!-- SMTP / Email Report Modal -->
+<div class="smtp-modal-backdrop" id="smtpBackdrop" onclick="if(event.target===this)closeSmtpModal()">
+  <div class="smtp-modal">
+    <h2 data-i18n="m365_smtp_title">✉ Email report</h2>
+    <div class="smtp-subtitle" data-i18n="m365_smtp_desc">Configure SMTP settings to send the scan report by email.</div>
+
+    <div class="smtp-grid">
+      <!-- Server -->
+      <div class="smtp-field">
+        <label data-i18n="m365_smtp_host">SMTP host</label>
+        <input id="smtpHost" type="text" placeholder="smtp.office365.com">
+      </div>
+      <div class="smtp-field">
+        <label data-i18n="m365_smtp_port">Port</label>
+        <input id="smtpPort" type="number" value="587" style="width:80px">
+      </div>
+
+      <!-- Auth -->
+      <div class="smtp-field">
+        <label data-i18n="m365_smtp_user">Username</label>
+        <input id="smtpUser" type="text" placeholder="scanner@company.com">
+      </div>
+      <div class="smtp-field">
+        <label data-i18n="m365_smtp_pass">Password</label>
+        <input id="smtpPass" type="password" placeholder="(saved)">
+      </div>
+
+      <!-- From -->
+      <div class="smtp-field full">
+        <label><span data-i18n="m365_smtp_from">From address</span> <span style="font-weight:400;color:var(--muted)" data-i18n="m365_smtp_from_hint">(optional — defaults to username)</span></label>
+        <input id="smtpFrom" type="text" placeholder="scanner@company.com">
+      </div>
+
+      <!-- TLS / SSL -->
+      <div class="full">
+        <hr class="smtp-divider">
+        <div style="display:flex;gap:20px">
+          <div class="smtp-toggle-row">
+            <label class="toggle" title="STARTTLS (port 587)"><input type="checkbox" id="smtpTLS" checked><span class="toggle-slider"></span></label>
+            <span data-i18n="m365_smtp_tls">STARTTLS</span>
+            <span style="color:var(--muted);font-size:10px">(port 587)</span>
+          </div>
+          <div class="smtp-toggle-row">
+            <label class="toggle" title="SMTPS (port 465)"><input type="checkbox" id="smtpSSL"><span class="toggle-slider"></span></label>
+            <span data-i18n="m365_smtp_ssl">SSL</span>
+            <span style="color:var(--muted);font-size:10px">(port 465)</span>
+          </div>
+        </div>
+      </div>
+
+      <!-- Recipients -->
+      <div class="smtp-field full">
+        <hr class="smtp-divider">
+        <label data-i18n="m365_smtp_recipients">Recipients</label>
+        <input id="smtpRecipients" type="text" placeholder="compliance@company.com, ciso@company.com">
+        <div style="font-size:10px;color:var(--muted);margin-top:3px" data-i18n="m365_smtp_recipients_hint">Comma or semicolon separated</div>
+      </div>
+    </div>
+
+    <div class="smtp-footer">
+      <button onclick="closeSmtpModal()" style="background:none;border:1px solid var(--border);color:var(--muted)" data-i18n="btn_close">Close</button>
+      <button onclick="saveSmtpConfig()" style="background:none;border:1px solid var(--accent);color:var(--accent)" data-i18n="m365_smtp_save">Save settings</button>
+      <button onclick="sendReport()" style="background:var(--accent);color:#fff;border:none;font-weight:500" data-i18n="m365_smtp_send">Send now</button>
+    </div>
+    <div class="smtp-status" id="smtpStatus"></div>
+  </div>
+</div>
+
+<!-- Share / Viewer token modal (#33) -->
+<div class="about-modal-backdrop" id="shareBackdrop" onclick="if(event.target===this)closeShareModal()">
+  <div class="about-modal" style="max-width:520px;width:min(520px,96vw)">
+    <h2 style="margin:0 0 4px;font-size:15px" data-i18n="share_modal_title">Share results</h2>
+    <div style="font-size:12px;color:var(--muted);margin-bottom:14px" data-i18n="share_modal_desc">Read-only links let a DPO or reviewer browse results and tag dispositions without access to scan controls or credentials.</div>
+
+    <!-- Create new token -->
+    <div style="background:var(--bg);border:1px solid var(--border);border-radius:7px;padding:10px 12px;margin-bottom:12px">
+      <div style="font-size:11px;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.04em;margin-bottom:8px" data-i18n="share_new_link">New link</div>
+      <div style="display:flex;gap:8px;align-items:flex-end;flex-wrap:wrap">
+        <div style="flex:1;min-width:120px">
+          <div style="font-size:11px;color:var(--muted);margin-bottom:3px" data-i18n="share_label_lbl">Label (optional)</div>
+          <input id="shareLabel" type="text" data-i18n-placeholder="share_label_placeholder" placeholder="e.g. DPO review 2026" style="width:100%;box-sizing:border-box;font-size:12px;padding:5px 8px;background:var(--surface);border:1px solid var(--border);border-radius:5px;color:var(--text)">
+        </div>
+        <div style="width:100px">
+          <div style="font-size:11px;color:var(--muted);margin-bottom:3px" data-i18n="share_expires_in">Expires in</div>
+          <select id="shareExpiry" style="width:100%;font-size:12px;padding:5px 6px;background:var(--surface);border:1px solid var(--border);border-radius:5px;color:var(--text)">
+            <option value="" data-i18n="share_expires_never">Never</option>
+            <option value="7" data-i18n="share_expires_7d">7 days</option>
+            <option value="30" selected data-i18n="share_expires_30d">30 days</option>
+            <option value="90" data-i18n="share_expires_90d">90 days</option>
+            <option value="365" data-i18n="share_expires_1y">1 year</option>
+          </select>
+        </div>
+        <button onclick="createShareLink()" style="height:30px;padding:0 14px;background:var(--accent);color:#fff;border:none;border-radius:5px;font-size:12px;cursor:pointer;flex-shrink:0" data-i18n="share_create">Create</button>
+      </div>
+      <div id="shareNewLinkRow" style="display:none;margin-top:10px">
+        <div style="font-size:11px;color:var(--muted);margin-bottom:4px" data-i18n="share_copy_link_prompt">Copy link:</div>
+        <div style="display:flex;gap:6px;align-items:center">
+          <input id="shareNewLinkUrl" type="text" readonly style="flex:1;font-size:11px;padding:5px 8px;background:var(--bg2,var(--bg));border:1px solid var(--border);border-radius:5px;color:var(--text);min-width:0">
+          <button onclick="copyShareLink()" id="shareCopyBtn" style="height:26px;padding:0 10px;background:none;border:1px solid var(--border);color:var(--muted);border-radius:5px;font-size:11px;cursor:pointer;flex-shrink:0" data-i18n="log_copy">Copy</button>
+        </div>
+      </div>
+    </div>
+
+    <!-- Existing tokens -->
+    <div style="font-size:11px;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.04em;margin-bottom:6px" data-i18n="share_active_links">Active links</div>
+    <div id="shareTokenList" style="display:flex;flex-direction:column;gap:5px;max-height:180px;overflow-y:auto"></div>
+
+    <!-- PIN status -->
+    <div style="margin-top:12px;padding:8px 10px;background:var(--bg);border:1px solid var(--border);border-radius:6px;display:flex;align-items:center;gap:10px;font-size:12px">
+      <span style="flex:1;color:var(--muted)"><span data-i18n="share_viewer_pin_label">Viewer PIN:</span> <span id="sharePinStatus" style="color:var(--text)">—</span></span>
+      <button type="button" onclick="closeShareModal();openSettings('security')" style="height:24px;padding:0 10px;background:none;border:1px solid var(--border);color:var(--muted);border-radius:4px;font-size:11px;cursor:pointer" data-i18n="share_pin_configure">Configure</button>
+    </div>
+
+    <div style="display:flex;justify-content:flex-end;margin-top:14px">
+      <button onclick="closeShareModal()" style="background:none;border:1px solid var(--border);color:var(--muted);border-radius:5px;font-size:12px;height:28px;padding:0 14px;cursor:pointer" data-i18n="btn_close">Close</button>
+    </div>
+  </div>
+</div>
+
+<div class="about-modal-backdrop" id="aboutBackdrop" onclick="if(event.target===this)closeAbout()">
+  <div class="about-modal">
+    <h2>🔍 GDPRScanner</h2>
+    <div class="about-version">v{{ app_version }}</div>
+    <div class="about-row"><span data-i18n="label_python">Python</span><span id="about-python">—</span></div>
+    <div class="about-row"><span>MSAL</span><span id="about-msal">—</span></div>
+    <div class="about-row"><span>Requests</span><span id="about-requests">—</span></div>
+    <div class="about-row"><span>openpyxl</span><span id="about-openpyxl">—</span></div>
+    <button class="about-close" onclick="closeAbout()" data-i18n="btn_close">Close</button>
+  </div>
+</div>
+
+<!-- Unified Source Management modal (#17) -->
+<div class="srcmgmt-backdrop" id="srcMgmtBackdrop" onclick="if(event.target===this)closeSourcesMgmt()">
+  <div class="srcmgmt-modal">
+    <div class="srcmgmt-header">
+      <h2 data-i18n="m365_srcmgmt_title">⚙️ Source management</h2>
+      <button onclick="closeSourcesMgmt()" style="background:none;border:none;color:var(--muted);font-size:18px;cursor:pointer;padding:0 4px;line-height:1">&times;</button>
+    </div>
+
+    <!-- Tabs -->
+    <div class="srcmgmt-tabs">
+      <button class="srcmgmt-tab" id="srcTabM365"    onclick="switchSrcTab('m365')"   data-i18n="m365_srcmgmt_tab_m365">Microsoft 365</button>
+      <button class="srcmgmt-tab" id="srcTabGoogle" onclick="switchSrcTab('google')" data-i18n="m365_srcmgmt_tab_google">Google Workspace</button>
+      <button class="srcmgmt-tab" id="srcTabFiles"   onclick="switchSrcTab('files')"  data-i18n="m365_srcmgmt_tab_files">File sources</button>
+    </div>
+
+    <div class="srcmgmt-body">
+
+      <!-- ── Microsoft 365 pane ───────────────────────────────────────────── -->
+      <div class="srcmgmt-pane" id="srcPaneM365">
+
+        <!-- Connection status row -->
+        <div class="srcmgmt-group">
+          <div class="srcmgmt-group-title" data-i18n="m365_srcmgmt_connection">Connection</div>
+          <div class="srcmgmt-row" id="srcM365StatusRow">
+            <span class="srcmgmt-row-icon">☁️</span>
+            <div style="flex:1">
+              <div class="srcmgmt-row-label" id="srcM365StatusLabel" data-i18n="m365_srcmgmt_not_connected">Not connected</div>
+              <div class="srcmgmt-row-sub" id="srcM365StatusSub"></div>
+            </div>
+            <span class="srcmgmt-status grey" id="srcM365StatusDot"></span>
+          </div>
+        </div>
+
+        <!-- Azure credentials: each label is bound to its input via for/id so clicking the label focuses the field -->
+        <div class="srcmgmt-group">
+          <div class="srcmgmt-group-title" data-i18n="m365_srcmgmt_azure_creds">Azure credentials</div>
+          <div class="srcmgmt-cred-form">
+            <div class="srcmgmt-cred-row">
+              <label for="smClientId" data-i18n="m365_label_client_id">Client ID</label>
+              <input id="smClientId" type="text" placeholder="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" autocomplete="off">
+            </div>
+            <div class="srcmgmt-cred-row">
+              <label for="smTenantId" data-i18n="m365_label_tenant_id">Tenant ID</label>
+              <input id="smTenantId" type="text" placeholder="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" autocomplete="off">
+            </div>
+            <div class="srcmgmt-cred-row">
+              <label for="smClientSecret" data-i18n="m365_label_client_secret">Client Secret</label>
+              <input id="smClientSecret" type="password" placeholder="Leave blank for delegated sign-in" autocomplete="off">
+            </div>
+            <div style="display:flex;justify-content:flex-end;gap:6px;margin-top:2px">
+              <div id="smConnStatus" style="font-size:11px;color:var(--muted);align-self:center;flex:1"></div>
+              <button type="button" onclick="smDisconnect()" id="smDisconnectBtn" style="display:none;background:none;border:1px solid var(--danger);color:var(--danger);height:26px;padding:0 12px;border-radius:6px;font-size:11px;cursor:pointer;box-sizing:border-box" data-i18n="m365_btn_sign_out">Disconnect</button>
+              <button type="button" onclick="smConnect()" style="background:var(--accent);color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="m365_btn_connect">Connect</button>
+            </div>
+          </div>
+        </div>
+
+        <!-- M365 source toggles -->
+        <div class="srcmgmt-group">
+          <div class="srcmgmt-group-title" data-i18n="m365_srcmgmt_sources_m365">Sources to scan</div>
+          <div id="srcM365Toggles" style="display:flex;flex-direction:column;gap:6px">
+            <div class="srcmgmt-row">
+              <span class="srcmgmt-row-icon">📧</span>
+              <div style="flex:1"><div class="srcmgmt-row-label" data-i18n="m365_src_email">Exchange / Outlook</div></div>
+              <label class="toggle" style="flex-shrink:0"><input type="checkbox" id="smSrcEmail" checked onchange="renderSourcesPanel();renderAccountList();_saveM365SourceToggles()"><span class="toggle-slider"></span></label>
+            </div>
+            <div class="srcmgmt-row">
+              <span class="srcmgmt-row-icon">💾</span>
+              <div style="flex:1"><div class="srcmgmt-row-label" data-i18n="m365_src_onedrive">OneDrive</div></div>
+              <label class="toggle" style="flex-shrink:0"><input type="checkbox" id="smSrcOneDrive" checked onchange="renderSourcesPanel();renderAccountList();_saveM365SourceToggles()"><span class="toggle-slider"></span></label>
+            </div>
+            <div class="srcmgmt-row">
+              <span class="srcmgmt-row-icon">🌐</span>
+              <div style="flex:1"><div class="srcmgmt-row-label" data-i18n="m365_src_sharepoint">SharePoint</div></div>
+              <label class="toggle" style="flex-shrink:0"><input type="checkbox" id="smSrcSharePoint" checked onchange="renderSourcesPanel();renderAccountList();_saveM365SourceToggles()"><span class="toggle-slider"></span></label>
+            </div>
+            <div class="srcmgmt-row">
+              <span class="srcmgmt-row-icon">💬</span>
+              <div style="flex:1"><div class="srcmgmt-row-label" data-i18n="m365_src_teams">Teams</div></div>
+              <label class="toggle" style="flex-shrink:0"><input type="checkbox" id="smSrcTeams" checked onchange="renderSourcesPanel();renderAccountList();_saveM365SourceToggles()"><span class="toggle-slider"></span></label>
+            </div>
+          </div>
+        </div>
+      </div>
+
+      <!-- ── Google Workspace pane ────────────────────────────────────────────── -->
+      <!-- Two auth modes: Workspace (service account) and personal account (OAuth device code) -->
+      <div class="srcmgmt-pane" id="srcPaneGoogle">
+
+        <!-- Connection status row -->
+        <div class="srcmgmt-group">
+          <div class="srcmgmt-group-title" data-i18n="m365_srcmgmt_connection">Connection</div>
+          <div class="srcmgmt-row" id="srcGoogleStatusRow">
+            <span class="srcmgmt-row-icon">🔵</span>
+            <div style="flex:1">
+              <div class="srcmgmt-row-label" id="srcGoogleStatusLabel" data-i18n="m365_srcmgmt_not_connected">Not connected</div>
+              <div class="srcmgmt-row-sub"  id="srcGoogleStatusSub"></div>
+            </div>
+            <span class="srcmgmt-status grey" id="srcGoogleStatusDot"></span>
+          </div>
+        </div>
+
+        <!-- Auth mode toggle -->
+        <div class="srcmgmt-group">
+          <div class="srcmgmt-group-title" data-i18n="m365_google_auth_mode">Auth mode</div>
+          <div style="display:flex;gap:0;border:1px solid var(--border);border-radius:6px;overflow:hidden;width:fit-content">
+            <button type="button" id="smGoogleModeWorkspace" onclick="smGoogleSetMode('workspace')" style="height:26px;padding:0 14px;font-size:12px;border:none;cursor:pointer;background:var(--accent);color:#fff;box-sizing:border-box" data-i18n="m365_google_mode_workspace">Workspace</button>
+            <button type="button" id="smGoogleModePersonal"  onclick="smGoogleSetMode('personal')"  style="height:26px;padding:0 14px;font-size:12px;border:none;cursor:pointer;background:var(--surface);color:var(--text);box-sizing:border-box" data-i18n="m365_google_mode_personal">Personal account</button>
+          </div>
+        </div>
+
+        <!-- Service account credentials -->
+        <div class="srcmgmt-group" id="smGoogleSaSection">
+          <div class="srcmgmt-group-title" data-i18n="m365_google_sa_creds">Service account credentials</div>
+          <div class="srcmgmt-cred-form">
+            <div class="srcmgmt-cred-row" style="align-items:flex-start;flex-direction:column;gap:4px">
+              <label style="flex:unset" data-i18n="m365_google_sa_key_file">Service Account JSON key</label>
+              <div style="display:flex;gap:6px;width:100%;align-items:center">
+                <input type="file" id="smGoogleKeyFile" accept=".json" style="flex:1;font-size:11px;color:var(--muted);background:var(--bg);border:1px solid var(--border);border-radius:5px;padding:3px 6px">
+                <span id="smGoogleKeyName" style="font-size:10px;color:var(--accent);white-space:nowrap"></span>
+              </div>
+              <div style="font-size:10px;color:var(--muted);line-height:1.5" data-i18n="m365_google_sa_key_hint">Download from Google Cloud Console → IAM &amp; Admin → Service Accounts → Keys → Add Key → JSON</div>
+            </div>
+            <div class="srcmgmt-cred-row">
+              <label data-i18n="m365_google_admin_email">Admin email</label>
+              <input id="smGoogleAdminEmail" type="email" placeholder="admin@yourdomain.com" autocomplete="off">
+            </div>
+            <div style="font-size:10px;color:var(--muted);margin-top:-4px;padding-left:118px;line-height:1.5" data-i18n="m365_google_admin_email_hint">Used for domain-wide delegation — must be a Workspace super-admin.</div>
+            <div style="display:flex;justify-content:flex-end;gap:6px;margin-top:4px">
+              <div id="smGoogleConnStatus" style="font-size:11px;color:var(--muted);align-self:center;flex:1"></div>
+              <button onclick="smGoogleDisconnect()" id="smGoogleDisconnectBtn" style="display:none;background:none;border:1px solid var(--danger);color:var(--danger);height:26px;padding:0 12px;border-radius:6px;font-size:11px;cursor:pointer;box-sizing:border-box" data-i18n="m365_btn_sign_out">Disconnect</button>
+              <button onclick="smGoogleConnect()" style="background:#4285f4;color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="m365_btn_connect">Connect</button>
+            </div>
+          </div>
+        </div>
+
+        <!-- Personal account credentials -->
+        <div class="srcmgmt-group" id="smGooglePersonalSection" style="display:none">
+          <div class="srcmgmt-group-title" data-i18n="m365_google_personal_creds">Personal account</div>
+          <div class="srcmgmt-cred-form">
+            <div class="srcmgmt-cred-row">
+              <label data-i18n="m365_google_personal_client_id">Client ID</label>
+              <input id="smGooglePersonalClientId" type="text" placeholder="….apps.googleusercontent.com" autocomplete="off">
+            </div>
+            <div class="srcmgmt-cred-row">
+              <label data-i18n="m365_google_personal_client_secret">Client secret</label>
+              <input id="smGooglePersonalClientSecret" type="password" placeholder="" autocomplete="off">
+            </div>
+            <div style="font-size:10px;color:var(--muted);margin-top:-4px;padding-left:118px;line-height:1.5" data-i18n="m365_google_personal_hint">Create OAuth 2.0 Desktop credentials in Google Cloud Console, then paste the client ID and secret above.</div>
+            <div id="smGoogleDeviceBox" class="device-code-box" style="display:none"><!-- device-code prompt; hidden by default -->
+              <div class="device-url"><span data-i18n="m365_device_code_go">Go to</span> <a id="smGoogleDeviceUrl" href="https://google.com/device" target="_blank" rel="noopener noreferrer">google.com/device</a></div><!-- rel keeps the new tab from reaching window.opener -->
+              <div class="device-code" id="smGoogleDeviceCode">—</div>
+              <div class="device-url" data-i18n="m365_device_code_enter">and enter this code</div>
+              <div id="smGooglePollStatus" style="font-size:12px;color:var(--muted);margin-top:8px"></div>
+            </div>
+            <div style="display:flex;justify-content:flex-end;gap:6px;margin-top:4px">
+              <div id="smGooglePersonalConnStatus" style="font-size:11px;color:var(--muted);align-self:center;flex:1"></div>
+              <button type="button" onclick="smGooglePersonalSignOut()" id="smGooglePersonalSignOutBtn" style="display:none;background:none;border:1px solid var(--danger);color:var(--danger);height:26px;padding:0 12px;border-radius:6px;font-size:11px;cursor:pointer;box-sizing:border-box" data-i18n="m365_btn_sign_out">Sign out</button>
+              <button type="button" onclick="smGooglePersonalStart()" id="smGooglePersonalSignInBtn" style="background:#4285f4;color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="m365_google_personal_sign_in">Sign in</button>
+            </div>
+          </div>
+          <div style="font-size:11px;color:var(--muted);line-height:1.7;padding:10px 12px;background:var(--bg);border:1px solid var(--border);border-radius:7px;margin-top:8px">
+            <strong data-i18n="m365_google_personal_setup_title">Setup required:</strong><br>
+            1. <span data-i18n="m365_google_personal_setup_step1">In Google Cloud Console, create a project and enable Gmail API + Drive API.</span><br>
+            2. <span data-i18n="m365_google_personal_setup_step2">Create OAuth 2.0 credentials (Desktop app type) and copy the client ID and secret.</span><br>
+            3. <span data-i18n="m365_google_personal_setup_step3">Add your Google account email to the OAuth consent screen test users list.</span>
+          </div>
+        </div>
+
+        <!-- Sources to scan -->
+        <div class="srcmgmt-group" id="smGoogleSourcesGroup" style="display:none">
+          <div class="srcmgmt-group-title" data-i18n="m365_srcmgmt_sources_google">Sources to scan</div>
+          <div style="display:flex;flex-direction:column;gap:6px">
+            <div class="srcmgmt-row">
+              <span class="srcmgmt-row-icon">📧</span>
+              <div style="flex:1"><div class="srcmgmt-row-label" data-i18n="m365_google_src_gmail">Gmail</div></div>
+              <label class="toggle" style="flex-shrink:0"><input type="checkbox" id="smGoogleSrcGmail" checked onchange="_onGoogleSourceToggle()"><span class="toggle-slider"></span></label>
+            </div>
+            <div class="srcmgmt-row">
+              <span class="srcmgmt-row-icon">📁</span>
+              <div style="flex:1"><div class="srcmgmt-row-label" data-i18n="m365_google_src_drive">Google Drive</div></div>
+              <label class="toggle" style="flex-shrink:0"><input type="checkbox" id="smGoogleSrcDrive" checked onchange="_onGoogleSourceToggle()"><span class="toggle-slider"></span></label>
+            </div>
+          </div>
+        </div>
+
+        <!-- Setup guide callout (workspace only); the scope list falls back to var(--bg) when --bg2 is not defined -->
+        <div class="srcmgmt-group" id="smGoogleWorkspaceSetup">
+          <div style="font-size:11px;color:var(--muted);line-height:1.7;padding:10px 12px;background:var(--bg);border:1px solid var(--border);border-radius:7px">
+            <strong data-i18n="m365_google_setup_title">Setup required in Google Workspace:</strong><br>
+            1. <span data-i18n="m365_google_setup_step1">Create a Google Cloud project and enable Gmail API + Drive API + Admin SDK.</span><br>
+            2. <span data-i18n="m365_google_setup_step2">Create a service account, download the JSON key, and enable domain-wide delegation.</span><br>
+            3. <span data-i18n="m365_google_setup_step3">In Workspace Admin → Security → API Controls → Domain-wide delegation, add the service account client ID with scopes:</span><br>
+            <code style="font-size:10px;word-break:break-all;display:block;margin:4px 0;padding:4px 6px;background:var(--bg2,var(--bg));border-radius:4px">https://www.googleapis.com/auth/gmail.readonly, https://www.googleapis.com/auth/drive.readonly, https://www.googleapis.com/auth/admin.directory.user.readonly</code>
+          </div>
+        </div>
+
+      </div>
+
+      <!-- ── File sources pane ────────────────────────────────────────────── -->
+      <div class="srcmgmt-pane" id="srcPaneFiles">
+        <div class="srcmgmt-group">
+          <div class="srcmgmt-group-title" data-i18n="m365_srcmgmt_file_sources">File sources</div>
+          <div class="fsrc-list" id="srcFileList" style="max-height:calc(4 * 62px)">
+            <div class="fsrc-empty" data-i18n="m365_file_sources_empty">No file sources yet.</div>
+          </div>
+        </div>
+
+        <!-- Add source form (moved from fsrcBackdrop) -->
+        <div class="srcmgmt-group">
+          <div class="srcmgmt-group-title" data-i18n="m365_file_sources_add">Add source</div>
+          <div class="fsrc-form" style="border-color:var(--border)">
+            <div class="fsrc-form-row">
+              <label>Name <span style="color:var(--accent)">*</span></label>
+              <input id="srcFileLabel" type="text" placeholder="e.g. Teacher files, NAS archive" maxlength="80" autocomplete="off">
+            </div>
+            <div class="fsrc-form-row">
+              <label data-i18n="m365_fsrc_path">Path</label>
+              <input id="srcFilePath" type="text" placeholder="~/Documents  or  //nas/shares" oninput="srcFileDetectSmb(); srcFileAutoName()">
+            </div>
+            <div id="srcFileSmbFields" style="display:none;flex-direction:column;gap:6px">
+              <div style="font-size:10px;color:var(--accent)" data-i18n="m365_fsrc_smb_detected">SMB/CIFS network share detected</div>
+              <div class="fsrc-form-row">
+                <label data-i18n="m365_fsrc_smb_host">SMB host</label>
+                <input id="srcFileSmbHost" type="text" placeholder="nas.school.dk">
+              </div>
+              <div class="fsrc-form-row"><!-- HTML attribute values have no backslash escapes, so the placeholder uses a single "\" -->
+                <label data-i18n="m365_fsrc_smb_user">Username</label>
+                <input id="srcFileSmbUser" type="text" placeholder="DOMAIN\username">
+              </div>
+              <div class="fsrc-form-row">
+                <label data-i18n="m365_fsrc_smb_pw">Password</label>
+                <input id="srcFileSmbPw" type="password" placeholder="Stored in OS keychain">
+              </div>
+              <div style="font-size:10px;color:var(--muted)" data-i18n="m365_fsrc_smb_pw_hint">Saved to OS keychain — never stored in a file.</div>
+            </div>
+            <div style="display:flex;align-items:center;gap:8px">
+              <input type="hidden" id="srcFileEditId" value="">
+              <div id="srcFileStatus" style="flex:1;font-size:11px;color:var(--muted)"></div>
+              <button onclick="srcFileAdd()" id="srcFileAddBtn" style="background:var(--accent);color:#fff;border:none;height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600;box-sizing:border-box" data-i18n="m365_fsrc_add_btn">Add</button>
+            </div>
+          </div>
+        </div>
+      </div>
+
+    </div><!-- /.srcmgmt-body -->
+
+    <div class="srcmgmt-footer">
+      <button onclick="closeSourcesMgmt()" style="background:none;border:1px solid var(--border);color:var(--muted);height:26px;padding:0 14px;border-radius:6px;font-size:12px;cursor:pointer;box-sizing:border-box" data-i18n="btn_close">Close</button>
+    </div>
+  </div>
+</div>
+
+<!-- File Sources modal (#8) — kept for backward compat; redirects to #17 modal -->
+
+<div class="fsrc-backdrop" id="fsrcBackdrop" onclick="if(event.target===this)closeFileSourcesModal()">
+  <div class="fsrc-modal">
+    <h2 data-i18n="m365_file_sources_title">📁 File Sources</h2>
+    <div class="fsrc-list" id="fsrcList">
+      <div class="fsrc-empty" data-i18n="m365_file_sources_empty">No file sources yet. Add a local folder or network share below.</div>
+    </div>
+
+    <!-- Add source form -->
+    <div class="fsrc-form" id="fsrcForm">
+      <div style="font-size:11px;font-weight:600;color:var(--text)" data-i18n="m365_file_sources_add">Add source</div>
+      <div class="fsrc-form-row">
+        <label data-i18n="m365_fsrc_label">Name <span style="color:var(--accent)">*</span></label>
+        <input id="fsrcLabel" type="text" placeholder="e.g. Teacher files, NAS archive" maxlength="80" autocomplete="off">
+      </div>
+      <div class="fsrc-form-row">
+        <label data-i18n="m365_fsrc_path">Path</label>
+        <input id="fsrcPath" type="text" placeholder="~/Documents  or  //nas/shares" oninput="fsrcDetectSmb(); fsrcAutoName()">
+      </div>
+      <div id="fsrcSmbFields" class="fsrc-smb-fields" style="display:none;flex-direction:column;gap:6px">
+        <div style="font-size:10px;color:var(--accent);margin:-2px 0 2px" data-i18n="m365_fsrc_smb_detected">SMB/CIFS network share detected</div>
+        <div class="fsrc-form-row">
+          <label data-i18n="m365_fsrc_smb_host">SMB host</label>
+          <input id="fsrcSmbHost" type="text" placeholder="nas.school.dk">
+        </div>
+        <div class="fsrc-form-row"><!-- HTML attribute values have no backslash escapes, so the placeholder uses a single "\" -->
+          <label data-i18n="m365_fsrc_smb_user">Username</label>
+          <input id="fsrcSmbUser" type="text" placeholder="DOMAIN\username or username">
+        </div>
+        <div class="fsrc-form-row">
+          <label data-i18n="m365_fsrc_smb_pw">Password</label>
+          <input id="fsrcSmbPw" type="password" placeholder="Stored in OS keychain">
+        </div>
+        <div style="font-size:10px;color:var(--muted)" data-i18n="m365_fsrc_smb_pw_hint">Password is saved to the OS keychain — never stored in a file.</div>
+      </div>
+      <div style="display:flex;justify-content:flex-end">
+        <button onclick="fsrcAddSource()" style="background:var(--accent);color:#fff;border:none;padding:5px 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600" data-i18n="m365_fsrc_add_btn">Add</button>
+      </div>
+    </div>
+
+    <div id="fsrcStatus" style="min-height:14px;font-size:11px;color:var(--muted)"></div>
+    <div class="fsrc-footer">
+      <button onclick="closeFileSourcesModal()" style="background:none;border:1px solid var(--border);color:var(--muted);padding:5px 14px;border-radius:6px;font-size:12px;cursor:pointer" data-i18n="btn_close">Close</button>
+    </div>
+  </div>
+</div>
+
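+<!-- Profile management modal (#15d): profile list panel plus editor panel. NOTE(review): several labels below are hard-coded Danish, some without data-i18n ("+ Ny profil", "Rediger profil", the editor placeholder) — confirm they are translated elsewhere. -->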
+<div class="pmgmt-backdrop" id="pmgmtBackdrop" onclick="if(event.target===this)closeProfileMgmt()">
+  <div class="pmgmt-modal">
+    <div class="pmgmt-panel-list">
+      <div style="padding:10px 14px;border-bottom:1px solid var(--border);display:flex;align-items:center">
+        <span style="font-size:13px;font-weight:500;color:var(--text)" data-i18n="m365_profile_manage_title">Profiler</span>
+      </div>
+      <div class="pmgmt-list" id="pmgmtList" style="flex:1;overflow-y:auto">
+        <div class="pmgmt-empty" data-i18n="m365_profile_no_profiles">No saved profiles yet.</div>
+      </div>
+      <div style="padding:10px 14px;border-top:1px solid var(--border);display:flex;flex-direction:column;gap:6px">
+        <button onclick="_pmgmtNewProfile()" style="width:100%;font-size:12px;height:26px;border-radius:6px;border:1px solid var(--accent);background:none;color:var(--accent);cursor:pointer;box-sizing:border-box">+ Ny profil</button>
+      </div>
+    </div>
+    <div class="pmgmt-panel-editor" id="pmgmtEditor">
+      <div style="padding:10px 16px;border-bottom:1px solid var(--border);display:flex;align-items:center;justify-content:space-between">
+        <span id="pmgmtEditorTitle" style="font-size:13px;font-weight:500;color:var(--text)">Rediger profil</span>
+        <button onclick="closeProfileMgmt()" style="background:none;border:none;color:var(--muted);font-size:18px;cursor:pointer;padding:0;line-height:1">&#215;</button>
+      </div>
+      <div class="pmgmt-editor-body" id="pmgmtEditorBody"><div id="pmgmtEditorPlaceholder" style="flex:1;display:flex;align-items:center;justify-content:center;color:var(--muted);font-size:12px;text-align:center;padding:24px">Klik på en profil for at redigere</div></div>
+      <div style="padding:10px 16px;border-top:1px solid var(--border);display:flex;justify-content:flex-end;gap:8px">
+        <button onclick="_pmgmtCloseEditor()" style="font-size:12px;height:26px;padding:0 12px;border-radius:6px;border:1px solid var(--border);background:none;color:var(--muted);cursor:pointer;box-sizing:border-box" data-i18n="btn_close">Luk</button>
+        <button onclick="_pmgmtSaveFullEdit()" style="font-size:12px;height:26px;padding:0 12px;border-radius:6px;border:1px solid var(--accent);background:rgba(99,126,210,.15);color:var(--accent);cursor:pointer;box-sizing:border-box" data-i18n="btn_save">Gem profil</button>
+      </div>
+    </div>
+  </div>
+</div>
+<!-- Import DB modal (#11) -->
+<div class="import-db-backdrop" id="importDbBackdrop" onclick="if(event.target===this)closeImportDBModal()">
+  <div class="import-db-modal">
+    <h2 data-i18n="m365_db_import_title">📥 Import Database</h2>
+    <p data-i18n="m365_db_import_desc">Select a previously exported <code>.zip</code> file. <b>Merge</b> adds dispositions and deletion log. <b>Replace</b> wipes and fully restores.</p>
+    <div>
+      <label style="font-size:11px;color:var(--muted);display:block;margin-bottom:4px" data-i18n="m365_db_import_file">ZIP file</label>
+      <input type="file" id="importDbFile" accept=".zip" style="width:100%;box-sizing:border-box;padding:5px 8px;background:var(--bg);border:1px solid var(--border);border-radius:5px;color:var(--text);font-size:12px">
+    </div>
+    <div style="display:flex;align-items:center;gap:10px">
+      <label style="font-size:11px;color:var(--muted)" data-i18n="m365_db_import_mode">Mode:</label>
+      <select id="importDbMode" style="font-size:12px;padding:4px 8px;background:var(--bg);border:1px solid var(--border);border-radius:5px;color:var(--text)">
+        <option value="merge" data-i18n="m365_db_import_merge">Merge (safe)</option>
+        <option value="replace" data-i18n="m365_db_import_replace">Replace (full restore)</option>
+      </select>
+    </div>
+    <div id="importDbReplaceWarn" style="display:none;background:#7c1a0060;border:1px solid var(--danger);border-radius:6px;padding:8px 10px;font-size:11px;color:#ff7070;line-height:1.5" data-i18n="m365_db_import_replace_warn">⚠ Replace mode will erase all existing scan data before restoring. Make sure you have a backup of ~/.gdpr_scanner.db first.</div>
+    <div id="importDbStatus" style="min-height:16px;font-size:11px;color:var(--muted)"></div>
+    <div style="display:flex;justify-content:flex-end;gap:8px;padding-top:4px;border-top:1px solid var(--border)">
+      <button onclick="closeImportDBModal()" style="background:none;border:1px solid var(--border);color:var(--muted);padding:5px 14px;border-radius:6px;font-size:12px;cursor:pointer" data-i18n="btn_close">Close</button>
+      <button id="importDbBtn" onclick="doImportDB()" style="background:var(--accent);color:#fff;border:none;padding:5px 14px;border-radius:6px;font-size:12px;cursor:pointer;font-weight:600" data-i18n="m365_db_import_run">Import</button>
+    </div>
+  </div>
+</div>
+
+<script type="module" src="/static/js/ui.js"></script>
+<script type="module" src="/static/js/log.js"></script>
+<script type="module" src="/static/js/users.js"></script>
+<script type="module" src="/static/js/auth.js"></script>
+<script type="module" src="/static/js/profiles.js"></script>
+<script type="module" src="/static/js/scan.js"></script>
+<script type="module" src="/static/js/results.js"></script>
+<script type="module" src="/static/js/sources.js"></script>
+<script type="module" src="/static/js/scheduler.js"></script>
+<script type="module" src="/static/js/connector.js"></script>
+<script type="module" src="/static/js/viewer.js"></script>
+</body>
+</html>
diff --git a/templates/viewer_denied.html b/templates/viewer_denied.html
new file mode 100644
index 0000000..57eccad
--- /dev/null
+++ b/templates/viewer_denied.html
@@ -0,0 +1,28 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>GDPRScanner — Access denied</title>
+  <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+  <style>
+    body { display: flex; align-items: center; justify-content: center; min-height: 100vh; margin: 0; }
+    .denied-card {
+      background: var(--surface);
+      border: 1px solid var(--border);
+      border-radius: 8px;
+      padding: 32px 40px;
+      text-align: center;
+      max-width: 360px;
+    }
+    .denied-card h1 { font-size: 16px; font-weight: 600; margin: 0 0 8px; color: var(--text); }
+    .denied-card p  { font-size: 13px; color: var(--muted); margin: 0; }
+  </style>
+</head>
+<body data-theme="dark">
+  <div class="denied-card">
+    <h1>Access denied</h1>
+    <p>This link is invalid or has expired.<br>Ask the administrator for a new link.</p>
+  </div>
+</body>
+</html>
diff --git a/templates/viewer_pin.html b/templates/viewer_pin.html
new file mode 100644
index 0000000..2b8f416
--- /dev/null
+++ b/templates/viewer_pin.html
@@ -0,0 +1,82 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>GDPRScanner — Enter PIN</title>
+  <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+  <style>
+    body { display: flex; align-items: center; justify-content: center; min-height: 100vh; margin: 0; }
+    .pin-card {
+      background: var(--surface);
+      border: 1px solid var(--border);
+      border-radius: 8px;
+      padding: 32px 40px;
+      width: min(340px, 92vw);
+      box-sizing: border-box;
+    }
+    .pin-card h1 { font-size: 15px; font-weight: 600; margin: 0 0 6px; color: var(--text); }
+    .pin-card p  { font-size: 12px; color: var(--muted); margin: 0 0 18px; }
+    .pin-input {
+      width: 100%; box-sizing: border-box;
+      font-size: 22px; letter-spacing: .3em; text-align: center;
+      padding: 10px 12px; border-radius: 6px;
+      border: 1px solid var(--border); background: var(--bg);
+      color: var(--text); outline: none; margin-bottom: 12px;
+    }
+    .pin-input:focus { border-color: var(--accent); }
+    .pin-btn {
+      width: 100%; padding: 10px; border: none; border-radius: 6px;
+      background: var(--accent); color: #fff; font-size: 13px;
+      font-weight: 600; cursor: pointer; font-family: var(--sans);
+    }
+    .pin-btn:disabled { opacity: .5; cursor: default; }
+    .pin-error { font-size: 12px; color: var(--danger); margin-top: 8px; min-height: 16px; text-align: center; }
+  </style>
+</head>
+<body data-theme="dark">
+  <div class="pin-card">
+    <h1>GDPRScanner</h1>
+    <p>Enter the viewer PIN to access results.</p>
+    <input id="pinInput" class="pin-input" type="password" inputmode="numeric"
+           maxlength="8" placeholder="••••" autocomplete="off"
+           onkeydown="if(event.key==='Enter')submitPin()">
+    <button class="pin-btn" id="pinBtn" onclick="submitPin()">Continue</button>
+    <div class="pin-error" id="pinError"></div>
+  </div>
+  <script>
+    // Verify the entered PIN with the server: open /view on success,
+    // otherwise show the server's error (or a generic fallback) and re-enable the button.
+    async function submitPin() {
+      const input = document.getElementById('pinInput');
+      const btn = document.getElementById('pinBtn');
+      const err = document.getElementById('pinError');
+      const pin = input.value.trim();
+      // Enter still fires while the button is disabled, so also bail if a request is in flight.
+      if (!pin || btn.disabled) return;
+      btn.disabled = true;
+      err.textContent = '';
+      try {
+        const r = await fetch('/api/viewer/pin/verify', {
+          method: 'POST',
+          headers: {'Content-Type': 'application/json'},
+          body: JSON.stringify({pin})
+        });
+        if (r.ok) { window.location.href = '/view'; return; }  // stay disabled while navigating
+        const d = await r.json().catch(() => ({}));  // tolerate a non-JSON error body
+        if (r.status === 429) {
+          err.textContent = d.error || 'Too many attempts. Try again later.';
+        } else {
+          err.textContent = d.error || 'Incorrect PIN.';
+          input.value = '';
+          input.focus();
+        }
+      } catch(e) {
+        err.textContent = 'Network error. Please try again.';
+      }
+      btn.disabled = false;
+    }
+    document.getElementById('pinInput').focus();
+  </script>
+</body>
+</html>
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..3e48f03
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,113 @@
+"""
+conftest.py — shared fixtures for GDPRScanner test suite.
+"""
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+# Ensure the project root is on sys.path so all modules are importable
+ROOT = Path(__file__).parent.parent
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+
+# ── File fixtures ─────────────────────────────────────────────────────────────
+
+@pytest.fixture()
+def tmp_dir(tmp_path):  # thin alias: hands back pytest's per-test tmp_path unchanged
+    return tmp_path
+
+
+@pytest.fixture()
+def docx_with_cpr(tmp_path):
+    """Build a .docx with three CPR numbers: two in paragraphs, one in a table cell."""
+    from docx import Document
+    document = Document()
+    for line in ("Elev 1: CPR 290472-1234 er registreret i systemet.",
+                 "Elev 2: personnummer 010185-4321."):
+        document.add_paragraph(line)
+    table = document.add_table(rows=2, cols=2)
+    cells = (("Navn", "CPR"), ("Anne Hansen", "CPR: 150364-5678"))
+    for (r, c) in ((0, 0), (0, 1), (1, 0), (1, 1)):
+        table.cell(r, c).text = cells[r][c]
+    out_path = tmp_path / "sample_with_cpr.docx"
+    document.save(out_path)
+    return out_path
+
+
+@pytest.fixture()
+def docx_no_cpr(tmp_path):
+    """Build a .docx whose text holds an account and a phone number but no CPR."""
+    from docx import Document
+    out_path = tmp_path / "sample_no_cpr.docx"
+    document = Document()
+    document.add_paragraph("Ingen personoplysninger her.")
+    document.add_paragraph("Konto: 1234-5678  Telefon: 33 12 34 56")
+    document.save(out_path)
+    return out_path
+
+
+@pytest.fixture()
+def xlsx_with_cpr(tmp_path):
+    """Build an .xlsx with a header row and one data row whose B2 cell holds a CPR."""
+    from openpyxl import Workbook
+    workbook = Workbook()
+    sheet = workbook.active
+    cells = {"A1": "Navn", "B1": "CPR",
+             "A2": "Test Person", "B2": "CPR: 290472-1234"}
+    for ref, value in cells.items():
+        sheet[ref] = value
+    out_path = tmp_path / "sample_with_cpr.xlsx"
+    workbook.save(out_path)
+    return out_path
+
+
+@pytest.fixture()
+def xlsx_no_cpr(tmp_path):
+    """Build an .xlsx of CPR-like account data that contains no actual CPR number."""
+    from openpyxl import Workbook
+    workbook = Workbook()
+    sheet = workbook.active
+    sheet["A1"], sheet["B1"] = "Kontonummer", "Beløb"
+    # Digit runs of the wrong length: 8 is too short and 11 too long for a CPR.
+    sheet["A2"] = "12345678"
+    sheet["A3"] = "29047212345"
+    sheet["A4"] = "Reg: 2904"
+    out_path = tmp_path / "sample_no_cpr.xlsx"
+    workbook.save(out_path)
+    return out_path
+
+
+@pytest.fixture()
+def txt_with_art9(tmp_path):
+    """Write a UTF-8 text file pairing a CPR number with health-related wording."""
+    lines = [
+        "Eleven CPR 290472-1234 har diagnosen diabetes og modtager behandling.",
+        "Kontakt læge vedr. sygemelding.",
+    ]
+    out_path = tmp_path / "sample_art9.txt"
+    out_path.write_text("\n".join(lines), encoding="utf-8")
+    return out_path
+
+
+@pytest.fixture()
+def binary_garbage(tmp_path):
+    """Write 25,600 bytes (0x00-0xFF repeated 100 times) that the scanner must survive."""
+    out_path = tmp_path / "sample_binary.bin"
+    out_path.write_bytes(bytes(range(256)) * 100)
+    return out_path
+
+
+@pytest.fixture()
+def tmp_db(tmp_path):
+    """Yield a fresh file-backed ScanDB at tmp_path/test.db; best-effort unlink afterwards."""
+    from gdpr_db import ScanDB
+    db_path = tmp_path / "test.db"
+    db = ScanDB(str(db_path))
+    yield db
+    try:
+        db_path.unlink(missing_ok=True)
+    except OSError:  # e.g. file still open/locked; it lives under tmp_path, so leaving it is harmless
+        pass
diff --git a/tests/test_app_config.py b/tests/test_app_config.py
new file mode 100644
index 0000000..8aa96ce
--- /dev/null
+++ b/tests/test_app_config.py
@@ -0,0 +1,254 @@
+"""
+test_app_config.py — Tests for app_config.py.
+
+Covers:
+  - LANG loading and key access
+  - Article 9 keyword detection (_check_special_category)
+  - Config load/save round-trip
+  - Admin PIN hash/verify
+  - Profile CRUD (_profile_save, _profile_get, _profile_delete)
+  - SMTP password encryption/decryption round-trip
+"""
+import sys
+import json
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import app_config
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 1. i18n
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestLang:
+    """Translation-table loading: the module-level LANG plus _load_lang / _load_lang_forced."""
+
+    def test_lang_dict_loaded(self):
+        """The module-level LANG table is a non-empty dict."""
+        table = app_config.LANG
+        assert isinstance(table, dict) and len(table) > 0
+
+    def test_lang_has_lang_code(self):
+        assert "_lang_code" in app_config.LANG
+
+    def test_load_lang_returns_dict(self):
+        assert isinstance(app_config._load_lang(), dict)
+
+    def test_load_lang_forced_en(self):
+        """Forcing "en" yields a non-empty table."""
+        table = app_config._load_lang_forced("en")
+        assert isinstance(table, dict) and len(table) > 0
+
+    def test_load_lang_forced_da(self):
+        """Forcing "da" yields a non-empty table."""
+        table = app_config._load_lang_forced("da")
+        assert isinstance(table, dict) and len(table) > 0
+
+    def test_load_lang_forced_de(self):
+        """Forcing "de" yields a non-empty table."""
+        table = app_config._load_lang_forced("de")
+        assert isinstance(table, dict) and len(table) > 0
+
+    def test_missing_lang_falls_back(self):
+        # An unrecognised code must still yield a dict rather than raise.
+        assert isinstance(app_config._load_lang_forced("xx"), dict)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 2. Article 9 keyword detection
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestCheckSpecialCategory:
+    """Keyword detection by app_config._check_special_category."""
+
+    def _cats(self, text):
+        """Run the check on text together with one fixed sample CPR match."""
+        return app_config._check_special_category(text, [{"raw": "290472-1234"}])
+
+    def test_health_keyword_detected(self):
+        found = self._cats("CPR: 290472-1234 har diagnosen diabetes og behandling")
+        assert "health" in found
+
+    def test_trade_union_keyword_detected(self):
+        found = self._cats("CPR: 290472-1234 er fagforeningsmedlem tillidsrepræsentant")
+        assert "trade_union" in found
+
+    def test_religion_keyword_detected(self):
+        found = self._cats("CPR: 290472-1234 kirke konfirmation")
+        assert "religion" in found
+
+    def test_no_keyword_returns_empty(self):
+        found = self._cats("CPR: 290472-1234 bor i Aarhus")
+        assert found == []
+
+    def test_empty_text_returns_empty(self):
+        assert app_config._check_special_category("", []) == []
+
+    def test_keyword_without_cpr_still_detected(self):
+        # An empty CPR list must not suppress keyword matches.
+        found = app_config._check_special_category("diagnose sygemelding behandling", [])
+        assert "health" in found
+
+    def test_returns_sorted_list(self):
+        found = self._cats("CPR 290472-1234 diabetes fagforening")
+        assert found == sorted(found)
+
+    def test_compiled_keywords_populated(self):
+        assert len(app_config._compiled_keywords) > 0
+
+    def test_keyword_flat_has_entries(self):
+        assert len(app_config._keyword_flat) > 0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 3. Config load / save
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestConfig:
+    """_load_config / _save_config against a config file redirected into tmp_path."""
+
+    def test_load_config_returns_dict(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(app_config, "_CONFIG_FILE", tmp_path / "config.json")
+        assert isinstance(app_config._load_config(), dict)
+
+    def test_save_and_load_round_trip(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(app_config, "_CONFIG_FILE", tmp_path / "config.json")
+        saved = {"client_id": "test-id", "tenant_id": "test-tid"}
+        app_config._save_config(saved)
+        loaded = app_config._load_config()
+        assert loaded["client_id"] == "test-id"
+        assert loaded["tenant_id"] == "test-tid"
+
+    def test_save_config_creates_file(self, tmp_path, monkeypatch):
+        target = tmp_path / "config.json"
+        monkeypatch.setattr(app_config, "_CONFIG_FILE", target)
+        app_config._save_config({"x": 1})
+        assert target.exists()
+
+    def test_load_missing_file_returns_empty(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(app_config, "_CONFIG_FILE", tmp_path / "nonexistent.json")
+        assert app_config._load_config() == {}
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 4. Admin PIN
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestAdminPin:
+    """Admin PIN set/verify helpers, each test starting from an empty config file."""
+
+    @pytest.fixture(autouse=True)
+    def _empty_config(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(app_config, "_CONFIG_FILE", tmp_path / "config.json")
+        app_config._save_config({})
+
+    def test_pin_not_set_initially(self, tmp_path, monkeypatch):
+        # A freshly written empty config carries no PIN.
+        assert app_config._admin_pin_is_set() is False
+
+    def test_set_and_verify_pin(self, tmp_path, monkeypatch):
+        """The PIN that was set verifies successfully."""
+        app_config._set_admin_pin("1234")
+        assert app_config._verify_admin_pin("1234") is True
+
+    def test_wrong_pin_fails(self, tmp_path, monkeypatch):
+        """A PIN other than the one that was set is rejected."""
+        app_config._set_admin_pin("1234")
+        assert app_config._verify_admin_pin("9999") is False
+
+    def test_pin_is_set_after_setting(self, tmp_path, monkeypatch):
+        app_config._set_admin_pin("5678")
+        assert app_config._admin_pin_is_set() is True
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 5. Profiles
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestProfiles:
+    """Profile save / get / delete / list, with the settings file redirected into tmp_path."""
+
+    @pytest.fixture(autouse=True)
+    def _isolate(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(app_config, "_SETTINGS_PATH", tmp_path / "settings.json")
+
+    def test_profiles_load_returns_list(self):
+        listed = app_config._profiles_load()
+        assert isinstance(listed, list)
+
+    def test_save_and_get_profile(self):
+        """A saved profile can be fetched back by its name."""
+        app_config._profile_save({
+            "id": "test-uuid-1",
+            "name": "Test Profile",
+            "sources": ["email"],
+            "user_ids": "all",
+            "options": {},
+        })
+        fetched = app_config._profile_get("Test Profile")
+        assert fetched is not None
+        assert fetched["name"] == "Test Profile"
+
+    def test_profile_get_by_id(self):
+        """The profile id is expected to work as a lookup key too."""
+        app_config._profile_save({"id": "uid-42", "name": "By ID", "sources": [], "options": {}})
+        assert app_config._profile_get("uid-42") is not None
+
+    def test_profile_delete(self):
+        """Deleting by name reports True and the profile is gone afterwards."""
+        app_config._profile_save({"id": "del-1", "name": "To Delete", "sources": [], "options": {}})
+        assert app_config._profile_delete("To Delete") is True
+        assert app_config._profile_get("To Delete") is None
+
+    def test_delete_nonexistent_returns_false(self):
+        """Deleting an unknown name reports False."""
+        assert app_config._profile_delete("Does Not Exist") is False
+
+    def test_profiles_load_after_save(self):
+        """Every saved profile shows up in the loaded list."""
+        for pid, pname in (("p1", "P1"), ("p2", "P2")):
+            app_config._profile_save({"id": pid, "name": pname, "sources": [], "options": {}})
+        listed = {p["name"] for p in app_config._profiles_load()}
+        assert "P1" in listed
+        assert "P2" in listed
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 6. SMTP password encryption
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestFernet:
+    """Password encrypt/decrypt helpers, with the machine-id file redirected into tmp_path."""
+
+    @pytest.fixture(autouse=True)
+    def _isolate(self, tmp_path, monkeypatch):
+        monkeypatch.setattr(app_config, "_MACHINE_ID_PATH", tmp_path / "machine_id")
+
+    @staticmethod
+    def _require_fernet():
+        """Skip the calling test when app_config._get_fernet() returns None."""
+        if app_config._get_fernet() is None:
+            pytest.skip("cryptography not installed")
+
+    def test_encrypt_decrypt_round_trip(self):
+        """Decrypting an encrypted password gives back the original text."""
+        self._require_fernet()
+        secret = "my-secret-smtp-password"
+        token = app_config._encrypt_password(secret)
+        assert app_config._decrypt_password(token) == secret
+
+    def test_encrypt_returns_string(self):
+        """The encrypted form is a str."""
+        self._require_fernet()
+        assert isinstance(app_config._encrypt_password("test"), str)
+
+    def test_encrypted_differs_from_plaintext(self):
+        """The encrypted form is not simply the input echoed back."""
+        self._require_fernet()
+        assert app_config._encrypt_password("password123") != "password123"
+
+    def test_decrypt_empty_returns_empty(self):
+        assert app_config._decrypt_password("") == ""
diff --git a/tests/test_checkpoint.py b/tests/test_checkpoint.py
new file mode 100644
index 0000000..abb550d
--- /dev/null
+++ b/tests/test_checkpoint.py
@@ -0,0 +1,147 @@
+"""
+test_checkpoint.py — Tests for checkpoint.py.
+
+Covers:
+  - _checkpoint_key: stable hashing of scan options
+  - _save_checkpoint / _load_checkpoint / _clear_checkpoint
+  - _load_delta_tokens / _save_delta_tokens
+"""
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import checkpoint
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def _isolate(tmp_path, monkeypatch):
+    """Point the checkpoint and delta-token paths at per-test files under tmp_path."""
+    for attr, filename in (("_CHECKPOINT_PATH", "checkpoint.json"), ("_DELTA_PATH", "delta.json")):
+        monkeypatch.setattr(checkpoint, attr, tmp_path / filename)
+
+
+_OPTS = {  # baseline scan options reused by the tests below
+    "sources": ["email", "onedrive"],
+    "user_ids": [{"id": "user-1"}, {"id": "user-2"}],
+    "options": {"older_than_days": 365},
+}
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 1. _checkpoint_key
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestCheckpointKey:
+    """Properties of the key that _checkpoint_key derives from scan options."""
+
+    def test_returns_string(self):
+        assert isinstance(checkpoint._checkpoint_key(_OPTS), str)
+
+    def test_key_is_hex(self):
+        # int(..., 16) raises ValueError for any non-hexadecimal string.
+        int(checkpoint._checkpoint_key(_OPTS), 16)
+
+    def test_same_options_same_key(self):
+        assert len({checkpoint._checkpoint_key(_OPTS) for _ in range(2)}) == 1
+
+    def test_different_sources_different_key(self):
+        changed = dict(_OPTS, sources=["sharepoint"])
+        assert checkpoint._checkpoint_key(_OPTS) != checkpoint._checkpoint_key(changed)
+
+    def test_different_users_different_key(self):
+        changed = dict(_OPTS, user_ids=[{"id": "user-99"}])
+        assert checkpoint._checkpoint_key(_OPTS) != checkpoint._checkpoint_key(changed)
+
+    def test_source_order_irrelevant(self):
+        forward = checkpoint._checkpoint_key(dict(_OPTS, sources=["email", "onedrive"]))
+        reverse = checkpoint._checkpoint_key(dict(_OPTS, sources=["onedrive", "email"]))
+        assert forward == reverse
+
+    def test_empty_options(self):
+        empty_key = checkpoint._checkpoint_key({})
+        assert isinstance(empty_key, str) and len(empty_key) > 0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 2. Save / load / clear
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestSaveLoadCheckpoint:
+    """Round-trips through _save_checkpoint, _load_checkpoint and _clear_checkpoint."""
+
+    def test_load_returns_none_when_no_file(self):
+        """Nothing saved yet, so loading yields None."""
+        assert checkpoint._load_checkpoint(checkpoint._checkpoint_key(_OPTS)) is None
+
+    def test_save_then_load(self):
+        key = checkpoint._checkpoint_key(_OPTS)
+        payload = dict(
+            scanned_ids={"id1", "id2", "id3"},
+            flagged=[{"id": "c1", "name": "file.docx"}],
+            meta={"started_at": 1700000000},
+        )
+        checkpoint._save_checkpoint(key, **payload)
+        assert checkpoint._load_checkpoint(key) is not None
+
+    def test_scanned_ids_preserved(self):
+        key = checkpoint._checkpoint_key(_OPTS)
+        checkpoint._save_checkpoint(key, {"id1", "id2"}, [], {})
+        restored = checkpoint._load_checkpoint(key)
+        assert set(restored["scanned_ids"]) == {"id1", "id2"}
+
+    def test_flagged_items_preserved(self):
+        key = checkpoint._checkpoint_key(_OPTS)
+        checkpoint._save_checkpoint(key, set(), [{"id": "c1"}, {"id": "c2"}], {})
+        restored = checkpoint._load_checkpoint(key)
+        assert len(restored["flagged"]) == 2
+
+    def test_wrong_key_returns_none(self):
+        """A checkpoint saved under one key is not returned for a different key."""
+        saved_key = checkpoint._checkpoint_key(_OPTS)
+        checkpoint._save_checkpoint(saved_key, {"id1"}, [], {})
+        other_key = checkpoint._checkpoint_key({**_OPTS, "sources": ["sharepoint"]})
+        assert checkpoint._load_checkpoint(other_key) is None
+
+    def test_clear_removes_file(self, tmp_path):
+        """After clearing, the saved checkpoint can no longer be loaded."""
+        key = checkpoint._checkpoint_key(_OPTS)
+        checkpoint._save_checkpoint(key, {"id1"}, [], {})
+        checkpoint._clear_checkpoint()
+        assert checkpoint._load_checkpoint(key) is None
+
+    def test_clear_on_missing_file_does_not_raise(self):
+        checkpoint._clear_checkpoint()  # nothing was saved; the call itself is the test
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 3. Delta tokens
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestDeltaTokens:
+    """Persistence of delta tokens via _save_delta_tokens / _load_delta_tokens."""
+
+    def test_load_returns_empty_when_no_file(self):
+        assert checkpoint._load_delta_tokens() == {}
+
+    def test_save_then_load(self):
+        tokens = {
+            "email:user1": "https://graph.microsoft.com/v1.0/me/mailFolders/delta?$deltaToken=abc",
+            "onedrive:user1": "https://graph.microsoft.com/v1.0/me/drive/delta?token=xyz",
+        }
+        checkpoint._save_delta_tokens(tokens)
+        assert checkpoint._load_delta_tokens() == tokens
+
+    def test_overwrite_preserves_new_value(self):
+        for url in ("old_url", "new_url"):
+            checkpoint._save_delta_tokens({"key": url})
+        assert checkpoint._load_delta_tokens()["key"] == "new_url"
+
+    def test_save_empty_dict(self):
+        checkpoint._save_delta_tokens({})
+        assert checkpoint._load_delta_tokens() == {}
diff --git a/tests/test_db.py b/tests/test_db.py
new file mode 100644
index 0000000..7b7fbe5
--- /dev/null
+++ b/tests/test_db.py
@@ -0,0 +1,267 @@
+"""
+test_db.py — Tests for gdpr_db.py (ScanDB).
+
+Covers:
+  - begin_scan / finish_scan round-trip
+  - save_item and retrieval
+  - CPR index stores hash, never plaintext
+  - lookup_data_subject returns matching items
+  - set_disposition / get_disposition
+  - Deletion log (listed here, but no test in this file exercises it yet)
+  - Export / import cycle (merge and replace modes)
+"""
+import sys
+import hashlib
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from gdpr_db import ScanDB
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _make_card(item_id="abc123", cpr_count=1, source_type="email", role="staff"):
+    """Return a card dict for tests; "source" is always "email", other fields are fixed samples."""
+    card = dict(
+        id=item_id,
+        name=f"{item_id}.docx",
+        source="email",
+        source_type=source_type,
+        cpr_count=cpr_count,
+        url="https://example.com/item",
+        size_kb=12.5,
+        modified="2024-03-01",
+        thumb_b64="", thumb_mime="image/svg+xml",
+        risk=None,
+        account_id="user-1",
+        account_name="Test User",
+        user_role=role,
+        drive_id="",
+        attachments=[],
+        folder="",
+        transfer_risk="",
+        special_category=[],
+        face_count=0, exif={},
+    )
+    return card
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 1. Scan lifecycle
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestScanLifecycle:
+    """begin_scan / finish_scan bookkeeping on a fresh database."""
+
+    def test_begin_scan_returns_int(self, tmp_db):
+        new_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        assert isinstance(new_id, int) and new_id > 0
+
+    def test_begin_scan_increments(self, tmp_db):
+        first = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        second = tmp_db.begin_scan({"sources": ["onedrive"], "user_ids": []})
+        assert second > first
+
+    def test_finish_scan_does_not_raise(self, tmp_db):
+        tmp_db.finish_scan(tmp_db.begin_scan({"sources": ["email"], "user_ids": []}), 42)
+
+    def test_multiple_scans_independent(self, tmp_db):
+        """Two open scans can each take an item and be finished without raising."""
+        email_scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(email_scan, _make_card("item-a"), ["290472-1234"])
+        drive_scan = tmp_db.begin_scan({"sources": ["onedrive"], "user_ids": []})
+        tmp_db.save_item(drive_scan, _make_card("item-b"), ["010185-4321"])
+        for open_scan in (email_scan, drive_scan):
+            tmp_db.finish_scan(open_scan, 1)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 2. save_item
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestSaveItem:
+    """save_item accepts each exercised argument shape without raising."""
+
+    def test_save_item_does_not_raise(self, tmp_db):
+        scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan, _make_card(), ["290472-1234"])
+
+    def test_save_item_without_cprs(self, tmp_db):
+        scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan, _make_card(cpr_count=0), [])
+
+    def test_save_multiple_items(self, tmp_db):
+        scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        for item_id in (f"item-{n}" for n in range(5)):
+            tmp_db.save_item(scan, _make_card(item_id), ["290472-1234"])
+
+    def test_save_item_with_pii_counts(self, tmp_db):
+        scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan, _make_card(), ["290472-1234"], pii_counts={"cpr": 1, "name": 2, "email": 0})
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 3. CPR index — hash only, never plaintext
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestCprIndex:
+    """The CPR index stores only hashes, and lookup_data_subject finds items by plain CPR."""
+
+    @staticmethod
+    def _stored_hashes(db):
+        """Read every cpr_hash straight from the SQLite file behind db."""
+        import sqlite3
+        from contextlib import closing
+        # sqlite3's own context manager only commits/rolls back; closing() releases the file.
+        with closing(sqlite3.connect(db._path)) as con:
+            return [row[0] for row in con.execute("SELECT cpr_hash FROM cpr_index").fetchall()]
+
+    def test_cpr_not_stored_in_plaintext(self, tmp_db):
+        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan_id, _make_card(), ["290472-1234"])
+        hashes = self._stored_hashes(tmp_db)
+        assert len(hashes) == 1
+        assert hashes[0] != "290472-1234"
+        assert "290472" not in hashes[0]
+
+    def test_cpr_hash_is_sha256(self, tmp_db):
+        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan_id, _make_card(), ["290472-1234"])
+        expected = hashlib.sha256("290472-1234".encode()).hexdigest()
+        assert self._stored_hashes(tmp_db)[0] == expected
+
+    def test_lookup_finds_item(self, tmp_db):
+        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan_id, _make_card("item-x"), ["290472-1234"])
+        assert len(tmp_db.lookup_data_subject("290472-1234")) >= 1
+
+    def test_lookup_returns_correct_item(self, tmp_db):
+        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan_id, _make_card("target-item"), ["290472-1234"])
+        results = tmp_db.lookup_data_subject("290472-1234")
+        ids = [r.get("id") or r.get("item_id") for r in results]
+        assert "target-item" in ids
+
+    def test_lookup_different_cpr_returns_empty(self, tmp_db):
+        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan_id, _make_card(), ["290472-1234"])
+        assert tmp_db.lookup_data_subject("010185-4321") == []
+
+    def test_lookup_multiple_items_for_same_cpr(self, tmp_db):
+        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan_id, _make_card("item-a"), ["290472-1234"])
+        tmp_db.save_item(scan_id, _make_card("item-b"), ["290472-1234"])
+        results = tmp_db.lookup_data_subject("290472-1234")
+        assert len(results) >= 2
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 4. Dispositions
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestDispositions:
+    """set_disposition / get_disposition round-trips for saved items."""
+
+    def test_get_disposition_returns_none_for_unknown(self, tmp_db):
+        assert tmp_db.get_disposition("nonexistent") is None
+
+    def test_set_and_get_disposition(self, tmp_db):
+        scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan, _make_card("disp-item"), ["290472-1234"])
+        tmp_db.set_disposition("disp-item", "retain-legal", "Bogfoeringsloven", "", "admin")
+        stored = tmp_db.get_disposition("disp-item")
+        assert stored is not None
+        assert stored["status"] == "retain-legal"
+
+    def test_disposition_legal_basis_stored(self, tmp_db):
+        scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan, _make_card("disp-2"), [])
+        tmp_db.set_disposition("disp-2", "delete-scheduled", "Data minimisation", "", "reviewer")
+        assert tmp_db.get_disposition("disp-2")["legal_basis"] == "Data minimisation"
+
+    def test_disposition_overwrite(self, tmp_db):
+        """A second set_disposition call replaces the earlier status."""
+        scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        tmp_db.save_item(scan, _make_card("disp-3"), [])
+        for status, actor in (("unreviewed", ""), ("deleted", "admin")):
+            tmp_db.set_disposition("disp-3", status, "", "", actor)
+        assert tmp_db.get_disposition("disp-3")["status"] == "deleted"
+
+    def test_all_disposition_values_accepted(self, tmp_db):
+        scan = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
+        statuses = ("unreviewed", "retain-legal", "retain-legitimate",
+                    "retain-contract", "delete-scheduled", "deleted")
+        for index, status in enumerate(statuses):
+            item_id = f"disp-status-{index}"
+            tmp_db.save_item(scan, _make_card(item_id), [])
+            tmp_db.set_disposition(item_id, status, "", "", "test")
+            stored = tmp_db.get_disposition(item_id)
+            assert stored["status"] == status
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 5. Export / import
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestExportImport:
+    """export_db / import_db cycles; each test skips when export_db is not available."""
+
+    def _populate(self, db):
+        """Record one finished scan with two items and a disposition on the first."""
+        scan = db.begin_scan({"sources": ["email"], "user_ids": []})
+        for item_id, cpr in (("exp-1", "290472-1234"), ("exp-2", "010185-4321")):
+            db.save_item(scan, _make_card(item_id), [cpr])
+        db.set_disposition("exp-1", "retain-legal", "Bogfoeringsloven", "", "admin")
+        db.finish_scan(scan, 2)
+
+    def test_export_creates_zip(self, tmp_db, tmp_path):
+        if not hasattr(tmp_db, "export_db"):
+            pytest.skip("export_db not implemented")
+        self._populate(tmp_db)
+        archive = tmp_path / "export.zip"
+        tmp_db.export_db(str(archive))
+        assert archive.exists()
+        assert archive.stat().st_size > 0
+
+    def test_export_zip_contains_expected_files(self, tmp_db, tmp_path):
+        if not hasattr(tmp_db, "export_db"):
+            pytest.skip("export_db not implemented")
+        import zipfile
+        self._populate(tmp_db)
+        archive = tmp_path / "export.zip"
+        tmp_db.export_db(str(archive))
+        with zipfile.ZipFile(archive) as zf:
+            members = zf.namelist()
+        for required in ["export_meta.json", "flagged_items.json", "dispositions.json"]:
+            assert required in members
+
+    def test_import_merge_adds_dispositions(self, tmp_path):
+        if not hasattr(ScanDB, "export_db"):
+            pytest.skip("export_db not implemented")
+        # Export from a populated source database ...
+        source = ScanDB(str(tmp_path / "src.db"))
+        self._populate(source)
+        archive = tmp_path / "export.zip"
+        source.export_db(str(archive))
+
+        # ... and merge the archive into a brand-new target.
+        target = ScanDB(str(tmp_path / "tgt.db"))
+        target.import_db(str(archive), mode="merge")
+        # The disposition recorded for exp-1 must now exist in the target.
+        assert target.get_disposition("exp-1") is not None
+
+    def test_import_replace_restores_items(self, tmp_path):
+        if not hasattr(ScanDB, "export_db"):
+            pytest.skip("export_db not implemented")
+        source = ScanDB(str(tmp_path / "src2.db"))
+        self._populate(source)
+        archive = tmp_path / "export2.zip"
+        source.export_db(str(archive))
+
+        target = ScanDB(str(tmp_path / "tgt2.db"))
+        target.import_db(str(archive), mode="replace")
+        assert len(target.lookup_data_subject("290472-1234")) >= 1
diff --git a/tests/test_document_scanner.py b/tests/test_document_scanner.py
new file mode 100644
index 0000000..dcc8f97
--- /dev/null
+++ b/tests/test_document_scanner.py
@@ -0,0 +1,224 @@
+"""
+test_document_scanner.py — Tests for CPR detection in document_scanner.py.
+
+Covers:
+  - extract_matches: context-gated CPR detection
+  - is_valid_cpr: date validation and modulo-11
+  - scan_docx: CPR detection in Word documents (including table cells)
+  - scan_xlsx: CPR detection in Excel cells with context
+  - False-positive suppression (invoices, phone numbers, account numbers)
+"""
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import document_scanner as ds
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _cprs(text: str) -> list:
+    """Call ds.extract_matches(text, 1, "test") and keep only its first result."""
+    matches, _unused = ds.extract_matches(text, 1, "test")
+    return matches
+
+
+def _has_cpr(text: str) -> bool:
+    return True if _cprs(text) else False  # any match at all counts
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 1. Date validation — is_valid_cpr
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestIsValidCpr:
+    """Checks on ds.is_valid_cpr, called with four string fields; a 2-tuple is expected."""
+    def test_valid_date_returns_true(self):
+        date_ok, _mod11 = ds.is_valid_cpr("29", "04", "72", "1234")
+        assert date_ok is True
+
+    def test_invalid_month_returns_false(self):
+        date_ok, _mod11 = ds.is_valid_cpr("01", "13", "70", "1234")  # month 13
+        assert date_ok is False
+
+    def test_invalid_day_zero_returns_false(self):
+        date_ok, _mod11 = ds.is_valid_cpr("00", "01", "70", "1234")  # day 00
+        assert date_ok is False
+
+    def test_invalid_day_32_returns_false(self):
+        date_ok, _mod11 = ds.is_valid_cpr("32", "01", "70", "1234")  # day 32
+        assert date_ok is False
+
+    def test_february_31_invalid(self):
+        date_ok, _mod11 = ds.is_valid_cpr("31", "02", "90", "1234")  # 31 February
+        assert date_ok is False
+
+    def test_returns_tuple_of_two(self):
+        outcome = ds.is_valid_cpr("01", "01", "70", "1234")
+        assert isinstance(outcome, tuple) and len(outcome) == 2
+
+    def test_mod11_field_is_bool(self):
+        _date_ok, mod11_flag = ds.is_valid_cpr("01", "01", "70", "1234")
+        assert isinstance(mod11_flag, bool)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 2. extract_matches — context-gated detection
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestExtractMatches:
+    """Detection checks on ds.extract_matches, driven through _cprs / _has_cpr."""
+    # ── Should detect ─────────────────────────────────────────────────────────
+
+    def test_detects_cpr_with_label(self):
+        assert _has_cpr("CPR: 290472-1234") is True
+
+    def test_detects_cpr_uppercase_label(self):
+        assert _has_cpr("CPR-nummer: 290472-1234") is True
+
+    def test_detects_personnummer_keyword(self):
+        assert _has_cpr("personnummer 010185-4321") is True
+
+    def test_detects_no_separator(self):
+        assert _has_cpr("cpr nummer 2904721234") is True
+
+    def test_detects_space_separator(self):
+        assert _has_cpr("CPR 290472 1234") is True
+
+    def test_result_contains_formatted_field(self):
+        first = _cprs("CPR: 290472-1234")[0]
+        assert first["formatted"] == "290472-1234"
+
+    def test_result_contains_raw_field(self):
+        first = _cprs("CPR: 290472-1234")[0]
+        assert "raw" in first
+
+    def test_multiple_cprs_returned(self):
+        # Two labelled numbers joined by "og" in a single string
+        sample = "CPR: 290472-1234 og personnummer 010185-4321"
+        assert len(_cprs(sample)) == 2
+
+    # ── Should NOT detect ─────────────────────────────────────────────────────
+
+    def test_rejects_naked_number_without_context(self):
+        # Bare digits: no context keyword and no mod-11, so nothing may be reported
+        assert _has_cpr("2904721234") is False
+
+    def test_rejects_phone_number_8_digits(self):
+        assert _has_cpr("ring 12345678 for info") is False
+
+    def test_rejects_invoice_context(self):
+        assert _has_cpr("faktura nr 290472-1234") is False
+
+    def test_rejects_part_number_context(self):
+        assert _has_cpr("del nr. 290472-1234") is False
+
+    def test_rejects_invalid_date(self):
+        # Month 13 makes the date invalid, so the labelled number must be dropped
+        assert _has_cpr("CPR: 011370-1234") is False
+
+    def test_empty_string(self):
+        assert _has_cpr("") is False
+
+    def test_plain_prose_no_numbers(self):
+        assert _has_cpr("Ingen personoplysninger i denne tekst.") is False
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 3. scan_docx
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestScanDocx:
+    """ds.scan_docx on the docx_with_cpr / docx_no_cpr fixtures (defined elsewhere)."""
+    def test_detects_cpr_in_paragraph(self, docx_with_cpr):
+        """At least one CPR is reported for the populated document."""
+        assert len(ds.scan_docx(docx_with_cpr)["cprs"]) >= 1
+
+    def test_detects_multiple_cprs(self, docx_with_cpr):
+        """At least two CPRs are reported for the populated document."""
+        assert len(ds.scan_docx(docx_with_cpr)["cprs"]) >= 2
+
+    def test_detects_cpr_in_table_cell(self, docx_with_cpr):
+        """Three or more hits are required, so the table cell must be scanned too."""
+        # Fixture: 2 CPRs in paragraphs + 1 in a table cell (with context)
+        assert len(ds.scan_docx(docx_with_cpr)["cprs"]) >= 3
+
+    def test_no_false_positive_on_clean_doc(self, docx_no_cpr):
+        """The clean document must give an empty "cprs" list."""
+        assert ds.scan_docx(docx_no_cpr)["cprs"] == []
+
+    def test_returns_cprs_key(self, docx_with_cpr):
+        """The result always exposes a "cprs" key."""
+        assert "cprs" in ds.scan_docx(docx_with_cpr)
+
+    def test_no_error_on_clean_doc(self, docx_no_cpr):
+        """The "error" entry is either absent or None for the clean document."""
+        assert ds.scan_docx(docx_no_cpr).get("error") is None
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 4. scan_xlsx
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestScanXlsx:
+    """ds.scan_xlsx on the xlsx_with_cpr / xlsx_no_cpr fixtures (defined elsewhere)."""
+    def test_detects_cpr_in_cell_with_context(self, xlsx_with_cpr):
+        """At least one CPR is reported for the populated workbook."""
+        assert len(ds.scan_xlsx(xlsx_with_cpr)["cprs"]) >= 1
+
+    def test_no_false_positive_on_account_numbers(self, xlsx_no_cpr):
+        """The CPR-free workbook must give an empty "cprs" list."""
+        assert ds.scan_xlsx(xlsx_no_cpr)["cprs"] == []
+
+    def test_returns_cprs_key(self, xlsx_with_cpr):
+        """The result always exposes a "cprs" key."""
+        assert "cprs" in ds.scan_xlsx(xlsx_with_cpr)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 5. Binary / edge cases via cpr_detector._scan_bytes
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestScanBytes:
+    """cpr_detector._scan_bytes(data, name) checks; the module is imported per test."""
+    def test_binary_garbage_does_not_crash(self, binary_garbage):
+        import cpr_detector
+        result = cpr_detector._scan_bytes(binary_garbage.read_bytes(), "sample.bin")
+        # Only the shape matters here: a dict carrying a "cprs" entry.
+        assert isinstance(result, dict)
+        assert "cprs" in result
+
+    def test_empty_bytes_returns_empty(self):
+        import cpr_detector
+        # No input bytes, so the "cprs" list is expected to be empty.
+        assert cpr_detector._scan_bytes(b"", "empty.txt")["cprs"] == []
+
+    def test_txt_with_cpr_detected(self, txt_with_art9):
+        """The text fixture must produce at least one match via extract_matches."""
+        # scan_text in document_scanner calls undefined extract_cpr_and_dates;
+        # test the underlying extract_matches directly on the file content.
+        # The module-level _cprs helper wraps that call with the same arguments,
+        # so this test needs no local imports.
+        assert len(_cprs(txt_with_art9.read_text(encoding='utf-8'))) >= 1
+
+    def test_docx_with_cpr_via_scan_bytes(self, docx_with_cpr):
+        """Bytes of the .docx fixture, named sample.docx, yield at least one CPR."""
+        import cpr_detector
+        result = cpr_detector._scan_bytes(docx_with_cpr.read_bytes(), "sample.docx")
+        assert len(result["cprs"]) >= 1
+
+    def test_xlsx_with_cpr_via_scan_bytes(self, xlsx_with_cpr):
+        """Bytes of the .xlsx fixture, named sample.xlsx, yield at least one CPR."""
+        import cpr_detector
+        result = cpr_detector._scan_bytes(xlsx_with_cpr.read_bytes(), "sample.xlsx")
+        assert len(result["cprs"]) >= 1
+
+    def test_unsupported_extension_does_not_crash(self):
+        """An unknown extension must still come back as a dict."""
+        import cpr_detector
+        assert isinstance(cpr_detector._scan_bytes(b"some bytes", "file.xyz"), dict)
diff --git a/tests/test_routes.py b/tests/test_routes.py
new file mode 100644
index 0000000..d909652
--- /dev/null
+++ b/tests/test_routes.py
@@ -0,0 +1,277 @@
+"""
+Integration tests for Flask routes — uses the real Flask test client.
+
+Strategy
+--------
+- ``flask_app``  (module-scope) — imports gdpr_scanner once, enables TESTING mode.
+- ``client``     (function-scope) — fresh test_client() per test.
+- ``db_patch``   (function-scope) — replaces _get_db in routes.database and
+                  routes.export with a ScanDB backed by tmp_path so tests never
+                  touch ~/.gdprscanner.  Also sets DB_OK = True on both modules.
+- ``mock_connector`` — sets routes.state.connector to a MagicMock so routes
+                  that require authentication pass the ``if not state.connector``
+                  guard.
+- ``clean_state`` — autouse, clears routes.state.flagged_items after each test
+                  and probes the scan lock; a lock still held is never forced open.
+"""
+import io
+import threading
+import time
+from unittest.mock import MagicMock
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture(scope="module")
+def flask_app():
+    """Import gdpr_scanner, switch its app to TESTING with CSRF off, and return it."""
+    from gdpr_scanner import app
+    app.config.update(TESTING=True, WTF_CSRF_ENABLED=False)
+    return app
+
+
+@pytest.fixture()
+def client(flask_app):
+    with flask_app.test_client() as test_client:  # context closes after the test
+        yield test_client
+
+
+@pytest.fixture()
+def db_patch(tmp_path, monkeypatch):
+    """Swap in a temp-dir ScanDB for routes.database and routes.export; return it."""
+    from gdpr_db import ScanDB
+    import routes.database
+    import routes.export
+    scratch_db = ScanDB(str(tmp_path / "test.db"))
+    for module in (routes.database, routes.export):
+        monkeypatch.setattr(module, "_get_db", lambda: scratch_db)  # same DB for both
+        monkeypatch.setattr(module, "DB_OK", True)
+    return scratch_db
+
+
+@pytest.fixture()
+def mock_connector(monkeypatch):
+    """Install a MagicMock as routes.state.connector and return it.
+
+    Per the original note, /api/scan/start lives in the routes/scan.py blueprint
+    and only checks ``state.connector``, so patching that attribute is enough.
+    """
+    from routes import state as shared_state
+    fake = MagicMock()
+    monkeypatch.setattr(shared_state, "connector", fake)
+    return fake
+
+
+@pytest.fixture(autouse=True)
+def clean_state():
+    """Clear in-memory scan results after each test and let the scan lock settle."""
+    from routes import state
+    yield
+    # Clear in-memory results so export tests don't bleed into each other
+    state.flagged_items.clear()
+    # A non-blocking probe does nothing when the lock is busy.  Wait a bounded
+    # time instead so a background scan thread started by a test can finish; the
+    # lock is never force-released, because its holder will release it itself.
+    if state._scan_lock.acquire(timeout=2.0):
+        state._scan_lock.release()
+
+
+# ---------------------------------------------------------------------------
+# /api/scan/status
+# ---------------------------------------------------------------------------
+
+class TestScanStatus:
+    """GET /api/scan/status when the test itself has not started a scan."""
+    def test_idle_returns_not_running(self, client):
+        response = client.get("/api/scan/status")
+        assert response.status_code == 200
+        assert response.get_json()["running"] is False
+
+    def test_scan_id_is_none_when_idle(self, client):
+        payload = client.get("/api/scan/status").get_json()
+        # The key must be present and explicitly null, not merely absent.
+        assert "scan_id" in payload
+        assert payload["scan_id"] is None
+
+
+# ---------------------------------------------------------------------------
+# /api/scan/start
+# ---------------------------------------------------------------------------
+
+class TestScanStart:
+    def test_unauthenticated_returns_401(self, client, monkeypatch):
+        from routes import state
+        monkeypatch.setattr(state, "connector", None)  # no connector set at all
+        response = client.post("/api/scan/start", json={})
+        assert response.status_code == 401
+        assert "not authenticated" in response.get_json()["error"]
+
+    def test_lock_held_returns_409(self, client, mock_connector):
+        from routes import state
+        # Hold the lock as if a scan were already running
+        acquired = state._scan_lock.acquire(blocking=False)
+        assert acquired, "Lock should be free at test start"
+        try:
+            response = client.post("/api/scan/start", json={})
+            assert response.status_code == 409
+            assert "already running" in response.get_json()["error"]
+        finally:
+            state._scan_lock.release()  # always hand the lock back to later tests
+
+    def test_authenticated_returns_started(self, client, mock_connector, monkeypatch):
+        """With a connector and a stubbed run_scan the route answers "started"."""
+        import scan_engine
+        from routes import state
+        # Stub run_scan so the background thread finishes instantly
+        monkeypatch.setattr(scan_engine, "run_scan", lambda opts: None)
+        response = client.post("/api/scan/start", json={"sources": ["email"]})
+        assert response.status_code == 200
+        assert response.get_json()["status"] == "started"
+        # Block (bounded) until the background thread lets go of the lock; a
+        # timed acquire replaces the sleep-and-poll loop on the wall clock.
+        released = state._scan_lock.acquire(timeout=2.0)
+        assert released, "scan lock was never released"
+        state._scan_lock.release()
+
+
+# ---------------------------------------------------------------------------
+# /api/scan/stop
+# ---------------------------------------------------------------------------
+
+class TestScanStop:
+    def test_stop_always_returns_200(self, client):
+        reply = client.post("/api/scan/stop")  # the test starts no scan beforehand
+        assert reply.status_code == 200
+        assert reply.get_json()["status"] == "stopping"
+
+
+# ---------------------------------------------------------------------------
+# /api/db/stats
+# ---------------------------------------------------------------------------
+
+class TestDbStats:
+    def test_without_db_returns_503(self, client, monkeypatch):
+        """With DB_OK forced to False the stats endpoint answers 503."""
+        import routes.database
+        monkeypatch.setattr(routes.database, "DB_OK", False)
+        assert client.get("/api/db/stats").status_code == 503
+
+    def test_with_db_returns_200(self, client, db_patch):
+        # The direct route in gdpr_scanner.py (which takes precedence over the
+        # blueprint) returns get_stats() directly — an empty dict for a fresh DB.
+        response = client.get("/api/db/stats")
+        assert response.status_code == 200
+        assert isinstance(response.get_json(), dict)
+
+
+# ---------------------------------------------------------------------------
+# /api/db/disposition
+# ---------------------------------------------------------------------------
+
+class TestDisposition:
+    def test_set_disposition_missing_item_id_returns_400(self, client, db_patch):
+        response = client.post("/api/db/disposition", json={"status": "retain-legal"})
+        assert response.status_code == 400
+        assert "item_id" in response.get_json()["error"]  # error names the missing key
+
+    def test_set_disposition_saves_and_get_returns_it(self, client, db_patch):
+        """A disposition POSTed for an item id is returned by the matching GET."""
+        item_id = "test-item-abc123"
+        body = {
+            "item_id":    item_id,
+            "status":     "retain-legal",
+            "legal_basis": "GDPR Art. 6(1)(c)",
+            "notes":      "Required by law",
+        }
+        # Set
+        saved = client.post("/api/db/disposition", json=body)
+        assert saved.status_code == 200
+        assert saved.get_json()["status"] == "saved"
+
+        # Get
+        fetched = client.get(f"/api/db/disposition/{item_id}")
+        assert fetched.status_code == 200
+        assert fetched.get_json()["status"] == "retain-legal"
+
+    def test_get_disposition_unknown_id_returns_unreviewed(self, client, db_patch):
+        response = client.get("/api/db/disposition/no-such-item")
+        assert response.status_code == 200  # unknown ids are not an error
+        assert response.get_json()["status"] == "unreviewed"
+
+    def test_without_db_returns_503(self, client, monkeypatch):
+        import routes.database
+        monkeypatch.setattr(routes.database, "DB_OK", False)
+        body = {"item_id": "x", "status": "retain-legal"}  # item_id and status both set
+        response = client.post("/api/db/disposition", json=body)
+        assert response.status_code == 503
+
+
+# ---------------------------------------------------------------------------
+# /api/export_excel
+# ---------------------------------------------------------------------------
+
+class TestExportExcel:
+    XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+
+    def test_empty_db_returns_workbook(self, client, db_patch):
+        response = client.get("/api/export_excel")
+        assert response.status_code == 200
+        assert self.XLSX_MIME in response.content_type
+        # An xlsx file is a zip container, so it has to open with the PK magic bytes
+        assert response.data[:2] == b"PK"
+
+    def test_with_items_in_memory_includes_data(self, client, db_patch):
+        from routes import state
+        flagged = {
+            "id":         "item-001",
+            "name":       "test_file.docx",
+            "source":     "onedrive",
+            "cpr_count":  2,
+            "face_count": 0,
+            "account_name": "Anna Hansen",
+            "user_role":  "staff",
+            "modified":   "2025-01-15T10:00:00",
+            "size_kb":    42,
+            "url":        "https://example.com/file",
+        }
+        state.flagged_items.append(flagged)
+        response = client.get("/api/export_excel")
+        assert response.status_code == 200
+        assert response.data[:2] == b"PK"
+        assert len(response.data) > 4096  # data makes it larger than a skeleton workbook
+
+
+# ---------------------------------------------------------------------------
+# /api/export_article30
+# ---------------------------------------------------------------------------
+
+class TestExportArticle30:
+    DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+
+    def test_no_items_returns_400(self, client, db_patch):
+        """With nothing flagged, the Article 30 export answers 400 ("scan first")."""
+        response = client.get("/api/export_article30")
+        assert response.status_code == 400
+        assert "scan first" in response.get_json()["error"].lower()
+
+    def test_with_items_returns_docx(self, client, db_patch):
+        from routes import state
+        flagged = {
+            "id":           "item-002",
+            "name":         "payroll.xlsx",
+            "source":       "email",
+            "cpr_count":    1,
+            "account_name": "Test User",
+            "user_role":    "staff",
+            "modified":     "2025-03-01T09:00:00",
+            "size_kb":      10,
+        }
+        state.flagged_items.append(flagged)
+        response = client.get("/api/export_article30")
+        assert response.status_code == 200
+        assert self.DOCX_MIME in response.content_type
+        assert response.data[:2] == b"PK"  # DOCX is a zip, so it starts with PK