Initial commit
This commit is contained in:
commit
9c7df76fbd
168
.github/workflows/build.yml
vendored
Normal file
168
.github/workflows/build.yml
vendored
Normal file
@ -0,0 +1,168 @@
|
||||
name: Build — Windows & Linux

# Trigger on every push to main, on version tags (v*), or manually.
on:
  push:
    branches: [main]
    tags: ['v*']
  workflow_dispatch:

# One build per ref at a time: a newer run for the same branch or tag cancels
# the run that is still in progress.
concurrency:
  group: build-${{ github.ref }}
  cancel-in-progress: true

jobs:

  # ── Document Scanner ──────────────────────────────────────────────────────
  build-document-scanner:
    strategy:
      # Keep building the other platform when one platform fails.
      fail-fast: false
      matrix:
        include:
          # NOTE(review): artifact_glob is not referenced by any step in this
          # workflow; the upload step lists its paths explicitly.
          - os: windows-latest
            name: windows
            artifact_glob: "dist/*.exe"
          - os: ubuntu-22.04
            name: linux
            artifact_glob: "dist/Document Scanner"

    runs-on: ${{ matrix.os }}
    name: Document Scanner / ${{ matrix.name }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip

      # Linux: install system libraries required by OpenCV, pdf2image, Tesseract
      - name: Install Linux system dependencies
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y --no-install-recommends \
            tesseract-ocr tesseract-ocr-dan tesseract-ocr-deu \
            poppler-utils \
            libgtk-3-dev libwebkit2gtk-4.0-dev \
            libglib2.0-dev libcairo2-dev pkg-config \
            python3-dev

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Download the Danish spaCy model used for NER/anonymisation
      # NOTE(review): CONTRIBUTING.md and DEPENDENCIES.md name da_core_news_lg;
      # confirm the small model is intended for packaged builds.
      - name: Download spaCy model
        run: python -m spacy download da_core_news_sm

      - name: Build Document Scanner
        run: python build.py

      # Zip the Linux binary (no installer on Linux)
      - name: Package Linux binary
        if: runner.os == 'Linux'
        run: |
          cd dist
          zip -r "Document_Scanner_linux_x86_64.zip" "Document Scanner"

      # Each platform matches only one of the two paths below; fail the job if
      # neither matches so a missing build output cannot pass silently.
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: DocumentScanner-${{ matrix.name }}
          retention-days: 30
          if-no-files-found: error
          path: |
            dist/*.exe
            dist/Document_Scanner_linux_x86_64.zip

  # ── GDPRScanner ──────────────────────────────────────────────────────────
  build-m365-scanner:
    strategy:
      # Keep building the other platform when one platform fails.
      fail-fast: false
      matrix:
        include:
          # NOTE(review): artifact_glob is not referenced by any step in this
          # workflow; the upload step lists its paths explicitly.
          - os: windows-latest
            name: windows
            artifact_glob: "dist/*.exe"
          - os: ubuntu-22.04
            name: linux
            artifact_glob: "dist/GDPRScanner"

    runs-on: ${{ matrix.os }}
    name: GDPRScanner / ${{ matrix.name }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip

      - name: Install Linux system dependencies
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y --no-install-recommends \
            libgtk-3-dev libwebkit2gtk-4.0-dev \
            libglib2.0-dev libcairo2-dev pkg-config \
            python3-dev

      # This step runs on both platforms and uses backslash line
      # continuations. The default shell on Windows runners is PowerShell,
      # where a trailing backslash does not continue the line, so run it
      # under bash everywhere.
      - name: Install Python dependencies
        shell: bash
        run: |
          python -m pip install --upgrade pip
          # GDPRScanner only needs a subset — skip OCR/CV heavy deps
          pip install flask msal requests openpyxl pillow \
            python-docx \
            pywebview pystray \
            pyinstaller pyinstaller-hooks-contrib

      - name: Build GDPRScanner
        run: python build_gdpr.py

      - name: Package Linux binary
        if: runner.os == 'Linux'
        run: |
          cd dist
          zip -r "GDPRScanner_linux_x86_64.zip" "GDPRScanner"

      # The zip path must match the file name created by "Package Linux
      # binary" above. Fail the job if neither path matches anything.
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: M365Scanner-${{ matrix.name }}
          retention-days: 30
          if-no-files-found: error
          path: |
            dist/*.exe
            dist/GDPRScanner_linux_x86_64.zip

  # ── Release (only on version tags v*) ────────────────────────────────────
  release:
    name: Create GitHub Release
    needs: [build-document-scanner, build-m365-scanner]
    if: startsWith(github.ref, 'refs/tags/v')
    runs-on: ubuntu-latest
    # Creating a release requires write access to repository contents.
    permissions:
      contents: write

    steps:
      # Flatten the artifacts from every build job into one directory.
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts
          merge-multiple: true

      # Tags containing -beta or -rc are published as pre-releases.
      - name: Create release
        uses: softprops/action-gh-release@v2
        with:
          name: ${{ github.ref_name }}
          draft: false
          prerelease: ${{ contains(github.ref_name, '-beta') || contains(github.ref_name, '-rc') }}
          generate_release_notes: true
          files: artifacts/**
|
||||
91
.gitignore
vendored
Normal file
91
.gitignore
vendored
Normal file
@ -0,0 +1,91 @@
|
||||
# VERSION, CHANGELOG.md, LICENSE, README.md — always commit these
# (VERSION is plain text, not JSON, so the *.json rule does not catch it)

# ── Credentials and config (NEVER commit these) ───────────────────────────────
# Ignore every JSON file by default, then re-include the directories and
# package manifests that are meant to be versioned.
*.json
!lang/*.json
!keywords/*.json
!skus/*.json
!package*.json

# Be explicit about the most sensitive files
.m365_scanner_config.json
.m365_scanner_smtp.json
.m365_scanner_settings.json
.m365_scanner_delta.json
.m365_scanner_checkpoint.json
.m365_scanner_lang
.document_scanner_lang

# ── Databases (contain personal data) ────────────────────────────────────────
*.db
*.sqlite
*.sqlite3

# ── Audit logs (contain personal data) ───────────────────────────────────────
*.jsonl
scanner_audit.jsonl

# ── Python ────────────────────────────────────────────────────────────────────
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
venv/
.venv/
env/
ENV/
*.egg-info/
dist/
build/
.eggs/
pip-wheel-metadata/
*.egg

# ── PyInstaller output ────────────────────────────────────────────────────────
# dist/ and build/ are already ignored in the Python section above.
*.spec
*.exe
*.app

# ── Node (docx generation) ────────────────────────────────────────────────────
node_modules/
npm-debug.log*

# ── macOS ─────────────────────────────────────────────────────────────────────
.DS_Store
.DS_Store?
._*
.Spotlight-V3
.Trashes
Icon?

# ── Windows ───────────────────────────────────────────────────────────────────
Thumbs.db
ehthumbs.db
Desktop.ini
$RECYCLE.BIN/

# ── Editor / IDE ──────────────────────────────────────────────────────────────
.vscode/
.idea/
*.swp
*.swo
*~
.project
.settings/

# ── Test artifacts ────────────────────────────────────────────────────────────
.pytest_cache/
.coverage
htmlcov/
.tox/

# ── Temporary / local ─────────────────────────────────────────────────────────
*.tmp
*.bak
*.orig

# Tools folder is created by the installer — not part of the repo
tools/
|
||||
58
ACRONYMS.md
Normal file
58
ACRONYMS.md
Normal file
@ -0,0 +1,58 @@
|
||||
# Acronyms and Abbreviations
|
||||
|
||||
GDPR-related terms and abbreviations used throughout the GDPR Scanner project.
|
||||
|
||||
## GDPR / Legal
|
||||
|
||||
| Term | Full name | Meaning in context |
|
||||
|---|---|---|
|
||||
| GDPR | General Data Protection Regulation | The EU regulation (2016/679) — the primary legal framework the scanner addresses |
|
||||
| CPR | Centrale Personregister | Danish national personal identification number (DDMMYY-XXXX) |
|
||||
| PII | Personally Identifiable Information | Any data that can identify a person — names, addresses, phone numbers, IBANs etc. |
|
||||
| NER | Named Entity Recognition | ML technique (via spaCy) used to detect names, addresses, and organisations in text |
|
||||
| DPA | Data Protection Authority | Supervisory authority — in Denmark: Datatilsynet |
|
||||
| DSR | Data Subject Request | A request from an individual to access, correct, or delete their data (Art. 15/17) |
|
||||
| DPIA | Data Protection Impact Assessment | Risk assessment required before high-risk processing (Art. 35) — not yet in scanner |
|
||||
| RoPA | Register of Processing Activities | The Article 30 register — what the Art. 30 export produces |
|
||||
| IBAN | International Bank Account Number | Financial identifier detected as sensitive PII |
|
||||
| SKU | Stock Keeping Unit | In context: Microsoft license product code used to classify student vs staff accounts |
|
||||
|
||||
## GDPR Articles referenced in this project
|
||||
|
||||
| Article | Subject |
|
||||
|---|---|
|
||||
| Art. 5(1)(a) | Lawfulness, fairness, transparency |
|
||||
| Art. 5(1)(b) | Purpose limitation |
|
||||
| Art. 5(1)(c) | Data minimisation |
|
||||
| Art. 5(1)(e) | Storage limitation — basis for retention enforcement |
|
||||
| Art. 5(2) | Accountability — basis for the deletion audit log |
|
||||
| Art. 8 | Conditions for child consent — age threshold |
|
||||
| Art. 9 | Special categories of personal data (biometric, health, criminal etc.) |
|
||||
| Art. 15 | Right of access — basis for data subject lookup |
|
||||
| Art. 17 | Right to erasure ("right to be forgotten") |
|
||||
| Art. 30 | Records of processing activities — basis for Article 30 export |
|
||||
| Art. 35 | Data Protection Impact Assessment |
|
||||
| Art. 44–46 | Transfers to third countries |
|
||||
| Art. 89 | Archiving in the public interest — potential basis for retaining historical data |
|
||||
|
||||
## Danish law
|
||||
|
||||
| Term | Meaning |
|
||||
|---|---|
|
||||
| Databeskyttelsesloven | Danish Data Protection Act — supplements GDPR in Denmark |
|
||||
| Databeskyttelsesloven §6 | Sets digital consent age at 15 — below this, parental consent required |
|
||||
| Bogføringsloven | Danish Bookkeeping Act — requires accounting records to be retained for 5 years from the end of the financial year |
|
||||
| Datatilsynet | Danish Data Protection Authority — the national supervisory body |
|
||||
|
||||
## Microsoft 365 / Technical
|
||||
|
||||
| Term | Full name | Meaning in context |
|
||||
|---|---|---|
|
||||
| M365 | Microsoft 365 | The cloud productivity suite (Exchange, OneDrive, SharePoint, Teams) |
|
||||
| AAD / Entra | Azure Active Directory / Microsoft Entra ID | Microsoft's identity and access management service |
|
||||
| MSAL | Microsoft Authentication Library | Library used for OAuth2 authentication against Azure AD |
|
||||
| UPN | User Principal Name | Microsoft's unique user identifier — typically the user's email address |
|
||||
| SKU | Stock Keeping Unit | Microsoft license product code (e.g. M365EDU_A3_STUDENT) |
|
||||
| SPO | SharePoint Online | Microsoft's cloud document management platform |
|
||||
| SSE | Server-Sent Events | HTTP streaming used to push scan results to the browser in real time |
|
||||
| ORM | Object-Relational Mapping | Not used — the scanner uses raw SQL via sqlite3 |
|
||||
2458
CHANGELOG.md
Normal file
2458
CHANGELOG.md
Normal file
File diff suppressed because it is too large
Load Diff
84
CLAUDE.md
Normal file
84
CLAUDE.md
Normal file
@ -0,0 +1,84 @@
|
||||
# GDPRScanner — Claude Code Context
|
||||
|
||||
A GDPR compliance scanner for Danish educational and municipal organisations. Scans Microsoft 365 (Exchange, OneDrive, SharePoint, Teams), Google Workspace (Gmail, Google Drive), and local/SMB file systems for CPR numbers and PII. Produces Excel reports, GDPR Article 30 Word documents, and supports disposition tagging, bulk deletion, scheduled scans, and multi-language UI.
|
||||
|
||||
## How to run
|
||||
|
||||
```bash
|
||||
source venv/bin/activate
|
||||
python gdpr_scanner.py # http://localhost:5100
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
**Entry point:** `gdpr_scanner.py` — Flask app, scan orchestration globals. SSE route must stay here — blueprints can't stream.
|
||||
|
||||
**Split modules:** `scan_engine.py` (M365 + file scan), `sse.py` (SSE broadcast), `checkpoint.py`, `app_config.py` (all persistence), `cpr_detector.py`
|
||||
|
||||
**Blueprints** in `routes/` — see `routes/CLAUDE.md` for state/SSE rules.
|
||||
|
||||
**Frontend:** `templates/index.html` (SPA), `static/style.css` (all styles), `static/js/*.js` (11 ES modules + `state.js`). `static/app.js` is an archived monolith — no longer loaded.
|
||||
|
||||
**Data dir** `~/.gdprscanner/`: `scanner.db`, `config.json`, `settings.json`, `schedule.json`, `token.json`, `delta.json`, `checkpoint.json`, `smtp.json`, `machine_id` (**never delete** — Fernet key), `role_overrides.json`, `google_sa.json`, `google.json`, `src_toggles.json`, `app.lock`, `viewer_tokens.json`
|
||||
|
||||
## Non-obvious files
|
||||
|
||||
| File | Why it's not obvious |
|
||||
|---|---|
|
||||
| `app_config.py` | All persistence — profiles, settings, SMTP, lang loading, viewer tokens + PIN |
|
||||
| `routes/state.py` | Shared mutable state + scan locks (not a typical Flask state file) |
|
||||
| `routes/google_scan.py` | Google scan execution lives here, not in `google_connector.py` |
|
||||
| `routes/viewer.py` | Viewer token + PIN API; also owns brute-force rate-limit state |
|
||||
| `static/js/viewer.js` | Share modal, token CRUD, viewer PIN settings UI |
|
||||
| `lang/da.json` | Primary language — source of truth is `en.json` |
|
||||
| `build_gdpr.py` | Desktop app builder; contains embedded `LAUNCHER_CODE` for PyInstaller |
|
||||
|
||||
## Tests
|
||||
|
||||
128 tests in `tests/`. No integration tests for Flask routes or live M365/Google connections.
|
||||
|
||||
## Viewer mode (#33) — routes/viewer.py + static/js/viewer.js
|
||||
|
||||
Read-only access for DPOs and reviewers. Key invariants:
|
||||
|
||||
- **`/view` auth chain** — token (`?token=`) → session cookie (`session["viewer_ok"]`) → PIN form (if PIN configured) → 403. Never skip this order.
|
||||
- **`window.VIEWER_MODE`** — injected by Jinja2 in `index.html`. `auth.js` reads it at startup; adds `viewer-mode` class to `<body>`. All hide rules are CSS (`body.viewer-mode …`), not scattered JS checks — except `delBtn` in the card builder which is also guarded in JS. Hidden in viewer mode: `.sidebar` (entire left panel), `#logWrap`, `#progressBar`, scan/stop/profile/bulk-delete buttons, share button.
|
||||
- **`viewer_tokens.json` format** — stored as `{"tokens": [...], "__pin__": {"hash": "…", "salt": "…"}}`. The old bare-list format is migrated transparently on first write. Do not write the file as a bare list.
|
||||
- **`app.secret_key`** — derived from `machine_id` bytes so Flask sessions survive restarts. Set once at startup in `gdpr_scanner.py`; do not override it.
|
||||
- **`GET /api/db/flagged`** — returns `get_session_items()` (last completed scan session, joined with dispositions). Used exclusively by `_loadViewerResults()` in `results.js`. Do not confuse with `get_flagged_items()` (single scan_id, no disposition join).
|
||||
- **Rate-limit state** (`_pin_attempts` dict in `routes/viewer.py`) — in-memory only, resets on server restart. Intentional — a restart clears lockouts without a persistent store.
|
||||
- **Token onclick attributes** — Copy/Revoke buttons in `_renderTokenList()` pass the token as a single-quoted JS string literal (`'\'' + tok.token + '\''`), never via `JSON.stringify`. `JSON.stringify` produces double-quoted strings that break the surrounding `onclick="…"` HTML attribute.
|
||||
- **Settings Security pane** — Admin PIN and Viewer PIN groups live in `stPaneSecurity`, not `stPaneGeneral`. `switchSettingsTab('security')` in `sources.js` triggers both `stLoadPinStatus()` and `stLoadViewerPinStatus()`. The Share modal Configure button opens `openSettings('security')`.
|
||||
- **`stClearViewerPin` guard** — validates that the current-PIN field is non-empty client-side before sending the DELETE request; shows an inline error and focuses the field if empty.
|
||||
|
||||
## Sources panel resize — static/js/log.js + sources.js
|
||||
|
||||
- **`_fitSourcesPanel()`** — called at the end of every `renderSourcesPanel()` call. Clears the panel's inline height, reads `scrollHeight` (natural content height), then either restores a saved smaller preference from `localStorage` (`gdpr_sources_h`) or pins the height to `scrollHeight`. This keeps the panel exactly as tall as needed to show all sources.
|
||||
- **`_initSourcesResize()`** — attaches pointer-drag to `#sourcesResizeHandle`. On `pointerdown` it captures `scrollHeight` as the hard max; drag up shrinks, drag down is capped at that max. Saves to `localStorage` on release; clears the key if the user drags back to full height.
|
||||
- **Do not add a fixed `max-height` or `height` to `#sourcesPanel` in HTML** — height is controlled entirely by `_fitSourcesPanel()` at runtime.
|
||||
- **Do not call `_fitSourcesPanel()` before the panel has rendered** — `scrollHeight` will be 0. The call in `renderSourcesPanel()` is the correct hook; `_initSourcesResize()` only sets up the drag handler.
|
||||
|
||||
## Memory management — scan_engine.py
|
||||
|
||||
Large M365 tenants can generate enormous memory pressure. Key rules to preserve:
|
||||
|
||||
- **Email body stripped at collection time** — `_scan_user_email` calls `conn.get_message_body_text(msg)`, stores the result as `msg["_precomputed_body"]`, then deletes `msg["body"]` and `msg["bodyPreview"]` before appending to `work_items`. The processing loop reads `meta.pop("_precomputed_body", "")`. Do not re-add `body` to the `$select` query without also stripping it here.
|
||||
- **`work_items` → `deque` before processing** — converted with `deque(work_items)` and drained via `popleft()` so each item's memory is released immediately after processing. Do not convert back to a list or iterate with `enumerate()`.
|
||||
- **`del content` in file branch** — raw download bytes are deleted as soon as `content.decode()` is done (before NER/PII counting). Both the hit and no-hit paths have explicit `del content`.
|
||||
- **`del body_text` in email branch** — deleted after `_broadcast_card` call.
|
||||
- **PDF OCR images freed page-by-page** — in `document_scanner.scan_pdf`, `images[page_num-1] = None` immediately after OCR. Do not cache or accumulate page images.
|
||||
- **Memory guard** — `psutil.virtual_memory().available` checked before each M365 file download; scan skips the file if < 300 MB free.
|
||||
|
||||
## Global gotchas
|
||||
|
||||
- **Pattern matching in Python** — when using `str.replace()` to patch JS/HTML, whitespace and quote style must match exactly. Use `in` check first and print if not found.
|
||||
- **`__getattr__` on modules** — only resolves `module.name` access from outside, not bare name lookups inside function bodies. Always import directly.
|
||||
- **`JSON.stringify` inside `onclick="…"` attributes** — produces double-quoted strings that terminate the HTML attribute early. Use single-quoted JS string literals instead, or `data-*` attributes read from the handler.
|
||||
|
||||
## Directory-scoped rules
|
||||
|
||||
- `routes/CLAUDE.md` — SSE constraints, scan_progress source field, file_sources, Python gotchas
|
||||
- `static/js/CLAUDE.md` — profile dropdown, progress bar phase parsing, JS gotchas
|
||||
- `templates/CLAUDE.md` — CSS variable names, sizing rules, badge standard, design rules
|
||||
- `lang/CLAUDE.md` — i18n conventions
|
||||
130
CONTRIBUTING.md
Normal file
130
CONTRIBUTING.md
Normal file
@ -0,0 +1,130 @@
|
||||
# Contributing to GDPR Scanner
|
||||
|
||||
Thank you for considering a contribution. This project helps organisations find
|
||||
and manage personal data in Microsoft 365 tenants. Contributions that improve
|
||||
compliance coverage, reliability, and usability are very welcome.
|
||||
|
||||
---
|
||||
|
||||
## Before You Start
|
||||
|
||||
- Check the [open issues](../../issues) and [SUGGESTIONS.md](SUGGESTIONS.md) to
|
||||
see if your idea is already tracked
|
||||
- For large features, open an issue first to discuss the approach — this avoids
|
||||
wasted effort if the direction doesn't fit
|
||||
- Security vulnerabilities: see [SECURITY.md](SECURITY.md) — do not file public issues
|
||||
|
||||
---
|
||||
|
||||
## Development Setup
|
||||
|
||||
```bash
|
||||
# Clone and set up a virtual environment
|
||||
git clone https://github.com/your-org/gdpr-scanner.git
|
||||
cd gdpr-scanner
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate # macOS / Linux
|
||||
venv\Scripts\activate # Windows
|
||||
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Danish NER model (optional — needed for name/address detection)
|
||||
python -m spacy download da_core_news_lg
|
||||
|
||||
# Run the Document Scanner
|
||||
python server.py
|
||||
|
||||
# Run the GDPRScanner
|
||||
python gdpr_scanner.py
|
||||
```
|
||||
|
||||
You will need a Microsoft Azure app registration with the permissions described
|
||||
in the README to test GDPRScanner against a real tenant. A developer tenant
|
||||
is available for free via the [Microsoft 365 Developer Program](https://developer.microsoft.com/microsoft-365/dev-program).
|
||||
|
||||
---
|
||||
|
||||
## What We Welcome
|
||||
|
||||
- Bug fixes
|
||||
- Improved CPR false-positive reduction
|
||||
- New language files (see `lang/en.json` for the key list)
|
||||
- Items from [SUGGESTIONS.md](SUGGESTIONS.md) — check the status column first
|
||||
- Performance improvements for large tenants
|
||||
- Docker / deployment improvements
|
||||
- Documentation fixes
|
||||
|
||||
---
|
||||
|
||||
## Code Style
|
||||
|
||||
**Python**
|
||||
- Follow PEP 8 with a line length of 100
|
||||
- Use type hints for function signatures
|
||||
- No external formatters are enforced — just keep it consistent with the surrounding code
|
||||
- All personal data (CPR numbers) must be SHA-256 hashed before storage — never store or log raw CPR values
|
||||
- Wrap Graph API calls in try/except and handle `M365PermissionError` gracefully
|
||||
|
||||
**JavaScript (embedded in the Flask templates)**
|
||||
- `const` / `let` — no `var`
|
||||
- `async/await` over `.then()` chains
|
||||
- All user-visible strings must have a `data-i18n` key so translations work
|
||||
|
||||
**SQL**
|
||||
- Use parameterised queries — never string-format SQL
|
||||
- New columns on existing tables must have a corresponding migration in `_MIGRATIONS` in `gdpr_db.py`
|
||||
|
||||
---
|
||||
|
||||
## Adding a Language
|
||||
|
||||
1. Copy `lang/en.json` to `lang/xx.json`, where `xx` is the ISO 639-1 language code
|
||||
2. Translate all values — keys must stay identical
|
||||
3. Test by setting `~/.m365_scanner_lang` to `xx` and restarting
|
||||
|
||||
---
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
1. Fork the repository and create a branch: `git checkout -b feature/my-feature`
|
||||
2. Make your changes and test them
|
||||
3. Run a syntax check: `python -m py_compile gdpr_scanner.py m365_connector.py gdpr_db.py`
|
||||
4. Update `README.md` if your change adds or changes user-visible behaviour
|
||||
5. Open a pull request with a clear description of what it does and why
|
||||
6. Link to the relevant issue or SUGGESTIONS.md item if applicable
|
||||
|
||||
We aim to review pull requests within one week.
|
||||
|
||||
---
|
||||
|
||||
## Personal Data in Tests and Examples
|
||||
|
||||
**Do not include real CPR numbers, email addresses, or names in test data,
|
||||
example output, or documentation.** Use clearly fictional values:
|
||||
|
||||
```python
|
||||
# Good
|
||||
test_cpr = "010101-1234" # fictional — fails Modulus 11 check
|
||||
|
||||
# Bad
|
||||
test_cpr = "150385-1234" # could be a real person
|
||||
```
|
||||
|
||||
If you are testing with a real Microsoft 365 tenant, ensure you have appropriate
|
||||
authorisation to access that data.
|
||||
|
||||
---
|
||||
|
||||
## Contributor License Agreement
|
||||
|
||||
By submitting a pull request you confirm that:
|
||||
|
||||
- You wrote the contribution yourself or have the right to submit it
|
||||
- You license your contribution under the same AGPL-3.0 terms as this project
|
||||
- You understand the disclaimer in LICENSE — this is a compliance tool, not legal advice
|
||||
|
||||
---
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
Be respectful. Harassment of any kind will not be tolerated.
|
||||
140
DEPENDENCIES.md
Normal file
140
DEPENDENCIES.md
Normal file
@ -0,0 +1,140 @@
|
||||
# Python Dependencies
|
||||
|
||||
All Python modules used in the GDPR Scanner project, with a short explanation of each.
|
||||
|
||||
## Third-party packages (install via `pip install -r requirements.txt`)
|
||||
|
||||
### Web server
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `flask` | Web server and API routing for the GDPRScanner UI |
|
||||
|
||||
### Microsoft 365 authentication and API
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `msal` | Microsoft Authentication Library — handles OAuth2 device code flow (delegated) and client credentials (application) for Microsoft Graph API access |
|
||||
| `requests` | HTTP client used for all Microsoft Graph API calls |
|
||||
|
||||
### PDF handling
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `pdfplumber` | Text extraction from PDFs with a selectable text layer — fast and accurate for native PDFs |
|
||||
| `pdf2image` | Converts PDF pages to images (via Poppler) for OCR processing of scanned/image-based PDFs |
|
||||
| `pytesseract` | Python wrapper for the Tesseract OCR engine — extracts text from rasterised PDF pages and images |
|
||||
| `pypdf` | PDF metadata reading and low-level page manipulation |
|
||||
| `reportlab` | Fallback PDF redaction via overlay rendering — used when PyMuPDF is unavailable |
|
||||
| `pymupdf` (fitz) | Physically removes the text layer from PDFs — preferred GDPR-compliant redaction method |
|
||||
|
||||
### Document formats
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `python-docx` | Read and write `.docx` Word documents; also used to generate the Article 30 Register of Processing Activities report |
|
||||
| `openpyxl` | Read and write `.xlsx` Excel files — used for the scan result export workbook |
|
||||
| `img2pdf` | Converts images to PDF for archiving redacted output |
|
||||
|
||||
### Image processing and face detection
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `opencv-python` (cv2) | Face detection in images via Haar cascade classifiers; also used for face blurring during anonymisation |
|
||||
| `numpy` | Array operations required internally by OpenCV |
|
||||
| `Pillow` (PIL) | Image manipulation — thumbnail generation, format conversion, image resizing |
|
||||
|
||||
### NLP / Named Entity Recognition
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `spacy` | NLP engine for Danish Named Entity Recognition — detects person names, addresses, and organisations in text. Requires the `da_core_news_lg` model (~500 MB) |
|
||||
|
||||
### Archive scanning
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `py7zr` | 7-Zip archive support — allows the scanner to inspect `.7z` compressed files |
|
||||
|
||||
### Desktop app packaging
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `pywebview` | Renders the Flask web UI inside a native OS window, creating a macOS `.app` or Windows `.exe` without requiring a browser |
|
||||
| `pystray` | System tray icon integration for the desktop app builds |
|
||||
| `pyinstaller` | Packages the Python application and all dependencies into a standalone executable |
|
||||
| `pyinstaller-hooks-contrib` | Community-maintained hooks that help PyInstaller correctly bundle complex packages like spaCy and OpenCV |
|
||||
|
||||
---
|
||||
|
||||
## Standard library modules (no installation needed)
|
||||
|
||||
### Data storage
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `sqlite3` | SQLite database — stores scan results, CPR index (hashed), dispositions, deletion audit log, and scan history in `~/.gdprscanner/scanner.db` |
|
||||
| `json` | Config files, checkpoint files, language files, API request/response serialisation |
|
||||
| `zipfile` | Database export/import archive creation and reading; also used in the PyInstaller build process |
|
||||
| `csv` | CSV file scanning support in the Document Scanner |
|
||||
|
||||
### Security and hashing
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `hashlib` | SHA-256 hashing of CPR numbers before storage — raw CPR values are never written to the database |
|
||||
| `secrets` | Cryptographically secure random values (used in auth state parameters) |
|
||||
|
||||
### File system and paths
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `pathlib` | Cross-platform file and directory path handling throughout the codebase |
|
||||
| `tempfile` | Temporary files for PDF and image processing — avoids leaving artefacts on disk |
|
||||
| `shutil` | File copy and directory tree operations used in the build scripts |
|
||||
|
||||
### Networking and email
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `smtplib` | SMTP email delivery for the headless report feature — supports STARTTLS and SMTPS/SSL |
|
||||
| `email` | Email message construction (MIME) for the SMTP report feature |
|
||||
|
||||
### Text and pattern matching
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `re` | Regular expression engine — CPR pattern matching, phone numbers, IBANs, email addresses, Danish bank account numbers |
|
||||
|
||||
### Concurrency
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `threading` | Background scan thread so the Flask web UI stays responsive during long scans |
|
||||
| `queue` | Server-Sent Events message queue — passes scan results from the background thread to the browser |
|
||||
| `concurrent.futures` | `ProcessPoolExecutor` for parallel OCR processing of multi-page PDFs |
|
||||
|
||||
### I/O and streams
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `io` | In-memory byte streams for generating Excel and Word documents without writing to disk |
|
||||
| `struct` | Binary data unpacking (used in some PDF processing paths) |
|
||||
|
||||
### Date and time
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `time` | Unix timestamps for scan records, audit log entries, and token expiry tracking |
|
||||
| `datetime` | Human-readable date/time formatting for reports, filenames, and retention cutoff calculations |
|
||||
|
||||
### System and process
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `platform` | Detects the operating system for macOS/Windows-specific code paths |
|
||||
| `subprocess` | Launches Tesseract and Poppler as external processes for OCR and PDF rendering |
|
||||
| `argparse` | CLI argument parsing for `--headless`, `--reset-db`, `--export-db`, `--import-db` etc. |
|
||||
| `sys` | Python runtime access — sys.exit(), sys.path, sys.version |
|
||||
| `os` | Environment variables and low-level file operations |
|
||||
|
||||
### Encoding and serialisation
|
||||
| Module | Purpose |
|
||||
|---|---|
|
||||
| `base64` | Encodes thumbnail images as base64 strings for embedding in JSON API responses |
|
||||
| `struct` | Binary format parsing used in some document processing paths |
|
||||
|
||||
---
|
||||
|
||||
## External system dependencies (not Python packages)
|
||||
|
||||
These must be installed separately — the installers (`install_windows.ps1`, `install_macos.sh`) handle this automatically.
|
||||
|
||||
| Tool | Purpose |
|
||||
|---|---|
|
||||
| Tesseract OCR | The OCR engine called by `pytesseract` — required for scanning image-based PDFs |
|
||||
| Tesseract language packs | `dan` (Danish) and `eng` (English) language data files for Tesseract |
|
||||
| Poppler | PDF rendering tools (`pdftoppm`, `pdfinfo`) required by `pdf2image` |
|
||||
67
EFFORT_ESTIMATE.md
Normal file
67
EFFORT_ESTIMATE.md
Normal file
@ -0,0 +1,67 @@
|
||||
# GDPRScanner — Build Effort Estimate
|
||||
|
||||
Estimated man-hours to build this project from scratch, based on static analysis of v1.6.13.
|
||||
|
||||
---
|
||||
|
||||
## Codebase Stats
|
||||
|
||||
| Metric | Count |
|
||||
|---|---|
|
||||
| Source files (excl. dist / build / venv) | ~70 |
|
||||
| Lines of code (Python + JS + HTML + CSS) | ~25,400 |
|
||||
| Test lines | ~1,280 (128 tests) |
|
||||
| Language files | ~2,300 lines (DA / EN / DE) |
|
||||
| Current version | v1.6.13 |
|
||||
|
||||
---
|
||||
|
||||
## Estimate by Component
|
||||
|
||||
| Component | Key Files | LOC | Hours |
|
||||
|---|---|---|---|
|
||||
| **CPR detector** — regex, modulo-11 validation, context filtering, false-positive suppression | `cpr_detector.py` | 446 | 40–60 |
|
||||
| **Document scanner** — PDF text + OCR, Word, Excel, PowerPoint, images; memory-safe page-by-page processing | `document_scanner.py` | 2,659 | 160–240 |
|
||||
| **Microsoft 365 connector** — Exchange mail, OneDrive, SharePoint, Teams, delta sync, Microsoft Graph API, MSAL auth | `m365_connector.py`, `scan_engine.py`, `m365_launcher.py` | 2,748 | 240–320 |
|
||||
| **Google Workspace connector** — Gmail, Google Drive, service account + OAuth 2.0 flows | `google_connector.py`, `routes/google_scan.py`, `routes/google_auth.py` | 1,300 | 120–160 |
|
||||
| **File / SMB scanner** — local filesystem and network share scanning | `file_scanner.py` | 600 | 40–80 |
|
||||
| **Database layer** — SQLite schema, migrations, scan sessions, dispositions, delta tracking | `gdpr_db.py` | 954 | 80–120 |
|
||||
| **Export system** — formatted Excel reports, GDPR Article 30 Word documents | `routes/export.py` | 1,222 | 120–160 |
|
||||
| **Flask app + SSE + orchestration** — server-sent events, scan threading, checkpointing, resume | `gdpr_scanner.py`, `sse.py`, `checkpoint.py` | 2,400 | 120–160 |
|
||||
| **Frontend SPA** — 11 ES modules, real-time progress, results viewer, profiles, sources panel, viewer mode | `static/js/*.js`, `templates/index.html`, `static/style.css` | 7,800 | 200–280 |
|
||||
| **App config + persistence + encryption** — profiles, settings, SMTP, Fernet key, viewer tokens + PIN | `app_config.py` | 794 | 40–80 |
|
||||
| **Desktop app builder** — PyInstaller packaging for macOS and Windows, embedded webview | `build_gdpr.py` | 1,095 | 80–120 |
|
||||
| **Scheduler** — cron-like scheduled scans, background thread management | `scan_scheduler.py`, `routes/scheduler.py`, `static/js/scheduler.js` | 1,084 | 40–80 |
|
||||
| **Auth + viewer mode + roles** — M365 / Google OAuth, viewer tokens, PIN brute-force protection, SKU role classification | `routes/auth.py`, `routes/viewer.py`, `static/js/auth.js`, `static/js/viewer.js` | 750 | 80–120 |
|
||||
| **Multi-language support** — Danish, English, German UI strings | `lang/da.json`, `lang/en.json`, `lang/de.json` | 2,300 | 40–60 |
|
||||
| **Test suite** — 128 unit tests | `tests/` | 1,282 | 40–80 |
|
||||
| **Documentation + CI/CD + install scripts** — GitHub Actions, macOS / Windows installers, user manuals | `docs/`, `.github/`, `*.sh`, `*.ps1` | — | 40–60 |
|
||||
|
||||
---
|
||||
|
||||
## Total Estimate
|
||||
|
||||
| Scenario | Hours | Calendar time (1 dev, 40 hrs/wk) | Calendar time (2-person team) |
|
||||
|---|---|---|---|
|
||||
| **Low** | ~1,500 | ~9 months | ~5 months |
|
||||
| **Mid** | ~2,000 | ~12 months | ~6 months |
|
||||
| **High** | ~2,500 | ~15 months | ~8 months |
|
||||
|
||||
The mid estimate (~2,000 hours) is the most realistic for a single senior developer building iteratively toward a v1.6 release.
|
||||
|
||||
---
|
||||
|
||||
## Complexity Drivers
|
||||
|
||||
These factors push the estimate beyond what raw line counts suggest:
|
||||
|
||||
- **Microsoft Graph API** — Exchange, SharePoint, and Teams scanning involve underdocumented API behaviour, throttling, delta-token management, and permission edge cases. Research and debugging overhead is substantial.
|
||||
- **CPR validation domain knowledge** — Danish modulo-11 rules, context-aware false-positive filtering, and handling of anonymised or test numbers require specialised understanding.
|
||||
- **Memory management at scale** — The `deque`-drain pattern, page-by-page OCR image freeing, and pre-scan memory guards (`psutil`) are non-obvious and emerged through iteration on large tenants.
|
||||
- **Cross-platform desktop packaging** — Producing a signed `.app` for macOS and an `.exe` for Windows via PyInstaller, with an embedded webview, is a significant and ongoing maintenance burden.
|
||||
- **SSE + Flask threading** — Correct scan locking, SSE fan-out, and safe state sharing across threads are difficult to get right and prone to subtle race conditions.
|
||||
- **Version iteration** — v1.6.13 represents at least 13 significant release cycles. The first working prototype likely consumed roughly half the total hours; the accumulated refinement accounts for the rest.
|
||||
|
||||
---
|
||||
|
||||
*Generated 2026-04-11 based on static analysis of GDPRScanner v1.6.13.*
|
||||
49
LICENSE
Normal file
49
LICENSE
Normal file
@ -0,0 +1,49 @@
|
||||
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||
Version 3, 19 November 2007
|
||||
|
||||
Copyright (C) 2024-2026 Henrik Højmark
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
AUTHORSHIP AND AI ASSISTANCE
|
||||
|
||||
This software was developed by Henrik Højmark. Development was conducted with
|
||||
substantial AI assistance (Claude by Anthropic), used as a pair-programming
|
||||
tool. All design decisions, architecture, requirements, and validation were made
|
||||
by the author. The use of AI tooling does not diminish authorship — it is
|
||||
analogous to the use of any other development tool or reference.
|
||||
|
||||
ADDITIONAL TERMS — COMMERCIAL USE
|
||||
|
||||
If you wish to use this software in a commercial SaaS product or managed
|
||||
service without complying with the AGPL-3.0 source disclosure requirements,
|
||||
a commercial license is available. Please contact the project maintainers.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
DISCLAIMER — NOT LEGAL ADVICE
|
||||
|
||||
This software is a technical tool intended to assist with GDPR compliance
|
||||
activities. It does not constitute legal advice. The authors make no
|
||||
representation that use of this tool satisfies any specific legal obligation.
|
||||
You are responsible for ensuring your use of this software complies with
|
||||
applicable law, including GDPR, Databeskyttelsesloven, and any other relevant
|
||||
regulations in your jurisdiction.
|
||||
|
||||
CPR numbers (Danish personal identification numbers) are specially protected
|
||||
personal data. Handle scan results with appropriate care and access controls.
|
||||
205
MAINTAINER.md
Normal file
205
MAINTAINER.md
Normal file
@ -0,0 +1,205 @@
|
||||
# Maintainer Guide
|
||||
|
||||
*Written for future Henrik — assuming Python proficiency, returning after time away.*
|
||||
|
||||
---
|
||||
|
||||
## The short version
|
||||
|
||||
When something breaks, the structure tells you where to look.
|
||||
When you want to add something, `SUGGESTIONS.md` has the context.
|
||||
When you're unsure if a change broke anything, run `pytest tests/`.
|
||||
|
||||
---
|
||||
|
||||
## Project structure
|
||||
|
||||
```
|
||||
gdpr_scanner.py Entry point. Flask app, route definitions, blueprint
|
||||
registration, CLI argument handling. Thin coordinator —
|
||||
it imports from the modules below and re-exports them.
|
||||
|
||||
sse.py Server-Sent Events. broadcast(), the SSE queues, and
|
||||
the replay buffer. Touch this if live progress breaks.
|
||||
|
||||
checkpoint.py Scan checkpoint and delta token persistence. Touch this
|
||||
if resume/incremental scanning breaks.
|
||||
|
||||
app_config.py Everything configuration: i18n loading, Article 9
|
||||
keywords, admin PIN, scan profiles, SMTP config, file
|
||||
source definitions, Fernet encryption. Touch this if
|
||||
settings, language, or profiles break.
|
||||
|
||||
cpr_detector.py CPR detection engine. _scan_bytes() dispatches to the
|
||||
right scanner by file type. Touch this if detection
|
||||
accuracy changes or file type support is needed.
|
||||
|
||||
scan_engine.py M365 and file-system scan orchestration. run_scan() and
|
||||
run_file_scan(). The most complex file — ~1000 lines.
|
||||
Touch this for scan behaviour, collection logic, or
|
||||
new M365 sources.
|
||||
|
||||
gdpr_db.py SQLite persistence layer. ScanDB class. Touch this for
|
||||
DB schema changes, new tables, or query logic.
|
||||
|
||||
document_scanner.py CPR regex, NER, OCR, face detection, PDF/DOCX/XLSX
|
||||
scanning. Pre-existing module — treat as a dependency.
|
||||
Avoid modifying unless you really need to.
|
||||
|
||||
m365_connector.py Microsoft Graph API client. Auth, token refresh, all
|
||||
the iter_* fetchers. Touch this for M365 API changes.
|
||||
|
||||
google_connector.py Google Workspace connector. Service account auth, Gmail
|
||||
and Drive iterators. Touch this for Google API changes.
|
||||
|
||||
routes/ Flask blueprints — one file per functional area.
|
||||
auth.py M365 sign-in / sign-out / device code flow
|
||||
scan.py /api/scan/start, /api/scan/stop, /api/scan/status
|
||||
export.py Excel and Article 30 Word export
|
||||
database.py DB query endpoints (stats, trend, overdue, subject lookup)
|
||||
users.py User listing, role classification, SKU debug
|
||||
sources.py File source management (local and SMB)
|
||||
profiles.py Scan profile CRUD
|
||||
email.py Email report sending via SMTP / Graph API
|
||||
scheduler.py APScheduler integration
|
||||
google_auth.py Google service account connect / disconnect
|
||||
google_scan.py Google Workspace scan start / cancel / users
|
||||
app_routes.py Misc: about, language selector, settings, delta status
|
||||
|
||||
tests/ pytest test suite — 128 tests, all should pass.
|
||||
test_document_scanner.py CPR detection accuracy and false positive checks
|
||||
test_app_config.py i18n, keywords, config, profiles, encryption
|
||||
test_checkpoint.py Checkpoint and delta token persistence
|
||||
test_db.py Database round-trips, CPR hashing, dispositions
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## When something breaks
|
||||
|
||||
**Scan finds nothing / wrong count**
|
||||
→ `cpr_detector.py` → `_scan_bytes()` and `_scan_text_direct()`
|
||||
→ `scan_engine.py` → `run_scan()` for M365, `run_file_scan()` for files
|
||||
|
||||
**Progress bar / live log not updating**
|
||||
→ `sse.py` → `broadcast()`
|
||||
→ `gdpr_scanner.py` → `scan_stream()` — check `sse._current_scan_id`
|
||||
→ `static/app.js` → `_attachScanListeners()` and `scan_progress` handler
|
||||
|
||||
**Cards not appearing after scan**
|
||||
→ `static/app.js` → `scan_file_flagged` handler → calls `applyFilters()`
|
||||
→ `static/app.js` → `scan_done` handler → shows `filterBar`
|
||||
|
||||
**Export (Excel / Art.30) fails**
|
||||
→ `routes/export.py` → checks `state.flagged_items`, falls back to DB
|
||||
→ If DB is empty, a scan has not been run or results were cleared
|
||||
|
||||
**Authentication / sign-in issues**
|
||||
→ `routes/auth.py` for M365
|
||||
→ `routes/google_auth.py` for Google Workspace
|
||||
→ `gdpr_scanner.py` — `_connector = _state.connector = ...` must stay dual-assigned
|
||||
|
||||
**Settings stats show 0 (Scanned / Flagged / Scans)**
|
||||
→ `routes/database.py` → `db_stats()` — queries `flagged_items` and `scans` directly
|
||||
→ Stats populate from existing DB on app start — no re-scan needed
|
||||
→ If still 0 after a completed scan: check `~/.gdprscanner/scanner.db` exists and is not empty
|
||||
|
||||
**File scan results not persisting to DB**
|
||||
→ `scan_engine.py` → `run_file_scan()` — must call `_db.begin_scan()` not `start_scan()`
|
||||
→ Check terminal output for `[db] begin_scan failed` to confirm
|
||||
|
||||
**Settings / profiles / language not loading**
|
||||
→ `app_config.py`
|
||||
→ Config files live in `~/.gdprscanner/` — see the migration shim in `gdpr_scanner.py` for the legacy `~/.gdpr_scanner_*` paths
|
||||
|
||||
**Scheduled scans not running or not showing in UI**
|
||||
→ `scan_scheduler.py` / `scheduler.py`
|
||||
→ `routes/scheduler.py`
|
||||
→ Schedule config: `~/.gdprscanner/schedule.json`
|
||||
|
||||
---
|
||||
|
||||
## Running the tests
|
||||
|
||||
```bash
|
||||
cd GDPRScanner_v1.6.x
|
||||
pytest tests/
|
||||
```
|
||||
|
||||
Run this before every release and after any change to:
|
||||
- `document_scanner.py` — CPR detection
|
||||
- `cpr_detector.py` — file type dispatch
|
||||
- `gdpr_db.py` — database layer
|
||||
|
||||
A failing CPR detection test is a compliance issue, not just a software bug.
|
||||
|
||||
---
|
||||
|
||||
## Key data files (all in `~/.gdprscanner/`)
|
||||
|
||||
All data files live in **`~/.gdprscanner/`** (created automatically on first run).
|
||||
Existing `~/.gdpr_scanner_*` files are migrated automatically.
|
||||
|
||||
| File | Contents |
|
||||
|---|---|
|
||||
| `scanner.db` | SQLite — all scan results, CPR index, dispositions, history |
|
||||
| `config.json` | Azure client ID / tenant ID |
|
||||
| `settings.json` | Last-used scan options |
|
||||
| `schedule.json` | Scheduled scan configuration |
|
||||
| `token.json` | Cached MSAL token (delegated mode) |
|
||||
| `delta.json` | Microsoft Graph delta tokens |
|
||||
| `checkpoint.json` | Mid-scan checkpoint (deleted on completion) |
|
||||
| `smtp.json` | SMTP config (password Fernet-encrypted) |
|
||||
| `machine_id` | Fernet key for the SMTP password — never move `smtp.json` to another machine without this file |
|
||||
| `role_overrides.json` | Manual staff/student role overrides |
|
||||
| `google_sa.json` | Google service account key (chmod 600) |
|
||||
| `google.json` | Google admin email and source toggle state |
|
||||
| `src_toggles.json` | Source panel toggle state (Email, OneDrive, Gmail, etc.) |
|
||||
|
||||
---
|
||||
|
||||
## The files you will rarely touch
|
||||
|
||||
- `document_scanner.py` — treat as a dependency
|
||||
- `build_gdpr.py` — only when adding new `.py` files to the project (bundle the new file in the `datas` list)
|
||||
- `install_windows.ps1` / `install_macos.sh` — only when adding new pip dependencies
|
||||
|
||||
---
|
||||
|
||||
## Adding a new pip dependency
|
||||
|
||||
1. Add to `requirements.txt` with a version pin and a comment
|
||||
2. Add to `install_windows.ps1` (the packages array)
|
||||
3. Add to `install_macos.sh` (the packages array)
|
||||
4. If building the app: no change needed — PyInstaller follows imports automatically
|
||||
|
||||
---
|
||||
|
||||
## The documents that have the history
|
||||
|
||||
| Document | What it contains |
|
||||
|---|---|
|
||||
| `SUGGESTIONS.md` | Every feature idea, why it was or wasn't implemented, current status |
|
||||
| `CHANGELOG.md` | What changed in each version, including root causes of bugs fixed |
|
||||
| `CONTRIBUTING.md` | How to contribute, code style, translation guide |
|
||||
| `DEPENDENCIES.md` | What each dependency is for and why it was chosen |
|
||||
|
||||
When you're unsure why something was done a certain way, read `SUGGESTIONS.md` first.
|
||||
When you're debugging a regression, read `CHANGELOG.md` for the version where it appeared.
|
||||
|
||||
---
|
||||
|
||||
## The one thing to know about the module split
|
||||
|
||||
`gdpr_scanner.py` imports from all five sub-modules and re-exports them.
|
||||
The Flask blueprints in `routes/` use `__getattr__` to lazily resolve names
|
||||
from `gdpr_scanner` — so they work unchanged even though the code moved.
|
||||
|
||||
If you add a new function to `app_config.py` or `cpr_detector.py` and need
|
||||
it accessible from a route blueprint, add it to the `from app_config import (...)`
|
||||
block near the top of `gdpr_scanner.py`.
|
||||
|
||||
---
|
||||
|
||||
*This project was built by Henrik Højmark with AI assistance (Claude by Anthropic)
|
||||
as a pair-programming tool. All design decisions were made by the author.*
|
||||
629
README.md
Normal file
629
README.md
Normal file
@ -0,0 +1,629 @@
|
||||
# GDPRScanner
|
||||
|
||||
Scans Microsoft 365, Google Workspace, and local/network file systems for Danish
|
||||
CPR numbers and personal data (PII). Produces GDPR compliance reports and supports
|
||||
Article 30 record-keeping obligations.
|
||||
|
||||
---
|
||||
|
||||
**Developed by Henrik Højmark**
|
||||
|
||||
This project was built with substantial assistance from AI (Claude by Anthropic),
|
||||
used as a pair-programming tool throughout development. All design decisions,
|
||||
requirements, testing, and validation were made by the author. The AI generated
|
||||
code under direction — the same way a developer might use a senior colleague or
|
||||
an IDE with intelligent completion. The result is the author's work.
|
||||
|
||||
---
|
||||
|
||||
`gdpr_scanner.py` scans Microsoft 365 cloud sources — Exchange email (including all subfolders), OneDrive, SharePoint, and Teams — for Danish CPR numbers and PII. It connects to the Microsoft Graph API and does not require local file access.
|
||||
|
||||
### What it does (M365)
|
||||
|
||||
- **Scans Exchange mailboxes** — email body and attachments, across **all folders and subfolders** recursively (Inbox, custom folders, nested folders). System folders (Deleted Items, Junk, Drafts, Sent, etc.) are automatically skipped using Exchange `wellKnownName` identifiers (language-independent — works correctly for Danish, German, and other locales)
|
||||
- **OneDrive, SharePoint, Teams** — scans files in all connected sources
|
||||
- **Subfolder prioritisation** — custom subfolders are scanned before Inbox to prevent a large Inbox from exhausting the per-user email cap
|
||||
- **EML attachment preview** — email attachments with CPR hits are listed in the preview panel with per-attachment CPR counts
|
||||
- **Folder path in results** — each email result shows its full folder path (e.g. `Inbox / Ansøgninger pædagog SFO`) in the card and in Excel export
|
||||
- **Delete items** — flagged results can be deleted directly from the UI, individually or in bulk
|
||||
- **CPR false-positive reduction** — strict CPR validation
|
||||
- **Excel export** — multi-tab `.xlsx` report with per-source breakdown, auto-filters, and URL hyperlinks. Columns include: Name, CPR Hits, Face count, GPS (✔ if GPS in EXIF), Special category, EXIF author, Folder, Account, Role, Disposition, Date Modified, Size (KB), URL. A dedicated **GPS locations** sheet lists all items with GPS coordinates including a Google Maps link. Separate tabs for Outlook (Exchange), OneDrive, SharePoint, Teams, Gmail, Google Drive, local folders, and SMB/network shares. Summary sheet shows counts by source and GPS item total. When M365, Google Workspace, and file scans run concurrently, all results are captured in the export — not just the last completed scan
|
||||
- **Progressive streaming** — results stream card-by-card via Server-Sent Events as the scan runs
|
||||
- **Token auto-refresh** — expired tokens are detected and silently refreshed mid-scan without interrupting the UI
|
||||
- **Incremental / resumable scans** — interrupted scans save a checkpoint; the next run resumes from where it stopped rather than starting over
|
||||
- **Delta scan** — uses Graph `/delta` endpoints to fetch only changed items since the last scan, cutting API quota usage and scan time on large tenants
|
||||
- **Headless / scheduled mode** — `--headless` flag runs a non-interactive scan and writes an Excel report to disk; combine with cron or Windows Task Scheduler for fully automated compliance scans. **Settings → Scheduler** supports multiple named scan jobs, each with its own frequency (daily/weekly/monthly), time, profile, auto-email, and retention settings. Enable/disable each job with an inline toggle. In application mode, scheduled jobs reconnect automatically without requiring the browser to be open
|
||||
- **EXIF metadata extraction** — GPS coordinates, author, description, device extracted from all scanned images. GPS badge on cards when location data is present. Collapsible EXIF panel in local file previews. No extra dependencies — uses `Pillow` which is already required.
|
||||
- **`--purge`** — permanently deletes all data files created by the scanner (database, credentials, cache); use before decommissioning
|
||||
- **`--export-db`** / **`--import-db`** — export the database to a ZIP archive or restore from one; supports `--import-mode merge` (default) and `--import-mode replace`
|
||||
- **`--reset-db`** — wipe and recreate the database; also clears the checkpoint and delta tokens
|
||||
- **Email report** — send the Excel report by email directly from the UI or via `--email-to` in headless mode. Prefers **Microsoft Graph API** when connected to M365 (no SMTP AUTH needed — requires `Mail.Send` permission). Falls back to `smtplib` SMTP with STARTTLS/SSL support. A **Test** button verifies end-to-end delivery.
|
||||
- **Account name on cards** — when scanning multiple users, each card displays the owner's display name so results from different mailboxes are instantly distinguishable
|
||||
- **Retention policy enforcement** — flag items older than a configurable retention period with an **Overdue** badge; supports both rolling and fiscal-year-aligned cutoffs (e.g. Bogføringsloven Dec 31); headless auto-delete via `--retention-years`
|
||||
- **Data subject lookup** — find all flagged items containing a specific CPR number across all scans; CPR is SHA-256 hashed before querying — never stored in plaintext
|
||||
- **Disposition tagging** — compliance officers can tag each flagged item with a legal basis (retain / delete-scheduled / deleted) directly from the preview panel
|
||||
- **Read-only viewer mode** — share scan results with a DPO or manager via a secure token URL (`/view?token=…`) or a numeric PIN; viewers see the full results grid and disposition panel but cannot scan, delete, or change settings
|
||||
- **Article 30 report** — one-click export of a structured Word document (`.docx`) satisfying the GDPR Article 30 register of processing activities obligation
|
||||
- **SQLite results database** — scan results, CPR index, PII breakdown, disposition decisions, and scan history are persisted to `~/.gdprscanner/scanner.db` alongside the JSON cache, enabling cross-scan queries and trend tracking
|
||||
- **Built-in user manual** — click the **?** button in the top bar to open the manual in a dedicated window. Available in Danish and English. Printable via the browser's print function. Served from `MANUAL-DA.md` / `MANUAL-EN.md` at `/manual?lang=da|en` — always in sync with the installed version, no internet required. In the packaged desktop app the manual opens as a native pywebview window; in the browser it opens as a popup.
|
||||
|
||||
---
|
||||
|
||||
## Microsoft 365
|
||||
|
||||
See [M365_SETUP.md](docs/setup/M365_SETUP.md) for step-by-step instructions — app registration, permissions, authentication modes, and headless configuration.
|
||||
|
||||
---
|
||||
|
||||
### M365 Web UI
|
||||
|
||||
```
|
||||
python gdpr_scanner.py [--port PORT]
|
||||
```
|
||||
|
||||
> The scanner expects `templates/` and `static/` in the same directory as `gdpr_scanner.py`. Flask serves `templates/index.html` as the UI. The JavaScript is split across 12 ES modules in `static/js/` (`state.js` + 11 feature modules loaded as `<script type="module">`). All API routes live in `routes/` as Flask Blueprints registered at startup.
|
||||
|
||||
Default port: **5100**. If that port is already in use the server auto-increments (5101, 5102, …) and logs which port was chosen. Override with `--port N`. Only one instance may run at a time — a second launch exits immediately with an error rather than corrupting the shared database.
|
||||
|
||||
#### Sources panel
|
||||
|
||||
The sidebar sources panel lists all configured scan sources. Click **Sources** to open the unified Source Management modal. The panel is collapsible (▾/▸ toggle, state persisted) and resizable — drag the handle at the bottom edge to shrink it; the maximum height is automatically capped to show all available sources with no empty space.
|
||||
|
||||
**Microsoft 365 tab** — Azure credentials (Client ID, Tenant ID, Client Secret), auth mode (Application / Delegated), and per-source visibility toggles (Email, OneDrive, SharePoint, Teams). Sources toggled off are hidden from the sidebar panel and excluded from scans.
|
||||
|
||||
**Google Workspace tab** — Two authentication modes: **Workspace** (service account with domain-wide delegation — scans all users) and **Personal account** (OAuth 2.0 device-code flow — scans the signed-in account only). Once connected, per-source toggles control whether Gmail and/or Google Drive appear in the sidebar panel and are included in scans. See [GOOGLE_SETUP.md](docs/setup/GOOGLE_SETUP.md) for setup instructions.
|
||||
|
||||
**File sources tab** — Add local folder paths or SMB/CIFS network shares with a name, path, and optional SMB credentials. Each saved source appears as a checkbox in the sidebar panel (local, SMB/network). Use the **Edit** button on each row to update credentials or rename a source without deleting it.
|
||||
|
||||
**Skipped automatically:** `.recycle`, `.sync`, `.btsync`, `.trash`, `.git`, `node_modules`, `System Volume Information`, and other system/sync folders. Hidden directories (`.` prefix) are skipped too.
|
||||
|
||||
**PDF scanning in file scans:** PDFs are scanned in a dedicated subprocess spawned via `multiprocessing.get_context("spawn")` with a 60-second hard timeout. If a PDF's OCR (Tesseract/Poppler) stalls, the subprocess is terminated and the file is skipped with an error card — the scan thread is never blocked. The `spawn` context is required on macOS + Flask to avoid duplicating the server socket.
|
||||
|
||||
**Preview panel** — opens to the right of the results grid when a card is clicked. The panel is resizable: drag the left edge to adjust its width (min 280 px, max 70% of window). Width is remembered for the session. Click **×** to close.
|
||||
|
||||
**Local file preview** — clicking a result card renders the file content inline:
|
||||
|
||||
| Type | Preview |
|
||||
|---|---|
|
||||
| PDF | First 5 pages as text via `pdfplumber`, CPR numbers highlighted |
|
||||
| XLSX / XLSM / CSV | First 50 rows as a table (up to 3 sheets for Excel) |
|
||||
| DOCX / DOC | First 80 paragraphs as text, CPR numbers highlighted |
|
||||
| Images | Inline image + collapsible EXIF metadata panel (GPS, author, device, datetime) |
|
||||
| TXT / EML / MD / log | Full text with CPR highlights |
|
||||
|
||||
Sources from all tabs can be selected independently in the sidebar before scanning. The selection is saved as part of scan profiles.
|
||||
|
||||
#### User accounts panel
|
||||
|
||||
In Delegated mode, accounts are added via the device code flow. In Application mode, the scanner fetches all users in the tenant. Users are listed with checkboxes — all unchecked by default. Use **All / None** to select or deselect everyone, filter by name with the search field, or add a user manually by email with the **+** button.
|
||||
|
||||
**Role classification** — users are automatically classified as Student or Staff based on their Microsoft 365 licence. Role badges appear on every account row, on result cards, and in the Article 30 report (separate Staff and Student inventory tables).
|
||||
|
||||
Role detection works in two passes:
|
||||
1. **`skuPartNumber` fragment match** (preferred) — strings like `STANDARDWOFFPACK_FACULTY` are stable across all Microsoft licensing generations (EA, A1/A3/A5, new commerce/CSP). Runs first whenever part numbers are available.
|
||||
2. **SKU ID lookup** from `classification/m365_skus.json` — fallback for when part numbers are unavailable or for licences with no recognisable fragment (e.g. Power Automate Free assigned to faculty).
|
||||
|
||||
**Filter buttons** — **All / Ansat / Elev** filter the accounts list before selecting who to scan.
|
||||
|
||||
**SKU debug** — the magnifying-glass button next to the role filters opens a modal listing every unique SKU ID in the tenant, colour-coded student / staff / unknown. Unknown IDs can be copied directly into `classification/m365_skus.json` and take effect on the next restart.
|
||||
|
||||
**Manual role override** — if auto-classification is wrong for a specific user, click the role badge on their row to cycle through `student → staff → other → (clear)`. Overrides are stored in `~/.gdpr_scanner_role_overrides.json` and persist across restarts. A pencil indicator appears on overridden rows. Click through until the pencil disappears to revert to auto-detection.
|
||||
|
||||
**`classification/m365_skus.json`** — the SKU ID and fragment file lives in the `classification/` folder alongside `lang/` and `keywords/`. Edit it to add new or tenant-specific SKU IDs without any code change; the file is reloaded on every restart.
|
||||
|
||||
#### Date filter
|
||||
|
||||
A date-from picker limits the scan to items modified after the selected date. Quick presets: **1 yr / 2 yr / 5 yr / 10 yr / Any**. Selecting "Any" sets the date to today (no cutoff).
|
||||
|
||||
#### Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|---|---|---|
|
||||
| Scan email body | On | Scan the plain-text body of each email |
|
||||
| Scan attachments | On | Scan PDF/Word/Excel attachments inside emails |
|
||||
| Max attachment size | **20 MB** | Skip attachments larger than this threshold |
|
||||
| Max emails per user | **2000** | Cap per mailbox to avoid very long scans |
|
||||
| **Δ Delta scan** | Off | Fetch only changed items since the last scan — hover the **?** for details (see [Delta scan](#delta-scan) below) |
|
||||
| **Scan photos for faces** | Off | Detect faces in image files and flag as Art. 9 biometric data — hover the **?** for details (see [Photo scanning](#photo--biometric-scanning) below) |
|
||||
| **Retention policy** | Off | Flag items older than N years — hover the **?** for details (see [Retention policy](#retention-policy-enforcement)) |
|
||||
|
||||
#### Results grid
|
||||
|
||||
Each flagged item appears as a card showing:
|
||||
- File / subject name
|
||||
- CPR hit count badge
|
||||
- Source badge (Email / OneDrive / SharePoint / Teams)
|
||||
- Source account with role badge (**Student** / **Staff**)
|
||||
- Modified / received date
|
||||
- **Folder path** — shown for emails (e.g. `Inbox / Ansøgninger pædagog SFO`)
|
||||
- **Account name** — owner's display name shown on every card when scanning multiple users
|
||||
- **Overdue badge** — amber badge on items exceeding the configured retention cutoff
|
||||
- **Art.9** badge — purple pill listing detected Article 9 special categories (health, criminal, biometric, etc.)
|
||||
- **N faces** badge — teal pill on image files where face detection found identifiable persons (biometric data)
|
||||
- **Ext.** / icon badge — external email recipient or externally shared file (Art. 44–46 transfer risk)
|
||||
- **delete button** — appears on hover (grid view) or always visible (list view)
|
||||
|
||||
**Filter bar** — always visible above both the results grid and the preview panel. Narrow results by source, disposition, transfer risk, and risk level:
|
||||
|
||||
| Filter | Options |
|
||||
|---|---|
|
||||
| Source | All / Email / OneDrive / SharePoint / Teams |
|
||||
| Disposition | All / Unreviewed / Retain (legal/legitimate/contract) / Delete-scheduled / Deleted |
|
||||
| Transfer risk | All / External recipient / External share / Shared |
|
||||
| Risk level | All risk levels / Art. 9 special category / Photos / biometric |
|
||||
|
||||
#### Delete items
|
||||
|
||||
Individual items can be deleted directly from their card (hover to reveal the delete button, then confirm). Emails are moved to Deleted Items; files go to the recycle bin.
|
||||
|
||||
The **Delete** button in the filter bar opens the **Bulk Delete** modal, which lets you filter by:
|
||||
|
||||
| Criterion | Description |
|
||||
|---|---|
|
||||
| Source type | Email / OneDrive / SharePoint / Teams / All |
|
||||
| Min CPR hits | Only delete items with at least N CPR numbers found |
|
||||
| Older than date | Only delete items older than a given date |
|
||||
|
||||
The **Filter overdue** quick button pre-populates the date filter with the exact retention cutoff from the database, making it one click to select all overdue items for deletion.
|
||||
|
||||
A live preview shows how many items match before you confirm. Errors are reported per-item in the log panel.
|
||||
|
||||
> **Requires write permissions** — see the permissions section of [M365_SETUP.md](docs/setup/M365_SETUP.md).
|
||||
|
||||
#### Excel export
|
||||
|
||||
The **⬇ Excel** button exports all current results to a `.xlsx` file (`m365_scan_YYYYMMDD_HHMMSS.xlsx`) with five sheets:
|
||||
|
||||
| Sheet | Contents |
|
||||
|---|---|
|
||||
| Summary | Scan timestamp, total count, per-source breakdown |
|
||||
| Email | Flagged emails — Name/Subject, CPR Hits, **Folder**, Source Account, Date Modified, Size, URL |
|
||||
| OneDrive | Flagged OneDrive files |
|
||||
| SharePoint | Flagged SharePoint files |
|
||||
| Teams | Flagged Teams files |
|
||||
|
||||
In macOS app builds, the export opens a native Save dialog instead of a browser download.
|
||||
|
||||
The **Art.30** button generates a **GDPR Article 30 Register of Processing Activities** as a structured Word document (`.docx`). See [Article 30 report](#article-30-report) below.
|
||||
|
||||
#### Email report
|
||||
|
||||
Configure email delivery in **Settings → Email report**. Click **Save** to store your SMTP settings, **Test** to send a real test email to the configured recipients, and **Send now** to dispatch the latest scan report. When connected to Microsoft 365, the scanner sends via the **Graph API** (`Mail.Send` permission required — add it in Azure AD → App registrations → API permissions). SMTP is used as a fallback when Graph is unavailable.
|
||||
|
||||
| Field | Description |
|
||||
|---|---|
|
||||
| SMTP host | e.g. `smtp.office365.com`, `smtp.gmail.com` |
|
||||
| Port | `587` for STARTTLS (default), `465` for SMTPS/SSL |
|
||||
| Username | SMTP login — usually your sender email address |
|
||||
| Password | Saved to `~/.gdpr_scanner_smtp.json` (permissions 600). Encrypted at rest using Fernet — key in `~/.gdpr_scanner_machine_id` (chmod 0o600, never share) |
|
||||
| Graph API | When connected to M365, email is sent via `/me/sendMail` (delegated) or `/users/{sender}/sendMail` (app mode) — no SMTP password needed. Requires `Mail.Send` Graph permission with admin consent. |
|
||||
| From address | Sender address (defaults to username if blank) |
|
||||
| STARTTLS | Enable STARTTLS on port 587 (recommended) |
|
||||
| SSL | Use SMTPS on port 465 instead |
|
||||
| Recipients | Comma or semicolon separated list of addresses |
|
||||
|
||||
Click **Save** to persist the settings. The password is stored separately from scan settings and never returned to the browser — subsequent loads show "(password saved)". Click **Send now** to email the report immediately with the current results.
|
||||
|
||||
> **No extra dependencies** — uses Python's built-in `smtplib`. Works with Office 365, Gmail, and any standard SMTP server.
|
||||
|
||||
#### About
|
||||
|
||||
Click **About** in the sidebar footer to see app version, Python version, MSAL version, Requests version, and openpyxl version.
|
||||
|
||||
---
|
||||
|
||||
## Google Workspace
|
||||
|
||||
See [GOOGLE_SETUP.md](docs/setup/GOOGLE_SETUP.md) for step-by-step instructions — service account creation, domain-wide delegation, OAuth scopes, and OU-based role classification.
|
||||
|
||||
---
|
||||
|
||||
### Incremental / resumable scans
|
||||
|
||||
If a scan is stopped (via **■ Stop** or by closing the app) before it finishes, a checkpoint is saved to `~/.gdpr_scanner_checkpoint.json`. The next time you click **▶ Scan** with the same configuration, a banner appears above the progress bar:
|
||||
|
||||
```
|
||||
⏸ Previous scan interrupted — 847 scanned, 12 found [Resume] [Start fresh]
|
||||
```
|
||||
|
||||
- **Resume** — skips the 847 already-scanned items, re-emits the 12 previously found cards immediately, and continues from where it left off
|
||||
- **Start fresh** — discards the checkpoint and starts a new full scan
|
||||
|
||||
The checkpoint is keyed by a hash of the scan configuration (sources + users + date cutoff). Changing any of those settings automatically starts fresh. The checkpoint is deleted automatically when a scan completes successfully.
|
||||
|
||||
---
|
||||
|
||||
### Delta scan
|
||||
|
||||
Delta scan uses the Microsoft Graph `/delta` API to fetch only items that have **changed since the last scan**, dramatically reducing Graph API quota usage and scan time on large tenants.
|
||||
|
||||
#### How it works
|
||||
|
||||
1. Run one **full scan** first (Delta checkbox off) — this establishes baseline delta tokens
|
||||
2. Tick **Δ Delta scan** and run again — only items added, modified, or deleted since the previous scan are fetched and CPR-scanned
|
||||
3. Delta tokens are saved automatically to `~/.gdpr_scanner_delta.json` after each successful scan
|
||||
4. To force a full rescan, click **Clear tokens** under the checkbox (or delete the file)
|
||||
|
||||
Delta tokens are stored **per-source**:
|
||||
|
||||
| Token key | Covers |
|
||||
|---|---|
|
||||
| `onedrive:{user_id}` | One user's OneDrive drive |
|
||||
| `sharepoint:{drive_id}` | One SharePoint document library |
|
||||
| `teams:{drive_id}` | One Teams channel file store |
|
||||
| `email:{user_id}:{folder_id}` | One mail folder for one user |
|
||||
|
||||
If a token expires (Graph returns HTTP 410 Gone), that source falls back to a full collection automatically and a fresh token is saved. Other sources are unaffected.
|
||||
|
||||
Deleted items returned by delta (items with a `deleted` or `@removed` marker) are skipped during CPR scanning.
|
||||
|
||||
After each delta scan, the log panel shows:
|
||||
```
|
||||
Scan complete — 3 flagged of 41 (Δ delta — 6 source(s) indexed)
|
||||
```
|
||||
|
||||
#### Delta in headless mode
|
||||
|
||||
Pass `"delta": true` inside the `options` block of your `--settings` JSON to enable delta for scheduled scans:
|
||||
|
||||
```json
|
||||
{
|
||||
"options": { "delta": true, "older_than_days": 365 }
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Headless mode (scheduled / automated scans)
|
||||
|
||||
> **Note:** The scheduler engine lives in `scan_scheduler.py`.
|
||||
|
||||
Run the scanner without a browser UI for cron jobs and Windows Task Scheduler:
|
||||
|
||||
```bash
|
||||
python gdpr_scanner.py --headless --output ~/Reports/ --settings settings.json
|
||||
```
|
||||
|
||||
See [M365_SETUP.md](docs/setup/M365_SETUP.md) for the full settings file format, CLI flags, and SMTP configuration.
|
||||
|
||||
|
||||
---
|
||||
|
||||
### SQLite results database
|
||||
|
||||
Scan results are persisted to `~/.gdprscanner/scanner.db` (SQLite) automatically after every scan, alongside the existing JSON session cache. The database enables cross-scan queries, trend tracking, and compliance workflows that are impractical with JSON alone.
|
||||
|
||||
**Tables:**
|
||||
|
||||
| Table | Contents |
|
||||
|---|---|
|
||||
| `scans` | One row per completed scan run — sources, user count, options, delta flag |
|
||||
| `flagged_items` | One row per flagged file or email — full card data |
|
||||
| `cpr_index` | `(SHA-256(cpr), item_id, scan_id)` — CPR numbers stored as hashes only, never plaintext |
|
||||
| `pii_hits` | Per-type PII counts per item (phone, IBAN, name, address, etc.) |
|
||||
| `dispositions` | Compliance officer decisions per item |
|
||||
| `scan_history` | Aggregated stats per scan for trend tracking |
|
||||
|
||||
**API endpoints:** `GET /api/db/stats`, `GET /api/db/trend`, `GET /api/db/scans`, `POST /api/db/subject`, `GET /api/db/overdue`, `POST /api/db/disposition`, `GET /api/db/disposition/<id>`
|
||||
|
||||
If `gdpr_db.py` is not present, the scanner falls back to JSON-only mode silently.
|
||||
|
||||
---
|
||||
|
||||
### Data subject lookup
|
||||
|
||||
The **Data subject lookup** button in the sidebar opens a modal where you can search for all flagged items containing a specific CPR number across all scans.
|
||||
|
||||
- Enter a CPR number in `DDMMYY-XXXX` format and press Enter or click **Search**
|
||||
- Results show file/email name, source type, date, and CPR hit count
|
||||
- **Delete all for this person** button triggers bulk deletion of all matching items and refreshes the grid
|
||||
- The CPR number is SHA-256 hashed before querying — it is never stored in plaintext in the database or logs
|
||||
|
||||
This directly supports the GDPR **right of access (Article 15)** and **right to erasure (Article 17)**.
|
||||
|
||||
---
|
||||
|
||||
### Disposition tagging
|
||||
|
||||
Every flagged item can be tagged with a compliance decision from the preview panel. Open any card, and the **Disposition** dropdown appears below the metadata strip.
|
||||
|
||||
| Value | Meaning |
|
||||
|---|---|
|
||||
| Unreviewed | Default — not yet assessed |
|
||||
| Retain — legal obligation | Must keep (e.g. Bogføringsloven) |
|
||||
| Retain — legitimate interest | Justified retention, documented |
|
||||
| Retain — contract | Part of an active contract |
|
||||
| Delete — scheduled | Mark for deletion at next cleanup run |
|
||||
| Deleted | Already actioned |
|
||||
|
||||
Dispositions are saved to the `dispositions` table in the SQLite database and included in the Article 30 report.
|
||||
|
||||
---
|
||||
|
||||
### Retention policy enforcement
|
||||
|
||||
Enable **Retention policy** in the options panel to flag items that exceed your retention threshold.
|
||||
|
||||
**Settings:**
|
||||
|
||||
| Setting | Description |
|
||||
|---|---|
|
||||
| Retention years | How many years to retain (default: 5) |
|
||||
| Fiscal year end | Rolling (from today) / 31 Dec (Bogføringsloven) / 30 Jun / 31 Mar |
|
||||
|
||||
**Two cutoff modes:**
|
||||
|
||||
- **Rolling** — exactly N years before today. Correct for GDPR general data minimisation.
|
||||
- **Fiscal year** — N years before the last completed fiscal year end. Correct for Bogføringsloven, which requires records for 5 years *from the end of the financial year*. A document from January 2020 with a Dec 31 FY must be kept until **31 December 2025**, not just until January 2025.
|
||||
|
||||
A live hint below the settings shows the exact cutoff date before you scan.
|
||||
|
||||
After scanning, items older than the cutoff receive an amber **Overdue** badge on their card. In the bulk-delete modal, **Filter overdue** pre-fills the date filter with the exact cutoff for one-click selection.
|
||||
|
||||
**Headless mode:**
|
||||
```bash
|
||||
python gdpr_scanner.py --headless --output ~/Reports/ --retention-years 5 --fiscal-year-end 12-31
|
||||
```
|
||||
Non-interactive (cron): deletes automatically. Interactive (TTY): prompts for confirmation.
|
||||
|
||||
---
|
||||
|
||||
### Scan profiles
|
||||
|
||||
Named, reusable scan configurations — save the current sidebar state as a profile, then load it in one click or run it headlessly by name.
|
||||
|
||||
- **Save** — prompts for a name and saves all current settings (sources, options, user selection, retention) as a profile
|
||||
- **Profile dropdown** — switch between saved profiles; applying a profile populates the entire sidebar instantly
|
||||
- **Profiles button** — opens the profile management modal to rename, edit description, duplicate, or delete profiles
|
||||
- Profiles persist across restarts in `~/.gdprscanner/settings.json`
|
||||
|
||||
**Headless profile usage:**
|
||||
```bash
|
||||
python gdpr_scanner.py --headless --profile "Nightly email scan"
|
||||
python gdpr_scanner.py --list-profiles
|
||||
python gdpr_scanner.py --save-profile "Weekly full scan" --sources email onedrive
|
||||
python gdpr_scanner.py --delete-profile "Old scan"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Photo / biometric scanning
|
||||
|
||||
Enable **Scan photos for faces** in the Options panel to detect photographs of identifiable persons in OneDrive, SharePoint, and Teams files.
|
||||
|
||||
- **Formats:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`, `.webp`, `.heic`, `.heif`
|
||||
- **Face detection:** OpenCV Haar cascade (`minNeighbors=8`, `min_size=80px` — conservative; requires the "Scan photos for faces" opt-in)
|
||||
- **EXIF extraction** — always-on for images regardless of the face detection toggle:
|
||||
- **GPS coordinates** — extracted and converted to decimal degrees; GPS badge on cards; Google Maps link in preview
|
||||
- **PII fields** — Author, Artist, Copyright, Description, UserComment, Keywords checked for content
|
||||
- **Device** — camera make/model
|
||||
- Images with GPS or PII-bearing EXIF are flagged even without CPR hits
|
||||
- `special_category` gains `gps_location` and/or `exif_pii` entries
|
||||
- **GDPR classification:** Images with detected faces are automatically tagged as **Art. 9 biometric data** — the same heightened protection as health or criminal records
|
||||
- **N faces badge** — teal pill on cards; filterable via "Photos / biometric" in the Risk level dropdown
|
||||
- **Article 30 report** — dedicated section listing all photo items with a 4-bullet retention guidance block (purpose limitation, pupil consent under Databeskyttelsesloven §6, website removal, archiving)
|
||||
- **Excel export** — Face count column added
|
||||
- **Performance:** Slower than CPR scanning — opt-in only. Recommended for targeted scans of known image folders rather than full-tenant scans
|
||||
|
||||
> **Datatilsynet guidance:** Danish schools have received enforcement actions specifically for unlawful retention of pupil photographs. Pupils under 15 require parental consent (Databeskyttelsesloven §6).
|
||||
|
||||
---
|
||||
|
||||
### Article 9 special categories
|
||||
|
||||
The scanner detects keywords from nine GDPR Article 9 special categories in proximity to CPR numbers:
|
||||
|
||||
| Category | Examples |
|
||||
|---|---|
|
||||
| Health | diagnose, sygemelding, behandling, medicin, psykiatri |
|
||||
| Mental health | depression, angst, stress, selvskade |
|
||||
| Criminal records | straffeoplysning, dom, straffeattest, sigtelse |
|
||||
| Trade union | fagforening, tillidsrepræsentant, overenskomst |
|
||||
| Religion | kirke, moské, religiøs, konfirmation |
|
||||
| Ethnicity | nationalitet, herkomst, etnicitet |
|
||||
| Political opinions | politisk, parti, valgkreds |
|
||||
| Biometric | fingeraftryk, ansigtsgenkendelse, biometrisk |
|
||||
| Sexual orientation | seksuel orientering |
|
||||
|
||||
Keywords are loaded from `keywords/da.json` (Danish). English (`en.json`) and German (`de.json`) files can be added without code changes. Detection uses compiled per-category regex patterns for efficient matching.
|
||||
|
||||
---
|
||||
|
||||
### Database export / import
|
||||
|
||||
**Export** and **Import** buttons in the sidebar **Database** section back up or restore the entire compliance record.
|
||||
|
||||
```bash
|
||||
# CLI equivalents
|
||||
python gdpr_scanner.py --export-db ~/compliance/gdpr_export_2026.zip
|
||||
python gdpr_scanner.py --import-db ~/compliance/gdpr_export_2026.zip
|
||||
python gdpr_scanner.py --import-db ~/compliance/gdpr_export_2026.zip --import-mode replace --yes
|
||||
```
|
||||
|
||||
**Export ZIP contents:**
|
||||
|
||||
| File | Contents |
|
||||
|---|---|
|
||||
| `export_meta.json` | Export date, schema version, row counts |
|
||||
| `scans.json` | Scan run summaries |
|
||||
| `flagged_items.json` | Flagged items — thumbnails stripped |
|
||||
| `cpr_index.json` | CPR hashes (SHA-256 only) |
|
||||
| `pii_hits.json` | Per-type PII counts |
|
||||
| `dispositions.json` | Compliance decisions with legal basis |
|
||||
| `scan_history.json` | Aggregated trend data |
|
||||
| `deletion_log.json` | Full deletion audit trail |
|
||||
|
||||
**Import modes:** `merge` (default — adds dispositions and deletion log only, safe on live DB) or `replace` (full restore, requires `--yes`).
|
||||
|
||||
---
|
||||
|
||||
### Article 30 report
|
||||
|
||||
The **Art.30** button in the filter bar generates a GDPR **Article 30 Register of Processing Activities** as a Word document (`.docx`).
|
||||
|
||||
**Document sections:**
|
||||
|
||||
| Section | Contents |
|
||||
|---|---|
|
||||
| Summary | Scan date, items scanned, flagged count, CPR hits, estimated data subjects, overdue count, Art. 9 item count, photo/biometric count; per-source breakdown |
|
||||
| Data categories | Every detected PII type with hit counts and GDPR classification (Art. 9 vs Art. 4) |
|
||||
| Data inventory | Full item list sorted overdue-first; separate **Staff** and **Student** tables; name, source, account, date, CPR hits, disposition |
|
||||
| Retention analysis | Separate table of overdue items *(if any)* |
|
||||
| Art. 9 special categories | Item list with detected category breakdown *(if any)* |
|
||||
| Photographs / biometric data | Photo item list with face counts and 4-bullet retention guidance *(if photo scanning was enabled)* |
|
||||
| Compliance trend | Last 10 scans with flagged/overdue counts *(if scan history exists)* |
|
||||
| Deletion audit log | Every deletion with timestamp, actor, reason, and legal basis |
|
||||
| Methodology | Scanning approach and GDPR articles referenced (Art. 5, 9, 15, 17, 30) |
|
||||
|
||||
The document is dated and can be stored as evidence of ongoing compliance activity for supervisory authorities.
|
||||
|
||||
> **Requires** `python-docx` — included in `requirements.txt`.
|
||||
|
||||
---
|
||||
|
||||
### Building the M365 app
|
||||
|
||||
`build_gdpr.py` packages `gdpr_scanner.py` + `m365_connector.py` + `lang/` into a standalone native app — same PyInstaller / pywebview approach as `build.py`.
|
||||
|
||||
```bash
|
||||
python build_gdpr.py # build for the current platform
|
||||
python build_gdpr.py --icons-only # regenerate icon_m365.icns / icon_m365.ico
|
||||
```
|
||||
|
||||
> **Note:** Same cross-compilation restriction applies — must build on the target platform.
|
||||
|
||||
---
|
||||
|
||||
## Internationalisation
|
||||
|
||||
Language files live in `lang/` alongside the scripts. As of v1.6.3 they are JSON files:
|
||||
|
||||
| File | Language |
|
||||
|---|---|
|
||||
| `lang/en.json` | English |
|
||||
| `lang/da.json` | Danish |
|
||||
| `lang/de.json` | German |
|
||||
|
||||
**Auto-detection:** On macOS and Linux the system locale is read from `defaults read -g AppleLocale` / `$LANG`. The detected language is used automatically.
|
||||
|
||||
**Manual override:** Create `~/.document_scanner_lang` for the Document Scanner, or `~/.gdprscanner/lang` for GDPRScanner (the M365 scanner), containing just the language code, e.g. `da`. This persists across restarts.
|
||||
|
||||
**In-app switcher:** A language selector appears in the sidebar footer. Selecting a language saves the override and applies the new translations **in place** — the page does not reload and scan results are preserved.
|
||||
|
||||
**Adding a language:** Copy `lang/en.json`, translate all values, save as e.g. `lang/fr.json`. The app picks it up automatically on next start.
|
||||
|
||||
**Exchange folder names** are returned by Microsoft Graph in the account's own language (e.g. "Indbakke" for Danish users) and are displayed as-is. System folders are skipped using Exchange `wellKnownName` identifiers which are always in English regardless of locale, so skip logic is language-independent.
|
||||
|
||||
---
|
||||
|
||||
## Open Source
|
||||
|
||||
GDPR Scanner is open source software, licensed under the **GNU Affero General Public License v3.0 (AGPL-3.0)**.
|
||||
|
||||
This means you are free to use, study, modify, and distribute the software. If you run a modified version as a network service (e.g. a hosted GDPR compliance tool), you must publish the source of your modifications under the same licence.
|
||||
|
||||
A **commercial licence** is available for organisations that need to deploy the software as a managed service without the AGPL source disclosure requirement. Contact the maintainers for details.
|
||||
|
||||
> **Disclaimer:** This tool is intended to assist with GDPR compliance activities. It does not constitute legal advice. You are responsible for ensuring your use complies with applicable law.
|
||||
|
||||
### Contributing
|
||||
|
||||
Contributions are welcome — bug fixes, new language files, performance improvements, and items from [SUGGESTIONS.md](SUGGESTIONS.md).
|
||||
|
||||
Please read [CONTRIBUTING.md](CONTRIBUTING.md) before submitting a pull request. For security vulnerabilities, follow the process in [SECURITY.md](SECURITY.md) — do not file public issues.
|
||||
|
||||
```bash
|
||||
# Quick start for contributors
|
||||
git clone https://github.com/your-org/gdpr-scanner.git
|
||||
cd gdpr-scanner
|
||||
python3 -m venv venv && source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
python gdpr_scanner.py # GDPRScanner on port 5100 (auto-increments if in use)
|
||||
```
|
||||
|
||||
### Test suite
|
||||
|
||||
GDPRScanner ships with a `pytest` test suite covering the CPR detection engine, configuration layer, checkpoint persistence, and the SQLite database.
|
||||
|
||||
```bash
|
||||
pip install pytest
|
||||
pytest tests/
|
||||
```
|
||||
|
||||
**112 tests across 4 modules — all expected to pass.**
|
||||
|
||||
| Module | Tests | Covers |
|
||||
|---|---|---|
|
||||
| `tests/test_document_scanner.py` | 36 | `is_valid_cpr`, `extract_matches`, `scan_docx`, `scan_xlsx`, `_scan_bytes` — CPR detection, false-positive suppression, binary crash safety |
|
||||
| `tests/test_app_config.py` | 34 | i18n loading, Article 9 keyword detection, config round-trip, admin PIN, profiles CRUD, Fernet encryption |
|
||||
| `tests/test_checkpoint.py` | 18 | Checkpoint key stability, save/load/clear, wrong-key isolation, delta token round-trip |
|
||||
| `tests/test_db.py` | 24 | Scan lifecycle, CPR hash-only storage, data subject lookup, dispositions, export/import cycle |
|
||||
|
||||
Each new module (`cpr_detector.py`, `app_config.py`, `checkpoint.py`, `gdpr_db.py`) is importable in isolation without Flask or MSAL — tests run without any cloud credentials or a running server.
|
||||
|
||||
The test suite should be run before every release and after any change to `document_scanner.py`, `cpr_detector.py`, or `gdpr_db.py`. CPR detection is the legal core of the tool — a false negative means a real GDPR violation goes undetected.
|
||||
|
||||
### Roadmap
|
||||
|
||||
See [SUGGESTIONS.md](SUGGESTIONS.md) for the full feature roadmap with implementation status.
|
||||
|
||||
---
|
||||
|
||||
## Project files
|
||||
|
||||
| File | Description |
|
||||
|---|---|
|
||||
| `gdpr_scanner.py` | Flask entry point — scan orchestration, SSE route (`/api/scan/stream`), root route |
|
||||
| `scan_engine.py` | M365 and local/SMB scan logic — `run_scan()`, `run_file_scan()` |
|
||||
| `app_config.py` | All persistence — profiles, settings, SMTP config, lang loading, Fernet encryption |
|
||||
| `sse.py` | SSE broadcast queue and `_current_scan_id` |
|
||||
| `checkpoint.py` | Mid-scan checkpoint save/load, `_checkpoint_key()` |
|
||||
| `cpr_detector.py` | CPR pattern matching and validation |
|
||||
| `document_scanner.py` | Core scanning, redaction, OCR, NER, and PII detection engine |
|
||||
| `gdpr_db.py` | SQLite persistence layer — scan results, CPR index, PII hits, dispositions, scan history |
|
||||
| `m365_connector.py` | Microsoft Graph API client — auth, token refresh, email/OneDrive/SharePoint/Teams fetchers, delete methods |
|
||||
| `google_connector.py` | Google Workspace API client — Gmail, Drive, Admin SDK |
|
||||
| `file_scanner.py` | Unified local + SMB/CIFS file iterator — `FileScanner.iter_files()` yields `(path, bytes, metadata)`. SMB reads use a 1-slot sliding-window `ThreadPoolExecutor` (`PREFETCH_WINDOW=1`) with a 60-second per-file timeout. |
|
||||
| `scan_scheduler.py` | In-process APScheduler wrapper — multi-job scheduled scan engine |
|
||||
| `templates/index.html` | Single-page HTML shell — Jinja2 template. Two variables: `app_version`, `lang_json`. |
|
||||
| `static/style.css` | All application CSS — custom properties, layout, components, light/dark themes |
|
||||
| `static/js/state.js` | Shared mutable state module (`export const S`) — imported by all 11 feature modules |
|
||||
| `static/js/*.js` | 11 ES modules: `ui`, `log`, `users`, `auth`, `profiles`, `scan`, `results`, `sources`, `scheduler`, `connector`, `viewer` |
|
||||
| `static/app.js` | Archived JS monolith — no longer loaded |
|
||||
| `routes/__init__.py` | Blueprint package marker |
|
||||
| `routes/state.py` | Shared mutable state (`connector`, `flagged_items`, `LANG`, scan locks) — imported by all blueprints |
|
||||
| `routes/auth.py` | `/api/auth/*` — M365 connect, status, sign-out, config |
|
||||
| `routes/google_auth.py` | `/api/google/*` — Google Workspace connect, status, sign-out |
|
||||
| `routes/google_scan.py` | `/api/google/scan/*` — Google scan execution |
|
||||
| `routes/scan.py` | `/api/scan/*` — start/stop, checkpoint, settings, src toggles |
|
||||
| `routes/users.py` | `/api/users/*` — listing, role overrides, license debug |
|
||||
| `routes/sources.py` | `/api/file_sources/*` and `/api/file_scan/start` |
|
||||
| `routes/profiles.py` | `/api/profiles/*` and `/api/delta/*` |
|
||||
| `routes/scheduler.py` | `/api/scheduler/*` — job CRUD, status, history, run-now |
|
||||
| `routes/email.py` | `/api/smtp/*` and `/api/send_report` |
|
||||
| `routes/database.py` | `/api/db/*`, `/api/admin/*`, `/api/preview`, `/api/thumb` |
|
||||
| `routes/export.py` | `/api/export_excel`, `/api/export_article30`, `/api/delete_bulk` |
|
||||
| `routes/viewer.py` | `/view`, `/api/viewer/tokens`, `/api/viewer/pin` — read-only viewer mode: token + PIN auth, share-link management |
|
||||
| `routes/app_routes.py` | `/api/about`, `/api/langs`, `/api/lang`, `/manual` |
|
||||
| `docs/manuals/MANUAL-EN.md` | End-user manual in English (15 sections) — served at `/manual?lang=en` |
|
||||
| `docs/manuals/MANUAL-DA.md` | End-user manual in Danish (15 sections) — served at `/manual?lang=da` |
|
||||
| `docs/setup/M365_SETUP.md` | Step-by-step Microsoft 365 setup guide |
|
||||
| `docs/setup/GOOGLE_SETUP.md` | Step-by-step Google Workspace setup guide |
|
||||
| `build_gdpr.py` | PyInstaller build script — generates `m365_launcher.py`, packages desktop app |
|
||||
| `lang/en.json` | English translations (source of truth) |
|
||||
| `lang/da.json` | Danish translations (primary language) |
|
||||
| `lang/de.json` | German translations |
|
||||
| `keywords/da.json` | Danish Article 9 special-category keyword list (454 keywords, 9 categories) |
|
||||
| `classification/m365_skus.json` | Microsoft Education SKU IDs and part-number fragments for student/staff role classification — edit to add new SKUs without code changes |
|
||||
| `classification/google_ou_roles.json` | Google OU path → role mapping |
|
||||
| `requirements.txt` | Python dependency list — use with `pip install -r requirements.txt` |
|
||||
| `run_tests.sh` | Activates venv and runs the full test suite; forwards any extra args to pytest |
|
||||
| `install_macos.sh` | Bash installer — Homebrew, Python 3.12, Tesseract, Poppler, `./venv`, spaCy model |
|
||||
| `install_windows.ps1` | PowerShell installer — Chocolatey, Python 3.12, Tesseract, Poppler, `.\\venv`, spaCy model |
|
||||
| `VERSION` | Current version number — single source of truth |
|
||||
| `CHANGELOG.md` | Release history and versioning policy |
|
||||
| `LICENSE` | GNU Affero General Public License v3.0 |
|
||||
| `CONTRIBUTING.md` | Development setup, code style guide, and pull request process |
|
||||
| `SECURITY.md` | How to report security vulnerabilities responsibly |
|
||||
| `.gitignore` | Excludes credentials, databases, venv, and build artifacts from version control |
|
||||
73
SECURITY.md
Normal file
73
SECURITY.md
Normal file
@ -0,0 +1,73 @@
|
||||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
| Version | Supported |
|
||||
|---------|-----------|
|
||||
| Latest | ✅ Yes |
|
||||
|
||||
We support only the latest release. Please update before reporting a bug.
|
||||
|
||||
---
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
**Please do not file a public GitHub issue for security vulnerabilities.**
|
||||
|
||||
This tool processes sensitive personal data including Danish CPR numbers (national
|
||||
identifiers). Security issues should be reported privately so a fix can be prepared
|
||||
before public disclosure.
|
||||
|
||||
**Report to:** Open a [GitHub Security Advisory](https://github.com/your-org/gdpr-scanner/security/advisories/new)
|
||||
(on the repository page: **Security** tab → Advisories → New draft security advisory)
|
||||
|
||||
Please include:
|
||||
- A description of the vulnerability and its potential impact
|
||||
- Steps to reproduce the issue
|
||||
- Any relevant logs or screenshots (redact personal data)
|
||||
- Your suggested fix if you have one
|
||||
|
||||
We will acknowledge receipt within **3 business days** and aim to release a fix
|
||||
within **14 days** for critical issues.
|
||||
|
||||
---
|
||||
|
||||
## Scope
|
||||
|
||||
Issues we consider in scope:
|
||||
|
||||
- Authentication bypass or token leakage in the M365 connector
|
||||
- Unauthorised access to scan results via the web UI
|
||||
- CPR numbers or other personal data exposed in logs, error messages, or API responses
|
||||
- SQL injection or path traversal in the local scanner or database layer
|
||||
- SSRF (Server-Side Request Forgery) via URL inputs
|
||||
- Dependency vulnerabilities with a known exploit path
|
||||
|
||||
Out of scope:
|
||||
|
||||
- Issues requiring physical access to the machine running the scanner
|
||||
- Vulnerabilities in Microsoft Graph API itself (report to Microsoft MSRC)
|
||||
- Social engineering attacks
|
||||
|
||||
---
|
||||
|
||||
## Data Handling Notes for Security Researchers
|
||||
|
||||
- CPR numbers are stored in the SQLite database as **SHA-256 hashes only** — never in plaintext
|
||||
- SMTP passwords are stored in `~/.gdpr_scanner_smtp.json` with chmod 600
|
||||
- Microsoft OAuth tokens are stored in the MSAL token cache in `~/.gdpr_scanner_config.json`
|
||||
- Scan results are stored locally in `~/.gdpr_scanner.db` — never transmitted externally
|
||||
- The web UI binds to `127.0.0.1` by default — it is not designed to be exposed to the internet
|
||||
|
||||
---
|
||||
|
||||
## Dependency Security
|
||||
|
||||
This project uses Python dependencies listed in `requirements.txt`. We recommend
|
||||
running `pip-audit` or `safety check` periodically to identify known CVEs in
|
||||
dependencies.
|
||||
|
||||
```bash
|
||||
pip install pip-audit
|
||||
pip-audit -r requirements.txt
|
||||
```
|
||||
1537
SUGGESTIONS.md
Normal file
1537
SUGGESTIONS.md
Normal file
File diff suppressed because it is too large
Load Diff
46
TODO.md
Normal file
46
TODO.md
Normal file
@ -0,0 +1,46 @@
|
||||
# TODO — Pending features and sustainability
|
||||
|
||||
Quick overview of what's still to be done. Full details in [SUGGESTIONS.md](SUGGESTIONS.md).
|
||||
|
||||
---
|
||||
|
||||
## Recently completed
|
||||
|
||||
### Memory exhaustion during large M365 scans ✅
|
||||
Six root causes fixed in `scan_engine.py` and `document_scanner.py`:
|
||||
- Email body HTML stripped at collection time (`body` key deleted from each message dict before it enters `work_items`; plain text stored as `_precomputed_body` instead)
|
||||
- `work_items` list converted to a `deque` before processing so each item is released immediately after `popleft()`
|
||||
- `del content` added in file-processing branch as soon as raw bytes are no longer needed (before NER/PII counting)
|
||||
- `del body_text` added after email body is fully consumed
|
||||
- PDF OCR page images (`PIL.Image`) nulled out one by one after OCR instead of holding all pages in RAM
|
||||
- Memory guard using `psutil` skips file downloads when < 300 MB RAM is available
|
||||
|
||||
**Still open:** The collection phase itself is still a "gather all, then process" loop. For very large tenants (>500k emails) the pre-extracted plain text in `work_items` could still be significant. The complete fix is to process each user's emails/files inline as they are fetched (generator/streaming pattern) rather than accumulating them into `work_items` first — estimated 1–2 days of refactor.
|
||||
|
||||
---
|
||||
|
||||
## Pending
|
||||
|
||||
### #15 — Scan profiles ✅
|
||||
Named, reusable scan configurations. Full spec in SUGGESTIONS.md §15.
|
||||
**Size:** Large · **Priority:** High
|
||||
|
||||
### #23 — Google Workspace role classification + cross-platform identity mapping ✅
|
||||
Full spec in SUGGESTIONS.md §23.
|
||||
**Size:** Large · **Priority:** Medium
|
||||
|
||||
### #27 — Migrate i18n format from `.lang` to JSON ✅
|
||||
Full spec in SUGGESTIONS.md §27.
|
||||
**Size:** Medium · **Priority:** Low
|
||||
|
||||
### #29 — Rename `skus/` → `classification/` ✅
|
||||
Full spec in SUGGESTIONS.md §29.
|
||||
**Size:** Small · **Priority:** Low
|
||||
|
||||
### #33 — Read-only viewer mode with PIN/token URL ✅
|
||||
A shareable URL (token-protected) or numeric PIN that gives a DPO, school principal, or compliance coordinator read-only access to the results grid — with disposition tagging but without scan controls, credentials, or delete access. Full spec in SUGGESTIONS.md §33.
|
||||
**Size:** Medium · **Priority:** Medium
|
||||
|
||||
### #32 — Windowed mode for Profiles, Sources, and Settings ✗ Won't do
|
||||
The workflow is sequential (configure → scan → review), not parallel — there is no realistic scenario where a modal and the results grid need to be open simultaneously. The Sources panel is already visible in the sidebar. Option A (the least-work path) still loads the full 3800-line JS stack twice. Closed.
|
||||
|
||||
794
app_config.py
Normal file
794
app_config.py
Normal file
@ -0,0 +1,794 @@
|
||||
"""
|
||||
app_config.py — Configuration, i18n, keywords, profiles, settings,
|
||||
SMTP config, file sources, and Fernet encryption for GDPRScanner.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re as _re
|
||||
import time
|
||||
import uuid as _uuid
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)

# Per-user data directory. Other paths in this module (config.json,
# settings.json, src_toggles.json, the language override file) are built
# from it.
_DATA_DIR = Path.home() / ".gdprscanner"

# Create the directory owner-only: it holds the admin PIN hash and scan
# settings. The mode is applied only when the directory is created here; an
# existing directory keeps its permissions.
_DATA_DIR.mkdir(mode=0o700, exist_ok=True)
|
||||
|
||||
from typing import Optional
|
||||
|
||||
# ── i18n ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _load_lang() -> dict:
|
||||
import locale, sys as _sys, os as _os, subprocess as _sp
|
||||
from pathlib import Path as _Path
|
||||
_here = _Path(_sys._MEIPASS) if getattr(_sys, "frozen", False) else _Path(__file__).parent
|
||||
lang_dir = _here / "lang"
|
||||
lang_code = "en"
|
||||
try:
|
||||
if _sys.platform == "darwin":
|
||||
try:
|
||||
r = _sp.run(["defaults", "read", "-g", "AppleLocale"],
|
||||
capture_output=True, text=True, timeout=3)
|
||||
if r.returncode == 0 and r.stdout.strip():
|
||||
lang_code = r.stdout.strip().split("_")[0].split("-")[0].lower()
|
||||
except Exception:
|
||||
pass
|
||||
if lang_code == "en":
|
||||
try:
|
||||
r = _sp.run(["defaults", "read", "-g", "AppleLanguages"],
|
||||
capture_output=True, text=True, timeout=3)
|
||||
import re as _re
|
||||
m = _re.search(r'"([a-z]{2})[-_]', r.stdout, _re.I)
|
||||
if m:
|
||||
lang_code = m.group(1).lower()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
loc = (locale.getlocale()[0] or _os.environ.get("LC_ALL") or
|
||||
_os.environ.get("LANG") or "en")
|
||||
lang_code = loc.split("_")[0].split(".")[0].split("-")[0].lower() or "en"
|
||||
except Exception:
|
||||
lang_code = "en"
|
||||
|
||||
def _parse(path) -> dict:
|
||||
import json as _json
|
||||
out = {}
|
||||
try:
|
||||
if path.suffix == ".json":
|
||||
out = _json.loads(path.read_text(encoding="utf-8"))
|
||||
else:
|
||||
for line in path.read_text(encoding="utf-8").splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
k, _, v = line.partition("=")
|
||||
out[k.strip()] = v.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return out
|
||||
|
||||
for code in [lang_code, "en"]:
|
||||
# Prefer .json, fall back to .lang for backward compatibility
|
||||
for ext in [".json", ".lang"]:
|
||||
p = lang_dir / f"{code}{ext}"
|
||||
if p.exists():
|
||||
result = _parse(p)
|
||||
result["_lang_code"] = code
|
||||
logger.info("[i18n] loaded %s (%d keys)", p, len(result))
|
||||
return result
|
||||
return {}
|
||||
|
||||
def _load_lang_forced(code: str) -> dict:
|
||||
import sys as _sys
|
||||
from pathlib import Path as _Path
|
||||
_here = _Path(_sys._MEIPASS) if getattr(_sys, "frozen", False) else _Path(__file__).parent
|
||||
lang_dir = _here / "lang"
|
||||
def _parse(path) -> dict:
|
||||
import json as _json
|
||||
out = {}
|
||||
try:
|
||||
if path.suffix == ".json":
|
||||
out = _json.loads(path.read_text(encoding="utf-8"))
|
||||
else:
|
||||
for line in path.read_text(encoding="utf-8").splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
k, _, v = line.partition("=")
|
||||
out[k.strip()] = v.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return out
|
||||
for c in [code, "en"]:
|
||||
for ext in [".json", ".lang"]:
|
||||
p = lang_dir / f"{c}{ext}"
|
||||
if p.exists():
|
||||
result = _parse(p)
|
||||
result["_lang_code"] = c
|
||||
return result
|
||||
return {}
|
||||
|
||||
_LANG_OVERRIDE_FILE = _DATA_DIR / "lang"


def _lang_override() -> "str | None":
    """Return the language code stored in the override file, if any.

    Returns ``None`` when the file cannot be read or contains only whitespace.
    """
    try:
        stored_code = _LANG_OVERRIDE_FILE.read_text().strip()
    except Exception:
        return None
    return stored_code or None
|
||||
|
||||
def _set_lang_override(code: str) -> None:
    """Persist ``code`` (whitespace-stripped) as the language override.

    The write is best-effort: a failure does not raise, but it is logged so a
    language choice that silently fails to persist can be diagnosed.
    """
    try:
        _LANG_OVERRIDE_FILE.write_text(code.strip())
    except Exception as e:
        logger.warning("[i18n] could not save language override: %s", e)
|
||||
|
||||
|
||||
# ── Display name resolver (used by scan_engine) ───────────────────────────────
|
||||
import re as _re2
|
||||
|
||||
_GUID_RE = _re2.compile(
|
||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', _re2.I
|
||||
)
|
||||
_GENERIC_DISPLAY_NAMES = {
|
||||
"microsoft konto", "microsoft account", "microsoftkonto",
|
||||
"microsoft-konto", "compte microsoft", "cuenta de microsoft",
|
||||
}
|
||||
|
||||
def _resolve_display_name(display_name: str, email: str = "", upn: str = "") -> str:
|
||||
"""Return the best human-readable name for a Microsoft 365 user."""
|
||||
dn = (display_name or "").strip()
|
||||
if not dn or _GUID_RE.match(dn) or dn.lower() in _GENERIC_DISPLAY_NAMES:
|
||||
return email or upn or dn
|
||||
return dn
|
||||
|
||||
# Read the override file once, so the decision and the code that is loaded
# come from the same read.
_lang_override_code = _lang_override()
LANG = _load_lang_forced(_lang_override_code) if _lang_override_code else _load_lang()
logger.info("[i18n] gdpr lang=%s keys=%d", LANG.get("_lang_code", "?"), len(LANG))
|
||||
|
||||
# ── Article 9 sensitive keywords ──────────────────────────────────────────────
|
||||
import re as _re
|
||||
|
||||
_KEYWORDS_PATH = Path(__file__).parent / "keywords"
_keyword_data: dict = {}
_keyword_flat: list = []       # (keyword, category_key) kept for reference / len count
_compiled_keywords: dict = {}  # cat_key → compiled re.Pattern (#13)
_KEYWORD_WINDOW = 150          # characters around a keyword to check for CPR proximity


def _load_keywords(lang: str = "da") -> None:
    """Load ``keywords/{lang}.json`` and compile one regex per category.

    Falls back to ``da.json`` when the requested file is missing or cannot be
    processed. On success the module globals ``_keyword_data``,
    ``_keyword_flat`` and ``_compiled_keywords`` are replaced together; on
    failure they are left untouched.

    Top-level keys starting with ``_`` and values that are not dicts are
    ignored. Within a category, entries that are not strings or are blank are
    dropped: a blank keyword would compile to an empty alternative that
    matches at every position and flag every text.

    Each category pattern is a case-insensitive alternation of its keywords,
    longest first. Keywords of four characters or fewer must stand alone as a
    word; longer ones match as substrings.

    Args:
        lang: Language code of the keyword file to prefer.
    """
    global _keyword_data, _keyword_flat, _compiled_keywords
    # Avoid trying (and logging a failure for) da.json twice.
    candidates = [lang] if lang == "da" else [lang, "da"]
    for candidate in candidates:
        p = _KEYWORDS_PATH / f"{candidate}.json"
        if not p.exists():
            continue
        try:
            data = json.loads(p.read_text(encoding="utf-8"))
            flat: list = []
            categories: dict = {}
            for cat_key, cat_val in data.items():
                if cat_key.startswith("_") or not isinstance(cat_val, dict):
                    continue
                kws = [kw.lower() for kw in cat_val.get("keywords", [])
                       if isinstance(kw, str) and kw.strip()]
                flat.extend((kw, cat_key) for kw in kws)
                categories[cat_key] = kws

            # Compile one alternation regex per category (#13)
            compiled: dict = {}
            for cat, kws in categories.items():
                if not kws:
                    continue
                parts = []
                # Longest first so the engine prefers the most specific match
                for kw in sorted(kws, key=len, reverse=True):
                    esc = _re.escape(kw)
                    if len(kw) <= 4:
                        # Whole-word boundary for short tokens
                        parts.append(r"(?<!\w)" + esc + r"(?!\w)")
                    else:
                        parts.append(esc)
                compiled[cat] = _re.compile(
                    "(?:" + "|".join(parts) + ")",
                    _re.IGNORECASE,
                )
        except Exception as e:
            logger.warning("[keywords] Failed to load %s: %s", p, e)
            continue

        # Publish only after everything compiled, so a failure part-way
        # through never leaves the globals describing two different files.
        _keyword_data = data
        _keyword_flat = sorted(flat, key=lambda x: -len(x[0]))
        _compiled_keywords = compiled
        logger.info("[keywords] Loaded %d keywords (%d categories compiled) from keywords/%s.json",
                    len(_keyword_flat), len(compiled), candidate)
        return


_load_keywords(LANG.get("_lang_code", "da"))
|
||||
|
||||
|
||||
def _check_special_category(text: str, cprs: list) -> list:
    """Return the sorted category keys whose keywords occur in ``text``.

    When ``cprs`` is non-empty, the lower-cased text is searched for
    CPR-shaped numbers (six digits, optional dash or whitespace, four digits)
    and a keyword only counts if it starts within ``_KEYWORD_WINDOW``
    characters of the start of one of them. When ``cprs`` is empty, or no
    CPR-shaped number is found in the text, any keyword occurrence counts.

    Returns an empty list if no keyword patterns are loaded, the text is
    empty, or nothing matches.
    """
    if not _compiled_keywords or not text:
        return []

    haystack = text.lower()
    anchors: list = (
        [hit.start() for hit in _re.finditer(r"\d{6}[-\s]?\d{4}", haystack)]
        if cprs else []
    )

    def _near_anchor(pos: int) -> bool:
        # With no anchors there is nothing to be near, so every hit qualifies.
        if not anchors:
            return True
        return any(abs(pos - anchor) <= _KEYWORD_WINDOW for anchor in anchors)

    # any() stops at the first qualifying hit, so each category is scanned
    # only as far as needed.
    detected = {
        category
        for category, pattern in _compiled_keywords.items()
        if any(_near_anchor(hit.start()) for hit in pattern.finditer(haystack))
    }
    return sorted(detected)
|
||||
|
||||
|
||||
_CONFIG_FILE = _DATA_DIR / "config.json"

import hashlib as _hashlib

# Key in config.json under which the admin PIN digest is kept.
_ADMIN_PIN_KEY = "admin_pin_hash"


def _get_admin_pin_hash() -> str:
    """Look up the stored admin PIN hash; ``""`` means no PIN is configured."""
    return _load_config().get(_ADMIN_PIN_KEY, "")
|
||||
|
||||
def _set_admin_pin(pin: str) -> None:
    """Store the SHA-256 hex digest of ``pin`` as the admin PIN in the config."""
    digest = _hashlib.sha256(pin.encode()).hexdigest()
    config = _load_config()
    config[_ADMIN_PIN_KEY] = digest
    _save_config(config)
|
||||
|
||||
def _verify_admin_pin(pin: str) -> bool:
    """Check ``pin`` against the stored admin PIN hash.

    Args:
        pin: The PIN as entered by the user.

    Returns:
        ``True`` only if a PIN is configured and the SHA-256 hex digest of
        ``pin`` equals the stored value. ``False`` if no PIN is configured,
        the stored value is not a string, or ``pin`` is not a string.
    """
    import hmac

    stored = _get_admin_pin_hash()
    if not stored or not isinstance(stored, str) or not isinstance(pin, str):
        return False
    candidate = _hashlib.sha256(pin.encode()).hexdigest()
    # Constant-time comparison so response timing does not reveal how many
    # leading characters of the digest matched. Compared as bytes because
    # compare_digest rejects non-ASCII str arguments.
    # NOTE(review): the stored digest is an unsalted single-round SHA-256 of a
    # short PIN; consider a salted KDF if the config file could be exposed.
    return hmac.compare_digest(candidate.encode("utf-8"), stored.encode("utf-8"))
|
||||
|
||||
def _admin_pin_is_set() -> bool:
    """Report whether a non-empty admin PIN hash is stored in the config."""
    return True if _get_admin_pin_hash() else False
|
||||
|
||||
|
||||
def _load_config() -> dict:
    """Read the config file and return its contents as a dict.

    Returns ``{}`` when the file does not exist, cannot be read or parsed, or
    does not contain a JSON object. Callers in this module call ``.get`` and
    assign keys on the result, so any other JSON type is rejected here rather
    than failing later. Problems are logged instead of being silently ignored.
    """
    if not _CONFIG_FILE.exists():
        return {}
    try:
        # Explicit UTF-8 instead of the platform default encoding.
        data = json.loads(_CONFIG_FILE.read_text(encoding="utf-8"))
    except Exception as e:
        logger.warning("[config] could not read %s: %s", _CONFIG_FILE, e)
        return {}
    if not isinstance(data, dict):
        logger.warning("[config] %s does not contain a JSON object; ignoring it", _CONFIG_FILE)
        return {}
    return data
|
||||
|
||||
def _save_config(cfg: dict):
    """Write ``cfg`` to the config file as indented JSON.

    The data is written to a sibling ``.tmp`` file and then moved over the
    real file, as the profile and source-toggle writers in this module do, so
    an interrupted write cannot leave a truncated config behind (which would
    read back as empty and drop the stored admin PIN hash). Failures are
    logged rather than raised.
    """
    tmp = _CONFIG_FILE.with_suffix(".tmp")
    try:
        tmp.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
        try:
            # Carry over the permissions of the file being replaced.
            tmp.chmod(_CONFIG_FILE.stat().st_mode & 0o777)
        except OSError:
            pass  # first save (no existing file) or chmod not supported
        tmp.replace(_CONFIG_FILE)
    except Exception as e:
        logger.error("[config] write failed: %s", e)
|
||||
|
||||
|
||||
# ── Profile storage (15a) ─────────────────────────────────────────────────────
|
||||
_SETTINGS_PATH = _DATA_DIR / "settings.json"
_SRC_TOGGLES_PATH = _DATA_DIR / "src_toggles.json"


def _load_src_toggles() -> dict:
    """Read the saved source-toggle state; ``{}`` if absent or unreadable."""
    try:
        if not _SRC_TOGGLES_PATH.exists():
            return {}
        raw = _SRC_TOGGLES_PATH.read_text(encoding="utf-8")
        return json.loads(raw)
    except Exception:
        return {}
|
||||
|
||||
def _save_src_toggles(state: dict) -> None:
    """Merge ``state`` into the saved source-toggle state and write it back.

    Existing keys not present in ``state`` are kept. The result is written to
    a ``.tmp`` sibling and then moved into place; failures are logged.
    """
    try:
        merged = _load_src_toggles()
        merged.update(state)
        payload = json.dumps(merged, ensure_ascii=False, indent=2)
        scratch = _SRC_TOGGLES_PATH.with_suffix(".tmp")
        scratch.write_text(payload, encoding="utf-8")
        scratch.replace(_SRC_TOGGLES_PATH)
    except Exception as e:
        logger.error("[src_toggles] write failed: %s", e)
|
||||
|
||||
|
||||
def _profiles_load() -> list:
    """Return every profile stored in the settings file.

    A settings file in the old flat layout (a dict without ``"profiles"`` but
    with ``"sources"`` or ``"user_ids"``) is wrapped into a single profile
    named "Default" and written back before being returned. Any error, or a
    missing file, yields an empty list.
    """
    try:
        if not _SETTINGS_PATH.exists():
            return []
        stored = json.loads(_SETTINGS_PATH.read_text(encoding="utf-8"))
        is_legacy_flat = (
            isinstance(stored, dict)
            and "profiles" not in stored
            and ("sources" in stored or "user_ids" in stored)
        )
        if is_legacy_flat:
            # One-time migration of the old single-settings layout.
            stored = {"profiles": [_profile_from_settings(stored, name="Default")]}
            _profiles_write(stored)
        return stored.get("profiles", [])
    except Exception:
        return []
|
||||
|
||||
|
||||
def _profiles_write(data: dict) -> None:
    """Write the complete settings dict (profiles included) to disk.

    The JSON is written to a ``.tmp`` sibling and then moved over the settings
    file. Values that are not JSON-serialisable are stringified. Failures are
    logged, not raised.
    """
    try:
        scratch = _SETTINGS_PATH.with_suffix(".tmp")
        serialised = json.dumps(data, ensure_ascii=False, indent=2, default=str)
        scratch.write_text(serialised, encoding="utf-8")
        scratch.replace(_SETTINGS_PATH)
    except Exception as e:
        logger.error("[profiles] write failed: %s", e)
|
||||
|
||||
|
||||
def _profiles_save_all(profiles: list) -> None:
    """Replace the stored profile list, keeping other top-level settings keys.

    If the settings file is missing, unreadable, or does not contain a JSON
    object, the other keys cannot be preserved and the file is rewritten with
    only the ``"profiles"`` key.

    Args:
        profiles: The full list of profile dicts to store.
    """
    data = {}
    try:
        if _SETTINGS_PATH.exists():
            data = json.loads(_SETTINGS_PATH.read_text(encoding="utf-8"))
    except Exception:
        data = {}
    # A settings file holding e.g. a JSON list would otherwise raise a
    # TypeError on the item assignment below, outside any error handling.
    if not isinstance(data, dict):
        data = {}
    data["profiles"] = profiles
    _profiles_write(data)
|
||||
|
||||
|
||||
def _profile_from_settings(settings: dict, name: str = "Default",
|
||||
description: str = "") -> dict:
|
||||
"""Wrap a flat settings dict as a profile."""
|
||||
import uuid as _uuid
|
||||
return {
|
||||
"id": str(_uuid.uuid4()),
|
||||
"name": name,
|
||||
"description": description,
|
||||
"sources": settings.get("sources", []),
|
||||
"google_sources": settings.get("google_sources", []),
|
||||
"user_ids": settings.get("user_ids", []),
|
||||
"options": settings.get("options", {}),
|
||||
"retention_years": settings.get("retention_years"),
|
||||
"fiscal_year_end": settings.get("fiscal_year_end"),
|
||||
"email_to": settings.get("email_to", ""),
|
||||
"file_sources": settings.get("file_sources", []),
|
||||
"last_run": settings.get("last_run"),
|
||||
"last_scan_id": settings.get("last_scan_id"),
|
||||
}
|
||||
|
||||
|
||||
def _profile_get(name_or_id: str) -> dict | None:
    """Return the first stored profile whose ID equals ``name_or_id`` or whose
    name matches it case-insensitively; ``None`` when nothing matches."""
    return next(
        (
            candidate
            for candidate in _profiles_load()
            if candidate.get("id") == name_or_id
            or candidate.get("name", "").lower() == name_or_id.lower()
        ),
        None,
    )
|
||||
|
||||
|
||||
def _profile_save(profile: dict) -> dict:
    """Upsert ``profile`` into the stored list and return it.

    A profile without an ``id`` gets a new UUID (the dict is mutated). If a
    stored profile has the same id it is replaced in place, otherwise the
    profile is appended.
    """
    import uuid as _uuid

    if not profile.get("id"):
        profile["id"] = str(_uuid.uuid4())

    stored = _profiles_load()
    slot = next(
        (idx for idx, current in enumerate(stored)
         if current.get("id") == profile["id"]),
        None,
    )
    if slot is None:
        stored.append(profile)
    else:
        stored[slot] = profile
    _profiles_save_all(stored)
    return profile
|
||||
|
||||
|
||||
def _profile_delete(name_or_id: str) -> bool:
    """Remove every profile matching ``name_or_id`` by ID or (case-insensitive)
    name. Returns True when at least one profile was removed and saved."""
    current = _profiles_load()
    survivors = []
    for prof in current:
        is_target = (
            prof.get("id") == name_or_id
            or prof.get("name", "").lower() == name_or_id.lower()
        )
        if not is_target:
            survivors.append(prof)
    if len(survivors) == len(current):
        return False
    _profiles_save_all(survivors)
    return True
|
||||
|
||||
|
||||
def _profile_touch(profile_id: str, scan_id: int) -> None:
    """Stamp the profile with the current local time and the given scan id.

    The profile list is written back even when no profile has ``profile_id``.
    """
    import datetime as _dt2

    stored = _profiles_load()
    target = next((prof for prof in stored if prof.get("id") == profile_id), None)
    if target is not None:
        target["last_run"] = _dt2.datetime.now().isoformat(timespec="seconds")
        target["last_scan_id"] = scan_id
    _profiles_save_all(stored)
|
||||
|
||||
|
||||
# ── Legacy shim — keep _save_settings / _load_settings working ────────────────
|
||||
|
||||
def _save_settings(payload: dict, profile_name: str | None = None,
                   profile_id: str | None = None) -> None:
    """Save settings by upserting a profile.

    The target profile is resolved in this order: ``profile_id``, then
    ``profile_name``, then the first stored profile. When none exists a new
    profile named ``profile_name`` (or "Default") is created from ``payload``.

    When an existing profile is updated, its id, ``last_run`` and
    ``last_scan_id`` are kept, and its Google/file source selections are
    preserved if the payload does not supply them.
    """
    profiles = _profiles_load()
    # Resolve profile: ID → name → first profile → "Default"
    existing = None
    if profile_id:
        existing = _profile_get(profile_id)
    if not existing and profile_name:
        existing = _profile_get(profile_name)
    if not existing and profiles:
        existing = profiles[0]

    name = existing["name"] if existing else (profile_name or "Default")
    merged = _profile_from_settings(
        payload, name=name,
        description=existing.get("description", "") if existing else "")

    # Everything below reads from `existing`, so it must only run when a
    # profile was actually resolved. On a first save there is nothing to
    # preserve and `existing` is None.
    if existing:
        merged["id"] = existing["id"]
        merged["last_run"] = existing.get("last_run")
        merged["last_scan_id"] = existing.get("last_scan_id")
        # Scan start payloads only include M365 sources/user_ids/options.
        # Preserve google_sources and file_sources so a single-source scan
        # doesn't clobber the profile's other source selections.
        _M365_IDS = {"email", "onedrive", "sharepoint", "teams"}
        google_src = payload.get("google_sources", existing.get("google_sources", []))
        file_src = payload.get("file_sources") or existing.get("file_sources", [])
        merged["google_sources"] = google_src
        merged["file_sources"] = file_src
        # Rebuild combined sources: incoming M365 selection + preserved google/file
        m365_src = [s for s in merged.get("sources", []) if s in _M365_IDS]
        merged["sources"] = m365_src + google_src + file_src

    _profile_save(merged)
|
||||
|
||||
|
||||
def _load_settings() -> dict | None:
    """Flatten the first stored profile into the legacy settings shape.

    Returns ``None`` when no profiles are stored.
    """
    stored = _profiles_load()
    if not stored:
        return None
    first = stored[0]
    legacy_fields = (
        ("sources", []),
        ("user_ids", []),
        ("options", {}),
        ("retention_years", None),
        ("fiscal_year_end", None),
        ("email_to", ""),
    )
    return {field: first.get(field, fallback) for field, fallback in legacy_fields}
|
||||
|
||||
|
||||
# ── SMTP / email report sending ───────────────────────────────────────────────
|
||||
_SMTP_CONFIG_PATH = _DATA_DIR / "smtp.json"
|
||||
_ROLE_OVERRIDES_PATH = _DATA_DIR / "role_overrides.json"
|
||||
|
||||
|
||||
def _load_role_overrides() -> dict:
    """Load the manual role overrides ({user_id: 'student'|'staff'|'other'}).

    Returns an empty dict when the file is missing or cannot be read/parsed.
    """
    try:
        if not _ROLE_OVERRIDES_PATH.exists():
            return {}
        raw_text = _ROLE_OVERRIDES_PATH.read_text(encoding="utf-8")
        return json.loads(raw_text)
    except Exception:
        return {}
|
||||
|
||||
|
||||
def _save_role_overrides(overrides: dict) -> None:
    """Write the role overrides via a ``.tmp`` file renamed over the target.

    Failures are logged, not raised.
    """
    try:
        serialized = json.dumps(overrides, ensure_ascii=False, indent=2)
        scratch = _ROLE_OVERRIDES_PATH.with_suffix(".tmp")
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(_ROLE_OVERRIDES_PATH)
    except Exception as e:
        logger.error("[role_overrides] write failed: %s", e)
|
||||
|
||||
|
||||
# ── File source settings (#8) ─────────────────────────────────────────────────
|
||||
_FILE_SOURCES_PATH = _DATA_DIR / "file_sources.json"
|
||||
|
||||
|
||||
def _load_file_sources() -> list:
    """Load the saved file source definitions.

    Each entry: {id, label, path, smb_host, smb_user, smb_domain, keychain_key}

    Returns an empty list when the file is missing or cannot be read/parsed.
    """
    try:
        if not _FILE_SOURCES_PATH.exists():
            return []
        raw_text = _FILE_SOURCES_PATH.read_text(encoding="utf-8")
        return json.loads(raw_text)
    except Exception:
        return []
|
||||
|
||||
|
||||
def _save_file_sources(sources: list) -> None:
    """Write the file sources list via a ``.tmp`` file renamed over the target.

    Failures are logged, not raised.
    """
    try:
        serialized = json.dumps(sources, ensure_ascii=False, indent=2)
        scratch = _FILE_SOURCES_PATH.with_suffix(".tmp")
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(_FILE_SOURCES_PATH)
    except Exception as e:
        logger.error("[file_sources] write failed: %s", e)
|
||||
|
||||
# ── Viewer tokens ────────────────────────────────────────────────────────────
|
||||
# Read-only viewer tokens allow sharing scan results with a DPO or compliance
|
||||
# officer without exposing scan controls or credentials. Each token is a
|
||||
# 64-character hex string stored in viewer_tokens.json alongside other data files.
|
||||
|
||||
_VIEWER_TOKENS_PATH = _DATA_DIR / "viewer_tokens.json"
|
||||
|
||||
|
||||
def _load_viewer_tokens() -> list:
    """Read viewer_tokens.json and return its parsed content.

    Returns an empty list if the file is missing or corrupt.
    Note: a later definition of the same name in this module replaces this one.
    """
    try:
        if not _VIEWER_TOKENS_PATH.exists():
            return []
        raw_text = _VIEWER_TOKENS_PATH.read_text(encoding="utf-8")
        return json.loads(raw_text)
    except Exception:
        return []
|
||||
|
||||
|
||||
def _save_viewer_tokens(tokens: list) -> None:
    """Write the token list via a ``.tmp`` file renamed over the target.

    Failures are logged, not raised.
    Note: a later definition of the same name in this module replaces this one.
    """
    try:
        serialized = json.dumps(tokens, ensure_ascii=False, indent=2)
        scratch = _VIEWER_TOKENS_PATH.with_suffix(".tmp")
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(_VIEWER_TOKENS_PATH)
    except Exception as e:
        logger.error("[viewer_tokens] write failed: %s", e)
|
||||
|
||||
|
||||
def create_viewer_token(label: str = "", expires_days: int | None = None) -> dict:
    """Create a viewer token, append it to storage, and return its record.

    Args:
        label:        Human-readable description (e.g. "DPO review April 2026").
        expires_days: Days until expiry. A falsy value (None or 0) means the
                      token never expires.
    """
    import secrets as _secrets

    created = time.time()
    if expires_days:
        expiry = created + expires_days * 86400
    else:
        expiry = None

    entry: dict = {
        "token": _secrets.token_hex(32),  # 64 hex characters
        "label": label or "",
        "created_at": created,
        "expires_at": expiry,
        "last_used_at": None,
    }
    stored = _load_viewer_tokens()
    stored.append(entry)
    _save_viewer_tokens(stored)
    return entry
|
||||
|
||||
|
||||
def validate_viewer_token(token: str) -> dict | None:
|
||||
"""Return the token dict if the token is valid and not expired, else None.
|
||||
|
||||
Updates last_used_at as a best-effort side effect.
|
||||
"""
|
||||
if not token:
|
||||
return None
|
||||
tokens = _load_viewer_tokens()
|
||||
now = time.time()
|
||||
found: dict | None = None
|
||||
for entry in tokens:
|
||||
if entry.get("token") == token:
|
||||
exp = entry.get("expires_at")
|
||||
if exp is not None and now > exp:
|
||||
return None # expired — treat as not found
|
||||
found = entry
|
||||
break
|
||||
if found is None:
|
||||
return None
|
||||
found["last_used_at"] = now
|
||||
_save_viewer_tokens(tokens) # best-effort; ignore failures
|
||||
return found
|
||||
|
||||
|
||||
def revoke_viewer_token(token: str) -> bool:
    """Drop every stored entry whose ``token`` equals the given value.

    Returns True (after saving) when something was removed, else False.
    """
    stored = _load_viewer_tokens()
    remaining = [entry for entry in stored if entry.get("token") != token]
    if len(remaining) == len(stored):
        return False
    _save_viewer_tokens(remaining)
    return True
|
||||
|
||||
|
||||
def cleanup_expired_viewer_tokens() -> int:
    """Remove tokens whose ``expires_at`` lies in the past; return how many.

    Storage is rewritten only when at least one token was removed.
    """
    stored = _load_viewer_tokens()
    now = time.time()
    active = []
    for entry in stored:
        expiry = entry.get("expires_at")
        if expiry is None or now <= expiry:
            active.append(entry)
    removed = len(stored) - len(active)
    if removed:
        _save_viewer_tokens(active)
    return removed
|
||||
|
||||
|
||||
# ── Viewer PIN ───────────────────────────────────────────────────────────────
|
||||
# A numeric PIN that grants a browser session read-only viewer access at /view.
|
||||
# The PIN is stored as a salted SHA-256 hash inside viewer_tokens.json under a
|
||||
# top-level "__pin__" key so it lives in the same file as the token list.
|
||||
|
||||
_PIN_META_KEY = "__pin__"
|
||||
|
||||
|
||||
def _load_pin_store() -> dict:
    """Load viewer_tokens.json as a dict (token list plus optional PIN meta).

    A legacy file that contains only a list is wrapped as ``{"tokens": list}``.
    Missing, unreadable, or otherwise-shaped content yields ``{"tokens": []}``.
    """
    try:
        if not _VIEWER_TOKENS_PATH.exists():
            return {"tokens": []}
        raw = json.loads(_VIEWER_TOKENS_PATH.read_text(encoding="utf-8"))
    except Exception:
        return {"tokens": []}
    if isinstance(raw, list):
        # Legacy format — just a list; promote to dict
        return {"tokens": raw}
    if isinstance(raw, dict):
        return raw
    return {"tokens": []}
|
||||
|
||||
|
||||
def _save_pin_store(store: dict) -> None:
    """Write the full viewer store (tokens + PIN meta) via a ``.tmp`` file
    renamed over viewer_tokens.json. Failures are logged, not raised."""
    try:
        serialized = json.dumps(store, ensure_ascii=False, indent=2)
        scratch = _VIEWER_TOKENS_PATH.with_suffix(".tmp")
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(_VIEWER_TOKENS_PATH)
    except Exception as e:
        logger.error("[viewer_pin] write failed: %s", e)
|
||||
|
||||
|
||||
# Rewrite the token helpers to use the new store format transparently.
|
||||
def _load_viewer_tokens() -> list:  # type: ignore[misc]  # noqa: F811
    """Return the ``tokens`` list from the dict-shaped viewer store."""
    store = _load_pin_store()
    return store.get("tokens", [])
|
||||
|
||||
|
||||
def _save_viewer_tokens(tokens: list) -> None:  # type: ignore[misc]  # noqa: F811
    """Replace the ``tokens`` list in the viewer store, keeping other keys
    (such as the PIN record) intact."""
    current = _load_pin_store()
    current["tokens"] = tokens
    _save_pin_store(current)
|
||||
|
||||
|
||||
def get_viewer_pin_hash() -> "dict | None":
    """Return the stored PIN record, or None if no PIN is set.

    The record is whatever is stored under the ``__pin__`` key of the viewer
    store; ``set_viewer_pin`` writes it as ``{"hash": ..., "salt": ...}``.
    """
    return _load_pin_store().get(_PIN_META_KEY)
|
||||
|
||||
|
||||
def set_viewer_pin(pin: str) -> None:
    """Store a salted SHA-256 hash of ``pin`` in the viewer store.

    Raises:
        ValueError: if ``pin`` is empty.
    """
    import hashlib as _hl, secrets as _sec

    if not pin:
        raise ValueError("PIN must not be empty")

    salt = _sec.token_hex(16)
    salted = (salt + pin).encode()
    record = {"hash": _hl.sha256(salted).hexdigest(), "salt": salt}

    store = _load_pin_store()
    store[_PIN_META_KEY] = record
    _save_pin_store(store)
|
||||
|
||||
|
||||
def verify_viewer_pin(pin: str) -> bool:
    """Return True if *pin* matches the stored salted SHA-256 hash.

    Returns False when no PIN is set or the stored record is malformed
    (not a dict, or missing/non-string ``salt``/``hash``). The digest is
    compared with ``hmac.compare_digest`` to avoid leaking match length
    through timing.
    """
    import hashlib as _hl
    import hmac as _hmac

    meta = get_viewer_pin_hash()
    if not meta or not isinstance(meta, dict):
        return False
    salt = meta.get("salt")
    expected = meta.get("hash")
    if not isinstance(salt, str) or not isinstance(expected, str):
        return False
    h = _hl.sha256((salt + pin).encode()).hexdigest()
    # Compare as bytes so a non-ASCII stored value cannot raise TypeError.
    return _hmac.compare_digest(h.encode("utf-8"), expected.encode("utf-8"))
|
||||
|
||||
|
||||
def clear_viewer_pin() -> None:
    """Delete the PIN record from the viewer store (no-op if none is set)
    and write the store back."""
    current = _load_pin_store()
    if _PIN_META_KEY in current:
        del current[_PIN_META_KEY]
    _save_pin_store(current)
|
||||
|
||||
|
||||
# ── SMTP password encryption ─────────────────────────────────────────────────
|
||||
# The SMTP password is encrypted at rest using Fernet symmetric encryption.
|
||||
# The encryption key is a random Fernet key generated on first use and stored
|
||||
# in the "machine_id" file inside the data directory. Because the key lives only on this machine —
|
||||
# the encrypted password cannot be decrypted if the config file is copied to
|
||||
# another host.
|
||||
|
||||
_MACHINE_ID_PATH = _DATA_DIR / "machine_id"
|
||||
|
||||
try:
|
||||
from cryptography.fernet import Fernet as _Fernet
|
||||
import base64 as _b64
|
||||
_CRYPTO_OK = True
|
||||
except ImportError:
|
||||
_CRYPTO_OK = False
|
||||
|
||||
def _get_fernet() -> "Optional[_Fernet]":
    """Return a Fernet instance keyed to this machine, or None if unavailable.

    The key is read from the machine-id file. If that file does not exist a
    new key is generated and the file is created with mode 0o600 from the
    start, so the key is never on disk with wider permissions. Creation is
    exclusive: if another caller created the file first, its key is read
    instead of being overwritten.

    Returns None when the cryptography package is missing or on any error.
    """
    if not _CRYPTO_OK:
        return None
    try:
        if _MACHINE_ID_PATH.exists():
            machine_key = _MACHINE_ID_PATH.read_bytes()
        else:
            import os as _os
            machine_key = _Fernet.generate_key()
            try:
                fd = _os.open(str(_MACHINE_ID_PATH),
                              _os.O_WRONLY | _os.O_CREAT | _os.O_EXCL, 0o600)
            except FileExistsError:
                # Lost the creation race — use the key that is already there.
                machine_key = _MACHINE_ID_PATH.read_bytes()
            else:
                with _os.fdopen(fd, "wb") as fh:
                    fh.write(machine_key)
                try:
                    # The creation mode is subject to umask; enforce it.
                    _MACHINE_ID_PATH.chmod(0o600)
                except Exception:
                    pass
        return _Fernet(machine_key)
    except Exception:
        return None
|
||||
|
||||
def _encrypt_password(plaintext: str) -> str:
|
||||
"""Encrypt a password string; returns a 'enc:' prefixed ciphertext string."""
|
||||
if not plaintext:
|
||||
return ""
|
||||
f = _get_fernet()
|
||||
if f is None:
|
||||
return plaintext # fallback: store as-is (no cryptography lib)
|
||||
try:
|
||||
return "enc:" + f.encrypt(plaintext.encode()).decode()
|
||||
except Exception:
|
||||
return plaintext
|
||||
|
||||
def _decrypt_password(stored: str) -> str:
|
||||
"""Decrypt a stored password; handles both encrypted and legacy plaintext."""
|
||||
if not stored:
|
||||
return ""
|
||||
if not stored.startswith("enc:"):
|
||||
return stored # legacy plaintext — return as-is
|
||||
f = _get_fernet()
|
||||
if f is None:
|
||||
return ""
|
||||
try:
|
||||
return f.decrypt(stored[4:].encode()).decode()
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _load_smtp_config() -> dict:
    """Load the SMTP config and replace its ``password`` with the decrypted
    value. Returns {} when the file is missing or cannot be read/parsed."""
    try:
        if not _SMTP_CONFIG_PATH.exists():
            return {}
        cfg = json.loads(_SMTP_CONFIG_PATH.read_text(encoding="utf-8"))
        stored_pw = cfg.get("password")
        if stored_pw:
            cfg["password"] = _decrypt_password(stored_pw)
        return cfg
    except Exception:
        return {}
|
||||
|
||||
def _save_smtp_config(cfg: dict) -> None:
    """Save SMTP config, encrypting the password field.

    A copy of ``cfg`` is written (the caller's dict is not modified) to a
    ``.tmp`` file that is then renamed over the config file. The temp file is
    restricted to mode 0o600 *before* any content is written, so the password
    is not readable by others while the write is in progress. Failures are
    logged, not raised.
    """
    try:
        to_save = dict(cfg)
        if to_save.get("password"):
            to_save["password"] = _encrypt_password(to_save["password"])
        tmp = _SMTP_CONFIG_PATH.with_suffix(".tmp")
        try:
            # Create (or reuse) the temp file and lock it down first;
            # write_text below keeps the mode of an existing file.
            tmp.touch(mode=0o600, exist_ok=True)
            tmp.chmod(0o600)
        except Exception:
            pass
        tmp.write_text(json.dumps(to_save, ensure_ascii=False), encoding="utf-8")
        tmp.replace(_SMTP_CONFIG_PATH)
        try:
            _SMTP_CONFIG_PATH.chmod(0o600)
        except Exception:
            pass
    except Exception as e:
        logger.error("[smtp] config save failed: %s", e)
|
||||
1095
build_gdpr.py
Executable file
1095
build_gdpr.py
Executable file
File diff suppressed because it is too large
Load Diff
5
build_gdpr.sh
Executable file
5
build_gdpr.sh
Executable file
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash
# GDPRScanner — build .app (uses ./venv)
#
# Resolves the directory this script lives in, activates the virtualenv found
# there, and hands over to build_gdpr.py with --clean plus any extra arguments.

# Stop at the first failing command so a broken environment is not ignored.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Without this check a missing venv would only print a warning and the build
# would continue with whatever python3 happens to be on PATH.
if [ ! -f "$SCRIPT_DIR/venv/bin/activate" ]; then
  echo "error: virtualenv not found at $SCRIPT_DIR/venv" >&2
  exit 1
fi

source "$SCRIPT_DIR/venv/bin/activate"
exec python3 "$SCRIPT_DIR/build_gdpr.py" --clean "$@"
|
||||
84
checkpoint.py
Normal file
84
checkpoint.py
Normal file
@ -0,0 +1,84 @@
|
||||
"""
|
||||
checkpoint.py — Scan checkpoint and delta-token persistence for GDPRScanner.
|
||||
|
||||
Provides save/load/clear for mid-scan checkpoints (so interrupted scans can
|
||||
resume) and load/save for Microsoft Graph delta-link tokens.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DATA_DIR = Path.home() / ".gdprscanner"
|
||||
_DATA_DIR.mkdir(exist_ok=True)
|
||||
_CHECKPOINT_PATH = _DATA_DIR / "checkpoint.json"
|
||||
|
||||
def _checkpoint_key(options: dict) -> str:
|
||||
"""Stable hash of the scan options — used to detect when a checkpoint
|
||||
belongs to a different scan configuration and should be ignored."""
|
||||
sig = json.dumps({
|
||||
"sources": sorted(options.get("sources", [])),
|
||||
"user_ids": sorted([u["id"] if isinstance(u, dict) else u for u in options.get("user_ids", [])]),
|
||||
"older_than_days": options.get("options", {}).get("older_than_days", 0),
|
||||
}, sort_keys=True)
|
||||
return hashlib.sha256(sig.encode()).hexdigest()[:16]
|
||||
|
||||
def _save_checkpoint(key: str, scanned_ids: set, flagged: list, meta: dict) -> None:
    """Write the scan checkpoint to disk via a ``.tmp`` file renamed over it.

    The ``"options"`` entry of ``meta`` is left out of the stored payload.
    Failures are logged, not raised.
    """
    try:
        payload = {"key": key}
        payload["scanned_ids"] = list(scanned_ids)
        payload["flagged"] = flagged
        payload["meta"] = {name: value for name, value in meta.items()
                           if name != "options"}
        serialized = json.dumps(payload, ensure_ascii=False, default=str)
        scratch = _CHECKPOINT_PATH.with_suffix(".tmp")
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(_CHECKPOINT_PATH)
    except Exception as e:
        logger.error("[checkpoint] save failed: %s", e)
|
||||
|
||||
def _load_checkpoint(key: str) -> dict | None:
    """Return the stored checkpoint payload when its ``"key"`` equals ``key``.

    Returns None if there is no checkpoint file, the key differs, or reading
    or parsing fails.
    """
    try:
        if _CHECKPOINT_PATH.exists():
            payload = json.loads(_CHECKPOINT_PATH.read_text(encoding="utf-8"))
            if payload.get("key") == key:
                return payload
        return None
    except Exception:
        return None
|
||||
|
||||
def _clear_checkpoint() -> None:
    """Delete the checkpoint file if it exists; any error is ignored."""
    try:
        checkpoint_present = _CHECKPOINT_PATH.exists()
        if checkpoint_present:
            _CHECKPOINT_PATH.unlink()
    except Exception:
        return
|
||||
|
||||
_DELTA_PATH = _DATA_DIR / "delta.json"
|
||||
|
||||
def _load_delta_tokens() -> dict:
    """Load the saved delta token map {key: deltaLink_url}.

    Returns {} when the file is missing or cannot be read/parsed.
    """
    try:
        if not _DELTA_PATH.exists():
            return {}
        raw_text = _DELTA_PATH.read_text(encoding="utf-8")
        return json.loads(raw_text)
    except Exception:
        return {}
|
||||
|
||||
def _save_delta_tokens(tokens: dict) -> None:
    """Write the delta token map via a ``.tmp`` file renamed over the target.

    Failures are logged, not raised.
    """
    try:
        serialized = json.dumps(tokens, ensure_ascii=False)
        scratch = _DELTA_PATH.with_suffix(".tmp")
        scratch.write_text(serialized, encoding="utf-8")
        scratch.replace(_DELTA_PATH)
    except Exception as e:
        logger.error("[delta] save failed: %s", e)
|
||||
|
||||
# ── Broadcast ─────────────────────────────────────────────────────────────────
|
||||
446
cpr_detector.py
Normal file
446
cpr_detector.py
Normal file
@ -0,0 +1,446 @@
|
||||
"""
|
||||
cpr_detector.py — File scanning and CPR/PII detection for GDPRScanner.
|
||||
|
||||
Provides:
|
||||
_scan_bytes(content, filename) — dispatch to correct scanner by file type
|
||||
_scan_text_direct(text) — scan a plain text string
|
||||
_extract_exif(content, filename) — extract PII-bearing EXIF tags from images
|
||||
_detect_photo_faces(content, fn) — count faces in an image (OpenCV)
|
||||
_get_pii_counts(text) — NER-based PII type counts
|
||||
_make_thumb(content, filename) — JPEG thumbnail as base64 string
|
||||
_placeholder_svg(ext, name) — SVG file-type icon
|
||||
|
||||
Globals SCANNER_OK, PIL_OK, PHOTO_EXTS, SUPPORTED_EXTS, ds, PILImage, LANG,
|
||||
and _check_special_category are injected at startup by gdpr_scanner.py via
|
||||
`from cpr_detector import *` AFTER those names are defined. This keeps the
|
||||
module cleanly importable in isolation for unit tests (#26) while preserving
|
||||
the existing runtime behaviour.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import base64
|
||||
import hashlib
|
||||
import io
|
||||
import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Only one PDF subprocess may run at a time — each spawned process loads
|
||||
# ~150-300 MB of Python libs (pdfplumber, pdf2image, pytesseract).
|
||||
# Serialising them prevents overlapping subprocesses from exhausting RAM.
|
||||
_pdf_subprocess_sem = threading.Semaphore(1)
|
||||
|
||||
# ── Lazy fallbacks for standalone / test imports ──────────────────────────────
|
||||
# When imported in isolation (e.g. pytest), these defaults prevent NameErrors.
|
||||
# gdpr_scanner.py overwrites them at startup via explicit assignment.
|
||||
try:
|
||||
import document_scanner as ds
|
||||
SCANNER_OK = True
|
||||
except ImportError:
|
||||
ds = None # type: ignore[assignment]
|
||||
SCANNER_OK = False
|
||||
|
||||
try:
|
||||
from PIL import Image as PILImage
|
||||
PIL_OK = True
|
||||
except ImportError:
|
||||
PILImage = None # type: ignore[assignment]
|
||||
PIL_OK = False
|
||||
|
||||
SUPPORTED_EXTS = {
|
||||
".pdf", ".docx", ".doc", ".xlsx", ".xlsm", ".csv",
|
||||
".txt", ".eml", ".msg",
|
||||
".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp",
|
||||
}
|
||||
PHOTO_EXTS = {
|
||||
".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp", ".heic", ".heif",
|
||||
}
|
||||
LANG: dict = {}
|
||||
|
||||
def _check_special_category(text: str, cprs: list) -> list:
|
||||
"""Stub — overwritten by app_config._check_special_category at startup."""
|
||||
return []
|
||||
|
||||
|
||||
# EXIF tags that may contain PII
|
||||
# EXIF tags that may contain genuinely personal data (name, description, keywords).
|
||||
# Deliberately excludes hardware/OS fields (HostComputer, Software, Make, Model,
|
||||
# DocumentName, PageName) that are set automatically by the OS on every screenshot
|
||||
# and carry no personal information about an individual.
|
||||
_EXIF_PII_TAGS = {
|
||||
"Artist", "Copyright", "ImageDescription", "UserComment",
|
||||
"XPAuthor", "XPSubject", "XPComment", "XPKeywords",
|
||||
}
|
||||
|
||||
# Minimum character length for a PII field value to be considered meaningful.
|
||||
# Prevents single-letter or empty values from triggering a flag.
|
||||
_EXIF_PII_MIN_LEN = 3
|
||||
|
||||
def _extract_exif(content: bytes, filename: str) -> dict:
    """Extract EXIF metadata from an image file.

    Args:
        content:  Raw image bytes.
        filename: Original file name (not used for decoding).

    Returns a dict with keys:
        gps        — {lat, lon, lat_ref, lon_ref, maps_url} or None
        pii_fields — {tag: value} for fields containing potential PII
        author     — str or None
        datetime   — str or None
        device     — str or None
        has_pii    — bool

    Never raises: if Pillow is unavailable or the image cannot be parsed the
    empty result is returned.
    """
    result = {"gps": None, "pii_fields": {}, "author": None,
              "datetime": None, "device": None, "has_pii": False}

    if not PIL_OK:
        return result

    def _exif_text(tag: str, val) -> str:
        """Best-effort conversion of a raw EXIF value to clean text."""
        if isinstance(val, bytes):
            if tag.startswith("XP"):
                # Windows XP* tags hold UTF-16LE text, not UTF-8.
                val = val.decode("utf-16-le", errors="replace")
            elif tag == "UserComment" and len(val) >= 8:
                # UserComment starts with an 8-byte character-code prefix.
                prefix, body = val[:8], val[8:]
                if prefix.startswith(b"UNICODE"):
                    # NOTE(review): the prefix does not record byte order;
                    # little-endian is assumed — confirm against real files.
                    val = body.decode("utf-16-le", errors="replace")
                elif prefix.startswith(b"ASCII") or prefix == b"\x00" * 8:
                    val = body.decode("utf-8", errors="replace")
                else:
                    val = val.decode("utf-8", errors="replace")
            else:
                val = val.decode("utf-8", errors="replace")
        elif not isinstance(val, str):
            val = str(val)
        return val.strip("\x00 ").strip()

    try:
        from PIL import Image as _Img, ExifTags as _ExifTags
        img = _Img.open(io.BytesIO(content))

        # Legacy JPEG helper first: it already merges the Exif sub-IFD and
        # returns GPSInfo as a dict.
        raw = getattr(img, "_getexif", lambda: None)()
        if not raw:
            # Newer Pillow API. getexif() exposes only the base IFD, so pull
            # in the Exif (0x8769) and GPS (0x8825) sub-IFDs when supported.
            exif_data = img.getexif()
            raw = {k: v for k, v in exif_data.items()}
            get_ifd = getattr(exif_data, "get_ifd", None)
            if get_ifd is not None:
                try:
                    for k, v in dict(get_ifd(0x8769)).items():
                        raw.setdefault(k, v)
                except Exception:
                    pass
                try:
                    gps_ifd = dict(get_ifd(0x8825))
                    if gps_ifd:
                        raw[0x8825] = gps_ifd
                except Exception:
                    pass

        if not raw:
            return result

        # Build human-readable dict
        named = {}
        for tag_id, value in raw.items():
            tag = _ExifTags.TAGS.get(tag_id, str(tag_id))
            named[tag] = value

        # Author / description fields
        for field in _EXIF_PII_TAGS:
            val = named.get(field)
            if val:
                try:
                    text = _exif_text(field, val)
                    if text and len(text) >= _EXIF_PII_MIN_LEN:
                        result["pii_fields"][field] = text
                        result["has_pii"] = True
                except Exception:
                    pass

        if named.get("Artist"):
            result["author"] = _exif_text("Artist", named["Artist"])
        elif named.get("XPAuthor"):
            result["author"] = _exif_text("XPAuthor", named["XPAuthor"])

        if named.get("DateTimeOriginal"):
            result["datetime"] = str(named["DateTimeOriginal"])
        elif named.get("DateTime"):
            result["datetime"] = str(named["DateTime"])

        make = named.get("Make", "")
        model = named.get("Model", "")
        if make or model:
            result["device"] = f"{make} {model}".strip()

        # GPS
        gps_raw = named.get("GPSInfo")
        if gps_raw and isinstance(gps_raw, dict):
            try:
                gps_tags = {_ExifTags.GPSTAGS.get(k, k): v for k, v in gps_raw.items()}

                def _dms_to_decimal(dms, ref):
                    """Convert (deg, min, sec) to signed decimal degrees."""
                    if not dms or len(dms) < 3:
                        return None
                    deg, mn, sec = dms
                    # Pillow may return IFDRational objects
                    deg = float(deg); mn = float(mn); sec = float(sec)
                    dec = deg + mn / 60 + sec / 3600
                    if ref in ("S", "W"):
                        dec = -dec
                    return round(dec, 7)

                lat = _dms_to_decimal(
                    gps_tags.get("GPSLatitude"),
                    gps_tags.get("GPSLatitudeRef", "N"),
                )
                lon = _dms_to_decimal(
                    gps_tags.get("GPSLongitude"),
                    gps_tags.get("GPSLongitudeRef", "E"),
                )
                if lat is not None and lon is not None:
                    result["gps"] = {
                        "lat": lat,
                        "lon": lon,
                        "lat_ref": gps_tags.get("GPSLatitudeRef", "N"),
                        "lon_ref": gps_tags.get("GPSLongitudeRef", "E"),
                        "maps_url": f"https://www.google.com/maps?q={lat},{lon}",
                    }
                    result["has_pii"] = True
            except Exception:
                pass

    except Exception:
        pass

    return result
|
||||
|
||||
def _detect_photo_faces(content: bytes, filename: str) -> int:
    """Count faces in an image using the scanner's OpenCV face detector.

    Returns 0 when the scanner or cv2/numpy is unavailable, when the bytes
    cannot be decoded as an image, or on any error. The detector is called
    with strict settings (min_size=80, neighbors=8) to limit false positives
    on background textures, labels, and artwork. (#9)
    """
    if not SCANNER_OK:
        return 0

    # Obtain cv2 / numpy through the scanner module's lazy loader.
    try:
        if getattr(ds, "_get_cv2", None) is None:
            return 0
        cv2, np = ds._get_cv2()
        if cv2 is None or np is None:
            return 0
    except Exception:
        return 0

    try:
        # Decode image bytes → cv2 BGR array
        buffer = np.frombuffer(content, dtype=np.uint8)
        img = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
        if img is None:
            # imdecode could not read the bytes — fall back to Pillow
            if not PIL_OK:
                return 0
            try:
                from PIL import Image as _PILImg
                import io as _io
                rgb = _PILImg.open(_io.BytesIO(content)).convert("RGB")
                img = cv2.cvtColor(np.array(rgb), cv2.COLOR_RGB2BGR)
            except Exception:
                return 0
        return len(ds.detect_faces_cv2(img, min_size=80, neighbors=8))
    except Exception:
        return 0
|
||||
|
||||
|
||||
def _scan_bytes(content: bytes, filename: str, poppler_path=None) -> dict:
    """Scan raw file bytes for CPRs, dispatching on the file extension.

    The bytes are written to a temporary file (removed again before
    returning) because the document scanners take a path. Returns the
    scanner's result dict; on failure the dict carries an ``"error"`` entry.
    """
    if not SCANNER_OK:
        return {"cprs": [], "dates": [], "error": "scanner not available"}

    ext = Path(filename).suffix.lower()
    with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as handle:
        handle.write(content)
        tmp_path = Path(handle.name)

    try:
        if ext == ".pdf":
            # Check if the PDF has a text layer before running full scan_pdf.
            # Image-only PDFs (scanned documents) have no text and would trigger
            # Tesseract OCR subprocesses that hang indefinitely on some files.
            try:
                import pdfplumber as _pp, io as _io
                with _pp.open(_io.BytesIO(content)) as _pdf:
                    has_text = any(ds.is_text_page(page) for page in _pdf.pages)
                if not has_text:
                    # image-only PDF — reported as containing no CPRs
                    return {"cprs": [], "dates": []}
            except Exception:
                pass  # if pdfplumber fails, fall through to full scan_pdf
            return ds.scan_pdf(tmp_path, poppler_path=poppler_path)

        if ext in {".docx", ".doc"}:
            return ds.scan_docx(tmp_path)
        if ext in {".xlsx", ".xlsm"}:
            return ds.scan_xlsx(tmp_path)
        if ext == ".csv":
            return ds.scan_csv(tmp_path)
        if ext == ".txt":
            decoded = content.decode("utf-8", errors="replace")
            cprs, dates = ds.extract_matches(decoded, 1, "text")
            return {"cprs": cprs, "dates": dates}
        if ext in {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp"}:
            return ds.scan_image(tmp_path)

        # Unknown extension: try plain text
        try:
            decoded = content.decode("utf-8", errors="replace")
            cprs, dates = ds.extract_matches(decoded, 1, "text")
            return {"cprs": cprs, "dates": dates}
        except Exception:
            return {"cprs": [], "dates": []}
    except Exception as e:
        return {"cprs": [], "dates": [], "error": str(e)}
    finally:
        try:
            tmp_path.unlink()
        except Exception:
            pass
|
||||
|
||||
def _worker_scan_pdf(pdf_path_str: str, result_q) -> None:
    """Subprocess entry point: scan one PDF and put the result on ``result_q``.

    Must stay a module-level function so a spawned process can import it.
    Any exception is reported as an ``"error"`` result instead of propagating.
    """
    try:
        import document_scanner as _ds
        from pathlib import Path as _Path
        outcome = _ds.scan_pdf(_Path(pdf_path_str))
        result_q.put(outcome)
    except Exception as e:
        failure = {"cprs": [], "dates": [], "error": str(e)}
        result_q.put(failure)
|
||||
|
||||
|
||||
def _scan_bytes_timeout(content: bytes, filename: str, timeout: int = 60) -> dict:
    """Like _scan_bytes but runs PDF scanning in a spawned subprocess with a hard timeout.

    For non-PDF files delegates straight to _scan_bytes. For PDFs it writes the
    bytes to a temp file, spawns a fresh Python process (spawn context) and
    waits up to *timeout* seconds for a result. A worker still running after
    that is terminated so the scan thread is never blocked.

    The result is read from the queue *before* the worker is joined: a process
    that has put data on a multiprocessing queue may not exit until that data
    has been consumed, so joining first can turn a finished scan with a large
    result into a false timeout.

    Returns the scanner result dict, or a dict with an ``"error"`` entry on
    timeout or when the worker exits without producing a result.
    """
    ext = Path(filename).suffix.lower()
    if ext != ".pdf":
        return _scan_bytes(content, filename)

    import multiprocessing
    import queue as _queue
    import time as _time
    ctx = multiprocessing.get_context("spawn")

    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(content)
        tmp_path_str = tmp.name
    del content  # written to temp file — release raw bytes before subprocess loads

    _missing = object()  # sentinel: no result received yet
    try:
        with _pdf_subprocess_sem:
            q = ctx.Queue()
            p = ctx.Process(target=_worker_scan_pdf, args=(tmp_path_str, q))
            p.start()

            deadline = _time.monotonic() + timeout
            result = _missing
            while result is _missing:
                remaining = deadline - _time.monotonic()
                try:
                    # Short polls so a crashed worker is noticed promptly.
                    result = q.get(timeout=max(0.0, min(0.5, remaining)))
                except _queue.Empty:
                    if not p.is_alive():
                        # Worker is gone; take anything it managed to send.
                        try:
                            result = q.get_nowait()
                        except Exception:
                            pass
                        break
                    if remaining <= 0:
                        break
                except Exception:
                    # Unreadable result — treated as "no result" below.
                    break

            if result is _missing and p.is_alive():
                p.terminate()
                p.join()
                return {"cprs": [], "dates": [], "error": f"PDF OCR timed out after {timeout}s"}

            # Queue drained (or worker dead): reap the process, forcing it
            # down if it lingers.
            p.join(5)
            if p.is_alive():
                p.terminate()
                p.join()

            if result is _missing:
                return {"cprs": [], "dates": [], "error": "Worker returned no result"}
            return result
    finally:
        try:
            Path(tmp_path_str).unlink()
        except Exception:
            pass
|
||||
|
||||
|
||||
def _scan_text_direct(text: str) -> dict:
    """Look for CPR numbers and dates in an in-memory text string.

    Calls ``ds.extract_matches()`` directly instead of ``ds.scan_text()``;
    per the original author's note, ``scan_text()`` relies on
    ``extract_cpr_and_dates()``, which is not defined in document_scanner.py
    (pre-existing bug).

    Returns:
        ``{"cprs": [...], "dates": [...]}``.  Both lists are empty when the
        scanner is unavailable, ``text`` is empty, or extraction raises.
    """
    if SCANNER_OK and text:
        try:
            found_cprs, found_dates = ds.extract_matches(text, 1, "text")
        except Exception:
            # Fall through to the empty result below.
            pass
        else:
            return {"cprs": found_cprs, "dates": found_dates}
    return {"cprs": [], "dates": []}
|
||||
|
||||
def _html_esc(s: str) -> str:
|
||||
"""HTML-escape a string for safe inline embedding."""
|
||||
import html as _h
|
||||
return _h.escape(str(s))
|
||||
|
||||
|
||||
def _get_pii_counts(text: str) -> dict:
    """Return ``ds.count_pii_types(text, use_ner=True)``, or ``{}`` on any problem.

    An empty dict is returned both when the scanner is unavailable
    (``SCANNER_OK`` is falsy) and when the call raises.
    """
    if SCANNER_OK:
        try:
            return ds.count_pii_types(text, use_ner=True)
        except Exception:
            # Counting is best-effort; fall through to the empty result.
            pass
    return {}
|
||||
|
||||
|
||||
def _make_thumb(content: bytes, filename: str) -> str:
    """Return a base64 string holding a preview for the given file.

    For .jpg/.jpeg/.png/.bmp/.webp files (when PIL is available) the image is
    converted to RGB, shrunk with ``thumbnail((280, 360))`` and re-encoded as
    JPEG at quality 82.  For every other extension, when PIL is missing, or
    when image processing raises, the result of ``_placeholder_svg`` is
    returned instead.

    Args:
        content: Raw bytes of the file.
        filename: File name; its lower-cased suffix selects the code path.
    """
    suffix = Path(filename).suffix.lower()
    can_render = PIL_OK and suffix in {".jpg", ".jpeg", ".png", ".bmp", ".webp"}
    if not can_render:
        return _placeholder_svg(suffix, filename)

    try:
        picture = PILImage.open(io.BytesIO(content)).convert("RGB")
        picture.thumbnail((280, 360), PILImage.LANCZOS)
        jpeg_out = io.BytesIO()
        picture.save(jpeg_out, format="JPEG", quality=82)
        encoded = base64.b64encode(jpeg_out.getvalue()).decode()
    except Exception:
        # Unreadable or unsupported image data: fall back to the placeholder.
        return _placeholder_svg(suffix, filename)
    return encoded
|
||||
|
||||
def _placeholder_svg(ext: str, name: str) -> str:
|
||||
colors = {
|
||||
".pdf": ("#E8453C", "PDF"), ".docx": ("#2B7CD3", "DOCX"),
|
||||
".doc": ("#2B7CD3", "DOC"), ".xlsx": ("#1E7145", "XLSX"),
|
||||
".xlsm": ("#1E7145", "XLSM"), ".csv": ("#6B7280", "CSV"),
|
||||
".eml": ("#8B44AD", "EML"), ".msg": ("#8B44AD", "MSG"),
|
||||
".txt": ("#6B7280", "TXT"),
|
||||
}
|
||||
bg, label = colors.get(ext, ("#9CA3AF", ext.upper().lstrip(".")))
|
||||
short = name[:22] + "…" if len(name) > 22 else name
|
||||
svg = f"""<svg xmlns="http://www.w3.org/2000/svg" width="280" height="360">
|
||||
<rect width="280" height="360" fill="{bg}"/>
|
||||
<rect x="20" y="20" width="240" height="280" rx="8" fill="rgba(255,255,255,0.12)"/>
|
||||
<text x="140" y="170" font-family="monospace" font-size="52" font-weight="bold"
|
||||
fill="#fff" text-anchor="middle" opacity="0.9">{label}</text>
|
||||
<text x="140" y="320" font-family="monospace" font-size="13"
|
||||
fill="#fff" text-anchor="middle" opacity="0.7">{short}</text>
|
||||
</svg>"""
|
||||
return base64.b64encode(svg.encode()).decode()
|
||||
|
||||
# ── Main scan runner ──────────────────────────────────────────────────────────
|
||||
543
docs/manuals/MANUAL-DA.md
Normal file
543
docs/manuals/MANUAL-DA.md
Normal file
@ -0,0 +1,543 @@
|
||||
# GDPR Scanner — Brugermanual
|
||||
|
||||
Version 1.6.14
|
||||
|
||||
---
|
||||
|
||||
## Indholdsfortegnelse
|
||||
|
||||
1. [Hvad er GDPR Scanner?](#1-hvad-er-gdpr-scanner)
|
||||
2. [Overblik over brugerfladen](#2-overblik-over-brugerfladen)
|
||||
3. [Forbindelse til dine datakilder](#3-forbindelse-til-dine-datakilder)
|
||||
4. [Kør en scanning](#4-kør-en-scanning)
|
||||
5. [Forstå resultaterne](#5-forstå-resultaterne)
|
||||
6. [Gennemgang og mærkning af fund](#6-gennemgang-og-mærkning-af-fund)
|
||||
7. [Sletning af elementer](#7-sletning-af-elementer)
|
||||
8. [Profiler — gem dine scanningsindstillinger](#8-profiler--gem-dine-scanningsindstillinger)
|
||||
9. [Rapporter og eksport](#9-rapporter-og-eksport)
|
||||
10. [Del resultater med en gennemganger](#10-del-resultater-med-en-gennemganger)
|
||||
11. [Planlagte scanninger](#11-planlagte-scanninger)
|
||||
12. [E-mailrapporter](#12-e-mailrapporter)
|
||||
13. [Sikkerhedskopi og gendannelse af database](#13-sikkerhedskopi-og-gendannelse-af-database)
|
||||
14. [Indstillinger — oversigt](#14-indstillinger--oversigt)
|
||||
15. [Ofte stillede spørgsmål](#15-ofte-stillede-spørgsmål)
|
||||
|
||||
---
|
||||
|
||||
## 1. Hvad er GDPR Scanner?
|
||||
|
||||
GDPR Scanner søger i din organisations digitale data — e-mails, cloud-filer, delte drev og lokale filservere — efter personoplysninger som CPR-numre, navne, adresser, telefonnumre og særlige kategorier af oplysninger efter GDPR artikel 9.
|
||||
|
||||
Når der er fundet elementer, kan du gennemgå dem, beslutte hvad der skal ske med hvert enkelt (beholde, slette eller markere som uden for scope), udarbejde en artikel 30-fortegnelse og masseslette forældet data.
|
||||
|
||||
**Hvad scanneren gennemgår:**
|
||||
- Microsoft 365: Exchange e-mail, OneDrive, SharePoint, Teams
|
||||
- Google Workspace: Gmail, Google Drev
|
||||
- Lokale og netværksbaserede filmapper (herunder SMB/NAS-drev)
|
||||
|
||||
**Hvad den finder:**
|
||||
- CPR-numre
|
||||
- Telefonnumre, e-mailadresser, postadresser
|
||||
- Bankkontonumre og IBAN-numre
|
||||
- Navne og organisationsnavne
|
||||
- Fotografier med genkendelige ansigter (valgfrit)
|
||||
- GPS-placeringsdata indlejret i billedfiler
|
||||
|
||||
---
|
||||
|
||||
## 2. Overblik over brugerfladen
|
||||
|
||||
Når du åbner scanneren, er skærmen inddelt i tre områder:
|
||||
|
||||
```
|
||||
┌─────────────────┬──────────────────────────────────────────┐
|
||||
│ │ Topbjælke: Scan-knap, profiler, handlinger│
|
||||
│ Venstre panel ├──────────────────────────────────────────┤
|
||||
│ │ │
|
||||
│ - Kilder │ Resultater / scanningsforløb │
|
||||
│ - Indstillinger│ │
|
||||
│ - Konti │ │
|
||||
│ - Statistik ├──────────────────────────────────────────┤
|
||||
│ │ Aktivitetslog │
|
||||
└─────────────────┴──────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Venstre panel** — vælg hvad der skal scannes og hvordan.
|
||||
**Topbjælke** — start en scanning, vælg profiler, og tilgå eksporter og indstillinger.
|
||||
**Resultatområde** — fundne elementer vises her, mens scanningen kører.
|
||||
**Statuslinje** — vises lige over aktivitetsloggen og angiver hvilken kilde der scannes, hvem der scannes, og hvor langt scanningen er.
|
||||
**Aktivitetslog** — viser statusbeskeder i realtid under scanningen. Klik på **▾**-pilen i loggens overskrift for at folde panelet sammen eller ud. Du kan også filtrere loggen til kun at vise fejl, kopiere al logtekst til udklipsholderen og ændre størrelsen på panelet ved at trække i håndtaget øverst på panelet.
|
||||
|
||||
### Mørkt / lyst tema
|
||||
|
||||
Klik på **🌙**-knappen øverst til højre for at skifte mellem mørkt og lyst tema. Din præference huskes.
|
||||
|
||||
---
|
||||
|
||||
## 3. Forbindelse til dine datakilder
|
||||
|
||||
Inden du kan scanne, skal du forbinde mindst én datakilde. Klik på **Kilder** i topbjælken for at åbne kildestyringspanelet.
|
||||
|
||||
### 3.1 Microsoft 365
|
||||
|
||||
Fanen Microsoft 365 viser din aktuelle forbindelsesstatus. Hvis du ser en grøn prik og dit kontonavn eller lejernavn, er du allerede forbundet.
|
||||
|
||||
**Kilder du kan slå til og fra:**
|
||||
|
||||
| Skift | Hvad der scannes |
|
||||
|-------|-----------------|
|
||||
| Outlook | Exchange-postkasser (indbakke, sendt post, alle mapper) |
|
||||
| OneDrive | Den enkelte brugers personlige cloud-lager |
|
||||
| SharePoint | Team- og projektsider |
|
||||
| Teams | Filer delt i Teams-kanaler |
|
||||
|
||||
Slå de kilder fra, du ikke ønsker at medtage. Disse indstillinger huskes.
|
||||
|
||||
### 3.2 Google Workspace
|
||||
|
||||
Fanen Google Workspace lader dig forbinde en Google Workspace-konto (tidligere G Suite) via en tjenestekonto, eller en personlig Google-konto via login.
|
||||
|
||||
**Kilder du kan slå til og fra:**
|
||||
|
||||
| Skift | Hvad der scannes |
|
||||
|-------|-----------------|
|
||||
| Gmail | Alle e-mails i den enkelte brugers indbakke og labels |
|
||||
| Google Drev | Alle filer ejet af eller delt med den enkelte bruger |
|
||||
|
||||
### 3.3 Lokale og netværksbaserede filer
|
||||
|
||||
Fanen **Filkilder** viser de lokale mapper og netværksdrev, du har konfigureret.
|
||||
|
||||
**Sådan tilføjer du en ny filkilde:**
|
||||
1. Indtast en **Betegnelse** — et navn du kan genkende (f.eks. "Skolens Fællesmappe").
|
||||
2. Indtast **Stien**:
|
||||
- Lokal mappe: `~/Dokumenter` eller `/Volumes/Drev`
|
||||
- Netværksdrev: `//nas-server/delt` eller `\\server\delt`
|
||||
3. Hvis det er et netværksdrev, udfyldes felterne **SMB-vært**, **Brugernavn** og **Adgangskode** automatisk. Adgangskoden gemmes sikkert i systemets nøglering.
|
||||
4. Klik på **Tilføj**.
|
||||
|
||||
Du kan tilføje så mange filkilder, du har brug for. De vil fremgå som valgbare kilder i venstre panel, når du er klar til at scanne.
|
||||
|
||||
---
|
||||
|
||||
## 4. Kør en scanning
|
||||
|
||||
### 4.1 Vælg dine kilder
|
||||
|
||||
I venstre panel under **Kilder** sætter du hak ved de kilder, du vil medtage. Du kan kombinere M365, Google og filkilder i samme scanning.
|
||||
|
||||
### 4.2 Vælg konti
|
||||
|
||||
Under **Konti** vises alle brugere tilknyttet din M365- og/eller Google-lejer.
|
||||
|
||||
- Brug **søgefeltet** til at finde bestemte personer.
|
||||
- Brug knapperne **Alle / Ansat / Elev** til at filtrere efter rolle.
|
||||
- Brug **Alle**- og **Ingen**-knapperne til at vælge eller fravælge alle på én gang.
|
||||
- Sæt hak ved eller fjern hak fra enkeltpersoner.
|
||||
|
||||
For filkilder er kontovalg ikke relevant — alle filer i de valgte stier scannes.
|
||||
|
||||
### 4.3 Konfigurer indstillinger
|
||||
|
||||
Under **Indstillinger** kan du justere scanningen:
|
||||
|
||||
**Datofilter (Scan e-mails/filer fra)**
|
||||
Scan kun elementer ændret efter en bestemt dato. Hurtige forudindstillinger — **1 år**, **2 år**, **5 år**, **10 år**, **Alle** — lader dig vælge et interval med ét klik. Du kan også vælge en specifik dato med datovælgeren.
|
||||
|
||||
> Tip: "2 år" er et godt udgangspunkt for den første scanning. Du kan altid udvide til "Alle" bagefter.
|
||||
|
||||
**Scan e-mailindhold** — gennemgår selve teksten i e-mails. Aktiveret som standard.
|
||||
|
||||
**Scan vedhæftede filer** — gennemgår filer vedhæftet e-mails. Aktiveret som standard.
|
||||
|
||||
**Maks. vedhæftet filstørrelse** — spring vedhæftede filer over, der er større end denne grænse (standard 20 MB). Øg grænsen, hvis du vil kontrollere større dokumenter.
|
||||
|
||||
**Maks. e-mails pr. bruger** — stop efter at have scannet dette antal e-mails per person (standard 2.000). Øg det, hvis du har brug for fuld dækning.
|
||||
|
||||
### 4.4 Start scanningen
|
||||
|
||||
Klik på den blå **Scan**-knap i topbjælken.
|
||||
|
||||
En statuslinje viser:
|
||||
- En farvet **kildemærkat** — **Outlook**, **OneDrive**, **SharePoint**, **Teams**, **Gmail**, **GDrive** eller **Local** — efterfulgt af det fulde navn på den konto, der scannes i øjeblikket
|
||||
- En løbende optælling af scannede og fundne elementer
|
||||
- Estimeret resterende tid
|
||||
|
||||
Resultater vises i hovedområdet efterhånden som de findes — du behøver ikke vente på, at scanningen er færdig, før du begynder at gennemgå dem.
|
||||
|
||||
Klik på **Stop** for at afbryde. Et kontrolpunkt gemmes automatisk, så du kan fortsætte senere.
|
||||
|
||||
### 4.5 Genoptag en afbrudt scanning
|
||||
|
||||
Hvis en scanning blev afbrudt (via stop, nedbrud eller lukning af programmet), vises et gult banner øverst i resultatområdet:
|
||||
|
||||
> Forrige scanning blev afbrudt — X scannet, Y fundet
|
||||
> **▶ Genoptag** · Start forfra
|
||||
|
||||
Klik på **▶ Genoptag** for at fortsætte fra det sted, scanningen slap. Klik på **Start forfra** for at kassere kontrolpunktet og begynde en ny scanning.
|
||||
|
||||
---
|
||||
|
||||
## 5. Forstå resultaterne
|
||||
|
||||
Hvert fundet element vises som et kort. Her er forklaringen på mærker og labels:
|
||||
|
||||
### Kildemærker
|
||||
|
||||
| Mærke | Betydning |
|
||||
|-------|-----------|
|
||||
| Outlook | Fundet i en Exchange-postkasse |
|
||||
| OneDrive | Fundet i en brugers OneDrive |
|
||||
| SharePoint | Fundet på et SharePoint-site |
|
||||
| Teams | Fundet i en Teams-kanal |
|
||||
| Gmail | Fundet i en Gmail-postkasse |
|
||||
| Google Drev | Fundet i Google Drev |
|
||||
| Lokal / Netværk | Fundet på et filshare |
|
||||
|
||||
### Risikoniveau
|
||||
|
||||
| Niveau | Betydning |
|
||||
|--------|-----------|
|
||||
| HØJ | Flere CPR-numre, særlige kategorier af data, ældre end opbevaringspolitikken eller eksternt delt |
|
||||
| MELLEM | Et enkelt CPR-nummer med noget deling eller kontekstuel risiko |
|
||||
| LAV | Et enkelt CPR-nummer, ikke delt, nyligt oprettet |
|
||||
|
||||
### Øvrige mærker
|
||||
|
||||
| Mærke | Betydning |
|
||||
|-------|-----------|
|
||||
| Tal (f.eks. **3**) | Antal CPR-numre fundet i elementet |
|
||||
| **Delt** | Elementet er delt med andre brugere |
|
||||
| **Ekstern** | Elementet er delt med nogen uden for organisationen |
|
||||
| **Art. 9** | Særlige kategorier af oplysninger fundet (helbred, religion, biometriske data mv.) |
|
||||
| **N ansigter** | N genkendelige ansigter registreret i et foto |
|
||||
| **GPS** | Filen indeholder GPS-placeringsdata i metadata |
|
||||
|
||||
### Kortvisning vs. listevisning
|
||||
|
||||
Standardvisningen er **kortvisning**. Klik på **Liste** i filterbjælken for at skifte til en kompakt tabelvisning med sorterbare kolonner. Klik på **Gitter** for at skifte tilbage.
|
||||
|
||||
### Filtrering af resultater
|
||||
|
||||
Brug filterbjælken over resultaterne til at indsnævre visningen:
|
||||
|
||||
- **Søgefelt** — søg på navn, emne eller filsti.
|
||||
- **Kildetype** — vis kun én kildetype.
|
||||
- **Disposition** — vis elementer efter gennemgangsstatus.
|
||||
- **Deling** — filtrer på delt / ekstern / alle.
|
||||
- **Risiko** — vis kun Art. 9, fotos, GPS eller høj-risiko-elementer.
|
||||
|
||||
---
|
||||
|
||||
## 6. Gennemgang og mærkning af fund
|
||||
|
||||
Klik på et resultatkort for at åbne forhåndsvisningspanelet i højre side af skærmen.
|
||||
|
||||
Forhåndsvisningen viser:
|
||||
- Elementets navn eller e-mailens emne
|
||||
- Kontoen (ejer / afsender)
|
||||
- Kilde og ændringsdato
|
||||
- Alle fundne CPR-numre og deres kontekst
|
||||
- Øvrige personoplysninger registreret (telefon, e-mailadresse, IBAN mv.)
|
||||
- Deling og ekstern adgangsinformation
|
||||
|
||||
### Angiv en disposition
|
||||
|
||||
Hvert element har en **Disposition**-rullemenu i forhåndsvisningspanelet. Vælg én af følgende:
|
||||
|
||||
| Disposition | Brug den når… |
|
||||
|-------------|---------------|
|
||||
| Ikke gennemgået | Endnu ikke vurderet — standardværdi |
|
||||
| Opbevar — lovkrav | Du er lovpligtig til at beholde den |
|
||||
| Opbevar — legitim interesse | Du har en legitim interesse i at beholde den |
|
||||
| Opbevar — kontrakt | Nødvendig i forbindelse med en kontrakt |
|
||||
| Slet — planlagt | Markeret til fremtidig sletning |
|
||||
| Privat brug — uden for scope | Personligt element, ikke inden for GDPR-scopet |
|
||||
| Slettet | Allerede slettet (angives automatisk ved sletning) |
|
||||
|
||||
Klik på **Gem** efter valget. En lille **✓ Gemt**-bekræftelse vises.
|
||||
|
||||
### Find alle elementer for en bestemt person
|
||||
|
||||
Klik på **🔍** i venstre panel (under Statistik) for at åbne **Registreret person**-opslaget. Indtast et CPR-nummer, og scanneren finder alle fundne elementer, der indeholder dette nummer. Du kan derefter slette dem alle i ét trin — i overensstemmelse med retten til sletning (GDPR artikel 17).
|
||||
|
||||
CPR-nummeret hashes inden søgningen og gemmes aldrig i klartekst.
|
||||
|
||||
---
|
||||
|
||||
## 7. Sletning af elementer
|
||||
|
||||
### 7.1 Sletning af et enkelt element
|
||||
|
||||
Med et element åbent i forhåndsvisningspanelet kan du angive dispositionen **Slet — planlagt** og bruge handlingsknappen til at slette det. E-mailen flyttes til mappen Slettet post; filer flyttes til papirkurven i den pågældende tjeneste.
|
||||
|
||||
### 7.2 Massesletning
|
||||
|
||||
Klik på **Slet**-knappen i filterbjælken for at åbne massesletningsvinduet.
|
||||
|
||||
1. **Indstil filtre** for at målrette de elementer, du ønsker at slette:
|
||||
- **Kildetype** — slet fra én kilde eller alle.
|
||||
- **Min. CPR-fund** — slet kun elementer med mindst dette antal CPR-numre.
|
||||
- **Ældre end dato** — slet kun elementer ændret inden en bestemt dato.
|
||||
- Klik på **🗓 Filter forældet** for automatisk at udfylde datoen ud fra din opbevaringspolitik.
|
||||
|
||||
2. Vinduet viser, hvor mange elementer der matcher dine filtre.
|
||||
|
||||
3. Klik på den røde **Slet matchende elementer**-knap for at fortsætte.
|
||||
|
||||
4. En statuslinje viser sletningerne i realtid. E-mails flyttes til **Slettet post**; filer flyttes til **papirkurven**.
|
||||
|
||||
En fuldstændig revisionslog over alle sletninger (hvad der er slettet, hvornår og hvorfor) medtages i artikel 30-rapporten.
|
||||
|
||||
---
|
||||
|
||||
## 8. Profiler — gem dine scanningsindstillinger
|
||||
|
||||
En profil gemmer dine valgte kilder, konti, scanningsindstillinger og datoindstillinger, så du kan genbruge dem uden at konfigurere alt på ny hver gang.
|
||||
|
||||
### Gem en profil
|
||||
|
||||
Konfigurer venstre panel præcis som du ønsker det — herunder hvilke M365-kilder, Google-kilder og lokale filkilder der er aktiveret, hvilke konti der er valgt, og alle indstillinger — og klik derefter på **Gem**-knappen i topbjælken. Indtast et navn og klik OK. Profilen gemmes og vælges med det samme.
|
||||
|
||||
### Anvend en profil
|
||||
|
||||
Klik på profil-rullemenuen i topbjælken og vælg en profil. Alle indstillinger i venstre panel — kilder, konti, indstillinger og datofilter — indlæses på én gang. Venstre panel viser derefter din aktive tilstand, og du kan justere hvad som helst, inden du scanner.
|
||||
|
||||
En **Ryd**-knap vises ved siden af rullemenuen, når en profil er valgt. Klik på den for at rydde profiletiketten uden at ændre indstillingerne i venstre panel. Det er nyttigt, når du vil køre en engangsscan uden at overskrive en gemt profil.
|
||||
|
||||
### Administrer profiler
|
||||
|
||||
Klik på **Profiler** for at åbne profiladministrationspanelet. Her kan du:
|
||||
|
||||
- **Redigere** en profil — ændre navn, beskrivelse, kilder, konti eller indstillinger.
|
||||
- **Duplikere** en profil — nyttigt som udgangspunkt for en variant.
|
||||
- **Slette** en profil.
|
||||
|
||||
> Bemærk: Redigering af en profil påvirker ikke scanninger, der allerede er gennemført med den pågældende profil.
|
||||
|
||||
---
|
||||
|
||||
## 9. Rapporter og eksport
|
||||
|
||||
### 9.1 Excel-eksport
|
||||
|
||||
Klik på **Excel** i filterbjælken for at downloade de aktuelle resultater som en Excel-projektmappe. Projektmappen indeholder:
|
||||
- Et oversigtsfaneblad med scanningsdato, antal elementer og kildefordeling.
|
||||
- Et separat faneblad for hver kildetype (Outlook, OneDrive, SharePoint, Teams, Gmail, Google Drive, Lokal, Netværk).
|
||||
- Alle fundne elementer, herunder kilde, konto, CPR-antal, risikoniveau, delingsstatus og disposition.
|
||||
|
||||
Knapperne **Excel** og **Art.30** er altid tilgængelige — også efter genstart af programmet — og eksporterer resultaterne fra den seneste afsluttede scanningssession uden at kræve en ny scanning.
|
||||
|
||||
Excel-filen er det primære arbejdsdokument til din interne gennemgangsproces.
|
||||
|
||||
### 9.2 GDPR Artikel 30-rapport (Word-dokument)
|
||||
|
||||
Klik på **Art.30** i filterbjælken for at generere et Word-dokument, der opfylder kravet i GDPR artikel 30 om at føre en fortegnelse over behandlingsaktiviteter.
|
||||
|
||||
Dokumentet indeholder:
|
||||
- **Resumé** — scanningsdato, samlet antal elementer, CPR-fund pr. kilde.
|
||||
- **Datakategorier** — hvilke typer personoplysninger der er fundet.
|
||||
- **Datafortegnelse** — den fulde liste over fundne elementer.
|
||||
- **Opbevaringsanalyse** — elementer ældre end din opbevaringspolitik, fordelt på kilder.
|
||||
- **Særlige kategorier (Art. 9)** — helbreds-, biometriske og andre følsomme oplysninger.
|
||||
- **Fotografier / biometriske data** — hvis ansigtsgenkendelse var aktiveret.
|
||||
- **GPS-data** — filer med indlejrede placeringsoplysninger.
|
||||
- **Compliance-tendens** — antal fundne elementer på tværs af dine seneste 20 scanninger.
|
||||
- **Revisionslog for sletninger** — en komplet dokumentation af alle sletninger foretaget via scanneren.
|
||||
- **Metode** — hvordan scanningen er udført og det juridiske grundlag.
|
||||
- **Noter om elevdata** — vejledning om krav til forældresamtykke for børn under 15 år.
|
||||
|
||||
---
|
||||
|
||||
## 10. Del resultater med en gennemganger
|
||||
|
||||
Du kan give en DPO, skoleleder eller compliance-koordinator skrivebeskyttet adgang til resultatgitteret — herunder mulighed for at mærke dispositioner — uden at give dem adgang til scanningskontroller, loginoplysninger eller indstillinger.
|
||||
|
||||
### 10.1 Token-links
|
||||
|
||||
Klik på **🔗**-knappen øverst til højre i topbjælken for at åbne delingspanelet.
|
||||
|
||||
1. Angiv eventuelt en **Betegnelse** for at identificere, hvem linket er til (f.eks. "DPO-gennemgang april 2026").
|
||||
2. Vælg en **Udløbsdato** — 7 dage, 30 dage, 90 dage, 1 år eller Aldrig.
|
||||
3. Klik på **Opret**. Der genereres et unikt link: `http://host:5100/view?token=…`
|
||||
4. Klik på **Kopiér** for at kopiere linket til udklipsholderen, og send det til gennemgangeren.
|
||||
|
||||
Gennemgangeren åbner linket i en browser. De kan se det fulde resultatgitter og mærke dispositioner, men kan ikke starte scanninger, ændre indstillinger, se loginoplysninger eller slette elementer.
|
||||
|
||||
**Administrer eksisterende links**
|
||||
|
||||
Delingspanelet viser alle aktive links. Hver række viser betegnelse, udløbsdato og hvornår linket sidst blev brugt. Klik på **Kopiér** for at kopiere et link igen, eller **Tilbagekald** for at gøre det ugyldigt med det samme.
|
||||
|
||||
### 10.2 Viewer-PIN
|
||||
|
||||
Som alternativ til token-links kan du angive en numerisk PIN-kode (4–8 cifre) under **Indstillinger → Sikkerhed → Viewer-PIN**. Alle, der kender PIN-koden, kan åbne `http://host:5100/view` i en browser, indtaste PIN-koden og få adgang til den skrivebeskyttede visning i hele browserens session.
|
||||
|
||||
For at angive eller ændre PIN-koden skal du indtaste den nye kode i feltet **Ny PIN** og klikke på **Gem PIN**. Klik på **Ryd PIN** for at fjerne den.
|
||||
|
||||
> **Sikkerhedsnote:** Token-links er mere sikre end en PIN-kode, fordi hvert link kan tilbagekaldes individuelt og har en udløbsdato. Brug PIN-indstillingen kun til betroede interne gennemgangere på dit lokale netværk.
|
||||
|
||||
### 10.3 Hvad gennemgangeren kan gøre
|
||||
|
||||
| Handling | Tilladt |
|
||||
|----------|---------|
|
||||
| Gennemse resultatgitter | Ja |
|
||||
| Filtrere og søge i resultater | Ja |
|
||||
| Åbne forhåndsvisning | Ja |
|
||||
| Mærke dispositioner | Ja |
|
||||
| Eksportere til Excel | Ja |
|
||||
| Eksportere Artikel 30-rapport | Ja |
|
||||
| Starte eller stoppe en scanning | Nej |
|
||||
| Se eller ændre loginoplysninger | Nej |
|
||||
| Slette elementer | Nej |
|
||||
| Tilgå indstillinger | Nej |
|
||||
| Oprette eller tilbagekalde viewer-links | Nej |
|
||||
|
||||
---
|
||||
|
||||
## 11. Planlagte scanninger
|
||||
|
||||
Gå til **Indstillinger → Planlægger** for at konfigurere automatiske scanninger.
|
||||
|
||||
### Opret en planlagt scanning
|
||||
|
||||
1. Klik på **+ Tilføj planlagt scanning**.
|
||||
2. Giv jobbet et navn.
|
||||
3. Vælg frekvens: **Dagligt**, **Ugentligt** eller **Månedligt**.
|
||||
4. For ugentlige scanninger vælges ugedag. For månedlige vælges dag i måneden.
|
||||
5. Angiv det tidspunkt, scanningen skal køre.
|
||||
6. Vælg en **Profil** — scanneren bruger den pågældende profils kilder, konti og indstillinger.
|
||||
7. Aktiver eventuelt:
|
||||
- **Send rapport automatisk** — send Excel-rapporten pr. e-mail til dine konfigurerede modtagere efter hver scanning.
|
||||
- **Håndhæv opbevaringspolitik** — slet automatisk elementer ældre end din opbevaringspolitik efter hver scanning.
|
||||
8. Klik på **Gem**.
|
||||
|
||||
Planlæggerindikatoren i topbjælken viser dato og tidspunkt for den næste planlagte scanning ("Næste: …").
|
||||
|
||||
### Se seneste kørsler
|
||||
|
||||
Fanen Planlægger viser historik over seneste kørsler med starttidspunkt, status og antal fundne elementer.
|
||||
|
||||
---
|
||||
|
||||
## 12. E-mailrapporter
|
||||
|
||||
Gå til **Indstillinger → E-mailrapport** for at konfigurere e-mail-afsendelse.
|
||||
|
||||
### Opsætning af SMTP
|
||||
|
||||
Udfyld oplysningerne for din udgående mailserver:
|
||||
|
||||
| Felt | Eksempel |
|
||||
|------|----------|
|
||||
| SMTP-vært | smtp.office365.com |
|
||||
| Port | 587 |
|
||||
| Brugernavn | scanner@skole.dk |
|
||||
| Adgangskode | (din e-mailadgangskode eller app-adgangskode) |
|
||||
| Afsenderadresse | scanner@skole.dk |
|
||||
| Modtagere | dpo@skole.dk; it@skole.dk |
|
||||
|
||||
Klik på **Gem** for at gemme, og klik derefter på **Test** for at sende en test-e-mail og bekræfte, at konfigurationen virker.
|
||||
|
||||
> Hvis din konto har MFA (to-faktor-godkendelse) aktiveret, kan du ikke bruge din almindelige adgangskode. Du skal oprette en **app-adgangskode** i din kontos sikkerhedsindstillinger:
|
||||
> - **Personlig Microsoft-konto**: account.microsoft.com/security → App-adgangskoder
|
||||
> - **Gmail**: myaccount.google.com → Sikkerhed → 2-trinsbekræftelse → App-adgangskoder
|
||||
|
||||
### Send en rapport manuelt
|
||||
|
||||
Klik på **Send nu** for øjeblikkeligt at sende den aktuelle Excel-rapport pr. e-mail til alle konfigurerede modtagere.
|
||||
|
||||
---
|
||||
|
||||
## 13. Sikkerhedskopi og gendannelse af database
|
||||
|
||||
Alle scanningsresultater, dispositioner og sletningsrevisionsloggen gemmes i en lokal database. Det anbefales at tage regelmæssige sikkerhedskopier.
|
||||
|
||||
Gå til **Indstillinger → Database**.
|
||||
|
||||
### Sikkerhedskopi (Eksport)
|
||||
|
||||
Klik på **Eksporter** for at oprette en `.zip`-sikkerhedskopi af din database. Gem den på et sikkert sted.
|
||||
|
||||
### Gendannelse (Import)
|
||||
|
||||
Klik på **Importer** for at gendanne fra en sikkerhedskopi. To tilstande er tilgængelige:
|
||||
|
||||
| Tilstand | Hvornår du bruger den |
|
||||
|----------|-----------------------|
|
||||
| Flet (sikker) | Tilføj dispositioner og sletningslog fra sikkerhedskopien til dine eksisterende data. Brug denne til at samle data fra flere installationer. |
|
||||
| Erstat (fuld gendannelse) | Slet alt eksisterende og gendan sikkerhedskopien fuldstændigt. Brug denne til at flytte til en ny maskine eller gendanne efter datatab. Kræver bekræftelse med admin-PIN. |
|
||||
|
||||
### Nulstil database
|
||||
|
||||
Klik på **Nulstil database** for at slette alle scanningsdata, dispositioner og sletningslog. Dette kan ikke fortrydes. Hvis en admin-PIN er sat, skal du indtaste den for at fortsætte.
|
||||
|
||||
---
|
||||
|
||||
## 14. Indstillinger — oversigt
|
||||
|
||||
### Fanen Generelt
|
||||
|
||||
| Indstilling | Beskrivelse |
|
||||
|-------------|-------------|
|
||||
| Tema | Mørkt eller lyst |
|
||||
|
||||
### Fanen Sikkerhed
|
||||
|
||||
| Indstilling | Beskrivelse |
|
||||
|-------------|-------------|
|
||||
| Admin-PIN | Valgfri PIN-kode, der beskytter destruktive handlinger (nulstil database, erstat ved import) |
|
||||
| Viewer-PIN | Valgfri 4–8-cifret PIN-kode, der giver alle adgang til `/view` i en browser som skrivebeskyttet gennemganger uden et token-link |
|
||||
|
||||
### Avancerede scanningsindstillinger
|
||||
|
||||
Disse indstillinger findes i venstre panel under **Indstillinger**:
|
||||
|
||||
**Delta-scanning** — efter din første fulde scanning kan du aktivere dette for kun at scanne elementer, der er ændret siden sidste scanning. Meget hurtigere til løbende kontrol. Knappen "Ryd tokens" tvinger den næste scanning til at være en fuld scanning.
|
||||
|
||||
**Søg efter ansigter i billeder** — langsommere scanning, der registrerer fotografier med genkendelige menneskelige ansigter. Markerer dem som artikel 9 biometriske data. Anbefales til skoler, der opbevarer elevfotos.
|
||||
|
||||
**Opbevaringspolitik** — når aktiveret, markeres elementer ældre end det angivne antal år som forældet. Regnskabsårets afslutning bestemmer, hvordan skæringsdatoen beregnes:
|
||||
|
||||
| Indstilling | Beregning af skæringsdato |
|
||||
|-------------|--------------------------|
|
||||
| Løbende (fra i dag) | I dag minus N år |
|
||||
| 31 dec (Bogføringsloven) | Seneste 31. december minus N år |
|
||||
| 30 jun / 31 mar | Seneste forekomst af den dato minus N år |
|
||||
|
||||
---
|
||||
|
||||
## 15. Ofte stillede spørgsmål
|
||||
|
||||
**Gemmer scanneren CPR-numre?**
|
||||
Nej. CPR-numre fundet under en scanning gemmes kun som et antal (f.eks. "3 CPR-numre fundet") og som en SHA-256-hash, der bruges til personopslag. Det faktiske nummer skrives aldrig til databasen.
|
||||
|
||||
**Hvad sker der, når jeg sletter elementer via scanneren?**
|
||||
E-mails flyttes til brugerens **Slettet post**-mappe i Exchange — de slettes ikke permanent og kan gendannes af brugeren eller en administrator. Filer flyttes til **papirkurven** i den pågældende tjeneste (OneDrive, SharePoint, filsystem). Permanent sletning kræver en efterfølgende handling af brugeren eller administrator.
|
||||
|
||||
**Kan jeg scanne uden at forbinde til Microsoft 365?**
|
||||
Ja. Du kan scanne lokale og SMB-filshares uden nogen M365- eller Google-forbindelse. Åbn **Kilder**, gå til fanen **Filkilder**, og tilføj dine filstier.
|
||||
|
||||
**Hvad er delta-scanning, og hvornår skal jeg bruge det?**
|
||||
Delta-scanning bruger Microsoft Graphs ændringstokens til kun at hente elementer ændret siden den seneste scanning. Det er ideelt til regelmæssige (f.eks. ugentlige) compliance-tjek efter, at du har gennemført en fuld basisscan. Aktiver det i afsnittet Indstillinger i venstre panel.
|
||||
|
||||
**Scanningen stoppede — kan jeg fortsætte, hvor den slap?**
|
||||
Ja. Når du starter scanningen igen, vil et gult banner tilbyde at genoptage fra kontrolpunktet. Klik på **▶ Genoptag** for at fortsætte. Hvis du foretrækker at starte forfra, klikker du på **Start forfra**.
|
||||
|
||||
**Hvordan dokumenterer jeg compliance, hvis vi bliver auditeret?**
|
||||
Brug **Art.30**-knappen til at eksportere artikel 30-rapporten. Det er et Word-dokument, der dækker din datafortegnelse, opbevaringsanalyse, sletningslog og metode — præcis hvad en tilsynsmyndighed (Datatilsynet) typisk anmoder om.
|
||||
|
||||
**Hvad gør filteret "Elev / Ansat"?**
|
||||
Scanneren klassificerer brugere som ansatte eller elever ud fra deres Microsoft 365-licenstype eller Google Workspace-organisationsenhed. Du kan bruge dette filter i kontolisten til at begrænse en scanning til kun ansatte, kun elever eller en bestemt person. Det er nyttigt, fordi reglerne for behandling af elevdata — særligt for børn under 15 år — adskiller sig fra reglerne for medarbejderdata i henhold til databeskyttelsesloven.
|
||||
|
||||
**Hvordan tilføjer jeg en konto, der ikke er på listen?**
|
||||
I kontoafsnittet i venstre panel er der et felt **+ Tilføj konto manuelt**. Indtast e-mailadressen eller UPN'en, og den tilføjes til den aktuelle sessions kontoliste.
|
||||
|
||||
**Kører scanneren? Jeg kan ikke se en statuslinje.**
|
||||
Tjek aktivitetsloggen nederst på skærmen. Hvis en scanning kører, vises der beskeder her. Hvis du ikke ser noget, er scanningen muligvis afsluttet eller ikke startet. Kontrollér også, at du har valgt mindst én kilde og mindst én konto.
|
||||
|
||||
**Kan en gennemganger mærke dispositioner uden adgang til scanningskontrollerne?**
|
||||
Ja. Brug **🔗 Del**-knappen til at oprette et skrivebeskyttet viewer-link eller angiv en Viewer-PIN under Indstillinger → Sikkerhed. Gennemgangeren åbner linket i sin browser og kan gennemse resultater og mærke dispositioner uden at se loginoplysninger, kilder eller scanningsknapper. Se afsnit 10 for detaljer.
|
||||
|
||||
---
|
||||
|
||||
*GDPR Scanner v1.6.14 — teknisk opsætning og konfiguration: se README.md*
|
||||
543
docs/manuals/MANUAL-EN.md
Normal file
543
docs/manuals/MANUAL-EN.md
Normal file
@ -0,0 +1,543 @@
|
||||
# GDPR Scanner — User Manual
|
||||
|
||||
Version 1.6.14
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [What is GDPR Scanner?](#1-what-is-gdpr-scanner)
|
||||
2. [The Interface at a Glance](#2-the-interface-at-a-glance)
|
||||
3. [Connecting to Your Data Sources](#3-connecting-to-your-data-sources)
|
||||
4. [Running a Scan](#4-running-a-scan)
|
||||
5. [Understanding the Results](#5-understanding-the-results)
|
||||
6. [Reviewing and Tagging Results](#6-reviewing-and-tagging-results)
|
||||
7. [Deleting Items](#7-deleting-items)
|
||||
8. [Profiles — Saving Your Scan Settings](#8-profiles--saving-your-scan-settings)
|
||||
9. [Reports and Exports](#9-reports-and-exports)
|
||||
10. [Sharing Results with a Reviewer](#10-sharing-results-with-a-reviewer)
|
||||
11. [Scheduled Scans](#11-scheduled-scans)
|
||||
12. [Email Reports](#12-email-reports)
|
||||
13. [Database Backup and Restore](#13-database-backup-and-restore)
|
||||
14. [Settings Reference](#14-settings-reference)
|
||||
15. [Frequently Asked Questions](#15-frequently-asked-questions)
|
||||
|
||||
---
|
||||
|
||||
## 1. What is GDPR Scanner?
|
||||
|
||||
GDPR Scanner searches your organisation's digital data — emails, cloud files, shared drives, and local file servers — for personal data such as CPR numbers, names, addresses, phone numbers, and special-category data under GDPR Article 9.
|
||||
|
||||
When items are found, you can review them, decide what to do with each one (keep, delete, or note as out of scope), produce an Article 30 compliance report, and delete overdue data in bulk.
|
||||
|
||||
**What it scans:**
|
||||
- Microsoft 365: Exchange email, OneDrive, SharePoint, Teams
|
||||
- Google Workspace: Gmail, Google Drive
|
||||
- Local and network file shares (including SMB/NAS drives)
|
||||
|
||||
**What it finds:**
|
||||
- CPR numbers (Danish civil registration numbers)
|
||||
- Phone numbers, email addresses, postal addresses
|
||||
- Bank account and IBAN numbers
|
||||
- Names and organisation names
|
||||
- Photographs containing recognisable faces (optional)
|
||||
- GPS location data embedded in image files
|
||||
|
||||
---
|
||||
|
||||
## 2. The Interface at a Glance
|
||||
|
||||
When you open the scanner, the screen is divided into the following areas:
|
||||
|
||||
```
|
||||
┌─────────────────┬──────────────────────────────────────────┐
|
||||
│ │ Top bar: Scan button, profiles, actions │
|
||||
│ Left sidebar ├──────────────────────────────────────────┤
|
||||
│ │ │
|
||||
│ - Sources │ Results / scan progress │
|
||||
│ - Options │ │
|
||||
│ - Accounts │ │
|
||||
│ - Stats ├──────────────────────────────────────────┤
|
||||
│ │ Activity log │
|
||||
└─────────────────┴──────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Left sidebar** — choose what to scan and how.
|
||||
**Top bar** — start a scan, select profiles, and access exports and settings.
|
||||
**Results area** — flagged items appear here as the scan runs.
|
||||
**Progress bar** — sits just above the activity log and shows which source is being scanned, who is being scanned, and how far along the scan is.
|
||||
**Activity log** — shows live status messages during scanning. Click the **▾** arrow in the log header to collapse or expand the panel. You can also filter the log to show only errors, copy all log text to the clipboard, and resize the panel by dragging the handle at its top edge.
|
||||
|
||||
### Dark / Light mode
|
||||
|
||||
Click the **🌙** button in the top-right corner to switch between dark and light mode. Your preference is remembered.
|
||||
|
||||
---
|
||||
|
||||
## 3. Connecting to Your Data Sources
|
||||
|
||||
Before you can scan, you need to connect to at least one data source. Click the **Sources** button in the top bar to open the Source Management panel.
|
||||
|
||||
### 3.1 Microsoft 365
|
||||
|
||||
The Microsoft 365 tab shows your current connection status. If you see a green dot and your account or tenant name, you are already connected.
|
||||
|
||||
**Sources you can enable or disable:**
|
||||
|
||||
| Toggle | What it scans |
|
||||
|--------|---------------|
|
||||
| Outlook | Exchange mailboxes (inbox, sent, all folders) |
|
||||
| OneDrive | Each user's personal cloud storage |
|
||||
| SharePoint | Team and project sites |
|
||||
| Teams | Files shared in Teams channels |
|
||||
|
||||
Turn off any source you do not want to include. These settings are remembered.
|
||||
|
||||
### 3.2 Google Workspace
|
||||
|
||||
The Google Workspace tab lets you connect a Google Workspace (formerly G Suite) account via a service account, or a personal Google account via sign-in.
|
||||
|
||||
**Sources you can enable or disable:**
|
||||
|
||||
| Toggle | What it scans |
|
||||
|--------|---------------|
|
||||
| Gmail | All emails in each user's inbox and labels |
|
||||
| Google Drive | All files owned by or shared with each user |
|
||||
|
||||
### 3.3 Local and Network File Shares
|
||||
|
||||
The **Filkilder** (File Sources) tab lists any local folders or network drives you have configured.
|
||||
|
||||
**To add a new file source:**
|
||||
1. Enter a **Label** — a friendly name you will recognise (e.g. "Skolens Fællesmappe").
|
||||
2. Enter the **Path**:
|
||||
- Local folder: `~/Documents` or `/Volumes/Share`
|
||||
- Network share: `//nas-server/shared` or `\\server\share`
|
||||
3. If it is a network share, fill in the **SMB Host**, **Username**, and **Password** fields that appear automatically. The password is stored securely in your system keychain.
|
||||
4. Click **Tilføj** (Add).
|
||||
|
||||
You can add as many file sources as you need. Each one will appear as a selectable source in the main sidebar when you are ready to scan.
|
||||
|
||||
---
|
||||
|
||||
## 4. Running a Scan
|
||||
|
||||
### 4.1 Select Your Sources
|
||||
|
||||
In the left sidebar under **Kilder** (Sources), tick the sources you want to include in this scan. You can mix M365, Google, and file sources in the same scan.
|
||||
|
||||
### 4.2 Choose Your Accounts
|
||||
|
||||
Under **Konti** (Accounts) the sidebar shows all users connected to your M365 and/or Google tenant.
|
||||
|
||||
- Use the **search box** to find specific people.
|
||||
- Use the **Alle / Ansat / Elev** buttons to filter by role.
|
||||
- Use the **Alle** and **Ingen** buttons to select or deselect everyone at once.
|
||||
- Tick or untick individual names.
|
||||
|
||||
For file sources, accounts are not relevant — all files in the selected paths are scanned.
|
||||
|
||||
### 4.3 Configure Options
|
||||
|
||||
Under **Indstillinger** (Options) you can refine the scan:
|
||||
|
||||
**Date filter (Scan e-mails/filer fra)**
|
||||
Only scan items modified after a certain date. Quick presets — **1 år**, **2 år**, **5 år**, **10 år**, **Alle** — let you choose a window with one click. You can also pick a specific date with the date picker.
|
||||
|
||||
> Tip: Starting with "2 år" is a good first scan. You can always widen to "Alle" later.
|
||||
|
||||
**Email body** — scan the text content of emails. On by default.
|
||||
|
||||
**Attachments** — scan files attached to emails. On by default.
|
||||
|
||||
**Max attachment size** — skip attachments larger than this limit (default 20 MB). Increase it if you want to check large documents.
|
||||
|
||||
**Max emails per user** — stop after scanning this many emails per person (default 2,000). Increase if you need complete coverage.
|
||||
|
||||
### 4.4 Start the Scan
|
||||
|
||||
Click the blue **Scan** button in the top bar.
|
||||
|
||||
A progress bar appears showing:
|
||||
- A coloured **source label** — **Outlook**, **OneDrive**, **SharePoint**, **Teams**, **Gmail**, **GDrive**, or **Local** — followed by the full name of the account currently being scanned
|
||||
- A live count of items scanned and flagged
|
||||
- An estimated time remaining
|
||||
|
||||
Results appear in the main area as they are found — you do not need to wait for the scan to finish before reviewing them.
|
||||
|
||||
To stop a scan, click **Stop**. A checkpoint is saved automatically so you can resume later.
|
||||
|
||||
### 4.5 Resuming an Interrupted Scan
|
||||
|
||||
If a scan was interrupted (by a stop, a crash, or closing the application), a yellow banner appears at the top of the results area:
|
||||
|
||||
> Previous scan interrupted — X scanned, Y found
|
||||
> **▶ Genoptag** · Start fresh
|
||||
|
||||
Click **▶ Genoptag** to continue from where the scan left off. Click **Start fresh** to discard the checkpoint and begin again.
|
||||
|
||||
---
|
||||
|
||||
## 5. Understanding the Results
|
||||
|
||||
Each flagged item appears as a card. Here is what the badges and labels mean:
|
||||
|
||||
### Source badges
|
||||
|
||||
| Badge | Meaning |
|
||||
|-------|---------|
|
||||
| Outlook | Found in an Exchange mailbox |
|
||||
| OneDrive | Found in a user's OneDrive |
|
||||
| SharePoint | Found in a SharePoint site |
|
||||
| Teams | Found in a Teams channel |
|
||||
| Gmail | Found in a Gmail mailbox |
|
||||
| Google Drive | Found in Google Drive |
|
||||
| Local / Network | Found on a file share |
|
||||
|
||||
### Risk level
|
||||
|
||||
| Level | Meaning |
|
||||
|-------|---------|
|
||||
| HIGH | Multiple CPR numbers, special-category data, older than retention policy, or externally shared |
|
||||
| MEDIUM | Single CPR with some sharing or contextual risk |
|
||||
| LOW | Single CPR number, not shared, recent |
|
||||
|
||||
### Other badges
|
||||
|
||||
| Badge | Meaning |
|
||||
|-------|---------|
|
||||
| Number (e.g. **3**) | Number of CPR numbers found in this item |
|
||||
| **Delt** (Shared) | The item has been shared with other users |
|
||||
| **Ekstern** (External) | The item has been shared with someone outside your organisation |
|
||||
| **Art. 9** | Special-category data detected (health, religion, biometric, etc.) |
|
||||
| **N faces** | N recognisable faces detected in a photo |
|
||||
| **GPS** | The file contains GPS location data in its metadata |
|
||||
|
||||
### Grid view vs. list view
|
||||
|
||||
The default **grid view** shows cards. Click **List** in the filter bar to switch to a compact table view with sortable columns. Click **Grid** to switch back.
|
||||
|
||||
### Filtering results
|
||||
|
||||
Use the filter bar above the results to narrow down what you see:
|
||||
|
||||
- **Search box** — search by name, subject, or path.
|
||||
- **Source dropdown** — show only one source type.
|
||||
- **Disposition dropdown** — show items by their review status.
|
||||
- **Transfer dropdown** — filter by shared / external / all.
|
||||
- **Risk dropdown** — show only Art. 9, photos, GPS, or high-risk items.
|
||||
|
||||
---
|
||||
|
||||
## 6. Reviewing and Tagging Results
|
||||
|
||||
Click any result card to open the preview panel on the right side of the screen.
|
||||
|
||||
The preview shows:
|
||||
- The item name or email subject
|
||||
- The account (owner / sender)
|
||||
- Source and modification date
|
||||
- All CPR numbers found and their context
|
||||
- Other personal data detected (phone, email address, IBAN, etc.)
|
||||
- Sharing and external-access information
|
||||
|
||||
### Setting a disposition
|
||||
|
||||
Every item has a **Disposition** dropdown in the preview panel. Choose one of:
|
||||
|
||||
| Disposition | Use when… |
|
||||
|-------------|-----------|
|
||||
| Ikke gennemgået (Unreviewed) | Not yet assessed — the default |
|
||||
| Opbevar — lovkrav | You must keep it by law |
|
||||
| Opbevar — legitim interesse | You have a legitimate interest in keeping it |
|
||||
| Opbevar — kontrakt | Required for a contract |
|
||||
| Slet — planlagt | Marked for future deletion |
|
||||
| Privat brug — uden for scope | Personal item, not in scope for GDPR processing |
|
||||
| Slettet | Already deleted (set automatically when you delete an item) |
|
||||
|
||||
After choosing, click **Gem**. A small **✓ Gemt** confirmation appears.
|
||||
|
||||
### Finding all items for a specific person
|
||||
|
||||
Click **🔍** in the sidebar (under Stats) to open the **Data Subject Lookup**. Enter a CPR number and the scanner will find all flagged items containing that number. You can then delete all of them in one step — supporting the GDPR right to erasure (Article 17).
|
||||
|
||||
The CPR number is hashed before the search and is never stored in plaintext.
|
||||
|
||||
---
|
||||
|
||||
## 7. Deleting Items
|
||||
|
||||
### 7.1 Deleting a Single Item
|
||||
|
||||
With an item open in the preview panel, set its disposition to **Slet — planlagt**, then use the action button to delete it. The item moves to the Deleted Items folder (email) or recycle bin (files).
|
||||
|
||||
### 7.2 Bulk Delete
|
||||
|
||||
Click the **Delete** button in the filter bar to open the bulk delete modal.
|
||||
|
||||
1. **Set filters** to target the items you want to delete:
|
||||
- **Source type** — delete from one source or all.
|
||||
- **Min. CPR hits** — only delete items with at least this many CPR numbers.
|
||||
- **Older than date** — only delete items modified before a specific date.
|
||||
- Click **🗓 Filter overdue** to automatically fill in the date based on your retention policy.
|
||||
|
||||
2. The modal shows how many items match your filters.
|
||||
|
||||
3. Click the red **Delete matching items** button to proceed.
|
||||
|
||||
4. A progress bar shows deletions as they happen. Emails go to **Deleted Items**; files go to the **recycle bin**.
|
||||
|
||||
A full audit log of every deletion (what was deleted, when, and why) is included in the Article 30 report.
|
||||
|
||||
---
|
||||
|
||||
## 8. Profiles — Saving Your Scan Settings
|
||||
|
||||
A profile stores your chosen sources, accounts, scan options, and date settings so you can re-use them without reconfiguring every time.
|
||||
|
||||
### Saving a profile
|
||||
|
||||
Configure the sidebar exactly as you want it — including which M365 sources, Google sources, and local file sources are enabled, which accounts are selected, and all options — then click the **Save** button in the top bar. Enter a name and click OK. The profile is saved and selected immediately.
|
||||
|
||||
### Applying a profile
|
||||
|
||||
Click the profile dropdown in the top bar and select a profile. All sidebar settings — sources, accounts, options, and date filter — are loaded at once. The sidebar then shows your live state and you can adjust anything before scanning.
|
||||
|
||||
A **Clear** button appears next to the dropdown after you select a profile. Click it to clear the profile label without changing the sidebar settings. This is useful when you want to run a one-off scan without overwriting a saved profile.
|
||||
|
||||
### Managing profiles
|
||||
|
||||
Click **Profiles** to open the profile management panel. Here you can:
|
||||
|
||||
- **Edit** any profile — change its name, description, sources, accounts, or options.
|
||||
- **Duplicate** a profile — useful as a starting point for a variation.
|
||||
- **Delete** a profile.
|
||||
|
||||
> Note: Editing a profile does not affect scans already completed with that profile.
|
||||
|
||||
---
|
||||
|
||||
## 9. Reports and Exports
|
||||
|
||||
### 9.1 Excel Export
|
||||
|
||||
Click **Excel** in the filter bar to download the current results as an Excel workbook. The workbook contains:
|
||||
- A summary tab with scan date, item counts, and source breakdown.
|
||||
- A separate tab for each source type (Outlook, OneDrive, SharePoint, Teams, Gmail, Google Drive, Local, Network).
|
||||
- Every flagged item, including source, account, CPR count, risk level, sharing status, and disposition.
|
||||
|
||||
The **Excel** and **Art.30** buttons are always available — even after restarting the application — and will export the results from the most recent completed scan session without requiring a new scan.
|
||||
|
||||
The Excel file is the main working document for your internal review process.
|
||||
|
||||
### 9.2 GDPR Article 30 Report (Word document)
|
||||
|
||||
Click **Art.30** in the filter bar to generate a Word document that satisfies the GDPR Article 30 requirement to maintain a record of processing activities.
|
||||
|
||||
The document includes:
|
||||
- **Executive summary** — scan date, total items, CPR counts per source.
|
||||
- **Data categories** — which types of personal data were found.
|
||||
- **Data inventory** — the full list of flagged items.
|
||||
- **Retention analysis** — items older than your retention policy, with a breakdown by source.
|
||||
- **Special-category data (Art. 9)** — health, biometric, and other sensitive data found.
|
||||
- **Photographs / biometric data** — if face scanning was enabled.
|
||||
- **GPS data** — files with embedded location information.
|
||||
- **Compliance trend** — flagged counts across your last 20 scans.
|
||||
- **Deletion audit log** — a complete record of all deletions made through the scanner.
|
||||
- **Methodology** — how the scan was performed and the legal basis for scanning.
|
||||
- **Notes on student data** — guidance on parental consent requirements for children under 15.
|
||||
|
||||
---
|
||||
|
||||
## 10. Sharing Results with a Reviewer
|
||||
|
||||
You can give a DPO, school principal, or compliance coordinator read-only access to the results grid — including the ability to tag dispositions — without giving them access to scan controls, credentials, or settings.
|
||||
|
||||
### 10.1 Token links
|
||||
|
||||
Click the **🔗** button in the top-right of the top bar to open the Share panel.
|
||||
|
||||
1. Optionally enter a **Label** to identify who the link is for (e.g. "DPO review April 2026").
|
||||
2. Choose an **Expiry** — 7 days, 30 days, 90 days, 1 year, or Never.
|
||||
3. Click **Create**. A unique link is generated: `http://host:5100/view?token=…`
|
||||
4. Click **Copy** to copy the link to your clipboard, then send it to the reviewer.
|
||||
|
||||
The reviewer opens the link in any browser. They see the full results grid and can tag dispositions but cannot start scans, change settings, view credentials, or delete items.
|
||||
|
||||
**Managing existing links**
|
||||
|
||||
The Share panel lists all active links. Each row shows the label, expiry date, and when the link was last used. Click **Copy** to copy a link again, or **Revoke** to invalidate it immediately.
|
||||
|
||||
### 10.2 Viewer PIN
|
||||
|
||||
As an alternative to token links, you can set a numeric PIN (4–8 digits) in **Settings → Security → Viewer PIN**. Anyone who knows the PIN can open `http://host:5100/view` in a browser, enter the PIN, and access the read-only view for the duration of their browser session.
|
||||
|
||||
To set or change the PIN, enter the new PIN in the **New PIN** field and click **Save PIN**. To remove it, click **Clear PIN**.
|
||||
|
||||
> **Security note:** Token links are more secure than a PIN because each link can be individually revoked and has an expiry date. Use the PIN option only for trusted internal reviewers on your local network.
|
||||
|
||||
### 10.3 What the reviewer can do
|
||||
|
||||
| Action | Allowed |
|
||||
|--------|---------|
|
||||
| Browse results grid | Yes |
|
||||
| Filter and search results | Yes |
|
||||
| Open item preview | Yes |
|
||||
| Tag dispositions | Yes |
|
||||
| Export to Excel | Yes |
|
||||
| Export Article 30 report | Yes |
|
||||
| Start or stop a scan | No |
|
||||
| View or change credentials | No |
|
||||
| Delete items | No |
|
||||
| Access Settings | No |
|
||||
| Create or revoke viewer links | No |
|
||||
|
||||
---
|
||||
|
||||
## 11. Scheduled Scans
|
||||
|
||||
Go to **Settings → Planlægger** to configure automatic scans.
|
||||
|
||||
### Creating a scheduled scan
|
||||
|
||||
1. Click **+ Tilføj planlagt scanning** (+ Add scheduled scan).
|
||||
2. Give the job a name.
|
||||
3. Choose the frequency: **Dagligt**, **Ugentligt**, or **Månedligt**.
|
||||
4. For weekly scans, choose the day of the week. For monthly scans, choose the day of the month.
|
||||
5. Set the time the scan should run.
|
||||
6. Choose a **Profile** — the scanner will use that profile's sources, accounts, and options.
|
||||
7. Optionally enable:
|
||||
- **Send rapport automatisk** — email the Excel report to your configured recipients after each scan.
|
||||
- **Håndhæv opbevaringspolitik** — automatically delete items older than your retention policy after each scan.
|
||||
8. Click **Gem** (Save).
|
||||
|
||||
The scheduler indicator in the top bar shows the date and time of the next scheduled scan ("Next: …").
|
||||
|
||||
### Viewing recent runs
|
||||
|
||||
The scheduler tab shows a history of recent runs, including start time, status, and the number of items flagged.
|
||||
|
||||
---
|
||||
|
||||
## 12. Email Reports
|
||||
|
||||
Go to **Settings → E-mailrapport** to configure email sending.
|
||||
|
||||
### Setting up SMTP
|
||||
|
||||
Fill in your outgoing mail server details:
|
||||
|
||||
| Field | Example |
|
||||
|-------|---------|
|
||||
| SMTP host | smtp.office365.com |
|
||||
| Port | 587 |
|
||||
| Username | scanner@skole.dk |
|
||||
| Password | (your email password or app password) |
|
||||
| From address | scanner@skole.dk |
|
||||
| Recipients | dpo@skole.dk; it@skole.dk |
|
||||
|
||||
Click **Gem** to save, then click **Test** to send a test email and verify the configuration is working.
|
||||
|
||||
> If your account has MFA (two-factor authentication) enabled, you cannot use your regular password. You need to create an **App Password** in your account security settings:
|
||||
> - **Microsoft personal account**: account.microsoft.com/security → App passwords
|
||||
> - **Gmail**: myaccount.google.com → Security → 2-Step Verification → App passwords
|
||||
|
||||
### Sending a report manually
|
||||
|
||||
Click **Send nu** (Send now) to email the current Excel report immediately to all configured recipients.
|
||||
|
||||
---
|
||||
|
||||
## 13. Database Backup and Restore
|
||||
|
||||
All scan results, dispositions, and the deletion audit log are stored in a local database. It is good practice to take regular backups.
|
||||
|
||||
Go to **Settings → Database**.
|
||||
|
||||
### Backup (Export)
|
||||
|
||||
Click **Export** to create a `.zip` backup of your database. Save it to a safe location.
|
||||
|
||||
### Restore (Import)
|
||||
|
||||
Click **Import** to restore from a backup. Two modes are available:
|
||||
|
||||
| Mode | When to use |
|
||||
|------|-------------|
|
||||
| Merge (safe) | Add dispositions and deletion log from the backup to your existing data. Use this to consolidate data from multiple installations. |
|
||||
| Replace (full restore) | Erase everything and restore the backup completely. Use this to move to a new machine or recover from data loss. Requires Admin PIN confirmation. |
|
||||
|
||||
### Reset database
|
||||
|
||||
Click **Reset DB** to wipe all scan data, dispositions, and deletion log. This is irreversible. If an Admin PIN is set, you must enter it to proceed.
|
||||
|
||||
---
|
||||
|
||||
## 14. Settings Reference
|
||||
|
||||
### General tab
|
||||
|
||||
| Setting | Description |
|
||||
|---------|-------------|
|
||||
| Theme | Dark or light mode |
|
||||
|
||||
### Security tab
|
||||
|
||||
| Setting | Description |
|
||||
|---------|-------------|
|
||||
| Admin PIN | Optional PIN that protects destructive actions (database reset, replace import) |
|
||||
| Viewer PIN | Optional 4–8 digit PIN that lets anyone open `/view` in a browser for read-only access to results without a token link |
|
||||
|
||||
### Advanced scan options
|
||||
|
||||
These options are in the left sidebar under **Indstillinger**:
|
||||
|
||||
**Delta scanning** — after your first full scan, enable this to scan only items that have changed since the last scan. Much faster for routine checks. A "Clear tokens" button forces the next scan to be a full scan.
|
||||
|
||||
**Scan photos for faces** — slower scan that detects photographs containing recognisable human faces. Flags them as Article 9 biometric data. Recommended for schools storing student photos.
|
||||
|
||||
**Retention policy** — when enabled, marks items older than the specified number of years as overdue. The fiscal year end setting determines how the cutoff date is calculated:
|
||||
|
||||
| Option | Cutoff date calculation |
|
||||
|--------|------------------------|
|
||||
| Rolling (fra i dag) | Today minus N years |
|
||||
| 31 dec (Bogføringsloven) | Last 31 December minus N years |
|
||||
| 30 jun / 31 mar | Last occurrence of that date minus N years |
|
||||
|
||||
---
|
||||
|
||||
## 15. Frequently Asked Questions
|
||||
|
||||
**Does the scanner store CPR numbers?**
|
||||
No. CPR numbers found during a scan are stored only as a count (e.g. "3 CPR numbers found") and as a SHA-256 hash used for the Data Subject Lookup. The actual number is never written to the database.
|
||||
|
||||
**What happens when I delete items through the scanner?**
|
||||
Emails are moved to the user's **Deleted Items** folder in Exchange — they are not permanently deleted and can be recovered by the user or an administrator. Files are moved to the **recycle bin** of the relevant service (OneDrive, SharePoint, file system). A permanent deletion requires a second action by the user or admin.
|
||||
|
||||
**Can I scan without connecting to Microsoft 365?**
|
||||
Yes. You can scan local and SMB file shares without any M365 or Google connection. Open **Sources**, go to the **Filkilder** tab, and add your file paths.
|
||||
|
||||
**What is delta scanning and when should I use it?**
|
||||
Delta scanning uses Microsoft Graph change tokens to fetch only items modified since the last scan. It is ideal for regular (e.g. weekly) compliance checks after you have done a full baseline scan. Enable it in the Options section of the sidebar.
|
||||
|
||||
**The scan stopped — can I continue where it left off?**
|
||||
Yes. When you restart the scan, a yellow banner will offer to resume from the checkpoint. Click **▶ Genoptag** to continue. If you prefer to start over, click **Start fresh**.
|
||||
|
||||
**How do I prove compliance if we are audited?**
|
||||
Use the **Art.30** button to export the Article 30 report. It is a Word document covering your data inventory, retention analysis, deletion log, and methodology — exactly what a supervisory authority (Datatilsynet) typically requests.
|
||||
|
||||
**What does the "Elev / Ansat" filter do?**
|
||||
The scanner classifies users as staff (Ansat) or students (Elev) based on their Microsoft 365 licence type or Google Workspace organisational unit. You can use this filter in the accounts list to restrict a scan to only staff, only students, or a specific individual. This is useful because the rules for processing student data — especially for children under 15 — differ from the rules for staff data under Databeskyttelsesloven.
|
||||
|
||||
**How do I add an account that is not in the list?**
|
||||
In the accounts section of the sidebar, there is a **+ Tilføj konto manuelt** (Add account manually) field. Enter the email address or UPN and it will be added to the current session's account list.
|
||||
|
||||
**Is the scanner running? I cannot see a progress bar.**
|
||||
Check the activity log at the bottom of the screen. If a scan is running, messages appear there. If you see nothing, the scan may have completed or not started. Also check that you have at least one source ticked and at least one account selected.
|
||||
|
||||
**Can a reviewer tag dispositions without access to the scan controls?**
|
||||
Yes. Use the **🔗 Share** button to create a read-only viewer link or set a Viewer PIN in Settings → Security. The reviewer opens the link in their browser and can browse results and tag dispositions without seeing credentials, sources, or scan buttons. See section 10 for details.
|
||||
|
||||
---
|
||||
|
||||
*GDPR Scanner v1.6.14 — for technical setup and configuration see README.md*
|
||||
144
docs/setup/GOOGLE_SETUP.md
Normal file
144
docs/setup/GOOGLE_SETUP.md
Normal file
@ -0,0 +1,144 @@
|
||||
# Google Workspace Setup
|
||||
|
||||
Step-by-step guide for connecting GDPRScanner to Google Workspace via a service account.
|
||||
|
||||
GDPRScanner connects using a **service account** with **domain-wide delegation** — this allows it to scan all users' Gmail and Drive without requiring each user to sign in individually.
|
||||
|
||||
---
|
||||
|
||||
## 1. Create a Google Cloud project
|
||||
|
||||
Go to [console.cloud.google.com](https://console.cloud.google.com) and create a new project (or use an existing one).
|
||||
|
||||
---
|
||||
|
||||
## 2. Enable the required APIs
|
||||
|
||||
In your project: **APIs & Services → Enable APIs and Services**. Enable:
|
||||
|
||||
- **Gmail API**
|
||||
- **Google Drive API**
|
||||
- **Admin SDK API**
|
||||
|
||||
---
|
||||
|
||||
## 3. Create a service account
|
||||
|
||||
Go to **IAM & Admin → Service accounts → Create service account**.
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Name | gdprscanner (or any name) |
|
||||
| Description | GDPRScanner service account |
|
||||
|
||||
Click **Create and continue**. Skip the optional role and user access steps. Click **Done**.
|
||||
|
||||
### Create a key
|
||||
|
||||
Click on the service account → **Keys → Add key → Create new key → JSON**.
|
||||
|
||||
Download the JSON file. This is your service account key — treat it like a password.
|
||||
|
||||
---
|
||||
|
||||
## 4. Enable domain-wide delegation
|
||||
|
||||
Back on the service account page: **Show advanced settings → Domain-wide delegation → Enable**.
|
||||
|
||||
Note the **Client ID** (a long number) — you'll need it in the next step.
|
||||
|
||||
---
|
||||
|
||||
## 5. Authorise scopes in Google Admin Console
|
||||
|
||||
Go to [admin.google.com](https://admin.google.com) →
|
||||
**Security → Access and data control → API controls → Manage domain-wide delegation → Add new**.
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Client ID | The numeric Client ID from the service account |
|
||||
| OAuth scopes | See below |
|
||||
|
||||
Add all of these scopes (paste as a comma-separated list):
|
||||
|
||||
```
|
||||
https://www.googleapis.com/auth/admin.directory.user.readonly,
|
||||
https://www.googleapis.com/auth/gmail.readonly,
|
||||
https://www.googleapis.com/auth/drive.readonly
|
||||
```
|
||||
|
||||
Click **Authorise**. Changes can take a few minutes to propagate.
|
||||
|
||||
---
|
||||
|
||||
## 6. Connect in GDPRScanner
|
||||
|
||||
Open GDPRScanner → **Source Management → Google Workspace** tab.
|
||||
|
||||
1. **Upload service account key** — select the JSON file you downloaded in step 3
|
||||
2. **Admin email** — enter the email address of a Google Workspace admin user in your domain (e.g. `admin@skolen.dk`). The service account impersonates this user to call the Admin Directory API.
|
||||
|
||||
Click **Connect**. If successful, the status dot turns green and shows the service account email.
|
||||
|
||||
---
|
||||
|
||||
## 7. User role classification
|
||||
|
||||
GDPRScanner classifies Google Workspace users as **staff** or **student** based on their **Organisational Unit (OU) path** in Google Admin.
|
||||
|
||||
The mapping is in `classification/google_ou_roles.json`. Edit it to match your school's OU structure — no code change required.
|
||||
|
||||
Default mapping:
|
||||
|
||||
| OU prefix | Role |
|
||||
|---|---|
|
||||
| `/Elever` | student |
|
||||
| `/Personale` | staff |
|
||||
| `/Admin` | staff |
|
||||
|
||||
To see your OU structure: **Google Admin → Directory → Administrer organisationsenheder** (Manage organisational units).
|
||||
|
||||
Example `classification/google_ou_roles.json` for a typical Danish school (Gudenaaskolen.dk structure):
|
||||
|
||||
```json
|
||||
{
|
||||
"student_ou_prefixes": ["/Elever"],
|
||||
"staff_ou_prefixes": ["/Personale", "/Admin"]
|
||||
}
|
||||
```
|
||||
|
||||
After editing the file, restart GDPRScanner — no rebuild required.
|
||||
|
||||
---
|
||||
|
||||
## 8. Verify
|
||||
|
||||
After connecting:
|
||||
|
||||
- **Sources panel** shows Gmail and Google Drive checkboxes
|
||||
- **Accounts panel** shows all Google Workspace users with `GWS` badges
|
||||
- Users are classified as Elev / Ansat based on their OU
|
||||
|
||||
Select one or more accounts, check Gmail and/or Google Drive, and click Scan.
|
||||
|
||||
---
|
||||
|
||||
## Notes on what is scanned
|
||||
|
||||
| Source | What is scanned |
|
||||
|---|---|
|
||||
| Gmail | Email bodies and attachments in all mail folders |
|
||||
| Google Drive | My Drive files — Docs, Sheets, Slides are auto-exported to text for scanning |
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Symptom | Likely cause |
|
||||
|---|---|
|
||||
| `unauthorized_client` on connect | Domain-wide delegation not enabled, or scopes not authorised in Admin Console |
|
||||
| 0 users listed | `admin.directory.user.readonly` scope missing, or wrong admin email |
|
||||
| Users show as "Anden" (other) | OU path not matched in `classification/google_ou_roles.json` — check OU paths in Google Admin and compare with the file |
|
||||
| Gmail scan finds nothing | `gmail.readonly` scope not authorised |
|
||||
| Drive scan finds nothing | `drive.readonly` scope not authorised |
|
||||
| `RefreshError` on scan | Service account key expired or revoked — generate a new key |
|
||||
160
docs/setup/M365_SETUP.md
Normal file
160
docs/setup/M365_SETUP.md
Normal file
@ -0,0 +1,160 @@
|
||||
# Microsoft 365 Setup
|
||||
|
||||
Step-by-step guide for connecting GDPRScanner to Microsoft 365.
|
||||
|
||||
---
|
||||
|
||||
## 1. Register an app in Azure
|
||||
|
||||
Go to **Azure Portal → Microsoft Entra ID → App registrations → New registration**.
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Name | GDPRScanner (or any name) |
|
||||
| Supported account types | Accounts in this organisational directory only |
|
||||
| Redirect URI | Leave blank |
|
||||
|
||||
Click **Register**. Note the **Application (client) ID** and **Directory (tenant) ID** — you'll need both.
|
||||
|
||||
---
|
||||
|
||||
## 2. Choose an authentication mode
|
||||
|
||||
| Mode | How it works | When to use |
|
||||
|---|---|---|
|
||||
| **Application** | Client credentials — client ID + tenant ID + client secret. No user interaction. | Automated / scheduled scans, all-user scans |
|
||||
| **Delegated** | OAuth device code flow — user signs in interactively. | Single-user scans, testing |
|
||||
|
||||
### Application mode — create a client secret
|
||||
|
||||
In your app registration: **Certificates & secrets → New client secret**.
|
||||
|
||||
Set an expiry (24 months recommended) and copy the **Value** immediately — it is only shown once.
|
||||
|
||||
### Delegated mode — no secret needed
|
||||
|
||||
The scanner will show a device code URL. Open it in a browser, sign in, and the scanner authenticates as that user.
|
||||
|
||||
---
|
||||
|
||||
## 3. Add API permissions
|
||||
|
||||
Go to **API permissions → Add a permission → Microsoft Graph**.
|
||||
|
||||
### Scan only
|
||||
|
||||
| Permission | Type |
|
||||
|---|---|
|
||||
| `Mail.Read` | Application or Delegated |
|
||||
| `Files.Read.All` | Application or Delegated |
|
||||
| `Sites.Read.All` | Application or Delegated |
|
||||
| `ChannelMessage.Read.All` | Application |
|
||||
| `Team.ReadBasic.All` | Application |
|
||||
| `User.Read.All` | Application |
|
||||
|
||||
### Scan + Delete
|
||||
|
||||
Add these in addition to the read permissions above:
|
||||
|
||||
| Permission | Type |
|
||||
|---|---|
|
||||
| `Mail.ReadWrite` | Application or Delegated |
|
||||
| `Files.ReadWrite.All` | Application or Delegated |
|
||||
| `Sites.ReadWrite.All` | Application or Delegated |
|
||||
|
||||
### Email reports via Graph
|
||||
|
||||
If you want the scanner to send email reports via Microsoft 365 (not SMTP):
|
||||
|
||||
| Permission | Type |
|
||||
|---|---|
|
||||
| `Mail.Send` | Application or Delegated |
|
||||
|
||||
### Grant admin consent
|
||||
|
||||
All **Application** permissions require admin consent. Click **Grant admin consent for [your tenant]** at the top of the API permissions page. Without this, scans will fail with 403 errors.
|
||||
|
||||
---
|
||||
|
||||
## 4. Connect in GDPRScanner
|
||||
|
||||
Open GDPRScanner → **Source Management → Microsoft 365** tab.
|
||||
|
||||
| Field | Where to find it |
|
||||
|---|---|
|
||||
| Client ID | App registration → Overview → Application (client) ID |
|
||||
| Tenant ID | App registration → Overview → Directory (tenant) ID |
|
||||
| Client Secret | The value you copied in step 2 (Application mode only) |
|
||||
|
||||
Click **Connect**. In Application mode, the connection is immediate. In Delegated mode, the scanner shows a device code URL — open it in a browser and sign in to complete the connection.
|
||||
|
||||
---
|
||||
|
||||
## 5. Verify
|
||||
|
||||
After connecting, the Sources panel shows:
|
||||
|
||||
- **Email** — Exchange mailboxes
|
||||
- **OneDrive** — personal drives
|
||||
- **SharePoint** — site file libraries
|
||||
- **Teams** — Teams channel files
|
||||
|
||||
The Accounts panel lists all users in the tenant (Application mode) or just the signed-in user (Delegated mode).
|
||||
|
||||
---
|
||||
|
||||
## Notes on deletion
|
||||
|
||||
Emails deleted via the scanner are moved to **Deleted Items** — recoverable for 14–30 days depending on admin configuration. Files are sent to the **OneDrive/SharePoint recycle bin** — retained for 93 days across both recycle bin stages before permanent deletion. Nothing is permanently destroyed without a second manual step.
|
||||
|
||||
---
|
||||
|
||||
## Headless / scheduled mode
|
||||
|
||||
Headless mode uses Application auth only. Credentials are read in priority order:
|
||||
|
||||
1. `--settings FILE` — a JSON file you provide
|
||||
2. Environment variables: `M365_CLIENT_ID`, `M365_TENANT_ID`, `M365_CLIENT_SECRET`
|
||||
|
||||
Example settings file:
|
||||
|
||||
```json
|
||||
{
|
||||
"client_id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
|
||||
"tenant_id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
|
||||
"client_secret": "your-secret",
|
||||
"sources": ["email", "onedrive"],
|
||||
"options": {
|
||||
"older_than_days": 365,
|
||||
"email_body": true,
|
||||
"attachments": true,
|
||||
"delta": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
python gdpr_scanner.py --headless --output ~/Reports/ --settings settings.json
|
||||
```
|
||||
|
||||
See the full CLI flag reference in `README.md`.
|
||||
|
||||
---
|
||||
|
||||
## Role classification (staff / student)
|
||||
|
||||
GDPRScanner classifies users as **staff** or **student** based on their Microsoft 365 licence SKU. The mapping is in `classification/m365_skus.json`. If users appear as "other", open **Settings → SKU debug** to see which SKU IDs are assigned in your tenant and add any missing ones to `m365_skus.json`.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Symptom | Likely cause |
|
||||
|---|---|
|
||||
| 403 on scan start | Admin consent not granted, or wrong permissions added |
|
||||
| `AADSTS7000215` | Invalid client secret — check it was copied correctly |
|
||||
| No users listed | `User.Read.All` permission missing or not consented |
|
||||
| Teams files not appearing | `ChannelMessage.Read.All` or `Team.ReadBasic.All` missing |
|
||||
| Delta scan not working | Delta tokens require at least one full scan first |
|
||||
2659
document_scanner.py
Normal file
2659
document_scanner.py
Normal file
File diff suppressed because it is too large
Load Diff
600
file_scanner.py
Normal file
600
file_scanner.py
Normal file
@ -0,0 +1,600 @@
|
||||
"""
|
||||
file_scanner.py — Unified local and SMB/CIFS file iterator for GDPR Scanner.
|
||||
|
||||
Provides FileScanner.iter_files() which yields (relative_path, bytes, metadata)
|
||||
regardless of whether the source is a local path or a network share.
|
||||
|
||||
gdpr_scanner.py imports this module and calls iter_files() inside run_file_scan().
|
||||
All CPR scanning, card broadcasting, and DB persistence stay in gdpr_scanner.py.
|
||||
|
||||
Optional dependencies:
|
||||
smbprotocol>=1.13 — native SMB2/3 without mounting (pip install smbprotocol)
|
||||
keyring>=25.0 — OS keychain credential storage (pip install keyring)
|
||||
python-dotenv>=1.0 — .env file fallback (pip install python-dotenv)
|
||||
|
||||
If smbprotocol is not installed, the scanner falls back to local-path mode.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
import hashlib
|
||||
from pathlib import Path, PurePosixPath
|
||||
from typing import Iterator
|
||||
|
||||
# ── Optional dependency flags ─────────────────────────────────────────────────
|
||||
|
||||
try:
|
||||
import smbprotocol # noqa: F401 — just checking availability
|
||||
from smbprotocol.connection import Connection
|
||||
from smbprotocol.session import Session
|
||||
from smbprotocol.tree import TreeConnect
|
||||
from smbprotocol.open import (
|
||||
Open, CreateDisposition, CreateOptions,
|
||||
FileAttributes, FilePipePrinterAccessMask, ShareAccess,
|
||||
ImpersonationLevel,
|
||||
)
|
||||
from smbprotocol.query_info import FileDirectoryInformation
|
||||
SMB_OK = True
|
||||
except ImportError:
|
||||
SMB_OK = False
|
||||
|
||||
try:
|
||||
import keyring as _keyring
|
||||
KEYRING_OK = True
|
||||
except ImportError:
|
||||
KEYRING_OK = False
|
||||
|
||||
try:
|
||||
from dotenv import dotenv_values as _dotenv_values
|
||||
DOTENV_OK = True
|
||||
except ImportError:
|
||||
DOTENV_OK = False
|
||||
|
||||
|
||||
# ── Public constants ──────────────────────────────────────────────────────────
|
||||
|
||||
# Service name under which SMB passwords are stored in / read from the OS keychain.
KEYCHAIN_SERVICE = "gdpr-scanner-nas"

# Extensions handed on to _scan_bytes().  Per the original note this mirrors
# SUPPORTED_EXTS in gdpr_scanner.py; it is duplicated here so FileScanner can
# filter by extension without importing that module.
DEFAULT_EXTENSIONS = {
    # documents / spreadsheets
    ".pdf",
    ".docx",
    ".doc",
    ".xlsx",
    ".xlsm",
    ".csv",
    # plain text and mail
    ".txt",
    ".eml",
    ".msg",
    # images
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".tiff",
    ".tif",
    ".webp",
    ".heic",
    ".heif",
}

# Extension set for local/SMB file scans.  This is the same set object as
# DEFAULT_EXTENSIONS, so PDFs are included.  Per the original note, OCR runs in
# a spawned subprocess with a 60-second hard timeout (_scan_bytes_timeout) so a
# hanging Tesseract/Poppler process cannot block the scan thread indefinitely.
FILE_SCAN_EXTENSIONS = DEFAULT_EXTENSIONS

# Largest file (in bytes) that is loaded into memory.  Bigger files are reported
# as skipped — per the original note, the same guard as the M365 attachment scanner.
MAX_FILE_BYTES = 20 * 1024 ** 2  # 20 MB

# SMB pre-fetch sliding window (#22)
PREFETCH_WINDOW = 1     # number of SMB reads kept in flight at once
SMB_READ_TIMEOUT = 60   # seconds to wait for a single SMB read before giving up

# Directory names (compared lower-cased) that the walkers never descend into:
# system / sync / trash folders that would only add noise or permission errors.
SKIP_DIRS = {
    # recycle bins and trash
    ".recycle", ".recycler", "recycler", "$recycle.bin", ".trash", ".trashes",
    # sync tools
    ".sync", ".btsync", ".syncthing",
    # version control
    ".git", ".svn", ".hg",
    # build / package caches
    "__pycache__", "node_modules",
    # macOS bookkeeping
    ".spotlight-v100", ".fseventsd", ".temporaryitems",
    # Windows / Linux filesystem bookkeeping
    "system volume information", "lost+found",
}
|
||||
|
||||
|
||||
# ── Credential helpers ────────────────────────────────────────────────────────
|
||||
|
||||
def get_smb_password(smb_host: str, smb_user: str,
                     keychain_key: str | None = None) -> str | None:
    """Look up the SMB password, trying each credential source in turn.

    Sources, in priority order (the first non-empty value wins):
      1. OS keychain via keyring, under KEYCHAIN_SERVICE, with ``keychain_key``
         (or ``smb_user`` when no key is given) as the account name
      2. the NAS_PASSWORD environment variable
      3. NAS_PASSWORD in a ``.env`` file in the current working directory

    Args:
        smb_host:     Accepted for interface symmetry; not used by the lookup.
        smb_user:     Fallback keychain account name.
        keychain_key: Preferred keychain account name, if any.

    Returns:
        The password, or None when no source provides one.
    """

    def _from_keychain():
        if not KEYRING_OK:
            return None
        try:
            return _keyring.get_password(KEYCHAIN_SERVICE, keychain_key or smb_user)
        except Exception:
            # Best effort: an unavailable/locked keychain just means "try the next source".
            return None

    def _from_environment():
        return os.environ.get("NAS_PASSWORD")

    def _from_dotenv():
        if not DOTENV_OK:
            return None
        return _dotenv_values(".env").get("NAS_PASSWORD")

    # Lookups are evaluated lazily, so later sources are only touched when needed.
    for lookup in (_from_keychain, _from_environment, _from_dotenv):
        secret = lookup()
        if secret:
            return secret
    return None
|
||||
|
||||
|
||||
def store_smb_password(smb_host: str, smb_user: str,
                       password: str,
                       keychain_key: str | None = None) -> bool:
    """Save an SMB password to the OS keychain under KEYCHAIN_SERVICE.

    The keychain account name is ``keychain_key`` when given, otherwise
    ``smb_user``.  ``smb_host`` is accepted but not used.

    Returns:
        True when the password was stored; False when keyring is not
        installed or the keychain call raised.
    """
    if KEYRING_OK:
        try:
            _keyring.set_password(KEYCHAIN_SERVICE, keychain_key or smb_user, password)
        except Exception:
            return False
        return True
    return False
|
||||
|
||||
|
||||
# ── FileScanner ───────────────────────────────────────────────────────────────
|
||||
|
||||
class FileScanner:
    """Unified iterator over local paths and SMB/CIFS network shares.

    A path that starts with two slashes or two backslashes is treated as an SMB
    share when smbprotocol is importable (``SMB_OK``).  Every other path — and
    SMB-style paths when smbprotocol is missing — is walked as a local path.

    Usage::

        fs = FileScanner("/mnt/data")
        for rel_path, content, meta in fs.iter_files():
            result = _scan_bytes(content, rel_path)
            ...

        fs = FileScanner("//nas.school.dk/shares",
                         smb_host="nas.school.dk",
                         smb_user="DOMAIN\\\\henrik",
                         smb_password="secret")
        for rel_path, content, meta in fs.iter_files():
            ...
    """

    # Re-exported on the class for callers.  This is the module-level set, which
    # is DEFAULT_EXTENSIONS and therefore includes ".pdf".
    FILE_SCAN_EXTENSIONS = FILE_SCAN_EXTENSIONS

    def __init__(
        self,
        path: str,
        smb_host: str | None = None,
        smb_user: str | None = None,
        smb_password: str | None = None,
        smb_domain: str | None = None,
        keychain_key: str | None = None,
        max_file_bytes: int = MAX_FILE_BYTES,
    ):
        """Configure a scanner for ``path``.

        Args:
            path:           Local directory, or ``//host/share[/subpath]``
                            (backslashes accepted) for an SMB share.
            smb_host:       SMB server; derived from ``path`` when omitted.
            smb_user:       SMB user name.
            smb_password:   SMB password; when omitted for an SMB source it is
                            resolved via get_smb_password().
            smb_domain:     Stored on the instance; not referenced elsewhere in
                            this class.
            keychain_key:   Keychain account name used for the password lookup.
            max_file_bytes: Files larger than this are reported as skipped.
        """
        self.path = path
        self.smb_user = smb_user
        self.smb_domain = smb_domain or ""
        self.keychain_key = keychain_key
        self.max_file_bytes = max_file_bytes

        # Detect SMB path by prefix; auto-derive host if not provided
        _is_smb_path = path.startswith("//") or path.startswith("\\\\")
        if _is_smb_path and not smb_host:
            # Extract host from path: //host/share → host
            _norm = path.replace("\\", "/").lstrip("/")
            smb_host = _norm.split("/")[0] or None
        self.smb_host = smb_host

        # SMB mode needs both an SMB-style path and the optional dependency.
        self.is_smb = _is_smb_path and SMB_OK

        # Resolve password from keychain / env / .env if not provided directly
        self._password = smb_password
        if self.is_smb and not self._password:
            self._password = get_smb_password(
                smb_host or "", smb_user or "", keychain_key
            )

    # ── Public ────────────────────────────────────────────────────────────────

    def iter_files(
        self,
        extensions: set[str] | None = None,
        progress_cb=None,
    ) -> Iterator[tuple[str, bytes, dict]]:
        """Yield (relative_path, content_bytes, metadata) for every scannable file.

        Args:
            extensions:  Set of lowercase extensions to include, e.g. {".pdf", ".docx"}.
                         Defaults to DEFAULT_EXTENSIONS.
            progress_cb: Optional callable(rel_path) called before each file is read,
                         so the caller can update a progress indicator.

        Yields:
            rel_path — path relative to the root (e.g. "subfolder/doc.pdf")
            content  — raw bytes of the file, or None for skipped / failed files
            metadata — dict with keys: size_kb, modified, source_type, source_root,
                       full_path, skipped; skipped / failed entries also carry
                       skip_reason
        """
        exts = extensions or DEFAULT_EXTENSIONS

        if self.is_smb:
            yield from self._iter_smb(exts, progress_cb)
        else:
            yield from self._iter_local(exts, progress_cb)

    @property
    def source_type(self) -> str:
        """"smb" when this scanner reads a network share, otherwise "local"."""
        return "smb" if self.is_smb else "local"

    @staticmethod
    def smb_available() -> bool:
        """Report whether smbprotocol could be imported (module flag SMB_OK)."""
        return SMB_OK

    # ── Local walker ──────────────────────────────────────────────────────────

    def _iter_local(self, exts: set[str], progress_cb) -> Iterator[tuple[str, bytes, dict]]:
        """Walk a local directory tree and yield one record per matching file.

        Raises:
            FileNotFoundError: the resolved root path does not exist.
        """
        root = Path(self.path).expanduser().resolve()
        if not root.exists():
            raise FileNotFoundError(f"Path not found: {root}")

        for dirpath, _dirs, filenames in os.walk(root):
            # Prune in place so os.walk never descends into junk/system or
            # dot-prefixed directories.
            _dirs[:] = [d for d in _dirs if d.lower() not in SKIP_DIRS and not d.startswith(".")]
            for fname in filenames:
                full = Path(dirpath) / fname
                if full.suffix.lower() not in exts:
                    continue

                try:
                    size = full.stat().st_size
                except OSError:
                    # File vanished or is unreadable — nothing useful to report.
                    continue

                rel = str(full.relative_to(root))
                if size > self.max_file_bytes:
                    yield _skip(rel, size, "local", str(root))
                    continue

                if progress_cb:
                    progress_cb(rel)

                try:
                    content = full.read_bytes()
                    modified = time.strftime(
                        "%Y-%m-%d",
                        time.localtime(full.stat().st_mtime)
                    )
                except OSError as e:  # PermissionError is an OSError subclass
                    yield _error(rel, str(e), "local", str(root))
                    continue

                yield rel, content, {
                    "size_kb":     round(size / 1024, 1),
                    "modified":    modified,
                    "source_type": "local",
                    "source_root": str(root),
                    "full_path":   str(full),
                    "skipped":     False,
                }

    # ── SMB walker ────────────────────────────────────────────────────────────

    def _iter_smb(self, exts: set[str], progress_cb) -> Iterator[tuple[str, bytes, dict]]:
        """Walk an SMB share using smbprotocol with a sliding-window pre-fetcher.

        Directory traversal and file reads are decoupled:
          1. _smb_collect() walks the tree metadata-only (no file reads).
          2. A ThreadPoolExecutor runs _smb_read_file() calls, at most
             PREFETCH_WINDOW at a time.  Each result is awaited for up to
             SMB_READ_TIMEOUT seconds; a timed-out read yields an error record.

        NOTE(review): a read that is already running cannot be cancelled, and
        leaving the ``with ThreadPoolExecutor`` block waits for running workers,
        so a permanently hung read can still delay the end of the scan.

        Raises:
            RuntimeError: smbprotocol is not installed.
            ValueError:   the path cannot be parsed as //host/share[/subpath].
        """
        from collections import deque
        from concurrent.futures import ThreadPoolExecutor
        # Before Python 3.11 this is a different class from the builtin
        # TimeoutError, so it has to be caught by this name.
        from concurrent.futures import TimeoutError as FuturesTimeoutError

        if not SMB_OK:
            raise RuntimeError(
                "smbprotocol not installed — run: pip install smbprotocol"
            )

        # Parse //host/share/optional/subpath — normalise backslashes
        norm = self.path.replace("\\", "/").lstrip("/")
        parts = norm.split("/", 2)
        host = parts[0] if len(parts) > 0 else self.smb_host or ""
        share = parts[1] if len(parts) > 1 else ""
        # A trailing slash would otherwise produce a doubled separator in the
        # directory query pattern.
        sub = parts[2].rstrip("/") if len(parts) > 2 else ""

        if not host or not share:
            raise ValueError(
                f"Cannot parse SMB path '{self.path}' — expected //host/share[/subpath]"
            )

        source_root = f"//{host}/{share}"

        conn = Connection(uuid.uuid4(), host, 445)
        conn.connect(timeout=30)
        try:
            session = Session(conn,
                              username=self.smb_user or "",
                              password=self._password or "",
                              require_encryption=False)
            session.connect()
            try:
                tree = TreeConnect(session, f"\\\\{host}\\{share}")
                tree.connect()
                try:
                    # Phase 1: collect all candidate file descriptors (no reads)
                    candidates = list(self._smb_collect(
                        tree, sub, sub, exts, source_root
                    ))

                    # Phase 2: report side-channel records straight away; only
                    # real file entries enter the executor queue.
                    real_candidates = []
                    for item in candidates:
                        marker = item[0]
                        if marker is _COLLECT_ERROR:
                            # item[1] = directory that failed, item[3] = reason
                            yield _error(item[1] or ".", item[3], "smb", source_root)
                        elif marker is _COLLECT_SKIP:
                            yield _skip(item[1], item[2], "smb", source_root)
                        else:
                            real_candidates.append(item)

                    pending: deque = deque()  # (future, display_rel, size, modified, src_root)

                    def _submit_next(item):
                        display_rel, smb_path, size, modified, src_root = item
                        fut = executor.submit(_smb_read_file, tree, smb_path)
                        pending.append((fut, display_rel, size, modified, src_root))

                    with ThreadPoolExecutor(max_workers=PREFETCH_WINDOW) as executor:
                        it = iter(real_candidates)
                        # Seed the window
                        for item in it:
                            if progress_cb:
                                progress_cb(item[0])
                            _submit_next(item)
                            if len(pending) >= PREFETCH_WINDOW:
                                break

                        while pending:
                            fut, display_rel, size, modified, src_root = pending.popleft()

                            # Submit the next candidate to keep the window full
                            nxt = next(it, None)
                            if nxt is not None:
                                if progress_cb:
                                    progress_cb(nxt[0])
                                _submit_next(nxt)

                            try:
                                content = fut.result(timeout=SMB_READ_TIMEOUT)
                                meta = {
                                    "size_kb":     round(size / 1024, 1),
                                    "modified":    modified,
                                    "source_type": "smb",
                                    "source_root": src_root,
                                    "full_path":   f"{src_root}/{display_rel}",
                                    "skipped":     False,
                                }
                                yield display_rel, content, meta
                            except FuturesTimeoutError:
                                # Only prevents a not-yet-started read; a running
                                # one keeps going in its worker thread.
                                fut.cancel()
                                yield _error(display_rel,
                                             f"SMB read timed out after {SMB_READ_TIMEOUT}s",
                                             "smb", src_root)
                            except Exception as e:
                                err = str(e)
                                if "STATUS_END_OF_FILE" in err or "0xc0000011" in err:
                                    continue   # empty/placeholder — skip silently
                                yield _error(display_rel, err, "smb", src_root)

                finally:
                    tree.disconnect()
            finally:
                session.disconnect()
        finally:
            conn.disconnect()

    def _smb_collect(
        self,
        tree,
        directory: str,
        root_sub: str,
        exts: set[str],
        source_root: str,
    ) -> Iterator[tuple[str, str, int, str, str]]:
        """Recursively walk an SMB directory tree, yielding file descriptors only.

        No file reads are performed — this is directory-listing only.

        Yields 5-tuples in one of three shapes:
          (display_rel, smb_path, size_bytes, modified_str, source_root)
              a file to read
          (_COLLECT_SKIP, display_rel, size_bytes, "", source_root)
              a file larger than max_file_bytes
          (_COLLECT_ERROR, display_dir, 0, error_message, source_root)
              a directory that could not be listed

        The two marker shapes are turned into _skip() / _error() records by
        _iter_smb before the prefetch loop.
        """
        query_path = directory.replace("/", "\\") if directory else ""
        pattern = (query_path + "\\" if query_path else "") + "*"

        try:
            entries = _smb_list_dir(tree, pattern)
        except Exception as e:
            # Report which directory failed and why, then give up on this
            # branch only; sibling directories are still walked by the caller.
            failed_dir = directory[len(root_sub):].lstrip("/") if root_sub else directory
            yield _COLLECT_ERROR, failed_dir, 0, f"Cannot list directory: {e}", source_root
            return

        for entry in entries:
            name = entry["name"]
            if name in (".", ".."):
                continue

            rel = (directory + "/" + name) if directory else name
            # Path shown to the caller is relative to the scanned sub-path.
            display_rel = rel[len(root_sub):].lstrip("/") if root_sub else rel
            display_rel = display_rel or name

            is_dir = bool(entry["attributes"] & 0x10)  # FILE_ATTRIBUTE_DIRECTORY
            size = entry["size"]

            if is_dir:
                if name.lower() in SKIP_DIRS or (name.startswith(".") and name not in (".", "..")):
                    continue
                yield from self._smb_collect(tree, rel, root_sub, exts, source_root)
                continue

            ext = PurePosixPath(name).suffix.lower()
            if ext not in exts:
                continue

            if size > self.max_file_bytes:
                # Mark as over-size — _iter_smb skips before submitting to executor
                yield _COLLECT_SKIP, display_rel, size, "", source_root
                continue

            modified = _smb_ts(entry.get("last_write_time", 0))
            yield display_rel, rel.replace("/", "\\"), size, modified, source_root
|
||||
|
||||
|
||||
# Sentinel strings for _smb_collect side-channel messages.
# They travel as the first element of the tuples _smb_collect() yields, and
# FileScanner._iter_smb() recognises them with an identity (``is``) check.
# Both begin with a NUL character — presumably so they cannot be mistaken for
# a real relative path; confirm before relying on that.
_COLLECT_ERROR = "\x00__error__"
_COLLECT_SKIP = "\x00__skip__"
|
||||
|
||||
|
||||
# ── SMB helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
def uuid4_str() -> str:
    """Return a newly generated random (version 4) UUID as a string."""
    from uuid import uuid4

    return str(uuid4())
|
||||
|
||||
|
||||
def _smb_list_dir(tree, pattern: str) -> list[dict]:
|
||||
"""List directory entries matching pattern on an SMB tree."""
|
||||
from smbprotocol.open import (
|
||||
Open, CreateDisposition, CreateOptions,
|
||||
FileAttributes, DirectoryAccessMask, ShareAccess,
|
||||
ImpersonationLevel, FileInformationClass,
|
||||
)
|
||||
from smbprotocol.file_info import FileDirectoryInformation
|
||||
import smbprotocol.exceptions as smb_exc
|
||||
|
||||
# Open directory
|
||||
dir_path = "\\".join(pattern.replace("/", "\\").split("\\")[:-1])
|
||||
file_pattern = pattern.replace("/", "\\").split("\\")[-1] or "*"
|
||||
|
||||
fh = Open(tree, dir_path or "")
|
||||
fh.create(
|
||||
ImpersonationLevel.Impersonation,
|
||||
DirectoryAccessMask.FILE_LIST_DIRECTORY |
|
||||
DirectoryAccessMask.FILE_READ_ATTRIBUTES,
|
||||
FileAttributes.FILE_ATTRIBUTE_DIRECTORY,
|
||||
ShareAccess.FILE_SHARE_READ | ShareAccess.FILE_SHARE_WRITE |
|
||||
ShareAccess.FILE_SHARE_DELETE,
|
||||
CreateDisposition.FILE_OPEN,
|
||||
CreateOptions.FILE_DIRECTORY_FILE,
|
||||
)
|
||||
|
||||
entries = []
|
||||
try:
|
||||
raw = fh.query_directory(
|
||||
pattern=file_pattern,
|
||||
file_information_class=FileInformationClass.FILE_DIRECTORY_INFORMATION,
|
||||
flags=0,
|
||||
max_output=65536,
|
||||
)
|
||||
for info in raw:
|
||||
fname = info["file_name"].get_value()
|
||||
if isinstance(fname, bytes):
|
||||
fname = fname.decode("utf-16-le", errors="replace").rstrip("\x00")
|
||||
attrs = info["file_attributes"].get_value()
|
||||
entries.append({
|
||||
"name": fname,
|
||||
"attributes": int(attrs) if not isinstance(attrs, int) else attrs,
|
||||
"size": info["end_of_file"].get_value(),
|
||||
"last_write_time": info["last_write_time"].get_value(),
|
||||
})
|
||||
except smb_exc.SMBOSError:
|
||||
pass # Empty directory or no match
|
||||
finally:
|
||||
try:
|
||||
fh.close(get_attributes=False)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return entries
|
||||
|
||||
|
||||
def _smb_read_file(tree, smb_path: str) -> bytes:
|
||||
"""Read a complete file from an SMB tree into bytes."""
|
||||
from smbprotocol.open import (
|
||||
Open, CreateDisposition, CreateOptions,
|
||||
FileAttributes, FilePipePrinterAccessMask, ShareAccess,
|
||||
ImpersonationLevel,
|
||||
)
|
||||
|
||||
fh = Open(tree, smb_path)
|
||||
fh.create(
|
||||
ImpersonationLevel.Impersonation,
|
||||
FilePipePrinterAccessMask.FILE_READ_DATA |
|
||||
FilePipePrinterAccessMask.FILE_READ_ATTRIBUTES,
|
||||
FileAttributes.FILE_ATTRIBUTE_NORMAL,
|
||||
ShareAccess.FILE_SHARE_READ,
|
||||
CreateDisposition.FILE_OPEN,
|
||||
CreateOptions.FILE_NON_DIRECTORY_FILE,
|
||||
)
|
||||
try:
|
||||
chunks = []
|
||||
offset = 0
|
||||
chunk_size = 1024 * 1024 # 1 MB chunks
|
||||
while True:
|
||||
data = fh.read(offset, chunk_size)
|
||||
if not data:
|
||||
break
|
||||
chunks.append(bytes(data))
|
||||
offset += len(data)
|
||||
if len(data) < chunk_size:
|
||||
break
|
||||
return b"".join(chunks)
|
||||
finally:
|
||||
fh.close(get_attributes=False)
|
||||
|
||||
|
||||
def _smb_ts(windows_ts: int) -> str:
|
||||
"""Convert Windows FILETIME (100ns intervals since 1601-01-01) to YYYY-MM-DD."""
|
||||
if not windows_ts:
|
||||
return ""
|
||||
try:
|
||||
# FILETIME → Unix epoch
|
||||
unix_ts = (windows_ts - 116444736000000000) / 10_000_000
|
||||
return time.strftime("%Y-%m-%d", time.gmtime(unix_ts))
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
# ── Sentinel yield helpers ────────────────────────────────────────────────────
|
||||
|
||||
def _skip(rel: str, size: int, source_type: str, source_root: str):
|
||||
"""Yield a skipped-file sentinel (content=None, meta['skipped']=True)."""
|
||||
return rel, None, {
|
||||
"size_kb": round(size / 1024, 1),
|
||||
"modified": "",
|
||||
"source_type": source_type,
|
||||
"source_root": source_root,
|
||||
"full_path": f"{source_root}/{rel}",
|
||||
"skipped": True,
|
||||
"skip_reason": f"File too large ({size // 1_048_576} MB)",
|
||||
}
|
||||
|
||||
|
||||
def _error(rel: str, error: str, source_type: str, source_root: str):
    """Build the record reported for a file or directory that could not be read.

    Returns a ``(rel, None, meta)`` tuple: content is None,
    ``meta["skipped"]`` is True and ``meta["skip_reason"]`` carries the
    error text prefixed with "Error: ".
    """
    meta = dict(
        size_kb=0,
        modified="",
        source_type=source_type,
        source_root=source_root,
        full_path=f"{source_root}/{rel}",
        skipped=True,
        skip_reason=f"Error: {error}",
    )
    return rel, None, meta
|
||||
954
gdpr_db.py
Normal file
954
gdpr_db.py
Normal file
@ -0,0 +1,954 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
gdpr_db.py — SQLite persistence layer for GDPRScanner
|
||||
|
||||
Stores scan results alongside the existing JSON cache. Neither replaces the
|
||||
other: JSON is fast and portable, SQLite enables querying, trending, and the
|
||||
data-subject index.
|
||||
|
||||
Database location: ~/.gdprscanner/scanner.db (configurable via DB_PATH)
|
||||
|
||||
Schema
|
||||
------
|
||||
scans one row per completed scan run
|
||||
flagged_items one row per flagged file / email
|
||||
cpr_index (cpr_hash, item_id) — powers data-subject lookup
|
||||
pii_hits per-type PII counts per item
|
||||
dispositions compliance officer decisions per item
|
||||
scan_history aggregated stats for trend tracking
|
||||
|
||||
Usage (from gdpr_scanner.py)
|
||||
-----------------------------
|
||||
from gdpr_db import ScanDB
|
||||
db = ScanDB()
|
||||
scan_id = db.begin_scan(options)
|
||||
db.save_item(scan_id, card, cprs) # called for each flagged card
|
||||
db.finish_scan(scan_id, total_scanned)
|
||||
db.close()
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import sqlite3
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
from pathlib import Path as _P
|
||||
_DATA_DIR = _P.home() / ".gdprscanner"
|
||||
_DATA_DIR.mkdir(exist_ok=True)
|
||||
DB_PATH = _DATA_DIR / "scanner.db"
|
||||
|
||||
# ── Retention cutoff helper ──────────────────────────────────────────────────
|
||||
|
||||
def overdue_cutoff(years: int = 5, fiscal_year_end: str | None = None) -> str:
|
||||
"""Return the ISO date string before which items are considered overdue.
|
||||
|
||||
Two modes:
|
||||
- Rolling (default, fiscal_year_end=None):
|
||||
Exactly N years before today.
|
||||
E.g. years=5 on 2026-03-17 -> 2021-03-17
|
||||
Correct for GDPR general data minimisation.
|
||||
|
||||
- Fiscal year end (fiscal_year_end="MM-DD", e.g. "12-31"):
|
||||
N years before the most recently completed fiscal year end.
|
||||
E.g. years=5, FY end Dec 31, run on 2026-03-17:
|
||||
Last FY end = 2025-12-31 -> cutoff = 2020-12-31
|
||||
Documents from the FY ending 2020-12-31 expire on 2025-12-31,
|
||||
so on 2026-03-17 they are overdue. This is correct for
|
||||
Bogforingsloven (Danish bookkeeping law) which requires records
|
||||
for 5 years from the END of the financial year.
|
||||
"""
|
||||
from datetime import date, timedelta
|
||||
|
||||
today = date.today()
|
||||
|
||||
if fiscal_year_end:
|
||||
# Parse MM-DD
|
||||
try:
|
||||
month, day = (int(x) for x in fiscal_year_end.split("-"))
|
||||
except (ValueError, AttributeError):
|
||||
raise ValueError(f"fiscal_year_end must be MM-DD, got {fiscal_year_end!r}")
|
||||
|
||||
# Find the most recently completed fiscal year end date
|
||||
fy_this_year = date(today.year, month, day)
|
||||
if fy_this_year >= today:
|
||||
# This year's FY end is in the future -- use last year's
|
||||
fy_end = date(today.year - 1, month, day)
|
||||
else:
|
||||
fy_end = fy_this_year
|
||||
|
||||
# Cutoff is N years before that FY end
|
||||
cutoff = fy_end.replace(year=fy_end.year - years)
|
||||
else:
|
||||
# Rolling: exactly N years before today
|
||||
cutoff = today.replace(year=today.year - years)
|
||||
|
||||
return cutoff.isoformat()
|
||||
|
||||
|
||||
# ── Schema DDL ────────────────────────────────────────────────────────────────
|
||||
_DDL = """
|
||||
PRAGMA journal_mode = WAL;
|
||||
PRAGMA foreign_keys = ON;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS scans (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
started_at REAL NOT NULL,
|
||||
finished_at REAL,
|
||||
sources TEXT NOT NULL DEFAULT '[]', -- JSON array
|
||||
user_count INTEGER NOT NULL DEFAULT 0,
|
||||
options TEXT NOT NULL DEFAULT '{}', -- JSON object
|
||||
total_scanned INTEGER NOT NULL DEFAULT 0,
|
||||
flagged_count INTEGER NOT NULL DEFAULT 0,
|
||||
delta INTEGER NOT NULL DEFAULT 0 -- 0=full, 1=delta
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS flagged_items (
|
||||
id TEXT NOT NULL, -- Graph item ID
|
||||
scan_id INTEGER NOT NULL REFERENCES scans(id) ON DELETE CASCADE,
|
||||
name TEXT NOT NULL DEFAULT '',
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
source_type TEXT NOT NULL DEFAULT '', -- email/onedrive/sharepoint/teams
|
||||
account_id TEXT NOT NULL DEFAULT '',
|
||||
folder TEXT NOT NULL DEFAULT '',
|
||||
url TEXT NOT NULL DEFAULT '',
|
||||
drive_id TEXT NOT NULL DEFAULT '',
|
||||
size_kb REAL NOT NULL DEFAULT 0,
|
||||
modified TEXT NOT NULL DEFAULT '', -- YYYY-MM-DD
|
||||
cpr_count INTEGER NOT NULL DEFAULT 0,
|
||||
risk TEXT, -- LOW/MEDIUM/HIGH
|
||||
user_role TEXT NOT NULL DEFAULT 'other', -- student/staff/other
|
||||
thumb_b64 TEXT NOT NULL DEFAULT '',
|
||||
thumb_mime TEXT NOT NULL DEFAULT 'image/svg+xml',
|
||||
attachments TEXT NOT NULL DEFAULT '[]', -- JSON array
|
||||
scanned_at REAL NOT NULL,
|
||||
PRIMARY KEY (id, scan_id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS cpr_index (
|
||||
cpr_hash TEXT NOT NULL, -- SHA-256 of the raw CPR string
|
||||
item_id TEXT NOT NULL,
|
||||
scan_id INTEGER NOT NULL REFERENCES scans(id) ON DELETE CASCADE,
|
||||
first_seen REAL NOT NULL,
|
||||
PRIMARY KEY (cpr_hash, item_id, scan_id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS pii_hits (
|
||||
item_id TEXT NOT NULL,
|
||||
scan_id INTEGER NOT NULL REFERENCES scans(id) ON DELETE CASCADE,
|
||||
pii_type TEXT NOT NULL, -- phone/email/iban/name/address/org
|
||||
hit_count INTEGER NOT NULL DEFAULT 0,
|
||||
PRIMARY KEY (item_id, scan_id, pii_type)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS dispositions (
|
||||
item_id TEXT NOT NULL PRIMARY KEY,
|
||||
status TEXT NOT NULL DEFAULT 'unreviewed',
|
||||
legal_basis TEXT,
|
||||
notes TEXT,
|
||||
reviewed_by TEXT,
|
||||
reviewed_at REAL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS scan_history (
|
||||
scan_id INTEGER PRIMARY KEY REFERENCES scans(id) ON DELETE CASCADE,
|
||||
scan_date TEXT NOT NULL, -- YYYY-MM-DD
|
||||
flagged_count INTEGER NOT NULL DEFAULT 0,
|
||||
special_category INTEGER NOT NULL DEFAULT 0,
|
||||
overdue_count INTEGER NOT NULL DEFAULT 0,
|
||||
deleted_count INTEGER NOT NULL DEFAULT 0,
|
||||
sources_json TEXT NOT NULL DEFAULT '{}'
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS deletion_log (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
deleted_at REAL NOT NULL, -- Unix timestamp
|
||||
item_id TEXT NOT NULL,
|
||||
item_name TEXT NOT NULL DEFAULT '',
|
||||
source_type TEXT NOT NULL DEFAULT '', -- email/onedrive/sharepoint/teams
|
||||
account_id TEXT NOT NULL DEFAULT '',
|
||||
account_name TEXT NOT NULL DEFAULT '',
|
||||
cpr_count INTEGER NOT NULL DEFAULT 0,
|
||||
reason TEXT NOT NULL DEFAULT 'manual', -- manual/bulk/retention/data-subject-request
|
||||
legal_basis TEXT NOT NULL DEFAULT '', -- from dispositions table if set
|
||||
deleted_by TEXT NOT NULL DEFAULT '', -- authenticated user or "headless"
|
||||
scan_id INTEGER -- which scan found this item (nullable)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dellog_time ON deletion_log(deleted_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_dellog_item ON deletion_log(item_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_dellog_reason ON deletion_log(reason);
|
||||
|
||||
-- Indexes
|
||||
CREATE INDEX IF NOT EXISTS idx_items_scan ON flagged_items(scan_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_items_source ON flagged_items(source_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_items_account ON flagged_items(account_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_items_risk ON flagged_items(risk);
|
||||
CREATE INDEX IF NOT EXISTS idx_cpr_hash ON cpr_index(cpr_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_cpr_item ON cpr_index(item_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_history_date ON scan_history(scan_date);
|
||||
"""
|
||||
|
||||
# ── Migration helpers ─────────────────────────────────────────────────────────
|
||||
# Ordered schema migrations applied by ScanDB._run_migrations().
# Entries with a version greater than the database's `PRAGMA user_version`
# are executed in list order, and user_version is then set to that version.
# Note: the `user_role` column of migration 1 is also declared in _DDL, so on
# a freshly created database that ALTER fails with a duplicate-column error,
# which _run_migrations() tolerates.
_MIGRATIONS: list[tuple[int, str]] = [
    # (version, sql)
    # Each runs once and is recorded in the user_version pragma.
    (1, "ALTER TABLE flagged_items ADD COLUMN user_role TEXT NOT NULL DEFAULT 'other'"),
    (2, "ALTER TABLE flagged_items ADD COLUMN transfer_risk TEXT NOT NULL DEFAULT ''"),
    (3, "ALTER TABLE flagged_items ADD COLUMN special_category TEXT NOT NULL DEFAULT '[]'"),
    (4, "ALTER TABLE flagged_items ADD COLUMN face_count INTEGER NOT NULL DEFAULT 0"),
    (5, "ALTER TABLE flagged_items ADD COLUMN exif_json TEXT NOT NULL DEFAULT '{}'"),
    (6, "ALTER TABLE flagged_items ADD COLUMN full_path TEXT NOT NULL DEFAULT ''"),
    # Bookkeeping table for scheduled (headless) scan runs.
    (7, """CREATE TABLE IF NOT EXISTS schedule_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
started_at REAL NOT NULL,
finished_at REAL,
status TEXT NOT NULL DEFAULT 'running',
profile_id TEXT NOT NULL DEFAULT '',
flagged INTEGER NOT NULL DEFAULT 0,
scanned INTEGER NOT NULL DEFAULT 0,
emailed INTEGER NOT NULL DEFAULT 0,
error TEXT NOT NULL DEFAULT ''
)"""),
]
|
||||
|
||||
|
||||
class ScanDB:
|
||||
"""Thread-safe SQLite wrapper for GDPRScanner results."""
|
||||
|
||||
def __init__(self, path: Path = DB_PATH):
    """Remember the database location; no connection is opened here.

    Args:
        path: SQLite file to use (defaults to the module-level DB_PATH).
    """
    # Populated lazily by _connect() and cleared again by close().
    self._conn: sqlite3.Connection | None = None
    self._path = path
|
||||
|
||||
# ── Connection ────────────────────────────────────────────────────────────
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
    """Return the cached connection, opening and initialising it on first use.

    On first use the database at ``self._path`` is opened with
    ``check_same_thread=False`` and a 15 second busy timeout, rows are
    returned as ``sqlite3.Row``, the ``_DDL`` script is executed and pending
    migrations are applied.

    Raises:
        Exception: Whatever schema creation or the migrations raise.  In that
            case the connection is closed and NOT cached, so the next call
            retries the whole initialisation instead of handing out a
            half-initialised connection.
    """
    if self._conn is not None:
        return self._conn

    conn = sqlite3.connect(
        str(self._path),
        check_same_thread=False,
        timeout=15,
    )
    # _run_migrations() reads self._conn, so publish it before initialising.
    self._conn = conn
    try:
        conn.row_factory = sqlite3.Row
        conn.executescript(_DDL)
        conn.commit()
        self._run_migrations()
    except Exception:
        self._conn = None
        conn.close()
        raise
    return conn
|
||||
|
||||
def _run_migrations(self) -> None:
    """Apply every entry of ``_MIGRATIONS`` newer than ``PRAGMA user_version``.

    Migrations run in list order on ``self._conn``; after each one the
    ``user_version`` pragma is set to that migration's version.

    A migration whose object already exists (duplicate column / table, which
    happens on fresh databases because ``_DDL`` already declares some of the
    columns) counts as applied.  Any other failure is re-raised WITHOUT
    bumping ``user_version``, so the migration is retried on the next connect
    instead of being silently recorded as done.

    Raises:
        sqlite3.OperationalError: If a migration fails for a reason other
            than the object already existing.
    """
    conn = self._conn
    cur_ver = conn.execute("PRAGMA user_version").fetchone()[0]
    for ver, sql in _MIGRATIONS:
        if ver <= cur_ver:
            continue
        try:
            conn.executescript(sql)
        except sqlite3.OperationalError as exc:
            reason = str(exc).lower()
            if "duplicate column name" not in reason and "already exists" not in reason:
                raise
            # Column/table already present: nothing left to do for this step.
        # PRAGMA arguments cannot be bound as parameters; int() keeps the
        # interpolated value strictly numeric.
        conn.execute(f"PRAGMA user_version = {int(ver)}")
        conn.commit()
|
||||
|
||||
def close(self) -> None:
    """Close the cached connection, if any, and forget it.

    Errors raised while closing are ignored; afterwards ``self._conn`` is
    ``None`` so the next ``_connect()`` opens a fresh connection.
    """
    conn, self._conn = self._conn, None
    if not conn:
        return
    try:
        conn.close()
    except Exception:
        pass  # best effort: the handle is dropped either way
|
||||
|
||||
def reset(self) -> None:
    """Drop the scanner tables and recreate the schema from scratch.

    This permanently deletes all scan results, the CPR index, PII hit counts,
    dispositions, the deletion log and the scan history, and resets
    ``PRAGMA user_version`` to 0.  Use with caution.

    The connection is closed and reopened afterwards so that ``_connect()``
    rebuilds the schema.

    NOTE(review): ``schedule_runs`` is not in the drop list, so scheduler
    history survives a reset -- confirm whether that is intentional.
    """
    conn = self._connect()
    # Tables that reference scans(id) are dropped before `scans` itself.
    for name in (
        "deletion_log", "pii_hits", "cpr_index",
        "dispositions", "scan_history", "flagged_items", "scans",
    ):
        conn.execute(f"DROP TABLE IF EXISTS {name}")
    conn.execute("PRAGMA user_version = 0")
    conn.commit()

    # Reopen so _connect() rebuilds the schema on a fresh connection.
    self.close()
    self._connect()
|
||||
|
||||
|
||||
|
||||
def begin_scan(self, options: dict) -> int:
    """Insert a new row into ``scans`` and return its id.

    Args:
        options: Scan request.  Keys read here: ``sources`` (stored as JSON),
            ``user_ids`` (only its length is stored) and ``options`` (stored
            as JSON; a truthy ``delta`` entry marks the scan as a delta scan).

    Returns:
        The rowid of the new scan record.
    """
    conn = self._connect()
    scan_opts = options.get("options", {})
    record = (
        time.time(),
        json.dumps(options.get("sources", [])),
        len(options.get("user_ids", [])),
        json.dumps(scan_opts),
        int(bool(scan_opts.get("delta"))),
    )
    cursor = conn.execute(
        """INSERT INTO scans
           (started_at, sources, user_count, options, delta)
           VALUES (?, ?, ?, ?, ?)""",
        record,
    )
    conn.commit()
    return cursor.lastrowid
|
||||
|
||||
def save_item(self, scan_id: int, card: dict, cprs: list | None = None,
|
||||
pii_counts: dict | None = None) -> None:
|
||||
"""Persist one flagged item and its CPR/PII data."""
|
||||
c = self._connect()
|
||||
now = time.time()
|
||||
|
||||
c.execute(
|
||||
"""INSERT OR REPLACE INTO flagged_items
|
||||
(id, scan_id, name, source, source_type, account_id, folder,
|
||||
url, drive_id, size_kb, modified, cpr_count, risk,
|
||||
thumb_b64, thumb_mime, attachments, user_role, transfer_risk,
|
||||
special_category, face_count, exif_json, full_path, scanned_at)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
|
||||
(
|
||||
card.get("id", ""),
|
||||
scan_id,
|
||||
card.get("name", ""),
|
||||
card.get("source", ""),
|
||||
card.get("source_type", ""),
|
||||
card.get("account_id", ""),
|
||||
card.get("folder", ""),
|
||||
card.get("url", ""),
|
||||
card.get("drive_id", ""),
|
||||
card.get("size_kb", 0),
|
||||
card.get("modified", ""),
|
||||
card.get("cpr_count", 0),
|
||||
card.get("risk"),
|
||||
card.get("thumb_b64", ""),
|
||||
card.get("thumb_mime", "image/svg+xml"),
|
||||
json.dumps(card.get("attachments", [])),
|
||||
card.get("user_role", "other"),
|
||||
card.get("transfer_risk", ""),
|
||||
json.dumps(card.get("special_category", [])),
|
||||
card.get("face_count", 0),
|
||||
json.dumps(card.get("exif", {})),
|
||||
card.get("full_path", ""),
|
||||
now,
|
||||
),
|
||||
)
|
||||
|
||||
# CPR index — store hash only (never store raw CPR numbers in DB)
|
||||
item_id = card.get("id", "")
|
||||
if cprs:
|
||||
for cpr in cprs:
|
||||
cpr_hash = hashlib.sha256(str(cpr).encode()).hexdigest()
|
||||
c.execute(
|
||||
"""INSERT OR IGNORE INTO cpr_index
|
||||
(cpr_hash, item_id, scan_id, first_seen)
|
||||
VALUES (?,?,?,?)""",
|
||||
(cpr_hash, item_id, scan_id, now),
|
||||
)
|
||||
|
||||
# PII hit counts
|
||||
if pii_counts:
|
||||
for pii_type, count in pii_counts.items():
|
||||
if count and count > 0:
|
||||
c.execute(
|
||||
"""INSERT OR REPLACE INTO pii_hits
|
||||
(item_id, scan_id, pii_type, hit_count)
|
||||
VALUES (?,?,?,?)""",
|
||||
(item_id, scan_id, pii_type, count),
|
||||
)
|
||||
|
||||
c.commit()
|
||||
|
||||
def finish_scan(self, scan_id: int, total_scanned: int,
                deleted_count: int = 0) -> None:
    """Mark a scan as finished and write its ``scan_history`` row.

    Args:
        scan_id: Scan to finish.
        total_scanned: Number of items examined, stored on the scan row.
        deleted_count: Stored verbatim in the history row.

    The history row records the flagged-item total, a per-``source_type``
    breakdown (as JSON), the number of items with a non-empty
    ``special_category`` and the number of items whose ``modified`` date is
    older than the rolling 5-year cutoff.
    """
    conn = self._connect()
    finished = time.time()
    key = (scan_id,)

    flagged_total = conn.execute(
        "SELECT COUNT(*) FROM flagged_items WHERE scan_id=?", key
    ).fetchone()[0]

    conn.execute(
        """UPDATE scans SET finished_at=?, total_scanned=?, flagged_count=?
           WHERE id=?""",
        (finished, total_scanned, flagged_total, scan_id),
    )

    # Per-source breakdown for history.
    per_source = {}
    for source_type, hits in conn.execute(
        """SELECT source_type, COUNT(*) FROM flagged_items
           WHERE scan_id=? GROUP BY source_type""",
        key,
    ).fetchall():
        per_source[source_type] = hits

    # Overdue items use the rolling 5-year window (baseline for history).
    overdue_total = conn.execute(
        """SELECT COUNT(*) FROM flagged_items
           WHERE scan_id=? AND modified != ''
           AND date(modified) < ?""",
        (scan_id, overdue_cutoff(5)),
    ).fetchone()[0]

    special_total = conn.execute(
        """SELECT COUNT(*) FROM flagged_items
           WHERE scan_id=? AND special_category != '[]' AND special_category != ''""",
        key,
    ).fetchone()[0]

    history_row = (
        scan_id,
        time.strftime("%Y-%m-%d", time.localtime(finished)),
        flagged_total,
        special_total,
        overdue_total,
        deleted_count,
        json.dumps(per_source),
    )
    conn.execute(
        """INSERT OR REPLACE INTO scan_history
           (scan_id, scan_date, flagged_count, special_category,
            overdue_count, deleted_count, sources_json)
           VALUES (?,?,?,?,?,?,?)""",
        history_row,
    )
    conn.commit()
|
||||
|
||||
# ── Query helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
def latest_scan_id(self) -> int | None:
|
||||
"""Return the id of the most recent completed scan."""
|
||||
row = self._connect().execute(
|
||||
"SELECT id FROM scans WHERE finished_at IS NOT NULL ORDER BY id DESC LIMIT 1"
|
||||
).fetchone()
|
||||
return row[0] if row else None
|
||||
|
||||
def get_flagged_items(self, scan_id: int | None = None) -> list[dict]:
|
||||
"""Return flagged items for a scan (defaults to latest)."""
|
||||
sid = scan_id or self.latest_scan_id()
|
||||
if not sid:
|
||||
return []
|
||||
rows = self._connect().execute(
|
||||
"""SELECT fi.*, COALESCE(d.status, 'unreviewed') AS disposition
|
||||
FROM flagged_items fi
|
||||
LEFT JOIN dispositions d ON d.item_id = fi.id
|
||||
WHERE fi.scan_id=? ORDER BY fi.cpr_count DESC""",
|
||||
(sid,),
|
||||
).fetchall()
|
||||
result = []
|
||||
for r in rows:
|
||||
d = dict(r)
|
||||
d["attachments"] = json.loads(d.get("attachments") or "[]")
|
||||
result.append(d)
|
||||
return result
|
||||
|
||||
def get_session_items(self, window_seconds: int = 300) -> list[dict]:
    """Return flagged items of every scan in the latest scan's session.

    The reference point is the ``started_at`` of the finished scan with the
    highest id.  Every finished scan that started no earlier than
    *window_seconds* before that point belongs to the session, which groups
    scans that each got their own scan_id but started close together.

    Args:
        window_seconds: Size of the look-back window in seconds.

    Returns:
        Item dicts (with ``disposition`` and decoded ``attachments``) sorted
        by ``cpr_count`` descending; empty if no scan has finished yet.
    """
    conn = self._connect()
    anchor = conn.execute(
        "SELECT started_at FROM scans WHERE finished_at IS NOT NULL ORDER BY id DESC LIMIT 1"
    ).fetchone()
    if not anchor:
        return []

    earliest_start = anchor[0] - window_seconds
    cursor = self._connect().execute(
        """SELECT fi.*, COALESCE(d.status, 'unreviewed') AS disposition
           FROM flagged_items fi
           JOIN scans s ON fi.scan_id = s.id
           LEFT JOIN dispositions d ON d.item_id = fi.id
           WHERE s.started_at >= ? AND s.finished_at IS NOT NULL
           ORDER BY fi.cpr_count DESC""",
        (earliest_start,),
    )

    items = [dict(record) for record in cursor.fetchall()]
    for item in items:
        item["attachments"] = json.loads(item.get("attachments") or "[]")
    return items
|
||||
|
||||
def lookup_data_subject(self, cpr: str) -> list[dict]:
    """Find every flagged item whose CPR index contains the given number.

    The CPR is hashed with SHA-256 (of ``str(cpr)``) and matched against
    ``cpr_index.cpr_hash``; the raw number is never sent to the database.

    Returns:
        Item dicts across all scans, newest ``modified`` first, each with a
        ``cpr_first_seen`` key and ``attachments`` decoded from JSON.
    """
    digest = hashlib.sha256(str(cpr).encode()).hexdigest()
    matches = self._connect().execute(
        """SELECT fi.*, ci.first_seen AS cpr_first_seen
           FROM cpr_index ci
           JOIN flagged_items fi ON fi.id = ci.item_id AND fi.scan_id = ci.scan_id
           WHERE ci.cpr_hash = ?
           ORDER BY fi.modified DESC""",
        (digest,),
    ).fetchall()

    items = []
    for record in matches:
        item = dict(record)
        raw_attachments = item.get("attachments")
        item["attachments"] = json.loads(raw_attachments if raw_attachments else "[]")
        items.append(item)
    return items
|
||||
|
||||
def get_overdue_items(self, years: int = 5,
                      scan_id: int | None = None,
                      fiscal_year_end: str | None = None) -> list[dict]:
    """Return the items of a scan that are older than the retention cutoff.

    Args:
        years: Retention period in years (default 5).
        scan_id: Scan to query; a falsy value selects ``latest_scan_id()``.
        fiscal_year_end: "MM-DD" for a fiscal-year-aligned cutoff
            (e.g. "12-31"); None means a rolling window from today.

    Returns:
        Item dicts with a non-empty ``modified`` date before the cutoff,
        oldest first.  Each dict additionally carries ``cutoff_date`` (the
        ISO cutoff) and ``cutoff_mode`` ("fiscal" or "rolling").  Empty when
        no scan id can be resolved.
    """
    target = scan_id if scan_id else self.latest_scan_id()
    if not target:
        return []

    cutoff = overdue_cutoff(years, fiscal_year_end)
    mode = "fiscal" if fiscal_year_end else "rolling"
    cursor = self._connect().execute(
        """SELECT * FROM flagged_items
           WHERE scan_id=? AND modified != ''
           AND date(modified) < ?
           ORDER BY modified ASC""",
        (target, cutoff),
    )

    overdue = []
    for record in cursor.fetchall():
        item = dict(record)
        item["cutoff_date"] = cutoff
        item["cutoff_mode"] = mode
        overdue.append(item)
    return overdue
|
||||
|
||||
def get_trend(self, last_n: int = 20) -> list[dict]:
    """Return the newest *last_n* history rows in chronological order.

    Each dict holds the ``scan_history`` columns plus ``delta`` and
    ``sources`` from the matching ``scans`` row.  Rows are selected newest
    first (by scan id) and then reversed, so the oldest comes first.
    """
    newest_first = self._connect().execute(
        """SELECT sh.*, s.delta, s.sources
           FROM scan_history sh
           JOIN scans s ON s.id = sh.scan_id
           ORDER BY sh.scan_id DESC LIMIT ?""",
        (last_n,),
    ).fetchall()
    trend = [dict(record) for record in newest_first]
    trend.reverse()
    return trend
|
||||
|
||||
def set_disposition(self, item_id: str, status: str,
                    legal_basis: str = "", notes: str = "",
                    reviewed_by: str = "") -> None:
    """Store a compliance officer's decision for an item.

    Any existing disposition for *item_id* is replaced, and ``reviewed_at``
    is set to the current time.  The change is committed immediately.
    """
    conn = self._connect()
    decision = (item_id, status, legal_basis, notes, reviewed_by, time.time())
    conn.execute(
        """INSERT OR REPLACE INTO dispositions
           (item_id, status, legal_basis, notes, reviewed_by, reviewed_at)
           VALUES (?,?,?,?,?,?)""",
        decision,
    )
    conn.commit()
|
||||
|
||||
def get_disposition(self, item_id: str) -> dict | None:
|
||||
row = self._connect().execute(
|
||||
"SELECT * FROM dispositions WHERE item_id=?", (item_id,)
|
||||
).fetchone()
|
||||
return dict(row) if row else None
|
||||
|
||||
def get_prior_disposition(self, item_id: str) -> str | None:
|
||||
"""Return prior disposition status if set (not 'unreviewed'), else None."""
|
||||
row = self._connect().execute(
|
||||
"SELECT status FROM dispositions WHERE item_id=?", (item_id,)
|
||||
).fetchone()
|
||||
if row and row[0] and row[0] != "unreviewed":
|
||||
return row[0]
|
||||
return None
|
||||
|
||||
def get_stats(self, scan_id: int | None = None) -> dict:
    """Return summary statistics for one scan.

    Args:
        scan_id: Scan to summarise; a falsy value selects ``latest_scan_id()``.

    Returns:
        A dict with the scan's timestamps and counters, the number of
        distinct CPR hashes (``unique_subjects``), the number of items older
        than the rolling 5-year cutoff (``overdue_count``), the ``delta``
        flag as a bool and a per-``source_type`` breakdown (``by_source``).
        An empty dict when the scan cannot be resolved or does not exist.
    """
    target = scan_id if scan_id else self.latest_scan_id()
    if not target:
        return {}

    conn = self._connect()
    key = (target,)
    scan_row = conn.execute("SELECT * FROM scans WHERE id=?", key).fetchone()
    if not scan_row:
        return {}

    breakdown = {}
    for source_type, item_total, cpr_total in conn.execute(
        """SELECT source_type, COUNT(*), SUM(cpr_count)
           FROM flagged_items WHERE scan_id=? GROUP BY source_type""",
        key,
    ).fetchall():
        breakdown[source_type] = {"items": item_total, "cpr_hits": cpr_total}

    subject_total = conn.execute(
        "SELECT COUNT(DISTINCT cpr_hash) FROM cpr_index WHERE scan_id=?",
        key,
    ).fetchone()[0]

    overdue_total = conn.execute(
        """SELECT COUNT(*) FROM flagged_items
           WHERE scan_id=? AND modified != ''
           AND date(modified) < ?""",
        (target, overdue_cutoff(5)),
    ).fetchone()[0]

    stats = {"scan_id": target}
    for column in ("started_at", "finished_at", "total_scanned", "flagged_count"):
        stats[column] = scan_row[column]
    stats["unique_subjects"] = subject_total
    stats["overdue_count"] = overdue_total
    stats["delta"] = bool(scan_row["delta"])
    stats["by_source"] = breakdown
    return stats
|
||||
|
||||
def iter_all_items(self, scan_id: int | None = None) -> Iterator[dict]:
|
||||
"""Iterate over flagged items without loading all into memory."""
|
||||
sid = scan_id or self.latest_scan_id()
|
||||
if not sid:
|
||||
return
|
||||
cur = self._connect().execute(
|
||||
"SELECT * FROM flagged_items WHERE scan_id=? ORDER BY id",
|
||||
(sid,),
|
||||
)
|
||||
for row in cur:
|
||||
d = dict(row)
|
||||
d["attachments"] = json.loads(d.get("attachments") or "[]")
|
||||
yield d
|
||||
|
||||
def scans_list(self, limit: int = 50) -> list[dict]:
    """Return summaries of the most recent finished scans, newest first.

    Args:
        limit: Maximum number of scans to return.

    Returns:
        Dicts with id, timestamps, counters and the ``delta`` flag; the
        ``sources`` column is decoded from JSON.
    """
    cursor = self._connect().execute(
        """SELECT id, started_at, finished_at, sources, user_count,
                  total_scanned, flagged_count, delta
           FROM scans
           WHERE finished_at IS NOT NULL
           ORDER BY id DESC LIMIT ?""",
        (limit,),
    )
    summaries = [dict(record) for record in cursor.fetchall()]
    for summary in summaries:
        summary["sources"] = json.loads(summary.get("sources") or "[]")
    return summaries
|
||||
|
||||
def log_deletion(self, item: dict, reason: str = "manual",
|
||||
deleted_by: str = "", scan_id: int | None = None) -> None:
|
||||
"""Write an immutable deletion audit record.
|
||||
|
||||
Args:
|
||||
item: flagged_item dict (or any dict with id, name, source_type, etc.)
|
||||
reason: "manual" | "bulk" | "retention" | "data-subject-request"
|
||||
deleted_by: identity of the actor — authenticated M365 user UPN,
|
||||
"headless" for scheduled runs, or "" for UI with no user context
|
||||
scan_id: which scan originally found this item (optional)
|
||||
"""
|
||||
c = self._connect()
|
||||
now = time.time()
|
||||
|
||||
# Pull legal_basis from dispositions table if available
|
||||
legal_basis = ""
|
||||
disp = self.get_disposition(item.get("id", ""))
|
||||
if disp:
|
||||
legal_basis = disp.get("legal_basis", "") or ""
|
||||
|
||||
c.execute(
|
||||
"""INSERT INTO deletion_log
|
||||
(deleted_at, item_id, item_name, source_type, account_id,
|
||||
account_name, cpr_count, reason, legal_basis, deleted_by, scan_id)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?)""",
|
||||
(
|
||||
now,
|
||||
item.get("id", ""),
|
||||
item.get("name", ""),
|
||||
item.get("source_type", ""),
|
||||
item.get("account_id", ""),
|
||||
item.get("account_name", ""),
|
||||
item.get("cpr_count", 0),
|
||||
reason,
|
||||
legal_basis,
|
||||
deleted_by,
|
||||
scan_id,
|
||||
),
|
||||
)
|
||||
c.commit()
|
||||
|
||||
def get_deletion_log(self, limit: int = 500,
|
||||
reason: str | None = None) -> list[dict]:
|
||||
"""Return deletion audit records, most recent first."""
|
||||
c = self._connect()
|
||||
if reason:
|
||||
rows = c.execute(
|
||||
"SELECT * FROM deletion_log WHERE reason=? ORDER BY deleted_at DESC LIMIT ?",
|
||||
(reason, limit),
|
||||
).fetchall()
|
||||
else:
|
||||
rows = c.execute(
|
||||
"SELECT * FROM deletion_log ORDER BY deleted_at DESC LIMIT ?",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
def deletion_log_stats(self) -> dict:
    """Return summary counts of the deletion log.

    Returns:
        ``{"total": <rows>, "by_reason": {reason: rows}, "cpr_hits_deleted":
        <sum of cpr_count, 0 when the log is empty>}``.
    """
    conn = self._connect()

    per_reason = {}
    for reason, hits in conn.execute(
        "SELECT reason, COUNT(*) FROM deletion_log GROUP BY reason"
    ).fetchall():
        per_reason[reason] = hits

    row_total = conn.execute("SELECT COUNT(*) FROM deletion_log").fetchone()[0]
    cpr_sum = conn.execute(
        "SELECT SUM(cpr_count) FROM deletion_log"
    ).fetchone()[0]

    return {
        "total": row_total,
        "by_reason": per_reason,
        # SUM() yields NULL on an empty table; report that as 0.
        "cpr_hits_deleted": cpr_sum or 0,
    }
|
||||
|
||||
def delete_item_record(self, item_id: str, scan_id: int | None = None) -> None:
|
||||
"""Remove a flagged item from the DB (after it has been deleted in M365)."""
|
||||
c = self._connect()
|
||||
if scan_id:
|
||||
c.execute(
|
||||
"DELETE FROM flagged_items WHERE id=? AND scan_id=?",
|
||||
(item_id, scan_id),
|
||||
)
|
||||
c.execute(
|
||||
"DELETE FROM cpr_index WHERE item_id=? AND scan_id=?",
|
||||
(item_id, scan_id),
|
||||
)
|
||||
else:
|
||||
c.execute("DELETE FROM flagged_items WHERE id=?", (item_id,))
|
||||
c.execute("DELETE FROM cpr_index WHERE item_id=?", (item_id,))
|
||||
c.commit()
|
||||
|
||||
|
||||
# ── Scheduler runs ────────────────────────────────────────────────────────
|
||||
|
||||
def begin_schedule_run(self, profile_id: str = "") -> int:
    """Insert a new ``schedule_runs`` row and return its id.

    Only ``started_at`` (current time) and ``profile_id`` are written; the
    remaining columns keep their table defaults.
    """
    from time import time as _now

    conn = self._connect()
    new_run = conn.execute(
        "INSERT INTO schedule_runs (started_at, profile_id) VALUES (?, ?)",
        (_now(), profile_id),
    )
    conn.commit()
    return new_run.lastrowid
|
||||
|
||||
def finish_schedule_run(self, run_id: int, *,
                        status: str = "completed",
                        flagged: int = 0, scanned: int = 0,
                        emailed: int = 0, error: str = "") -> None:
    """Record the outcome of a scheduled run.

    Sets ``finished_at`` to the current time and stores the given status,
    counters and error text on the ``schedule_runs`` row with id *run_id*.
    All outcome values are keyword-only.
    """
    from time import time as _now

    outcome = (_now(), status, flagged, scanned, emailed, error, run_id)
    conn = self._connect()
    conn.execute(
        """UPDATE schedule_runs
           SET finished_at=?, status=?, flagged=?, scanned=?, emailed=?, error=?
           WHERE id=?""",
        outcome,
    )
    conn.commit()
|
||||
|
||||
def get_schedule_runs(self, limit: int = 20) -> list[dict]:
    """Return up to *limit* ``schedule_runs`` rows, latest ``started_at`` first."""
    cursor = self._connect().execute(
        "SELECT * FROM schedule_runs ORDER BY started_at DESC LIMIT ?",
        (limit,),
    )
    runs = []
    for record in cursor.fetchall():
        runs.append(dict(record))
    return runs
|
||||
|
||||
|
||||
def export_db(self, out_path: Path) -> dict:
    """Export the database to a structured ZIP archive.

    Contents:
        export_meta.json   -- metadata (date, schema version, row counts)
        scans.json         -- scan run summaries
        flagged_items.json -- flagged items (thumb_b64 stripped)
        cpr_index.json     -- CPR hashes (never raw CPR)
        pii_hits.json      -- per-type PII counts
        dispositions.json  -- compliance decisions
        scan_history.json  -- aggregated trend data
        deletion_log.json  -- full deletion audit trail
        schedule_runs.json -- scheduled run history

    Args:
        out_path: Destination ZIP file; missing parent directories are
            created.

    Returns:
        The metadata dict that is also written to export_meta.json
        (``exported_at``, ``schema_version``, ``db_path``, ``row_counts``).
    """
    import datetime as _dt
    import json as _json
    import zipfile as _zf

    conn = self._connect()

    # Table name -> columns to leave out of the export.  Insertion order is
    # the order of the archive members.
    plan = {
        "scans": (),
        "flagged_items": ("thumb_b64",),
        "cpr_index": (),
        "pii_hits": (),
        "dispositions": (),
        "scan_history": (),
        "deletion_log": (),
        "schedule_runs": (),
    }

    tables = {}
    for table, dropped in plan.items():
        dumped = []
        for record in conn.execute(f"SELECT * FROM {table}").fetchall():
            entry = dict(record)
            for column in dropped:
                entry.pop(column, None)
            dumped.append(entry)
        tables[table] = dumped

    meta = {
        "exported_at": _dt.datetime.now().isoformat(),
        "schema_version": conn.execute("PRAGMA user_version").fetchone()[0],
        "db_path": str(self._path),
        "row_counts": {table: len(dumped) for table, dumped in tables.items()},
    }

    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with _zf.ZipFile(out_path, "w", _zf.ZIP_DEFLATED, compresslevel=9) as archive:
        archive.writestr("export_meta.json", _json.dumps(meta, indent=2))
        for table, dumped in tables.items():
            # default=str serialises values that JSON cannot encode natively.
            archive.writestr(f"{table}.json", _json.dumps(dumped, indent=2, default=str))

    return meta
|
||||
|
||||
def import_db(self, zip_path: Path, mode: str = "merge") -> dict:
|
||||
"""Import a previously exported ZIP archive into the database.
|
||||
|
||||
Args:
|
||||
zip_path: Path to the export ZIP file.
|
||||
mode: "merge" — import dispositions and deletion_log into
|
||||
the current DB, leave existing data intact.
|
||||
"replace" — wipe the DB first, then import everything.
|
||||
|
||||
Returns a summary dict with imported row counts.
|
||||
"""
|
||||
import zipfile as _zf, json as _json
|
||||
|
||||
zip_path = Path(zip_path)
|
||||
if not zip_path.exists():
|
||||
raise FileNotFoundError(f"Export file not found: {zip_path}")
|
||||
|
||||
with _zf.ZipFile(zip_path, "r") as zf:
|
||||
names = zf.namelist()
|
||||
if "export_meta.json" not in names:
|
||||
raise ValueError("Not a valid GDPRScanner export — missing export_meta.json")
|
||||
|
||||
meta = _json.loads(zf.read("export_meta.json"))
|
||||
|
||||
def _load(fname: str) -> list[dict]:
|
||||
if fname not in names:
|
||||
return []
|
||||
return _json.loads(zf.read(fname))
|
||||
|
||||
scans = _load("scans.json")
|
||||
flagged_items = _load("flagged_items.json")
|
||||
cpr_index = _load("cpr_index.json")
|
||||
pii_hits = _load("pii_hits.json")
|
||||
dispositions = _load("dispositions.json")
|
||||
scan_history = _load("scan_history.json")
|
||||
deletion_log = _load("deletion_log.json")
|
||||
schedule_runs = _load("schedule_runs.json")
|
||||
|
||||
if mode == "replace":
|
||||
self.reset()
|
||||
|
||||
c = self._connect()
|
||||
imported: dict[str, int] = {}
|
||||
|
||||
if mode == "replace":
|
||||
# Full restore — import all tables
|
||||
for row in scans:
|
||||
try:
|
||||
c.execute(
|
||||
"""INSERT OR IGNORE INTO scans
|
||||
(id,started_at,finished_at,sources,user_count,
|
||||
options,total_scanned,flagged_count,delta)
|
||||
VALUES (:id,:started_at,:finished_at,:sources,:user_count,
|
||||
:options,:total_scanned,:flagged_count,:delta)""", row)
|
||||
except Exception: pass
|
||||
imported["scans"] = len(scans)
|
||||
|
||||
for row in flagged_items:
|
||||
row.setdefault("thumb_b64", "")
|
||||
row.setdefault("user_role", "other")
|
||||
try:
|
||||
c.execute(
|
||||
"""INSERT OR IGNORE INTO flagged_items
|
||||
(id,scan_id,name,source,source_type,account_id,folder,
|
||||
url,drive_id,size_kb,modified,cpr_count,risk,
|
||||
thumb_b64,thumb_mime,attachments,user_role,scanned_at)
|
||||
VALUES (:id,:scan_id,:name,:source,:source_type,:account_id,
|
||||
:folder,:url,:drive_id,:size_kb,:modified,:cpr_count,:risk,
|
||||
:thumb_b64,:thumb_mime,:attachments,:user_role,:scanned_at)""", row)
|
||||
except Exception: pass
|
||||
imported["flagged_items"] = len(flagged_items)
|
||||
|
||||
for row in cpr_index:
|
||||
try:
|
||||
c.execute(
|
||||
"INSERT OR IGNORE INTO cpr_index (cpr_hash,item_id,scan_id,first_seen) "
|
||||
"VALUES (:cpr_hash,:item_id,:scan_id,:first_seen)", row)
|
||||
except Exception: pass
|
||||
imported["cpr_index"] = len(cpr_index)
|
||||
|
||||
for row in pii_hits:
|
||||
try:
|
||||
c.execute(
|
||||
"INSERT OR IGNORE INTO pii_hits (item_id,scan_id,pii_type,hit_count) "
|
||||
"VALUES (:item_id,:scan_id,:pii_type,:hit_count)", row)
|
||||
except Exception: pass
|
||||
imported["pii_hits"] = len(pii_hits)
|
||||
|
||||
for row in scan_history:
|
||||
try:
|
||||
c.execute(
|
||||
"""INSERT OR IGNORE INTO scan_history
|
||||
(scan_id,scan_date,flagged_count,special_category,
|
||||
overdue_count,deleted_count,sources_json)
|
||||
VALUES (:scan_id,:scan_date,:flagged_count,:special_category,
|
||||
:overdue_count,:deleted_count,:sources_json)""", row)
|
||||
except Exception: pass
|
||||
imported["scan_history"] = len(scan_history)
|
||||
|
||||
# Both modes: merge dispositions and deletion_log
|
||||
for row in dispositions:
|
||||
try:
|
||||
c.execute(
|
||||
"""INSERT OR REPLACE INTO dispositions
|
||||
(item_id,status,legal_basis,notes,reviewed_by,reviewed_at)
|
||||
VALUES (:item_id,:status,:legal_basis,:notes,:reviewed_by,:reviewed_at)""",
|
||||
row)
|
||||
except Exception: pass
|
||||
imported["dispositions"] = len(dispositions)
|
||||
|
||||
for row in deletion_log:
|
||||
try:
|
||||
c.execute(
|
||||
"""INSERT OR IGNORE INTO deletion_log
|
||||
(id,deleted_at,item_id,item_name,source_type,account_id,
|
||||
account_name,cpr_count,reason,legal_basis,deleted_by,scan_id)
|
||||
VALUES (:id,:deleted_at,:item_id,:item_name,:source_type,:account_id,
|
||||
:account_name,:cpr_count,:reason,:legal_basis,:deleted_by,:scan_id)""",
|
||||
row)
|
||||
except Exception: pass
|
||||
imported["deletion_log"] = len(deletion_log)
|
||||
|
||||
for row in schedule_runs:
|
||||
try:
|
||||
c.execute(
|
||||
"""INSERT OR IGNORE INTO schedule_runs
|
||||
(id,started_at,finished_at,status,profile_id,
|
||||
flagged,scanned,emailed,error)
|
||||
VALUES (:id,:started_at,:finished_at,:status,:profile_id,
|
||||
:flagged,:scanned,:emailed,:error)""",
|
||||
row)
|
||||
except Exception: pass
|
||||
imported["schedule_runs"] = len(schedule_runs)
|
||||
|
||||
c.commit()
|
||||
return {"mode": mode, "exported_at": meta.get("exported_at"), "imported": imported}
|
||||
|
||||
|
||||
# ── Module-level singleton ────────────────────────────────────────────────────
|
||||
# Lazily-created process-wide ScanDB instance; populated by get_db().
_db: ScanDB | None = None


def get_db(path: Path = DB_PATH) -> ScanDB:
    """Return the shared ScanDB instance, constructing it on first use.

    Args:
        path: Database location handed to ScanDB. It is only consulted on the
            call that actually creates the instance; later calls return the
            existing object regardless of the path they pass.

    Returns:
        The module-level ScanDB singleton.
    """
    global _db
    instance = _db
    if instance is None:
        instance = _db = ScanDB(path)
    return instance
|
||||
2212
gdpr_scanner.py
Normal file
2212
gdpr_scanner.py
Normal file
File diff suppressed because it is too large
Load Diff
726
google_connector.py
Normal file
726
google_connector.py
Normal file
@ -0,0 +1,726 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
google_connector.py — Google Workspace connector for GDPR Scanner.
|
||||
|
||||
Handles service-account authentication with domain-wide delegation and exposes
|
||||
iterators for:
|
||||
- Gmail messages (body + attachments) via the Gmail API
|
||||
- Google Drive files (with export for native Docs/Sheets/Slides) via Drive API
|
||||
|
||||
All file content is yielded as (metadata_dict, bytes_content) tuples, matching
|
||||
the same contract used by m365_connector so the scan engine can reuse _scan_bytes.
|
||||
|
||||
Authentication:
|
||||
Service account JSON key with domain-wide delegation enabled in Google Workspace
|
||||
Admin Console → Security → API Controls → Domain-wide delegation.
|
||||
|
||||
Required OAuth scopes (add to the service account's delegation entry):
|
||||
https://www.googleapis.com/auth/gmail.readonly
|
||||
https://www.googleapis.com/auth/drive.readonly
|
||||
https://www.googleapis.com/auth/admin.directory.user.readonly (user listing)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Optional
|
||||
|
||||
# ── google-auth / google-api-python-client ────────────────────────────────────
|
||||
try:
|
||||
from google.oauth2 import service_account
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
from googleapiclient.http import MediaIoBaseDownload
|
||||
GOOGLE_AUTH_OK = True
|
||||
|
||||
# Suppress the googleapiclient.http WARNING that fires before raising
|
||||
# HttpError for exportSizeLimitExceeded — we handle it ourselves below.
|
||||
class _SuppressExportSizeWarning(logging.Filter):
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
return "exportSizeLimitExceeded" not in record.getMessage()
|
||||
|
||||
logging.getLogger("googleapiclient.http").addFilter(_SuppressExportSizeWarning())
|
||||
|
||||
except ImportError:
|
||||
GOOGLE_AUTH_OK = False
|
||||
|
||||
# Per-user data directory; created as an import-time side effect if missing.
_DATA_DIR = Path.home() / ".gdprscanner"
_DATA_DIR.mkdir(exist_ok=True)
# Persisted service-account key (see save_key / load_saved_key / delete_key).
_SA_KEY_FILE = _DATA_DIR / "google_sa.json"
# Persisted OAuth token for the personal device-code connector
# (see save_personal_token / load_personal_token / delete_personal_token).
_GOOGLE_TOKEN_FILE = _DATA_DIR / "google_token.json"

# Scopes requested by the personal-account device-code flow.
PERSONAL_SCOPES = [
    "https://www.googleapis.com/auth/gmail.readonly",
    "https://www.googleapis.com/auth/drive.readonly",
]
# OAuth endpoints used by PersonalGoogleConnector.
_DEVICE_AUTH_URL = "https://oauth2.googleapis.com/device/code"
_TOKEN_URL = "https://oauth2.googleapis.com/token"
_USERINFO_URL = "https://www.googleapis.com/oauth2/v2/userinfo"
# grant_type value sent when polling the token endpoint for a device code.
_DEVICE_GRANT = "urn:ietf:params:oauth:grant-type:device_code"

# Scope lists used when building delegated service-account credentials.
GMAIL_SCOPES = [
    "https://www.googleapis.com/auth/gmail.readonly",
]
DRIVE_SCOPES = [
    "https://www.googleapis.com/auth/drive.readonly",
]
ADMIN_SCOPES = [
    "https://www.googleapis.com/auth/admin.directory.user.readonly",
]

# Google-native MIME types and the export format we request
# Maps native MIME type -> (export MIME type, file extension appended to the name).
_EXPORT_MAP = {
    "application/vnd.google-apps.document": ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx"),
    "application/vnd.google-apps.spreadsheet": ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx"),
    "application/vnd.google-apps.presentation": ("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx"),
    "application/vnd.google-apps.drawing": ("application/pdf", ".pdf"),
    "application/vnd.google-apps.form": ("application/pdf", ".pdf"),
}

# Maximum export size for native Google files (bytes) — skip larger ones
_MAX_EXPORT_BYTES = 20 * 1024 * 1024 # 20 MB

# ── OU role mapping ───────────────────────────────────────────────────────────
# JSON file holding the student/staff OU prefix lists read by _load_ou_roles().
_OU_ROLES_PATH = Path(__file__).parent / "classification" / "google_ou_roles.json"
|
||||
|
||||
def _load_ou_roles() -> tuple[list, list]:
|
||||
"""Load student/staff OU prefix lists from skus/google_ou_roles.json.
|
||||
Returns (student_prefixes, staff_prefixes) — both lowercased."""
|
||||
try:
|
||||
import json as _j
|
||||
data = _j.loads(_OU_ROLES_PATH.read_text(encoding="utf-8"))
|
||||
students = [p.lower() for p in data.get("student_ou_prefixes", [])]
|
||||
staff = [p.lower() for p in data.get("staff_ou_prefixes", [])]
|
||||
return students, staff
|
||||
except Exception:
|
||||
return ["/elever", "/students"], ["/personale", "/staff", "/lærere", "/ansatte"]
|
||||
|
||||
def classify_ou_role(org_unit_path: str) -> str:
    """Classify a Workspace orgUnitPath as 'student', 'staff', or 'other'.

    The path is lowercased and compared against the prefix lists returned by
    _load_ou_roles(); student prefixes are checked before staff prefixes. An
    empty path is always 'other'.
    """
    if not org_unit_path:
        return "other"
    lowered = org_unit_path.lower()
    student_prefixes, staff_prefixes = _load_ou_roles()
    if any(lowered.startswith(candidate) for candidate in student_prefixes):
        return "student"
    if any(lowered.startswith(candidate) for candidate in staff_prefixes):
        return "staff"
    return "other"
|
||||
|
||||
|
||||
|
||||
class GoogleError(Exception):
    """Error raised by the Google connectors (missing deps, auth or API failures)."""
|
||||
|
||||
|
||||
class GoogleConnector:
    """
    Wraps service-account + domain-wide delegation auth for Gmail and Drive.

    Every API call impersonates a concrete user: the Gmail/Drive iterators
    impersonate the account being scanned, user listing impersonates
    ``admin_email``.

    Usage:
        conn = GoogleConnector(key_dict, admin_email="admin@domain.com")
        for meta, data in conn.iter_gmail_messages("user@domain.com"):
            ...
    """

    def __init__(self, key_dict: dict, admin_email: str = ""):
        """
        Args:
            key_dict: Parsed service-account JSON key; its ``type`` must be
                ``"service_account"``.
            admin_email: Workspace admin to impersonate for user listing.
                Empty or None disables list_users() (it then returns []).

        Raises:
            GoogleError: if the Google client libraries are not importable or
                the key is not a service-account key.
        """
        if not GOOGLE_AUTH_OK:
            raise GoogleError(
                "google-auth not installed — run: "
                "pip install google-auth google-auth-httplib2 google-api-python-client"
            )
        # Validate before storing anything, and report a non-dict key as a
        # GoogleError instead of an AttributeError from .get().
        if not isinstance(key_dict, dict):
            raise GoogleError("Key file must be a service_account JSON — found type: " + type(key_dict).__name__)
        if key_dict.get("type") != "service_account":
            raise GoogleError("Key file must be a service_account JSON — found type: " + str(key_dict.get("type")))
        self._key_dict = key_dict
        # Tolerate None (e.g. an unset config value) as "no admin configured".
        self._admin_email = (admin_email or "").strip()
        self._lock = threading.Lock()

    # ── Credential factories ──────────────────────────────────────────────────

    def _creds_for(self, user_email: str, scopes: list):
        """Return delegated credentials impersonating user_email."""
        base = service_account.Credentials.from_service_account_info(
            self._key_dict, scopes=scopes
        )
        return base.with_subject(user_email)

    def _admin_creds(self):
        """Admin Directory API credentials (impersonating admin_email).

        Requests the admin, Gmail and Drive scopes together.

        Raises:
            GoogleError: if no admin_email was configured.
        """
        if not self._admin_email:
            raise GoogleError("admin_email required to list workspace users")
        return self._creds_for(self._admin_email, ADMIN_SCOPES + GMAIL_SCOPES + DRIVE_SCOPES)

    # ── Connectivity check ────────────────────────────────────────────────────

    def is_authenticated(self) -> bool:
        """Light check: True if the stored key can be parsed into credentials.

        No token is requested and no API call is made, so this does not prove
        that delegation is configured or that the key is still active.
        """
        try:
            creds = service_account.Credentials.from_service_account_info(
                self._key_dict, scopes=GMAIL_SCOPES
            )
            return bool(creds)
        except Exception:
            return False

    def get_service_account_email(self) -> str:
        """Return the key's ``client_email``, or "" if absent."""
        return self._key_dict.get("client_email", "")

    def get_project_id(self) -> str:
        """Return the key's ``project_id``, or "" if absent."""
        return self._key_dict.get("project_id", "")

    # ── User listing ─────────────────────────────────────────────────────────

    def list_users(self, domain: str = "") -> list[dict]:
        """
        Return one dict per user that is neither suspended nor archived, with
        keys ``id``, ``email``, ``displayName``, ``orgUnitPath`` and
        ``userRole`` (the result of classify_ou_role on the OU path).

        Requires Admin Directory API scope on the service account delegation.
        Returns [] without contacting the API if admin_email is not set.

        Args:
            domain: Optional domain filter passed to the Directory API.

        Raises:
            GoogleError: if the Directory API returns an HTTP error.
        """
        if not self._admin_email:
            return []
        try:
            creds = self._admin_creds()
            service = build("admin", "directory_v1", credentials=creds, cache_discovery=False)
            results = []
            page_token = None
            params: dict = {"customer": "my_customer", "maxResults": 500, "orderBy": "email", "projection": "full"}
            if domain:
                params["domain"] = domain
            while True:
                if page_token:
                    params["pageToken"] = page_token
                resp = service.users().list(**params).execute()
                for u in resp.get("users", []):
                    if not u.get("suspended") and not u.get("archived"):
                        ou_path = u.get("orgUnitPath", "")
                        results.append({
                            "id": u.get("id", ""),
                            "email": u.get("primaryEmail", ""),
                            "displayName": u.get("name", {}).get("fullName", ""),
                            "orgUnitPath": ou_path,
                            "userRole": classify_ou_role(ou_path),
                        })
                page_token = resp.get("nextPageToken")
                if not page_token:
                    break
            return results
        except HttpError as e:
            raise GoogleError(f"Admin Directory API error: {e}") from e

    # ── Gmail iterator ────────────────────────────────────────────────────────

    def iter_gmail_messages(
        self,
        user_email: str,
        max_messages: int = 2000,
        scan_body: bool = True,
        scan_attachments: bool = True,
        max_attach_mb: float = 20.0,
    ) -> Iterator[tuple[dict, bytes]]:
        """
        Yield (metadata, content_bytes) for each Gmail message / attachment,
        impersonating user_email.

        With scan_body, a message whose text body can be extracted yields one
        item for that body. With scan_attachments, each named attachment yields
        one item (skipped if larger than max_attach_mb).

        Raises:
            GoogleError: if building the service or listing messages fails
                with an HTTP error.
        """
        try:
            creds = self._creds_for(user_email, GMAIL_SCOPES)
            service = build("gmail", "v1", credentials=creds, cache_discovery=False)
        except HttpError as e:
            raise GoogleError(f"Gmail auth failed for {user_email}: {e}") from e
        yield from _gmail_iter(service, user_email, max_messages, scan_body, scan_attachments, max_attach_mb)

    # ── Drive iterator ────────────────────────────────────────────────────────

    def iter_drive_files(
        self,
        user_email: str,
        max_files: int = 5000,
        max_file_mb: float = 50.0,
    ) -> Iterator[tuple[dict, bytes]]:
        """
        Yield (metadata, content_bytes) for each Drive file, impersonating
        user_email.

        Native Google formats listed in _EXPORT_MAP are exported (Office
        formats or PDF). Binary files are downloaded directly (skipped if
        > max_file_mb).

        Raises:
            GoogleError: if building the service or listing files fails with
                an HTTP error.
        """
        try:
            creds = self._creds_for(user_email, DRIVE_SCOPES)
            service = build("drive", "v3", credentials=creds, cache_discovery=False)
        except HttpError as e:
            raise GoogleError(f"Drive auth failed for {user_email}: {e}") from e
        yield from _drive_iter(service, user_email, max_files, max_file_mb)
|
||||
|
||||
|
||||
# ── Persistence helpers ───────────────────────────────────────────────────────
|
||||
|
||||
def load_saved_key() -> Optional[dict]:
    """Read the persisted service-account key from _SA_KEY_FILE.

    Returns:
        The parsed JSON content, or None when the file does not exist or
        cannot be read/parsed.
    """
    if not _SA_KEY_FILE.exists():
        return None
    try:
        raw_text = _SA_KEY_FILE.read_text()
        return json.loads(raw_text)
    except Exception:
        return None
|
||||
|
||||
|
||||
def save_key(key_dict: dict) -> None:
    """Persist the service-account key to _SA_KEY_FILE with mode 0600.

    The file is created through os.open() with an explicit 0600 mode so the
    private key is never on disk with default (possibly world-readable)
    permissions, not even briefly between write and chmod. The follow-up
    chmod tightens a file that already existed with looser permissions; it is
    best-effort because some platforms/filesystems do not support it.

    Args:
        key_dict: JSON-serialisable key content.
    """
    import os

    # Serialise first so a failure here cannot truncate an existing key file.
    payload = json.dumps(key_dict, indent=2)
    fd = os.open(_SA_KEY_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        handle.write(payload)
    try:
        _SA_KEY_FILE.chmod(0o600)
    except Exception:
        pass
|
||||
|
||||
|
||||
def delete_key() -> None:
    """Delete the persisted service-account key, if present.

    Best-effort: any error while checking for or removing the file is ignored.
    """
    try:
        key_path = _SA_KEY_FILE
        if not key_path.exists():
            return
        key_path.unlink()
    except Exception:
        return
|
||||
|
||||
|
||||
# ── Internal helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
def _epoch_to_iso(epoch_secs: int) -> str:
|
||||
from datetime import datetime, timezone
|
||||
try:
|
||||
return datetime.fromtimestamp(epoch_secs, tz=timezone.utc).isoformat()
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _extract_body(payload: dict) -> bytes:
|
||||
"""Recursively extract plain-text (or HTML) body from a Gmail message payload."""
|
||||
mime = payload.get("mimeType", "")
|
||||
body_data = payload.get("body", {}).get("data", "")
|
||||
|
||||
if mime == "text/plain" and body_data:
|
||||
return base64.urlsafe_b64decode(body_data)
|
||||
if mime == "text/html" and body_data:
|
||||
# Return raw HTML bytes — _scan_bytes handles HTML stripping
|
||||
return base64.urlsafe_b64decode(body_data)
|
||||
|
||||
# Recurse into multipart
|
||||
for part in payload.get("parts", []):
|
||||
result = _extract_body(part)
|
||||
if result:
|
||||
return result
|
||||
return b""
|
||||
|
||||
|
||||
def _iter_parts(payload: dict):
|
||||
"""Yield all leaf parts (for attachment scanning)."""
|
||||
parts = payload.get("parts", [])
|
||||
if not parts:
|
||||
yield payload
|
||||
else:
|
||||
for part in parts:
|
||||
yield from _iter_parts(part)
|
||||
|
||||
|
||||
# ── Shared iteration helpers (used by both GoogleConnector and PersonalGoogleConnector) ──
|
||||
|
||||
def _gmail_iter(
    service,
    user_email: str,
    max_messages: int,
    scan_body: bool,
    scan_attachments: bool,
    max_attach_mb: float,
) -> Iterator[tuple[dict, bytes]]:
    """Paginate Gmail messages and yield (metadata, bytes) tuples.

    First collects up to max_messages message ids, then fetches each message
    in full. With scan_body, a non-empty extracted body yields one item; with
    scan_attachments, every leaf part that has both a filename and an
    attachmentId (and is not larger than max_attach_mb) yields one item whose
    metadata overrides ``id``, ``name`` and ``size``.

    Per-message and per-attachment failures are skipped so one bad item does
    not end the scan: HTTP errors when fetching, and base64 payloads that
    cannot be decoded (missing padding is tolerated).

    Args:
        service: Gmail API service object (as returned by build("gmail", "v1")).
        user_email: Account label stored in the metadata and error messages;
            the API calls themselves use userId="me".
        max_messages: Upper bound on the number of messages fetched.
        scan_body: Whether to yield message bodies.
        scan_attachments: Whether to yield attachments.
        max_attach_mb: Attachments larger than this many MiB are skipped.

    Raises:
        GoogleError: if listing message ids fails with an HTTP error.
    """
    ids: list[str] = []
    page_token = None
    while len(ids) < max_messages:
        params: dict = {"userId": "me", "maxResults": min(500, max_messages - len(ids))}
        if page_token:
            params["pageToken"] = page_token
        try:
            resp = service.users().messages().list(**params).execute()
        except HttpError as e:
            raise GoogleError(f"Gmail list error for {user_email}: {e}") from e
        ids.extend(m["id"] for m in resp.get("messages", []))
        page_token = resp.get("nextPageToken")
        if not page_token:
            break

    max_attach_bytes = int(max_attach_mb * 1024 * 1024)

    for msg_id in ids:
        try:
            msg = service.users().messages().get(
                userId="me", id=msg_id, format="full"
            ).execute()
        except HttpError:
            continue

        payload = msg.get("payload", {})
        headers = {h["name"].lower(): h["value"] for h in payload.get("headers", [])}
        meta = {
            "id": f"gmail:{msg_id}",
            "name": headers.get("subject", "(no subject)"),
            "_source": "gmail",
            "_source_type": "gmail",
            "_account": user_email,
            "_account_id": user_email,
            "_url": f"https://mail.google.com/mail/u/0/#inbox/{msg_id}",
            # internalDate is in milliseconds; convert to whole seconds.
            "receivedDateTime": _epoch_to_iso(int(msg.get("internalDate", 0)) // 1000),
            "size": msg.get("sizeEstimate", 0),
        }

        if scan_body:
            try:
                body_bytes = _extract_body(payload)
            except ValueError:
                # Undecodable body data (binascii.Error): skip the body but
                # still scan this message's attachments.
                body_bytes = b""
            if body_bytes:
                yield (meta, body_bytes)

        if scan_attachments:
            for part in _iter_parts(payload):
                filename = part.get("filename", "")
                body = part.get("body", {})
                att_id = body.get("attachmentId")
                size = body.get("size", 0)
                if not att_id or not filename:
                    continue
                if size > max_attach_bytes:
                    continue
                try:
                    att = service.users().messages().attachments().get(
                        userId="me", messageId=msg_id, id=att_id
                    ).execute()
                    encoded = att.get("data") or ""
                    # Pad to a multiple of 4; the decoder rejects unpadded input.
                    data = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
                except (HttpError, ValueError):
                    continue
                att_meta = {
                    **meta,
                    "id": f"gmail:{msg_id}:{att_id}",
                    "name": filename,
                    "size": len(data),
                }
                yield (att_meta, data)
|
||||
|
||||
|
||||
def _drive_iter(
    service,
    user_email: str,
    max_files: int,
    max_file_mb: float,
) -> Iterator[tuple[dict, bytes]]:
    """Paginate Drive files and yield (metadata, bytes) tuples.

    Lists non-trashed files page by page until max_files entries have been
    seen or there are no more pages. For each entry:

    - MIME types present in _EXPORT_MAP are exported to the mapped format; the
      mapped extension is appended to the name and ``size`` is set to the
      exported byte count. Exports larger than _MAX_EXPORT_BYTES are skipped.
    - Any other ``application/vnd.google-apps.*`` type is skipped.
    - Everything else is downloaded directly, unless its reported size is 0
      or exceeds max_file_mb.

    HTTP errors on an individual export/download skip that file.

    Args:
        service: Drive API service object (as returned by build("drive", "v3")).
        user_email: Account label stored in the metadata and error messages.
        max_files: Upper bound on listed files (skipped files count too).
        max_file_mb: Size limit in MiB for directly downloaded files.

    Raises:
        GoogleError: if a files().list call fails with an HTTP error.
    """
    max_bytes = int(max_file_mb * 1024 * 1024)
    fields = "nextPageToken,files(id,name,mimeType,size,webViewLink,modifiedTime,owners,parents)"
    page_token = None
    fetched = 0

    while fetched < max_files:
        params: dict = {
            # Never request more than what is left of the max_files budget.
            "pageSize": min(1000, max_files - fetched),
            "fields": fields,
            "q": "trashed = false",
        }
        if page_token:
            params["pageToken"] = page_token
        try:
            resp = service.files().list(**params).execute()
        except HttpError as e:
            raise GoogleError(f"Drive list error for {user_email}: {e}") from e

        for f in resp.get("files", []):
            # Counted before any skip decision, so skipped files use budget too.
            fetched += 1
            mime = f.get("mimeType", "")
            fid = f.get("id", "")
            fname = f.get("name", "")
            # "size" may be missing or empty; treat both as 0.
            size = int(f.get("size", 0) or 0)

            meta = {
                "id": f"gdrive:{fid}",
                "name": fname,
                "_source": "gdrive",
                "_source_type": "gdrive",
                "_account": user_email,
                "_account_id": user_email,
                "_url": f.get("webViewLink", ""),
                # Keep only the first 10 characters of modifiedTime.
                "lastModifiedDateTime": f.get("modifiedTime", "")[:10],
                "size": size,
            }

            if mime in _EXPORT_MAP:
                export_mime, ext = _EXPORT_MAP[mime]
                try:
                    req = service.files().export_media(fileId=fid, mimeType=export_mime)
                    buf = io.BytesIO()
                    dl = MediaIoBaseDownload(buf, req, chunksize=4 * 1024 * 1024)
                    done = False
                    total = 0
                    while not done:
                        status, done = dl.next_chunk()
                        total = buf.tell()
                        # Stop downloading as soon as the export is too large.
                        if total > _MAX_EXPORT_BYTES:
                            break
                    if total > _MAX_EXPORT_BYTES:
                        continue
                    meta["name"] = fname + ext
                    meta["size"] = total
                    data = buf.getvalue()
                    del buf
                    yield (meta, data)
                except HttpError as e:
                    # Only the export-size case is reported; any other HTTP
                    # error skips the file silently.
                    if "exportSizeLimitExceeded" in str(e):
                        print(
                            f"[gdrive] skip '{fname}' — file too large for Google export API"
                            f" (exportSizeLimitExceeded); fid={fid}",
                            flush=True,
                        )
                    continue
            else:
                if mime.startswith("application/vnd.google-apps."):
                    continue # other native formats we can't export — skip
                if size == 0 or size > max_bytes:
                    continue
                try:
                    req = service.files().get_media(fileId=fid)
                    buf = io.BytesIO()
                    dl = MediaIoBaseDownload(buf, req, chunksize=4 * 1024 * 1024)
                    done = False
                    while not done:
                        _, done = dl.next_chunk()
                    data = buf.getvalue()
                    del buf
                    yield (meta, data)
                except HttpError:
                    continue

        page_token = resp.get("nextPageToken")
        if not page_token:
            break
|
||||
|
||||
|
||||
# ── Personal Google account (OAuth device-code) connector ────────────────────
|
||||
|
||||
class PersonalGoogleConnector:
    """
    OAuth 2.0 device-code connector for personal Google accounts.

    Provides the same public interface as GoogleConnector so the scan engine
    can use either transparently via state.google_connector.

    Authentication:
      GCP project with an OAuth 2.0 Desktop App credential.
      Required scopes: gmail.readonly, drive.readonly.
    """

    def __init__(self, token_data: dict):
        """
        Construct from a stored token dict with keys:
          access_token, refresh_token, client_id, client_secret, token_uri, scopes

        Raises:
            GoogleError: if the Google client libraries are not importable.
        """
        if not GOOGLE_AUTH_OK:
            raise GoogleError(
                "google-auth not installed — run: "
                "pip install google-auth google-auth-httplib2 google-api-python-client"
            )
        self._token_data = token_data
        self._creds = self._build_creds()

    @staticmethod
    def _http():
        """Return the ``requests`` module, or raise GoogleError if missing.

        The module is imported here because this file has no module-level
        ``requests`` import; referring to module globals for it would raise
        NameError on every plain-HTTP call.
        """
        try:
            import requests
        except ImportError as exc:
            raise GoogleError("requests library required — run: pip install requests") from exc
        return requests

    def _build_creds(self):
        """Build google.oauth2 user credentials from the stored token dict."""
        from google.oauth2.credentials import Credentials
        return Credentials(
            token=self._token_data.get("access_token"),
            refresh_token=self._token_data.get("refresh_token"),
            token_uri=self._token_data.get("token_uri", _TOKEN_URL),
            client_id=self._token_data.get("client_id"),
            client_secret=self._token_data.get("client_secret"),
            scopes=self._token_data.get("scopes", PERSONAL_SCOPES),
        )

    def _refresh_and_persist(self) -> None:
        """Refresh the access token and save the updated token dict to disk."""
        from google.auth.transport.requests import Request
        self._creds.refresh(Request())
        updated = dict(self._token_data)
        updated["access_token"] = self._creds.token
        save_personal_token(updated)
        self._token_data = updated

    def _refresh_if_needed(self) -> None:
        """Refresh when the credentials report themselves expired.

        Does nothing if the credentials are valid, not expired, or have no
        refresh token.
        """
        creds = self._creds
        if not creds.valid and creds.expired and creds.refresh_token:
            self._refresh_and_persist()

    def is_authenticated(self) -> bool:
        """Return True if an access token is available after any needed refresh.

        Any error during refresh is reported as False.
        """
        try:
            self._refresh_if_needed()
            return bool(self._creds.token)
        except Exception:
            return False

    def get_user_info(self) -> dict:
        """Return {id, email, displayName} for the authenticated user.

        Raises:
            GoogleError: if the requests library is not installed.
            requests.HTTPError: if the userinfo endpoint returns an error status.
        """
        http = self._http()
        self._refresh_if_needed()

        def _fetch():
            return http.get(
                _USERINFO_URL,
                headers={"Authorization": f"Bearer {self._creds.token}"},
                timeout=10,
            )

        resp = _fetch()
        # Credentials rebuilt from the stored dict carry no expiry, so a stale
        # access token still looks valid locally. If the server rejects it,
        # force one refresh and retry once.
        if resp.status_code == 401 and self._creds.refresh_token:
            self._refresh_and_persist()
            resp = _fetch()
        resp.raise_for_status()
        data = resp.json()
        return {
            "id": data.get("id", ""),
            "email": data.get("email", ""),
            "displayName": data.get("name", ""),
        }

    def list_users(self, domain: str = "") -> list[dict]:
        """Return a single-item list for the signed-in user (no admin access needed).

        ``domain`` is accepted for interface parity and is not used. The
        user's email is used as the ``id``.
        """
        info = self.get_user_info()
        return [{
            "id": info["email"],
            "email": info["email"],
            "displayName": info["displayName"],
            "orgUnitPath": "",
            "userRole": "other",
        }]

    def iter_gmail_messages(
        self,
        user_email: str,
        max_messages: int = 2000,
        scan_body: bool = True,
        scan_attachments: bool = True,
        max_attach_mb: float = 20.0,
    ) -> Iterator[tuple[dict, bytes]]:
        """Yield (metadata, bytes) for each Gmail message / attachment.

        Raises:
            GoogleError: if building the service or listing messages fails
                with an HTTP error.
        """
        self._refresh_if_needed()
        try:
            service = build("gmail", "v1", credentials=self._creds, cache_discovery=False)
        except HttpError as e:
            raise GoogleError(f"Gmail auth failed: {e}") from e
        yield from _gmail_iter(service, user_email, max_messages, scan_body, scan_attachments, max_attach_mb)

    def iter_drive_files(
        self,
        user_email: str,
        max_files: int = 5000,
        max_file_mb: float = 50.0,
    ) -> Iterator[tuple[dict, bytes]]:
        """Yield (metadata, bytes) for each Drive file.

        Raises:
            GoogleError: if building the service or listing files fails with
                an HTTP error.
        """
        self._refresh_if_needed()
        try:
            service = build("drive", "v3", credentials=self._creds, cache_discovery=False)
        except HttpError as e:
            raise GoogleError(f"Drive auth failed: {e}") from e
        yield from _drive_iter(service, user_email, max_files, max_file_mb)

    @staticmethod
    def get_device_code_flow(client_id: str, client_secret: str) -> dict:
        """
        Initiate a Google device-code flow.

        Returns a flow dict containing device_code, user_code,
        verification_url, expires_in, interval, client_id and client_secret;
        pass it to complete_device_code_flow().

        Raises:
            GoogleError: if requests is not installed or the response carries
                no device_code.
        """
        http = PersonalGoogleConnector._http()
        resp = http.post(_DEVICE_AUTH_URL, data={
            "client_id": client_id,
            "scope": " ".join(PERSONAL_SCOPES),
        }, timeout=10)
        data = resp.json()
        if "device_code" not in data:
            raise GoogleError(
                f"Failed to start device flow: {data.get('error_description', data)}"
            )
        return {
            "device_code": data["device_code"],
            "user_code": data["user_code"],
            "verification_url": data.get("verification_url", "https://www.google.com/device"),
            "expires_in": data.get("expires_in", 1800),
            "interval": data.get("interval", 5),
            "client_id": client_id,
            "client_secret": client_secret,
        }

    @staticmethod
    def complete_device_code_flow(flow: dict) -> "PersonalGoogleConnector":
        """
        Poll until the user completes sign-in at verification_url.

        Blocks the calling thread, sleeping ``interval`` seconds before each
        poll. On success the token is saved to disk and a ready
        PersonalGoogleConnector is returned.

        Raises:
            GoogleError: if requests is not installed, the token endpoint
                reports an error other than authorization_pending/slow_down,
                or the flow's expires_in window elapses.
        """
        http = PersonalGoogleConnector._http()
        client_id = flow["client_id"]
        client_secret = flow["client_secret"]
        device_code = flow["device_code"]
        interval = flow.get("interval", 5)
        expires_in = flow.get("expires_in", 1800)
        deadline = time.time() + expires_in

        while time.time() < deadline:
            time.sleep(interval)
            resp = http.post(_TOKEN_URL, data={
                "client_id": client_id,
                "client_secret": client_secret,
                "device_code": device_code,
                "grant_type": _DEVICE_GRANT,
            }, timeout=10)
            data = resp.json()
            if "access_token" in data:
                token_data = {
                    "access_token": data["access_token"],
                    "refresh_token": data.get("refresh_token", ""),
                    "client_id": client_id,
                    "client_secret": client_secret,
                    "token_uri": _TOKEN_URL,
                    "scopes": PERSONAL_SCOPES,
                }
                save_personal_token(token_data)
                return PersonalGoogleConnector(token_data)
            err = data.get("error", "")
            if err == "authorization_pending":
                continue
            if err == "slow_down":
                # Server asked us to back off: poll 5 seconds less often.
                interval = max(interval + 5, 5)
                continue
            raise GoogleError(
                f"Device flow error: {data.get('error_description', err)}"
            )

        raise GoogleError("Device code flow timed out")
|
||||
|
||||
|
||||
# ── Personal token persistence ────────────────────────────────────────────────
|
||||
|
||||
def save_personal_token(data: dict) -> None:
    """Persist the OAuth token dict to _GOOGLE_TOKEN_FILE with mode 0600.

    The file is created through os.open() with an explicit 0600 mode so the
    token (which includes the refresh token and client secret) is never on
    disk with default permissions, not even briefly between write and chmod.
    The follow-up chmod tightens a file that already existed with looser
    permissions; it is best-effort because some platforms/filesystems do not
    support it.

    Args:
        data: JSON-serialisable token content.
    """
    import os

    # Serialise first so a failure here cannot truncate an existing token file.
    payload = json.dumps(data, indent=2)
    fd = os.open(_GOOGLE_TOKEN_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as handle:
        handle.write(payload)
    try:
        _GOOGLE_TOKEN_FILE.chmod(0o600)
    except Exception:
        pass
|
||||
|
||||
|
||||
def load_personal_token() -> Optional[dict]:
    """Read the persisted OAuth token from _GOOGLE_TOKEN_FILE.

    Returns:
        The parsed JSON content, or None when the file does not exist or
        cannot be read/parsed.
    """
    if not _GOOGLE_TOKEN_FILE.exists():
        return None
    try:
        raw_text = _GOOGLE_TOKEN_FILE.read_text()
        return json.loads(raw_text)
    except Exception:
        return None
|
||||
|
||||
|
||||
def delete_personal_token() -> None:
    """Delete the persisted OAuth token, if present.

    Best-effort: any error while checking for or removing the file is ignored.
    """
    try:
        token_path = _GOOGLE_TOKEN_FILE
        if not token_path.exists():
            return
        token_path.unlink()
    except Exception:
        return
|
||||
BIN
icon_gdpr.icns
Normal file
BIN
icon_gdpr.icns
Normal file
Binary file not shown.
BIN
icon_gdpr.ico
Normal file
BIN
icon_gdpr.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 739 B |
BIN
icon_gdpr.png
Normal file
BIN
icon_gdpr.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 18 KiB |
423
install_macos.sh
Executable file
423
install_macos.sh
Executable file
@ -0,0 +1,423 @@
|
||||
#!/usr/bin/env bash
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Document Scanner — macOS Installation Script
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Installs all dependencies for document_scanner.py, server.py, build.py,
|
||||
# gdpr_scanner.py and m365_connector.py:
|
||||
# - Homebrew (if not present)
|
||||
# - Python 3.11 or 3.12 (3.13+ blocked — spaCy incompatible)
|
||||
# - Tesseract OCR with Danish + English language packs
|
||||
# - Poppler (required by pdf2image for PDF rendering)
|
||||
# - A virtualenv at ./venv with all Python packages
|
||||
# - spaCy Danish NER model (~500 MB)
|
||||
#
|
||||
# All Python packages are installed into a virtualenv (./venv) to avoid the
|
||||
# "externally-managed-environment" error from Homebrew Python 3.12+.
|
||||
#
|
||||
# Usage:
|
||||
# chmod +x install_macos.sh && ./install_macos.sh
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ── Colours ───────────────────────────────────────────────────────────────────
|
||||
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
|
||||
CYAN='\033[0;36m'; BOLD='\033[1m'; RESET='\033[0m'
|
||||
|
||||
step() { echo -e "\n${CYAN}==> $1${RESET}"; }
|
||||
ok() { echo -e " ${GREEN}[OK]${RESET} $1"; }
|
||||
warn() { echo -e " ${YELLOW}[!!]${RESET} $1"; }
|
||||
fail() { echo -e " ${RED}[XX]${RESET} $1"; exit 1; }
|
||||
|
||||
# Where the virtualenv will live — next to this script
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
VENV_DIR="$SCRIPT_DIR/venv"
|
||||
|
||||
echo ""
|
||||
echo -e "${BOLD} Document Scanner — macOS Setup${RESET}"
|
||||
echo " -----------------------------------------"
|
||||
echo ""
|
||||
|
||||
# ── 0. Detect architecture ────────────────────────────────────────────────────
|
||||
ARCH=$(uname -m)
|
||||
if [[ "$ARCH" == "arm64" ]]; then
|
||||
BREW_PREFIX="/opt/homebrew"
|
||||
ok "Apple Silicon (M-series) — Homebrew prefix: $BREW_PREFIX"
|
||||
else
|
||||
BREW_PREFIX="/usr/local"
|
||||
ok "Intel Mac — Homebrew prefix: $BREW_PREFIX"
|
||||
fi
|
||||
|
||||
# ── 1. Install Homebrew ───────────────────────────────────────────────────────
|
||||
step "Checking Homebrew"
|
||||
if command -v brew &>/dev/null; then
|
||||
ok "Homebrew already installed: $(brew --version | head -1)"
|
||||
else
|
||||
echo " Installing Homebrew..."
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||
eval "$($BREW_PREFIX/bin/brew shellenv)"
|
||||
ok "Homebrew installed"
|
||||
fi
|
||||
eval "$($BREW_PREFIX/bin/brew shellenv)" 2>/dev/null || true
|
||||
|
||||
# ── 2. Find or install Python 3.11 / 3.12 ────────────────────────────────────
|
||||
# Homebrew Python 3.12+ is "externally managed" — pip installs must go into
|
||||
# a virtualenv. We find a compatible base interpreter here; all packages will
|
||||
# be installed into ./venv below, not into the system interpreter.
|
||||
step "Checking Python (need 3.11 or 3.12 — spaCy incompatible with 3.13+)"
|
||||
|
||||
find_compatible_python() {
    # Print the first candidate that reports Python 3.11.x or 3.12.x and
    # return 0; return 1 when no candidate qualifies. The interpreters under
    # $BREW_PREFIX/bin are probed before the bare command names.
    local candidate version major rest minor
    local -a candidates=(
        "$BREW_PREFIX/bin/python3.12"
        "$BREW_PREFIX/bin/python3.11"
        python3.12 python3.11 python3 python
    )
    for candidate in "${candidates[@]}"; do
        command -v "$candidate" >/dev/null 2>&1 || continue
        version=$("$candidate" --version 2>&1 | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
        # Split "X.Y.Z" into its major and minor components.
        major=${version%%.*}
        rest=${version#*.}
        minor=${rest%%.*}
        case "$major.$minor" in
            3.11|3.12)
                echo "$candidate"
                return 0
                ;;
        esac
    done
    return 1
}
|
||||
|
||||
# Pick the base interpreter for the virtualenv: reuse a compatible one when
# find_compatible_python reports it, otherwise install one through Homebrew.
BASE_PYTHON=""
if BASE_PYTHON=$(find_compatible_python); then
    ok "Compatible Python: $("$BASE_PYTHON" --version 2>&1) ($BASE_PYTHON)"
else
    # Explain why an existing python3 was rejected before installing another.
    if command -v python3 &>/dev/null; then
        # `|| true`: with `set -e -o pipefail` a python3 that prints no
        # version number would otherwise abort the script right here.
        EXISTING=$(python3 --version 2>&1 | grep -oE '[0-9]+\.[0-9]+' | head -1 || true)
        EXIST_MIN=$(echo "$EXISTING" | cut -d. -f2)
        if [[ "$EXIST_MIN" -ge 13 ]]; then
            warn "Python $EXISTING is too new (spaCy requires ≤ 3.12)"
        fi
    fi
    echo " Installing Python 3.12 via Homebrew..."
    # Under `set -e` a failing `brew install` would end the script before the
    # 3.11 fallback below could run, so the failure is caught and reported.
    brew install python@3.12 || warn "brew install python@3.12 failed"
    BASE_PYTHON="$BREW_PREFIX/bin/python3.12"
    if [[ ! -x "$BASE_PYTHON" ]]; then
        echo " python3.12 not found, trying python3.11..."
        brew install python@3.11 || warn "brew install python@3.11 failed"
        BASE_PYTHON="$BREW_PREFIX/bin/python3.11"
    fi
    [[ -x "$BASE_PYTHON" ]] || fail "Python install failed. Try: brew install python@3.12"
    ok "Python installed: $("$BASE_PYTHON" --version 2>&1)"
fi

# Confirm version
"$BASE_PYTHON" --version 2>&1 | grep -qE 'Python 3\.(11|12)' \
    || fail "Unexpected version: $("$BASE_PYTHON" --version 2>&1)"
|
||||
|
||||
# ── 3. Create virtualenv ──────────────────────────────────────────────────────
|
||||
step "Setting up virtualenv at $VENV_DIR"
|
||||
|
||||
if [[ -d "$VENV_DIR" && -x "$VENV_DIR/bin/python" ]]; then
|
||||
# Validate it was built with a compatible interpreter
|
||||
VENV_VER=$("$VENV_DIR/bin/python" --version 2>&1 | grep -oE '[0-9]+\.[0-9]+' | head -1)
|
||||
VENV_MIN=$(echo "$VENV_VER" | cut -d. -f2)
|
||||
if [[ "$VENV_MIN" == "11" || "$VENV_MIN" == "12" ]]; then
|
||||
ok "Existing virtualenv is compatible (Python $VENV_VER) — reusing"
|
||||
else
|
||||
warn "Existing virtualenv uses Python $VENV_VER — rebuilding"
|
||||
rm -rf "$VENV_DIR"
|
||||
$BASE_PYTHON -m venv "$VENV_DIR"
|
||||
ok "Virtualenv rebuilt"
|
||||
fi
|
||||
else
|
||||
$BASE_PYTHON -m venv "$VENV_DIR"
|
||||
ok "Virtualenv created"
|
||||
fi
|
||||
|
||||
# All subsequent Python/pip commands use the venv
|
||||
PYTHON="$VENV_DIR/bin/python"
|
||||
PIP="$PYTHON -m pip"
|
||||
|
||||
# Upgrade pip inside the venv (no restrictions here)
|
||||
echo " Upgrading pip..."
|
||||
$PIP install --upgrade pip --quiet
|
||||
ok "pip up to date: $($PIP --version)"
|
||||
|
||||
# ── 4. Install Tesseract OCR ──────────────────────────────────────────────────
|
||||
step "Installing Tesseract OCR + language packs"
|
||||
if brew list tesseract &>/dev/null 2>&1; then
|
||||
ok "Tesseract already installed: $(tesseract --version 2>&1 | head -1)"
|
||||
else
|
||||
brew install tesseract
|
||||
ok "Tesseract installed: $(tesseract --version 2>&1 | head -1)"
|
||||
fi
|
||||
|
||||
if brew list tesseract-lang &>/dev/null 2>&1; then
|
||||
ok "Tesseract language packs already installed"
|
||||
else
|
||||
echo " Installing tesseract-lang (~300 MB)..."
|
||||
brew install tesseract-lang
|
||||
ok "Language packs installed"
|
||||
fi
|
||||
|
||||
if tesseract --list-langs 2>&1 | grep -q "^dan$"; then
|
||||
ok "Danish (dan) OCR available"
|
||||
else
|
||||
warn "Danish language pack not found — try: brew reinstall tesseract-lang"
|
||||
fi
|
||||
|
||||
# ── 5. Install Poppler ────────────────────────────────────────────────────────
|
||||
step "Installing Poppler (required for PDF rendering)"
|
||||
if brew list poppler &>/dev/null 2>&1; then
|
||||
ok "Poppler already installed"
|
||||
else
|
||||
brew install poppler
|
||||
ok "Poppler installed"
|
||||
fi
|
||||
command -v pdftoppm &>/dev/null \
|
||||
&& ok "pdftoppm: $(which pdftoppm)" \
|
||||
|| warn "pdftoppm not on PATH — launcher will probe Homebrew paths automatically"
|
||||
|
||||
# ── 6. Install Python packages into venv ─────────────────────────────────────
|
||||
step "Installing Python packages into virtualenv"
|
||||
|
||||
packages=(
|
||||
"flask"
|
||||
"pdfplumber"
|
||||
"pdf2image"
|
||||
"pytesseract"
|
||||
"pypdf"
|
||||
"reportlab"
|
||||
"python-docx"
|
||||
"openpyxl"
|
||||
"img2pdf"
|
||||
"opencv-python-headless"
|
||||
"numpy"
|
||||
"Pillow"
|
||||
"spacy"
|
||||
"py7zr"
|
||||
"pymupdf"
|
||||
"pywebview"
|
||||
"pystray"
|
||||
"pyinstaller"
|
||||
"pyinstaller-hooks-contrib"
|
||||
# GDPRScanner
|
||||
"msal"
|
||||
"requests"
|
||||
# Optional — File system scanning (#8)
|
||||
# smbprotocol: native SMB2/3 without mounting (needed for network share scanning)
|
||||
# keyring: OS keychain credential storage for SMB passwords
|
||||
# python-dotenv: .env file fallback for headless SMB credentials
|
||||
"smbprotocol"
|
||||
"keyring"
|
||||
"python-dotenv"
|
||||
# Scheduler (#19)
|
||||
"APScheduler"
|
||||
# Google Workspace scanning (#10)
|
||||
"google-auth"
|
||||
"google-auth-httplib2"
|
||||
"google-api-python-client"
|
||||
)
|
||||
|
||||
failed=()
|
||||
for pkg in "${packages[@]}"; do
|
||||
printf " %-36s" "$pkg..."
|
||||
if $PIP install "$pkg" --quiet --disable-pip-version-check 2>/dev/null; then
|
||||
echo -e "${GREEN}OK${RESET}"
|
||||
else
|
||||
echo -e "${RED}FAILED${RESET}"
|
||||
failed+=("$pkg")
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ ${#failed[@]} -gt 0 ]]; then
|
||||
warn "Failed: ${failed[*]}"
|
||||
warn "Retry: $PIP install ${failed[*]}"
|
||||
fi
|
||||
|
||||
# ── 7. Install create-dmg ─────────────────────────────────────────────────────
|
||||
step "Checking create-dmg (optional — for .dmg packaging)"
|
||||
if command -v create-dmg &>/dev/null; then
|
||||
ok "create-dmg already installed"
|
||||
else
|
||||
brew install create-dmg 2>/dev/null \
|
||||
&& ok "create-dmg installed" \
|
||||
|| warn "create-dmg unavailable — install manually: brew install create-dmg"
|
||||
fi
|
||||
|
||||
# ── 8. Install spaCy Danish NER model ─────────────────────────────────────────
|
||||
step "Installing spaCy Danish NER model (~500 MB)"
|
||||
|
||||
# spaCy's download command uses shutil.which("pip") to find a package
|
||||
# installer. Inside a venv the wrapper may be named pip3 only. Ensure a
|
||||
# `pip` executable exists so spaCy can find it.
|
||||
if [[ ! -x "$VENV_DIR/bin/pip" ]]; then
|
||||
echo " Creating pip wrapper in venv (needed by spaCy download)…"
|
||||
cat > "$VENV_DIR/bin/pip" << 'PIPSHIM'
|
||||
#!/usr/bin/env bash
|
||||
exec "$(dirname "$0")/python3" -m pip "$@"
|
||||
PIPSHIM
|
||||
chmod +x "$VENV_DIR/bin/pip"
|
||||
fi
|
||||
# Verify pip is now visible
|
||||
if "$VENV_DIR/bin/pip" --version &>/dev/null; then
|
||||
ok "pip available: $("$VENV_DIR/bin/pip" --version 2>&1)"
|
||||
else
|
||||
warn "pip wrapper not working — will use direct pip install fallback"
|
||||
fi
|
||||
|
||||
if $PYTHON -c "import da_core_news_lg" &>/dev/null 2>&1; then
|
||||
ok "spaCy Danish model already installed"
|
||||
else
|
||||
installed=false
|
||||
for model in da_core_news_lg da_core_news_md da_core_news_sm; do
|
||||
echo " Trying $model..."
|
||||
|
||||
# Method 1: spacy download with venv/bin explicitly on PATH
|
||||
# (spaCy uses shutil.which("pip") which searches PATH)
|
||||
if PATH="$VENV_DIR/bin:$PATH" $PYTHON -m spacy download "$model" 2>/dev/null; then
|
||||
ok "Installed: $model (via spacy download)"
|
||||
installed=true
|
||||
break
|
||||
fi
|
||||
|
||||
# Method 2: direct pip install — spaCy models are regular PyPI packages
|
||||
echo " spacy download failed — trying pip install..."
|
||||
if $PIP install "$model" 2>&1; then
|
||||
if $PYTHON -c "import ${model//-/_}" &>/dev/null 2>&1; then
|
||||
ok "Installed: $model (via pip)"
|
||||
installed=true
|
||||
break
|
||||
else
|
||||
warn "$model pip install reported success but import failed"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
if [[ "$installed" == false ]]; then
|
||||
warn "No spaCy model installed — anonymisation unavailable"
|
||||
warn "Retry manually: $PIP install da_core_news_sm"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── 9. Verify ─────────────────────────────────────────────────────────────────
|
||||
step "Verifying installation"
|
||||
|
||||
ok "Python (venv): $($PYTHON --version 2>&1)"
|
||||
ok "Tesseract: $(tesseract --version 2>&1 | head -1)"
|
||||
ok "Poppler: $(pdftoppm -v 2>&1 | head -1 || echo 'available via Homebrew PATH')"
|
||||
|
||||
# Import every required package inside the venv and record the result in
# ALL_OK (0 = everything importable). The script runs under `set -e`, so the
# exit status is captured with `||`: a bare failing command would abort the
# script here, before the launch scripts are written and before the
# "complete with warnings" summary can be printed.
ALL_OK=0
"$PYTHON" - <<'PYCHECK' || ALL_OK=$?
import sys
checks = [
    ('flask', 'flask'),
    ('pdfplumber', 'pdfplumber'),
    ('pdf2image', 'pdf2image'),
    ('pytesseract', 'pytesseract'),
    ('pypdf', 'pypdf'),
    ('reportlab', 'reportlab'),
    ('python-docx', 'docx'),
    ('openpyxl', 'openpyxl'),
    ('opencv-python-headless', 'cv2'),
    ('numpy', 'numpy'),
    ('Pillow', 'PIL'),
    ('spacy', 'spacy'),
    ('img2pdf', 'img2pdf'),
    ('pywebview', 'webview'),
    ('pystray', 'pystray'),
    ('PyInstaller', 'PyInstaller'),
    ('py7zr', 'py7zr'),
    # GDPRScanner
    ('msal', 'msal'),
    ('requests', 'requests'),
]
optional_checks = [
    ('smbprotocol', 'smbprotocol', 'SMB/CIFS network share scanning'),
    ('keyring', 'keyring', 'OS keychain credential storage'),
    ('python-dotenv', 'dotenv', '.env file credential fallback'),
    ('APScheduler', 'apscheduler', 'In-process scheduled scans'),
]
missing = []
for name, imp in checks:
    try:
        __import__(imp)
        print(f' \033[32m[OK]\033[0m {name}')
    except ImportError:
        print(f' \033[31m[!!]\033[0m {name} MISSING')
        missing.append(name)
print('\n Optional (file system scanning):')
for name, imp, desc in optional_checks:
    try:
        __import__(imp)
        print(f' \033[32m[OK]\033[0m {name} — {desc}')
    except ImportError:
        # Optional packages are reported but never fail the check.
        print(f' \033[33m[--]\033[0m {name} — {desc} (not installed)')
if missing:
    print(f'\n Missing: {", ".join(missing)}')
    sys.exit(1)
print('\n All packages verified.')
PYCHECK
|
||||
|
||||
# ── 10. Shell profile ─────────────────────────────────────────────────────────
|
||||
step "Shell PATH configuration"
|
||||
SHELL_RC=""
|
||||
if [[ "$SHELL" == *"zsh"* ]]; then SHELL_RC="$HOME/.zshrc"; fi
|
||||
if [[ "$SHELL" == *"bash"* ]]; then SHELL_RC="$HOME/.bash_profile"; fi
|
||||
|
||||
if [[ -n "$SHELL_RC" ]]; then
|
||||
if grep -q "brew shellenv" "$SHELL_RC" 2>/dev/null; then
|
||||
ok "Homebrew already configured in $SHELL_RC"
|
||||
else
|
||||
echo "" >> "$SHELL_RC"
|
||||
echo "# Homebrew" >> "$SHELL_RC"
|
||||
echo "eval \"\$($BREW_PREFIX/bin/brew shellenv)\"" >> "$SHELL_RC"
|
||||
ok "Homebrew added to $SHELL_RC — restart Terminal or: source $SHELL_RC"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── 11. Create launch scripts ─────────────────────────────────────────────────
|
||||
step "Creating launch scripts"
|
||||
|
||||
# start_gdpr.sh — launches GDPRScanner
|
||||
cat > "$SCRIPT_DIR/start_gdpr.sh" << M365EOF
|
||||
#!/usr/bin/env bash
|
||||
# GDPRScanner — launch script (uses ./venv)
|
||||
SCRIPT_DIR="\$(cd "\$(dirname "\${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "\$SCRIPT_DIR/venv/bin/activate"
|
||||
exec python3 "\$SCRIPT_DIR/gdpr_scanner.py" "\${@}"
|
||||
M365EOF
|
||||
chmod +x "$SCRIPT_DIR/start_gdpr.sh"
|
||||
ok "Created: start_gdpr.sh"
|
||||
|
||||
# build_gdpr.sh — builds standalone GDPRScanner .app
|
||||
cat > "$SCRIPT_DIR/build_gdpr.sh" << BLD365EOF
|
||||
#!/usr/bin/env bash
|
||||
# GDPRScanner — build .app (uses ./venv)
|
||||
SCRIPT_DIR="\$(cd "\$(dirname "\${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "\$SCRIPT_DIR/venv/bin/activate"
|
||||
exec python3 "\$SCRIPT_DIR/build_gdpr.py" --clean "\$@"
|
||||
BLD365EOF
|
||||
chmod +x "$SCRIPT_DIR/build_gdpr.sh"
|
||||
ok "Created: build_gdpr.sh"
|
||||
|
||||
|
||||
# ── Done ──────────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo " -----------------------------------------"
|
||||
[[ $ALL_OK -eq 0 ]] \
|
||||
&& echo -e " ${GREEN}${BOLD}Installation complete!${RESET}" \
|
||||
|| echo -e " ${YELLOW}${BOLD}Installation complete with warnings — see above${RESET}"
|
||||
echo ""
|
||||
echo -e " ${BOLD}GDPRScanner:${RESET}"
|
||||
echo -e " ${CYAN}./start_gdpr.sh${RESET}"
|
||||
echo " Then open: http://127.0.0.1:5100"
|
||||
echo ""
|
||||
echo -e " ${BOLD}File system scanning (optional):${RESET}"
|
||||
echo -e " ${CYAN}./start_gdpr.sh --scan-path ~/Documents${RESET}"
|
||||
echo -e " ${CYAN}./start_gdpr.sh --scan-path //nas/shares --smb-user 'DOMAIN\\user'${RESET}"
|
||||
echo " Or use the '📁 File sources' panel in the GDPRScanner UI"
|
||||
echo ""
|
||||
echo -e " ${BOLD}Build standalone app:${RESET}"
|
||||
echo -e " ${CYAN}./build_gdpr.sh${RESET} → dist/GDPRScanner.app"
|
||||
echo ""
|
||||
echo " -----------------------------------------"
|
||||
echo ""
|
||||
568
install_windows.ps1
Normal file
568
install_windows.ps1
Normal file
@ -0,0 +1,568 @@
|
||||
#Requires -RunAsAdministrator
|
||||
# Always run from the folder this script lives in
|
||||
Set-Location -Path $PSScriptRoot
|
||||
<#
|
||||
.SYNOPSIS
|
||||
M365 GDPR Scanner -- Windows Installation Script
|
||||
.DESCRIPTION
|
||||
Installs all dependencies for gdpr_scanner.py and m365_connector.py:
|
||||
- Python 3.11 or 3.12 (3.13+ blocked -- spaCy incompatible)
|
||||
- Tesseract OCR 5.x with Danish + English language packs
|
||||
- Poppler (required by pdfplumber for PDF rendering)
|
||||
- All Python packages including pywebview, pystray
|
||||
- spaCy Danish NER model (da_core_news_lg, ~500 MB)
|
||||
Adds Tesseract and Poppler to the system PATH.
|
||||
.NOTES
|
||||
Run from an elevated PowerShell prompt:
|
||||
PowerShell -ExecutionPolicy Bypass -File install_windows.ps1
|
||||
#>
|
||||
|
||||
Set-StrictMode -Version Latest
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
# -- Colours --------------------------------------------------------------------
|
||||
function Write-Step { param($msg) Write-Host "`n==> $msg" -ForegroundColor Cyan }
|
||||
function Write-OK { param($msg) Write-Host " [OK] $msg" -ForegroundColor Green }
|
||||
function Write-Warn { param($msg) Write-Host " [!!] $msg" -ForegroundColor Yellow }
|
||||
function Write-Fail { param($msg) Write-Host " [XX] $msg" -ForegroundColor Red; exit 1 }
|
||||
|
||||
Write-Host ""
|
||||
Write-Host " M365 GDPR Scanner - Windows Setup" -ForegroundColor White
|
||||
Write-Host " -----------------------------------------" -ForegroundColor DarkGray
|
||||
Write-Host ""
|
||||
|
||||
# -- 0. Check architecture ------------------------------------------------------
|
||||
if ($env:PROCESSOR_ARCHITECTURE -ne "AMD64") {
|
||||
Write-Warn "This script targets 64-bit Windows. Proceeding anyway."
|
||||
}
|
||||
|
||||
# -- 1. Install Chocolatey (if not present) -------------------------------------
|
||||
Write-Step "Checking Chocolatey package manager"
|
||||
if (-not (Get-Command choco -ErrorAction SilentlyContinue)) {
|
||||
Write-Host " Installing Chocolatey..."
|
||||
Set-ExecutionPolicy Bypass -Scope Process -Force
|
||||
[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072
|
||||
Invoke-Expression ((New-Object System.Net.WebClient).DownloadString(
|
||||
'https://community.chocolatey.org/install.ps1'))
|
||||
$env:PATH = [System.Environment]::GetEnvironmentVariable("PATH","Machine") + ";" +
|
||||
[System.Environment]::GetEnvironmentVariable("PATH","User")
|
||||
Write-OK "Chocolatey installed"
|
||||
} else {
|
||||
Write-OK "Chocolatey already installed ($((choco --version)))"
|
||||
}
|
||||
|
||||
# -- Virtualenv path -----------------------------------------------------------
|
||||
$VenvDir = Join-Path $PSScriptRoot "venv"
|
||||
$VenvPython = Join-Path $VenvDir "Scripts\python.exe"
|
||||
|
||||
# -- 2. Install / validate Python ---------------------------------------------------
|
||||
# Compatible: 3.11.x or 3.12.x
|
||||
# spaCy does not support 3.13+. pywebview requires 3.8+.
|
||||
Write-Step "Checking Python (need 3.11 or 3.12 -- prefer 3.12, spaCy incompatible with 3.13+)"
|
||||
|
||||
function Get-PythonExe {
    # Returns the command string of the first compatible Python (3.11 or 3.12),
    # or $null. The result is one of: "py -3.x" (launcher plus version switch),
    # a bare command name, or a full path to python.exe.
    $candidates = @()

    # py launcher -- errors are silenced so "No runtime found" does not bubble up
    if (Get-Command py -ErrorAction SilentlyContinue) {
        foreach ($v in @("3.12", "3.11")) {
            $test = $null
            $prev = $ErrorActionPreference
            try {
                $ErrorActionPreference = 'SilentlyContinue'
                $test = & py "-$v" --version 2>&1
            } catch {
            } finally {
                $ErrorActionPreference = $prev
            }
            if ("$test" -match "^Python $v") { $candidates += "py -$v" }
        }
    }

    # Direct python / python3 commands
    foreach ($cmd in @("python3.12", "python3.11", "python", "python3")) {
        if (Get-Command $cmd -ErrorAction SilentlyContinue) {
            $candidates += $cmd
        }
    }

    # Well-known install locations (e.g. installed from python.org without PATH update)
    $wellKnown = @(
        "$env:LOCALAPPDATA\Programs\Python\Python312\python.exe",
        "$env:LOCALAPPDATA\Programs\Python\Python311\python.exe",
        "C:\Python312\python.exe",
        "C:\Python311\python.exe",
        "C:\Program Files\Python312\python.exe",
        "C:\Program Files\Python311\python.exe"
    )
    foreach ($p in $wellKnown) {
        if (Test-Path $p) { $candidates += $p }
    }

    foreach ($cmd in $candidates) {
        # Only the "py -3.x" form carries an argument. Every other candidate is
        # a command name or a full path that may contain spaces
        # ("C:\Program Files\..."), so it must not be split on spaces.
        $exe = $cmd
        $exeArgs = @()
        if ($cmd -match '^py (-\d+\.\d+)$') {
            $exe = 'py'
            $exeArgs = @($Matches[1])
        }

        # An unusable candidate must not abort detection of the remaining ones.
        $raw = $null
        $prev = $ErrorActionPreference
        try {
            $ErrorActionPreference = 'SilentlyContinue'
            $raw = & $exe @exeArgs --version 2>&1
        } catch {
        } finally {
            $ErrorActionPreference = $prev
        }

        if ("$raw" -match "Python (\d+)\.(\d+)") {
            $maj = [int]$Matches[1]; $min = [int]$Matches[2]
            if ($maj -eq 3 -and ($min -eq 11 -or $min -eq 12)) { return $cmd }
        }
    }
    return $null
}
|
||||
|
||||
function Get-PythonVersionStr {
    # Returns the output of "<python> --version" for a command string in the
    # form produced by Get-PythonExe.
    param($cmd)
    # Only "py -3.x" is split into launcher + switch; a path such as
    # "C:\Program Files\Python312\python.exe" is kept whole.
    $exe = $cmd
    $exeArgs = @()
    if ($cmd -match '^py (-\d+\.\d+)$') {
        $exe = 'py'
        $exeArgs = @($Matches[1])
    }
    $raw = & $exe @exeArgs --version 2>&1
    return $raw
}
|
||||
|
||||
function Invoke-Py {
    # Runs the selected interpreter ($script:pythonCmd) with the given
    # arguments. The interpreter's output goes to the pipeline, followed by
    # its exit code as the last item.
    param([string[]]$PyArgs)
    # Only "py -3.x" is split into launcher + switch; a full path that
    # contains spaces is invoked as a single command.
    $exe = $script:pythonCmd
    $launcherArgs = @()
    if ($exe -match '^py (-\d+\.\d+)$') {
        $launcherArgs = @($Matches[1])
        $exe = 'py'
    }
    & $exe @launcherArgs @PyArgs
    return $LASTEXITCODE
}
|
||||
|
||||
$pythonCmd = Get-PythonExe
|
||||
|
||||
if ($pythonCmd) {
|
||||
$verStr = Get-PythonVersionStr $pythonCmd
|
||||
Write-OK "Compatible Python found: $verStr (using '$pythonCmd')"
|
||||
} else {
|
||||
# Check if an incompatible version is present so we can warn clearly
|
||||
if (Get-Command python -ErrorAction SilentlyContinue) {
|
||||
$raw = & python --version 2>&1
|
||||
if ($raw -match "Python (\d+)\.(\d+)") {
|
||||
$maj = [int]$Matches[1]; $min = [int]$Matches[2]
|
||||
if ($maj -eq 3 -and $min -ge 13) {
|
||||
Write-Warn "Python $maj.$min is installed but too new (spaCy needs <= 3.12)"
|
||||
Write-Warn "Python 3.11 will be installed alongside it"
|
||||
} elseif ($maj -eq 3 -and $min -le 10) {
|
||||
Write-Warn "Python $maj.$min is installed but too old (need >= 3.11)"
|
||||
}
|
||||
}
|
||||
}
|
||||
# ---- Try Chocolatey first (fast, silent) ----
|
||||
$chocoOk = $false
|
||||
if (Get-Command choco -ErrorAction SilentlyContinue) {
|
||||
Write-Host " Installing Python 3.12 via Chocolatey..."
|
||||
choco install python312 -y --no-progress | Out-Null
|
||||
$env:PATH = [System.Environment]::GetEnvironmentVariable("PATH","Machine") + ";" +
|
||||
[System.Environment]::GetEnvironmentVariable("PATH","User")
|
||||
$pythonCmd = Get-PythonExe
|
||||
if ($pythonCmd) { $chocoOk = $true }
|
||||
}
|
||||
|
||||
# ---- Direct download from python.org (works without Chocolatey) ----
|
||||
if (-not $chocoOk) {
|
||||
$PyVersion = "3.12.9"
|
||||
$PyInstaller = "$env:TEMP\python-$PyVersion-amd64.exe"
|
||||
$PyUrl = "https://www.python.org/ftp/python/$PyVersion/python-$PyVersion-amd64.exe"
|
||||
|
||||
Write-Host " Downloading Python $PyVersion from python.org..."
|
||||
try {
|
||||
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
|
||||
& curl.exe -L --silent --show-error -o $PyInstaller $PyUrl
|
||||
if ($LASTEXITCODE -ne 0) { throw "curl.exe download failed" }
|
||||
} catch {
|
||||
Write-Fail "Download failed: $_`nInstall Python 3.12 manually from https://www.python.org/downloads/ then re-run this script."
|
||||
}
|
||||
|
||||
Write-Host " Installing Python $PyVersion (silent, all users)..."
|
||||
$installArgs = "/quiet InstallAllUsers=0 PrependPath=0 Include_test=0"
|
||||
Start-Process -FilePath $PyInstaller -ArgumentList $installArgs -Wait -NoNewWindow
|
||||
|
||||
# Reload PATH so the new python.exe is visible in this session
|
||||
$env:PATH = [System.Environment]::GetEnvironmentVariable("PATH","Machine") + ";" +
|
||||
[System.Environment]::GetEnvironmentVariable("PATH","User")
|
||||
|
||||
$pythonCmd = Get-PythonExe
|
||||
if (-not $pythonCmd) {
|
||||
Write-Fail ("Python $PyVersion was installed but could not be found.`n" +
|
||||
" -- Open a NEW PowerShell window and re-run this script, or`n" +
|
||||
" -- Install manually from https://www.python.org/downloads/")
|
||||
}
|
||||
}
|
||||
|
||||
$verStr = Get-PythonVersionStr $pythonCmd
|
||||
Write-OK "Python installed: $verStr"
|
||||
}
|
||||
|
||||
# Final sanity check
|
||||
# Re-run "--version" on the selected interpreter and stop unless it reports
# 3.11 or 3.12. Only the "py -3.x" form is split into launcher + switch, so a
# path containing spaces is invoked whole.
$sanityExe = $pythonCmd
$sanityArgs = @()
if ($pythonCmd -match '^py (-\d+\.\d+)$') {
    $sanityExe = 'py'
    $sanityArgs = @($Matches[1])
}
$raw = & $sanityExe @sanityArgs --version 2>&1
if ("$raw" -notmatch "Python 3\.(11|12)") {
    Write-Fail "Could not confirm a Python 3.11 or 3.12 interpreter. Got: $raw"
}
|
||||
|
||||
# -- Create / reuse virtualenv -------------------------------------------------
|
||||
# Reuse ./venv when its python.exe exists; otherwise (re)create it with the
# selected base interpreter.
Write-Step "Setting up virtualenv at $VenvDir"
if (Test-Path $VenvPython) {
    Write-OK "Existing virtualenv found -- reusing"
} else {
    # A directory without python.exe is a broken leftover -- start clean.
    if (Test-Path $VenvDir) { Remove-Item $VenvDir -Recurse -Force }
    Write-Host " Creating virtualenv..."
    # Invoke-Py emits the exit code as its last pipeline item. Capture it so it
    # is not printed as a stray number and so a failure is actually detected.
    $venvExit = Invoke-Py @("-m", "venv", $VenvDir) | Select-Object -Last 1
    if ($venvExit -ne 0 -or -not (Test-Path $VenvPython)) {
        Write-Fail "Could not create virtualenv at $VenvDir (python -m venv exit code: $venvExit)"
    }
    Write-OK "Virtualenv created: $VenvDir"
}
|
||||
|
||||
function Invoke-VenvPip {
|
||||
param([string[]]$PipArgs)
|
||||
& $VenvPython -m pip @PipArgs
|
||||
return $LASTEXITCODE
|
||||
}
|
||||
|
||||
Write-Host " Upgrading pip..."
|
||||
Invoke-VenvPip @("install", "--upgrade", "pip", "--quiet") | Out-Null
|
||||
Write-OK "pip up to date"
|
||||
|
||||
# -- 3. Install Visual C++ Redistributable (required by OpenCV/cv2) -----------
|
||||
# Install the VC++ 2015-2022 x64 runtime unless its registry key is present.
Write-Step "Checking Visual C++ Redistributable 2015-2022"
$vcKey = "HKLM:\SOFTWARE\Microsoft\VisualStudio\14.0\VC\Runtimes\x64"
$vcAlt = "HKLM:\SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\x64"
$vcInstalled = (Test-Path $vcKey) -or (Test-Path $vcAlt)
if ($vcInstalled) {
    Write-OK "Visual C++ Redistributable already installed"
} else {
    Write-Host " Downloading VC++ Redistributable..."
    $vcUrl = "https://aka.ms/vs/17/release/vc_redist.x64.exe"
    $vcInstaller = "$env:TEMP\vc_redist.x64.exe"
    # --fail: an HTTP error becomes a non-zero exit code instead of an error
    # page saved under the installer's name.
    & curl.exe -L --fail --silent --show-error -o $vcInstaller $vcUrl
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path $vcInstaller)) {
        # Really skip: launching or deleting a missing installer would throw
        # under $ErrorActionPreference = "Stop".
        Write-Warn "VC++ download failed -- skipping (may already be installed)"
        if (Test-Path $vcInstaller) { Remove-Item $vcInstaller -Force }
    } else {
        Write-Host " Installing silently..."
        Start-Process -FilePath $vcInstaller -ArgumentList "/install", "/quiet", "/norestart" -Wait
        Remove-Item $vcInstaller -Force
        Write-OK "Visual C++ Redistributable installed"
    }
}
|
||||
|
||||
# -- 4. Install Tesseract OCR ---------------------------------------------------
|
||||
Write-Step "Installing Tesseract OCR"
|
||||
$ToolsDir = Join-Path $PSScriptRoot "tools"
|
||||
$TessDir = Join-Path $ToolsDir "tesseract"
|
||||
$tessExe = Join-Path $TessDir "tesseract.exe"
|
||||
New-Item -ItemType Directory -Force -Path $ToolsDir | Out-Null
|
||||
if (Test-Path $tessExe) {
|
||||
$tessVer = & $tessExe --version 2>&1 | Select-Object -First 1
|
||||
Write-OK "Tesseract already installed: $tessVer"
|
||||
} else {
|
||||
Write-Host " Downloading Tesseract 5.x installer..."
|
||||
# Download Tesseract installer -- try multiple mirrors
|
||||
$tessInstaller = "$env:TEMP\tesseract-setup.exe"
|
||||
$tessUrls = @(
|
||||
"https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w64-setup-5.3.4.20240503.exe",
|
||||
"https://github.com/UB-Mannheim/tesseract/releases/download/v5.3.4.20240503/tesseract-ocr-w64-setup-5.3.4.20240503.exe"
|
||||
)
|
||||
$downloaded = $false
|
||||
foreach ($tessUrl in $tessUrls) {
|
||||
Write-Host " Trying: $tessUrl"
|
||||
# Suppress NativeCommandError -- check exit code manually
|
||||
$prev = $ErrorActionPreference; $ErrorActionPreference = "SilentlyContinue"
|
||||
& curl.exe -L --fail --silent --show-error -o $tessInstaller $tessUrl 2>&1 | Out-Null
|
||||
$curlExit = $LASTEXITCODE
|
||||
$ErrorActionPreference = $prev
|
||||
$sz = if (Test-Path $tessInstaller) { (Get-Item $tessInstaller).Length } else { 0 }
|
||||
if ($curlExit -eq 0 -and $sz -gt 1MB) {
|
||||
Write-OK "Downloaded ($([math]::Round($sz/1MB,1)) MB)"
|
||||
$downloaded = $true
|
||||
break
|
||||
}
|
||||
Write-Host " Failed (exit $curlExit, $sz bytes) -- trying next mirror..."
|
||||
if (Test-Path $tessInstaller) { Remove-Item $tessInstaller -Force }
|
||||
}
|
||||
if (-not $downloaded) {
|
||||
Write-Host ""
|
||||
Write-Host " Automatic download failed." -ForegroundColor Yellow
|
||||
Write-Host " Please download the installer manually:" -ForegroundColor Yellow
|
||||
Write-Host " https://github.com/UB-Mannheim/tesseract/releases/tag/v5.3.4.20240503" -ForegroundColor Cyan
|
||||
Write-Host " Save it as: $tessInstaller" -ForegroundColor Cyan
|
||||
Write-Host " Then press Enter to continue..." -ForegroundColor Yellow
|
||||
Read-Host
|
||||
if (-not (Test-Path $tessInstaller) -or (Get-Item $tessInstaller).Length -lt 1MB) {
|
||||
Write-Fail "Installer not found at $tessInstaller"
|
||||
}
|
||||
}
|
||||
Write-Host " Running installer (silent)..."
|
||||
Start-Process -FilePath $tessInstaller -ArgumentList "/S /D=$TessDir" -Wait
|
||||
Remove-Item $tessInstaller -Force
|
||||
Write-OK "Tesseract installed in project tools\ folder"
|
||||
}
|
||||
|
||||
# Tesseract is local in tools\ -- session PATH set above
|
||||
|
||||
# -- 4b. Install Tesseract language packs --------------------------------------
|
||||
Write-Step "Installing Tesseract language packs (Danish + English)"
|
||||
$tessData = Join-Path $TessDir "tessdata"
|
||||
New-Item -ItemType Directory -Force -Path $tessData | Out-Null
|
||||
$langFiles = @{
|
||||
"dan" = "https://github.com/tesseract-ocr/tessdata/raw/main/dan.traineddata"
|
||||
"eng" = "https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata"
|
||||
}
|
||||
# Download each missing traineddata file into the tessdata folder.
foreach ($lang in $langFiles.Keys) {
    $dest = Join-Path $tessData "$lang.traineddata"
    if (Test-Path $dest) {
        Write-OK "'$lang' language pack already present"
    } else {
        Write-Host " Downloading $lang.traineddata..."
        # --fail: an HTTP error becomes a non-zero exit code instead of an
        # error page saved as the language file.
        & curl.exe -L --fail --silent --show-error -o $dest $langFiles[$lang]
        if ($LASTEXITCODE -eq 0 -and (Test-Path $dest)) {
            Write-OK "'$lang' installed"
        } else {
            Write-Warn "Failed to download $lang language pack"
            # Drop any partial file so the next run retries the download
            # instead of reporting the pack as already present.
            if (Test-Path $dest) { Remove-Item $dest -Force }
        }
    }
}
|
||||
|
||||
# -- 5. Install Poppler --------------------------------------------------------
|
||||
# Install Poppler into tools\poppler unless pdftoppm.exe is already there, then
# append the folder that holds it to this session's PATH.
Write-Step "Installing Poppler (required for PDF rendering)"
$PopplerDir = Join-Path $ToolsDir "poppler"
$popplerBin = Join-Path $PopplerDir "Library\bin"

# Look for pdftoppm.exe anywhere below tools\poppler, the same way the
# post-extraction step does, so an earlier extraction is recognised regardless
# of the folder layout inside the archive.
$found = $null
if (Test-Path $PopplerDir) {
    $found = Get-ChildItem -Path $PopplerDir -Recurse -Filter "pdftoppm.exe" |
        Select-Object -First 1
}

if ($found) {
    $popplerBin = $found.DirectoryName
    Write-OK "Poppler already installed"
} else {
    Write-Host " Downloading Poppler for Windows..."
    $popplerUrl = "https://github.com/oschwartz10612/poppler-windows/releases/download/v24.07.0-0/Release-24.07.0-0.zip"
    $popplerZip = "$env:TEMP\poppler.zip"
    & curl.exe -L --silent --show-error -o $popplerZip $popplerUrl
    if ($LASTEXITCODE -ne 0) { Write-Fail "Poppler download failed. Try re-running the script." }
    # $PopplerDir is the extraction target; the previous message referenced an
    # undefined variable, which is a terminating error under Set-StrictMode.
    Write-Host " Extracting to $PopplerDir..."
    Expand-Archive -Path $popplerZip -DestinationPath $PopplerDir -Force
    Remove-Item $popplerZip -Force
    $found = Get-ChildItem -Path $PopplerDir -Recurse -Filter "pdftoppm.exe" |
        Select-Object -First 1
    if ($found) {
        $popplerBin = $found.DirectoryName
        Write-OK "Poppler extracted: $popplerBin"
    } else {
        Write-Fail "Poppler extraction failed -- pdftoppm.exe not found"
    }
}

# Poppler is local in tools\ -- add its bin folder to this session's PATH
$env:PATH = "$env:PATH;$popplerBin"
|
||||
|
||||
# -- 6. Install Python packages -------------------------------------------------
# Installs each package one at a time through Invoke-VenvPip so that a single failure
# is reported per package and does not stop the remaining installs.
Write-Step "Installing Python packages"

# name = pip distribution name, desc = short text shown next to the progress line.
$packages = @(
    # Web server
    @{ name="flask"; desc="web server" },
    # PDF handling
    @{ name="pdfplumber"; desc="PDF text extraction" },
    @{ name="pdf2image"; desc="PDF to image (needs Poppler)" },
    @{ name="pytesseract"; desc="OCR wrapper (needs Tesseract)" },
    @{ name="pypdf"; desc="PDF read/write" },
    @{ name="reportlab"; desc="PDF generation for redaction" },
    # Document formats
    @{ name="python-docx"; desc="Word documents" },
    @{ name="openpyxl"; desc="Excel files" },
    @{ name="img2pdf"; desc="image to PDF" },
    # Image / CV
    @{ name="opencv-python-headless"; desc="face detection (headless, fewer DLL deps)" },
    @{ name="numpy"; desc="image processing" },
    @{ name="Pillow"; desc="image handling" },
    # NER / anonymisation
    @{ name="spacy"; desc="named entity recognition" },
    # Secure PDF redaction
    @{ name="pymupdf"; desc="secure PDF redaction (physical text removal)" },
    # Native app window
    # pywebview 5.x used a [win32] extra; 6.x+ ships WebView2 support built-in -- no extra needed
    @{ name="pywebview"; desc="native webview window" },
    @{ name="pystray"; desc="system tray icon (fallback)" },
    # App bundling
    @{ name="pyinstaller"; desc="app packager" },
    @{ name="pyinstaller-hooks-contrib"; desc="PyInstaller hooks" },
    # GDPRScanner
    @{ name="msal"; desc="Microsoft authentication" },
    @{ name="requests"; desc="HTTP client for Graph API" },
    # Optional -- File system scanning (#8)
    @{ name="smbprotocol"; desc="native SMB2/3 network share scanning (optional)" },
    @{ name="keyring"; desc="OS keychain credential storage for SMB (optional)" },
    @{ name="python-dotenv"; desc=".env file credential fallback (optional)" },
    # Scheduler (#19)
    @{ name="APScheduler"; desc="in-process scheduled scans (optional)" },
    # Google Workspace scanning (#10)
    @{ name="google-auth"; desc="Google service account auth (optional)" },
    @{ name="google-auth-httplib2"; desc="Google auth HTTP transport (optional)" },
    @{ name="google-api-python-client"; desc="Gmail + Drive + Admin APIs (optional)" }
)

# Names of packages whose pip install returned a non-zero exit code.
$failed = @()
foreach ($pkg in $packages) {
    # Print "<name>...   <desc>" without a newline; the OK/FAILED status is appended below.
    Write-Host (" {0,-36} {1}" -f ($pkg.name + "..."), $pkg.desc) -NoNewline
    Invoke-VenvPip @("install", $pkg.name, "--quiet", "--disable-pip-version-check") | Out-Null
    if ($LASTEXITCODE -ne 0) {
        Write-Host " FAILED" -ForegroundColor Red
        $failed += $pkg.name
    } else {
        Write-Host " OK" -ForegroundColor Green
    }
}

# The stray OK/skipped status that used to follow the loop was removed: it had no
# install step of its own and only echoed the exit code of the last package above.

if ($failed.Count -gt 0) {
    Write-Warn "Failed to install: $($failed -join ', ')"
    Write-Warn "Retry manually: python -m pip install $($failed -join ' ')"
}
|
||||
|
||||
# -- 7. Install spaCy language model -------------------------------------------
# Installs one Danish spaCy model, trying da_core_news_lg, then _md, then _sm, unless
# one of them is already installed and loadable in the venv.
Write-Step "Installing spaCy Danish NER model (~500 MB, may take several minutes)"

# Check if any model already installed.
# The probe exits 1 when none of the models is an installed package, and also fails if
# spaCy itself or the first installed model cannot be loaded. The previous probe printed
# 'ok' and exited 0 whenever spaCy imported, so the download branch was never reached.
$spaCyProbe = "import sys, spacy; ms = [m for m in ('da_core_news_lg','da_core_news_md','da_core_news_sm') if spacy.util.is_package(m)]; sys.exit(1) if not ms else spacy.load(ms[0])"
& $VenvPython -c $spaCyProbe 2>$null | Out-Null
$spaCyHasModel = ($LASTEXITCODE -eq 0)

if ($spaCyHasModel) {
    Write-OK "spaCy Danish model already installed"
} else {
    $models = @("da_core_news_lg", "da_core_news_md", "da_core_news_sm")
    $installed = $false
    foreach ($model in $models) {
        Write-Host " Trying $model..."
        & $VenvPython -m spacy download $model --quiet 2>$null | Out-Null
        if ($LASTEXITCODE -eq 0) {
            Write-OK "Installed: $model"
            $installed = $true
            # The first model that installs is enough.
            break
        }
    }
    if (-not $installed) {
        Write-Warn "No spaCy Danish model installed -- anonymisation will be unavailable"
        Write-Warn "Retry manually: python -m spacy download da_core_news_sm"
    }
}
|
||||
|
||||
# -- 8. Verify installation -----------------------------------------------------
# Reports the Python, Tesseract and Poppler that are visible to this session, then runs
# an import check inside the venv. $allOk records whether every required import worked.
Write-Step "Verifying installation"

# Python
$pythonVersionText = Get-PythonVersionStr $pythonCmd
Write-OK "Python: $pythonVersionText"

# Tesseract
try {
    $tesseractBanner = & tesseract --version 2>&1 | Select-Object -First 1
    Write-OK "Tesseract: $tesseractBanner"
    # Keep only the dan/eng entries of the language listing
    $ocrLangs = & tesseract --list-langs 2>&1 | Where-Object { $_ -match "^(dan|eng)$" }
    Write-OK ("OCR languages: " + ($ocrLangs -join ', '))
} catch {
    Write-Warn "Tesseract not on PATH -- restart PowerShell and re-run if needed"
}

# Poppler
try {
    Write-OK "Poppler: $((Get-Command pdftoppm -ErrorAction Stop).Source)"
} catch {
    Write-Warn "Poppler not on PATH -- restart PowerShell and re-run if needed"
}

# All Python imports -- the check is written to a temp file from a single-quoted
# here-string so PowerShell does not expand anything inside the Python source.
$verifyScript = @'
import sys
checks = [
    ('flask', 'flask'),
    ('pdfplumber', 'pdfplumber'),
    ('pdf2image', 'pdf2image'),
    ('pytesseract', 'pytesseract'),
    ('pypdf', 'pypdf'),
    ('reportlab', 'reportlab'),
    ('python-docx', 'docx'),
    ('openpyxl', 'openpyxl'),
    ('opencv-python-headless', 'cv2'),
    ('numpy', 'numpy'),
    ('Pillow', 'PIL'),
    ('spacy', 'spacy'),
    ('img2pdf', 'img2pdf'),
    ('pymupdf', 'fitz'),
    ('pywebview', 'webview'),
    ('pystray', 'pystray'),
    ('PyInstaller', 'PyInstaller'),
    ('msal', 'msal'),
    ('requests', 'requests'),
]
optional_checks = [
    ('smbprotocol', 'smbprotocol'),
    ('keyring', 'keyring'),
    ('python-dotenv', 'dotenv'),
    ('APScheduler', 'apscheduler'),
]
missing = []
for name, imp in checks:
    try:
        __import__(imp)
        print(" [OK] " + name)
    except ImportError:
        print(" [!!] " + name + " MISSING")
        missing.append(name)
print("\n Optional (file system scanning):")
for name, imp in optional_checks:
    try:
        __import__(imp)
        print(" [OK] " + name)
    except ImportError:
        print(" [--] " + name + " (not installed)")
if missing:
    print("\nMissing required: " + ", ".join(missing))
    sys.exit(1)
print("\nAll required packages verified.")
sys.exit(0)
'@
$verifyScriptPath = Join-Path $env:TEMP "gdpr_verify.py"
Set-Content -Path $verifyScriptPath -Encoding UTF8 -Value $verifyScript

# Exit code 0 from the check means no required package was missing
& $VenvPython $verifyScriptPath
$allOk = ($LASTEXITCODE -eq 0)
Remove-Item $verifyScriptPath -ErrorAction SilentlyContinue
|
||||
|
||||
# -- 9. Create launch scripts ---------------------------------------------------
# Writes two ASCII batch files into the current directory. Both put the bundled
# Tesseract and Poppler folders on PATH and run a script with the venv's python.exe.
Write-Step "Creating launch scripts"

# start_gdpr.bat -- runs gdpr_scanner.py on port 5100
$startBat = @'
@echo off
:: GDPRScanner - Web UI
cd /d "%~dp0"
set PATH=%~dp0tools\tesseract;%~dp0tools\poppler\Library\bin;%PATH%
set TESSDATA_PREFIX=%~dp0tools\tesseract\tessdata
set PORT=5100
echo.
echo GDPRScanner
echo Open in browser: http://localhost:%PORT%
echo Press Ctrl+C to stop
echo.
"%~dp0venv\Scripts\python.exe" "%~dp0gdpr_scanner.py" --port %PORT%
pause
'@
Set-Content -Path "start_gdpr.bat" -Encoding ASCII -Value $startBat
Write-OK "Created: start_gdpr.bat"

# build_m365.bat -- runs build_gdpr.py --clean and forwards any extra arguments (%*)
$buildBat = @'
@echo off
:: GDPRScanner -- Build standalone .exe
cd /d "%~dp0"
set PATH=%~dp0tools\tesseract;%~dp0tools\poppler\Library\bin;%PATH%
set TESSDATA_PREFIX=%~dp0tools\tesseract\tessdata
echo Building GDPRScanner...
echo.
"%~dp0venv\Scripts\python.exe" "%~dp0build_gdpr.py" --clean %*
pause
'@
Set-Content -Path "build_m365.bat" -Encoding ASCII -Value $buildBat
Write-OK "Created: build_m365.bat"
|
||||
|
||||
|
||||
# -- Done -----------------------------------------------------------------------
# Final summary: overall status from the import verification ($allOk), followed by
# how to start the web UI, scan a file system path, and build the standalone app.
Write-Host ""
Write-Host " -----------------------------------------" -ForegroundColor DarkGray
if ($allOk) {
    Write-Host " Installation complete!" -ForegroundColor Green
} else {
    Write-Host " Installation complete with warnings -- see above" -ForegroundColor Yellow
}
Write-Host ""
Write-Host " GDPRScanner:" -ForegroundColor White
Write-Host " Double-click start_gdpr.bat" -ForegroundColor Cyan
Write-Host " Web UI: http://localhost:5100" -ForegroundColor White
Write-Host ""
Write-Host " File system scanning (optional):" -ForegroundColor White
Write-Host " python gdpr_scanner.py --scan-path C:\Users\Me\Documents" -ForegroundColor Cyan
Write-Host " python gdpr_scanner.py --scan-path //nas/shares --smb-user DOMAIN\user" -ForegroundColor Cyan
Write-Host " Or use the File sources panel in the GDPRScanner UI" -ForegroundColor Gray
Write-Host ""
Write-Host " Build standalone app:" -ForegroundColor White
# The build launcher written by this script is build_m365.bat; name the file that exists.
Write-Host " Double-click build_m365.bat -> dist\GDPRScanner.exe" -ForegroundColor Cyan
Write-Host " -----------------------------------------" -ForegroundColor DarkGray
Write-Host ""
|
||||
532
keywords/da.json
Normal file
532
keywords/da.json
Normal file
@ -0,0 +1,532 @@
|
||||
{
|
||||
"_comment": "GDPR Article 9 sensitive category keywords — Danish",
|
||||
"_version": "1.0",
|
||||
"_note": "Keywords are matched case-insensitively. A match within ~150 characters of a personal identifier (CPR, name, address) elevates the item to HIGH risk and adds a special_category badge. Edit this file to add organisation-specific terms.",
|
||||
"health": {
|
||||
"_label_da": "Helbred",
|
||||
"_label_en": "Health data",
|
||||
"_article": "Art. 9(1) — data concerning health",
|
||||
"keywords": [
|
||||
"diagnose",
|
||||
"diagnos",
|
||||
"diagnosen",
|
||||
"diagnoser",
|
||||
"sygemelding",
|
||||
"sygemeldingen",
|
||||
"sygemeldinger",
|
||||
"sygedagpenge",
|
||||
"sygefravær",
|
||||
"sygeorlov",
|
||||
"sygefraværssamtale",
|
||||
"sygefraværspolitik",
|
||||
"indlæggelse",
|
||||
"indlæggelsen",
|
||||
"indlæggelser",
|
||||
"udskrivning",
|
||||
"udskrivningsbrev",
|
||||
"hospitalsindlæggelse",
|
||||
"operation",
|
||||
"opereret",
|
||||
"ambulant",
|
||||
"ambulatorium",
|
||||
"skadestue",
|
||||
"lægehenvisning",
|
||||
"lægeerklæring",
|
||||
"lægeattest",
|
||||
"lægejournalen",
|
||||
"lægejournal",
|
||||
"patientjournal",
|
||||
"epikrisen",
|
||||
"epikrise",
|
||||
"behandling",
|
||||
"behandlingsplan",
|
||||
"behandlingsforløb",
|
||||
"medicinsk",
|
||||
"medicin",
|
||||
"medicindosering",
|
||||
"medicinstatus",
|
||||
"medicinliste",
|
||||
"recept",
|
||||
"receptpligtigt",
|
||||
"bivirkninger",
|
||||
"dosering",
|
||||
"præparat",
|
||||
"antidepressiv",
|
||||
"antipsykotisk",
|
||||
"beroligende medicin",
|
||||
"smertestillende",
|
||||
"kronisk sygdom",
|
||||
"kronisk lidelse",
|
||||
"kronisk",
|
||||
"alvorlig sygdom",
|
||||
"terminal",
|
||||
"terminalt syg",
|
||||
"palliativ",
|
||||
"kræft",
|
||||
"kræftbehandling",
|
||||
"kemoterapi",
|
||||
"stråleterapi",
|
||||
"diabetes",
|
||||
"type 1 diabetes",
|
||||
"type 2 diabetes",
|
||||
"insulinbehandling",
|
||||
"blodsukkermåling",
|
||||
"hjertesygdom",
|
||||
"hjerteinsufficiens",
|
||||
"hjertesvigt",
|
||||
"hjerneblødning",
|
||||
"blodprop",
|
||||
"apopleksi",
|
||||
"sklerose",
|
||||
"multipel sklerose",
|
||||
"epilepsi",
|
||||
"epileptisk anfald",
|
||||
"astma",
|
||||
"kol",
|
||||
"rygerlunger",
|
||||
"allergi",
|
||||
"allergisk",
|
||||
"anafylaktisk",
|
||||
"depression",
|
||||
"angst",
|
||||
"angstlidelse",
|
||||
"panikangst",
|
||||
"social fobi",
|
||||
"ptsd",
|
||||
"posttraumatisk",
|
||||
"bipolar",
|
||||
"bipolar lidelse",
|
||||
"skizofreni",
|
||||
"skizofreni diagnose",
|
||||
"personlighedsforstyrrelse",
|
||||
"borderline",
|
||||
"adhd",
|
||||
"add",
|
||||
"autisme",
|
||||
"autismespektrum",
|
||||
"asperger",
|
||||
"ocd",
|
||||
"tvangstanker",
|
||||
"tvangshandlinger",
|
||||
"selvskade",
|
||||
"selvmordstanker",
|
||||
"suicidaltanker",
|
||||
"suicidalitet",
|
||||
"psykiatri",
|
||||
"psykiatrisk",
|
||||
"psykiatrisk indlæggelse",
|
||||
"psykiatrisk behandling",
|
||||
"psykolog",
|
||||
"psykologforløb",
|
||||
"psykoterapi",
|
||||
"terapi",
|
||||
"terapiforløb",
|
||||
"familiebehandling",
|
||||
"misbrugsbehandling",
|
||||
"alkoholmisbrug",
|
||||
"alkoholbehandling",
|
||||
"alkoholafhængighed",
|
||||
"stofmisbrug",
|
||||
"narkotikamisbrug",
|
||||
"narkobehandling",
|
||||
"rehabilitering",
|
||||
"genoptræning",
|
||||
"arbejdsskade",
|
||||
"erhvervssygdom",
|
||||
"erhvervsevnetab",
|
||||
"varig men",
|
||||
"handicap",
|
||||
"handicapkompensation",
|
||||
"hjælpemiddel",
|
||||
"kørestol",
|
||||
"gangbesvær",
|
||||
"synshæmmet",
|
||||
"hørehæmmet",
|
||||
"høretab",
|
||||
"cochlear implantat",
|
||||
"graviditet",
|
||||
"gravid",
|
||||
"barsel",
|
||||
"barselsperiode",
|
||||
"barselsorlov",
|
||||
"abort",
|
||||
"spontan abort",
|
||||
"dødfødt",
|
||||
"fertilitet",
|
||||
"fertilitetsbehandling",
|
||||
"reagensglasbefrugtning",
|
||||
"ivf",
|
||||
"overgangsalder",
|
||||
"menopause",
|
||||
"stofskifte",
|
||||
"stofskiftesygdom",
|
||||
"blodtryk",
|
||||
"forhøjet blodtryk",
|
||||
"kolesterol",
|
||||
"overvægt",
|
||||
"fedme",
|
||||
"spiseforstyrrelser",
|
||||
"anoreksi",
|
||||
"bulimi",
|
||||
"hiv",
|
||||
"aids",
|
||||
"seksuelt overførbar",
|
||||
"kønssygdom",
|
||||
"hepatitis",
|
||||
"tuberkulose",
|
||||
"organdonor",
|
||||
"transplantation",
|
||||
"blodtype",
|
||||
"blodprøve",
|
||||
"blodprøvesvar",
|
||||
"scanningssvar",
|
||||
"røntgensvar",
|
||||
"mri-scanning",
|
||||
"ct-scanning",
|
||||
"helbredstilstand",
|
||||
"funktionsevne",
|
||||
"nedsat funktionsevne",
|
||||
"pfandplan",
|
||||
"senhjerneskade",
|
||||
"hjerneskade",
|
||||
"demens",
|
||||
"alzheimers",
|
||||
"frontallapsdemens",
|
||||
"åndelig lidelse"
|
||||
]
|
||||
},
|
||||
"mental_health": {
|
||||
"_label_da": "Psykisk helbred",
|
||||
"_label_en": "Mental health",
|
||||
"_article": "Art. 9(1) — data concerning health (mental)",
|
||||
"_note": "Subset of health — flagged separately for higher scrutiny",
|
||||
"keywords": [
|
||||
"ppp-plan",
|
||||
"handleplan",
|
||||
"behandlingsplan psykiatri",
|
||||
"psykiatrisk journal",
|
||||
"mentalerklæring",
|
||||
"retsmedicinsk",
|
||||
"mentalobservation",
|
||||
"psykologisk vurdering",
|
||||
"psykologisk rapport",
|
||||
"kognitiv test",
|
||||
"intelligenstest",
|
||||
"iq-test",
|
||||
"neuropsykologisk",
|
||||
"funktionsvurdering",
|
||||
"støtte-kontaktperson",
|
||||
"socialpædagogisk støtte",
|
||||
"botilbud",
|
||||
"bostøtte",
|
||||
"socialpsykiatri",
|
||||
"§ 85-støtte",
|
||||
"§ 107",
|
||||
"§ 108",
|
||||
"aktivitets- og samværstilbud"
|
||||
]
|
||||
},
|
||||
"criminal": {
|
||||
"_label_da": "Strafbare forhold",
|
||||
"_label_en": "Criminal records and offences",
|
||||
"_article": "Art. 10 — data relating to criminal convictions",
|
||||
"keywords": [
|
||||
"straffeoplysning",
|
||||
"straffeoplysninger",
|
||||
"straffeattest",
|
||||
"børneattest",
|
||||
"ren børneattest",
|
||||
"udvidet børneattest",
|
||||
"dom",
|
||||
"domfældt",
|
||||
"straffet",
|
||||
"straffedom",
|
||||
"betinget dom",
|
||||
"ubetinget dom",
|
||||
"fængselsstraf",
|
||||
"fængslet",
|
||||
"fængsling",
|
||||
"varetægtsfængslet",
|
||||
"varetægtsfængsling",
|
||||
"varetægt",
|
||||
"løsladelse",
|
||||
"løsladt",
|
||||
"prøveløsladt",
|
||||
"prøveløsladelse",
|
||||
"afsoning",
|
||||
"afsoningstid",
|
||||
"sigtelse",
|
||||
"sigtet",
|
||||
"sigtet for",
|
||||
"tiltale",
|
||||
"tiltalt",
|
||||
"anklage",
|
||||
"anklaget",
|
||||
"politianmeldelse",
|
||||
"anmeldt til politiet",
|
||||
"ransagning",
|
||||
"anholdelse",
|
||||
"anholdt",
|
||||
"bøde",
|
||||
"bødeforelæg",
|
||||
"betinget frakendelse",
|
||||
"ubetinget frakendelse",
|
||||
"kørekortfrakendelse",
|
||||
"samfundstjeneste",
|
||||
"fodlænke",
|
||||
"elektronisk fodlænke",
|
||||
"probation",
|
||||
"tiltalefrafald",
|
||||
"tiltaleopgivelse",
|
||||
"straffesag",
|
||||
"strafferet",
|
||||
"kriminalitet",
|
||||
"kriminel",
|
||||
"recidiv",
|
||||
"gentagelseskriminalitet",
|
||||
"sexregistret",
|
||||
"dna-register",
|
||||
"efterlyst",
|
||||
"udvisning",
|
||||
"udvist",
|
||||
"udvisningssag",
|
||||
"udvisningsbeslutning",
|
||||
"udvisningsdom"
|
||||
]
|
||||
},
|
||||
"trade_union": {
|
||||
"_label_da": "Fagforeningsmedlemskab",
|
||||
"_label_en": "Trade union membership",
|
||||
"_article": "Art. 9(1) — data revealing trade union membership",
|
||||
"keywords": [
|
||||
"fagforening",
|
||||
"fagforeningsmedlem",
|
||||
"fagforeningsmedlemskab",
|
||||
"fagforbund",
|
||||
"tillidsrepræsentant",
|
||||
"tr",
|
||||
"fællestillidsrepræsentant",
|
||||
"ftr",
|
||||
"arbejdsmiljørepræsentant",
|
||||
"amr",
|
||||
"strejke",
|
||||
"strejkevagt",
|
||||
"strejkebrydervirksomhed",
|
||||
"sympatistrejke",
|
||||
"lockout",
|
||||
"overenskomst",
|
||||
"overenskomstforhandling",
|
||||
"overenskomstmæssig",
|
||||
"faglig organisation",
|
||||
"faglig konflikt",
|
||||
"kollektiv overenskomst",
|
||||
"a-kasse",
|
||||
"arbejdsløshedskasse",
|
||||
"fh",
|
||||
"3f",
|
||||
"hk",
|
||||
"dso",
|
||||
"dsto",
|
||||
"dtl",
|
||||
"bupl",
|
||||
"dlf",
|
||||
"ftf",
|
||||
"fagbevægelsen",
|
||||
"fagpolitisk",
|
||||
"fagretlig",
|
||||
"faglig sag"
|
||||
]
|
||||
},
|
||||
"religion": {
|
||||
"_label_da": "Religion og tro",
|
||||
"_label_en": "Religious or philosophical beliefs",
|
||||
"_article": "Art. 9(1) — data revealing religious or philosophical beliefs",
|
||||
"keywords": [
|
||||
"religion",
|
||||
"religiøs",
|
||||
"religiøsitet",
|
||||
"tro",
|
||||
"trosfrihed",
|
||||
"trossamfund",
|
||||
"menighedsråd",
|
||||
"kirke",
|
||||
"kirkemedlem",
|
||||
"kirkeskat",
|
||||
"folkekirken",
|
||||
"sognepræst",
|
||||
"konfirmation",
|
||||
"konfirmand",
|
||||
"dåb",
|
||||
"begravelse",
|
||||
"begravelsesritual",
|
||||
"bisættelse",
|
||||
"kirkegård",
|
||||
"moské",
|
||||
"imam",
|
||||
"islamisk",
|
||||
"muslim",
|
||||
"muslimsk",
|
||||
"halal",
|
||||
"ramadan",
|
||||
"fredagsbøn",
|
||||
"synagoge",
|
||||
"rabbi",
|
||||
"jødisk",
|
||||
"kosher",
|
||||
"hinduistisk",
|
||||
"hindu",
|
||||
"buddhistisk",
|
||||
"buddhist",
|
||||
"sikh",
|
||||
"sikhisme",
|
||||
"kristen",
|
||||
"katolik",
|
||||
"katolsk",
|
||||
"protestant",
|
||||
"luthersk",
|
||||
"baptism",
|
||||
"baptist",
|
||||
"jehovas vidner",
|
||||
"mormon",
|
||||
"frikirke",
|
||||
"pinsekirke",
|
||||
"ateist",
|
||||
"agnostiker",
|
||||
"humanist",
|
||||
"sekulariseret",
|
||||
"religiøs fritagelse",
|
||||
"dispensation af religiøse grunde",
|
||||
"religiøs overbevisning",
|
||||
"religiøst tilhørsforhold",
|
||||
"religiøs tilknytning",
|
||||
"filosofisk overbevisning"
|
||||
]
|
||||
},
|
||||
"ethnicity": {
|
||||
"_label_da": "Race og etnisk oprindelse",
|
||||
"_label_en": "Racial or ethnic origin",
|
||||
"_article": "Art. 9(1) — data revealing racial or ethnic origin",
|
||||
"keywords": [
|
||||
"etnisk oprindelse",
|
||||
"etnisk baggrund",
|
||||
"etnicitet",
|
||||
"nationalitet",
|
||||
"herkomst",
|
||||
"national herkomst",
|
||||
"fremmed herkomst",
|
||||
"indvandrerbaggrund",
|
||||
"efterkommer",
|
||||
"andengenerationsindvandrer",
|
||||
"tosproget",
|
||||
"tosprogede elever",
|
||||
"modersmål",
|
||||
"modersmålsundervisning",
|
||||
"flygtning",
|
||||
"asylansøger",
|
||||
"asylsag",
|
||||
"opholdstilladelse",
|
||||
"opholdsstatus",
|
||||
"statsborgerskab",
|
||||
"dansk statsborgerskab",
|
||||
"naturaliseret",
|
||||
"udlænding",
|
||||
"udlændingeloven",
|
||||
"fremmedlov",
|
||||
"visumpligtig",
|
||||
"indrejseforbud",
|
||||
"udsendelsesland",
|
||||
"racediskrimination",
|
||||
"racisme",
|
||||
"hadforbrydelse",
|
||||
"racistisk overgreb"
|
||||
]
|
||||
},
|
||||
"political": {
|
||||
"_label_da": "Politisk overbevisning",
|
||||
"_label_en": "Political opinions",
|
||||
"_article": "Art. 9(1) — data revealing political opinions",
|
||||
"_note": "Included for completeness — less common in school context but relevant for staff",
|
||||
"keywords": [
|
||||
"politisk overbevisning",
|
||||
"politisk holdning",
|
||||
"politisk tilhørsforhold",
|
||||
"partimedlem",
|
||||
"partimedlemskab",
|
||||
"politisk aktiv",
|
||||
"venstrefløj",
|
||||
"højrefløj",
|
||||
"venstreorienteret",
|
||||
"højreorienteret",
|
||||
"radikal",
|
||||
"konservativ",
|
||||
"socialdemokrat",
|
||||
"liberalist",
|
||||
"anarkist",
|
||||
"kommunist",
|
||||
"politisk flygtning",
|
||||
"politisk forfølgelse",
|
||||
"politisk asyl",
|
||||
"samvittighedsfange",
|
||||
"politisk dissens"
|
||||
]
|
||||
},
|
||||
"biometric": {
|
||||
"_label_da": "Biometriske oplysninger",
|
||||
"_label_en": "Biometric data",
|
||||
"_article": "Art. 9(1) — biometric data for the purpose of uniquely identifying a natural person",
|
||||
"keywords": [
|
||||
"fingeraftryk",
|
||||
"fingeraftryksscanning",
|
||||
"ansigtsgenkendelse",
|
||||
"iris-scanning",
|
||||
"nethindescanning",
|
||||
"stemmebiometri",
|
||||
"dna-profil",
|
||||
"dna-analyse",
|
||||
"dna-prøve",
|
||||
"genetisk profil",
|
||||
"biometrisk",
|
||||
"biometrisk id",
|
||||
"biometrisk data",
|
||||
"biometrisk verificering",
|
||||
"pas med chip",
|
||||
"ansigtsscanning",
|
||||
"kropsscanning",
|
||||
"gangartsanalyse"
|
||||
]
|
||||
},
|
||||
"sexual_orientation": {
|
||||
"_label_da": "Seksuel orientering",
|
||||
"_label_en": "Data concerning sex life or sexual orientation",
|
||||
"_article": "Art. 9(1) — data concerning a natural person's sex life or sexual orientation",
|
||||
"keywords": [
|
||||
"seksuel orientering",
|
||||
"seksualitet",
|
||||
"homoseksuel",
|
||||
"bøsse",
|
||||
"lesbisk",
|
||||
"biseksuel",
|
||||
"transseksuel",
|
||||
"transperson",
|
||||
"transkønnet",
|
||||
"ikke-binær",
|
||||
"queer",
|
||||
"kønsskifte",
|
||||
"kønsskifteoperation",
|
||||
"juridisk kønsskifte",
|
||||
"kønsdysfori",
|
||||
"lgbtq",
|
||||
"lgbt",
|
||||
"coming out",
|
||||
"skeiv"
|
||||
]
|
||||
},
|
||||
"_proximity_note": "A keyword match only triggers a special_category flag when it appears within 150 characters of a personal identifier (CPR number, full name, or address). Isolated keyword occurrences in general text do not flag the item.",
|
||||
"_false_positive_guidance": {
|
||||
"behandling": "Very common word — also means 'processing' in legal text. Consider requiring proximity to a health-related term before flagging.",
|
||||
"dom": "Also means 'cathedral' (domkirke) and appears in many compound words. Match as a standalone token only.",
|
||||
"tro": "Also a given name and common word. Match only in context with 'religiøs', 'trossamfund' etc.",
|
||||
"lo": "Abbreviation — match only in known union context.",
|
||||
"allergi": "Common in school contexts (food allergies). Low risk unless near a CPR number."
|
||||
}
|
||||
}
|
||||
7
lang/CLAUDE.md
Normal file
7
lang/CLAUDE.md
Normal file
@ -0,0 +1,7 @@
|
||||
# lang/ — i18n Rules
|
||||
|
||||
- `en.json` is the source of truth. Always update `da.json` and `de.json` when adding or changing keys.
|
||||
- `/api/langs` globs both `*.json` and `*.lang` — both formats coexist.
|
||||
- Loader in `app_config.py` prefers `.json`, falls back to `.lang`.
|
||||
- JS: `t(key, default)` — Python: `LANG.get(key, default)`
|
||||
- No emojis or symbol prefixes in translation values used as button labels.
|
||||
773
lang/da.json
Normal file
773
lang/da.json
Normal file
@ -0,0 +1,773 @@
|
||||
{
|
||||
"app_name": "Document Scanner",
|
||||
"label_root_folder": "Rodmappe",
|
||||
"label_older_than": "Markér filer med data ældre end",
|
||||
"placeholder_folder": "/sti/til/dokumenter",
|
||||
"btn_scan": "Start scanning",
|
||||
"btn_stop": "Stop scanning",
|
||||
"toggle_anonymise": "Anonymisér",
|
||||
"toggle_mask": "Maskér kun CPR",
|
||||
"toggle_blur_faces": "Slør ansigter",
|
||||
"toggle_skip_cloud": "Spring skybaserede filer over",
|
||||
"toggle_ocr": "OCR-scan af PDF'er",
|
||||
"label_face_sensitivity": "Ansigtsfølsomhed",
|
||||
"face_sensitivity_high": "Høj",
|
||||
"face_sensitivity_low": "Lav",
|
||||
"face_sensitivity_hint": "Højere = færre fejlregistreringer",
|
||||
"label_ocr_language": "OCR-sprog",
|
||||
"label_ocr_dpi": "DPI (kvalitet vs. hastighed)",
|
||||
"lang_danish": "Dansk",
|
||||
"lang_danish_english": "Dansk + Engelsk",
|
||||
"lang_english": "Engelsk",
|
||||
"lang_norwegian": "Norsk",
|
||||
"lang_swedish": "Svensk",
|
||||
"lang_german": "Tysk",
|
||||
"lang_french": "Fransk",
|
||||
"lang_dutch": "Hollandsk",
|
||||
"time_any": "Alle",
|
||||
"time_1y": "1 år",
|
||||
"time_2y": "2 år",
|
||||
"time_5y": "5 år",
|
||||
"time_10y": "10 år",
|
||||
"stat_scanned": "Filer scannet",
|
||||
"stat_flagged": "Markerede",
|
||||
"stat_high_risk": "Høj risiko",
|
||||
"stat_cpr": "CPR-numre fundet",
|
||||
"col_file": "Fil",
|
||||
"col_cpr": "CPR-numre",
|
||||
"col_oldest": "Ældste dato",
|
||||
"col_risk": "Risiko",
|
||||
"col_action": "Handling",
|
||||
"col_detail": "Detaljer",
|
||||
"sort_name_az": "Navn A–Z",
|
||||
"sort_name_za": "Navn Z–A",
|
||||
"sort_cpr_desc": "CPR-antal ↓",
|
||||
"sort_oldest_desc": "Ældste dato ↓",
|
||||
"sort_risk_desc": "Risiko ↓",
|
||||
"sort_size_desc": "Størrelse ↓",
|
||||
"filter_all_types": "Alle typer",
|
||||
"filter_pdf": "PDF",
|
||||
"filter_word": "Word",
|
||||
"filter_excel": "Excel",
|
||||
"filter_image": "Billede",
|
||||
"placeholder_search": "Søg filnavn…",
|
||||
"btn_anonymise": "Anonymisér",
|
||||
"btn_mask": "Maskér CPR",
|
||||
"btn_blur": "Slør",
|
||||
"btn_preview": "Forhåndsvis",
|
||||
"btn_show_in_folder": "Vis i mappe",
|
||||
"btn_move_to_trash": "Flyt til papirkurv",
|
||||
"btn_undo": "Fortryd",
|
||||
"btn_export_csv": "CSV",
|
||||
"btn_select_all": "Vælg alle",
|
||||
"btn_anonymise_flagged": "Anonymisér markerede",
|
||||
"btn_anonymise_all": "Anonymisér alle markerede",
|
||||
"btn_cancel": "Annullér",
|
||||
"btn_close": "Luk",
|
||||
"btn_clear": "Ryd",
|
||||
"preview_original": "Original",
|
||||
"preview_processed": "Behandlet",
|
||||
"preview_anonymise": "Anonymisér → forhåndsvis",
|
||||
"preview_mask": "Maskér CPR → forhåndsvis",
|
||||
"preview_blur_faces": "Slør {n} ansigt(er) → forhåndsvis",
|
||||
"preview_no_faces": "✓ Ingen ansigter fundet",
|
||||
"preview_scanning_faces": "Scanner efter ansigter…",
|
||||
"preview_processing_faces": "Behandler ansigter…",
|
||||
"preview_rendering": "Indlæser…",
|
||||
"scan_preparing": "Forbereder…",
|
||||
"scan_scanning": "Scanner…",
|
||||
"scan_face_scanning": "Scanner {total} fil(er) for ansigter…",
|
||||
"scan_face_progress": "Ansigter: {index} / {total} — {file}",
|
||||
"scan_eta": "{eta} tilbage",
|
||||
"scan_stopped": "Scanning stoppet.",
|
||||
"empty_flagged": "Ingen markerede dokumenter",
|
||||
"empty_flagged_detail": "Kør en scanning for at se markerede dokumenter",
|
||||
"empty_filter": "Ingen filer matcher dit filter",
|
||||
"no_audit": "Ingen revisionslog endnu",
|
||||
"dialog_delete_title": "Slet filer?",
|
||||
"dialog_delete_confirm": "Flyt til papirkurv",
|
||||
"all_trashed": "Alle markerede dokumenter er flyttet til papirkurven.",
|
||||
"btn_audit_log": "Revisionslog",
|
||||
"audit_cleared": "Revisionslog ryddet",
|
||||
"failed_audit": "Kunne ikke indlæse revisionslog",
|
||||
"about_title": "Om",
|
||||
"label_python": "Python",
|
||||
"label_spacy": "spaCy-model",
|
||||
"label_tesseract": "Tesseract",
|
||||
"label_pymupdf": "PyMuPDF",
|
||||
"label_opencv": "OpenCV",
|
||||
"no_model": "ingen model installeret",
|
||||
"not_installed": "ikke installeret",
|
||||
"btn_about": "Om",
|
||||
"lbl_size": "Størrelse",
|
||||
"lbl_time": "Tid",
|
||||
"lbl_space": "Mellemrum",
|
||||
"lbl_loading": "Indlæser…",
|
||||
"lbl_blurred": "Sløret",
|
||||
"lbl_none": "Ingen",
|
||||
"lbl_scanner": "Scanner",
|
||||
"lbl_document": "Dokument",
|
||||
"lbl_folder": "Mappe",
|
||||
"empty_scan_hint": "Angiv en mappesti og klik",
|
||||
"empty_flagged_found": "Ingen markerede dokumenter fundet.",
|
||||
"preview_click_hint": "Klik på et dokument for at forhåndsvise det",
|
||||
"kbd_select": "vælg",
|
||||
"kbd_delete": "slet",
|
||||
"kbd_close_preview": "luk forhåndsvisning",
|
||||
"kbd_select_all": "vælg alle",
|
||||
"sort_cpr_asc": "CPR-antal ↑",
|
||||
"preview_error": "Forhåndsvisningsfejl",
|
||||
"preview_unavailable": "Forhåndsvisning utilgængelig",
|
||||
"preview_not_available": "Forhåndsvisning ikke tilgængelig for denne filtype",
|
||||
"lbl_anonymised": "Anonymiseret",
|
||||
"lbl_masked": "CPR maskeret",
|
||||
"lbl_processing": "Behandler…",
|
||||
"lbl_error": "Fejl",
|
||||
"lbl_no_pii": "Ingen ændringer — ingen PII fundet",
|
||||
"badge_anonymised": "✓ anonymiseret",
|
||||
"badge_masked": "✓ maskeret",
|
||||
"badge_blurred": "✓ sløret",
|
||||
"lbl_working": "Arbejder…",
|
||||
"lbl_stopping": "Stopper…",
|
||||
"lbl_no_files_selected": "Ingen filer valgt",
|
||||
"lbl_selected_1": "fil valgt",
|
||||
"lbl_selected_n": "filer valgt",
|
||||
"dialog_delete_body": "Dette vil permanent slette de valgte filer fra disken. Denne handling kan ikke fortrydes.",
|
||||
"lbl_flagged_docs_1": "markeret dokument",
|
||||
"lbl_flagged_docs_n": "markerede dokumenter",
|
||||
"banner_all_clean": "Ingen fund",
|
||||
"banner_files_scanned": "fil(er) scannet, ingen CPR-numre fundet",
|
||||
"banner_need_attention": "fil(er) kræver opmærksomhed ud af",
|
||||
"banner_scanned": "scannet",
|
||||
"summary_face_blur": "billede(r) til ansigtsslørning",
|
||||
"badge_face": "ansigt",
|
||||
"badge_shared": "delt",
|
||||
"badge_archive": "arkiv",
|
||||
"badge_shared_cpr": "Delt CPR",
|
||||
"lbl_also_in": "også i",
|
||||
"filter_shared_cpr": "⚠ Delt CPR",
|
||||
"risk_high": "HØJ",
|
||||
"risk_medium": "MIDDEL",
|
||||
"risk_low": "LAV",
|
||||
"reason_cpr_number": "CPR-nummer",
|
||||
"reason_cpr_numbers": "CPR-numre",
|
||||
"reason_cpr_confirmed": "CPR(er) med nøgleordskontekst",
|
||||
"reason_unique_individuals": "unikke personer",
|
||||
"reason_cpr_shared": "CPR delt på tværs af {n} filer",
|
||||
"reason_data_10y": "data > 10 år gammel",
|
||||
"reason_data_5y": "data > 5 år gammel",
|
||||
"btn_export_excel": "Eksporter rapport som Excel",
|
||||
"btn_audit_log_short": "Revisionslog",
|
||||
"btn_delete_selected": "Slet markerede",
|
||||
"audit_action_scan": "Scanning",
|
||||
"audit_action_redact": "Anonymisering",
|
||||
"audit_action_blur_faces": "Ansigtsslørning",
|
||||
"audit_action_delete": "Sletning",
|
||||
"audit_action_restore": "Gendannelse",
|
||||
"audit_action_export": "Eksport",
|
||||
"audit_files": "filer",
|
||||
"audit_flagged": "markerede",
|
||||
"audit_high_risk": "høj risiko",
|
||||
"audit_regions": "områder",
|
||||
"audit_faces": "ansigter",
|
||||
"audit_permanent": "permanent",
|
||||
"audit_trash": "papirkurv",
|
||||
"audit_files_restored": "fil(er) gendannet",
|
||||
"confirm_clear_audit": "Ryd hele revisionsloggen? Dette kan ikke fortrydes.",
|
||||
"lang_spanish": "Spansk",
|
||||
"lang_italian": "Italiensk",
|
||||
"lang_portuguese": "Portugisisk",
|
||||
"lang_finnish": "Finsk",
|
||||
"lang_polish": "Polsk",
|
||||
"lang_czech": "Tjekkisk",
|
||||
"lang_russian": "Russisk",
|
||||
"lang_arabic": "Arabisk",
|
||||
"lang_chinese_simplified": "Kinesisk (forenklet)",
|
||||
"lang_chinese_traditional": "Kinesisk (traditionelt)",
|
||||
"lang_japanese": "Japansk",
|
||||
"lang_korean": "Koreansk",
|
||||
"lbl_root": "rod",
|
||||
"lbl_root_folder": "rodmappe",
|
||||
"lbl_scanning": "Scanner:",
|
||||
"btn_deselect_all": "Fravælg alle",
|
||||
"filter_high_risk": "🔴 Høj risiko",
|
||||
"filter_in_archive": "📦 I arkiv",
|
||||
"log_starting_scan": "Starter scanning af",
|
||||
"log_found_files": "Fandt {n} fil(er) at scanne",
|
||||
"log_cloud_skipped": "kun-sky filer sprunget over",
|
||||
"log_faces_detected": "ansigt(er) fundet",
|
||||
"log_ocr_pages": "side(r)",
|
||||
"log_pages_skipped": "billedside(r) sprunget over (aktivér OCR)",
|
||||
"log_scan_complete": "Scanning fuldført",
|
||||
"log_files_with_cpr": "fil(er) med CPR",
|
||||
"log_no_faces_in": "Ingen ansigter fundet i",
|
||||
"pii_phone": "telefon",
|
||||
"pii_email": "e-mail",
|
||||
"pii_iban": "IBAN",
|
||||
"pii_bank_account": "bankkonto",
|
||||
"pii_name": "navn",
|
||||
"pii_address": "adresse",
|
||||
"pii_org": "org",
|
||||
"lbl_other_pii": "Andre PII",
|
||||
"lbl_found": "fundet",
|
||||
"btn_clear_results_cache": "Ryd resultatcache",
|
||||
"btn_clear_ocr_cache": "Ryd OCR-cache",
|
||||
"confirm_clear_results_cache": "Ryd alle gemte scanningsresultater? Gitteret vil blive ryddet.",
|
||||
"confirm_clear_ocr_cache": "Ryd OCR-cache? Dette vil tvinge ny OCR ved næste scanning.",
|
||||
"log_cache_cleared": "Resultatcache ryddet",
|
||||
"log_ocr_cache_cleared": "OCR-cache ryddet",
|
||||
"m365_app_name": "GDPRScanner",
|
||||
"m365_sources": "Kilder",
|
||||
"m365_options": "Indstillinger",
|
||||
"m365_accounts": "Konti",
|
||||
"m365_stats": "Statistik",
|
||||
"m365_src_email": "Outlook",
|
||||
"m365_src_onedrive": "OneDrive",
|
||||
"m365_src_sharepoint": "SharePoint",
|
||||
"m365_src_teams": "Teams",
|
||||
"m365_opt_date_from": "Scan e-mails/filer fra",
|
||||
"m365_opt_date_from_hint": "Lad være tom for at scanne alt",
|
||||
"m365_opt_email_body": "Scan e-mailindhold",
|
||||
"m365_opt_attachments": "Scan vedhæftede filer",
|
||||
"m365_opt_max_attach": "Maks. vedhæftet filstørrelse (MB)",
|
||||
"m365_opt_max_emails": "Maks. e-mails pr. bruger",
|
||||
"m365_connect_title": "Opret forbindelse til Microsoft 365",
|
||||
"m365_connect_sub": "Angiv dine Azure-appoplysninger for at logge ind.",
|
||||
"m365_label_client_id": "Klient-ID (Applikations-ID)",
|
||||
"m365_label_tenant_id": "Lejer-ID",
|
||||
"m365_label_client_secret": "Klienthemmelighed",
|
||||
"m365_secret_hint": "(valgfri — aktiverer scanning på tværs af organisationen)",
|
||||
"m365_secret_desc_app": "appen tilgår alle brugeres data direkte (applikationstilladelser, kræver ikke login).",
|
||||
"m365_secret_desc_delegated": "du logger ind som dig selv og kan kun scanne egne data, medmindre du er Global Admin.",
|
||||
"m365_btn_connect": "Opret forbindelse",
|
||||
"m365_device_code_go": "Gå til",
|
||||
"m365_device_code_enter": "og indtast denne kode",
|
||||
"m365_btn_cancel_auth": "Annullér",
|
||||
"m365_btn_reconfigure": "Rekonfigurér",
|
||||
"m365_btn_sign_out": "Log ud",
|
||||
"m365_mode_app": "🔑 App-tilstand — hele org.",
|
||||
"m365_mode_delegated": "Delegeret",
|
||||
"m365_search_users": "Søg brugere…",
|
||||
"m365_add_account_label": "Tilføj konto manuelt:",
|
||||
"m365_add_account_placeholder": "e-mail eller UPN",
|
||||
"m365_admin_note": "Viser kun din konto. For at liste alle brugere skal en administrator give samtykke til <strong>User.Read.All</strong> i Azure Portal, eller tilføj konti manuelt nedenfor.",
|
||||
"m365_btn_scan": "Scan",
|
||||
"m365_btn_stop": "Stop",
|
||||
"m365_pill_flagged": "markerede",
|
||||
"m365_pill_scanned": "scannet",
|
||||
"m365_filter_all_sources": "Alle kilder",
|
||||
"m365_filter_email": "Outlook",
|
||||
"m365_filter_onedrive": "OneDrive",
|
||||
"m365_filter_sharepoint": "SharePoint",
|
||||
"m365_filter_teams": "Teams",
|
||||
"m365_empty_hint": "Vælg kilder og klik på <strong>Scan</strong><br>for at finde dokumenter med CPR-numre",
|
||||
"m365_stat_flagged": "Markerede",
|
||||
"m365_stat_cpr": "CPR-fund",
|
||||
"m365_preview_open": "Åbn i M365 ↗",
|
||||
"m365_preview_close": "Luk",
|
||||
"m365_auth_mode_app": "Godkendelsestilstand: Applikation (klientoplysninger — hele org.)",
|
||||
"m365_auth_mode_delegated": "Godkendelsestilstand: Delegeret (enhedskode — kun indlogget bruger)",
|
||||
"m365_phase_teams_index": "Bygger Teams-medlemskabsindeks…",
|
||||
"m365_phase_sharepoint": "Indsamler SharePoint-filer…",
|
||||
"m365_btn_about": "Om",
|
||||
"m365_stat_scanned": "Scannet",
|
||||
"m365_no_users_found": "Ingen brugere fundet",
|
||||
"m365_no_users_match": "Ingen brugere matcher",
|
||||
"m365_no_cpr_found": "Ingen CPR-numre fundet.",
|
||||
"m365_no_matches": "Ingen match",
|
||||
"m365_btn_export_excel": "Eksporter Excel",
|
||||
"m365_export_no_data": "Ingen resultater at eksportere.",
|
||||
"m365_phase_emails": "Indsamler Outlook-beskeder",
|
||||
"m365_phase_onedrive": "Indsamler OneDrive",
|
||||
"m365_phase_teams": "Indsamler Teams",
|
||||
"m365_preset_1yr": "1 år",
|
||||
"m365_preset_2yr": "2 år",
|
||||
"m365_preset_5yr": "5 år",
|
||||
"m365_preset_10yr": "10 år",
|
||||
"m365_preset_any": "Alle",
|
||||
"m365_auth_mode_app_short": "Application permissions · client credentials",
|
||||
"m365_auth_mode_delegated_short": "Delegated permissions · device code flow",
|
||||
"m365_info_permissions": "Tilladelser",
|
||||
"m365_info_signin": "Log-ind krævet",
|
||||
"m365_info_scope": "Rækkevidde",
|
||||
"m365_info_scope_org": "Alle brugere i tenant",
|
||||
"m365_info_scope_user": "Kun den indloggede bruger",
|
||||
"m365_info_consent": "Administratorsamtykke",
|
||||
"m365_info_required": "Påkrævet",
|
||||
"m365_info_admin": "Global Administrator",
|
||||
"m365_info_expands_scope": "Udvider rækkevidde til alle brugere",
|
||||
"m365_info_no": "Nej",
|
||||
"m365_info_yes": "Ja",
|
||||
"m365_info_app_desc": "Appen godkender sig med et Client Secret og tilgår alle brugeres data direkte via Microsoft Graph — intet interaktivt login kræves. Ideel til automatiserede eller planlagte scanninger.",
|
||||
"m365_info_delegated_desc": "Appen handler på vegne af den indloggede bruger via device code flow. Som standard er kun den pågældende brugers data tilgængeligt. En Global Administrator kan give bredere samtykke til at scanne alle brugere.",
|
||||
"m365_filter_search": "Søg…",
|
||||
"m365_filter_clear": "Ryd",
|
||||
"m365_btn_list_view": "Liste",
|
||||
"m365_btn_grid_view": "Gitter",
|
||||
"m365_log_found_items": "Fandt",
|
||||
"m365_log_items_to_scan": "element(er) til scanning",
|
||||
"m365_log_starting_scan": "Starter scanning:",
|
||||
"m365_log_accounts": "konto(er)",
|
||||
"m365_btn_bulk_delete": "Slet",
|
||||
"m365_bulk_delete_title": "Massesletning",
|
||||
"m365_bulk_delete_sub": "E-mails flyttes til Slettet post · Filer sendes til papirkurven",
|
||||
"m365_bulk_filter_heading": "Filtrer hvad der skal slettes",
|
||||
"m365_bulk_filter_source": "Kildetype",
|
||||
"m365_bulk_filter_min_cpr": "Min. CPR-fund",
|
||||
"m365_bulk_filter_older_than": "Ældre end dato",
|
||||
"m365_bulk_no_match": "Ingen elementer matcher disse kriterier.",
|
||||
"m365_bulk_match_count": "element(er) vil blive slettet",
|
||||
"m365_bulk_confirm_q": "element(er) slettes permanent. Fortsæt?",
|
||||
"m365_bulk_deleting": "Sletter…",
|
||||
"m365_bulk_deleted": "slettet",
|
||||
"m365_bulk_failed": "mislykkedes",
|
||||
"m365_bulk_delete_confirm": "Slet matchende elementer",
|
||||
"m365_delete_confirm": "Slet",
|
||||
"m365_delete_warning": "Dette kan ikke fortrydes.",
|
||||
"m365_log_deleted": "Slettet:",
|
||||
"m365_log_delete_failed": "Sletning mislykkedes:",
|
||||
"m365_log_bulk_done": "Massesletning:",
|
||||
"m365_log_older_than": "ældre end",
|
||||
"m365_eta_left": "tilbage",
|
||||
"btn_all": "Alle",
|
||||
"btn_errors": "Fejl",
|
||||
"log_copy": "Kopier",
|
||||
"btn_none": "Ingen",
|
||||
"m365_btn_resume": "Genoptag",
|
||||
"m365_btn_start_fresh": "Start forfra",
|
||||
"m365_resume_banner": "Tidligere scanning afbrudt — {scanned} scannet, {flagged} fundet",
|
||||
"m365_log_resuming": "Genoptager scanning:",
|
||||
"m365_log_already_scanned": "allerede scannet — sprunget over",
|
||||
"m365_resuming": "Genoptager — springer allerede scannede elementer over…",
|
||||
"m365_opt_delta": "Delta-scanning",
|
||||
"m365_opt_delta_hint": "Kun ændrede elementer (efter første fulde scanning)",
|
||||
"m365_delta_tokens_saved": "Tokens gemt",
|
||||
"m365_delta_clear": "Ryd tokens",
|
||||
"m365_delta_cleared": "Delta-tokens ryddet — næste scanning bliver fuld scanning.",
|
||||
"m365_delta_mode": "Delta-tilstand — henter kun ændrede elementer…",
|
||||
"m365_smtp_title": "✉ Send rapport",
|
||||
"m365_smtp_desc": "Send Excel-rapporten via e-mail efter scanning.",
|
||||
"m365_smtp_host": "SMTP-server",
|
||||
"m365_smtp_port": "Port",
|
||||
"m365_smtp_user": "Brugernavn",
|
||||
"m365_smtp_pass": "Adgangskode",
|
||||
"m365_smtp_from": "Afsenderadresse",
|
||||
"m365_smtp_tls": "STARTTLS",
|
||||
"m365_smtp_ssl": "SSL",
|
||||
"m365_smtp_recipients": "Modtagere",
|
||||
"m365_smtp_recipients_hint": "Adskil med komma eller semikolon",
|
||||
"m365_smtp_save": "Gem",
|
||||
"m365_smtp_send": "Send nu",
|
||||
"m365_smtp_saved": "Indstillinger gemt.",
|
||||
"m365_smtp_sending": "Sender…",
|
||||
"m365_smtp_sent": "Rapport sendt.",
|
||||
"m365_smtp_no_recipients": "Angiv mindst én modtager.",
|
||||
"m365_smtp_configure": "Konfigurer",
|
||||
"m365_smtp_from_hint": "(valgfri — standard er brugernavn)",
|
||||
"m365_subject_title": "🔍 Registreret person",
|
||||
"m365_subject_btn": "Slå op",
|
||||
"m365_subject_desc": "Find alle markerede elementer med et givet CPR-nummer. CPR-nummeret hashes før søgning og gemmes aldrig i klartekst.",
|
||||
"m365_subject_placeholder": "DDMMYY-XXXX",
|
||||
"m365_subject_search": "Søg",
|
||||
"m365_subject_searching": "Søger…",
|
||||
"m365_subject_found": "element(er) fundet",
|
||||
"m365_subject_not_found": "Ingen markerede elementer fundet for dette CPR-nummer.",
|
||||
"m365_subject_delete_all": "Slet alle for denne person",
|
||||
"m365_subject_delete_confirm": "element(er) slettes permanent. Fortsæt?",
|
||||
"m365_disposition_label": "Disposition",
|
||||
"m365_disp_unreviewed": "Ikke gennemgået",
|
||||
"m365_disp_retain_legal": "Opbevar — lovkrav",
|
||||
"m365_disp_retain_legit": "Opbevar — legitim interesse",
|
||||
"m365_disp_retain_contract": "Opbevar — kontrakt",
|
||||
"m365_disp_delete_sched": "Slet — planlagt",
|
||||
"m365_disp_personal_use": "Privat brug — uden for scope",
|
||||
"m365_disp_deleted": "Slettet",
|
||||
"m365_disp_save": "Gem",
|
||||
"m365_disp_saved": "✓ Gemt",
|
||||
"m365_opt_retention": "Opbevaringspolitik",
|
||||
"m365_opt_retention_hint": "Flag og slet elementer ældre end N år",
|
||||
"m365_ret_years": "Opbevaringsår",
|
||||
"m365_ret_fy_end": "Regnskabsårs afslutning",
|
||||
"m365_ret_fy_rolling": "Løbende (fra i dag)",
|
||||
"m365_ret_fy_dec": "31 dec. (Bogføringsloven)",
|
||||
"m365_ret_fy_jun": "30 jun.",
|
||||
"m365_ret_fy_mar": "31 mar.",
|
||||
"m365_ret_mode_rolling": "løbende",
|
||||
"m365_ret_mode_fiscal": "regnskabsår",
|
||||
"m365_ret_cutoff_hint": "Elementer ændret før",
|
||||
"m365_ret_cutoff_flagged": "markeres som forfaldne",
|
||||
"m365_overdue_found": "forfaldne element(er) fundet",
|
||||
"m365_bulk_overdue_btn": "Filtrer forfaldne",
|
||||
"m365_bulk_clear_filters": "Ryd filtre",
|
||||
"m365_btn_export_article30": "Art.30",
|
||||
"m365_article30_done": "Artikel 30-rapport klar.",
|
||||
"a30_title": "GDPR Artikel 30",
|
||||
"a30_subtitle": "Fortegnelse over behandlingsaktiviteter",
|
||||
"a30_generated": "Genereret",
|
||||
"a30_confidential": "Fortroligt — GDPR-overholdelsesdokument",
|
||||
"a30_s1": "1. Oversigt",
|
||||
"a30_scan_date": "Scanningsdato",
|
||||
"a30_items_scanned": "Scannede elementer",
|
||||
"a30_flagged": "Markerede elementer",
|
||||
"a30_cpr_hits": "CPR-fund i alt",
|
||||
"a30_data_subjects": "Anslåede registrerede",
|
||||
"a30_overdue": "Forfaldne elementer (>5 år)",
|
||||
"a30_by_source": "Fordeling efter kilde",
|
||||
"a30_col_source": "Kilde",
|
||||
"a30_col_items": "Elementer",
|
||||
"a30_col_cpr": "CPR-fund",
|
||||
"a30_col_overdue": "Forfaldne",
|
||||
"a30_s2": "2. Identificerede kategorier af personoplysninger",
|
||||
"a30_s2_intro": "Følgende kategorier af personoplysninger blev fundet under scanning.",
|
||||
"a30_col_gdpr_class": "GDPR-klassifikation",
|
||||
"a30_cpr_label": "CPR-numre (dansk personnummer)",
|
||||
"a30_cpr_class": "Art. 9 — nationalt identifikationsnummer",
|
||||
"a30_pii_class_9": "Art. 9 — helbred/følsomme",
|
||||
"a30_pii_class_4": "Art. 4 — personoplysninger",
|
||||
"a30_s3": "3. Datafortegnelse",
|
||||
"a30_s3_intro": "Alle markerede elementer er listet nedenfor med placering, opbevaringsstatus og dispositionsstatus.",
|
||||
"a30_col_name": "Navn / Emne",
|
||||
"a30_col_account": "Konto",
|
||||
"a30_col_modified": "Ændret",
|
||||
"a30_col_disp": "Disposition",
|
||||
"a30_more_items": "yderligere elementer vises ikke. Eksporter Excel-rapporten for den fulde liste.",
|
||||
"a30_s4": "4. Opbevaringsanalyse",
|
||||
"a30_s4_intro": "Følgende elementer overskrider 5-års opbevaringsgrænsen og bør gennemgås for sletning iht. GDPR artikel 5(1)(e) — opbevaringsbegrænsning.",
|
||||
"a30_s5": "5. Overholdelsestrend",
|
||||
"a30_s5_intro": "Antal markerede elementer over de seneste scanninger (nyeste først).",
|
||||
"a30_col_scan_date": "Scanningsdato",
|
||||
"a30_col_scan_type": "Scanningstype",
|
||||
"a30_scan_delta": "Delta",
|
||||
"a30_scan_full": "Fuld",
|
||||
"a30_s6": "6. Metode og retsgrundlag",
|
||||
"a30_method_title": "Scanningsmetode",
|
||||
"a30_method_1": "CPR-numre registreres ved mønstergenkendelse mod det officielle danske CPR-format (DDMMYY-XXXX) med fuld datovalidering og århundrede-cifferverifikation iht. CPR-registerets regler.",
|
||||
"a30_method_2": "Yderligere personoplysninger (telefonnumre, e-mailadresser, IBAN, bankkontonumre, navne, adresser og organisationer) registreres med regex og spaCy NER.",
|
||||
"a30_method_3": "CPR-numre i dette dokuments database er SHA-256-hashede og gemmes aldrig i klartekst.",
|
||||
"a30_method_4": "Scanning dækker Exchange-mailbokse (alle mapper inkl. Sendt post), OneDrive, SharePoint og Microsoft Teams-kanalfiler via Microsoft Graph API. Ved tilslutning dækker Google Workspace-scanning Gmail og Google Drev via en tjenestekonto med domæneomfattende delegation. Lokale og netværksbaserede filshares (SMB) scannes direkte.",
|
||||
"a30_gdpr_title": "Refererede GDPR-artikler",
|
||||
"a30_gdpr_1": "Artikel 5(1)(c) — Dataminimering: kun nødvendige oplysninger må opbevares",
|
||||
"a30_gdpr_2": "Artikel 5(1)(e) — Opbevaringsbegrænsning: data må ikke opbevares længere end nødvendigt",
|
||||
"a30_gdpr_3": "Artikel 9 — Særlige kategorier: helbredsoplysninger, strafbare forhold, fagforeningsmedlemskab mv. kræver udtrykkeligt retsgrundlag",
|
||||
"a30_gdpr_4": "Artikel 15 — Ret til indsigt: registrerede kan anmode om oplysninger om deres data",
|
||||
"a30_gdpr_5": "Artikel 17 — Ret til sletning: registrerede kan anmode om sletning",
|
||||
"a30_gdpr_6": "Artikel 30 — Fortegnelse over behandlingsaktiviteter: dette dokument opfylder forpligtelsen",
|
||||
"a30_disp_unreviewed": "Ikke gennemgået",
|
||||
"a30_disp_retain_legal": "Opbevar — Lovkrav",
|
||||
"a30_disp_retain_legit": "Opbevar — Legitim interesse",
|
||||
"a30_disp_retain_contract": "Opbevar — Kontrakt",
|
||||
"a30_disp_delete_sched": "Slet — Planlagt",
|
||||
"a30_disp_personal_use": "Privat brug — uden for GDPR's anvendelsesområde (Art. 2(2)(c))",
|
||||
"a30_disp_deleted": "Slettet",
|
||||
"a30_s6_short": "Metode og retsgrundlag",
|
||||
"m365_role_all": "Alle",
|
||||
"m365_role_staff": "Ansat",
|
||||
"m365_role_student": "Elev",
|
||||
"a30_s_dellog": "Sletningslog",
|
||||
"a30_dellog_intro": "element(er) med personoplysninger er slettet via GDPRScanner. Denne log opfylder dokumentationspligten i GDPR artikel 5, stk. 2.",
|
||||
"a30_dellog_by_reason": "Sletninger efter årsag",
|
||||
"a30_dellog_records": "Sletningsposter",
|
||||
"a30_col_reason": "Årsag",
|
||||
"a30_col_count": "Antal",
|
||||
"a30_col_deleted_at": "Slettet den",
|
||||
"a30_col_deleted_by": "Slettet af",
|
||||
"a30_reason_manual": "Manuel (enkelt kort)",
|
||||
"a30_reason_bulk": "Massesletning",
|
||||
"a30_reason_retention": "Opbevaringspolitik",
|
||||
"a30_reason_dsr": "Sletningsanmodning fra registreret (art. 17)",
|
||||
"m365_filter_all_disp": "Alle dispositioner",
|
||||
"m365_trend_title": "Tendens",
|
||||
"m365_trend_flagged": "Markerede",
|
||||
"m365_trend_overdue": "Forfaldne",
|
||||
"m365_filter_all_transfer": "Alle elementer",
|
||||
"m365_filter_ext_recipient": "Ekstern modtager",
|
||||
"m365_filter_ext_share": "Eksternt delt",
|
||||
"m365_filter_shared": "Delt",
|
||||
"m365_badge_ext_recipient": "Ekstern",
|
||||
"m365_badge_shared": "Delt",
|
||||
"a30_s_special": "Særlige kategorier af personoplysninger (artikel 9)",
|
||||
"a30_special_intro": "element(er) indeholder særlige kategorier af personoplysninger iht. GDPR artikel 9. Kræver eksplicit retsgrundlag og konsekvensanalyse (DPIA).",
|
||||
"a30_special_by_cat": "Opdagede kategorier",
|
||||
"a30_special_items": "Berørte elementer (op til 50)",
|
||||
"a30_col_category": "Kategori",
|
||||
"a30_cat_health": "Helbredsoplysninger (art. 9)",
|
||||
"a30_cat_mental": "Psykisk helbred (art. 9)",
|
||||
"a30_cat_criminal": "Strafbare forhold (art. 10)",
|
||||
"a30_cat_union": "Fagforeningsmedlemskab (art. 9)",
|
||||
"a30_cat_religion": "Religiøs overbevisning (art. 9)",
|
||||
"a30_cat_ethnicity": "Etnisk oprindelse (art. 9)",
|
||||
"a30_cat_political": "Politisk overbevisning (art. 9)",
|
||||
"a30_cat_biometric": "Biometriske oplysninger (art. 9)",
|
||||
"a30_cat_sexual": "Seksuel orientering (art. 9)",
|
||||
"m365_filter_all_special": "Alle risikoniveauer",
|
||||
"m365_filter_special_only": "Art. 9 følsomme data",
|
||||
"m365_badge_special": "Art.9",
|
||||
"m365_phase_scanning": "Scanner…",
|
||||
"a30_special_cat": "Særlige kategorier (artikel 9)",
|
||||
"a30_special_cat_note": "Disse elementer indeholder helbredsoplysninger, straffeoplysninger, biometriske data, religiøse, etniske, fagforeningsmæssige, politiske eller seksuelle oplysninger. Et eksplicit retsgrundlag (art. 9, stk. 2) og eventuelt en konsekvensanalyse (art. 35) er påkrævet.",
|
||||
"a30_col_special": "Art. 9",
|
||||
"a30_pii_phone": "Telefonnumre",
|
||||
"a30_pii_email": "E-mailadresser",
|
||||
"a30_pii_iban": "IBAN-bankkontonumre",
|
||||
"a30_pii_bank": "Bankkontonumre",
|
||||
"a30_pii_name": "Personnavne (NER)",
|
||||
"a30_pii_address": "Adresser (NER)",
|
||||
"a30_pii_org": "Organisationer (NER)",
|
||||
"a30_col_cpr_short": "CPR",
|
||||
"a30_inv_staff": "Personale / Undervisere",
|
||||
"a30_inv_students": "Elever",
|
||||
"a30_student_consent_note": "Bemærk: Elevers konti i dansk folkeskole (elever under 15 år) kræver forældrenes samtykke til behandling af personoplysninger i henhold til Databeskyttelsesloven §6. Elementer i elevers konti må ikke slettes automatisk — enhver handling kræver godkendelse fra skoleledelsen, og for elever under 15 år skal forældre eller værger som rettighedshavere underrettes i henhold til GDPR artikel 8.",
|
||||
"m365_profile_label": "Profil:",
|
||||
"m365_profile_placeholder": "— Vælg profil —",
|
||||
"m365_profile_save_tip": "Gem aktuelle indstillinger som profil",
|
||||
"m365_profile_save_prompt": "Profilnavn:",
|
||||
"m365_profile_applied": "Profil indlæst",
|
||||
"m365_profile_saved": "Profil gemt",
|
||||
"m365_profile_manage_btn": "Profiler",
|
||||
"m365_profile_clear_btn": "Ryd",
|
||||
"m365_profile_save_btn": "Gem",
|
||||
"m365_profile_manage_title": "⚙ Administrer profiler",
|
||||
"m365_profile_no_profiles": "Ingen gemte profiler endnu. Brug 💾 til at gemme de aktuelle sidebjælkeindstillinger som en profil.",
|
||||
"m365_profile_use": "Brug",
|
||||
"m365_profile_edit": "Rediger",
|
||||
"m365_profile_duplicate": "Dupliker",
|
||||
"m365_profile_delete": "Slet",
|
||||
"m365_profile_delete_confirm": "Slet profil",
|
||||
"m365_profile_duplicated": "Profil duplikeret",
|
||||
"m365_profile_deleted": "Profil slettet",
|
||||
"m365_profile_never": "aldrig",
|
||||
"m365_profile_last_run": "Sidst kørt",
|
||||
"m365_profile_name_placeholder": "Profilnavn",
|
||||
"m365_profile_desc_placeholder": "Beskrivelse (valgfri)",
|
||||
"m365_profile_name_required": "Profilnavn er påkrævet.",
|
||||
"m365_db_title": "🗄 Database",
|
||||
"m365_db_export": "Eksporter",
|
||||
"m365_db_import": "Importer",
|
||||
"m365_db_export_error": "Eksport mislykkedes",
|
||||
"m365_db_exported": "Database eksporteret",
|
||||
"m365_db_import_title": "📥 Importer database",
|
||||
"m365_db_import_desc": "Vælg en tidligere eksporteret .zip-fil. Sammenfletning tilføjer dispositioner og slettelog. Erstatning sletter alt og gendanner fuldt ud.",
|
||||
"m365_db_import_file": "ZIP-fil",
|
||||
"m365_db_import_mode": "Tilstand:",
|
||||
"m365_db_import_merge": "Sammenflet (sikker)",
|
||||
"m365_db_import_replace": "Erstat (fuld gendannelse)",
|
||||
"m365_db_import_replace_warn": "⚠ Erstatningstilstand sletter alle eksisterende scanningsdata inden gendannelse. Sørg for at have en sikkerhedskopi af ~/.gdpr_scanner.db først.",
|
||||
"m365_db_import_replace_confirm": "Erstatningstilstand sletter ALLE eksisterende scanningsdata og gendanner fra arkivet.\\n\\nSørg for at have en manuel sikkerhedskopi af ~/.gdpr_scanner.db.\\n\\nFortsæt?",
|
||||
"m365_db_import_no_file": "Vælg venligst en ZIP-fil først.",
|
||||
"m365_db_importing": "Importerer…",
|
||||
"m365_db_imported": "Importeret",
|
||||
"m365_db_import_run": "Importer",
|
||||
"m365_opt_scan_photos": "Søg efter ansigter i billeder",
|
||||
"m365_opt_scan_photos_hint": "Markerer billeder med registrerede ansigter som Art. 9 biometriske data. Langsommere — aktivér efter behov.",
|
||||
"m365_filter_photo_only": "📷 Billeder / biometrisk",
|
||||
"m365_badge_faces": "ansigter",
|
||||
"a30_photo_items": "Billeder med registrerede ansigter (Art. 9 biometrisk)",
|
||||
"a30_photo_note": "Fotografier af identificerbare personer er biometriske data i henhold til Art. 9 GDPR. Opbevaring kræver et dokumenteret retsgrundlag i henhold til Art. 9(2). For skolefotografier af elever under 15 år er forældrenes samtykke påkrævet (Databeskyttelsesloven §6). Se Datatilsynets vejledning om fotografering i skoler.",
|
||||
"a30_s_photos": "Fotografier og biometriske data (artikel 9)",
|
||||
"a30_photo_intro": "billedfil(er) med registrerede ansigt(er) blev fundet i scanningen. Fotografier af identificerbare personer udgør biometriske data i henhold til GDPR artikel 9 og er underlagt den samme forhøjede beskyttelse som sundheds- eller straffeoplysninger.",
|
||||
"a30_photo_guidance": "Vejledning om opbevaringsperiode",
|
||||
"a30_photo_g1": "Billeder må kun opbevares, så længe det oprindelige formål er gyldigt (Art. 5(1)(b) — formålsbegrænsning).",
|
||||
"a30_photo_g2": "Elever under 15 år kræver forældrenes samtykke (Databeskyttelsesloven §6). Samtykket skal være frit givet, specifikt og dokumenteret.",
|
||||
"a30_photo_g3": "Billeder på offentligt tilgængelige hjemmesider skal fjernes straks, når en person forlader organisationen eller trækker sit samtykke tilbage (Art. 17 — retten til sletning).",
|
||||
"a30_photo_g4": "Historisk/arkivbrug kan begrunde længere opbevaring i henhold til Art. 89 kun med specifikke sikkerhedsforanstaltninger og en konkret vurdering.",
|
||||
"a30_photo_col_faces": "Ansigter",
|
||||
"a30_method_5": "Når billedscanning er aktiveret, analyseres billedfiler ved hjælp af OpenCV Haar cascade-ansigtsdetektion for at identificere fotografier af personer (Art. 9 biometriske data).",
|
||||
"m365_role_cycle_tip": "Klik for at ændre rolle (skifter elev → personale → andet → auto)",
|
||||
"m365_role_set": "Rolle sat",
|
||||
"m365_role_cleared": "Rolletilsidesættelse fjernet",
|
||||
"m365_sku_debug_title": "🔍 Lejer-SKU-ID'er",
|
||||
"m365_sku_debug_desc": "Dette er de rå SKU-ID'er tildelt dine brugere. Dem markeret ❓ ukendt er ikke i classification/m365_skus.json — kopiér dem ind under student_ids eller staff_ids og genstart.",
|
||||
"m365_sku_debug_none": "Ingen licensdata returneret — kontrollér at appen har User.Read.All tilladelse.",
|
||||
"m365_file_sources_title": "📁 Filkilder",
|
||||
"m365_file_sources_manage": "Administrer",
|
||||
"m365_file_sources_empty": "Ingen filkilder konfigureret. Tilføj en lokal mappe eller netværksdeling nedenfor.",
|
||||
"m365_file_sources_add": "Tilføj kilde",
|
||||
"m365_fsrc_label": "Betegnelse",
|
||||
"m365_fsrc_path": "Sti",
|
||||
"m365_fsrc_smb_detected": "SMB/CIFS-netværksdeling registreret",
|
||||
"m365_fsrc_smb_host": "SMB-vært",
|
||||
"m365_fsrc_smb_user": "Brugernavn",
|
||||
"m365_fsrc_smb_pw": "Adgangskode",
|
||||
"m365_fsrc_smb_pw_hint": "Adgangskoden gemmes i nøglekæden — aldrig i en fil.",
|
||||
"m365_fsrc_add_btn": "Tilføj",
|
||||
"m365_fsrc_saved": "Kilde gemt",
|
||||
"m365_fsrc_saving": "Gemmer...",
|
||||
"m365_fsrc_path_required": "Sti er påkrævet.",
|
||||
"m365_fsrc_scan_btn": "Scan",
|
||||
"m365_fsrc_scan_start": "Starter filscanning",
|
||||
"m365_src_group_files": "Filkilder",
|
||||
"m365_no_sources": "Ingen kilder valgt — intet at scanne.",
|
||||
"m365_fsrc_name_required": "Navn er påkrævet.",
|
||||
"m365_srcmgmt_title": "⚙ Kildeadministration",
|
||||
"m365_srcmgmt_tab_m365": "Microsoft 365",
|
||||
"m365_srcmgmt_tab_google": "Google Workspace",
|
||||
"m365_srcmgmt_tab_files": "Filkilder",
|
||||
"m365_srcmgmt_connection": "Forbindelse",
|
||||
"m365_srcmgmt_azure_creds": "Azure-legitimationsoplysninger",
|
||||
"m365_srcmgmt_sources_m365": "Kilder der skal scannes",
|
||||
"m365_srcmgmt_connected": "Forbundet",
|
||||
"m365_srcmgmt_not_connected": "Ikke forbundet",
|
||||
"m365_srcmgmt_coming_soon": "Kommer snart",
|
||||
"m365_srcmgmt_google_sub": "Gmail og Google Drev-scanning vises her, når det er implementeret.",
|
||||
"m365_srcmgmt_file_sources": "Filkilder",
|
||||
"m365_sources_manage_btn": "Kilder",
|
||||
"m365_connecting": "Opretter forbindelse...",
|
||||
"m365_err_creds_required": "Klient-ID og lejer-ID er påkrævet",
|
||||
"m365_signout_confirm": "Afbryd forbindelsen og ryd legitimationsoplysninger?",
|
||||
"m365_btn_settings": "Indstillinger",
|
||||
"m365_settings_title": "⚙ Indstillinger",
|
||||
"m365_settings_tab_general": "Generelt",
|
||||
"m365_settings_tab_email": "E-mailrapport",
|
||||
"m365_settings_tab_database": "Database",
|
||||
"m365_settings_appearance": "Udseende",
|
||||
"m365_settings_language": "Sprog",
|
||||
"m365_settings_theme": "Tema",
|
||||
"m365_settings_db_actions": "Handlinger",
|
||||
"m365_db_reset": "Nulstil database",
|
||||
"m365_db_reset_confirm": "Nulstil database? Alle scanresultater slettes.",
|
||||
"m365_db_reset_done": "Database nulstillet",
|
||||
"m365_db_scans": "Scanninger",
|
||||
"m365_smtp_saving": "Gemmer...",
|
||||
"m365_settings_admin_pin": "Admin-PIN",
|
||||
"m365_settings_pin_hint": "Påkrævet ved destruktive handlinger (f.eks. nulstil database). Lad stå tom for at deaktivere.",
|
||||
"m365_settings_current_pin": "Nuværende PIN",
|
||||
"m365_settings_new_pin": "Ny PIN",
|
||||
"m365_settings_confirm_pin": "Bekræft PIN",
|
||||
"m365_settings_pin_set": "Admin-PIN er indstillet",
|
||||
"m365_settings_pin_not_set": "Ingen PIN — nulstil database er ubeskyttet",
|
||||
"m365_settings_pin_required": "PIN er påkrævet.",
|
||||
"m365_settings_pin_mismatch": "PIN-koderne stemmer ikke overens.",
|
||||
"m365_settings_pin_wrong": "Forkert PIN — nulstilling annulleret.",
|
||||
"m365_settings_pin_saved": "PIN gemt",
|
||||
"m365_settings_enter_pin": "Indtast admin-PIN",
|
||||
"m365_settings_enter_pin_reset": "Indtast admin-PIN for at nulstille databasen.",
|
||||
"btn_confirm": "Bekræft",
|
||||
"m365_log_scan_started": "Scanning startet",
|
||||
"m365_preview_local_file": "Lokal fil — ingen cloud-forhåndsvisning tilgængelig",
|
||||
"m365_badge_gps": "GPS-position",
|
||||
"a30_gps_items": "Elementer med GPS-placeringsdata (Art. 4 — placering = persondata)",
|
||||
"a30_exif_pii_items": "Elementer med EXIF-personoplysninger (forfatter, beskrivelse, nøgleord)",
|
||||
"a30_gps_title": "Elementer med GPS-placeringsdata",
|
||||
"a30_gps_intro": "Følgende filer indeholder GPS-koordinater indlejret i EXIF-metadata. Placeringsdata udgør personoplysninger i henhold til GDPR Art. 4.",
|
||||
"a30_gps_col_lat": "Breddegrad",
|
||||
"a30_gps_col_lon": "Længdegrad",
|
||||
"m365_accounts_disabled_tip": "Vælg en Microsoft 365-kilde for at aktivere kontovalg",
|
||||
"m365_smtp_test": "Test",
|
||||
"m365_smtp_testing": "Sender test-e-mail…",
|
||||
"m365_smtp_test_ok": "Test-e-mail sendt",
|
||||
"m365_smtp_test_fail": "Forbindelse mislykkedes",
|
||||
"m365_fsrc_edit_btn": "Rediger",
|
||||
"m365_fsrc_save_changes": "Gem ændringer",
|
||||
"m365_settings_tab_scheduler": "Planlægger",
|
||||
"m365_sched_title": "Planlagte scanninger",
|
||||
"m365_sched_next": "Næste",
|
||||
"m365_sched_hint": "Kør scanninger automatisk på et fast tidspunkt. Kræver en aktiv M365-forbindelse (app-tilstand anbefales).",
|
||||
"m365_sched_no_aps": "⚠ APScheduler ikke installeret. Kør: pip install apscheduler",
|
||||
"m365_sched_enabled": "Aktivér planlægger",
|
||||
"m365_sched_frequency": "Frekvens",
|
||||
"m365_sched_dow": "Ugedag",
|
||||
"m365_sched_dom": "Dag i måneden",
|
||||
"m365_sched_time": "Tidspunkt",
|
||||
"m365_sched_profile": "Profil",
|
||||
"m365_sched_profile_last": "Sidst gemte indstillinger",
|
||||
"m365_sched_after_scan": "Efter scanning",
|
||||
"m365_sched_auto_email": "Send rapport automatisk",
|
||||
"m365_sched_auto_retention": "Håndhæv opbevaringspolitik",
|
||||
"m365_sched_status": "Status",
|
||||
"m365_sched_run_now": "▶ Kør nu",
|
||||
"m365_sched_add": "+ Tilføj planlagt scanning",
|
||||
"m365_sched_name": "Navn",
|
||||
"m365_sched_editor_new": "Ny planlagt scanning",
|
||||
"m365_sched_editor_edit": "Rediger planlagt scanning",
|
||||
"m365_sched_name_required": "Navn er påkrævet",
|
||||
"m365_sched_no_runs": "Ingen planlagte kørsler endnu",
|
||||
"m365_sched_freq_daily": "Dagligt",
|
||||
"m365_sched_freq_weekly": "Ugentligt",
|
||||
"m365_sched_freq_monthly": "Månedligt",
|
||||
"m365_sched_dow_mon": "Mandag",
|
||||
"m365_sched_dow_tue": "Tirsdag",
|
||||
"m365_sched_dow_wed": "Onsdag",
|
||||
"m365_sched_dow_thu": "Torsdag",
|
||||
"m365_sched_dow_fri": "Fredag",
|
||||
"m365_sched_dow_sat": "Lørdag",
|
||||
"m365_sched_dow_sun": "Søndag",
|
||||
"btn_save": "Gem",
|
||||
"m365_settings_about": "Om",
|
||||
"m365_settings_save_pin": "Gem PIN",
|
||||
"m365_sse_reconnecting": "Genopretter forbindelse til kørende scanning…",
|
||||
"m365_sse_replay_note": "Live-log genoptaget — tidligere indlæg afspillet fra igangværende scanning.",
|
||||
"m365_google_sa_creds": "Tjenestekonto-legitimationsoplysninger",
|
||||
"m365_google_sa_key_file": "JSON-nøgle til tjenestekonto",
|
||||
"m365_google_sa_key_hint": "Download fra Google Cloud Console → IAM & Admin → Tjenestekonti → Nøgler → Tilføj nøgle → JSON",
|
||||
"m365_google_admin_email": "Admin-e-mail",
|
||||
"m365_google_admin_email_hint": "Bruges til domæneomfattende delegation — skal være Workspace-superadmin.",
|
||||
"m365_google_libs_missing": "Biblioteker ikke installeret",
|
||||
"m365_google_key_required": "Vælg en JSON-nøglefil til tjenestekontoen",
|
||||
"m365_google_invalid_json": "Ugyldig JSON-fil",
|
||||
"m365_srcmgmt_sources_google": "Kilder der skal scannes",
|
||||
"m365_google_src_gmail": "Gmail",
|
||||
"m365_google_src_drive": "Google Drev",
|
||||
"m365_google_setup_title": "Opsætning kræves i Google Workspace:",
|
||||
"m365_google_setup_step1": "Opret et Google Cloud-projekt og aktiver Gmail API + Drive API + Admin SDK.",
|
||||
"m365_google_setup_step2": "Opret en tjenestekonto, download JSON-nøglen, og aktiver domæneomfattende delegation.",
|
||||
"m365_google_setup_step3": "I Workspace Admin → Sikkerhed → API-kontroller → Domæneomfattende delegation, tilføj tjenestekontoens klient-ID med omfang:",
|
||||
"m365_google_auth_mode": "Godkendelsestilstand",
|
||||
"m365_google_mode_workspace": "Workspace",
|
||||
"m365_google_mode_personal": "Personlig konto",
|
||||
"m365_google_personal_creds": "Personlig konto",
|
||||
"m365_google_personal_client_id": "Klient-ID",
|
||||
"m365_google_personal_client_secret": "Klienthemmelighed",
|
||||
"m365_google_personal_hint": "Opret OAuth 2.0 Desktop-legitimationsoplysninger i Google Cloud Console, og indsæt klient-ID og hemmelighed ovenfor.",
|
||||
"m365_google_personal_sign_in": "Log ind",
|
||||
"m365_google_personal_creds_required": "Klient-ID og hemmelighed er påkrævet",
|
||||
"m365_google_personal_setup_title": "Opsætning kræves:",
|
||||
"m365_google_personal_setup_step1": "I Google Cloud Console skal du oprette et projekt og aktivere Gmail API + Drive API.",
|
||||
"m365_google_personal_setup_step2": "Opret OAuth 2.0-legitimationsoplysninger (typen Desktop-app) og kopiér klient-ID og hemmelighed.",
|
||||
"m365_google_personal_setup_step3": "Tilføj din Google-konto-e-mail til listen over testbrugere på OAuth-samtykkeskærmen.",
|
||||
"m365_auth_waiting": "Venter på login…",
|
||||
"role_staff": "Ansat",
|
||||
"role_student": "Elev",
|
||||
"role_other": "Anden",
|
||||
|
||||
"m365_settings_tab_security": "Sikkerhed",
|
||||
|
||||
"share_modal_title": "Del resultater",
|
||||
"share_modal_desc": "Skrivebeskyttede links lader en DPO eller gennemganger se resultater og tilknytte dispositioner uden adgang til scanningskontroller eller legitimationsoplysninger.",
|
||||
"share_new_link": "Nyt link",
|
||||
"share_label_lbl": "Etiket (valgfrit)",
|
||||
"share_label_placeholder": "f.eks. DPO-gennemgang 2026",
|
||||
"share_expires_in": "Udløber om",
|
||||
"share_expires_never": "Aldrig",
|
||||
"share_expires_7d": "7 dage",
|
||||
"share_expires_30d": "30 dage",
|
||||
"share_expires_90d": "90 dage",
|
||||
"share_expires_1y": "1 år",
|
||||
"share_create": "Opret",
|
||||
"share_copy_link_prompt": "Kopiér link:",
|
||||
"share_active_links": "Aktive links",
|
||||
"share_viewer_pin_label": "Seerens PIN:",
|
||||
"share_pin_configure": "Konfigurér",
|
||||
"share_pin_set": "Angivet",
|
||||
"share_pin_not_set": "Ikke angivet",
|
||||
"share_no_links": "Ingen aktive links.",
|
||||
"share_unlabelled": "Uden etiket",
|
||||
"share_expires_prefix": "Udløber:",
|
||||
"share_last_used": "Sidst brugt:",
|
||||
"share_revoke": "Tilbagekald",
|
||||
"share_copied": "Kopiéret!",
|
||||
"share_load_error": "Kunne ikke indlæse links.",
|
||||
"share_create_error": "Kunne ikke oprette link:",
|
||||
"share_revoke_confirm": "Tilbagekald dette link? Alle der bruger det, mister straks adgang.",
|
||||
"share_revoke_error": "Kunne ikke tilbagekalde:",
|
||||
|
||||
"viewer_pin_group_title": "Seerens PIN",
|
||||
"viewer_pin_desc": "En numerisk PIN (4–8 cifre), der lader alle åbne <code style=\"font-size:10px\">/view</code> i en browser for skrivebeskyttet adgang til resultater uden et token-link.",
|
||||
"viewer_pin_clear": "Ryd PIN",
|
||||
"viewer_pin_is_set": "Seerens PIN er angivet",
|
||||
"viewer_pin_not_set_msg": "Ingen PIN angivet — /view kræver et token-link",
|
||||
"viewer_pin_format": "PIN skal være 4–8 cifre.",
|
||||
"viewer_pin_saving": "Gemmer…",
|
||||
"viewer_pin_saved": "PIN gemt",
|
||||
"viewer_pin_clear_confirm": "Fjern seerens PIN? /view vil igen kræve et token-link.",
|
||||
"viewer_pin_cleared": "PIN ryddet"
|
||||
}
|
||||
773
lang/de.json
Normal file
773
lang/de.json
Normal file
@ -0,0 +1,773 @@
|
||||
{
|
||||
"app_name": "Document Scanner",
|
||||
"label_root_folder": "Stammordner",
|
||||
"label_older_than": "Dateien mit Daten älter als markieren",
|
||||
"placeholder_folder": "/pfad/zu/dokumenten",
|
||||
"btn_scan": "Scan starten",
|
||||
"btn_stop": "Scan stoppen",
|
||||
"toggle_anonymise": "Anonymisieren",
|
||||
"toggle_mask": "Nur Ausweis-Nr. maskieren",
|
||||
"toggle_blur_faces": "Gesichter unscharf",
|
||||
"toggle_skip_cloud": "Nur-Cloud-Dateien überspringen",
|
||||
"toggle_ocr": "OCR für gescannte PDFs",
|
||||
"label_face_sensitivity": "Gesichtsempfindlichkeit",
|
||||
"face_sensitivity_high": "Hoch",
|
||||
"face_sensitivity_low": "Niedrig",
|
||||
"face_sensitivity_hint": "Höher = weniger Fehlerkennungen",
|
||||
"label_ocr_language": "OCR-Sprache",
|
||||
"label_ocr_dpi": "DPI (Qualität vs. Geschwindigkeit)",
|
||||
"lang_danish": "Dänisch",
|
||||
"lang_danish_english": "Dänisch + Englisch",
|
||||
"lang_english": "Englisch",
|
||||
"lang_norwegian": "Norwegisch",
|
||||
"lang_swedish": "Schwedisch",
|
||||
"lang_german": "Deutsch",
|
||||
"lang_french": "Französisch",
|
||||
"lang_dutch": "Niederländisch",
|
||||
"lang_spanish": "Spanisch",
|
||||
"lang_italian": "Italienisch",
|
||||
"lang_portuguese": "Portugiesisch",
|
||||
"lang_finnish": "Finnisch",
|
||||
"lang_polish": "Polnisch",
|
||||
"lang_czech": "Tschechisch",
|
||||
"lang_russian": "Russisch",
|
||||
"lang_arabic": "Arabisch",
|
||||
"lang_chinese_simplified": "Chinesisch (vereinfacht)",
|
||||
"lang_chinese_traditional": "Chinesisch (traditionell)",
|
||||
"lang_japanese": "Japanisch",
|
||||
"lang_korean": "Koreanisch",
|
||||
"time_any": "Beliebig",
|
||||
"time_1y": "1 Jahr",
|
||||
"time_2y": "2 Jahre",
|
||||
"time_5y": "5 Jahre",
|
||||
"time_10y": "10 Jahre",
|
||||
"stat_scanned": "Gescannte Dateien",
|
||||
"stat_flagged": "Markierte",
|
||||
"stat_high_risk": "Hohes Risiko",
|
||||
"stat_cpr": "Ausweis-Nr. gefunden",
|
||||
"col_file": "Datei",
|
||||
"col_cpr": "Ausweis-Nr.",
|
||||
"col_oldest": "Ältestes Datum",
|
||||
"col_risk": "Risiko",
|
||||
"col_action": "Aktion",
|
||||
"col_detail": "Details",
|
||||
"sort_name_az": "Name A–Z",
|
||||
"sort_name_za": "Name Z–A",
|
||||
"sort_cpr_desc": "Anzahl Ausweis-Nr. ↓",
|
||||
"sort_cpr_asc": "Anzahl Ausweis-Nr. ↑",
|
||||
"sort_oldest_desc": "Ältestes Datum ↓",
|
||||
"sort_risk_desc": "Risiko ↓",
|
||||
"sort_size_desc": "Größe ↓",
|
||||
"filter_all_types": "Alle Typen",
|
||||
"filter_pdf": "PDF",
|
||||
"filter_word": "Word",
|
||||
"filter_excel": "Excel",
|
||||
"filter_image": "Bild",
|
||||
"filter_shared_cpr": "⚠ Geteilte Ausweis-Nr.",
|
||||
"placeholder_search": "Dateiname suchen…",
|
||||
"btn_anonymise": "Anonymisieren",
|
||||
"btn_mask": "Ausweis-Nr. maskieren",
|
||||
"btn_blur": "Unscharf",
|
||||
"btn_preview": "Vorschau",
|
||||
"btn_show_in_folder": "Im Ordner anzeigen",
|
||||
"btn_move_to_trash": "In den Papierkorb",
|
||||
"btn_undo": "Rückgängig",
|
||||
"btn_export_csv": "CSV",
|
||||
"btn_select_all": "Alle auswählen",
|
||||
"btn_anonymise_flagged": "Markierte anonymisieren",
|
||||
"btn_anonymise_all": "Alle markierten anonymisieren",
|
||||
"btn_cancel": "Abbrechen",
|
||||
"btn_close": "Schließen",
|
||||
"btn_clear": "Leeren",
|
||||
"btn_export_excel": "Bericht als Excel exportieren",
|
||||
"btn_audit_log_short": "Protokoll",
|
||||
"btn_delete_selected": "Auswahl löschen",
|
||||
"btn_about": "Über",
|
||||
"preview_original": "Original",
|
||||
"preview_processed": "Verarbeitet",
|
||||
"preview_anonymise": "Anonymisieren → Vorschau",
|
||||
"preview_mask": "Ausweis-Nr. maskieren → Vorschau",
|
||||
"preview_blur_faces": "{n} Gesicht(er) unscharf → Vorschau",
|
||||
"preview_no_faces": "✓ Keine Gesichter erkannt",
|
||||
"preview_scanning_faces": "Gesichter werden gesucht…",
|
||||
"preview_processing_faces": "Gesichter werden verarbeitet…",
|
||||
"preview_rendering": "Wird geladen…",
|
||||
"preview_click_hint": "Auf ein Dokument klicken, um die Vorschau anzuzeigen",
|
||||
"preview_error": "Vorschaufehler",
|
||||
"preview_unavailable": "Vorschau nicht verfügbar",
|
||||
"preview_not_available": "Vorschau für diesen Dateityp nicht verfügbar",
|
||||
"scan_preparing": "Vorbereitung…",
|
||||
"scan_scanning": "Wird gescannt…",
|
||||
"scan_face_scanning": "{total} Datei(en) auf Gesichter prüfen…",
|
||||
"scan_face_progress": "Gesichter: {index} / {total} — {file}",
|
||||
"scan_eta": "{eta} verbleibend",
|
||||
"scan_stopped": "Scan gestoppt.",
|
||||
"empty_flagged": "Keine markierten Dokumente",
|
||||
"empty_flagged_detail": "Scan starten, um markierte Dokumente anzuzeigen",
|
||||
"empty_flagged_found": "Keine markierten Dokumente gefunden.",
|
||||
"empty_filter": "Keine Dateien entsprechen dem Filter",
|
||||
"empty_scan_hint": "Ordnerpfad eingeben und klicken",
|
||||
"no_audit": "Noch keine Protokolleinträge",
|
||||
"dialog_delete_title": "Dateien löschen?",
|
||||
"dialog_delete_confirm": "In den Papierkorb",
|
||||
"dialog_delete_body": "Die ausgewählten Dateien werden dauerhaft von der Festplatte gelöscht. Diese Aktion kann nicht rückgängig gemacht werden.",
|
||||
"all_trashed": "Alle markierten Dokumente wurden in den Papierkorb verschoben.",
|
||||
"confirm_clear_audit": "Gesamtes Protokoll leeren? Dies kann nicht rückgängig gemacht werden.",
|
||||
"btn_audit_log": "Protokoll",
|
||||
"audit_cleared": "Protokoll geleert",
|
||||
"failed_audit": "Protokoll konnte nicht geladen werden",
|
||||
"audit_action_scan": "Scan",
|
||||
"audit_action_redact": "Schwärzung",
|
||||
"audit_action_blur_faces": "Gesichtsunschärfe",
|
||||
"audit_action_delete": "Löschen",
|
||||
"audit_action_restore": "Wiederherstellen",
|
||||
"audit_action_export": "Export",
|
||||
"audit_files": "Dateien",
|
||||
"audit_flagged": "markiert",
|
||||
"audit_high_risk": "hohes Risiko",
|
||||
"audit_regions": "Bereiche",
|
||||
"audit_faces": "Gesichter",
|
||||
"audit_permanent": "dauerhaft",
|
||||
"audit_trash": "Papierkorb",
|
||||
"audit_files_restored": "Datei(en) wiederhergestellt",
|
||||
"about_title": "Über",
|
||||
"label_python": "Python",
|
||||
"label_spacy": "spaCy-Modell",
|
||||
"label_tesseract": "Tesseract",
|
||||
"label_pymupdf": "PyMuPDF",
|
||||
"label_opencv": "OpenCV",
|
||||
"no_model": "kein Modell installiert",
|
||||
"not_installed": "nicht installiert",
|
||||
"risk_high": "HOCH",
|
||||
"risk_medium": "MITTEL",
|
||||
"risk_low": "NIEDRIG",
|
||||
"reason_cpr_number": "Ausweis-Nr.",
|
||||
"reason_cpr_numbers": "Ausweis-Nummern",
|
||||
"reason_cpr_confirmed": "Ausweis-Nr. mit Schlüsselwortkontext",
|
||||
"reason_unique_individuals": "eindeutige Personen",
|
||||
"reason_cpr_shared": "Ausweis-Nr. in {n} Dateien geteilt",
|
||||
"reason_data_10y": "Daten > 10 Jahre alt",
|
||||
"reason_data_5y": "Daten > 5 Jahre alt",
|
||||
"badge_face": "Gesicht",
|
||||
"badge_shared": "geteilt",
|
||||
"badge_archive": "Archiv",
|
||||
"badge_shared_cpr": "Geteilte Ausweis-Nr.",
|
||||
"lbl_also_in": "auch in",
|
||||
"badge_anonymised": "✓ anonymisiert",
|
||||
"badge_masked": "✓ maskiert",
|
||||
"badge_blurred": "✓ unscharf gemacht",
|
||||
"lbl_anonymised": "Anonymisiert",
|
||||
"lbl_masked": "Ausweis-Nr. maskiert",
|
||||
"lbl_processing": "Wird verarbeitet…",
|
||||
"lbl_error": "Fehler",
|
||||
"lbl_no_pii": "Keine Änderungen — keine personenbez. Daten gefunden",
|
||||
"lbl_working": "Wird bearbeitet…",
|
||||
"lbl_stopping": "Wird gestoppt…",
|
||||
"lbl_loading": "Wird geladen…",
|
||||
"lbl_blurred": "Unscharf gemacht",
|
||||
"lbl_none": "Keine",
|
||||
"lbl_size": "Größe",
|
||||
"lbl_time": "Zeit",
|
||||
"lbl_space": "Leerzeichen",
|
||||
"lbl_scanner": "Scanner",
|
||||
"lbl_document": "Dokument",
|
||||
"lbl_folder": "Ordner",
|
||||
"lbl_no_files_selected": "Keine Dateien ausgewählt",
|
||||
"lbl_selected_1": "Datei ausgewählt",
|
||||
"lbl_selected_n": "Dateien ausgewählt",
|
||||
"lbl_root": "Stamm",
|
||||
"lbl_root_folder": "Stammordner",
|
||||
"lbl_scanning": "Scannen:",
|
||||
"banner_all_clean": "Keine Funde",
|
||||
"banner_files_scanned": "Datei(en) gescannt, keine Ausweis-Nr. gefunden",
|
||||
"banner_need_attention": "Datei(en) erfordern Aufmerksamkeit von",
|
||||
"banner_scanned": "gescannt",
|
||||
"summary_face_blur": "Bild(er) zur Gesichtsunschärfe",
|
||||
"kbd_select": "auswählen",
|
||||
"kbd_delete": "löschen",
|
||||
"kbd_close_preview": "Vorschau schließen",
|
||||
"kbd_select_all": "alle auswählen",
|
||||
"lbl_flagged_docs_1": "markiertes Dokument",
|
||||
"lbl_flagged_docs_n": "markierte Dokumente",
|
||||
"btn_deselect_all": "Alle abwählen",
|
||||
"filter_high_risk": "🔴 Hohes Risiko",
|
||||
"filter_in_archive": "📦 Im Archiv",
|
||||
"log_starting_scan": "Scan gestartet von",
|
||||
"log_found_files": "{n} Datei(en) gefunden",
|
||||
"log_cloud_skipped": "Nur-Cloud-Dateien übersprungen",
|
||||
"log_faces_detected": "Gesicht(er) erkannt",
|
||||
"log_ocr_pages": "Seite(n)",
|
||||
"log_pages_skipped": "Bildseite(n) übersprungen (OCR aktivieren)",
|
||||
"log_scan_complete": "Scan abgeschlossen",
|
||||
"log_files_with_cpr": "Datei(en) mit Ausweis-Nr.",
|
||||
"log_no_faces_in": "Keine Gesichter erkannt in",
|
||||
"pii_phone": "Telefon",
|
||||
"pii_email": "E-Mail",
|
||||
"pii_iban": "IBAN",
|
||||
"pii_bank_account": "Bankkonto",
|
||||
"pii_name": "Name",
|
||||
"pii_address": "Adresse",
|
||||
"pii_org": "Org",
|
||||
"lbl_other_pii": "Weitere PII",
|
||||
"lbl_found": "gefunden",
|
||||
"btn_clear_results_cache": "Ergebniscache leeren",
|
||||
"btn_clear_ocr_cache": "OCR-Cache leeren",
|
||||
"confirm_clear_results_cache": "Alle gespeicherten Scan-Ergebnisse löschen? Das Raster wird geleert.",
|
||||
"confirm_clear_ocr_cache": "OCR-Cache leeren? Beim nächsten Scan wird OCR erneut durchgeführt.",
|
||||
"log_cache_cleared": "Ergebniscache geleert",
|
||||
"log_ocr_cache_cleared": "OCR-Cache geleert",
|
||||
"m365_app_name": "GDPRScanner",
|
||||
"m365_sources": "Quellen",
|
||||
"m365_options": "Optionen",
|
||||
"m365_accounts": "Konten",
|
||||
"m365_stats": "Statistik",
|
||||
"m365_src_email": "Outlook",
|
||||
"m365_src_onedrive": "OneDrive",
|
||||
"m365_src_sharepoint": "SharePoint",
|
||||
"m365_src_teams": "Teams",
|
||||
"m365_opt_date_from": "E-Mails/Dateien scannen ab",
|
||||
"m365_opt_date_from_hint": "Leer lassen, um alles zu scannen",
|
||||
"m365_opt_email_body": "E-Mail-Text scannen",
|
||||
"m365_opt_attachments": "Anhänge scannen",
|
||||
"m365_opt_max_attach": "Max. Anhangsgröße (MB)",
|
||||
"m365_opt_max_emails": "Max. E-Mails pro Benutzer",
|
||||
"m365_connect_title": "Mit Microsoft 365 verbinden",
|
||||
"m365_connect_sub": "Geben Sie Ihre Azure-App-Anmeldedaten ein.",
|
||||
"m365_label_client_id": "Client-ID (Anwendungs-ID)",
|
||||
"m365_label_tenant_id": "Mandanten-ID",
|
||||
"m365_label_client_secret": "Client-Geheimnis",
|
||||
"m365_secret_hint": "(optional — ermöglicht organisationsweites Scannen)",
|
||||
"m365_secret_desc_app": "App greift direkt auf Daten aller Benutzer zu (Anwendungsberechtigungen, keine Anmeldung erforderlich).",
|
||||
"m365_secret_desc_delegated": "Sie melden sich als sich selbst an und können nur eigene Daten scannen, sofern Sie kein globaler Administrator sind.",
|
||||
"m365_btn_connect": "Verbinden",
|
||||
"m365_device_code_go": "Gehen Sie zu",
|
||||
"m365_device_code_enter": "und geben Sie diesen Code ein",
|
||||
"m365_btn_cancel_auth": "Abbrechen",
|
||||
"m365_btn_reconfigure": "Neu konfigurieren",
|
||||
"m365_btn_sign_out": "Abmelden",
|
||||
"m365_mode_app": "🔑 App-Modus — organisationsweit",
|
||||
"m365_mode_delegated": "Delegiert",
|
||||
"m365_search_users": "Benutzer suchen…",
|
||||
"m365_add_account_label": "Konto manuell hinzufügen:",
|
||||
"m365_add_account_placeholder": "E-Mail oder UPN",
|
||||
"m365_admin_note": "Es wird nur Ihr Konto angezeigt. Um alle Benutzer aufzulisten, muss ein Administrator <strong>User.Read.All</strong> im Azure Portal genehmigen oder Konten manuell unten hinzufügen.",
|
||||
"m365_btn_scan": "Scannen",
|
||||
"m365_btn_stop": "Stop",
|
||||
"m365_pill_flagged": "markiert",
|
||||
"m365_pill_scanned": "gescannt",
|
||||
"m365_filter_all_sources": "Alle Quellen",
|
||||
"m365_filter_email": "Outlook",
|
||||
"m365_filter_onedrive": "OneDrive",
|
||||
"m365_filter_sharepoint": "SharePoint",
|
||||
"m365_filter_teams": "Teams",
|
||||
"m365_empty_hint": "Quellen auswählen und auf <strong>Scannen</strong> klicken,<br>um Dokumente mit Ausweis-Nr. zu finden",
|
||||
"m365_stat_flagged": "Markiert",
|
||||
"m365_stat_cpr": "Treffer",
|
||||
"m365_preview_open": "In M365 öffnen ↗",
|
||||
"m365_preview_close": "Schließen",
|
||||
"m365_auth_mode_app": "Auth-Modus: Anwendung (Client-Anmeldedaten — organisationsweit)",
|
||||
"m365_auth_mode_delegated": "Auth-Modus: Delegiert (Gerätecode — nur angemeldeter Benutzer)",
|
||||
"m365_phase_teams_index": "Teams-Mitgliedschaftsindex wird erstellt…",
|
||||
"m365_phase_sharepoint": "SharePoint-Dateien werden gesammelt…",
|
||||
"m365_btn_about": "Info",
|
||||
"m365_stat_scanned": "Gescannt",
|
||||
"m365_no_users_found": "Keine Benutzer gefunden",
|
||||
"m365_no_users_match": "Keine Benutzer entsprechen",
|
||||
"m365_no_cpr_found": "Keine Ausweis-Nummern gefunden.",
|
||||
"m365_no_matches": "Keine Treffer",
|
||||
"m365_btn_export_excel": "Excel exportieren",
|
||||
"m365_export_no_data": "Keine Ergebnisse zum Exportieren.",
|
||||
"m365_phase_emails": "Outlook-Nachrichten werden gesammelt",
|
||||
"m365_phase_onedrive": "OneDrive wird gesammelt",
|
||||
"m365_phase_teams": "Teams werden gesammelt",
|
||||
"m365_preset_1yr": "1 J.",
|
||||
"m365_preset_2yr": "2 J.",
|
||||
"m365_preset_5yr": "5 J.",
|
||||
"m365_preset_10yr": "10 J.",
|
||||
"m365_preset_any": "Alle",
|
||||
"m365_auth_mode_app_short": "Application permissions · client credentials",
|
||||
"m365_auth_mode_delegated_short": "Delegated permissions · device code flow",
|
||||
"m365_info_permissions": "Berechtigungen",
|
||||
"m365_info_signin": "Anmeldung erforderlich",
|
||||
"m365_info_scope": "Umfang",
|
||||
"m365_info_scope_org": "Alle Benutzer im Mandanten",
|
||||
"m365_info_scope_user": "Nur angemeldeter Benutzer",
|
||||
"m365_info_consent": "Administratorzustimmung",
|
||||
"m365_info_required": "Erforderlich",
|
||||
"m365_info_admin": "Global Administrator",
|
||||
"m365_info_expands_scope": "Erweitert Umfang auf alle Benutzer",
|
||||
"m365_info_no": "Nein",
|
||||
"m365_info_yes": "Ja",
|
||||
"m365_info_app_desc": "Die App authentifiziert sich mit einem Client Secret und greift direkt über Microsoft Graph auf die Daten aller Benutzer zu — kein interaktives Anmelden erforderlich. Ideal für automatisierte oder geplante Scans.",
|
||||
"m365_info_delegated_desc": "Die App handelt im Namen des angemeldeten Benutzers über den device code flow. Standardmäßig sind nur die Daten dieses Benutzers zugänglich. Ein Global Administrator kann eine umfassendere Zustimmung erteilen.",
|
||||
"m365_filter_search": "Suchen…",
|
||||
"m365_filter_clear": "Löschen",
|
||||
"m365_btn_list_view": "Liste",
|
||||
"m365_btn_grid_view": "Raster",
|
||||
"m365_log_found_items": "Gefunden",
|
||||
"m365_log_items_to_scan": "Element(e) zum Scannen",
|
||||
"m365_log_starting_scan": "Scan starten:",
|
||||
"m365_log_accounts": "Konto(en)",
|
||||
"m365_btn_bulk_delete": "Löschen",
|
||||
"m365_bulk_delete_title": "Massenlöschung",
|
||||
"m365_bulk_delete_sub": "E-Mails werden in „Gelöschte Elemente\" verschoben · Dateien kommen in den Papierkorb",
|
||||
"m365_bulk_filter_heading": "Filter für zu löschende Elemente",
|
||||
"m365_bulk_filter_source": "Quellentyp",
|
||||
"m365_bulk_filter_min_cpr": "Min. CPR-Treffer",
|
||||
"m365_bulk_filter_older_than": "Älter als Datum",
|
||||
"m365_bulk_no_match": "Keine Elemente entsprechen diesen Kriterien.",
|
||||
"m365_bulk_match_count": "Element(e) werden gelöscht",
|
||||
"m365_bulk_confirm_q": "Element(e) werden dauerhaft gelöscht. Fortfahren?",
|
||||
"m365_bulk_deleting": "Löschen…",
|
||||
"m365_bulk_deleted": "gelöscht",
|
||||
"m365_bulk_failed": "fehlgeschlagen",
|
||||
"m365_bulk_delete_confirm": "Passende Elemente löschen",
|
||||
"m365_delete_confirm": "Löschen",
|
||||
"m365_delete_warning": "Dies kann nicht rückgängig gemacht werden.",
|
||||
"m365_log_deleted": "Gelöscht:",
|
||||
"m365_log_delete_failed": "Löschen fehlgeschlagen:",
|
||||
"m365_log_bulk_done": "Massenlöschung:",
|
||||
"m365_log_older_than": "älter als",
|
||||
"m365_eta_left": "verbleibend",
|
||||
"btn_all": "Alle",
|
||||
"btn_errors": "Fehler",
|
||||
"log_copy": "Kopieren",
|
||||
"btn_none": "Keine",
|
||||
"m365_btn_resume": "Fortsetzen",
|
||||
"m365_btn_start_fresh": "Neu starten",
|
||||
"m365_resume_banner": "Vorheriger Scan unterbrochen — {scanned} gescannt, {flagged} gefunden",
|
||||
"m365_log_resuming": "Scan fortsetzen:",
|
||||
"m365_log_already_scanned": "bereits gescannt — übersprungen",
|
||||
"m365_resuming": "Fortsetzen — bereits gescannte Elemente werden übersprungen…",
|
||||
"m365_opt_delta": "Delta-Scan",
|
||||
"m365_opt_delta_hint": "Nur geänderte Elemente (nach erstem Vollscan)",
|
||||
"m365_delta_tokens_saved": "Tokens gespeichert",
|
||||
"m365_delta_clear": "Tokens löschen",
|
||||
"m365_delta_cleared": "Delta-Tokens gelöscht — nächster Scan wird ein Vollscan.",
|
||||
"m365_delta_mode": "Delta-Modus — nur geänderte Elemente werden abgerufen…",
|
||||
"m365_smtp_title": "✉ Bericht senden",
|
||||
"m365_smtp_desc": "Excel-Bericht nach dem Scan per E-Mail senden.",
|
||||
"m365_smtp_host": "SMTP-Server",
|
||||
"m365_smtp_port": "Port",
|
||||
"m365_smtp_user": "Benutzername",
|
||||
"m365_smtp_pass": "Passwort",
|
||||
"m365_smtp_from": "Absenderadresse",
|
||||
"m365_smtp_tls": "STARTTLS",
|
||||
"m365_smtp_ssl": "SSL",
|
||||
"m365_smtp_recipients": "Empfänger",
|
||||
"m365_smtp_recipients_hint": "Komma- oder semikolongetrennt",
|
||||
"m365_smtp_save": "Speichern",
|
||||
"m365_smtp_send": "Jetzt senden",
|
||||
"m365_smtp_saved": "Einstellungen gespeichert.",
|
||||
"m365_smtp_sending": "Senden…",
|
||||
"m365_smtp_sent": "Bericht gesendet.",
|
||||
"m365_smtp_no_recipients": "Mindestens einen Empfänger eingeben.",
|
||||
"m365_smtp_configure": "Konfigurieren",
|
||||
"m365_smtp_from_hint": "(optional — Standard ist Benutzername)",
|
||||
"m365_subject_title": "🔍 Betroffenenperson",
|
||||
"m365_subject_btn": "Nachschlagen",
|
||||
"m365_subject_desc": "Alle markierten Elemente mit einer bestimmten CPR-Nummer finden. Die CPR-Nummer wird vor der Abfrage gehasht und nie im Klartext gespeichert.",
|
||||
"m365_subject_placeholder": "TTMMJJ-XXXX",
|
||||
"m365_subject_search": "Suchen",
|
||||
"m365_subject_searching": "Suche…",
|
||||
"m365_subject_found": "Element(e) gefunden",
|
||||
"m365_subject_not_found": "Keine markierten Elemente für diese CPR-Nummer gefunden.",
|
||||
"m365_subject_delete_all": "Alle für diese Person löschen",
|
||||
"m365_subject_delete_confirm": "Element(e) werden dauerhaft gelöscht. Fortfahren?",
|
||||
"m365_disposition_label": "Disposition",
|
||||
"m365_disp_unreviewed": "Nicht geprüft",
|
||||
"m365_disp_retain_legal": "Aufbewahren — gesetzliche Pflicht",
|
||||
"m365_disp_retain_legit": "Aufbewahren — berechtigtes Interesse",
|
||||
"m365_disp_retain_contract": "Aufbewahren — Vertrag",
|
||||
"m365_disp_delete_sched": "Löschen — geplant",
|
||||
"m365_disp_personal_use": "Privatgebrauch — außerhalb des Geltungsbereichs",
|
||||
"m365_disp_deleted": "Gelöscht",
|
||||
"m365_disp_save": "Speichern",
|
||||
"m365_disp_saved": "✓ Gespeichert",
|
||||
"m365_opt_retention": "Aufbewahrungsrichtlinie",
|
||||
"m365_opt_retention_hint": "Elemente älter als N Jahre markieren und löschen",
|
||||
"m365_ret_years": "Aufbewahrungsjahre",
|
||||
"m365_ret_fy_end": "Geschäftsjahresende",
|
||||
"m365_ret_fy_rolling": "Rollierend (ab heute)",
|
||||
"m365_ret_fy_dec": "31. Dez. (Bogføringsloven)",
|
||||
"m365_ret_fy_jun": "30. Jun.",
|
||||
"m365_ret_fy_mar": "31. Mär.",
|
||||
"m365_ret_mode_rolling": "rollierend",
|
||||
"m365_ret_mode_fiscal": "Geschäftsjahr",
|
||||
"m365_ret_cutoff_hint": "Elemente geändert vor",
|
||||
"m365_ret_cutoff_flagged": "werden als überfällig markiert",
|
||||
"m365_overdue_found": "überfällige(s) Element(e) gefunden",
|
||||
"m365_bulk_overdue_btn": "Übrfällige filtern",
|
||||
"m365_bulk_clear_filters": "Filter löschen",
|
||||
"m365_btn_export_article30": "Art.30",
|
||||
"m365_article30_done": "Artikel-30-Bericht bereit.",
|
||||
"a30_title": "DSGVO Artikel 30",
|
||||
"a30_subtitle": "Verzeichnis von Verarbeitungstätigkeiten",
|
||||
"a30_generated": "Erstellt",
|
||||
"a30_confidential": "Vertraulich — DSGVO-Compliance-Dokument",
|
||||
"a30_s1": "1. Zusammenfassung",
|
||||
"a30_scan_date": "Scan-Datum",
|
||||
"a30_items_scanned": "Gescannte Elemente",
|
||||
"a30_flagged": "Markierte Elemente",
|
||||
"a30_cpr_hits": "CPR-Treffer gesamt",
|
||||
"a30_data_subjects": "Geschätzte betroffene Personen",
|
||||
"a30_overdue": "Überfällige Elemente (>5 J.)",
|
||||
"a30_by_source": "Aufschlüsselung nach Quelle",
|
||||
"a30_col_source": "Quelle",
|
||||
"a30_col_items": "Elemente",
|
||||
"a30_col_cpr": "CPR-Treffer",
|
||||
"a30_col_overdue": "Überfällig",
|
||||
"a30_s2": "2. Identifizierte Kategorien personenbezogener Daten",
|
||||
"a30_s2_intro": "Folgende Kategorien personenbezogener Daten wurden beim Scan erkannt.",
|
||||
"a30_col_gdpr_class": "DSGVO-Klassifizierung",
|
||||
"a30_cpr_label": "CPR-Nummern (dän. Personalausweis-Nr.)",
|
||||
"a30_cpr_class": "Art. 9 — nationales Kennzeichen",
|
||||
"a30_pii_class_9": "Art. 9 — Gesundheit/sensibel",
|
||||
"a30_pii_class_4": "Art. 4 — personenbezogene Daten",
|
||||
"a30_s3": "3. Datenbestand",
|
||||
"a30_s3_intro": "Alle markierten Elemente sind unten mit Speicherort, Aufbewahrungsstatus und Compliance-Disposition aufgeführt.",
|
||||
"a30_col_name": "Name / Betreff",
|
||||
"a30_col_account": "Konto",
|
||||
"a30_col_modified": "Geändert",
|
||||
"a30_col_disp": "Disposition",
|
||||
"a30_more_items": "weitere Elemente nicht angezeigt. Für die vollständige Liste den Excel-Bericht exportieren.",
|
||||
"a30_s4": "4. Aufbewahrungsanalyse",
|
||||
"a30_s4_intro": "Folgende Elemente überschreiten den 5-jährigen Aufbewahrungszeitraum und sollten gemäß DSGVO Artikel 5(1)(e) — Speicherbegrenzung — auf Löschung geprüft werden.",
|
||||
"a30_s5": "5. Compliance-Trend",
|
||||
"a30_s5_intro": "Anzahl markierter Elemente über die letzten Scans (neuester zuerst).",
|
||||
"a30_col_scan_date": "Scan-Datum",
|
||||
"a30_col_scan_type": "Scan-Typ",
|
||||
"a30_scan_delta": "Delta",
|
||||
"a30_scan_full": "Vollständig",
|
||||
"a30_s6": "6. Methodik und Rechtsgrundlage",
|
||||
"a30_method_title": "Scan-Methodik",
|
||||
"a30_method_1": "CPR-Nummern werden per Mustererkennung gegen das offizielle dänische CPR-Format (TTMMJJ-XXXX) mit vollständiger Datumsvalidierung erkannt.",
|
||||
"a30_method_2": "Weitere personenbezogene Daten (Telefonnummern, E-Mail-Adressen, IBANs, Bankkontonummern, Namen, Adressen und Organisationen) werden per Regex und spaCy NER erkannt.",
|
||||
"a30_method_3": "CPR-Nummern in der Datenbank dieses Dokuments sind SHA-256-gehasht und werden nie im Klartext gespeichert.",
|
||||
"a30_method_4": "Die Überprüfung umfasst Exchange-Postfächer (alle Ordner einschl. Gesendete Elemente), OneDrive, SharePoint und Microsoft Teams-Kanaldateien über die Microsoft Graph API. Bei Verbindung umfasst das Google Workspace-Scanning Gmail und Google Drive über ein Dienstkonto mit domänenweiter Delegierung. Lokale und netzwerkbasierte Dateifreigaben (SMB) werden direkt gescannt.",
|
||||
"a30_gdpr_title": "Referenzierte DSGVO-Artikel",
|
||||
"a30_gdpr_1": "Artikel 5(1)(c) — Datenminimierung: nur notwendige Daten dürfen aufbewahrt werden",
|
||||
"a30_gdpr_2": "Artikel 5(1)(e) — Speicherbegrenzung: Daten dürfen nicht länger als nötig aufbewahrt werden",
|
||||
"a30_gdpr_3": "Artikel 9 — Besondere Kategorien: Gesundheitsdaten, Strafregister, Gewerkschaftszugehörigkeit usw. erfordern eine ausdrückliche Rechtsgrundlage",
|
||||
"a30_gdpr_4": "Artikel 15 — Auskunftsrecht: betroffene Personen können Auskunft über ihre Daten verlangen",
|
||||
"a30_gdpr_5": "Artikel 17 — Recht auf Löschung: betroffene Personen können Löschung verlangen",
|
||||
"a30_gdpr_6": "Artikel 30 — Verzeichnis von Verarbeitungstätigkeiten: dieses Dokument erfüllt die Pflicht",
|
||||
"a30_disp_unreviewed": "Nicht geprüft",
|
||||
"a30_disp_retain_legal": "Aufbewahren — Gesetzliche Pflicht",
|
||||
"a30_disp_retain_legit": "Aufbewahren — Berechtigtes Interesse",
|
||||
"a30_disp_retain_contract": "Aufbewahren — Vertrag",
|
||||
"a30_disp_delete_sched": "Löschen — Geplant",
|
||||
"a30_disp_personal_use": "Personal use — out of GDPR scope (Art. 2(2)(c))",
|
||||
"a30_disp_deleted": "Gelöscht",
|
||||
"a30_s6_short": "Methodik und Rechtsgrundlage",
|
||||
"m365_role_all": "Alle",
|
||||
"m365_role_staff": "Personal",
|
||||
"m365_role_student": "Schüler",
|
||||
"a30_s_dellog": "Löschprotokoll",
|
||||
"a30_dellog_intro": "Element(e) mit personenbezogenen Daten wurden über GDPRScanner gelöscht. Dieses Protokoll erfüllt die Rechenschaftspflicht gemäß DSGVO Art. 5 Abs. 2.",
|
||||
"a30_dellog_by_reason": "Löschungen nach Grund",
|
||||
"a30_dellog_records": "Löscheinträge",
|
||||
"a30_col_reason": "Grund",
|
||||
"a30_col_count": "Anzahl",
|
||||
"a30_col_deleted_at": "Gelöscht am",
|
||||
"a30_col_deleted_by": "Gelöscht von",
|
||||
"a30_reason_manual": "Manuell (einzelne Karte)",
|
||||
"a30_reason_bulk": "Massенlöschung",
|
||||
"a30_reason_retention": "Aufbewahrungsrichtlinie",
|
||||
"a30_reason_dsr": "Löschanfrage der betroffenen Person (Art. 17)",
|
||||
"m365_filter_all_disp": "Alle Dispositionen",
|
||||
"m365_trend_title": "Trend",
|
||||
"m365_trend_flagged": "Markiert",
|
||||
"m365_trend_overdue": "Überfällig",
|
||||
"m365_filter_all_transfer": "Alle Elemente",
|
||||
"m365_filter_ext_recipient": "Externer Empfänger",
|
||||
"m365_filter_ext_share": "Extern freigegeben",
|
||||
"m365_filter_shared": "Freigegeben",
|
||||
"m365_badge_ext_recipient": "Extern",
|
||||
"m365_badge_shared": "Freigegeben",
|
||||
"a30_s_special": "Besondere Kategorien personenbezogener Daten (Artikel 9)",
|
||||
"a30_special_intro": "Element(e) als besondere Kategorie gemäß DSGVO Artikel 9 erkannt. Erfordert explizite Rechtsgrundlage und Datenschutz-Folgenabschätzung (DSFA).",
|
||||
"a30_special_by_cat": "Erkannte Kategorien",
|
||||
"a30_special_items": "Betroffene Elemente (bis zu 50)",
|
||||
"a30_col_category": "Kategorie",
|
||||
"a30_cat_health": "Gesundheitsdaten (Art. 9)",
|
||||
"a30_cat_mental": "Psychische Gesundheit (Art. 9)",
|
||||
"a30_cat_criminal": "Strafregister (Art. 10)",
|
||||
"a30_cat_union": "Gewerkschaftsmitgliedschaft (Art. 9)",
|
||||
"a30_cat_religion": "Religiöse Überzeugungen (Art. 9)",
|
||||
"a30_cat_ethnicity": "Ethnische Herkunft (Art. 9)",
|
||||
"a30_cat_political": "Politische Meinungen (Art. 9)",
|
||||
"a30_cat_biometric": "Biometrische Daten (Art. 9)",
|
||||
"a30_cat_sexual": "Sexuelle Orientierung (Art. 9)",
|
||||
"m365_filter_all_special": "Alle Risikostufen",
|
||||
"m365_filter_special_only": "Art. 9 besondere Kategorien",
|
||||
"m365_badge_special": "Art.9",
|
||||
"m365_phase_scanning": "Scanne…",
|
||||
"a30_special_cat": "Besondere Kategorien (Artikel 9)",
|
||||
"a30_special_cat_note": "Diese Elemente enthalten Gesundheits-, Straf-, biometrische, religiöse, ethnische, gewerkschaftliche, politische oder sexuelle Daten. Eine ausdrückliche Rechtsgrundlage (Art. 9 Abs. 2) und ggf. eine DSFA (Art. 35) sind erforderlich.",
|
||||
"a30_col_special": "Art. 9",
|
||||
"a30_pii_phone": "Telefonnummern",
|
||||
"a30_pii_email": "E-Mail-Adressen",
|
||||
"a30_pii_iban": "IBAN-Bankkontonummern",
|
||||
"a30_pii_bank": "Bankkontonummern",
|
||||
"a30_pii_name": "Personennamen (NER)",
|
||||
"a30_pii_address": "Adressen (NER)",
|
||||
"a30_pii_org": "Organisationen (NER)",
|
||||
"a30_col_cpr_short": "CPR",
|
||||
"a30_inv_staff": "Personal / Lehrkräfte",
|
||||
"a30_inv_students": "Schüler",
|
||||
"a30_student_consent_note": "Hinweis: Schülerkonten in dänischen Folkeskoler (Schüler unter 15 Jahren) erfordern die elterliche Einwilligung zur Verarbeitung personenbezogener Daten gemäß Databeskyttelsesloven §6. Elemente in Schülerkonten dürfen nicht automatisch gelöscht werden — jede Maßnahme erfordert die Genehmigung der Schulleitung und für Schüler unter 15 Jahren die Benachrichtigung der Eltern oder Erziehungsberechtigten als Rechteinhaber gemäß DSGVO Artikel 8.",
|
||||
"m365_profile_label": "Profil:",
|
||||
"m365_profile_placeholder": "— Profil wählen —",
|
||||
"m365_profile_save_tip": "Aktuelle Einstellungen als Profil speichern",
|
||||
"m365_profile_save_prompt": "Profilname:",
|
||||
"m365_profile_applied": "Profil geladen",
|
||||
"m365_profile_saved": "Profil gespeichert",
|
||||
"m365_profile_manage_btn": "Profile",
|
||||
"m365_profile_clear_btn": "Löschen",
|
||||
"m365_profile_save_btn": "Speichern",
|
||||
"m365_profile_manage_title": "⚙ Profile verwalten",
|
||||
"m365_profile_no_profiles": "Noch keine gespeicherten Profile. Verwenden Sie 💾, um die aktuellen Seitenleisteneinstellungen als Profil zu speichern.",
|
||||
"m365_profile_use": "Verwenden",
|
||||
"m365_profile_edit": "Bearbeiten",
|
||||
"m365_profile_duplicate": "Duplizieren",
|
||||
"m365_profile_delete": "Löschen",
|
||||
"m365_profile_delete_confirm": "Profil löschen",
|
||||
"m365_profile_duplicated": "Profil dupliziert",
|
||||
"m365_profile_deleted": "Profil gelöscht",
|
||||
"m365_profile_never": "nie",
|
||||
"m365_profile_last_run": "Zuletzt ausgeführt",
|
||||
"m365_profile_name_placeholder": "Profilname",
|
||||
"m365_profile_desc_placeholder": "Beschreibung (optional)",
|
||||
"m365_profile_name_required": "Profilname ist erforderlich.",
|
||||
"m365_db_title": "🗄 Datenbank",
|
||||
"m365_db_export": "Exportieren",
|
||||
"m365_db_import": "Importieren",
|
||||
"m365_db_export_error": "Export fehlgeschlagen",
|
||||
"m365_db_exported": "Datenbank exportiert",
|
||||
"m365_db_import_title": "📥 Datenbank importieren",
|
||||
"m365_db_import_desc": "Wählen Sie eine zuvor exportierte .zip-Datei. Zusammenführen fügt Dispositionen und Löschprotokoll hinzu. Ersetzen löscht alles und stellt vollständig wieder her.",
|
||||
"m365_db_import_file": "ZIP-Datei",
|
||||
"m365_db_import_mode": "Modus:",
|
||||
"m365_db_import_merge": "Zusammenführen (sicher)",
|
||||
"m365_db_import_replace": "Ersetzen (vollständige Wiederherstellung)",
|
||||
"m365_db_import_replace_warn": "⚠ Der Ersetzungsmodus löscht alle vorhandenen Scandaten vor der Wiederherstellung. Stellen Sie sicher, dass Sie zuerst eine Sicherungskopie von ~/.gdpr_scanner.db haben.",
|
||||
"m365_db_import_replace_confirm": "Der Ersetzungsmodus löscht ALLE vorhandenen Scandaten und stellt aus dem Archiv wieder her.\\n\\nStellen Sie sicher, dass Sie eine manuelle Sicherungskopie von ~/.gdpr_scanner.db haben.\\n\\nFortfahren?",
|
||||
"m365_db_import_no_file": "Bitte wählen Sie zuerst eine ZIP-Datei aus.",
|
||||
"m365_db_importing": "Importiere…",
|
||||
"m365_db_imported": "Importiert",
|
||||
"m365_db_import_run": "Importieren",
|
||||
"m365_opt_scan_photos": "Fotos nach Gesichtern durchsuchen",
|
||||
"m365_opt_scan_photos_hint": "Markiert Bilder mit erkannten Gesichtern als biometrische Daten gem. Art. 9. Langsamer — bei Bedarf aktivieren.",
|
||||
"m365_filter_photo_only": "📷 Fotos / biometrisch",
|
||||
"m365_badge_faces": "Gesichter",
|
||||
"a30_photo_items": "Fotos mit erkannten Gesichtern (Art. 9 biometrisch)",
|
||||
"a30_photo_note": "Fotografien identifizierbarer Personen sind biometrische Daten gemäß Art. 9 DSGVO. Die Aufbewahrung erfordert eine dokumentierte Rechtsgrundlage gemäß Art. 9(2). Für Schulfotos von Schülern unter 15 Jahren ist die elterliche Einwilligung erforderlich (Databeskyttelsesloven §6). Siehe Leitfaden des Datatilsynet zur Schulfotografie.",
|
||||
"a30_s_photos": "Fotografien und biometrische Daten (Artikel 9)",
|
||||
"a30_photo_intro": "Bilddatei(en) mit erkanntem(n) Gesicht(ern) wurden beim Scan gefunden. Fotografien identifizierbarer Personen stellen biometrische Daten gemäß DSGVO Artikel 9 dar und unterliegen demselben erhöhten Schutz wie Gesundheits- oder Strafregisterdaten.",
|
||||
"a30_photo_guidance": "Aufbewahrungshinweise",
|
||||
"a30_photo_g1": "Fotos dürfen nur aufbewahrt werden, solange der ursprüngliche Zweck gültig ist (Art. 5(1)(b) — Zweckbindung).",
|
||||
"a30_photo_g2": "Schüler unter 15 Jahren benötigen die elterliche Einwilligung (Databeskyttelsesloven §6). Die Einwilligung muss freiwillig, spezifisch und dokumentiert sein.",
|
||||
"a30_photo_g3": "Fotos auf öffentlich zugänglichen Websites müssen umgehend entfernt werden, wenn eine Person die Organisation verlässt oder ihre Einwilligung widerruft (Art. 17 — Recht auf Löschung).",
|
||||
"a30_photo_g4": "Historische/Archivnutzung kann eine längere Aufbewahrung gemäß Art. 89 nur mit spezifischen Schutzmaßnahmen und einer Einzelfallbewertung rechtfertigen.",
|
||||
"a30_photo_col_faces": "Gesichter",
|
||||
"a30_method_5": "Wenn die Fotoerfassung aktiviert ist, werden Bilddateien mithilfe der OpenCV Haar-Cascade-Gesichtserkennung analysiert, um Fotografien von Personen zu identifizieren (Art. 9 biometrische Daten).",
|
||||
"m365_role_cycle_tip": "Klicken zum Ändern der Rolle (wechselt Schüler → Personal → Sonstiges → Auto)",
|
||||
"m365_role_set": "Rolle gesetzt",
|
||||
"m365_role_cleared": "Rollenüberschreibung entfernt",
|
||||
"m365_sku_debug_title": "🔍 Mandanten-SKU-IDs",
|
||||
"m365_sku_debug_desc": "Dies sind die rohen SKU-IDs, die Ihren Benutzern zugewiesen sind. Alle mit ❓ unbekannt markierten sind nicht in classification/m365_skus.json — kopieren Sie sie unter student_ids oder staff_ids und starten Sie neu.",
|
||||
"m365_sku_debug_none": "Keine Lizenzdaten zurückgegeben — überprüfen Sie, ob die App die Berechtigung User.Read.All hat.",
|
||||
"m365_file_sources_title": "📁 Dateiquellen",
|
||||
"m365_file_sources_manage": "Verwalten",
|
||||
"m365_file_sources_empty": "Keine Dateiquellen konfiguriert. Fügen Sie unten einen lokalen Ordner oder eine Netzwerkfreigabe hinzu.",
|
||||
"m365_file_sources_add": "Quelle hinzufügen",
|
||||
"m365_fsrc_label": "Bezeichnung",
|
||||
"m365_fsrc_path": "Pfad",
|
||||
"m365_fsrc_smb_detected": "SMB/CIFS-Netzwerkfreigabe erkannt",
|
||||
"m365_fsrc_smb_host": "SMB-Host",
|
||||
"m365_fsrc_smb_user": "Benutzername",
|
||||
"m365_fsrc_smb_pw": "Passwort",
|
||||
"m365_fsrc_smb_pw_hint": "Das Passwort wird im OS-Schlüsselbund gespeichert — nie in einer Datei.",
|
||||
"m365_fsrc_add_btn": "Hinzufügen",
|
||||
"m365_fsrc_saved": "Quelle gespeichert",
|
||||
"m365_fsrc_saving": "Speichern...",
|
||||
"m365_fsrc_path_required": "Pfad ist erforderlich.",
|
||||
"m365_fsrc_scan_btn": "Scannen",
|
||||
"m365_fsrc_scan_start": "Datei-Scan wird gestartet",
|
||||
"m365_src_group_files": "Dateiquellen",
|
||||
"m365_no_sources": "Keine Quellen ausgewählt — nichts zu scannen.",
|
||||
"m365_fsrc_name_required": "Name ist erforderlich.",
|
||||
"m365_srcmgmt_title": "⚙ Quellenverwaltung",
|
||||
"m365_srcmgmt_tab_m365": "Microsoft 365",
|
||||
"m365_srcmgmt_tab_google": "Google Workspace",
|
||||
"m365_srcmgmt_tab_files": "Dateiquellen",
|
||||
"m365_srcmgmt_connection": "Verbindung",
|
||||
"m365_srcmgmt_azure_creds": "Azure-Anmeldedaten",
|
||||
"m365_srcmgmt_sources_m365": "Zu scannende Quellen",
|
||||
"m365_srcmgmt_connected": "Verbunden",
|
||||
"m365_srcmgmt_not_connected": "Nicht verbunden",
|
||||
"m365_srcmgmt_coming_soon": "Kommt bald",
|
||||
"m365_srcmgmt_google_sub": "Gmail- und Google Drive-Scan erscheint hier, wenn implementiert.",
|
||||
"m365_srcmgmt_file_sources": "Dateiquellen",
|
||||
"m365_sources_manage_btn": "Quellen",
|
||||
"m365_connecting": "Verbinde...",
|
||||
"m365_err_creds_required": "Client-ID und Mandanten-ID erforderlich",
|
||||
"m365_signout_confirm": "Trennen und Anmeldedaten löschen?",
|
||||
"m365_btn_settings": "Einstellungen",
|
||||
"m365_settings_title": "⚙ Einstellungen",
|
||||
"m365_settings_tab_general": "Allgemein",
|
||||
"m365_settings_tab_email": "E-Mail-Bericht",
|
||||
"m365_settings_tab_database": "Datenbank",
|
||||
"m365_settings_appearance": "Erscheinungsbild",
|
||||
"m365_settings_language": "Sprache",
|
||||
"m365_settings_theme": "Design",
|
||||
"m365_settings_db_actions": "Aktionen",
|
||||
"m365_db_reset": "DB zurücksetzen",
|
||||
"m365_db_reset_confirm": "Datenbank zurücksetzen? Alle Scan-Ergebnisse werden gelöscht.",
|
||||
"m365_db_reset_done": "Datenbank zurückgesetzt",
|
||||
"m365_db_scans": "Scans",
|
||||
"m365_smtp_saving": "Speichern...",
|
||||
"m365_settings_admin_pin": "Admin-PIN",
|
||||
"m365_settings_pin_hint": "Erforderlich für destruktive Aktionen (z.B. DB zurücksetzen). Leer lassen zum Deaktivieren.",
|
||||
"m365_settings_current_pin": "Aktueller PIN",
|
||||
"m365_settings_new_pin": "Neuer PIN",
|
||||
"m365_settings_confirm_pin": "PIN bestätigen",
|
||||
"m365_settings_pin_set": "Admin-PIN ist gesetzt",
|
||||
"m365_settings_pin_not_set": "Kein PIN — DB zurücksetzen ist ungeschützt",
|
||||
"m365_settings_pin_required": "PIN ist erforderlich.",
|
||||
"m365_settings_pin_mismatch": "PINs stimmen nicht überein.",
|
||||
"m365_settings_pin_wrong": "Falscher PIN — Zurücksetzen abgebrochen.",
|
||||
"m365_settings_pin_saved": "PIN gespeichert",
|
||||
"m365_settings_enter_pin": "Admin-PIN eingeben",
|
||||
"m365_settings_enter_pin_reset": "Admin-PIN eingeben, um die Datenbank zurückzusetzen.",
|
||||
"btn_confirm": "Bestätigen",
|
||||
"m365_log_scan_started": "Scan gestartet",
|
||||
"m365_preview_local_file": "Lokale Datei — keine Cloud-Vorschau verfügbar",
|
||||
"m365_badge_gps": "GPS-Standort",
|
||||
"a30_gps_items": "Elemente mit GPS-Standortdaten (Art. 4 — Standort = personenbezogene Daten)",
|
||||
"a30_exif_pii_items": "Elemente mit EXIF-PII (Autor, Beschreibung, Schlüsselwörter)",
|
||||
"a30_gps_title": "Elemente mit GPS-Standortdaten",
|
||||
"a30_gps_intro": "Die folgenden Dateien enthalten GPS-Koordinaten in den EXIF-Metadaten. Standortdaten sind personenbezogene Daten gemäß Art. 4 DSGVO.",
|
||||
"a30_gps_col_lat": "Breitengrad",
|
||||
"a30_gps_col_lon": "Längengrad",
|
||||
"m365_accounts_disabled_tip": "Wählen Sie eine Microsoft 365-Quelle aus, um die Kontoauswahl zu aktivieren",
|
||||
"m365_smtp_test": "Testen",
|
||||
"m365_smtp_testing": "Test-E-Mail wird gesendet…",
|
||||
"m365_smtp_test_ok": "Test-E-Mail gesendet",
|
||||
"m365_smtp_test_fail": "Verbindung fehlgeschlagen",
|
||||
"m365_fsrc_edit_btn": "Bearbeiten",
|
||||
"m365_fsrc_save_changes": "Änderungen speichern",
|
||||
"m365_settings_tab_scheduler": "Zeitplaner",
|
||||
"m365_sched_title": "Geplante Scans",
|
||||
"m365_sched_next": "Nächste",
|
||||
"m365_sched_hint": "Scans automatisch zu einem festgelegten Zeitpunkt ausführen. Erfordert eine aktive M365-Verbindung (Anwendungsmodus empfohlen).",
|
||||
"m365_sched_no_aps": "⚠ APScheduler nicht installiert. Ausführen: pip install apscheduler",
|
||||
"m365_sched_enabled": "Zeitplaner aktivieren",
|
||||
"m365_sched_frequency": "Häufigkeit",
|
||||
"m365_sched_dow": "Wochentag",
|
||||
"m365_sched_dom": "Tag im Monat",
|
||||
"m365_sched_time": "Uhrzeit",
|
||||
"m365_sched_profile": "Profil",
|
||||
"m365_sched_profile_last": "Zuletzt gespeicherte Einstellungen",
|
||||
"m365_sched_after_scan": "Nach dem Scan",
|
||||
"m365_sched_auto_email": "Bericht automatisch senden",
|
||||
"m365_sched_auto_retention": "Aufbewahrungsrichtlinie durchsetzen",
|
||||
"m365_sched_status": "Status",
|
||||
"m365_sched_run_now": "▶ Jetzt ausführen",
|
||||
"m365_sched_add": "+ Geplante Suche hinzufügen",
|
||||
"m365_sched_name": "Name",
|
||||
"m365_sched_editor_new": "Neue geplante Suche",
|
||||
"m365_sched_editor_edit": "Geplante Suche bearbeiten",
|
||||
"m365_sched_name_required": "Name ist erforderlich",
|
||||
"m365_sched_no_runs": "Noch keine geplanten Läufe",
|
||||
"m365_sched_freq_daily": "Täglich",
|
||||
"m365_sched_freq_weekly": "Wöchentlich",
|
||||
"m365_sched_freq_monthly": "Monatlich",
|
||||
"m365_sched_dow_mon": "Montag",
|
||||
"m365_sched_dow_tue": "Dienstag",
|
||||
"m365_sched_dow_wed": "Mittwoch",
|
||||
"m365_sched_dow_thu": "Donnerstag",
|
||||
"m365_sched_dow_fri": "Freitag",
|
||||
"m365_sched_dow_sat": "Samstag",
|
||||
"m365_sched_dow_sun": "Sonntag",
|
||||
"btn_save": "Speichern",
|
||||
"m365_settings_about": "Über",
|
||||
"m365_settings_save_pin": "PIN speichern",
|
||||
"m365_sse_reconnecting": "Verbindung zum laufenden Scan wird wiederhergestellt…",
|
||||
"m365_sse_replay_note": "Live-Log fortgesetzt — frühere Einträge vom laufenden Scan wiedergegeben.",
|
||||
"m365_google_sa_creds": "Dienstkonto-Anmeldedaten",
|
||||
"m365_google_sa_key_file": "JSON-Schlüssel des Dienstkontos",
|
||||
"m365_google_sa_key_hint": "Download von Google Cloud Console → IAM & Admin → Dienstkonten → Schlüssel → Schlüssel hinzufügen → JSON",
|
||||
"m365_google_admin_email": "Admin-E-Mail",
|
||||
"m365_google_admin_email_hint": "Wird für domänenweite Delegierung verwendet — muss ein Workspace-Superadmin sein.",
|
||||
"m365_google_libs_missing": "Bibliotheken nicht installiert",
|
||||
"m365_google_key_required": "Wählen Sie eine JSON-Schlüsseldatei für das Dienstkonto aus",
|
||||
"m365_google_invalid_json": "Ungültige JSON-Datei",
|
||||
"m365_srcmgmt_sources_google": "Zu scannende Quellen",
|
||||
"m365_google_src_gmail": "Gmail",
|
||||
"m365_google_src_drive": "Google Drive",
|
||||
"m365_google_setup_title": "Einrichtung in Google Workspace erforderlich:",
|
||||
"m365_google_setup_step1": "Erstellen Sie ein Google Cloud-Projekt und aktivieren Sie Gmail API + Drive API + Admin SDK.",
|
||||
"m365_google_setup_step2": "Erstellen Sie ein Dienstkonto, laden Sie den JSON-Schlüssel herunter und aktivieren Sie die domänenweite Delegierung.",
|
||||
"m365_google_setup_step3": "Fügen Sie in Workspace Admin → Sicherheit → API-Steuerung → Domänenweite Delegierung die Client-ID des Dienstkontos mit folgenden Scopes hinzu:",
|
||||
"m365_google_auth_mode": "Authentifizierungsmodus",
|
||||
"m365_google_mode_workspace": "Workspace",
|
||||
"m365_google_mode_personal": "Persönliches Konto",
|
||||
"m365_google_personal_creds": "Persönliches Konto",
|
||||
"m365_google_personal_client_id": "Client-ID",
|
||||
"m365_google_personal_client_secret": "Client-Secret",
|
||||
"m365_google_personal_hint": "Erstellen Sie OAuth 2.0 Desktop-Anmeldedaten in der Google Cloud Console und fügen Sie Client-ID und Secret oben ein.",
|
||||
"m365_google_personal_sign_in": "Anmelden",
|
||||
"m365_google_personal_creds_required": "Client-ID und Secret erforderlich",
|
||||
"m365_google_personal_setup_title": "Einrichtung erforderlich:",
|
||||
"m365_google_personal_setup_step1": "Erstellen Sie in der Google Cloud Console ein Projekt und aktivieren Sie Gmail API + Drive API.",
|
||||
"m365_google_personal_setup_step2": "Erstellen Sie OAuth 2.0-Anmeldedaten (Typ: Desktop-App) und kopieren Sie Client-ID und Secret.",
|
||||
"m365_google_personal_setup_step3": "Fügen Sie Ihre Google-Konto-E-Mail zur Liste der Testnutzer im OAuth-Einwilligungsbildschirm hinzu.",
|
||||
"m365_auth_waiting": "Warte auf Anmeldung…",
|
||||
"role_staff": "Personal",
|
||||
"role_student": "Schüler",
|
||||
"role_other": "Andere",
|
||||
|
||||
"m365_settings_tab_security": "Sicherheit",
|
||||
|
||||
"share_modal_title": "Ergebnisse teilen",
|
||||
"share_modal_desc": "Schreibgeschützte Links ermöglichen einem Datenschutzbeauftragten oder Prüfer, Ergebnisse einzusehen und Verwendungszwecke zuzuweisen, ohne Zugriff auf Scansteuerung oder Anmeldedaten.",
|
||||
"share_new_link": "Neuer Link",
|
||||
"share_label_lbl": "Bezeichnung (optional)",
|
||||
"share_label_placeholder": "z. B. DSB-Prüfung 2026",
|
||||
"share_expires_in": "Läuft ab in",
|
||||
"share_expires_never": "Nie",
|
||||
"share_expires_7d": "7 Tage",
|
||||
"share_expires_30d": "30 Tage",
|
||||
"share_expires_90d": "90 Tage",
|
||||
"share_expires_1y": "1 Jahr",
|
||||
"share_create": "Erstellen",
|
||||
"share_copy_link_prompt": "Link kopieren:",
|
||||
"share_active_links": "Aktive Links",
|
||||
"share_viewer_pin_label": "Betrachter-PIN:",
|
||||
"share_pin_configure": "Konfigurieren",
|
||||
"share_pin_set": "Festgelegt",
|
||||
"share_pin_not_set": "Nicht festgelegt",
|
||||
"share_no_links": "Keine aktiven Links.",
|
||||
"share_unlabelled": "Ohne Bezeichnung",
|
||||
"share_expires_prefix": "Läuft ab:",
|
||||
"share_last_used": "Zuletzt verwendet:",
|
||||
"share_revoke": "Widerrufen",
|
||||
"share_copied": "Kopiert!",
|
||||
"share_load_error": "Links konnten nicht geladen werden.",
|
||||
"share_create_error": "Link konnte nicht erstellt werden:",
|
||||
"share_revoke_confirm": "Diesen Link widerrufen? Alle Nutzer verlieren sofort den Zugriff.",
|
||||
"share_revoke_error": "Widerrufen fehlgeschlagen:",
|
||||
|
||||
"viewer_pin_group_title": "Betrachter-PIN",
|
||||
"viewer_pin_desc": "Eine numerische PIN (4–8 Stellen), die es jedem ermöglicht, <code style=\"font-size:10px\">/view</code> im Browser zu öffnen und schreibgeschützt auf Ergebnisse zuzugreifen \u2013 ohne Token-Link.",
|
||||
"viewer_pin_clear": "PIN löschen",
|
||||
"viewer_pin_is_set": "Betrachter-PIN ist festgelegt",
|
||||
"viewer_pin_not_set_msg": "Keine PIN festgelegt — /view erfordert einen Token-Link",
|
||||
"viewer_pin_format": "PIN muss 4–8 Stellen haben.",
|
||||
"viewer_pin_saving": "Wird gespeichert…",
|
||||
"viewer_pin_saved": "PIN gespeichert",
|
||||
"viewer_pin_clear_confirm": "Betrachter-PIN entfernen? /view erfordert dann wieder einen Token-Link.",
|
||||
"viewer_pin_cleared": "PIN gelöscht"
|
||||
}
|
||||
773
lang/en.json
Normal file
773
lang/en.json
Normal file
@ -0,0 +1,773 @@
|
||||
{
|
||||
"app_name": "Document Scanner",
|
||||
"label_root_folder": "Root Folder",
|
||||
"label_older_than": "Flag files with data older than",
|
||||
"placeholder_folder": "/path/to/documents",
|
||||
"btn_scan": "Run Scan",
|
||||
"btn_stop": "Stop scan",
|
||||
"toggle_anonymise": "Anonymise",
|
||||
"toggle_mask": "Mask CPR only",
|
||||
"toggle_blur_faces": "Blur faces",
|
||||
"toggle_skip_cloud": "Skip cloud-only files",
|
||||
"toggle_ocr": "OCR scanned PDFs",
|
||||
"label_face_sensitivity": "Face sensitivity",
|
||||
"face_sensitivity_high": "High",
|
||||
"face_sensitivity_low": "Low",
|
||||
"face_sensitivity_hint": "Higher = fewer false detections",
|
||||
"label_ocr_language": "OCR language",
|
||||
"label_ocr_dpi": "DPI (quality vs speed)",
|
||||
"lang_danish": "Danish",
|
||||
"lang_danish_english": "Danish + English",
|
||||
"lang_english": "English",
|
||||
"lang_norwegian": "Norwegian",
|
||||
"lang_swedish": "Swedish",
|
||||
"lang_german": "German",
|
||||
"lang_french": "French",
|
||||
"lang_dutch": "Dutch",
|
||||
"time_any": "Any",
|
||||
"time_1y": "1 year",
|
||||
"time_2y": "2 years",
|
||||
"time_5y": "5 years",
|
||||
"time_10y": "10 years",
|
||||
"stat_scanned": "Files scanned",
|
||||
"stat_flagged": "Flagged",
|
||||
"stat_high_risk": "High risk",
|
||||
"stat_cpr": "CPR numbers found",
|
||||
"col_file": "File",
|
||||
"col_cpr": "CPR numbers",
|
||||
"col_oldest": "Oldest date",
|
||||
"col_risk": "Risk",
|
||||
"col_action": "Action",
|
||||
"col_detail": "Detail",
|
||||
"sort_name_az": "Name A–Z",
|
||||
"sort_name_za": "Name Z–A",
|
||||
"sort_cpr_desc": "CPR count ↓",
|
||||
"sort_oldest_desc": "Oldest date ↓",
|
||||
"sort_risk_desc": "Risk ↓",
|
||||
"sort_size_desc": "Size ↓",
|
||||
"filter_all_types": "All types",
|
||||
"filter_pdf": "PDF",
|
||||
"filter_word": "Word",
|
||||
"filter_excel": "Excel",
|
||||
"filter_image": "Image",
|
||||
"placeholder_search": "Search filename…",
|
||||
"btn_anonymise": "Anonymise",
|
||||
"btn_mask": "Mask CPR",
|
||||
"btn_blur": "Blur",
|
||||
"btn_preview": "Preview",
|
||||
"btn_show_in_folder": "Show in folder",
|
||||
"btn_move_to_trash": "Move to trash",
|
||||
"btn_undo": "Undo",
|
||||
"btn_export_csv": "CSV",
|
||||
"btn_select_all": "Select all",
|
||||
"btn_anonymise_flagged": "Anonymise flagged",
|
||||
"btn_anonymise_all": "Anonymise all flagged",
|
||||
"btn_cancel": "Cancel",
|
||||
"btn_close": "Close",
|
||||
"btn_clear": "Clear",
|
||||
"preview_original": "Original",
|
||||
"preview_processed": "Processed",
|
||||
"preview_anonymise": "Anonymise → preview",
|
||||
"preview_mask": "Mask CPR → preview",
|
||||
"preview_blur_faces": "Blur {n} face(s) → preview",
|
||||
"preview_no_faces": "✓ No faces detected",
|
||||
"preview_scanning_faces": "Scanning for faces…",
|
||||
"preview_processing_faces": "Processing faces…",
|
||||
"preview_rendering": "Rendering…",
|
||||
"scan_preparing": "Preparing…",
|
||||
"scan_scanning": "Scanning…",
|
||||
"scan_face_scanning": "Scanning {total} file(s) for faces…",
|
||||
"scan_face_progress": "Faces: {index} / {total} — {file}",
|
||||
"scan_eta": "{eta} left",
|
||||
"scan_stopped": "Scan stopped.",
|
||||
"empty_flagged": "No flagged documents",
|
||||
"empty_flagged_detail": "Run a scan to view flagged documents",
|
||||
"empty_filter": "No files match your filter",
|
||||
"no_audit": "No audit entries yet",
|
||||
"dialog_delete_title": "Delete files?",
|
||||
"dialog_delete_confirm": "Move to trash",
|
||||
"all_trashed": "All flagged documents moved to trash.",
|
||||
"btn_audit_log": "Audit Log",
|
||||
"audit_cleared": "Audit log cleared",
|
||||
"failed_audit": "Failed to load audit log",
|
||||
"about_title": "About",
|
||||
"label_python": "Python",
|
||||
"label_spacy": "spaCy model",
|
||||
"label_tesseract": "Tesseract",
|
||||
"label_pymupdf": "PyMuPDF",
|
||||
"label_opencv": "OpenCV",
|
||||
"no_model": "no model installed",
|
||||
"not_installed": "not installed",
|
||||
"btn_about": "About",
|
||||
"lbl_size": "Size",
|
||||
"lbl_time": "Time",
|
||||
"lbl_space": "Space",
|
||||
"lbl_loading": "Loading…",
|
||||
"lbl_blurred": "Blurred",
|
||||
"lbl_none": "None",
|
||||
"lbl_scanner": "Scanner",
|
||||
"lbl_document": "Document",
|
||||
"lbl_folder": "Folder",
|
||||
"empty_scan_hint": "Set a folder path and click",
|
||||
"empty_flagged_found": "No flagged documents found.",
|
||||
"preview_click_hint": "Click a document to preview it",
|
||||
"kbd_select": "select",
|
||||
"kbd_delete": "delete",
|
||||
"kbd_close_preview": "close preview",
|
||||
"kbd_select_all": "select all",
|
||||
"sort_cpr_asc": "CPR count ↑",
|
||||
"preview_error": "Preview error",
|
||||
"preview_unavailable": "Preview unavailable",
|
||||
"preview_not_available": "Preview not available for this file type",
|
||||
"lbl_anonymised": "Anonymised",
|
||||
"lbl_masked": "Masked CPR",
|
||||
"lbl_processing": "Processing…",
|
||||
"lbl_error": "Error",
|
||||
"lbl_no_pii": "No changes — no PII found",
|
||||
"badge_anonymised": "✓ anonymised",
|
||||
"badge_masked": "✓ masked",
|
||||
"badge_blurred": "✓ blurred",
|
||||
"lbl_working": "Working…",
|
||||
"lbl_stopping": "Stopping…",
|
||||
"lbl_no_files_selected": "No files selected",
|
||||
"lbl_selected_1": "file selected",
|
||||
"lbl_selected_n": "files selected",
|
||||
"dialog_delete_body": "This will permanently delete the selected files from disk. This action cannot be undone.",
|
||||
"lbl_flagged_docs_1": "flagged document",
|
||||
"lbl_flagged_docs_n": "flagged documents",
|
||||
"banner_all_clean": "All clean",
|
||||
"banner_files_scanned": "file(s) scanned, no CPR numbers found",
|
||||
"banner_need_attention": "file(s) need attention out of",
|
||||
"banner_scanned": "scanned",
|
||||
"summary_face_blur": "image(s) to face-blur",
|
||||
"badge_face": "face",
|
||||
"badge_shared": "shared",
|
||||
"badge_archive": "archive",
|
||||
"badge_shared_cpr": "Shared CPR",
|
||||
"lbl_also_in": "also in",
|
||||
"filter_shared_cpr": "⚠ Shared CPR",
|
||||
"risk_high": "HIGH",
|
||||
"risk_medium": "MEDIUM",
|
||||
"risk_low": "LOW",
|
||||
"reason_cpr_number": "CPR number",
|
||||
"reason_cpr_numbers": "CPR numbers",
|
||||
"reason_cpr_confirmed": "CPR(s) with keyword context",
|
||||
"reason_unique_individuals": "unique individuals",
|
||||
"reason_cpr_shared": "CPR shared across {n} files",
|
||||
"reason_data_10y": "data > 10 years old",
|
||||
"reason_data_5y": "data > 5 years old",
|
||||
"btn_export_excel": "Export report as Excel",
|
||||
"btn_audit_log_short": "Audit log",
|
||||
"btn_delete_selected": "Delete selected",
|
||||
"audit_action_scan": "Scan",
|
||||
"audit_action_redact": "Redact",
|
||||
"audit_action_blur_faces": "Blur faces",
|
||||
"audit_action_delete": "Delete",
|
||||
"audit_action_restore": "Restore",
|
||||
"audit_action_export": "Export",
|
||||
"audit_files": "files",
|
||||
"audit_flagged": "flagged",
|
||||
"audit_high_risk": "high risk",
|
||||
"audit_regions": "regions",
|
||||
"audit_faces": "faces",
|
||||
"audit_permanent": "permanent",
|
||||
"audit_trash": "trash",
|
||||
"audit_files_restored": "file(s) restored",
|
||||
"confirm_clear_audit": "Clear the entire audit log? This cannot be undone.",
|
||||
"lang_spanish": "Spanish",
|
||||
"lang_italian": "Italian",
|
||||
"lang_portuguese": "Portuguese",
|
||||
"lang_finnish": "Finnish",
|
||||
"lang_polish": "Polish",
|
||||
"lang_czech": "Czech",
|
||||
"lang_russian": "Russian",
|
||||
"lang_arabic": "Arabic",
|
||||
"lang_chinese_simplified": "Chinese (Simplified)",
|
||||
"lang_chinese_traditional": "Chinese (Traditional)",
|
||||
"lang_japanese": "Japanese",
|
||||
"lang_korean": "Korean",
|
||||
"lbl_root": "root",
|
||||
"lbl_root_folder": "root folder",
|
||||
"lbl_scanning": "Scanning:",
|
||||
"btn_deselect_all": "Deselect all",
|
||||
"filter_high_risk": "🔴 High risk",
|
||||
"filter_in_archive": "📦 In archive",
|
||||
"log_starting_scan": "Starting scan of",
|
||||
"log_found_files": "Found {n} file(s) to scan",
|
||||
"log_cloud_skipped": "cloud-only skipped",
|
||||
"log_faces_detected": "face(s) detected",
|
||||
"log_ocr_pages": "page(s)",
|
||||
"log_pages_skipped": "image page(s) skipped (enable OCR)",
|
||||
"log_scan_complete": "Scan complete",
|
||||
"log_files_with_cpr": "file(s) with CPR",
|
||||
"log_no_faces_in": "No faces detected in",
|
||||
"pii_phone": "phone",
|
||||
"pii_email": "email",
|
||||
"pii_iban": "IBAN",
|
||||
"pii_bank_account": "bank account",
|
||||
"pii_name": "name",
|
||||
"pii_address": "address",
|
||||
"pii_org": "org",
|
||||
"lbl_other_pii": "Other PII",
|
||||
"lbl_found": "found",
|
||||
"btn_clear_results_cache": "Clear results cache",
|
||||
"btn_clear_ocr_cache": "Clear OCR cache",
|
||||
"confirm_clear_results_cache": "Clear all cached scan results? The grid will be cleared.",
|
||||
"confirm_clear_ocr_cache": "Clear OCR cache? This will force re-OCR on next scan.",
|
||||
"log_cache_cleared": "Results cache cleared",
|
||||
"log_ocr_cache_cleared": "OCR cache cleared",
|
||||
"m365_app_name": "GDPRScanner",
|
||||
"m365_sources": "Sources",
|
||||
"m365_options": "Options",
|
||||
"m365_accounts": "Accounts",
|
||||
"m365_stats": "Stats",
|
||||
"m365_src_email": "Outlook",
|
||||
"m365_src_onedrive": "OneDrive",
|
||||
"m365_src_sharepoint": "SharePoint",
|
||||
"m365_src_teams": "Teams",
|
||||
"m365_opt_date_from": "Scan emails/files from",
|
||||
"m365_opt_date_from_hint": "Leave blank to scan all",
|
||||
"m365_opt_email_body": "Scan email body",
|
||||
"m365_opt_attachments": "Scan attachments",
|
||||
"m365_opt_max_attach": "Max attachment size (MB)",
|
||||
"m365_opt_max_emails": "Max emails per user",
|
||||
"m365_connect_title": "Connect to Microsoft 365",
|
||||
"m365_connect_sub": "Enter your Azure app credentials to sign in.",
|
||||
"m365_label_client_id": "Client ID (Application ID)",
|
||||
"m365_label_tenant_id": "Tenant ID",
|
||||
"m365_label_client_secret": "Client Secret",
|
||||
"m365_secret_hint": "(optional — enables org-wide scanning)",
|
||||
"m365_secret_desc_app": "app accesses all users' data directly (Application permissions, no sign-in required).",
|
||||
"m365_secret_desc_delegated": "you sign in as yourself and can only scan your own data unless you're a Global Admin.",
|
||||
"m365_btn_connect": "Connect",
|
||||
"m365_device_code_go": "Go to",
|
||||
"m365_device_code_enter": "and enter this code",
|
||||
"m365_btn_cancel_auth": "Cancel",
|
||||
"m365_btn_reconfigure": "Reconfigure",
|
||||
"m365_btn_sign_out": "Sign out",
|
||||
"m365_mode_app": "🔑 App mode — org-wide",
|
||||
"m365_mode_delegated": "Delegated",
|
||||
"m365_search_users": "Search users…",
|
||||
"m365_add_account_label": "Add account manually:",
|
||||
"m365_add_account_placeholder": "email or UPN",
|
||||
"m365_admin_note": "Only showing your account. To list all users, an admin must grant <strong>User.Read.All</strong> consent in Azure Portal, or add accounts manually below.",
|
||||
"m365_btn_scan": "Scan",
|
||||
"m365_btn_stop": "Stop",
|
||||
"m365_pill_flagged": "flagged",
|
||||
"m365_pill_scanned": "scanned",
|
||||
"m365_filter_all_sources": "All sources",
|
||||
"m365_filter_email": "Outlook",
|
||||
"m365_filter_onedrive": "OneDrive",
|
||||
"m365_filter_sharepoint": "SharePoint",
|
||||
"m365_filter_teams": "Teams",
|
||||
"m365_empty_hint": "Select sources and click <strong>Scan</strong><br>to find documents with CPR numbers",
|
||||
"m365_stat_flagged": "Flagged",
|
||||
"m365_stat_cpr": "CPR hits",
|
||||
"m365_preview_open": "Open in M365 ↗",
|
||||
"m365_preview_close": "Close",
|
||||
"m365_auth_mode_app": "Auth mode: Application (client credentials — org-wide)",
|
||||
"m365_auth_mode_delegated": "Auth mode: Delegated (device code — signed-in user only)",
|
||||
"m365_phase_teams_index": "Building Teams membership index…",
|
||||
"m365_phase_sharepoint": "Collecting SharePoint files…",
|
||||
"m365_btn_about": "About",
|
||||
"m365_stat_scanned": "Scanned",
|
||||
"m365_no_users_found": "No users found",
|
||||
"m365_no_users_match": "No users match",
|
||||
"m365_no_cpr_found": "No CPR numbers found.",
|
||||
"m365_no_matches": "No matches",
|
||||
"m365_btn_export_excel": "Export Excel",
|
||||
"m365_export_no_data": "No results to export.",
|
||||
"m365_phase_emails": "Collecting Outlook messages",
|
||||
"m365_phase_onedrive": "Collecting OneDrive",
|
||||
"m365_phase_teams": "Collecting Teams",
|
||||
"m365_preset_1yr": "1 yr",
|
||||
"m365_preset_2yr": "2 yr",
|
||||
"m365_preset_5yr": "5 yr",
|
||||
"m365_preset_10yr": "10 yr",
|
||||
"m365_preset_any": "Any",
|
||||
"m365_auth_mode_app_short": "Application permissions · client credentials",
|
||||
"m365_auth_mode_delegated_short": "Delegated permissions · device code flow",
|
||||
"m365_info_permissions": "Permissions",
|
||||
"m365_info_signin": "Sign-in required",
|
||||
"m365_info_scope": "Scope",
|
||||
"m365_info_scope_org": "All users in tenant",
|
||||
"m365_info_scope_user": "Signed-in user only",
|
||||
"m365_info_consent": "Admin consent",
|
||||
"m365_info_required": "Required",
|
||||
"m365_info_admin": "Global Admin",
|
||||
"m365_info_expands_scope": "Expands scope to all users",
|
||||
"m365_info_no": "No",
|
||||
"m365_info_yes": "Yes",
|
||||
"m365_info_app_desc": "The app authenticates with a Client Secret and accesses all users' data directly via Microsoft Graph — no interactive sign-in needed. Ideal for automated or scheduled scans.",
|
||||
"m365_info_delegated_desc": "The app acts on behalf of the signed-in user via the device code flow. By default only that user's data is accessible. A Global Admin can grant broader consent to scan all users.",
|
||||
"m365_filter_search": "Search…",
|
||||
"m365_filter_clear": "Clear",
|
||||
"m365_btn_list_view": "List",
|
||||
"m365_btn_grid_view": "Grid",
|
||||
"m365_log_found_items": "Found",
|
||||
"m365_log_items_to_scan": "item(s) to scan",
|
||||
"m365_log_starting_scan": "Starting scan:",
|
||||
"m365_log_accounts": "account(s)",
|
||||
"m365_btn_bulk_delete": "Delete",
|
||||
"m365_bulk_delete_title": "Bulk Delete",
|
||||
"m365_bulk_delete_sub": "Emails move to Deleted Items · Files go to the recycle bin",
|
||||
"m365_bulk_filter_heading": "Filter what to delete",
|
||||
"m365_bulk_filter_source": "Source type",
|
||||
"m365_bulk_filter_min_cpr": "Min CPR hits",
|
||||
"m365_bulk_filter_older_than": "Older than date",
|
||||
"m365_bulk_no_match": "No items match these criteria.",
|
||||
"m365_bulk_match_count": "item(s) will be deleted",
|
||||
"m365_bulk_confirm_q": "item(s) will be permanently deleted. Continue?",
|
||||
"m365_bulk_deleting": "Deleting…",
|
||||
"m365_bulk_deleted": "deleted",
|
||||
"m365_bulk_failed": "failed",
|
||||
"m365_bulk_delete_confirm": "Delete matching items",
|
||||
"m365_delete_confirm": "Delete",
|
||||
"m365_delete_warning": "This cannot be undone.",
|
||||
"m365_log_deleted": "Deleted:",
|
||||
"m365_log_delete_failed": "Delete failed:",
|
||||
"m365_log_bulk_done": "Bulk delete:",
|
||||
"m365_log_older_than": "older than",
|
||||
"m365_eta_left": "left",
|
||||
"btn_all": "All",
|
||||
"btn_errors": "Errors",
|
||||
"log_copy": "Copy",
|
||||
"btn_none": "None",
|
||||
"m365_btn_resume": "Resume",
|
||||
"m365_btn_start_fresh": "Start fresh",
|
||||
"m365_resume_banner": "Previous scan interrupted — {scanned} scanned, {flagged} found",
|
||||
"m365_log_resuming": "Resuming scan:",
|
||||
"m365_log_already_scanned": "already scanned — skipped",
|
||||
"m365_resuming": "Resuming — skipping already-scanned items…",
|
||||
"m365_opt_delta": "Delta scan",
|
||||
"m365_opt_delta_hint": "Changed items only (after first full scan)",
|
||||
"m365_delta_tokens_saved": "Tokens saved",
|
||||
"m365_delta_clear": "Clear tokens",
|
||||
"m365_delta_cleared": "Delta tokens cleared — next scan will be a full scan.",
|
||||
"m365_delta_mode": "Delta mode — fetching changed items only…",
|
||||
"m365_smtp_title": "✉ Email report",
|
||||
"m365_smtp_desc": "Send the Excel report by email after scanning.",
|
||||
"m365_smtp_host": "SMTP host",
|
||||
"m365_smtp_port": "Port",
|
||||
"m365_smtp_user": "Username",
|
||||
"m365_smtp_pass": "Password",
|
||||
"m365_smtp_from": "From address",
|
||||
"m365_smtp_tls": "STARTTLS",
|
||||
"m365_smtp_ssl": "SSL",
|
||||
"m365_smtp_recipients": "Recipients",
|
||||
"m365_smtp_recipients_hint": "Comma or semicolon separated",
|
||||
"m365_smtp_save": "Save",
|
||||
"m365_smtp_send": "Send now",
|
||||
"m365_smtp_saved": "Settings saved.",
|
||||
"m365_smtp_sending": "Sending…",
|
||||
"m365_smtp_sent": "Report sent.",
|
||||
"m365_smtp_no_recipients": "Enter at least one recipient.",
|
||||
"m365_smtp_configure": "Configure",
|
||||
"m365_smtp_from_hint": "(optional — defaults to username)",
|
||||
"m365_subject_title": "🔍 Data subject lookup",
|
||||
"m365_subject_btn": "Look up",
|
||||
"m365_subject_desc": "Find all flagged items containing a given CPR number. The CPR is hashed before querying and never stored in plaintext.",
|
||||
"m365_subject_placeholder": "DDMMYY-XXXX",
|
||||
"m365_subject_search": "Search",
|
||||
"m365_subject_searching": "Searching…",
|
||||
"m365_subject_found": "item(s) found",
|
||||
"m365_subject_not_found": "No flagged items found for this CPR number.",
|
||||
"m365_subject_delete_all": "Delete all for this person",
|
||||
"m365_subject_delete_confirm": "item(s) will be permanently deleted. Continue?",
|
||||
"m365_disposition_label": "Disposition",
|
||||
"m365_disp_unreviewed": "Unreviewed",
|
||||
"m365_disp_retain_legal": "Retain — legal obligation",
|
||||
"m365_disp_retain_legit": "Retain — legitimate interest",
|
||||
"m365_disp_retain_contract": "Retain — contract",
|
||||
"m365_disp_delete_sched": "Delete — scheduled",
|
||||
"m365_disp_personal_use": "Personal use — out of scope",
|
||||
"m365_disp_deleted": "Deleted",
|
||||
"m365_disp_save": "Save",
|
||||
"m365_disp_saved": "✓ Saved",
|
||||
"m365_opt_retention": "Retention policy",
|
||||
"m365_opt_retention_hint": "Flag and delete items older than N years",
|
||||
"m365_ret_years": "Retention years",
|
||||
"m365_ret_fy_end": "Fiscal year end",
|
||||
"m365_ret_fy_rolling": "Rolling (from today)",
|
||||
"m365_ret_fy_dec": "31 Dec (Bogføringsloven)",
|
||||
"m365_ret_fy_jun": "30 Jun",
|
||||
"m365_ret_fy_mar": "31 Mar",
|
||||
"m365_ret_mode_rolling": "rolling",
|
||||
"m365_ret_mode_fiscal": "fiscal year",
|
||||
"m365_ret_cutoff_hint": "Items modified before",
|
||||
"m365_ret_cutoff_flagged": "will be flagged as overdue",
|
||||
"m365_overdue_found": "overdue item(s) found",
|
||||
"m365_bulk_overdue_btn": "Filter overdue",
|
||||
"m365_bulk_clear_filters": "Clear filters",
|
||||
"m365_btn_export_article30": "Art.30",
|
||||
"m365_article30_done": "Article 30 report ready.",
|
||||
"a30_title": "GDPR Article 30",
|
||||
"a30_subtitle": "Register of Processing Activities",
|
||||
"a30_generated": "Generated",
|
||||
"a30_confidential": "Confidential — GDPR compliance document",
|
||||
"a30_s1": "1. Summary",
|
||||
"a30_scan_date": "Scan date",
|
||||
"a30_items_scanned": "Items scanned",
|
||||
"a30_flagged": "Flagged items",
|
||||
"a30_cpr_hits": "Total CPR hits",
|
||||
"a30_data_subjects": "Estimated data subjects",
|
||||
"a30_overdue": "Overdue items (>5 yrs)",
|
||||
"a30_by_source": "Breakdown by source",
|
||||
"a30_col_source": "Source",
|
||||
"a30_col_items": "Items",
|
||||
"a30_col_cpr": "CPR hits",
|
||||
"a30_col_overdue": "Overdue",
|
||||
"a30_s2": "2. Personal Data Categories Identified",
|
||||
"a30_s2_intro": "The following categories of personal data were detected during scanning.",
|
||||
"a30_col_gdpr_class": "GDPR classification",
|
||||
"a30_cpr_label": "CPR numbers (Danish personal ID)",
|
||||
"a30_cpr_class": "Art. 9 — national identifier",
|
||||
"a30_pii_class_9": "Art. 9 — health/sensitive",
|
||||
"a30_pii_class_4": "Art. 4 — personal data",
|
||||
"a30_s3": "3. Data Inventory",
|
||||
"a30_s3_intro": "All flagged items are listed below with location, retention status, and compliance disposition.",
|
||||
"a30_col_name": "Name / Subject",
|
||||
"a30_col_account": "Account",
|
||||
"a30_col_modified": "Modified",
|
||||
"a30_col_disp": "Disposition",
|
||||
"a30_more_items": "additional items not shown. Export the Excel report for the complete list.",
|
||||
"a30_s4": "4. Retention Analysis",
|
||||
"a30_s4_intro": "The following items exceed the 5-year retention threshold and should be reviewed for deletion under GDPR Article 5(1)(e) — storage limitation.",
|
||||
"a30_s5": "5. Compliance Trend",
|
||||
"a30_s5_intro": "Flagged item counts over the last scans (most recent first).",
|
||||
"a30_col_scan_date": "Scan date",
|
||||
"a30_col_scan_type": "Scan type",
|
||||
"a30_scan_delta": "Delta",
|
||||
"a30_scan_full": "Full",
|
||||
"a30_s6": "6. Methodology and Legal Basis",
|
||||
"a30_method_title": "Scanning methodology",
|
||||
"a30_method_1": "CPR numbers are detected using pattern matching against the official Danish CPR format (DDMMYY-XXXX) with full date validation and century-digit verification per the CPR register rules.",
|
||||
"a30_method_2": "Additional personal data (phone numbers, email addresses, IBANs, bank accounts, names, addresses, and organisations) is detected using regular expressions and spaCy NER.",
|
||||
"a30_method_3": "CPR numbers stored in this document's database are SHA-256 hashed and never stored in plaintext.",
|
||||
"a30_method_4": "Scanning covers Exchange mailboxes (all folders including Sent Items), OneDrive, SharePoint, and Microsoft Teams channel files via the Microsoft Graph API. When connected, Google Workspace scanning covers Gmail and Google Drive via a service account with domain-wide delegation. Local and network (SMB) file shares are scanned directly.",
|
||||
"a30_gdpr_title": "GDPR Articles referenced",
|
||||
"a30_gdpr_1": "Article 5(1)(c) — Data minimisation: only necessary data should be retained",
|
||||
"a30_gdpr_2": "Article 5(1)(e) — Storage limitation: data must not be kept longer than necessary",
|
||||
"a30_gdpr_3": "Article 9 — Special categories: health, criminal, trade union, and similar data require explicit legal basis",
|
||||
"a30_gdpr_4": "Article 15 — Right of access: data subjects may request information about their data",
|
||||
"a30_gdpr_5": "Article 17 — Right to erasure: data subjects may request deletion",
|
||||
"a30_gdpr_6": "Article 30 — Records of processing activities: this document satisfies the obligation",
|
||||
"a30_disp_unreviewed": "Unreviewed",
|
||||
"a30_disp_retain_legal": "Retain — Legal obligation",
|
||||
"a30_disp_retain_legit": "Retain — Legitimate interest",
|
||||
"a30_disp_retain_contract": "Retain — Contract",
|
||||
"a30_disp_delete_sched": "Delete — Scheduled",
|
||||
"a30_disp_personal_use": "Personal use — out of GDPR scope (Art. 2(2)(c))",
|
||||
"a30_disp_deleted": "Deleted",
|
||||
"a30_s6_short": "Methodology and Legal Basis",
|
||||
"m365_role_all": "All",
|
||||
"m365_role_staff": "Staff",
|
||||
"m365_role_student": "Student",
|
||||
"a30_s_dellog": "Deletion Audit Log",
|
||||
"a30_dellog_intro": "item(s) containing personal data have been deleted via GDPRScanner. This log satisfies the accountability obligation under GDPR Article 5(2).",
|
||||
"a30_dellog_by_reason": "Deletions by reason",
|
||||
"a30_dellog_records": "Deletion records",
|
||||
"a30_col_reason": "Reason",
|
||||
"a30_col_count": "Count",
|
||||
"a30_col_deleted_at": "Deleted at",
|
||||
"a30_col_deleted_by": "Deleted by",
|
||||
"a30_reason_manual": "Manual (individual card delete)",
|
||||
"a30_reason_bulk": "Bulk delete",
|
||||
"a30_reason_retention": "Retention policy enforcement",
|
||||
"a30_reason_dsr": "Data subject erasure request (Art. 17)",
|
||||
"m365_filter_all_disp": "All dispositions",
|
||||
"m365_trend_title": "Trend",
|
||||
"m365_trend_flagged": "Flagged",
|
||||
"m365_trend_overdue": "Overdue",
|
||||
"m365_filter_all_transfer": "All items",
|
||||
"m365_filter_ext_recipient": "External recipient",
|
||||
"m365_filter_ext_share": "Externally shared",
|
||||
"m365_filter_shared": "Shared",
|
||||
"m365_badge_ext_recipient": "External",
|
||||
"m365_badge_shared": "Shared",
|
||||
"a30_s_special": "Special Category Data (Article 9)",
|
||||
"a30_special_intro": "item(s) detected as special category data under GDPR Article 9. Requires explicit legal basis and DPIA.",
|
||||
"a30_special_by_cat": "Detected categories",
|
||||
"a30_special_items": "Affected items (up to 50)",
|
||||
"a30_col_category": "Category",
|
||||
"a30_cat_health": "Health data (Art. 9)",
|
||||
"a30_cat_mental": "Mental health (Art. 9)",
|
||||
"a30_cat_criminal": "Criminal records (Art. 10)",
|
||||
"a30_cat_union": "Trade union membership (Art. 9)",
|
||||
"a30_cat_religion": "Religious beliefs (Art. 9)",
|
||||
"a30_cat_ethnicity": "Racial/ethnic origin (Art. 9)",
|
||||
"a30_cat_political": "Political opinions (Art. 9)",
|
||||
"a30_cat_biometric": "Biometric data (Art. 9)",
|
||||
"a30_cat_sexual": "Sexual orientation (Art. 9)",
|
||||
"m365_filter_all_special": "All risk levels",
|
||||
"m365_filter_special_only": "Art. 9 special category",
|
||||
"m365_badge_special": "Art.9",
|
||||
"m365_phase_scanning": "Scanning…",
|
||||
"a30_special_cat": "Art. 9 special category items",
|
||||
"a30_special_cat_note": "These items contain health, criminal, biometric, religious, ethnic, trade union, political, or sexual orientation data. An explicit legal basis (Art. 9(2)) and possibly a DPIA (Art. 35) are required.",
|
||||
"a30_col_special": "Art. 9",
|
||||
"a30_pii_phone": "Phone numbers",
|
||||
"a30_pii_email": "Email addresses",
|
||||
"a30_pii_iban": "IBAN bank numbers",
|
||||
"a30_pii_bank": "Bank account numbers",
|
||||
"a30_pii_name": "Personal names (NER)",
|
||||
"a30_pii_address": "Addresses (NER)",
|
||||
"a30_pii_org": "Organisations (NER)",
|
||||
"a30_col_cpr_short": "CPR",
|
||||
"a30_inv_staff": "Staff / Faculty",
|
||||
"a30_inv_students": "Students",
|
||||
"a30_student_consent_note": "Note: Student accounts in Danish folkeskole (pupils under age 15) require parental consent for processing of personal data under Databeskyttelsesloven §6. Items in student accounts must not be auto-deleted — any action requires review by school administration and, for pupils under 15, notification of parents or guardians as rights holders under GDPR Article 8.",
|
||||
"m365_profile_label": "Profile:",
|
||||
"m365_profile_placeholder": "— Select profile —",
|
||||
"m365_profile_save_tip": "Save current settings as a profile",
|
||||
"m365_profile_save_prompt": "Profile name:",
|
||||
"m365_profile_applied": "Profile loaded",
|
||||
"m365_profile_saved": "Profile saved",
|
||||
"m365_profile_manage_btn": "Profiles",
|
||||
"m365_profile_clear_btn": "Clear",
|
||||
"m365_profile_save_btn": "Save",
|
||||
"m365_profile_manage_title": "⚙ Manage Profiles",
|
||||
"m365_profile_no_profiles": "No saved profiles yet. Use 💾 to save the current sidebar settings as a profile.",
|
||||
"m365_profile_use": "Use",
|
||||
"m365_profile_edit": "Edit",
|
||||
"m365_profile_duplicate": "Duplicate",
|
||||
"m365_profile_delete": "Delete",
|
||||
"m365_profile_delete_confirm": "Delete profile",
|
||||
"m365_profile_duplicated": "Profile duplicated",
|
||||
"m365_profile_deleted": "Profile deleted",
|
||||
"m365_profile_never": "never",
|
||||
"m365_profile_last_run": "Last run",
|
||||
"m365_profile_name_placeholder": "Profile name",
|
||||
"m365_profile_desc_placeholder": "Description (optional)",
|
||||
"m365_profile_name_required": "Profile name is required.",
|
||||
"m365_db_title": "🗄 Database",
|
||||
"m365_db_export": "Export",
|
||||
"m365_db_import": "Import",
|
||||
"m365_db_export_error": "Export failed",
|
||||
"m365_db_exported": "Database exported",
|
||||
"m365_db_import_title": "📥 Import Database",
|
||||
"m365_db_import_desc": "Select a previously exported .zip file. Merge adds dispositions and deletion log. Replace wipes and fully restores.",
|
||||
"m365_db_import_file": "ZIP file",
|
||||
"m365_db_import_mode": "Mode:",
|
||||
"m365_db_import_merge": "Merge (safe)",
|
||||
"m365_db_import_replace": "Replace (full restore)",
|
||||
"m365_db_import_replace_warn": "⚠ Replace mode will erase all existing scan data before restoring. Make sure you have a backup of ~/.gdpr_scanner.db first.",
|
||||
"m365_db_import_replace_confirm": "Replace mode will erase ALL existing scan data and restore from the archive.\\n\\nMake sure you have a manual backup of ~/.gdpr_scanner.db.\\n\\nProceed?",
|
||||
"m365_db_import_no_file": "Please select a ZIP file first.",
|
||||
"m365_db_importing": "Importing…",
|
||||
"m365_db_imported": "Imported",
|
||||
"m365_db_import_run": "Import",
|
||||
"m365_opt_scan_photos": "Scan photos for faces",
|
||||
"m365_opt_scan_photos_hint": "Flags images with detected faces as Art. 9 biometric data. Slower — opt in.",
|
||||
"m365_filter_photo_only": "📷 Photos / biometric",
|
||||
"m365_badge_faces": "faces",
|
||||
"a30_photo_items": "Photos with detected faces (Art. 9 biometric)",
|
||||
"a30_photo_note": "Photographs of identifiable persons are biometric data under Art. 9 GDPR. Retention requires a documented legal basis under Art. 9(2). For school photographs of pupils under 15, parental consent is required (Databeskyttelsesloven §6). See Datatilsynet guidance on school photography.",
|
||||
"a30_s_photos": "Photographs and Biometric Data (Article 9)",
|
||||
"a30_photo_intro": "image file(s) containing detected face(s) were found in the scan. Photographs of identifiable persons constitute biometric data under GDPR Article 9 and are subject to the same heightened protection as health or criminal records data.",
|
||||
"a30_photo_guidance": "Retention guidance",
|
||||
"a30_photo_g1": "Photos may only be retained while the original purpose remains valid (Art. 5(1)(b) — purpose limitation).",
|
||||
"a30_photo_g2": "Pupils under 15 require parental consent (Databeskyttelsesloven §6). Consent must be freely given, specific, and documented.",
|
||||
"a30_photo_g3": "Photos on public-facing websites must be removed promptly after a person leaves the organisation or withdraws consent (Art. 17 — right to erasure).",
|
||||
"a30_photo_g4": "Historical/archive use may justify longer retention under Art. 89 only with specific safeguards and case-by-case assessment.",
|
||||
"a30_photo_col_faces": "Faces",
|
||||
"a30_method_5": "When photo scanning is enabled, image files are analysed using OpenCV Haar cascade face detection to identify photographs of persons (Art. 9 biometric data).",
|
||||
"m365_role_cycle_tip": "Click to override role (cycles student → staff → other → auto)",
|
||||
"m365_role_set": "Role set",
|
||||
"m365_role_cleared": "Role override cleared",
|
||||
"m365_sku_debug_title": "🔍 Tenant SKU IDs",
|
||||
"m365_sku_debug_desc": "These are the raw SKU IDs assigned to your users. Any marked ❓ unknown are not in classification/m365_skus.json — copy them under student_ids or staff_ids and restart.",
|
||||
"m365_sku_debug_none": "No license data returned — check that the app has User.Read.All permission.",
|
||||
"m365_file_sources_title": "📁 File sources",
|
||||
"m365_file_sources_manage": "Manage",
|
||||
"m365_file_sources_empty": "No file sources configured. Add a local folder or network share below.",
|
||||
"m365_file_sources_add": "Add source",
|
||||
"m365_fsrc_label": "Label",
|
||||
"m365_fsrc_path": "Path",
|
||||
"m365_fsrc_smb_detected": "SMB/CIFS network share detected",
|
||||
"m365_fsrc_smb_host": "SMB host",
|
||||
"m365_fsrc_smb_user": "Username",
|
||||
"m365_fsrc_smb_pw": "Password",
|
||||
"m365_fsrc_smb_pw_hint": "Password is saved to the OS keychain — never stored in a file.",
|
||||
"m365_fsrc_add_btn": "Add",
|
||||
"m365_fsrc_saved": "Source saved",
|
||||
"m365_fsrc_saving": "Saving...",
|
||||
"m365_fsrc_path_required": "Path is required.",
|
||||
"m365_fsrc_scan_btn": "Scan",
|
||||
"m365_fsrc_scan_start": "Starting file scan",
|
||||
"m365_src_group_files": "File sources",
|
||||
"m365_no_sources": "No sources selected — nothing to scan.",
|
||||
"m365_fsrc_name_required": "Name is required.",
|
||||
"m365_srcmgmt_title": "⚙ Source management",
|
||||
"m365_srcmgmt_tab_m365": "Microsoft 365",
|
||||
"m365_srcmgmt_tab_google": "Google Workspace",
|
||||
"m365_srcmgmt_tab_files": "File sources",
|
||||
"m365_srcmgmt_connection": "Connection",
|
||||
"m365_srcmgmt_azure_creds": "Azure credentials",
|
||||
"m365_srcmgmt_sources_m365": "Sources to scan",
|
||||
"m365_srcmgmt_connected": "Connected",
|
||||
"m365_srcmgmt_not_connected": "Not connected",
|
||||
"m365_srcmgmt_coming_soon": "Coming soon",
|
||||
"m365_srcmgmt_google_sub": "Gmail and Google Drive scanning will appear here when implemented.",
|
||||
"m365_srcmgmt_file_sources": "File sources",
|
||||
"m365_sources_manage_btn": "Sources",
|
||||
"m365_connecting": "Connecting...",
|
||||
"m365_err_creds_required": "Client ID and Tenant ID required",
|
||||
"m365_signout_confirm": "Disconnect and clear credentials?",
|
||||
"m365_btn_settings": "Settings",
|
||||
"m365_settings_title": "⚙ Settings",
|
||||
"m365_settings_tab_general": "General",
|
||||
"m365_settings_tab_email": "Email report",
|
||||
"m365_settings_tab_database": "Database",
|
||||
"m365_settings_appearance": "Appearance",
|
||||
"m365_settings_language": "Language",
|
||||
"m365_settings_theme": "Theme",
|
||||
"m365_settings_db_actions": "Actions",
|
||||
"m365_db_reset": "Reset DB",
|
||||
"m365_db_reset_confirm": "Reset database? All scan results will be deleted.",
|
||||
"m365_db_reset_done": "Database reset",
|
||||
"m365_db_scans": "Scans",
|
||||
"m365_smtp_saving": "Saving...",
|
||||
"m365_settings_admin_pin": "Admin PIN",
|
||||
"m365_settings_pin_hint": "Required for destructive actions (e.g. Reset DB). Leave blank to disable.",
|
||||
"m365_settings_current_pin": "Current PIN",
|
||||
"m365_settings_new_pin": "New PIN",
|
||||
"m365_settings_confirm_pin": "Confirm PIN",
|
||||
"m365_settings_pin_set": "Admin PIN is set",
|
||||
"m365_settings_pin_not_set": "No PIN set — Reset DB is unprotected",
|
||||
"m365_settings_pin_required": "PIN is required.",
|
||||
"m365_settings_pin_mismatch": "PINs do not match.",
|
||||
"m365_settings_pin_wrong": "Incorrect PIN — reset cancelled.",
|
||||
"m365_settings_pin_saved": "PIN saved",
|
||||
"m365_settings_enter_pin": "Enter admin PIN",
|
||||
"m365_settings_enter_pin_reset": "Enter admin PIN to reset the database.",
|
||||
"btn_confirm": "Confirm",
|
||||
"m365_log_scan_started": "Scan started",
|
||||
"m365_preview_local_file": "Local file — no cloud preview available",
|
||||
"m365_badge_gps": "GPS location",
|
||||
"a30_gps_items": "Items with GPS location data (Art. 4 — location = personal data)",
|
||||
"a30_exif_pii_items": "Items with EXIF PII (author, description, keywords)",
|
||||
"a30_gps_title": "Items with GPS location data",
|
||||
"a30_gps_intro": "The following files contain GPS coordinates embedded in EXIF metadata. Location data constitutes personal data under Art. 4 GDPR.",
|
||||
"a30_gps_col_lat": "Latitude",
|
||||
"a30_gps_col_lon": "Longitude",
|
||||
"m365_accounts_disabled_tip": "Select a Microsoft 365 source to enable account selection",
|
||||
"m365_smtp_test": "Test",
|
||||
"m365_smtp_testing": "Sending test email…",
|
||||
"m365_smtp_test_ok": "Test email sent",
|
||||
"m365_smtp_test_fail": "Connection failed",
|
||||
"m365_fsrc_edit_btn": "Edit",
|
||||
"m365_fsrc_save_changes": "Save changes",
|
||||
"m365_settings_tab_scheduler": "Scheduler",
|
||||
"m365_sched_title": "Scheduled scans",
|
||||
"m365_sched_next": "Next",
|
||||
"m365_sched_hint": "Run scans automatically at a set time. Requires an active M365 connection (application mode recommended).",
|
||||
"m365_sched_no_aps": "⚠ APScheduler not installed. Run: pip install apscheduler",
|
||||
"m365_sched_enabled": "Enable scheduler",
|
||||
"m365_sched_frequency": "Frequency",
|
||||
"m365_sched_dow": "Day of week",
|
||||
"m365_sched_dom": "Day of month",
|
||||
"m365_sched_time": "Time",
|
||||
"m365_sched_profile": "Profile",
|
||||
"m365_sched_profile_last": "Last saved settings",
|
||||
"m365_sched_after_scan": "After scan",
|
||||
"m365_sched_auto_email": "Email report automatically",
|
||||
"m365_sched_auto_retention": "Enforce retention policy",
|
||||
"m365_sched_status": "Status",
|
||||
"m365_sched_run_now": "▶ Run now",
|
||||
"m365_sched_add": "+ Add scheduled scan",
|
||||
"m365_sched_name": "Name",
|
||||
"m365_sched_editor_new": "New scheduled scan",
|
||||
"m365_sched_editor_edit": "Edit scheduled scan",
|
||||
"m365_sched_name_required": "Name is required",
|
||||
"m365_sched_no_runs": "No scheduled runs yet",
|
||||
"m365_sched_freq_daily": "Daily",
|
||||
"m365_sched_freq_weekly": "Weekly",
|
||||
"m365_sched_freq_monthly": "Monthly",
|
||||
"m365_sched_dow_mon": "Monday",
|
||||
"m365_sched_dow_tue": "Tuesday",
|
||||
"m365_sched_dow_wed": "Wednesday",
|
||||
"m365_sched_dow_thu": "Thursday",
|
||||
"m365_sched_dow_fri": "Friday",
|
||||
"m365_sched_dow_sat": "Saturday",
|
||||
"m365_sched_dow_sun": "Sunday",
|
||||
"btn_save": "Save",
|
||||
"m365_settings_about": "About",
|
||||
"m365_settings_save_pin": "Save PIN",
|
||||
"m365_sse_reconnecting": "Reconnecting to running scan…",
|
||||
"m365_sse_replay_note": "Live log resumed — earlier entries replayed from running scan.",
|
||||
"m365_google_sa_creds": "Service account credentials",
|
||||
"m365_google_sa_key_file": "Service Account JSON key",
|
||||
"m365_google_sa_key_hint": "Download from Google Cloud Console → IAM & Admin → Service Accounts → Keys → Add Key → JSON",
|
||||
"m365_google_admin_email": "Admin email",
|
||||
"m365_google_admin_email_hint": "Used for domain-wide delegation — must be a Workspace super-admin.",
|
||||
"m365_google_libs_missing": "Libraries not installed",
|
||||
"m365_google_key_required": "Select a service account JSON key file",
|
||||
"m365_google_invalid_json": "Invalid JSON file",
|
||||
"m365_srcmgmt_sources_google": "Sources to scan",
|
||||
"m365_google_src_gmail": "Gmail",
|
||||
"m365_google_src_drive": "Google Drive",
|
||||
"m365_google_setup_title": "Setup required in Google Workspace:",
|
||||
"m365_google_setup_step1": "Create a Google Cloud project and enable Gmail API + Drive API + Admin SDK.",
|
||||
"m365_google_setup_step2": "Create a service account, download the JSON key, and enable domain-wide delegation.",
|
||||
"m365_google_setup_step3": "In Workspace Admin → Security → API Controls → Domain-wide delegation, add the service account client ID with scopes:",
|
||||
"m365_google_auth_mode": "Auth mode",
|
||||
"m365_google_mode_workspace": "Workspace",
|
||||
"m365_google_mode_personal": "Personal account",
|
||||
"m365_google_personal_creds": "Personal account",
|
||||
"m365_google_personal_client_id": "Client ID",
|
||||
"m365_google_personal_client_secret": "Client secret",
|
||||
"m365_google_personal_hint": "Create OAuth 2.0 Desktop credentials in Google Cloud Console, then paste the client ID and secret above.",
|
||||
"m365_google_personal_sign_in": "Sign in",
|
||||
"m365_google_personal_creds_required": "Client ID and secret required",
|
||||
"m365_google_personal_setup_title": "Setup required:",
|
||||
"m365_google_personal_setup_step1": "In Google Cloud Console, create a project and enable Gmail API + Drive API.",
|
||||
"m365_google_personal_setup_step2": "Create OAuth 2.0 credentials (Desktop app type) and copy the client ID and secret.",
|
||||
"m365_google_personal_setup_step3": "Add your Google account email to the OAuth consent screen test users list.",
|
||||
"m365_auth_waiting": "Waiting for sign-in…",
|
||||
"role_staff": "Staff",
|
||||
"role_student": "Student",
|
||||
"role_other": "Other",
|
||||
|
||||
"m365_settings_tab_security": "Security",
|
||||
|
||||
"share_modal_title": "Share results",
|
||||
"share_modal_desc": "Read-only links let a DPO or reviewer browse results and tag dispositions without access to scan controls or credentials.",
|
||||
"share_new_link": "New link",
|
||||
"share_label_lbl": "Label (optional)",
|
||||
"share_label_placeholder": "e.g. DPO review 2026",
|
||||
"share_expires_in": "Expires in",
|
||||
"share_expires_never": "Never",
|
||||
"share_expires_7d": "7 days",
|
||||
"share_expires_30d": "30 days",
|
||||
"share_expires_90d": "90 days",
|
||||
"share_expires_1y": "1 year",
|
||||
"share_create": "Create",
|
||||
"share_copy_link_prompt": "Copy link:",
|
||||
"share_active_links": "Active links",
|
||||
"share_viewer_pin_label": "Viewer PIN:",
|
||||
"share_pin_configure": "Configure",
|
||||
"share_pin_set": "Set",
|
||||
"share_pin_not_set": "Not set",
|
||||
"share_no_links": "No active links.",
|
||||
"share_unlabelled": "Unlabelled",
|
||||
"share_expires_prefix": "Expires:",
|
||||
"share_last_used": "Last used:",
|
||||
"share_revoke": "Revoke",
|
||||
"share_copied": "Copied!",
|
||||
"share_load_error": "Failed to load links.",
|
||||
"share_create_error": "Failed to create link:",
|
||||
"share_revoke_confirm": "Revoke this link? Anyone using it will immediately lose access.",
|
||||
"share_revoke_error": "Failed to revoke:",
|
||||
|
||||
"viewer_pin_group_title": "Viewer PIN",
|
||||
"viewer_pin_desc": "A numeric PIN (4\u20138 digits) that lets anyone open <code style=\"font-size:10px\">/view</code> in a browser for read-only access to results without a token URL.",
|
||||
"viewer_pin_clear": "Clear PIN",
|
||||
"viewer_pin_is_set": "Viewer PIN is set",
|
||||
"viewer_pin_not_set_msg": "No PIN set \u2014 /view requires a token link",
|
||||
"viewer_pin_format": "PIN must be 4\u20138 digits.",
|
||||
"viewer_pin_saving": "Saving\u2026",
|
||||
"viewer_pin_saved": "PIN saved",
|
||||
"viewer_pin_clear_confirm": "Remove the viewer PIN? /view will require a token link again.",
|
||||
"viewer_pin_cleared": "PIN cleared"
|
||||
}
|
||||
1141
m365_connector.py
Normal file
1141
m365_connector.py
Normal file
File diff suppressed because it is too large
Load Diff
446
m365_launcher.py
Normal file
446
m365_launcher.py
Normal file
@ -0,0 +1,446 @@
|
||||
"""
|
||||
gdpr_launcher.py — entry point for the packaged GDPRScanner app.
|
||||
|
||||
Responsibilities:
|
||||
1. Find a free port (default 5100)
|
||||
2. Start Flask in a background thread
|
||||
3. Open the UI in a native webview window (pywebview)
|
||||
— falls back to the system browser if pywebview is unavailable
|
||||
|
||||
Generated by build_gdpr.py — do not edit manually.
|
||||
"""
|
||||
import os
|
||||
os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import socket
|
||||
import threading
|
||||
import time
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
|
||||
# Directory that bundled resources are resolved against: the unpack directory
# exposed as sys._MEIPASS when running as a frozen executable, otherwise the
# folder that contains this script.
BASE_DIR = Path(sys._MEIPASS) if getattr(sys, "frozen", False) else Path(__file__).parent
|
||||
|
||||
|
||||
def _setup_external_tools():
|
||||
"""
|
||||
Locate Tesseract and Poppler regardless of how the app was launched.
|
||||
GDPRScanner calls document_scanner for file content extraction, which
|
||||
may need OCR for scanned PDFs — same setup as Document Scanner.
|
||||
"""
|
||||
extra_paths = []
|
||||
|
||||
if sys.platform == "darwin":
|
||||
brew_prefix = None
|
||||
for brew_candidate in ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]:
|
||||
if Path(brew_candidate).exists():
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[brew_candidate, "--prefix"],
|
||||
capture_output=True, text=True, timeout=5
|
||||
)
|
||||
if result.returncode == 0:
|
||||
brew_prefix = result.stdout.strip()
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
brew_candidates = []
|
||||
if brew_prefix:
|
||||
brew_candidates.append(brew_prefix)
|
||||
brew_candidates += ["/opt/homebrew", "/usr/local", "/home/linuxbrew/.linuxbrew"]
|
||||
|
||||
for prefix in brew_candidates:
|
||||
bin_dir = Path(prefix) / "bin"
|
||||
if bin_dir.exists():
|
||||
extra_paths.append(str(bin_dir))
|
||||
tessdata = Path(prefix) / "share" / "tessdata"
|
||||
if tessdata.exists():
|
||||
os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
|
||||
|
||||
for t in ["/opt/homebrew/bin/tesseract", "/usr/local/bin/tesseract"]:
|
||||
if Path(t).exists():
|
||||
os.environ.setdefault("TESSERACT_CMD", t)
|
||||
break
|
||||
|
||||
for p in ["/opt/homebrew/bin", "/usr/local/bin",
|
||||
"/opt/homebrew/opt/poppler/bin", "/usr/local/opt/poppler/bin"]:
|
||||
if (Path(p) / "pdftoppm").exists():
|
||||
os.environ.setdefault("POPPLER_PATH", p)
|
||||
extra_paths.insert(0, p)
|
||||
break
|
||||
|
||||
elif sys.platform == "win32":
|
||||
import winreg
|
||||
tess_dir = None
|
||||
try:
|
||||
key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Tesseract-OCR")
|
||||
tess_dir, _ = winreg.QueryValueEx(key, "InstallDir")
|
||||
winreg.CloseKey(key)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for d in ([tess_dir] if tess_dir else []) + [
|
||||
r"C:\Program Files\Tesseract-OCR",
|
||||
r"C:\Program Files (x86)\Tesseract-OCR",
|
||||
r"C:\Tesseract-OCR",
|
||||
]:
|
||||
if d and Path(d, "tesseract.exe").exists():
|
||||
os.environ.setdefault("TESSERACT_CMD", str(Path(d) / "tesseract.exe"))
|
||||
extra_paths.append(d)
|
||||
tessdata = Path(d) / "tessdata"
|
||||
if tessdata.exists():
|
||||
os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
|
||||
break
|
||||
|
||||
for d in [
|
||||
r"C:\poppler\Library\bin", r"C:\poppler\bin",
|
||||
r"C:\Program Files\poppler\Library\bin",
|
||||
r"C:\Program Files\poppler\bin",
|
||||
r"C:\tools\poppler\Library\bin",
|
||||
]:
|
||||
if (Path(d) / "pdftoppm.exe").exists():
|
||||
os.environ.setdefault("POPPLER_PATH", d)
|
||||
extra_paths.insert(0, d)
|
||||
break
|
||||
|
||||
if getattr(sys, "frozen", False):
|
||||
tess_bin = BASE_DIR / ("tesseract.exe" if sys.platform == "win32" else "tesseract")
|
||||
if tess_bin.exists():
|
||||
os.environ.setdefault("TESSERACT_CMD", str(tess_bin))
|
||||
for sub in ["poppler/bin", "poppler/Library/bin", "."]:
|
||||
pdftoppm = BASE_DIR / sub / ("pdftoppm.exe" if sys.platform == "win32" else "pdftoppm")
|
||||
if pdftoppm.exists():
|
||||
os.environ.setdefault("POPPLER_PATH", str(pdftoppm.parent))
|
||||
extra_paths.insert(0, str(pdftoppm.parent))
|
||||
break
|
||||
extra_paths.insert(0, str(BASE_DIR))
|
||||
|
||||
if extra_paths:
|
||||
current = os.environ.get("PATH", "")
|
||||
additions = os.pathsep.join(p for p in extra_paths if p not in current)
|
||||
if additions:
|
||||
os.environ["PATH"] = additions + os.pathsep + current
|
||||
|
||||
cmd = os.environ.get("TESSERACT_CMD")
|
||||
if cmd and Path(cmd).exists():
|
||||
try:
|
||||
import pytesseract
|
||||
pytesseract.pytesseract.tesseract_cmd = cmd
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
poppler = os.environ.get("POPPLER_PATH")
|
||||
if poppler:
|
||||
try:
|
||||
import pdf2image.pdf2image as _p2i
|
||||
_orig = _p2i.convert_from_path
|
||||
def _patched(pdf_path, *a, poppler_path=None, **kw):
|
||||
return _orig(pdf_path, *a, poppler_path=poppler_path or poppler, **kw)
|
||||
_p2i.convert_from_path = _patched
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# Executed at import time, so the tool environment is prepared as soon as this
# module is loaded.
_setup_external_tools()
|
||||
|
||||
|
||||
def find_free_port(start: int = 5100) -> int:
    """
    Return the first TCP port on 127.0.0.1 that can currently be bound.

    Up to 100 consecutive ports are probed, beginning at ``start`` and never
    going beyond 65535 (the highest valid port number).

    Args:
        start: First port number to try.

    Returns:
        A port number that was bindable at the time of the probe. The probe
        socket is closed before returning, so the caller should bind promptly.

    Raises:
        RuntimeError: If none of the probed ports could be bound.
    """
    # Clamp so that bind() is never handed an out-of-range port number.
    stop = min(start + 100, 65536)
    for port in range(start, stop):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
                return port
            except OSError:
                continue
    # Report the range that was actually probed, not a hard-coded one.
    raise RuntimeError(f"No free port found in range {start}-{max(start, stop - 1)}")
|
||||
|
||||
|
||||
# ── Single-instance lock ──────────────────────────────────────────────────────
# Open handle on the lock file. It is kept referenced at module level so the
# handle (and with it the OS-level lock) stays open for the life of the process.
_LOCK_FH = None


def acquire_instance_lock() -> bool:
    """
    Acquire an exclusive process lock so only one instance runs at a time.

    The lock file is ``~/.gdprscanner/app.lock``. Once the lock is held the
    file content is replaced with this process's PID. The file is opened
    without truncation, so a failed attempt leaves the PID written by the
    instance that owns the lock intact.

    Returns:
        True if the lock was acquired (or is already held by this process),
        False if another instance holds it or the lock file cannot be opened.

    The lock is released automatically when the process exits.
    """
    global _LOCK_FH
    if _LOCK_FH is not None:
        # Already held by this process; re-opening would only risk dropping it.
        return True

    lock_dir = Path.home() / ".gdprscanner"
    lock_dir.mkdir(parents=True, exist_ok=True)
    lock_path = lock_dir / "app.lock"

    fh = None
    try:
        # O_CREAT without O_TRUNC: create the file if needed, never wipe it
        # before we know the lock is ours.
        fd = os.open(str(lock_path), os.O_RDWR | os.O_CREAT, 0o666)
        fh = os.fdopen(fd, "r+")
        if sys.platform == "win32":
            import msvcrt
            msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            import fcntl
            fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Lock obtained — now it is safe to overwrite the recorded PID.
        fh.seek(0)
        fh.truncate()
        fh.write(str(os.getpid()))
        fh.flush()
    except OSError:
        if fh is not None:
            fh.close()
        return False

    _LOCK_FH = fh
    return True
|
||||
|
||||
|
||||
def _activate_venv():
|
||||
if getattr(sys, "frozen", False):
|
||||
return
|
||||
for candidate in [BASE_DIR / "venv", Path(__file__).parent / "venv"]:
|
||||
if sys.platform == "win32":
|
||||
site_pkg = candidate / "Lib" / "site-packages"
|
||||
else:
|
||||
lib = candidate / "lib"
|
||||
site_pkg = None
|
||||
if lib.exists():
|
||||
for d in lib.iterdir():
|
||||
sp = d / "site-packages"
|
||||
if sp.exists():
|
||||
site_pkg = sp
|
||||
break
|
||||
if site_pkg and site_pkg.exists():
|
||||
sys.path.insert(0, str(site_pkg))
|
||||
os.environ["VIRTUAL_ENV"] = str(candidate)
|
||||
os.environ.pop("PYTHONHOME", None)
|
||||
break
|
||||
|
||||
|
||||
# Executed at import time, ahead of the deferred `import gdpr_scanner` in
# start_flask().
_activate_venv()
|
||||
|
||||
|
||||
def start_flask(port: int):
    """
    Import the gdpr_scanner module and run its ``app`` on 127.0.0.1:``port``.

    The import is deferred to call time. The server is started with debug
    mode and the reloader switched off and with threaded request handling on.
    """
    import gdpr_scanner

    run_options = {
        "host": "127.0.0.1",
        "port": port,
        "debug": False,
        "threaded": True,
        "use_reloader": False,
    }
    gdpr_scanner.app.run(**run_options)
|
||||
|
||||
|
||||
def wait_for_flask(port: int, timeout: float = 20.0) -> bool:
    """
    Poll 127.0.0.1:``port`` until a TCP connection succeeds.

    Each attempt uses a 0.2 s connect timeout and failed attempts are followed
    by a 0.1 s pause.

    Returns:
        True as soon as a connection is accepted, False if ``timeout`` seconds
        (measured on the monotonic clock) pass without one.
    """
    give_up_at = time.monotonic() + timeout
    address = ("127.0.0.1", port)
    while True:
        if not time.monotonic() < give_up_at:
            return False
        try:
            with socket.create_connection(address, timeout=0.2):
                reachable = True
        except OSError:
            reachable = False
        if reachable:
            return True
        time.sleep(0.1)
|
||||
|
||||
|
||||
def _load_icon_image():
    """Return a 64x64 RGBA Pillow image for the application icon.

    The first existing file in ``BASE_DIR`` from a fixed list of candidate
    names is opened, converted to RGBA and resized. If none exists, a solid
    blue square is generated instead. Any exception (including Pillow not
    being importable) results in ``None``.
    """
    candidate_names = (
        "icon_gdpr.ico", "icon_gdpr.icns", "icon_gdpr.png",
        "icon.ico", "icon.icns", "icon.png",
        "icon_m365.ico", "icon_m365.icns", "icon_m365.png",  # legacy fallback
    )
    try:
        from PIL import Image as PILImage

        icon_path = next(
            (BASE_DIR / name for name in candidate_names if (BASE_DIR / name).exists()),
            None,
        )
        if icon_path is not None:
            return PILImage.open(icon_path).convert("RGBA").resize((64, 64))

        # Minimal fallback — blue square
        return PILImage.new("RGBA", (64, 64), (0, 114, 206, 255))
    except Exception:
        return None
|
||||
|
||||
|
||||
def run_webview(port: int):
    """
    Open the app in a native webview window.

    Creates the main window pointing at the local Flask server, exposes an
    ``Api`` object to the page's JavaScript and starts the pywebview GUI
    loop. Closing the main window terminates the process via ``os._exit(0)``.

    Args:
        port: TCP port the local Flask server listens on.

    Returns True on success, False if pywebview is unavailable.
    """
    try:
        import webview
    except ImportError:
        return False

    import datetime
    import urllib.parse
    import urllib.request

    base_url = f"http://127.0.0.1:{port}"

    def _save_download(endpoint: str, stem: str, ext: str, type_label: str) -> dict:
        """Download ``endpoint`` from Flask and save it via a native dialog.

        Returns ``{"ok": True, "path": ...}`` when the file was written,
        otherwise ``{"ok": False, "error": ...}`` ("cancelled" when there is
        no window or the user dismissed the dialog).
        """
        try:
            with urllib.request.urlopen(base_url + endpoint) as resp:
                data = resp.read()
            stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
            win = webview.windows[0] if webview.windows else None
            if win:
                paths = win.create_file_dialog(
                    webview.SAVE_DIALOG,
                    save_filename=f"{stem}_{stamp}{ext}",
                    file_types=(f"{type_label} (*{ext})",),
                )
                if paths:
                    # Depending on the backend the dialog yields a str or a sequence.
                    dest = paths[0] if isinstance(paths, (list, tuple)) else paths
                    # Case-insensitive so "Report.XLSX" is not turned into
                    # "Report.XLSX.xlsx".
                    if not dest.lower().endswith(ext):
                        dest += ext
                    with open(dest, "wb") as f:
                        f.write(data)
                    return {"ok": True, "path": dest}
            return {"ok": False, "error": "cancelled"}
        except Exception as e:
            # Reported back to the page instead of raising into the JS bridge.
            return {"ok": False, "error": str(e)}

    class Api:
        """Methods made available to the page through pywebview's ``js_api``."""

        def quit(self):
            """Destroy every open webview window."""
            # Iterate over a snapshot: destroying a window may shrink the
            # live list while we are still walking it.
            for w in list(webview.windows):
                w.destroy()

        def save_excel(self):
            """Fetch the Excel export from Flask and save via native dialog."""
            return _save_download("/api/export_excel", "gdpr_scan", ".xlsx", "Excel Files")

        def save_db_export(self):
            """Fetch the DB export ZIP from Flask and save via native dialog."""
            return _save_download("/api/db/export", "gdpr_export", ".zip", "ZIP Archive")

        def save_article30(self):
            """Fetch the Article 30 Word doc from Flask and save via native dialog."""
            return _save_download("/api/export_article30", "article30", ".docx", "Word Document")

        def open_manual(self, lang: str):
            """Open the user manual in a new native webview window.

            If a manual window already exists it is reused and reloaded.
            """
            # Percent-encode so an unexpected value cannot alter the query string.
            url = f"{base_url}/manual?lang={urllib.parse.quote(str(lang), safe='')}"
            existing = next(
                (w for w in webview.windows if getattr(w, "_is_manual", False)), None
            )
            if existing:
                existing.load_url(url)
            else:
                mw = webview.create_window(
                    title="GDPRScanner — Manual",
                    url=url,
                    width=960,
                    height=800,
                    resizable=True,
                )
                # Marker attribute used above to find the manual window again.
                mw._is_manual = True

    w = webview.create_window(
        title="GDPRScanner",
        url=f"{base_url}/",
        width=1400,
        height=900,
        min_size=(900, 600),
        js_api=Api(),
    )

    def _on_closed():
        # Hard exit so the daemon Flask thread cannot keep the process around.
        os._exit(0)

    w.events.closed += _on_closed
    webview.start(debug=False)
    return True
|
||||
|
||||
|
||||
def _run_browser_fallback(port: int):
    """Open in system browser + optional tray icon.

    The app URL is opened in the default browser. If pystray and an icon
    image are available, a tray icon with "Open" and "Quit" entries is shown
    and this call blocks in its event loop. Otherwise the call blocks in a
    sleep loop until interrupted, so the process is not allowed to end while
    the browser is still talking to the server.

    Args:
        port: TCP port the local Flask server listens on.
    """
    url = f"http://127.0.0.1:{port}/"
    webbrowser.open(url)

    def _run_tray() -> bool:
        """Show the tray icon and block until it stops; False if unavailable."""
        try:
            import pystray
        except ImportError:
            return False

        img = _load_icon_image()
        if img is None:
            # No usable icon (e.g. Pillow missing or icon unreadable).
            return False

        def _quit(icon, item):
            icon.stop()
            os._exit(0)

        def _open(icon, item):
            webbrowser.open(url)

        menu = pystray.Menu(
            pystray.MenuItem("Open GDPRScanner", _open, default=True),
            pystray.MenuItem("Quit", _quit),
        )
        icon = pystray.Icon("GDPRScanner", img, "GDPRScanner", menu)
        icon.run()
        return True

    if _run_tray():
        return

    # No tray icon — just keep the process alive. Previously a missing icon
    # image made this function return immediately, letting the main thread
    # (and with it the process) finish right after the browser opened.
    try:
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Single-instance guard: acquire_instance_lock() returns False when
    # another instance already holds the lock.
    if not acquire_instance_lock():
        print("GDPRScanner is already running.", file=sys.stderr)
        sys.exit(1)

    # On macOS, multiprocessing uses "fork" which is unsafe with some
    # frameworks — use "spawn" to match PyInstaller's behaviour.
    if sys.platform == "darwin":
        import multiprocessing
        multiprocessing.set_start_method("spawn", force=True)

    port = find_free_port()
    # Machine-readable port line — stdout pipe for any parent process.
    print(f"GDPR_PORT={port}", flush=True)

    # Pre-import on main thread so cv2 / numpy initialise safely
    try:
        import gdpr_scanner  # noqa: F401 — side effect: loads Flask app
    except Exception as e:
        print(f"[!] Failed to import gdpr_scanner: {e}", file=sys.stderr)
        sys.exit(1)

    # Serve Flask from a daemon thread so it never blocks process exit.
    flask_thread = threading.Thread(target=start_flask, args=(port,), daemon=True)
    flask_thread.start()

    # Do not open any UI until the server actually accepts connections.
    if not wait_for_flask(port):
        print("[!] Flask did not start in time", file=sys.stderr)
        sys.exit(1)

    # Prefer the native window; run_webview() returns False when pywebview
    # cannot be imported, in which case the system browser is used instead.
    webview_ok = run_webview(port)
    if not webview_ok:
        _run_browser_fallback(port)
|
||||
6
pytest.ini
Normal file
6
pytest.ini
Normal file
@ -0,0 +1,6 @@
|
||||
[pytest]
|
||||
testpaths = tests
|
||||
python_files = test_*.py
|
||||
python_classes = Test*
|
||||
python_functions = test_*
|
||||
addopts = -v --tb=short
|
||||
48
requirements.txt
Normal file
48
requirements.txt
Normal file
@ -0,0 +1,48 @@
|
||||
# M365 GDPR Scanner — Python dependencies
|
||||
# Python 3.11+ required (3.13+ not recommended — spaCy compatibility)
|
||||
|
||||
# ── Web server ────────────────────────────────────────────────────────────────
|
||||
flask>=3.0
|
||||
|
||||
# ── Microsoft 365 authentication ─────────────────────────────────────────────
|
||||
msal>=1.28 # OAuth device code + client credentials flow
|
||||
requests>=2.31 # Microsoft Graph API HTTP client
|
||||
|
||||
# ── Document scanning ─────────────────────────────────────────────────────────
|
||||
pdfplumber>=0.11 # PDF text extraction
|
||||
python-docx>=1.1 # Word document scanning
|
||||
openpyxl>=3.1 # Excel scanning + export
|
||||
|
||||
# ── Image processing ──────────────────────────────────────────────────────────
|
||||
Pillow>=10.0 # Image thumbnails + EXIF extraction (always-on)
|
||||
opencv-python>=4.9 # Face detection (opt-in — Scan photos for faces)
|
||||
numpy>=1.26 # Required by opencv-python
|
||||
|
||||
# ── NER / PII detection ───────────────────────────────────────────────────────
|
||||
# spaCy 3.7 supports Python 3.8–3.12. Do NOT upgrade past Python 3.12.
|
||||
spacy>=3.7,<4.0
|
||||
|
||||
# ── PDF scanning (optional — improves accuracy) ───────────────────────────────
|
||||
pymupdf>=1.24 # Physical PDF text layer access (fallback: pdfplumber)
|
||||
|
||||
# ── Encryption ───────────────────────────────────────────────────────────────
|
||||
cryptography>=42.0 # Fernet — SMTP password encrypted at rest
|
||||
|
||||
# ── Packaging / desktop ───────────────────────────────────────────────────────
|
||||
pyinstaller>=6.0
|
||||
pyinstaller-hooks-contrib>=2024.0
|
||||
pywebview>=5.0 # Native app window
|
||||
pystray>=0.19 # System tray icon
|
||||
|
||||
# ── File system scanning (optional) ──────────────────────────────────────────
|
||||
smbprotocol>=1.13 # SMB2/3 network share scanning without mounting
|
||||
keyring>=25.0 # OS keychain credential storage for SMB passwords
|
||||
python-dotenv>=1.0 # .env file fallback for headless SMB credentials
|
||||
|
||||
# ── Scheduler (#19) ──────────────────────────────────────────────────────────
|
||||
APScheduler>=3.10 # In-process scheduled scans
|
||||
|
||||
# ── Google Workspace scanning (#10) ──────────────────────────────────────────
|
||||
google-auth>=2.0 # Service account + domain-wide delegation
|
||||
google-auth-httplib2 # HTTP transport for google-auth
|
||||
google-api-python-client>=2.0 # Gmail API + Drive API + Admin Directory API
|
||||
21
routes/CLAUDE.md
Normal file
21
routes/CLAUDE.md
Normal file
@ -0,0 +1,21 @@
|
||||
# Routes — Architecture Rules
|
||||
|
||||
## SSE constraints
|
||||
SSE routes must live in `gdpr_scanner.py`, not blueprints — blueprints can't stream.
|
||||
|
||||
M365 scan emits `scan_done`; Google emits `google_scan_done`; file scan emits `file_scan_done`. Never mix them up.
|
||||
|
||||
## scan_progress source field
|
||||
All three scan engines must include `"source": "m365"` / `"google"` / `"file"` in every `scan_progress` SSE event. Never remove this field — the frontend uses it to route progress to the correct segment.
|
||||
|
||||
## file_sources
|
||||
`file_sources` in profiles are stored as source ID strings by the JS frontend. The scheduler resolves them via `_load_file_sources()` before calling `run_file_scan()`.
|
||||
|
||||
## Circular import prohibition
|
||||
`scan_engine.py` and `gdpr_scanner.py` must not import each other. `scan_engine` imports from `sse`, `checkpoint`, `app_config`, `cpr_detector`; `gdpr_scanner` imports scan functions from `scan_engine`.
|
||||
|
||||
## Gotchas
|
||||
|
||||
- **`_load_settings()` return** — does NOT include `file_sources`. Returns only: sources, user_ids, options, retention_years, fiscal_year_end, email_to.
|
||||
- **`_save_settings()` clobbers profile fields** — called on every M365 scan start with only M365 sources/user_ids/options. The fix in `app_config.py` preserves `google_sources` and `file_sources` and rebuilds `sources` as `m365_src + google_src + file_src`. Do not simplify away this merge logic.
|
||||
- **`loadLastScanSummary()` timing** — must only be called after the first `/api/scan/status` poll resolves (inside `_sseWatchdog` in `results.js`, guarded by `_initialStatusChecked`). Calling it on `DOMContentLoaded` shows a stale "no results" card during a live scan after a hard refresh.
|
||||
8
routes/__init__.py
Normal file
8
routes/__init__.py
Normal file
@ -0,0 +1,8 @@
|
||||
"""
|
||||
GDPR Scanner — Flask route blueprints.
|
||||
|
||||
Each module registers one Blueprint and imports shared state from
|
||||
gdpr_scanner (the application entry point). Import order matters:
|
||||
blueprints must be registered after `app` and all shared globals
|
||||
(flagged_items, _connector, etc.) are defined.
|
||||
"""
|
||||
386
routes/app_routes.py
Normal file
386
routes/app_routes.py
Normal file
@ -0,0 +1,386 @@
|
||||
"""
|
||||
App-level routes: about, language, version
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
from flask import Blueprint, Response, jsonify, request
|
||||
from pathlib import Path
|
||||
from routes import state
|
||||
from app_config import _set_lang_override, _load_lang_forced
|
||||
|
||||
bp = Blueprint("app_routes", __name__)
|
||||
|
||||
_APP_VERSION = (Path(__file__).parent.parent / "VERSION").read_text().strip()
|
||||
_LANG_DIR = (Path(sys._MEIPASS) if getattr(sys, "frozen", False)
|
||||
else Path(__file__).parent.parent) / "lang"
|
||||
|
||||
|
||||
@bp.route("/api/about")
def about_info():
    """Return version information for the "About" view as JSON.

    The payload holds the Python version, the app version and, for each of
    msal / requests / openpyxl, its ``__version__`` ("installed" when the
    module has no such attribute, "not installed" when the import fails).
    """
    import platform

    def _version_of(module) -> str:
        return getattr(module, "__version__", "installed")

    versions = {"python": platform.python_version(), "app": _APP_VERSION}

    # The imports are spelled out statically rather than looped over names.
    try:
        import msal as _msal
    except ImportError:
        versions["msal"] = "not installed"
    else:
        versions["msal"] = _version_of(_msal)

    try:
        import requests as _req
    except ImportError:
        versions["requests"] = "not installed"
    else:
        versions["requests"] = _version_of(_req)

    try:
        import openpyxl as _xl
    except ImportError:
        versions["openpyxl"] = "not installed"
    else:
        versions["openpyxl"] = _version_of(_xl)

    return jsonify(versions)
|
||||
|
||||
|
||||
@bp.route("/api/langs")
def get_langs():
    """List the available UI languages and the currently active one.

    A language is available when ``_LANG_DIR`` contains ``<code>.json`` or
    ``<code>.lang``. Each entry carries the code and a display name (the
    upper-cased code when no native name is known). Entries are ordered by
    code.
    """
    native_names = {
        "da": "Dansk", "en": "English", "de": "Deutsch",
        "fr": "Français", "nl": "Nederlands", "sv": "Svenska",
        "no": "Norsk", "fi": "Suomi", "es": "Español",
        "it": "Italiano", "pl": "Polski", "pt": "Português",
    }

    # A set removes duplicates when both <code>.json and <code>.lang exist.
    codes = set()
    if _LANG_DIR.exists():
        for pattern in ("*.json", "*.lang"):
            codes.update(path.stem for path in _LANG_DIR.glob(pattern))

    available = [
        {"code": code, "name": native_names.get(code, code.upper())}
        for code in sorted(codes)
    ]
    return jsonify({"langs": available, "current": state.LANG.get("_lang_code", "en")})
|
||||
|
||||
|
||||
@bp.route("/api/set_lang", methods=["POST"])
def set_lang():
    """Switch the UI language and return the new translations.

    Reads ``lang`` from the JSON body (default "en"), normalises it to a
    stripped, lower-cased string of at most 10 characters, stores it as the
    language override and reloads ``state.LANG`` for that code.
    """
    # NOTE(review): the code is passed on without being checked against the
    # available languages — confirm _load_lang_forced() copes with unknown
    # or malformed codes.
    payload = request.get_json(force=True) or {}
    requested = payload.get("lang", "en")
    code = str(requested).strip().lower()[:10]

    _set_lang_override(code)
    state.LANG = _load_lang_forced(code)

    response = {"status": "ok", "lang": code, "translations": state.LANG}
    return jsonify(response)
|
||||
|
||||
|
||||
@bp.route("/api/lang")
def get_lang_json():
    """Serve the translation table of the active UI language as JSON."""
    active_translations = state.LANG
    return jsonify(active_translations)
|
||||
|
||||
|
||||
@bp.route("/manual")
def manual():
    """Serve the user manual as a styled, printable HTML page.

    Respects ?lang=da|en; falls back to the current UI language. Any value
    other than "da" or "en" is treated as "da". The Markdown source is read
    from ``docs/manuals/MANUAL-DA.md`` or ``MANUAL-EN.md`` and converted
    with ``_md_to_html``.

    Returns:
        An HTML ``Response``, or a plain-text message with status 404 when
        the manual file does not exist.
    """
    import sys as _sys

    # Explicit query parameter wins; otherwise use the active UI language.
    lang = request.args.get("lang", "").strip().lower() or \
        state.LANG.get("_lang_code", "da")
    # Only two manuals exist, so clamp to a known value before it is used
    # in file names and interpolated into the page.
    lang = lang if lang in ("da", "en") else "da"

    # Frozen builds read bundled files from sys._MEIPASS; source checkouts
    # read them from the directory above this package.
    _here = Path(_sys._MEIPASS) if getattr(_sys, "frozen", False) \
        else Path(__file__).parent.parent
    fname = "MANUAL-DA.md" if lang == "da" else "MANUAL-EN.md"
    md_path = _here / "docs" / "manuals" / fname
    if not md_path.exists():
        return f"Manual file not found: {fname}", 404

    md_text = md_path.read_text(encoding="utf-8")
    body_html = _md_to_html(md_text)

    # Localised page chrome; the toolbar links to the other language.
    title = "GDPR Scanner — Brugermanual" if lang == "da" \
        else "GDPR Scanner — User Manual"
    print_label = "Udskriv" if lang == "da" else "Print"
    other_lang = "en" if lang == "da" else "da"
    other_label = "English" if lang == "da" else "Dansk"

    # Doubled braces below are literal CSS braces inside the f-string.
    page = f"""<!DOCTYPE html>
<html lang="{lang}">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>{title}</title>
<style>
:root {{
--text: #1a1a1a;
--muted: #555;
--border: #ddd;
--accent: #0060b0;
--bg: #fff;
--surface: #f6f8fa;
--code-bg: #f0f0f0;
}}
*, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
font-size: 15px;
line-height: 1.7;
color: var(--text);
background: var(--bg);
max-width: 860px;
margin: 0 auto;
padding: 32px 24px 64px;
}}
h1 {{ font-size: 1.9em; margin: 0 0 4px; color: var(--text); }}
h2 {{ font-size: 1.35em; margin: 2.2em 0 .6em; padding-bottom: .3em;
border-bottom: 2px solid var(--border); color: var(--text); }}
h3 {{ font-size: 1.1em; margin: 1.6em 0 .4em; color: var(--text); }}
h4 {{ font-size: 1em; margin: 1.2em 0 .3em; color: var(--muted); }}
p {{ margin: .6em 0; }}
a {{ color: var(--accent); text-decoration: none; }}
a:hover {{ text-decoration: underline; }}
strong {{ font-weight: 600; }}
em {{ font-style: italic; }}
hr {{ border: none; border-top: 1px solid var(--border); margin: 1.8em 0; }}
blockquote {{
border-left: 3px solid var(--accent);
margin: .8em 0;
padding: .4em 1em;
background: var(--surface);
border-radius: 0 4px 4px 0;
color: var(--muted);
}}
code {{
font-family: "SF Mono", Consolas, "Liberation Mono", monospace;
font-size: .88em;
background: var(--code-bg);
padding: 1px 5px;
border-radius: 3px;
}}
pre {{
background: var(--code-bg);
border: 1px solid var(--border);
border-radius: 6px;
padding: 14px 16px;
overflow-x: auto;
margin: .8em 0;
font-size: .85em;
line-height: 1.5;
}}
pre code {{ background: none; padding: 0; font-size: inherit; }}
ul, ol {{ margin: .5em 0 .5em 1.6em; }}
li {{ margin: .25em 0; }}
table {{
border-collapse: collapse;
width: 100%;
margin: .8em 0;
font-size: .93em;
}}
th, td {{
border: 1px solid var(--border);
padding: 7px 12px;
text-align: left;
vertical-align: top;
}}
th {{
background: var(--surface);
font-weight: 600;
}}
tr:nth-child(even) td {{ background: #fafafa; }}

/* ── Top toolbar ── */
.manual-toolbar {{
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 28px;
padding-bottom: 14px;
border-bottom: 1px solid var(--border);
}}
.manual-toolbar .spacer {{ flex: 1; }}
.toolbar-btn {{
font-size: 13px;
padding: 5px 14px;
border-radius: 6px;
border: 1px solid var(--border);
background: var(--surface);
color: var(--text);
cursor: pointer;
text-decoration: none;
display: inline-block;
}}
.toolbar-btn:hover {{ background: var(--border); }}
.toolbar-btn.primary {{
background: var(--accent);
color: #fff;
border-color: var(--accent);
}}
.toolbar-btn.primary:hover {{ opacity: .88; }}

/* ── Table of contents ── */
.toc {{
background: var(--surface);
border: 1px solid var(--border);
border-radius: 8px;
padding: 16px 20px;
margin: 1.2em 0 2em;
font-size: .93em;
}}
.toc ol {{ margin: .3em 0 0 1.2em; }}
.toc li {{ margin: .3em 0; }}

/* ── Print ── */
@media print {{
.manual-toolbar {{ display: none !important; }}
body {{ max-width: 100%; padding: 0; font-size: 12pt; }}
h2 {{ page-break-before: always; }}
h2:first-of-type {{ page-break-before: avoid; }}
pre, blockquote, table {{ page-break-inside: avoid; }}
a {{ color: var(--text); text-decoration: none; }}
a[href^="http"]::after {{ content: " (" attr(href) ")"; font-size: .8em; color: var(--muted); }}
tr:nth-child(even) td {{ background: #f5f5f5; }}
}}
</style>
</head>
<body>
<div class="manual-toolbar">
<strong style="font-size:14px">{title}</strong>
<span class="spacer"></span>
<a class="toolbar-btn" href="/manual?lang={other_lang}">{other_label}</a>
<button class="toolbar-btn primary" onclick="window.print()">🖨 {print_label}</button>
</div>
{body_html}
</body>
</html>"""
    return Response(page, mimetype="text/html")
|
||||
|
||||
|
||||
def _md_to_html(md: str) -> str:
|
||||
"""Lightweight Markdown → HTML converter (no external dependencies).
|
||||
Handles headings, tables, lists, blockquotes, code blocks, bold/italic,
|
||||
inline code, links, and horizontal rules."""
|
||||
import re, html as _html
|
||||
|
||||
def inline(text: str) -> str:
|
||||
text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
|
||||
text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)
|
||||
text = re.sub(r'`(.+?)`', lambda m: '<code>' + _html.escape(m.group(1)) + '</code>', text)
|
||||
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
|
||||
return text
|
||||
|
||||
def make_anchor(text: str) -> str:
|
||||
return re.sub(r'[^\w\s-]', '', text.lower()).strip().replace(' ', '-')
|
||||
|
||||
result = []
|
||||
lines = md.splitlines()
|
||||
i = 0
|
||||
|
||||
in_code = False
|
||||
code_buf = []
|
||||
in_list = False
|
||||
list_type = None
|
||||
list_buf = []
|
||||
in_table = False
|
||||
tbl_buf = []
|
||||
|
||||
def flush_list():
|
||||
nonlocal in_list, list_type, list_buf
|
||||
if not in_list:
|
||||
return
|
||||
tag = list_type
|
||||
result.append(f'<{tag}>')
|
||||
for item in list_buf:
|
||||
result.append(f' <li>{inline(item)}</li>')
|
||||
result.append(f'</{tag}>')
|
||||
in_list = False; list_buf = []; list_type = None
|
||||
|
||||
def flush_table():
|
||||
nonlocal in_table, tbl_buf
|
||||
if not in_table or len(tbl_buf) < 2:
|
||||
in_table = False; tbl_buf = []; return
|
||||
heads = [c.strip() for c in tbl_buf[0].strip('|').split('|')]
|
||||
result.append('<table>')
|
||||
result.append('<thead><tr>' + ''.join(f'<th>{inline(h)}</th>' for h in heads) + '</tr></thead>')
|
||||
result.append('<tbody>')
|
||||
for row in tbl_buf[2:]:
|
||||
cols = [c.strip() for c in row.strip('|').split('|')]
|
||||
result.append('<tr>' + ''.join(f'<td>{inline(c)}</td>' for c in cols) + '</tr>')
|
||||
result.append('</tbody></table>')
|
||||
in_table = False; tbl_buf = []
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
i += 1
|
||||
|
||||
# ── fenced code block ──────────────────────────────────────────
|
||||
if line.startswith('```'):
|
||||
if not in_code:
|
||||
flush_list(); flush_table()
|
||||
in_code = True; code_buf = []
|
||||
else:
|
||||
in_code = False
|
||||
escaped = _html.escape('\n'.join(code_buf))
|
||||
result.append(f'<pre><code>{escaped}</code></pre>')
|
||||
continue
|
||||
if in_code:
|
||||
code_buf.append(line)
|
||||
continue
|
||||
|
||||
# ── table row ─────────────────────────────────────────────────
|
||||
if line.strip().startswith('|') and '|' in line[1:]:
|
||||
flush_list()
|
||||
in_table = True
|
||||
tbl_buf.append(line)
|
||||
continue
|
||||
elif in_table:
|
||||
flush_table()
|
||||
|
||||
# ── blank line ────────────────────────────────────────────────
|
||||
if not line.strip():
|
||||
flush_list()
|
||||
result.append('')
|
||||
continue
|
||||
|
||||
# ── heading ───────────────────────────────────────────────────
|
||||
m = re.match(r'^(#{1,6})\s+(.+)$', line)
|
||||
if m:
|
||||
flush_list()
|
||||
lvl = len(m.group(1))
|
||||
text = m.group(2)
|
||||
anc = make_anchor(text)
|
||||
result.append(f'<h{lvl} id="{anc}">{inline(text)}</h{lvl}>')
|
||||
continue
|
||||
|
||||
# ── horizontal rule ───────────────────────────────────────────
|
||||
if re.match(r'^-{3,}$', line.strip()):
|
||||
flush_list()
|
||||
result.append('<hr>')
|
||||
continue
|
||||
|
||||
# ── blockquote ────────────────────────────────────────────────
|
||||
if line.startswith('> '):
|
||||
flush_list()
|
||||
result.append(f'<blockquote>{inline(line[2:])}</blockquote>')
|
||||
continue
|
||||
|
||||
# ── unordered list ────────────────────────────────────────────
|
||||
m = re.match(r'^- (.+)$', line)
|
||||
if m:
|
||||
if not in_list or list_type != 'ul':
|
||||
flush_list()
|
||||
in_list = True; list_type = 'ul'; list_buf = []
|
||||
list_buf.append(m.group(1))
|
||||
continue
|
||||
|
||||
# ── ordered list ─────────────────────────────────────────────
|
||||
m = re.match(r'^\d+\. (.+)$', line)
|
||||
if m:
|
||||
if not in_list or list_type != 'ol':
|
||||
flush_list()
|
||||
in_list = True; list_type = 'ol'; list_buf = []
|
||||
list_buf.append(m.group(1))
|
||||
continue
|
||||
|
||||
# ── paragraph ────────────────────────────────────────────────
|
||||
flush_list()
|
||||
result.append(f'<p>{inline(line)}</p>')
|
||||
|
||||
flush_list()
|
||||
flush_table()
|
||||
return '\n'.join(result)
|
||||
|
||||
|
||||
179
routes/auth.py
Normal file
179
routes/auth.py
Normal file
@ -0,0 +1,179 @@
|
||||
"""
|
||||
Microsoft 365 authentication routes
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import threading
|
||||
from flask import Blueprint, jsonify, request
|
||||
from routes import state
|
||||
from app_config import _load_config, _save_config
|
||||
|
||||
try:
|
||||
from m365_connector import M365Connector, M365Error, MSAL_OK
|
||||
except ImportError:
|
||||
MSAL_OK = False
|
||||
M365Connector = None # type: ignore[assignment,misc]
|
||||
class M365Error(Exception): pass # type: ignore[no-redef]
|
||||
|
||||
bp = Blueprint("auth", __name__)
|
||||
|
||||
|
||||
@bp.route("/api/auth/status")
def auth_status():
    """Report whether a Microsoft 365 connection is authenticated.

    The saved configuration is loaded first. When msal is unavailable an
    unauthenticated payload with an error is returned. Otherwise the shared
    connector is (re)built if none exists or if its client id / secret no
    longer match the saved ones, and — in app mode — authenticated right
    away. If the connector is authenticated and the user info can be
    fetched, the payload includes display name and e-mail; every other
    outcome yields ``authenticated: False``.
    """
    # NOTE(review): both payloads echo the stored client_secret in clear
    # text, whereas /api/auth/config only returns a masked preview — confirm
    # the frontend really needs the full value here.
    cfg = _load_config()
    if not MSAL_OK:
        return jsonify({"authenticated": False, "error": "msal not installed",
                        "client_id": cfg.get("client_id",""), "tenant_id": cfg.get("tenant_id","")})

    saved_secret = cfg.get("client_secret", "")
    saved_cid = cfg.get("client_id", "")
    saved_tid = cfg.get("tenant_id", "")
    saved = {
        "client_id": saved_cid,
        "tenant_id": saved_tid,
        "client_secret": saved_secret,
    }

    # A rebuild is needed when there is no connector yet, or when the saved
    # credentials differ from those the current connector was built with
    # (e.g. a secret was entered after connecting without one).
    current = state.connector
    stale = (
        not current
        or getattr(current, "client_secret", None) != saved_secret
        or getattr(current, "client_id", None) != saved_cid
    )

    if stale and saved_cid and saved_tid:
        try:
            state.connector = M365Connector(saved_cid, saved_tid, client_secret=saved_secret)
            if state.connector.is_app_mode:
                state.connector.authenticate_app_mode()
        except Exception:
            state.connector = None

    if state.connector and state.connector.is_authenticated():
        try:
            info = state.connector.get_user_info()
            return jsonify({
                "authenticated": True,
                "display_name": info.get("displayName",""),
                "email": info.get("mail") or info.get("userPrincipalName",""),
                **saved,
                "app_mode": state.connector.is_app_mode,
            })
        except Exception:
            # Fall through to the unauthenticated payload.
            pass

    return jsonify({"authenticated": False, **saved})
|
||||
|
||||
|
||||
@bp.route("/api/auth/start", methods=["POST"])
def auth_start():
    """Start Microsoft 365 authentication with the posted credentials.

    Expects JSON with ``client_id``, ``tenant_id`` and optional
    ``client_secret``. A new connector is created and stored in ``state``.

    * Application mode (``connector.is_app_mode``): the token is acquired
      immediately, the credentials are saved and ``{"mode": "application"}``
      is returned.
    * Delegated mode: a device-code flow is started, the config is saved
      with an empty secret, a daemon thread waits for the user to finish
      signing in, and the user code / verification URI / message are
      returned. The outcome is published in ``state.auth_poll_result``.

    Errors are reported as ``{"error": ...}`` in the JSON body.
    """
    if not MSAL_OK:
        return jsonify({"error": "msal not installed — run: pip install msal"})
    data = request.get_json() or {}
    client_id = data.get("client_id","").strip()
    tenant_id = data.get("tenant_id","").strip()
    client_secret = data.get("client_secret","").strip()
    if not client_id or not tenant_id:
        return jsonify({"error": "client_id and tenant_id required"})
    try:
        connector = M365Connector(client_id, tenant_id, client_secret=client_secret)
        state.connector = connector

        if connector.is_app_mode:
            # Application mode — acquire token immediately, no device code
            connector.authenticate_app_mode()
            _save_config({"client_id": client_id, "tenant_id": tenant_id,
                          "client_secret": client_secret})
            return jsonify({"mode": "application"})

        # Delegated mode — start device code flow
        flow = connector.get_device_code_flow()
        state.pending_flow = flow
        state.auth_poll_result = None
        _save_config({"client_id": client_id, "tenant_id": tenant_id, "client_secret": ""})

        def _do_auth():
            # Uses the connector and flow captured above rather than
            # re-reading ``state``: another request may rebuild or clear
            # ``state.connector`` while the user is still signing in, and the
            # flow must be completed by the connector that issued it.
            try:
                ok = connector.complete_device_code_flow(flow)
                state.auth_poll_result = "ok" if ok else "Sign-in failed"
            except (M365Error, Exception) as e:
                state.auth_poll_result = str(e)

        threading.Thread(target=_do_auth, daemon=True).start()

        # Built from the local ``flow``: ``state.pending_flow`` may already
        # have been reset by a poll request at this point.
        return jsonify({
            "mode": "delegated",
            "user_code": flow["user_code"],
            "verification_uri": flow["verification_uri"],
            "message": flow["message"],
        })
    except Exception as e:
        return jsonify({"error": str(e)})
|
||||
|
||||
|
||||
@bp.route("/api/auth/poll", methods=["POST"])
def auth_poll():
    """Report the state of the pending device-code sign-in.

    Returns ``status`` "pending" while no outcome has been recorded, "ok"
    after a successful sign-in, or "error" with a message (also when there
    is no connector or no pending flow). A final outcome is consumed: the
    stored result and pending flow are cleared when it is returned.
    """
    if not state.connector or not state.pending_flow:
        return jsonify({"status": "error", "error": "No pending flow"})

    # Outcome as published by the background sign-in thread.
    outcome = state.auth_poll_result
    if not outcome or outcome == "pending":
        return jsonify({"status": "pending"})

    # Terminal outcome — reset the shared state before answering.
    state.auth_poll_result = None
    state.pending_flow = None
    if outcome == "ok":
        return jsonify({"status": "ok"})
    return jsonify({"status": "error", "error": outcome})
|
||||
|
||||
|
||||
@bp.route("/api/auth/userinfo")
def auth_userinfo():
    """Return the connector's user info as JSON.

    Responds with 401 when no connector exists and with 500 (carrying the
    exception text) when fetching or serialising the user info fails.
    """
    if not state.connector:
        return jsonify({"error": "not connected"}), 401

    try:
        profile = state.connector.get_user_info()
        payload = jsonify(profile)
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
    return payload
|
||||
|
||||
|
||||
@bp.route("/api/auth/signout", methods=["POST"])
def auth_signout():
    """Sign out of Microsoft 365 and forget the cached delegated token.

    Every step is best-effort: a failing ``sign_out()`` or a token cache
    that cannot be located or removed never prevents the endpoint from
    answering ``{"status": "ok"}``.
    """
    if state.connector:
        try:
            state.connector.sign_out()
        except Exception:
            pass  # connector is dropped regardless
        state.connector = None

    # Also clear the delegated token cache so a fresh sign-in is required.
    # The import sits inside the try block because m365_connector is an
    # optional module (see the guarded import at the top of this file);
    # sign-out must not fail with ImportError when it is missing.
    try:
        from m365_connector import _TOKEN_CACHE_FILE
        if _TOKEN_CACHE_FILE.exists():
            _TOKEN_CACHE_FILE.unlink()
    except Exception:
        pass
    return jsonify({"status": "ok"})
|
||||
|
||||
|
||||
@bp.route("/api/auth/config", methods=["GET", "POST"])
def auth_config():
    """GET: return saved config (secret masked). POST: update config directly.

    POST requires ``client_id`` and ``tenant_id`` (400 otherwise), saves the
    three credential fields and drops the current connector. GET never
    returns the secret itself, only whether one is stored plus a preview:
    first and last four characters for secrets longer than eight
    characters, "***" for shorter ones, "" when there is none.
    """
    if request.method != "POST":
        cfg = _load_config()
        secret = cfg.get("client_secret", "")
        if len(secret) > 8:
            preview = secret[:4] + "…" + secret[-4:]
        elif secret:
            preview = "***"
        else:
            preview = ""
        return jsonify({
            "client_id": cfg.get("client_id", ""),
            "tenant_id": cfg.get("tenant_id", ""),
            "has_secret": bool(secret),
            "secret_preview": preview,
        })

    data = request.get_json() or {}
    submitted = {
        key: data.get(key, "").strip()
        for key in ("client_id", "tenant_id", "client_secret")
    }
    if not submitted["client_id"] or not submitted["tenant_id"]:
        return jsonify({"error": "client_id and tenant_id required"}), 400

    _save_config(submitted)
    # Force connector rebuild on next request
    state.connector = None
    return jsonify({"status": "saved", "app_mode": bool(submitted["client_secret"])})
|
||||
591
routes/database.py
Normal file
591
routes/database.py
Normal file
@ -0,0 +1,591 @@
|
||||
"""
|
||||
Database stats, disposition, export/import, admin PIN, preview, thumbnail
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import base64
|
||||
from pathlib import Path
|
||||
from flask import Blueprint, Response, jsonify, request
|
||||
from routes import state
|
||||
from app_config import _set_admin_pin, _verify_admin_pin, _admin_pin_is_set
|
||||
from checkpoint import _clear_checkpoint, _DELTA_PATH
|
||||
from cpr_detector import _extract_exif, _html_esc, _placeholder_svg
|
||||
|
||||
try:
|
||||
from gdpr_db import get_db as _get_db
|
||||
DB_OK = True
|
||||
except ImportError:
|
||||
DB_OK = False
|
||||
def _get_db(*a, **kw): return None # type: ignore[misc]
|
||||
|
||||
try:
|
||||
import document_scanner as _ds # noqa: F401
|
||||
SCANNER_OK = True
|
||||
except ImportError:
|
||||
SCANNER_OK = False
|
||||
|
||||
bp = Blueprint("database", __name__)
|
||||
|
||||
|
||||
@bp.route("/api/db/stats")
def db_stats():
    """Return stats for the latest (or specified) scan, plus aggregate counts.

    Query params:
      scan_id   int, optional — forwarded to ``db.get_stats``.

    The aggregate counters (``total_items``, ``flagged_items``,
    ``total_scans``, ``finished_scans``) are queried directly from the SQLite
    file so they are correct even if no scan has ``finished_at`` set yet.
    ``flagged_count`` and ``total_scanned`` are only filled in when the
    per-scan stats did not provide a truthy value.

    Returns 503 when the database layer could not be imported.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    scan_id = request.args.get("scan_id", type=int)
    db = _get_db()
    data = db.get_stats(scan_id) or {}

    try:
        import sqlite3 as _sq
        from contextlib import closing

        # closing() guarantees the connection is released even when one of
        # the queries raises; sqlite3's own context manager only manages the
        # transaction and would leave the handle open.
        with closing(_sq.connect(db._path)) as con:

            def _scalar(sql):
                """Run a single-value query and return that value."""
                return con.execute(sql).fetchone()[0]

            data["total_items"] = _scalar("SELECT COUNT(*) FROM flagged_items")
            data["flagged_items"] = data["total_items"]
            data["total_scans"] = _scalar("SELECT COUNT(*) FROM scans")
            data["finished_scans"] = _scalar(
                "SELECT COUNT(*) FROM scans WHERE finished_at IS NOT NULL"
            )
            if not data.get("flagged_count"):
                data["flagged_count"] = data["total_items"]
            if not data.get("total_scanned"):
                data["total_scanned"] = _scalar(
                    "SELECT COALESCE(SUM(total_scanned),0) FROM scans"
                )
    except Exception:
        # Best effort: the Settings panel still gets numeric counters when
        # the direct queries fail (e.g. tables not created yet).
        for key in ("total_items", "flagged_items", "total_scans", "finished_scans"):
            data.setdefault(key, 0)

    return jsonify(data)
|
||||
|
||||
|
||||
@bp.route("/api/db/trend")
def db_trend():
    """Return scan history for the trend chart.

    Query params:
      n   int, number of scans to include (default 20).

    Returns 503 when the database layer is unavailable.
    """
    if DB_OK:
        scan_count = request.args.get("n", default=20, type=int)
        history = _get_db().get_trend(scan_count)
        return jsonify(history)
    return jsonify({"error": "database not available"}), 503
|
||||
|
||||
|
||||
@bp.route("/api/db/scans")
def db_scans():
    """Return the database's scan list as JSON (503 if the DB is unavailable)."""
    if DB_OK:
        scans = _get_db().scans_list()
        return jsonify(scans)
    return jsonify({"error": "database not available"}), 503
|
||||
|
||||
|
||||
@bp.route("/api/db/subject", methods=["POST"])
def db_subject_lookup():
    """Find all items containing a given CPR number.

    Body: {cpr: "DDMMYY-XXXX"}

    Surrounding whitespace, hyphens and spaces are removed from the CPR
    before it is handed to ``lookup_data_subject``.
    NOTE(review): the original docstring states the CPR is hashed before
    querying and never stored in plaintext — that happens (if at all) inside
    the DB layer and is not visible from this route.

    Returns 400 when no CPR is supplied and 503 when the DB is unavailable.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    body = request.get_json() or {}
    # Drop the separators people commonly type: "010190-1234", "010190 1234".
    separators = str.maketrans("", "", "- ")
    cpr = body.get("cpr", "").strip().translate(separators)
    if cpr == "":
        return jsonify({"error": "cpr required"}), 400

    matches = _get_db().lookup_data_subject(cpr)
    return jsonify({"count": len(matches), "items": matches})
|
||||
|
||||
|
||||
@bp.route("/api/db/overdue")
def db_overdue():
    """Return items older than the retention threshold.

    Query params:
      years            int, default 5
      fiscal_year_end  MM-DD string, e.g. 12-31 (omit for rolling window)
      scan_id          int (omit for latest scan)

    A ``ValueError`` raised while computing the cutoff or loading the items
    is reported as a 400; an unavailable database as a 503.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    args = request.args
    years = args.get("years", default=5, type=int)
    fiscal_year_end = args.get("fiscal_year_end", default=None)
    scan_id = args.get("scan_id", type=int)

    try:
        from gdpr_db import overdue_cutoff
        cutoff = overdue_cutoff(years, fiscal_year_end)
        items = _get_db().get_overdue_items(years, scan_id, fiscal_year_end)
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400

    # A fiscal year end switches the cutoff from a rolling window to
    # fiscal-year mode; the mode is echoed back to the client.
    if fiscal_year_end:
        cutoff_mode = "fiscal"
    else:
        cutoff_mode = "rolling"

    response = {
        "count": len(items),
        "cutoff_date": cutoff,
        "cutoff_mode": cutoff_mode,
        "fiscal_year_end": fiscal_year_end,
        "years": years,
        "items": items,
    }
    return jsonify(response)
|
||||
|
||||
|
||||
@bp.route("/api/db/disposition", methods=["POST"])
def db_set_disposition():
    """Set a compliance disposition on a flagged item.

    Body: {item_id, status, legal_basis?, notes?, reviewed_by?}
    Status values: unreviewed | retain-legal | retain-legitimate | retain-contract |
                   delete-scheduled | deleted | personal-use

    ``status`` defaults to "unreviewed"; the other optional fields default
    to an empty string. Returns 400 without an item_id, 503 without a DB.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    body = request.get_json() or {}
    item_id = body.get("item_id", "")
    if not item_id:
        return jsonify({"error": "item_id required"}), 400

    # Field name -> default used when the client omits it.
    defaults = {
        "status": "unreviewed",
        "legal_basis": "",
        "notes": "",
        "reviewed_by": "",
    }
    fields = {key: body.get(key, fallback) for key, fallback in defaults.items()}
    _get_db().set_disposition(item_id, **fields)
    return jsonify({"status": "saved"})
|
||||
|
||||
|
||||
@bp.route("/api/db/disposition/<item_id>")
def db_get_disposition(item_id):
    """Return the stored disposition for ``item_id``.

    Falls back to ``{"status": "unreviewed"}`` when the DB returns nothing
    for the item. Returns 503 when the database is unavailable.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    disposition = _get_db().get_disposition(item_id)
    if not disposition:
        disposition = {"status": "unreviewed"}
    return jsonify(disposition)
|
||||
|
||||
|
||||
@bp.route("/api/db/flagged")
def db_flagged_items():
    """Return the flagged items provided by ``get_session_items()``.

    Used by the read-only viewer to load results without an active SSE
    connection. JSON-encoded columns are decoded so each row matches the
    shape of the SSE cards: ``special_category`` becomes a list, and
    ``exif_json`` is replaced by a decoded ``exif`` entry.

    Returns an empty list (status 200) when the database is unavailable.
    """
    if not DB_OK:
        return jsonify([])

    session_rows = _get_db().get_session_items()
    # Normalise JSON-encoded columns the same way scan_engine does for SSE cards
    import json as _json

    normalised = []
    for row in session_rows:
        raw_special = row.get("special_category")
        if isinstance(raw_special, str):
            # An empty string is treated as an empty list.
            row["special_category"] = _json.loads(raw_special or "[]")
        else:
            row["special_category"] = row.get("special_category", [])

        raw_exif = row.get("exif_json")
        if isinstance(raw_exif, str):
            # An empty string is treated as an empty object.
            row["exif"] = _json.loads(raw_exif or "{}")
        else:
            row["exif"] = row.get("exif", {})

        row.pop("exif_json", None)
        normalised.append(row)

    return jsonify(normalised)
|
||||
|
||||
|
||||
@bp.route("/api/db/deletion_log")
def db_deletion_log():
    """Return the deletion audit log together with its summary stats.

    Query params:
      limit   int, default 500
      reason  str filter, optional

    Returns 503 when the database is unavailable.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    query = request.args
    entries = _get_db().get_deletion_log(
        limit=query.get("limit", default=500, type=int),
        reason=query.get("reason", default=None),
    )
    summary = _get_db().deletion_log_stats()
    return jsonify({"stats": summary, "entries": entries})
|
||||
|
||||
|
||||
@bp.route("/api/db/reset", methods=["POST"])
def db_reset():
    """Reset the database and clear in-memory scan results.

    Requires {confirm: "yes", pin: "<admin_pin>"} in request body; the PIN
    is only checked when an admin PIN has been set.

    Responses: 400 without confirmation, 403 on a wrong PIN, 503 when the
    database is unavailable, 500 (with the exception text) when the reset
    itself fails.
    """
    body = request.get_json() or {}

    if body.get("confirm") != "yes":
        return jsonify({"error": "confirm=yes required"}), 400

    pin_required = _admin_pin_is_set()
    if pin_required and not _verify_admin_pin(body.get("pin", "")):
        return jsonify({"error": "incorrect_pin"}), 403

    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    try:
        _get_db().reset()
        # Drop everything derived from earlier scans: in-memory results,
        # the checkpoint, and the delta file.
        state.flagged_items = []
        state.scan_meta = {}
        _clear_checkpoint()
        if _DELTA_PATH.exists():
            _DELTA_PATH.unlink()
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500

    return jsonify({"ok": True, "message": "Database reset. All scan results cleared."})
|
||||
|
||||
|
||||
@bp.route("/api/admin/pin", methods=["GET"])
def admin_pin_status():
    """Report, as ``{"pin_set": <bool>}``, whether an admin PIN is configured."""
    pin_set = _admin_pin_is_set()
    return jsonify({"pin_set": pin_set})
|
||||
|
||||
|
||||
@bp.route("/api/admin/pin", methods=["POST"])
def admin_pin_set():
    """Set or change the admin PIN.

    Body: {current_pin: "..", new_pin: ".."}
    If no PIN is currently set, current_pin is not required.

    Returns 400 when new_pin is missing or blank and 403 when a PIN exists
    and current_pin does not verify.
    """
    body = request.get_json() or {}

    new_pin = body.get("new_pin", "").strip()
    if new_pin == "":
        return jsonify({"error": "new_pin required"}), 400

    # Changing an existing PIN requires proving knowledge of the old one.
    must_verify = _admin_pin_is_set()
    if must_verify and not _verify_admin_pin(body.get("current_pin", "")):
        return jsonify({"error": "incorrect_pin"}), 403

    _set_admin_pin(new_pin)
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@bp.route("/api/db/export")
def db_export():
    """Export the database to a ZIP archive and return it as a download.

    The archive is produced by the DB layer's ``export_db`` into a temporary
    file, read into memory, and the temporary file is removed again (best
    effort) before the response is built. (#11)
    NOTE(review): the original docstring says the ZIP holds 8 JSON files,
    that CPR numbers are exported as SHA-256 hashes only and that thumbnails
    are stripped — those properties live in ``export_db`` and are not
    visible from this route.

    Returns 503 when the database is unavailable and 500 (error text plus
    traceback in ``detail``) when the export fails.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    import tempfile, datetime as _dt

    try:
        stamp = _dt.datetime.now().strftime("%Y%m%d_%H%M%S")
        download_name = f"gdpr_export_{stamp}.zip"

        # Only the path is needed; the handle is closed on leaving the
        # ``with`` so export_db can write to the file itself.
        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as handle:
            zip_path = Path(handle.name)

        try:
            _get_db().export_db(zip_path)
            payload = zip_path.read_bytes()
        finally:
            try:
                zip_path.unlink()
            except Exception:
                # A leftover temp file must not fail the download.
                pass

        disposition = f'attachment; filename="{download_name}"'
        return Response(
            payload,
            mimetype="application/zip",
            headers={"Content-Disposition": disposition},
        )
    except Exception as exc:
        import traceback
        return jsonify({"error": str(exc), "detail": traceback.format_exc()}), 500
|
||||
|
||||
|
||||
@bp.route("/api/db/import", methods=["POST"])
def db_import():
    """Import a previously exported ZIP archive into the database. (#11)

    Multipart form:
      file     — the export ZIP
      mode     — "merge" (default) or "replace"
      confirm  — must be "yes" when mode == "replace"

    The upload is written to a temporary file which is handed to the DB
    layer's ``import_db`` and removed afterwards, whether or not the import
    succeeded.

    Responses: 503 when the database is unavailable, 400 for a missing file,
    a missing confirmation, or a ``ValueError`` / ``FileNotFoundError`` from
    the import, 500 for any other failure.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    import tempfile

    upload = request.files.get("file")
    if not upload:
        return jsonify({"error": "no file uploaded"}), 400

    mode = request.form.get("mode", "merge")
    confirm = request.form.get("confirm", "")
    if mode == "replace" and confirm != "yes":
        return jsonify({"error": "confirm=yes required for replace mode"}), 400

    tmp = None
    try:
        # NamedTemporaryFile creates the file atomically with a unique name
        # (unlike the deprecated, race-prone tempfile.mktemp). The handle is
        # closed immediately so the upload can be saved to the path on every
        # platform.
        with tempfile.NamedTemporaryFile(
            suffix=".zip", prefix="gdpr_import_", delete=False
        ) as handle:
            tmp = Path(handle.name)
        upload.save(str(tmp))
        result = _get_db().import_db(tmp, mode=mode)
        return jsonify({"ok": True, "mode": mode, "imported": result})
    except (ValueError, FileNotFoundError) as exc:
        return jsonify({"error": str(exc)}), 400
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
    finally:
        # Always remove the uploaded archive, including after a failed import.
        if tmp is not None:
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                pass
|
||||
|
||||
|
||||
# Stylesheet embedded in the standalone HTML page built for email previews.
_PREVIEW_EMAIL_CSS = """\
*, *::before, *::after { box-sizing: border-box; max-width: 100%; }
html, body { margin: 0; padding: 0; overflow-x: hidden; }
body { font-family: -apple-system, sans-serif; font-size: 13px; padding: 12px 16px;
background: #fff; color: #111; word-break: break-word; }
img { max-width: 100% !important; height: auto !important; }
table { max-width: 100% !important; table-layout: fixed; word-break: break-word; }
.hdr { border-bottom: 1px solid #eee; margin-bottom: 12px; padding-bottom: 10px; }
.hdr-row { color: #555; font-size: 12px; margin-bottom: 3px; }
.hdr-row b { color: #111; }
.att-section { margin-top: 16px; border-top: 1px solid #eee; padding-top: 10px; }
.att-header { font-size: 12px; font-weight: 600; color: #555; margin-bottom: 6px; }
.att-row { display: flex; align-items: center; gap: 8px; font-size: 12px;
padding: 4px 0; border-bottom: 1px solid #f0f0f0; }
.att-name { flex: 1; color: #333; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.att-cpr { background: #fff0f0; color: #c00; font-size: 11px; padding: 1px 6px;
border-radius: 10px; font-weight: 600; white-space: nowrap; }
::-webkit-scrollbar { width: 4px; height: 4px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: #aaa; border-radius: 2px; }
* { scrollbar-width: thin; scrollbar-color: #aaa transparent; }"""


def _preview_mark_cpr(escaped_text):
    """Wrap CPR-like sequences (6 digits, optional '-'/space, 4 digits) in <mark>.

    ``escaped_text`` must already be HTML-escaped; the result is HTML.
    """
    import re as _re
    return _re.sub(
        r"(\d{6}[-\s]?\d{4})",
        r'<mark style="background:#ff444455;color:#ff8888;border-radius:2px">\1</mark>',
        escaped_text,
    )


def _preview_file_card(name, size, full_path, kind_suffix=""):
    """Build the generic "file card" shown when no inline preview exists.

    ``kind_suffix`` is appended verbatim after the size in KB.
    """
    return (
        f'<div style="padding:24px;text-align:center;font-family:sans-serif">'
        f'<div style="font-size:40px">📄</div>'
        f'<div style="font-size:13px;font-weight:600;margin:8px 0">{_html_esc(name)}</div>'
        f'<div style="font-size:11px;color:var(--muted)">{round(size/1024,1)} KB{kind_suffix}</div>'
        f'<div style="margin-top:12px;font-size:11px;color:var(--muted)">{_html_esc(full_path)}</div>'
        f'</div>'
    )


def _preview_exif_table(exif):
    """Render EXIF metadata as a collapsible HTML table ("" when nothing to show)."""
    if not exif:
        return ""
    import html as _h

    # (label_html, value_html) pairs; both parts are already safe HTML.
    rows = []
    gps = exif.get("gps")
    if gps:
        # EXIF comes from the scanned file itself, so escape the GPS values too.
        href = _h.escape(str(gps["maps_url"]), quote=True)
        lat = _h.escape(str(gps["lat"]))
        lon = _h.escape(str(gps["lon"]))
        rows.append((
            "📍 GPS",
            f'<a href="{href}" target="_blank" style="color:#7ec8d0">{lat}, {lon}</a>',
        ))
    for key, label in (("author", "👤 Author"), ("datetime", "📅 Date"), ("device", "📷 Device")):
        if exif.get(key):
            rows.append((label, _html_esc(exif[key])))
    for field, val in (exif.get("pii_fields") or {}).items():
        # "Artist" is skipped here, as in the original listing.
        if field != "Artist":
            rows.append((_html_esc(field), _html_esc(str(val)[:200])))
    if not rows:
        return ""

    body = "".join(
        f'<tr style="border-top:1px solid #333">'
        f'<td style="padding:4px 8px;color:#888;width:120px;white-space:nowrap">{label}</td>'
        f'<td style="padding:4px 8px;word-break:break-all">{value}</td></tr>'
        for label, value in rows
    )
    return (
        '<details style="margin:8px 12px;font-size:11px">'
        '<summary style="cursor:pointer;color:#888">EXIF data</summary>'
        '<table style="border-collapse:collapse;width:100%;margin-top:6px">'
        + body
        + '</table></details>'
    )


@bp.route("/api/preview/<item_id>")
def get_preview(item_id):
    """Return a preview URL or HTML for a flagged item.

    Query params:
      source_type  "local" / "smb" for file sources, "email" for messages;
                   anything else is treated as a drive item.
      account_id   user id for Graph lookups, defaults to "me".

    Local files are re-read from the path stored with the flagged item and
    rendered inline (images, text, PDF text, spreadsheets, Word documents);
    other types get a metadata card. SMB items only get an informational
    message. Emails and drive items are fetched through the active
    connector; without one the route answers 401.

    Apart from that 401, failures are reported as ``{"error": ...}`` with
    the default (200) status, as before.

    All file-, EXIF- and mail-derived text is HTML-escaped before it is put
    into markup. The body of an HTML email is passed through unchanged.
    """
    source_type = request.args.get("source_type", "")
    account_id = request.args.get("account_id", "me") or "me"

    # Local and SMB file sources — re-read file and render preview
    if source_type in ("local", "smb"):
        item_meta = next((x for x in state.flagged_items if x.get("id") == item_id), {})
        full_path = item_meta.get("full_path", "")
        name = item_meta.get("name", "")
        ext = Path(name).suffix.lower() if name else ""

        if not full_path:
            return jsonify({"error": "File path not available — rescan to enable preview"})

        if source_type == "smb":
            return jsonify({
                "type": "info",
                "html": f"<p style='color:var(--muted);font-size:12px'>SMB preview requires re-reading the file over the network. Open the file directly: <code>{_html_esc(full_path)}</code></p>",
            })

        try:
            import html as _html, io as _io

            file_path = Path(full_path).expanduser()
            if not file_path.exists():
                return jsonify({"error": f"File not found: {full_path}"})

            size = file_path.stat().st_size

            # Images — return as data URI
            if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}:
                mime = {"jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
                        "gif": "image/gif", "webp": "image/webp", "bmp": "image/bmp"}.get(ext.lstrip("."), "image/jpeg")
                # Read once; the bytes feed both the data URI and the EXIF fallback.
                image_bytes = file_path.read_bytes()
                data = base64.b64encode(image_bytes).decode()
                exif = item_meta.get("exif") or _extract_exif(image_bytes, name)
                exif_html = _preview_exif_table(exif)
                html = f'<div style="text-align:center;padding:12px"><img src="data:{mime};base64,{data}" style="max-width:100%;max-height:60vh;border-radius:6px"></div>{exif_html}'
                return jsonify({"type": "html", "html": html})

            # Text-based files — render with highlighted CPR numbers.
            # ".csv" is handled here, so it never reaches the spreadsheet branch.
            if ext in {".txt", ".csv", ".eml", ".md", ".log", ".xml", ".json", ".html", ".htm"}:
                if size > 2 * 1024 * 1024:
                    return jsonify({"error": "File too large for inline preview (>2 MB)"})
                raw = file_path.read_bytes().decode("utf-8", errors="replace")
                # Only the first 50000 characters are shown.
                escaped = _preview_mark_cpr(_html.escape(raw[:50000]))
                html_out = (
                    '<pre style="font-family:var(--mono);font-size:11px;white-space:pre-wrap;'
                    'word-break:break-all;padding:12px;color:var(--text);line-height:1.6">'
                    + escaped + "</pre>"
                )
                return jsonify({"type": "html", "html": html_out})

            # PDF — render first 5 pages as text using pdfplumber
            if ext == ".pdf":
                if size > 20 * 1024 * 1024:
                    return jsonify({"error": "File too large for preview (>20 MB)"})
                if SCANNER_OK:
                    try:
                        import pdfplumber as _plumber
                        pages_html = []
                        with _plumber.open(_io.BytesIO(file_path.read_bytes())) as pdf:
                            total = len(pdf.pages)
                            for i, page in enumerate(pdf.pages[:5]):
                                text = page.extract_text() or ""
                                if not text.strip():
                                    text = f"[Page {i+1}: image-only or OCR required]"
                                escaped = _preview_mark_cpr(_html.escape(text))
                                pages_html.append(
                                    f'<div style="border-bottom:1px solid #333;padding:10px 0;margin-bottom:8px">'
                                    f'<div style="font-size:9px;color:#666;margin-bottom:4px">Page {i+1}</div>'
                                    f'<pre style="font-size:11px;white-space:pre-wrap;word-break:break-all;margin:0;line-height:1.6">{escaped}</pre>'
                                    f'</div>'
                                )
                        note = f'<div style="font-size:10px;color:#666;padding:6px 0">Showing {min(5,total)} of {total} page(s)</div>' if total > 5 else ""
                        html_out = f'<div style="padding:10px">{note}{"".join(pages_html)}</div>'
                        return jsonify({"type": "html", "html": html_out})
                    except Exception:
                        # Text extraction is best-effort; fall back to the card.
                        pass
                return jsonify({"type": "html", "html": _preview_file_card(name, size, full_path)})

            # Excel — render the first 50 rows of up to three sheets
            if SCANNER_OK and ext in {".xlsx", ".xlsm"}:
                try:
                    import openpyxl as _xl
                    wb = _xl.load_workbook(_io.BytesIO(file_path.read_bytes()), read_only=True, data_only=True)
                    tabs = []
                    for sheet_name in wb.sheetnames[:3]:
                        ws = wb[sheet_name]
                        table_rows = ""
                        for i, row in enumerate(ws.iter_rows(max_row=50, values_only=True)):
                            style = "background:#2a2a2a" if i % 2 == 0 else ""
                            cells = "".join(
                                f'<td style="padding:3px 8px;border:1px solid #333;max-width:160px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">'
                                f'{_html.escape(str(c)[:80]) if c is not None else ""}</td>'
                                for c in row
                            )
                            table_rows += f'<tr style="{style}">{cells}</tr>'
                        tabs.append(
                            f'<div style="margin-bottom:12px">'
                            f'<div style="font-size:10px;color:#888;margin-bottom:4px">📋 {_html.escape(sheet_name)}</div>'
                            f'<div style="overflow-x:auto"><table style="border-collapse:collapse;font-size:11px;color:var(--text)">{table_rows}</table></div>'
                            f'</div>'
                        )
                    html_out = '<div style="padding:8px">' + "".join(tabs) + '</div>'
                    return jsonify({"type": "html", "html": html_out})
                except Exception:
                    # Unreadable workbook: fall through to the metadata card.
                    pass

            # Word — render the first 80 non-empty paragraphs as text
            if SCANNER_OK and ext in {".docx", ".doc"}:
                try:
                    from docx import Document as _Doc
                    doc = _Doc(_io.BytesIO(file_path.read_bytes()))
                    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()][:80]
                    escaped = _preview_mark_cpr(_html.escape("\n".join(paragraphs)))
                    html_out = f'<div style="padding:12px"><pre style="font-size:11px;white-space:pre-wrap;word-break:break-all;line-height:1.7">{escaped}</pre></div>'
                    return jsonify({"type": "html", "html": html_out})
                except Exception:
                    # Unreadable document: fall through to the metadata card.
                    pass

            kind_suffix = f' · {ext.upper().lstrip(".")} file'
            return jsonify({"type": "html", "html": _preview_file_card(name, size, full_path, kind_suffix)})

        except PermissionError:
            return jsonify({"error": f"Permission denied: {full_path}"})
        except Exception as e:
            return jsonify({"error": str(e)})

    if not state.connector:
        return jsonify({"error": "not authenticated"}), 401

    item_meta = next((x for x in state.flagged_items if x.get("id") == item_id), {})
    drive_id = item_meta.get("drive_id", "")

    try:
        if source_type == "email":
            import html as _html

            uid = account_id
            owner = "me" if uid == "me" else "users/" + uid
            try:
                msg = state.connector._get(
                    f"/{owner}/messages/{item_id}",
                    {"$select": "subject,from,receivedDateTime,body"}
                )
            except Exception as e:
                return jsonify({"error": f"Could not load email: {e}"})

            # ``from``/``body``/``subject`` may be present but null; treat
            # null like "missing" instead of crashing on it.
            sender = (msg.get("from") or {}).get("emailAddress") or {}
            from_str = f"{sender.get('name', '')} <{sender.get('address', '')}>"
            date_str = (msg.get("receivedDateTime") or "")[:10]
            body = msg.get("body") or {}
            body_html = body.get("content", "") or ""
            content_type = body.get("contentType", "text")
            subject = msg.get("subject") or "(no subject)"
            if content_type == "text":
                body_html = "<pre style='white-space:pre-wrap;font-family:sans-serif'>" + _html.escape(body_html) + "</pre>"

            att_list = item_meta.get("attachments", [])
            att_html = ""
            if att_list:
                def _att_row(a):
                    count = a.get("cpr_count")
                    cpr_badge = f'<span class="att-cpr">{_html.escape(str(count))} CPR</span>' if count else ''
                    name_esc = _html.escape(str(a.get("name", "")))
                    return f'<div class="att-row"><span class="att-name">{name_esc}</span>{cpr_badge}</div>'

                rows = "".join(_att_row(a) for a in att_list)
                att_html = (
                    '\n<div class="att-section">\n'
                    f'<div class="att-header">📎 Attachments ({len(att_list)})</div>\n'
                    f'{rows}\n'
                    '</div>'
                )

            # Header values are escaped: the sender is formatted as
            # "Name <address>", which would otherwise be parsed as a tag.
            page = (
                '<!DOCTYPE html><html><head><meta charset="utf-8">\n'
                '<style>\n' + _PREVIEW_EMAIL_CSS + '\n</style></head><body>\n'
                '<div class="hdr">\n'
                f'<div class="hdr-row"><b>From:</b> {_html.escape(from_str)}</div>\n'
                f'<div class="hdr-row"><b>Date:</b> {_html.escape(date_str)}</div>\n'
                f'<div class="hdr-row"><b>Subject:</b> {_html.escape(subject)}</div>\n'
                '</div>\n'
                f'{body_html}{att_html}\n'
                '</body></html>'
            )
            return jsonify({"type": "html", "html": page})

        else:
            # OneDrive / SharePoint / Teams — use Graph's embed preview API
            preview_url = None
            errors = []

            # Candidate endpoints, most specific first.
            endpoints_to_try = []
            if drive_id:
                endpoints_to_try.append(f"/drives/{drive_id}/items/{item_id}/preview")
            uid = account_id
            if uid and uid != "me":
                endpoints_to_try.append(f"/users/{uid}/drive/items/{item_id}/preview")
            endpoints_to_try.append(f"/me/drive/items/{item_id}/preview")

            for ep in endpoints_to_try:
                try:
                    data = state.connector._post(ep, {})
                    preview_url = data.get("getUrl") or data.get("postUrl")
                    if preview_url:
                        break
                except Exception as e:
                    errors.append(str(e))

            if preview_url:
                return jsonify({"type": "iframe", "url": preview_url})
            # Only the first collected error is reported to the client.
            return jsonify({"error": "No preview available for this file type. " + "; ".join(errors[:1])})

    except Exception as e:
        return jsonify({"error": str(e)})
|
||||
|
||||
|
||||
@bp.route("/api/thumb")
def thumb():
    """Fallback thumbnail for non-image files.

    Query params:
      name   file name (default "file"); its lower-cased extension and the
             name itself are passed to ``_placeholder_svg``.

    The base64 result of ``_placeholder_svg`` is decoded and served as
    ``image/svg+xml`` with a one-hour public cache header.
    """
    filename = request.args.get("name", "file")
    suffix = Path(filename).suffix.lower()
    svg_bytes = base64.b64decode(_placeholder_svg(suffix, filename))
    cache_headers = {"Cache-Control": "public, max-age=3600"}
    return Response(svg_bytes, mimetype="image/svg+xml", headers=cache_headers)
|
||||
303
routes/email.py
Normal file
303
routes/email.py
Normal file
@ -0,0 +1,303 @@
|
||||
"""
|
||||
SMTP configuration, test, and report sending
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from flask import Blueprint, jsonify, request
|
||||
from routes import state
|
||||
from app_config import _load_smtp_config, _save_smtp_config
|
||||
from routes.export import _build_excel_bytes
|
||||
|
||||
bp = Blueprint("email", __name__)
|
||||
|
||||
|
||||
def _send_report_email(xl_bytes: bytes, fname: str,
|
||||
smtp_cfg: dict, recipients: list[str]) -> None:
|
||||
"""Send the scan report Excel as an email attachment via SMTP."""
|
||||
import smtplib as _smtp
|
||||
import email.mime.text as _mime_text
|
||||
import email.mime.multipart as _mime_mp
|
||||
import email.mime.base as _mime_base
|
||||
import email.encoders as _encoders
|
||||
import datetime as _dt
|
||||
|
||||
host = smtp_cfg.get("host", "").strip()
|
||||
port = int(smtp_cfg.get("port", 587))
|
||||
username = smtp_cfg.get("username", "").strip()
|
||||
password = smtp_cfg.get("password", "")
|
||||
from_addr = smtp_cfg.get("from_addr", "").strip() or username
|
||||
use_ssl = bool(smtp_cfg.get("use_ssl", False))
|
||||
use_tls = bool(smtp_cfg.get("use_tls", True)) and not use_ssl
|
||||
|
||||
if not host:
|
||||
raise ValueError("No SMTP host configured")
|
||||
|
||||
subject = f"GDPR Scanner \u2014 scan report {_dt.datetime.now().strftime('%Y-%m-%d')}"
|
||||
body_html = (
|
||||
"<html><body style='font-family:Arial,sans-serif;color:#333;padding:24px'>"
|
||||
"<h2 style='color:#1F3864'>\u2601\ufe0f GDPR Scanner \u2014 scan report</h2>"
|
||||
f"<p>Please find the latest scan report attached ({fname}).</p>"
|
||||
f"<p style='color:#888;font-size:12px'>Generated: {_dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>"
|
||||
"</body></html>"
|
||||
)
|
||||
|
||||
msg = _mime_mp.MIMEMultipart("mixed")
|
||||
msg["Subject"] = subject
|
||||
msg["From"] = from_addr
|
||||
msg["To"] = ", ".join(recipients)
|
||||
msg.attach(_mime_text.MIMEText(body_html, "html"))
|
||||
|
||||
part = _mime_base.MIMEBase(
|
||||
"application",
|
||||
"vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
)
|
||||
part.set_payload(xl_bytes)
|
||||
_encoders.encode_base64(part)
|
||||
part.add_header("Content-Disposition", f'attachment; filename="{fname}"')
|
||||
msg.attach(part)
|
||||
|
||||
if use_ssl:
|
||||
server = _smtp.SMTP_SSL(host, port, timeout=30)
|
||||
else:
|
||||
server = _smtp.SMTP(host, port, timeout=30)
|
||||
with server:
|
||||
server.ehlo()
|
||||
if use_tls:
|
||||
server.starttls()
|
||||
server.ehlo()
|
||||
if username and password:
|
||||
server.login(username, password)
|
||||
server.sendmail(from_addr, recipients, msg.as_string())
|
||||
|
||||
|
||||
def _send_email_graph(subject: str, html_body: str,
                      recipients: list[str],
                      attachment_bytes: bytes = None,
                      attachment_name: str = None) -> None:
    """Send an email via Microsoft Graph API using the current connector.

    Requires Mail.Send permission (delegated or application).

    Args:
        subject:          message subject.
        html_body:        message body, sent with contentType "HTML".
        recipients:       addresses placed in ``toRecipients``.
        attachment_bytes: optional attachment content; only attached when
                          ``attachment_name`` is given as well.
        attachment_name:  file name of the attachment.

    In app mode the message is posted to ``/users/<sender>/sendMail``, where
    the sender is the saved SMTP ``from_addr``, else the SMTP ``username``,
    else the first recipient. Otherwise it is posted to ``/me/sendMail``.

    Raises:
        RuntimeError: when there is no authenticated connector.
        Whatever the connector's ``_post`` raises on failure.
    """
    connector = state.connector
    if not connector or not connector.is_authenticated():
        raise RuntimeError("Not connected to Microsoft 365")

    message: dict = {
        "subject": subject,
        "body": {"contentType": "HTML", "content": html_body},
        "toRecipients": [{"emailAddress": {"address": addr}} for addr in recipients],
    }

    if attachment_bytes and attachment_name:
        import base64 as _b64
        encoded = _b64.b64encode(attachment_bytes).decode()
        attachment = {
            "@odata.type": "#microsoft.graph.fileAttachment",
            "name": attachment_name,
            "contentType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "contentBytes": encoded,
        }
        message["attachments"] = [attachment]

    if connector.is_app_mode:
        # The sender mailbox is named explicitly in app mode.
        smtp_cfg = _load_smtp_config()
        sender = smtp_cfg.get("from_addr") or smtp_cfg.get("username") or recipients[0]
        endpoint = f"/users/{sender}/sendMail"
    else:
        endpoint = "/me/sendMail"

    connector._post(endpoint, {"message": message, "saveToSentItems": False})
|
||||
|
||||
|
||||
@bp.route("/api/smtp/config", methods=["GET"])
def smtp_config_get():
    """Return the saved SMTP config without the password.

    The password itself is never sent to the client; ``has_password`` tells
    the UI whether one is stored.
    """
    cfg = _load_smtp_config()
    public = dict(cfg)
    public.pop("password", None)
    public["has_password"] = bool(cfg.get("password"))
    return jsonify(public)
|
||||
|
||||
|
||||
@bp.route("/api/smtp/config", methods=["POST"])
def smtp_config_save():
    """Save the SMTP config sent as the JSON body.

    When the body carries no (or an empty) ``password`` and one is already
    stored, the stored password is carried over before saving.
    """
    incoming = request.get_json() or {}
    stored_password = _load_smtp_config().get("password")
    if stored_password and not incoming.get("password"):
        # Keep the previously saved password.
        incoming["password"] = stored_password
    _save_smtp_config(incoming)
    return jsonify({"status": "saved"})
|
||||
|
||||
|
||||
@bp.route("/api/smtp/test", methods=["POST"])
def smtp_test():
    """Send a test email to the saved recipients.

    Microsoft Graph is tried first when ``state.connector`` is authenticated
    (no SMTP settings needed). If Graph is unavailable or raises, the saved
    SMTP settings are used instead.

    Returns:
        ``{"ok": True, "message": ...}`` on success.
        ``{"error": ...}`` with status 400 when no recipients, no SMTP host,
        or an unparsable SMTP port is configured.
        ``{"error": ...}`` with status 200 when the SMTP attempt itself fails
        (status kept from the original implementation).
        NOTE(review): presumably the client inspects the ``error`` key rather
        than the HTTP status here — confirm before changing it.
    """
    import datetime as _dt

    saved = _load_smtp_config()
    recipients = saved.get("recipients", [])
    if isinstance(recipients, str):
        # Accept "a@x; b@y, c@z" style strings: both ';' and ',' separate addresses.
        recipients = [r.strip() for r in recipients.replace(";", ",").split(",") if r.strip()]
    if not recipients:
        return jsonify({"error": "No recipients configured — add at least one recipient and save first"}), 400

    # One timestamp for subject and body so the two can never disagree.
    now = _dt.datetime.now()
    subject = f"GDPR Scanner — test email ({now.strftime('%Y-%m-%d %H:%M')})"
    body_html = (
        "<html><body style='font-family:Arial,sans-serif;color:#333;padding:24px'>"
        "<h2 style='color:#1F3864'>☁️ GDPR Scanner — test email</h2>"
        "<p>This is a test email confirming that your email configuration is working correctly.</p>"
        f"<p style='color:#888;font-size:12px'>Sent: {now.strftime('%Y-%m-%d %H:%M:%S')}</p>"
        "</body></html>"
    )

    # Try Graph API first
    graph_error_str = None
    if state.connector and state.connector.is_authenticated():
        try:
            _send_email_graph(subject, body_html, recipients)
            return jsonify({"ok": True,
                            "message": f"Test email sent via Microsoft Graph to {', '.join(recipients)}"})
        except Exception as graph_err:
            # Remember the failure so the SMTP result can mention it.
            graph_error_str = str(graph_err)

    # Fall back to SMTP. Saved values may be null or empty strings, so every
    # field is normalised before use instead of assuming a well-formed config.
    host = (saved.get("host") or "").strip()
    username = (saved.get("username") or "").strip()
    password = saved.get("password") or ""
    from_addr = (saved.get("from_addr") or "").strip() or username
    use_ssl = bool(saved.get("use_ssl", False))
    use_tls = bool(saved.get("use_tls", True)) and not use_ssl  # STARTTLS is pointless on an SSL socket

    if not host:
        return jsonify({"error": "No SMTP host configured. To send via Microsoft 365 Graph (no SMTP needed), add Mail.Send to your Azure app registration."}), 400

    try:
        # A missing/empty port falls back to the default submission port 587.
        port = int(saved.get("port") or 587)
    except (TypeError, ValueError):
        return jsonify({"error": f"Invalid SMTP port: {saved.get('port')!r}"}), 400

    try:
        import smtplib as _smtp
        import email.mime.text as _mime_text
        import email.mime.multipart as _mime_mp

        msg = _mime_mp.MIMEMultipart("alternative")
        msg["Subject"] = subject
        msg["From"] = from_addr
        msg["To"] = ", ".join(recipients)
        msg.attach(_mime_text.MIMEText(body_html, "html"))

        if use_ssl:
            server = _smtp.SMTP_SSL(host, port, timeout=15)
        else:
            server = _smtp.SMTP(host, port, timeout=15)
        with server:
            server.ehlo()
            if use_tls:
                server.starttls()
                server.ehlo()  # re-identify after the TLS upgrade
            if username and password:
                server.login(username, password)
            server.sendmail(from_addr, recipients, msg.as_string())

        suffix = " (⚠ Graph also failed — Mail.Send permission not granted)" if graph_error_str else ""
        return jsonify({"ok": True, "message": f"Test email sent via SMTP to {', '.join(recipients)}{suffix}"})
    except Exception as smtp_err:
        # Translate common raw SMTP/socket errors into actionable advice.
        err_str = str(smtp_err)
        _h = host.lower()
        _corp_m365 = "office365" in _h or "microsoft" in _h
        _personal_ms = not _corp_m365 and any(s in _h for s in ("outlook", "live", "hotmail"))
        _gmail_host = "gmail" in _h or "smtp.google" in _h
        _auth_err = "5.7.57" in err_str or "530" in err_str or "535" in err_str or \
                    "534" in err_str or "not authenticated" in err_str.lower() or \
                    "Username and Password" in err_str
        _conn_err = "nodename nor servname" in err_str or "Name or service not known" in err_str or \
                    "getaddrinfo" in err_str or "Connection refused" in err_str or \
                    "Errno 8" in err_str or "Errno 111" in err_str or "Errno 61" in err_str or \
                    "timed out" in err_str.lower()
        if _conn_err:
            err_str = (f"Could not connect to SMTP server \"{host}\" on port {port}. "
                       f"Check that the hostname and port are correct.")
        elif _corp_m365 and _auth_err:
            err_str = ("M365 blocked SMTP AUTH. Fix: enable Authenticated SMTP in the M365 admin centre "
                       "(Users → Active users → [user] → Mail → Manage email apps → Authenticated SMTP), "
                       "or add Mail.Send to your Azure app to use Graph instead.")
        elif (_personal_ms or _gmail_host) and _auth_err:
            provider = "Microsoft" if _personal_ms else "Google"
            url = "account.microsoft.com/security" if _personal_ms else "myaccount.google.com → Security → 2-Step Verification"
            err_str = (f"Authentication failed — {provider} blocks regular passwords for SMTP when MFA is enabled.\n\n"
                       f"Fix: create an App Password at {url} → App passwords "
                       f"and use that instead of your normal password.")
        elif graph_error_str:
            err_str = f"SMTP: {err_str} | Graph also unavailable (Mail.Send not granted)"
        return jsonify({"error": err_str}), 200
|
||||
|
||||
|
||||
@bp.route("/api/send_report", methods=["POST"])
def send_report():
    """Build the Excel report and email it to the requested recipients.

    Recipients come from the request body (``recipients``) or, failing that,
    from the saved SMTP config. A ``smtp`` object in the body overrides saved
    SMTP settings for this request only. Microsoft Graph is tried first when
    ``state.connector`` is authenticated; otherwise, or when Graph fails for a
    reason other than a missing permission, SMTP is used.

    Returns:
        ``{"status": "sent", "method": "graph"|"smtp", ...}`` on success.
        ``{"error": ...}`` with 400 (nothing to send / no recipients) or 500
        (Excel build failure, missing Mail.Send permission, SMTP failure).
    """
    if not state.flagged_items:
        return jsonify({"error": "No results to send — run a scan first"}), 400

    data = request.get_json() or {}
    smtp_cfg = _load_smtp_config()
    recipients = data.get("recipients", []) or smtp_cfg.get("recipients", [])
    if isinstance(recipients, str):
        # Accept "a@x; b@y, c@z" style strings: both ';' and ',' separate addresses.
        recipients = [r.strip() for r in recipients.replace(";", ",").split(",") if r.strip()]
    if data.get("smtp"):
        # Per-request overrides win over the saved settings.
        smtp_cfg = {**smtp_cfg, **data["smtp"]}
    if not recipients:
        return jsonify({"error": "No recipients specified"}), 400

    try:
        xl_bytes, fname = _build_excel_bytes()
    except Exception as e:
        return jsonify({"error": f"Excel build failed: {e}"}), 500

    import datetime as _dt
    now = _dt.datetime.now()  # single timestamp shared by subject and body
    subject = f"GDPR Scanner — scan report {now.strftime('%Y-%m-%d')}"
    body_html = (
        "<html><body style='font-family:Arial,sans-serif;color:#333;padding:24px'>"
        "<h2 style='color:#1F3864'>\u2601\ufe0f GDPR Scanner \u2014 scan report</h2>"
        f"<p>Please find the latest scan report attached ({fname}).</p>"
        f"<p style='color:#888;font-size:12px'>Generated: {now.strftime('%Y-%m-%d %H:%M:%S')}<br>"
        f"Items flagged: {len(state.flagged_items)}</p>"
        "</body></html>"
    )

    # Try Graph API first
    if state.connector and state.connector.is_authenticated():
        try:
            _send_email_graph(subject, body_html, recipients,
                              attachment_bytes=xl_bytes, attachment_name=fname)
            return jsonify({"status": "sent", "method": "graph",
                            "recipients": recipients, "filename": fname})
        except Exception as graph_err:
            graph_err_str = str(graph_err)
            # A permission problem is reported directly; any other Graph
            # failure falls through to the SMTP attempt below.
            if "403" in graph_err_str or "Forbidden" in graph_err_str \
                    or "Mail.Send" in graph_err_str or "insufficient" in graph_err_str.lower():
                return jsonify({"error": (
                    "Mail.Send permission not granted on the Azure app registration. "
                    "Go to Azure AD → App registrations → [your app] → API permissions → "
                    "Add → Microsoft Graph → Mail.Send → Grant admin consent."
                )}), 500

    # Fall back to SMTP
    try:
        _send_report_email(xl_bytes, fname, smtp_cfg, recipients)
        return jsonify({"status": "sent", "method": "smtp",
                        "recipients": recipients, "filename": fname})
    except Exception as e:
        # Translate common raw SMTP/socket errors into actionable advice.
        # Nothing in this handler may raise, or the real error would be lost.
        err = str(e)
        _h2 = (smtp_cfg.get("host") or "").lower()
        _raw_port = smtp_cfg.get("port") or 587
        try:
            _p2 = int(_raw_port)
        except (TypeError, ValueError):
            _p2 = _raw_port  # only used for display; show whatever was configured
        _corp_m365_2 = "office365" in _h2 or "microsoft" in _h2
        _personal_ms_2 = not _corp_m365_2 and any(s in _h2 for s in ("outlook", "live", "hotmail"))
        _gmail_2 = "gmail" in _h2 or "smtp.google" in _h2
        # Same auth markers as the SMTP test endpoint, including the
        # "Username and Password" text that was previously missing here.
        _auth_err_2 = "5.7.57" in err or "530" in err or "535" in err or \
                      "534" in err or "not authenticated" in err.lower() or \
                      "Username and Password" in err
        _conn_err_2 = "nodename nor servname" in err or "Name or service not known" in err or \
                      "getaddrinfo" in err or "Connection refused" in err or \
                      "Errno 8" in err or "Errno 111" in err or "Errno 61" in err or \
                      "timed out" in err.lower()
        if _conn_err_2:
            err = (f"Could not connect to SMTP server \"{_h2}\" on port {_p2}. "
                   f"Check that the hostname and port are correct.")
        elif _corp_m365_2 and _auth_err_2:
            err = (f"{err}\n\nTip: Enable SMTP AUTH for this mailbox in the Microsoft 365 admin centre, "
                   "or connect to M365 first so the scanner can send via Microsoft Graph instead.")
        elif (_personal_ms_2 or _gmail_2) and _auth_err_2:
            provider2 = "Microsoft" if _personal_ms_2 else "Google"
            url2 = "account.microsoft.com/security" if _personal_ms_2 else "myaccount.google.com → Security → 2-Step Verification"
            err = (f"Authentication failed — {provider2} blocks regular passwords for SMTP when MFA is enabled.\n\n"
                   f"Fix: create an App Password at {url2} → App passwords "
                   f"and use that instead of your normal password.")
        return jsonify({"error": err}), 500
|
||||
1222
routes/export.py
Normal file
1222
routes/export.py
Normal file
File diff suppressed because it is too large
Load Diff
246
routes/google_auth.py
Normal file
246
routes/google_auth.py
Normal file
@ -0,0 +1,246 @@
|
||||
"""
|
||||
Google Workspace authentication routes.
|
||||
|
||||
Endpoints:
|
||||
GET /api/google/auth/status — is a service account loaded?
|
||||
POST /api/google/auth/connect — save key JSON + optional admin_email
|
||||
POST /api/google/auth/disconnect — remove saved key + clear connector
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from flask import Blueprint, jsonify, request
|
||||
import json
|
||||
import threading
|
||||
|
||||
from routes import state
|
||||
|
||||
bp = Blueprint("google_auth", __name__)
|
||||
|
||||
|
||||
def __getattr__(name):
    """Module-level attribute fallback: look missing names up on ``gdpr_scanner``.

    Raises:
        AttributeError: when ``gdpr_scanner`` does not provide ``name`` either.
    """
    import gdpr_scanner as _m

    _absent = object()  # sentinel, so attributes whose value is None still resolve
    value = getattr(_m, name, _absent)
    if value is _absent:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return value
|
||||
|
||||
|
||||
@bp.route("/api/google/auth/status")
def google_auth_status():
    """Report whether a Google service-account connection is available.

    If a key is saved but ``state.google_connector`` is empty, a
    ``GoogleConnector`` is rebuilt from the saved key and the persisted
    ``admin_email`` before answering.
    """
    from google_connector import GOOGLE_AUTH_OK, load_saved_key

    if not GOOGLE_AUTH_OK:
        return jsonify({
            "connected": False,
            "error": "google-auth not installed — run: pip install google-auth google-auth-httplib2 google-api-python-client",
            "libs_ok": False,
        })

    saved_key = load_saved_key()
    if not saved_key:
        return jsonify({"connected": False, "libs_ok": True})

    service_account = saved_key.get("client_email", "")
    project = saved_key.get("project_id", "")
    # admin_email is persisted separately from the key, in the Google config file.
    admin = _load_google_config().get("admin_email", "")

    if not state.google_connector:
        # Rebuild the in-memory connector from the saved key.
        try:
            from google_connector import GoogleConnector
            state.google_connector = GoogleConnector(saved_key, admin_email=admin)
        except Exception as exc:
            return jsonify({"connected": False, "libs_ok": True,
                            "error": str(exc), "sa_email": service_account})

    return jsonify({
        "connected": True,
        "libs_ok": True,
        "sa_email": service_account,
        "project_id": project,
        "admin_email": admin,
    })
|
||||
|
||||
|
||||
@bp.route("/api/google/auth/connect", methods=["POST"])
def google_auth_connect():
    """
    Accept a service account key JSON + optional admin_email.

    Body: { "key_json": "<raw JSON string or object>", "admin_email": "admin@domain.com" }

    The key is validated by building a ``GoogleConnector`` from it; only a key
    that authenticates is saved and installed as ``state.google_connector``.

    Returns:
        ``{"ok": True, "sa_email": ..., "project_id": ...}`` on success,
        ``{"error": ...}`` with 400 for an invalid key or body, or 503 when
        the Google auth libraries are not installed.
    """
    from google_connector import GOOGLE_AUTH_OK, save_key, GoogleConnector

    if not GOOGLE_AUTH_OK:
        return jsonify({"error": "google-auth not installed"}), 503

    data = request.get_json() or {}
    raw_key = data.get("key_json", "")
    # admin_email may arrive as null or a non-string; normalise instead of crashing.
    admin_email = str(data.get("admin_email") or "").strip()

    # Accept both a JSON string and an already-parsed object
    if isinstance(raw_key, str):
        try:
            key_dict = json.loads(raw_key)
        except json.JSONDecodeError as e:
            return jsonify({"error": f"Invalid JSON: {e}"}), 400
    elif isinstance(raw_key, dict):
        key_dict = raw_key
    else:
        return jsonify({"error": "key_json must be a JSON string or object"}), 400

    # A string such as "[]" or "42" is valid JSON but not a key object; reject
    # it here rather than failing on .get() below.
    if not isinstance(key_dict, dict):
        return jsonify({"error": "key_json must be a JSON string or object"}), 400

    if key_dict.get("type") != "service_account":
        return jsonify({"error": "File must be a service_account JSON key (type != service_account)"}), 400

    # Validate by building a connector
    try:
        conn = GoogleConnector(key_dict, admin_email=admin_email)
        if not conn.is_authenticated():
            return jsonify({"error": "Credentials did not validate — check the key file"}), 400
    except Exception as e:
        return jsonify({"error": str(e)}), 400

    # Persist only after validation succeeded.
    save_key(key_dict)
    _save_google_config({"admin_email": admin_email})

    state.google_connector = conn

    return jsonify({
        "ok": True,
        "sa_email": key_dict.get("client_email", ""),
        "project_id": key_dict.get("project_id", ""),
    })
|
||||
|
||||
|
||||
@bp.route("/api/google/auth/disconnect", methods=["POST"])
def google_auth_disconnect():
    """Forget the Google connection: saved key, saved config and live connector."""
    from google_connector import delete_key as _forget_saved_key

    _forget_saved_key()
    # An empty config drops the persisted admin_email.
    _save_google_config({})
    state.google_connector = None
    response = jsonify({"ok": True})
    return response
|
||||
|
||||
|
||||
# ── Personal Google account (device-code OAuth) ───────────────────────────────
|
||||
|
||||
@bp.route("/api/google/personal/status")
def google_personal_status():
    """Report whether a personal Google OAuth token is stored and usable.

    When a token exists but the active connector is not a
    ``PersonalGoogleConnector``, one is built from the token and installed in
    ``state.google_connector`` if it authenticates. The user's email and
    display name are then fetched to confirm the connection.
    """
    from google_connector import GOOGLE_AUTH_OK, load_personal_token, PersonalGoogleConnector

    if not GOOGLE_AUTH_OK:
        return jsonify({"connected": False, "libs_ok": False, "auth_mode": "personal"})

    def _not_connected(**extra):
        # Shared "libs present, but no usable connection" response.
        return jsonify({"connected": False, "libs_ok": True, "auth_mode": "personal", **extra})

    token = load_personal_token()
    if not token:
        return _not_connected()

    if not isinstance(state.google_connector, PersonalGoogleConnector):
        try:
            candidate = PersonalGoogleConnector(token)
            if not candidate.is_authenticated():
                return _not_connected()
            state.google_connector = candidate
        except Exception as exc:
            return _not_connected(error=str(exc))

    try:
        profile = state.google_connector.get_user_info()
        return jsonify({
            "connected": True,
            "libs_ok": True,
            "auth_mode": "personal",
            "email": profile.get("email", ""),
            "displayName": profile.get("displayName", ""),
        })
    except Exception as exc:
        return _not_connected(error=str(exc))
|
||||
|
||||
|
||||
@bp.route("/api/google/personal/start", methods=["POST"])
def google_personal_start():
    """Initiate a Google device-code flow for a personal account.

    Body: ``{"client_id": ..., "client_secret": ...}`` (both required).

    The flow is stored in ``state.google_pending_flow`` and completed on a
    daemon thread, which records its outcome in ``state.google_poll_result``
    (``"ok"`` or an error description) for the poll endpoint to report.

    Returns:
        ``{"user_code": ..., "verification_url": ...}`` on success,
        ``{"error": ...}`` with 400 for bad input or a failed flow start, or
        503 when the Google auth libraries are not installed.
    """
    from google_connector import GOOGLE_AUTH_OK, PersonalGoogleConnector

    if not GOOGLE_AUTH_OK:
        return jsonify({"error": "google-auth not installed"}), 503

    data = request.get_json() or {}
    # Values may arrive as null or non-strings; normalise instead of crashing.
    client_id = str(data.get("client_id") or "").strip()
    client_secret = str(data.get("client_secret") or "").strip()
    if not client_id or not client_secret:
        return jsonify({"error": "client_id and client_secret required"}), 400

    try:
        flow = PersonalGoogleConnector.get_device_code_flow(client_id, client_secret)
    except Exception as e:
        return jsonify({"error": str(e)}), 400

    state.google_pending_flow = flow
    state.google_poll_result = None

    def _do_auth():
        try:
            conn = PersonalGoogleConnector.complete_device_code_flow(flow)
            state.google_connector = conn
            state.google_poll_result = "ok"
        except Exception as e:
            # The poll endpoint treats a falsy result as "still pending", so an
            # exception with an empty message must still yield a non-empty
            # string or the client would poll forever.
            state.google_poll_result = str(e) or repr(e)

    threading.Thread(target=_do_auth, daemon=True).start()

    return jsonify({
        "user_code": flow["user_code"],
        "verification_url": flow["verification_url"],
    })
|
||||
|
||||
|
||||
@bp.route("/api/google/personal/poll", methods=["POST"])
def google_personal_poll():
    """Report the state of the device-code sign-in: pending, ok or error.

    A finished outcome (success or failure) is reported once; the stored
    result and pending flow are cleared as it is handed to the caller.
    """
    outcome = state.google_poll_result
    if not outcome or outcome == "pending":
        return jsonify({"status": "pending"})

    # Finished either way: reset the shared state before answering.
    state.google_poll_result = None
    state.google_pending_flow = None
    if outcome == "ok":
        return jsonify({"status": "ok"})
    return jsonify({"status": "error", "error": outcome})
|
||||
|
||||
|
||||
@bp.route("/api/google/personal/signout", methods=["POST"])
def google_personal_signout():
    """Remove the stored personal OAuth token and drop a personal connector.

    A connector of any other type in ``state.google_connector`` is left alone.
    """
    from google_connector import delete_personal_token, PersonalGoogleConnector

    delete_personal_token()
    active = state.google_connector
    if isinstance(active, PersonalGoogleConnector):
        state.google_connector = None
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
# ── Config helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
from pathlib import Path as _Path

# Directory under the user's home that holds the Google config file.
_DATA_DIR = _Path.home().joinpath(".gdprscanner")
# Created at import time; an already existing directory is fine.
_DATA_DIR.mkdir(exist_ok=True)
# JSON file read and written by _load_google_config / _save_google_config.
_GOOGLE_CONFIG = _DATA_DIR.joinpath("google.json")
|
||||
|
||||
|
||||
def _load_google_config() -> dict:
    """Read the persisted Google config, best-effort.

    Returns:
        The parsed config mapping, or ``{}`` when the file is missing,
        unreadable, not valid JSON, or valid JSON that is not an object
        (callers use ``.get`` on the result, so a list or scalar must never
        be returned).
    """
    if not _GOOGLE_CONFIG.exists():
        return {}
    try:
        loaded = json.loads(_GOOGLE_CONFIG.read_text())
    except Exception:
        # Deliberately best-effort: a corrupt config behaves like no config.
        return {}
    return loaded if isinstance(loaded, dict) else {}
|
||||
|
||||
|
||||
def _save_google_config(cfg: dict) -> None:
    """Write ``cfg`` to the Google config file as indented JSON, best-effort.

    A failure never propagates to the caller, but it is logged so that a
    setting which silently failed to persist can be diagnosed.
    """
    try:
        _GOOGLE_CONFIG.write_text(json.dumps(cfg, indent=2))
    except Exception as exc:
        # Local import: this module does not otherwise use logging.
        import logging
        logging.getLogger(__name__).warning(
            "[google_auth] could not save %s: %s", _GOOGLE_CONFIG, exc)
|
||||
328
routes/google_scan.py
Normal file
328
routes/google_scan.py
Normal file
@ -0,0 +1,328 @@
|
||||
"""
|
||||
Google Workspace scan routes.
|
||||
|
||||
Endpoints:
|
||||
POST /api/google/scan/start — kick off a Gmail + Drive scan
|
||||
POST /api/google/scan/cancel — abort running Google scan
|
||||
GET /api/google/scan/users — list workspace users via Admin SDK
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from flask import Blueprint, jsonify, request
|
||||
import logging
|
||||
import threading
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from routes import state
|
||||
from routes.state import _google_scan_lock as _scan_lock, _google_scan_abort as _scan_abort
|
||||
|
||||
bp = Blueprint("google_scan", __name__)
|
||||
|
||||
|
||||
def __getattr__(name):
    """Module-level attribute fallback: look missing names up on ``gdpr_scanner``.

    Raises:
        AttributeError: when ``gdpr_scanner`` does not provide ``name`` either.
    """
    import gdpr_scanner as _m

    _absent = object()  # sentinel, so attributes whose value is None still resolve
    value = getattr(_m, name, _absent)
    if value is _absent:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return value
|
||||
|
||||
|
||||
# ── Scan lock / abort flag ────────────────────────────────────────────────────
# _scan_lock / _scan_abort are routes.state._google_scan_lock / _google_scan_abort,
# imported directly above (not resolved through the module-level __getattr__).
|
||||
|
||||
|
||||
@bp.route("/api/google/scan/users")
def google_scan_users():
    """List the users the active Google connector can enumerate.

    Returns ``{"users": [...]}``, 401 when no connector is set, or 500 with
    the error text when the listing fails.
    """
    connector = state.google_connector
    if not connector:
        return jsonify({"error": "not connected"}), 401
    try:
        return jsonify({"users": connector.list_users()})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
|
||||
|
||||
|
||||
@bp.route("/api/google/scan/start", methods=["POST"])
def google_scan_start():
    """
    Start a Google Workspace scan.

    Body (all optional):
    {
        "sources": ["gmail", "gdrive"],   // default: both
        "user_emails": ["a@dom.com"],     // default: all users via Admin SDK
        "options": {
            "max_messages": 2000,
            "max_files": 5000,
            "max_attach_mb": 20,
            "scan_body": true,
            "scan_attachments":true,
            "max_file_mb": 50
        }
    }

    The scan runs on a daemon thread that releases the scan lock when it
    finishes. Returns ``{"status": "started"}``, 401 when no connector is
    set, or 409 when a scan already holds the lock.
    """
    conn = state.google_connector
    if not conn:
        return jsonify({"error": "not connected to Google Workspace"}), 401

    # Parse the body BEFORE taking the lock: get_json() can raise on a
    # malformed request, and an exception raised while holding the lock would
    # leave it locked forever (every later start would answer 409).
    options = request.get_json() or {}

    if not _scan_lock.acquire(blocking=False):
        return jsonify({"error": "scan already running"}), 409

    def _run():
        try:
            _run_google_scan(options)
        finally:
            _scan_lock.release()

    try:
        _scan_abort.clear()
        threading.Thread(target=_run, daemon=True).start()
    except Exception:
        # The worker never started, so nobody else will release the lock.
        _scan_lock.release()
        raise
    return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@bp.route("/api/google/scan/cancel", methods=["POST"])
def google_scan_cancel():
    """Set the Google scan abort flag and acknowledge the request."""
    _scan_abort.set()
    acknowledgement = {"status": "cancelling"}
    return jsonify(acknowledgement)
|
||||
|
||||
|
||||
# ── Scan engine ───────────────────────────────────────────────────────────────
|
||||
|
||||
def _run_google_scan(options: dict):
    """
    Core Google Workspace scan loop.

    Mirrors the M365 scan structure:
        broadcast("scan_start")
        for each user:
            for each source (gmail / gdrive):
                for each item:
                    scan bytes → broadcast card
        broadcast("scan_done")

    Args:
        options: request body of the start endpoint — ``sources``,
            ``user_emails`` and a nested ``options`` dict with the size and
            count limits (defaults are applied for anything missing).

    The scan stops early, after broadcasting ``scan_cancelled``, as soon as
    an abort flag is set. Per-item and per-source failures are broadcast as
    ``scan_error`` and do not stop the scan.

    NOTE(review): the early-exit paths broadcast "google_scan_done" while a
    normal completion broadcasts "scan_done" — kept as found; confirm which
    event the client listens for.
    """
    import gdpr_scanner as _m

    broadcast = _m.broadcast
    _scan_bytes = _m._scan_bytes
    flagged_items = _m.flagged_items

    # Import DB helpers; the scan still runs (without persistence) if absent.
    try:
        from gdpr_db import get_db as _get_db
        DB_OK = True
    except ImportError:
        DB_OK = False
        def _get_db(*a, **kw): return None

    from scan_engine import _with_disposition

    conn = state.google_connector
    if not conn:
        broadcast("scan_error", {"file": "auth", "error": "Not connected to Google Workspace"})
        broadcast("google_scan_done", {"flagged_count": 0, "total_scanned": 0})
        return

    import time as _time
    # Clear the SSE buffer, when gdpr_scanner exposes one, before a new scan.
    _sse_buffer = getattr(_m, '_sse_buffer', None)
    if _sse_buffer is not None:
        _sse_buffer.clear()

    sources = options.get("sources", ["gmail", "gdrive"])
    user_emails = options.get("user_emails", [])
    scan_opts = options.get("options", {})
    max_messages = int(scan_opts.get("max_messages", 2000))
    max_files = int(scan_opts.get("max_files", 5000))
    max_attach_mb = float(scan_opts.get("max_attach_mb", 20.0))
    max_file_mb = float(scan_opts.get("max_file_mb", 50.0))
    scan_body = bool(scan_opts.get("scan_body", True))
    scan_att = bool(scan_opts.get("scan_attachments", True))

    # Resolve users: explicit list → Admin SDK → fall back to SA email itself
    _user_role_map: dict = {}     # email → role
    _user_display_map: dict = {}  # email → display name
    if not user_emails:
        try:
            ws_users = conn.list_users()
            user_emails = [u["email"] for u in ws_users if u.get("email")]
            _user_role_map = {u["email"]: u.get("userRole", "other") for u in ws_users}
            _user_display_map = {u["email"]: u.get("displayName", u["email"]) for u in ws_users}
        except Exception as e:
            # Admin SDK unavailable — scan only the delegated admin account
            broadcast("scan_phase", {"phase": f"Admin SDK unavailable ({e}) — scanning service account email only"})
            user_emails = [conn.get_service_account_email()]
            # SA email itself is not a mailbox; use admin_email if set
            if conn._admin_email:
                user_emails = [conn._admin_email]

    # If user_emails came from the request, try to get display names and roles
    if user_emails and not _user_role_map:
        try:
            ws_users = conn.list_users()
            _user_role_map = {u["email"]: u.get("userRole", "other") for u in ws_users}
            _user_display_map = {u["email"]: u.get("displayName", u["email"]) for u in ws_users}
        except Exception:
            _user_display_map = {}

    if not user_emails:
        broadcast("scan_error", {"file": "users", "error": "No users to scan — set admin email or provide user_emails"})
        broadcast("google_scan_done", {"flagged_count": 0, "total_scanned": 0})
        return

    source_labels = []
    if "gmail" in sources:
        source_labels.append("Gmail")
    if "gdrive" in sources:
        source_labels.append("Google Drive")

    broadcast("scan_start", {"sources": source_labels})
    broadcast("scan_phase", {"phase": f"Google Workspace scan · {len(user_emails)} user(s) · " + ", ".join(source_labels)})

    # Open DB
    _db = _get_db() if DB_OK else None
    _db_scan_id = None
    if _db:
        try:
            _db_scan_id = _db.begin_scan(options)
        except Exception as e:
            logger.error("[google_scan] begin_scan failed: %s", e)

    total_flagged = 0
    total_scanned = 0
    t_start = _time.monotonic()

    def _check_abort():
        """Return True (after broadcasting scan_cancelled) once an abort is requested."""
        # The cancel route in this module sets the _scan_abort imported from
        # routes.state, so that flag must be honoured here. The flag exposed
        # by gdpr_scanner (the only one checked before) is still consulted so
        # existing cancel paths keep working.
        legacy_abort = getattr(_m, "_scan_abort", None)
        if _scan_abort.is_set() or (legacy_abort is not None and legacy_abort.is_set()):
            broadcast("scan_cancelled", {"completed": total_scanned})
            return True
        return False

    def _broadcast_card(item_meta: dict, cprs: list, pii_counts=None):
        """Record a flagged item: keep it in memory, broadcast it, save it to the DB."""
        nonlocal total_flagged
        card = {
            "id": item_meta.get("id", ""),
            "name": item_meta.get("name", ""),
            "source": item_meta.get("_source", ""),
            "source_type": item_meta.get("_source_type", ""),
            "cpr_count": len(cprs),
            "url": item_meta.get("_url", ""),
            "size_kb": round(item_meta.get("size", 0) / 1024, 1),
            "modified": (item_meta.get("lastModifiedDateTime") or item_meta.get("receivedDateTime") or "")[:10],
            "thumb_b64": "",
            "thumb_mime": "image/svg+xml",
            "risk": None,
            "account_id": item_meta.get("_account_id", ""),
            "account_name": item_meta.get("_account", ""),
            # user_email is the per-user loop variable below, read at call time.
            "user_role": _user_role_map.get(user_email, "other"),
            "drive_id": "",
            "attachments": [],
            "folder": "",
            "transfer_risk": "",
            "special_category": [],
            "face_count": 0,
            "exif": {},
        }
        flagged_items.append(card)
        broadcast("scan_file_flagged", _with_disposition(card, _db))
        total_flagged += 1
        if _db and _db_scan_id:
            try:
                _db.save_item(_db_scan_id, card, cprs, pii_counts=pii_counts)
            except Exception as e:
                logger.error("[google_scan] save_item failed: %s", e)

    from google_connector import GoogleError

    def _scan_items(open_items, phase, error_label, fallback_name, display_name):
        """Scan every (meta, data) pair of one source for one user.

        ``open_items`` is called inside the try block so a failure while
        opening the source is reported like a failure while iterating it.
        Returns True when the scan was aborted, False otherwise.
        """
        nonlocal total_scanned
        try:
            broadcast("scan_phase", {"phase": phase})
            for meta, data in open_items():
                if _check_abort():
                    return True
                total_scanned += 1
                broadcast("scan_file", {"file": meta.get("name", "")})
                broadcast("scan_progress", {
                    "scanned": total_scanned,
                    "flagged": total_flagged,
                    "file": meta.get("name", ""),
                    # Item totals are unknown up front: the bar creeps from 10
                    # towards a 90 % cap instead of showing a true percentage.
                    "pct": min(90, 10 + total_scanned // 10),
                    "source": "google",
                })
                try:
                    meta["_account"] = display_name
                    result = _scan_bytes(data, meta.get("name", fallback_name))
                except Exception as e:
                    # One unreadable item must not stop the rest of the source.
                    broadcast("scan_error", {"file": meta.get("name", ""), "error": str(e)})
                    continue
                cprs = result.get("cprs", [])
                pii_counts = result.get("pii_counts")
                if cprs or (pii_counts and any(pii_counts.values())):
                    _broadcast_card(meta, cprs, pii_counts)
        except (GoogleError, Exception) as e:
            # Both kinds of failure were always reported identically.
            broadcast("scan_error", {"file": error_label, "error": str(e)})
        return False

    # ── Per-user scan loop ────────────────────────────────────────────────────
    for user_email in user_emails:
        _display_name = _user_display_map.get(user_email, user_email)
        if _check_abort():
            return

        broadcast("scan_phase", {"phase": f"Google Workspace \u2014 {user_email}"})

        # ── Gmail ─────────────────────────────────────────────────────────────
        if "gmail" in sources:
            aborted = _scan_items(
                lambda: conn.iter_gmail_messages(
                    user_email,
                    max_messages=max_messages,
                    scan_body=scan_body,
                    scan_attachments=scan_att,
                    max_attach_mb=max_attach_mb,
                ),
                f"{user_email} — Gmail",
                f"Gmail/{user_email}",
                "msg.txt",
                _display_name,
            )
            if aborted:
                return

        # ── Google Drive ──────────────────────────────────────────────────────
        if "gdrive" in sources:
            aborted = _scan_items(
                lambda: conn.iter_drive_files(
                    user_email,
                    max_files=max_files,
                    max_file_mb=max_file_mb,
                ),
                f"{user_email} — Google Drive",
                f"Drive/{user_email}",
                "file",
                _display_name,
            )
            if aborted:
                return

    elapsed = _time.monotonic() - t_start
    broadcast("scan_done", {
        "flagged_count": total_flagged,
        "total_scanned": total_scanned,
        "elapsed_seconds": round(elapsed, 1),
    })
    if _db and _db_scan_id:
        try:
            _db.end_scan(_db_scan_id, total_scanned, total_flagged)
        except Exception as e:
            # Still best-effort, but logged like the other DB failures above.
            logger.error("[google_scan] end_scan failed: %s", e)
|
||||
47
routes/profiles.py
Normal file
47
routes/profiles.py
Normal file
@ -0,0 +1,47 @@
|
||||
"""
|
||||
Scan profiles
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from flask import Blueprint, jsonify, request
|
||||
from app_config import _profiles_load, _profile_save, _profile_delete, _profile_get
|
||||
|
||||
bp = Blueprint("profiles", __name__)
|
||||
|
||||
|
||||
@bp.route("/api/profiles", methods=["GET"])
def profiles_list():
    """List every saved scan profile under the ``profiles`` key."""
    saved_profiles = _profiles_load()
    return jsonify({"profiles": saved_profiles})
|
||||
|
||||
|
||||
@bp.route("/api/profiles/save", methods=["POST"])
def profiles_save():
    """Create or update the profile posted as JSON; a ``name`` is mandatory.

    Returns the stored profile, or 400 when the name is missing or empty.
    """
    payload = request.get_json() or {}
    if payload.get("name"):
        stored = _profile_save(payload)
        return jsonify({"status": "saved", "profile": stored})
    return jsonify({"error": "name required"}), 400
|
||||
|
||||
|
||||
@bp.route("/api/profiles/delete", methods=["POST"])
def profiles_delete():
    """Remove the profile identified by ``name`` (preferred) or ``id``.

    Returns status ``deleted`` or ``not_found``; 400 when neither key is given.
    """
    payload = request.get_json() or {}
    identifier = payload.get("name") or payload.get("id", "")
    if not identifier:
        return jsonify({"error": "name or id required"}), 400
    removed = _profile_delete(identifier)
    status = "deleted" if removed else "not_found"
    return jsonify({"status": status})
|
||||
|
||||
|
||||
@bp.route("/api/profiles/get")
def profiles_get():
    """Look up one profile by the ``name`` (preferred) or ``id`` query argument.

    Returns the profile, or 404 when no profile matches.
    """
    query = request.args
    identifier = query.get("name") or query.get("id", "")
    found = _profile_get(identifier)
    if found:
        return jsonify({"profile": found})
    return jsonify({"error": "not found"}), 404
|
||||
|
||||
|
||||
137
routes/scan.py
Normal file
137
routes/scan.py
Normal file
@ -0,0 +1,137 @@
|
||||
"""
|
||||
Scan stream, start/stop, checkpoint, settings, delta
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import threading
|
||||
from flask import Blueprint, jsonify, request
|
||||
from routes import state
|
||||
from app_config import (
|
||||
_save_settings, _load_settings,
|
||||
_load_src_toggles, _save_src_toggles,
|
||||
)
|
||||
from checkpoint import (
|
||||
_checkpoint_key, _load_checkpoint, _clear_checkpoint,
|
||||
_load_delta_tokens, _DELTA_PATH,
|
||||
)
|
||||
|
||||
bp = Blueprint("scan", __name__)
|
||||
|
||||
|
||||
@bp.route("/api/scan/status")
def scan_status():
    """Cheap status probe: reports whether a scan is running and the current scan id."""
    import sse as _sse_mod

    # Non-blocking probe: if the lock can be taken, nobody is scanning, so give
    # it straight back. Failing to take it means a scan currently holds it.
    lock_was_free = state._scan_lock.acquire(blocking=False)
    if lock_was_free:
        state._scan_lock.release()
    current_id = _sse_mod._current_scan_id or None
    return jsonify({"running": not lock_was_free, "scan_id": current_id})
|
||||
|
||||
|
||||
@bp.route("/api/src_toggles", methods=["GET", "POST"])
def src_toggles():
    """Read (GET) or persist (POST) the source toggle state."""
    if request.method != "POST":
        return jsonify(_load_src_toggles())
    new_state = request.get_json() or {}
    _save_src_toggles(new_state)
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@bp.route("/api/scan/start", methods=["POST"])
def scan_start():
    """Start a scan on a daemon thread.

    Responses:
        401 -- ``state.connector`` is not set.
        409 -- ``state._scan_lock`` is already held.
        200 -- ``{"status": "started"}`` once the worker thread has been started.

    The JSON body is passed to ``scan_engine.run_scan``; its ``sources``,
    ``user_ids`` and ``options`` entries are also persisted via
    ``_save_settings`` (with the optional ``profile_id`` popped from the body).
    """
    if not state.connector:
        return jsonify({"error": "not authenticated"}), 401
    if not state._scan_lock.acquire(blocking=False):
        return jsonify({"error": "scan already running"}), 409

    def _run():
        from scan_engine import run_scan
        try:
            run_scan(options)
        finally:
            # The worker owns the lock from the moment it starts.
            state._scan_lock.release()

    try:
        options = request.get_json() or {}
        state._scan_abort.clear()
        profile_id = options.pop("profile_id", None)
        _save_settings({
            "sources": options.get("sources", []),
            "user_ids": options.get("user_ids", []),
            "options": options.get("options", {}),
        }, profile_id=profile_id)
        threading.Thread(target=_run, daemon=True).start()
    except BaseException:
        # Anything failing before the worker is running (bad JSON body,
        # settings write error, thread creation) would otherwise leave the
        # lock held forever and make every later start return 409.
        state._scan_lock.release()
        raise
    return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@bp.route("/api/scan/stop", methods=["POST"])
def scan_stop():
    """Set the shared abort event and acknowledge with ``{"status": "stopping"}``."""
    abort_event = state._scan_abort
    abort_event.set()
    return jsonify({"status": "stopping"})
|
||||
|
||||
|
||||
@bp.route("/api/scan/checkpoint", methods=["POST"])
def scan_checkpoint_info():
    """Describe the saved checkpoint (if any) matching the posted scan options.

    With a truthy ``check_only`` in the body, only ``{"running": bool}`` is
    returned, determined by probing the scan lock without blocking.
    """
    options = request.get_json() or {}

    if options.get("check_only"):
        got_lock = state._scan_lock.acquire(blocking=False)
        if got_lock:
            state._scan_lock.release()
        return jsonify({"running": not got_lock})

    checkpoint = _load_checkpoint(_checkpoint_key(options))
    if not checkpoint:
        return jsonify({"exists": False})

    summary = {
        "exists": True,
        "scanned_count": len(checkpoint.get("scanned_ids", [])),
        "flagged_count": len(checkpoint.get("flagged", [])),
        "started_at": checkpoint.get("meta", {}).get("started_at"),
    }
    return jsonify(summary)
|
||||
|
||||
|
||||
@bp.route("/api/scan/clear_checkpoint", methods=["POST"])
def scan_clear_checkpoint():
    """Drop the saved checkpoint so the following scan begins from scratch."""
    _clear_checkpoint()
    reply = {"status": "cleared"}
    return jsonify(reply)
|
||||
|
||||
|
||||
@bp.route("/api/settings/save", methods=["POST"])
def settings_save():
    """Store the posted scan settings for later reuse by --headless mode."""
    _save_settings(request.get_json() or {})
    return jsonify({"status": "saved"})
|
||||
|
||||
|
||||
@bp.route("/api/settings/load")
def settings_load():
    """Return the saved scan settings, or ``{"exists": False}`` when none are stored."""
    stored = _load_settings()
    if stored:
        return jsonify({"exists": True, "settings": stored})
    return jsonify({"exists": False})
|
||||
|
||||
|
||||
@bp.route("/api/delta/status")
def delta_status():
    """Summarise the stored delta tokens: how many, their keys, and whether any exist."""
    tokens = _load_delta_tokens()
    token_count = len(tokens)
    token_keys = list(tokens.keys())
    return jsonify({
        "count": token_count,
        "keys": token_keys,
        "exists": token_count > 0,
    })
|
||||
|
||||
|
||||
@bp.route("/api/delta/clear", methods=["POST"])
def delta_clear():
    """Discard all stored delta tokens (next scan will be a full scan).

    Returns ``{"status": "cleared"}`` whether or not the file existed, or
    HTTP 500 with the error text when removal fails.
    """
    try:
        # missing_ok avoids the exists()/unlink() race, where a concurrent
        # removal between the two calls surfaced as a spurious 500.
        # NOTE(review): assumes _DELTA_PATH is a pathlib.Path -- confirm in checkpoint.py.
        _DELTA_PATH.unlink(missing_ok=True)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    return jsonify({"status": "cleared"})
|
||||
156
routes/scheduler.py
Normal file
156
routes/scheduler.py
Normal file
@ -0,0 +1,156 @@
|
||||
"""
|
||||
Scheduler API routes — multi-job CRUD, status, history, run-now.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from flask import Blueprint, jsonify, request
|
||||
import sys, os, threading
|
||||
|
||||
bp = Blueprint("scheduler", __name__)
|
||||
|
||||
# Blueprint-wide fallback: any unhandled exception is answered as JSON.
@bp.errorhandler(Exception)
def _handle_error(e):
    """Print the traceback to stderr and reply with ``{"error": str(e)}`` and HTTP 500."""
    import traceback

    traceback.print_exc()
    body = {"error": str(e)}
    return jsonify(body), 500
|
||||
|
||||
# Lazy accessor: the scheduler module is imported inside the call rather than
# at module import time. (No sys.path manipulation happens here.)
def _sm():
    """Return the ``scan_scheduler`` module."""
    import scan_scheduler

    return scan_scheduler
|
||||
|
||||
|
||||
def _sched():
    """Return the ``scan_scheduler`` attribute of the ``scan_scheduler`` module."""
    import scan_scheduler

    return scan_scheduler.scan_scheduler
|
||||
|
||||
def _db():
    """Return ``gdpr_scanner._get_db()`` when ``gdpr_scanner.DB_OK`` is truthy, else ``None``."""
    import gdpr_scanner as _m

    if _m.DB_OK:
        return _m._get_db()
    return None
|
||||
|
||||
|
||||
# ── Job list ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@bp.route("/api/scheduler/jobs", methods=["GET"])
def scheduler_jobs_list():
    """Return every configured job as ``{"jobs": [...]}``."""
    all_jobs = _sm().load_jobs()
    return jsonify({"jobs": all_jobs})
|
||||
|
||||
|
||||
@bp.route("/api/scheduler/jobs/save", methods=["POST"])
def scheduler_jobs_save():
    """Update the job whose id matches the posted ``id``, or append a new job.

    An existing job is rebuilt as defaults < stored values < posted values.
    After saving, the scheduler is asked to reload; a reload failure is
    ignored. Any other exception is printed and returned as HTTP 500 JSON.
    """
    try:
        sm = _sm()
        payload = request.get_json() or {}
        jobs = sm.load_jobs()
        wanted_id = (payload.get("id") or "").strip()

        # Index of the first stored job with the requested id, if any.
        match_idx = None
        if wanted_id:
            match_idx = next(
                (idx for idx, existing in enumerate(jobs) if existing["id"] == wanted_id),
                None,
            )

        if match_idx is not None:
            result = {**sm._DEFAULT_JOB, **jobs[match_idx], **payload}
            jobs[match_idx] = result
        else:
            # New job
            result = sm._new_job(payload)
            jobs.append(result)

        sm.save_jobs(jobs)
        try:
            _sched().reload()
        except Exception:
            pass
        return jsonify({"ok": True, "job": result})
    except Exception as e:
        import traceback

        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
@bp.route("/api/scheduler/jobs/delete", methods=["POST"])
def scheduler_jobs_delete():
    """Remove the job with the posted ``id`` (HTTP 400 when the id is missing).

    The scheduler is reloaded afterwards on a best-effort basis; other
    failures are printed and returned as HTTP 500 JSON.
    """
    try:
        sm = _sm()
        target_id = (request.get_json() or {}).get("id", "")
        if not target_id:
            return jsonify({"error": "id required"}), 400

        remaining = []
        for job in sm.load_jobs():
            if job["id"] != target_id:
                remaining.append(job)
        sm.save_jobs(remaining)

        try:
            _sched().reload()
        except Exception:
            pass
        return jsonify({"ok": True})
    except Exception as e:
        import traceback

        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
# ── Run now ───────────────────────────────────────────────────────────────────
|
||||
|
||||
@bp.route("/api/scheduler/jobs/run_now", methods=["POST"])
def scheduler_jobs_run_now():
    """Kick off the job with the posted ``id`` on a daemon thread.

    Answers HTTP 409 when that job is listed in the scheduler's
    ``_running_jobs`` or when the scheduler reports ``is_running``.
    """
    payload = request.get_json() or {}
    job_id = payload.get("id", "")
    scheduler = _sched()
    if job_id in scheduler._running_jobs:
        return jsonify({"error": "Job already running"}), 409
    if scheduler.is_running:
        return jsonify({"error": "Another scan is already running"}), 409
    worker = threading.Thread(target=scheduler._execute_scan, args=[job_id], daemon=True)
    worker.start()
    return jsonify({"status": "started"})
|
||||
|
||||
|
||||
# ── Status ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@bp.route("/api/scheduler/status")
def scheduler_status():
    """Return the scheduler's ``get_status()`` result as JSON."""
    status = _sched().get_status()
    return jsonify(status)
|
||||
|
||||
|
||||
# ── History ───────────────────────────────────────────────────────────────────
|
||||
|
||||
@bp.route("/api/scheduler/history")
def scheduler_history():
    """Return recent schedule runs as ``{"runs": [...]}``.

    Query parameters: ``limit`` (default 20) and optional ``job_id``. Without
    a database the list is empty; on failure the list is empty and ``error``
    carries the exception text (still HTTP 200).
    """
    database = _db()
    if not database:
        return jsonify({"runs": []})
    try:
        max_rows = int(request.args.get("limit", 20))
        wanted_job = request.args.get("job_id")
        try:
            history = database.get_schedule_runs(limit=max_rows, job_id=wanted_job)
        except TypeError:
            # Presumably a DB layer whose get_schedule_runs does not accept
            # job_id -- retry without the filter. TODO confirm.
            history = database.get_schedule_runs(limit=max_rows)
        return jsonify({"runs": history})
    except Exception as e:
        return jsonify({"runs": [], "error": str(e)})
|
||||
|
||||
|
||||
# ── Backward-compat single-job endpoints ─────────────────────────────────────
|
||||
|
||||
@bp.route("/api/scheduler/config", methods=["GET"])
def scheduler_config_get():
    """Backward-compat endpoint: return ``load_schedule_config()`` as JSON."""
    config = _sm().load_schedule_config()
    return jsonify(config)
|
||||
|
||||
|
||||
@bp.route("/api/scheduler/config", methods=["POST"])
def scheduler_config_save():
    """Backward-compat endpoint: overlay the posted fields on the stored config and save.

    The scheduler is reloaded afterwards and its ``next_run_time()`` is
    included in the response.
    """
    sm = _sm()
    posted = request.get_json() or {}
    current = sm.load_schedule_config()
    merged = {**current, **posted}
    sm.save_schedule_config(merged)

    scheduler = _sched()
    scheduler.reload()
    reply = {
        "status": "saved",
        "config": merged,
        "next_run": scheduler.next_run_time(),
    }
    return jsonify(reply)
|
||||
|
||||
|
||||
@bp.route("/api/scheduler/run_now", methods=["POST"])
def scheduler_run_now():
    """Backward-compat endpoint: run a scan now with no job id (HTTP 409 if one is running)."""
    scheduler = _sched()
    if scheduler.is_running:
        return jsonify({"error": "Scheduled scan already running"}), 409
    worker = threading.Thread(target=scheduler._execute_scan, args=[None], daemon=True)
    worker.start()
    return jsonify({"status": "started"})
|
||||
100
routes/sources.py
Normal file
100
routes/sources.py
Normal file
@ -0,0 +1,100 @@
|
||||
"""
|
||||
File sources and file scan
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import threading
|
||||
from flask import Blueprint, jsonify, request
|
||||
from routes import state
|
||||
from app_config import _load_file_sources, _save_file_sources
|
||||
|
||||
# Optional dependency: when file_scanner cannot be imported, fall back to
# "unavailable" flags and a stub that always reports failure.
try:
    from file_scanner import store_smb_password, SMB_OK as _SMB_OK
except ImportError:
    _FILE_SCANNER_OK = False
    _SMB_OK = False

    def store_smb_password(*a, **kw):  # type: ignore[misc]
        return False
else:
    _FILE_SCANNER_OK = True
|
||||
|
||||
bp = Blueprint("sources", __name__)
|
||||
|
||||
|
||||
@bp.route("/api/file_sources", methods=["GET"])
def file_sources_list():
    """List the saved file sources together with the SMB / scanner availability flags."""
    reply = {
        "sources": _load_file_sources(),
        "smb_available": _SMB_OK,
        "scanner_ok": _FILE_SCANNER_OK,
    }
    return jsonify(reply)
|
||||
|
||||
|
||||
@bp.route("/api/file_sources/save", methods=["POST"])
def file_sources_save():
    """Add or update a file source. Assigns a UUID if id is missing.

    The JSON body must contain a non-blank string ``path``; otherwise HTTP 400
    is returned. When a stored source has the same ``id`` as the body, the
    posted fields are merged over it; otherwise the body is appended as a new
    source.
    """
    import uuid as _uuid
    data = request.get_json() or {}
    # A JSON null or non-string path used to crash on .strip() and surface as
    # a 500; treat it like a missing path so the caller gets the intended 400.
    raw_path = data.get("path")
    path = raw_path.strip() if isinstance(raw_path, str) else ""
    if not path:
        return jsonify({"error": "path required"}), 400
    sources = _load_file_sources()
    uid = data.get("id") or ""
    for i, s in enumerate(sources):
        if s.get("id") == uid:
            sources[i] = {**s, **data}
            _save_file_sources(sources)
            return jsonify({"ok": True, "source": sources[i]})
    data["id"] = data.get("id") or str(_uuid.uuid4())
    sources.append(data)
    _save_file_sources(sources)
    return jsonify({"ok": True, "source": data})
|
||||
|
||||
|
||||
@bp.route("/api/file_sources/delete", methods=["POST"])
def file_sources_delete():
    """Drop the file source whose ``id`` matches the posted one (HTTP 400 without an id)."""
    target_id = (request.get_json() or {}).get("id", "")
    if not target_id:
        return jsonify({"error": "id required"}), 400
    kept = []
    for entry in _load_file_sources():
        if entry.get("id") != target_id:
            kept.append(entry)
    _save_file_sources(kept)
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@bp.route("/api/file_sources/store_creds", methods=["POST"])
def file_sources_store_creds():
    """Hand an SMB password to ``store_smb_password`` for keychain storage.

    Body fields: ``smb_host``, ``smb_user``, ``password`` and optional
    ``keychain_key`` (defaults to ``smb_user``). Returns 503 when file_scanner
    is unavailable, 400 when user or password is missing, and 500 when the
    store call reports failure.
    """
    if not _FILE_SCANNER_OK:
        return jsonify({"error": "file_scanner not available"}), 503

    body = request.get_json() or {}
    host = body.get("smb_host", "")
    user = body.get("smb_user", "")
    secret = body.get("password", "")
    keychain_key = body.get("keychain_key") or user

    if not (user and secret):
        return jsonify({"error": "smb_user and password required"}), 400

    if store_smb_password(host, user, secret, keychain_key):
        return jsonify({"ok": True, "keychain_key": keychain_key})
    return jsonify({"error": "keyring not available — install: pip install keyring"}), 500
|
||||
|
||||
|
||||
@bp.route("/api/file_scan/start", methods=["POST"])
def file_scan_start():
    """Start a file system scan for a single file source.

    The JSON body is the source definition passed to
    ``scan_engine.run_file_scan`` on a daemon thread. Returns 503 when
    file_scanner is unavailable and 409 when the scan lock is already held.
    """
    if not _FILE_SCANNER_OK:
        return jsonify({"error": "file_scanner not available"}), 503
    if not state._scan_lock.acquire(blocking=False):
        return jsonify({"error": "scan already running"}), 409

    def _run():
        from scan_engine import run_file_scan
        try:
            run_file_scan(source)
        finally:
            # The worker owns the lock from the moment it starts.
            state._scan_lock.release()

    try:
        source = request.get_json() or {}
        state._scan_abort.clear()
        threading.Thread(target=_run, daemon=True).start()
    except BaseException:
        # A malformed body or a thread-start failure must not leave the lock
        # held, or every later scan request would be refused with 409.
        state._scan_lock.release()
        raise
    return jsonify({"status": "started"})
|
||||
41
routes/state.py
Normal file
41
routes/state.py
Normal file
@ -0,0 +1,41 @@
|
||||
"""
|
||||
Shared mutable state for GDPR Scanner.
|
||||
|
||||
All modules (gdpr_scanner.py and route blueprints) import from here.
|
||||
This avoids circular imports while keeping a single source of truth
|
||||
for every global that routes need to read or write.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from m365_connector import M365Connector
|
||||
|
||||
# ── Microsoft 365 authentication ─────────────────────────────────────────────
# All three start out unset (None).
connector: "M365Connector | None" = None
pending_flow: "dict | None" = None
auth_poll_result: "dict | None" = None

# ── Google Workspace authentication ──────────────────────────────────────────
# google_connector holds a GoogleConnector or PersonalGoogleConnector once set.
google_connector = None
google_pending_flow: "dict | None" = None
google_poll_result: "str | None" = None

# ── Scan concurrency primitives ──────────────────────────────────────────────
# One lock + abort event pair for the main scan, a second pair for Google scans.
import threading as _threading

_scan_lock = _threading.Lock()
_scan_abort = _threading.Event()
_google_scan_lock = _threading.Lock()
_google_scan_abort = _threading.Event()

# ── In-memory scan results for the current session ───────────────────────────
flagged_items: list = []
scan_meta: dict = {}

# ── i18n ─────────────────────────────────────────────────────────────────────
LANG: dict = {}

# ── Art. 9 keyword data ──────────────────────────────────────────────────────
# compiled_keywords: compiled re.Pattern objects
# keyword_data:      raw keyword dict from JSON
# keyword_flat:      flat list of keyword strings
compiled_keywords: list = []
keyword_data: dict = {}
keyword_flat: list = []
|
||||
217
routes/users.py
Normal file
217
routes/users.py
Normal file
@ -0,0 +1,217 @@
|
||||
"""
|
||||
User listing, role overrides, license debug
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import traceback
|
||||
from flask import Blueprint, jsonify, request
|
||||
from routes import state
|
||||
from app_config import (
|
||||
_load_role_overrides, _save_role_overrides, _resolve_display_name,
|
||||
)
|
||||
|
||||
bp = Blueprint("users", __name__)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@bp.route("/api/users")
def get_users():
    """List tenant users for account selection, each tagged with a role.

    Returns 401 without a connector. On success the payload holds ``users``,
    ``sku_map_available`` and ``unclassified`` (users whose role is neither
    "student" nor "staff"). Any exception becomes HTTP 500 with the error text
    and traceback.
    """
    if not state.connector:
        return jsonify({"error": "not authenticated"}), 401
    try:
        users = state.connector.list_users()

        # SKU map for role classification: start from get_subscribed_skus()
        # and always layer the per-user licence details on top, so SKUs that
        # only other users hold are covered too. Both steps are best-effort.
        try:
            sku_map = state.connector.get_subscribed_skus()
        except Exception:
            sku_map = {}

        try:
            per_user = state.connector.build_sku_map_from_users(users)
            if per_user:
                added = len(set(per_user) - set(sku_map))
                sku_map.update(per_user)
                if added:
                    logger.info("[skus] merged %d additional SKU(s) from per-user licenseDetails", added)
        except Exception:
            pass

        # Manual role overrides set by the admin; they win over auto-classification.
        _role_overrides = _load_role_overrides()

        def _build_user(u: dict, is_me: bool = False) -> dict:
            address = u.get("mail") or u.get("userPrincipalName", "")
            auto_role = state.connector.classify_user_role(
                u.get("assignedLicenses", []), sku_map
            )
            user_id = u["id"]
            return {
                "id": user_id,
                "displayName": _resolve_display_name(u.get("displayName", ""), address),
                "email": address,
                "isMe": is_me,
                "userRole": _role_overrides.get(user_id, auto_role),
                "roleOverride": user_id in _role_overrides,
            }

        # In delegated mode the signed-in user is listed first and flagged.
        if state.connector.is_app_mode:
            me_id = None
            candidates = users
        else:
            me = state.connector.get_user_info()
            me_id = me.get("id")
            candidates = [me] + users

        out = []
        seen = set()
        for u in candidates:
            uid = u.get("id")
            if not uid or uid in seen:
                continue
            seen.add(uid)
            # uid is truthy here, so this is always False when me_id is None.
            out.append(_build_user(u, is_me=(uid == me_id)))

        # Warn when nobody was classified; the sample of SKU ids helps
        # diagnose tenants whose SKUs are not yet in m365_skus.json.
        classified = [u for u in out if u["userRole"] in ("student", "staff")]
        if out and not classified:
            unknown_skus: set = {
                sid
                for u in users[:20]  # sample first 20 to keep it brief
                for lic in u.get("assignedLicenses", [])
                if (sid := lic.get("skuId", ""))
            }
            logger.warning(
                "[role] 0/%d users classified — no SKUs in m365_skus.json matched. "
                "Unrecognised SKU IDs (sample): %s. "
                "Add them to classification/m365_skus.json or use /api/users/license_debug.",
                len(out), sorted(unknown_skus)[:10],
            )

        return jsonify({
            "users": out,
            "sku_map_available": bool(sku_map),
            "unclassified": len(out) - len(classified),
        })
    except Exception as e:
        return jsonify({"error": str(e), "detail": traceback.format_exc()}), 500
|
||||
|
||||
|
||||
@bp.route("/api/users/license_debug")
def license_debug():
    """Diagnostic dump for role classification.

    Reports the connector's runtime SKU sets and fragments, the merged SKU
    map, and a per-licence trace for the first 100 users. Returns 401 without
    a connector and 500 (with traceback) on any exception.
    """
    if not state.connector:
        return jsonify({"error": "not authenticated"}), 401
    try:
        users = state.connector.list_users()
        sku_map = state.connector.get_subscribed_skus()
        try:
            sku_map.update(state.connector.build_sku_map_from_users(users))
        except Exception:
            pass

        def _first_fragment(fragments, sku_name):
            # First fragment contained in the upper-cased SKU name, else None.
            return next((f for f in fragments if f in sku_name.upper()), None)

        def _trace_licence(lic):
            # One trace row per licence: id, resolved name, and which
            # id-set / fragment checks it satisfies.
            raw_id = lic.get("skuId", "")
            low_id = raw_id.lower()
            name = sku_map.get(low_id) or sku_map.get(raw_id) or "?"
            return {
                "skuId": raw_id,
                "skuName": name,
                "in_staff": low_id in state.connector._STAFF_SKU_IDS,
                "in_student": low_id in state.connector._STUDENT_SKU_IDS,
                "frag_staff": _first_fragment(state.connector._STAFF_SKU_FRAGMENTS, name),
                "frag_student": _first_fragment(state.connector._STUDENT_SKU_FRAGMENTS, name),
            }

        # Per-user trace, capped at the first 100 users.
        out = []
        for u in users[:100]:
            lics = u.get("assignedLicenses", [])
            role = state.connector.classify_user_role(lics, sku_map)
            lic_trace = [_trace_licence(lic) for lic in lics]
            out.append({
                "displayName": u.get("displayName", ""),
                "email": u.get("mail") or u.get("userPrincipalName", ""),
                "role": role,
                "licences": lic_trace,
            })

        # Runtime state of the connector's SKU tables.
        runtime = {
            "student_ids_count": len(state.connector._STUDENT_SKU_IDS),
            "staff_ids_count": len(state.connector._STAFF_SKU_IDS),
            "student_fragments": list(state.connector._STUDENT_SKU_FRAGMENTS),
            "staff_fragments": list(state.connector._STAFF_SKU_FRAGMENTS),
            "sku_map_entries": len(sku_map),
            "sku_file_path": str(state.connector._sku_file_path()),
        }
        return jsonify({
            "runtime": runtime,
            "student_ids": sorted(state.connector._STUDENT_SKU_IDS),
            "staff_ids": sorted(state.connector._STAFF_SKU_IDS),
            "sku_map": sku_map,
            "users": out,
        })
    except Exception as e:
        return jsonify({"error": str(e), "detail": traceback.format_exc()}), 500
|
||||
|
||||
|
||||
@bp.route("/api/users/lookup")
def lookup_user():
    """Look up a single user by UPN or email.

    Query parameter ``upn`` is required (HTTP 400 otherwise). Returns 401
    without a connector and 404 with the error text when the lookup raises.
    """
    from urllib.parse import quote

    if not state.connector:
        return jsonify({"error": "not authenticated"}), 401
    upn = request.args.get("upn", "").strip()
    if not upn:
        return jsonify({"error": "upn required"}), 400
    try:
        # Percent-encode the caller-supplied value before placing it in the
        # request path: characters such as '#', '?' or '/' would otherwise
        # change which resource is requested (and guest UPNs containing
        # "#EXT#" could not be looked up at all). '@' is left readable.
        # NOTE(review): assumes connector._get does not itself encode the path -- confirm.
        safe_upn = quote(upn, safe="@")
        data = state.connector._get(f"/users/{safe_upn}", {"$select": "id,displayName,mail,userPrincipalName"})
        _email = data.get("mail") or data.get("userPrincipalName", upn)
        return jsonify({
            "id": data["id"],
            "displayName": _resolve_display_name(data.get("displayName", ""), _email, upn),
            "email": _email,
            "isMe": False,
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 404
|
||||
|
||||
|
||||
@bp.route("/api/users/role_override", methods=["GET"])
def role_override_get():
    """Return the stored manual role overrides (a ``{user_id: role}`` mapping) as JSON."""
    overrides = _load_role_overrides()
    return jsonify(overrides)
|
||||
|
||||
|
||||
@bp.route("/api/users/role_override", methods=["POST"])
def role_override_set():
    """Set or clear a manual role override for one user.

    Body: {user_id, role} — role is 'student' | 'staff' | 'other' | '' (clear).

    Returns HTTP 400 when ``user_id`` is missing or ``role`` is not one of the
    accepted values; otherwise the updated override count is reported.
    """
    data = request.get_json() or {}
    # Coerce before stripping: a JSON null or number previously raised
    # AttributeError on .strip() and surfaced as a 500 instead of a 400.
    uid = str(data.get("user_id") or "").strip()
    role = str(data.get("role") or "").strip().lower()
    if not uid:
        return jsonify({"error": "user_id required"}), 400
    if role and role not in ("student", "staff", "other"):
        return jsonify({"error": "role must be student | staff | other | '' (clear)"}), 400
    overrides = _load_role_overrides()
    if role:
        overrides[uid] = role
    else:
        # An empty role clears any existing override for this user.
        overrides.pop(uid, None)
    _save_role_overrides(overrides)
    return jsonify({"ok": True, "user_id": uid, "role": role or None,
                    "total_overrides": len(overrides)})
|
||||
152
routes/viewer.py
Normal file
152
routes/viewer.py
Normal file
@ -0,0 +1,152 @@
|
||||
"""
|
||||
Read-only viewer token + PIN management routes (#33).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import time
|
||||
from flask import Blueprint, jsonify, request, session
|
||||
from app_config import (
|
||||
create_viewer_token,
|
||||
validate_viewer_token,
|
||||
revoke_viewer_token,
|
||||
cleanup_expired_viewer_tokens,
|
||||
_load_viewer_tokens,
|
||||
get_viewer_pin_hash,
|
||||
set_viewer_pin,
|
||||
verify_viewer_pin,
|
||||
clear_viewer_pin,
|
||||
)
|
||||
|
||||
bp = Blueprint("viewer", __name__)
|
||||
|
||||
# Brute-force guard for PIN checks: failure timestamps per remote IP.
_pin_attempts: dict[str, list[float]] = {}
_MAX_ATTEMPTS = 5      # failures tolerated inside one window
_WINDOW_S = 5 * 60     # window length in seconds (5 minutes)
|
||||
|
||||
|
||||
def _pin_rate_limit(ip: str) -> bool:
    """Tell whether ``ip`` has reached the failure limit within the current window.

    Timestamps older than ``_WINDOW_S`` are pruned from the stored list as a
    side effect.
    """
    current = time.time()
    recent = []
    for stamp in _pin_attempts.get(ip, []):
        if current - stamp < _WINDOW_S:
            recent.append(stamp)
    _pin_attempts[ip] = recent
    return not (len(recent) < _MAX_ATTEMPTS)
|
||||
|
||||
|
||||
def _pin_record_failure(ip: str) -> None:
    """Append the current time to the failure list kept for ``ip``."""
    failures = _pin_attempts.setdefault(ip, [])
    failures.append(time.time())
|
||||
|
||||
|
||||
def _pin_clear_failures(ip: str) -> None:
    """Forget all recorded failures for ``ip`` (no-op when none exist)."""
    _pin_attempts.pop(ip, None)
|
||||
|
||||
|
||||
# ── Token endpoints ───────────────────────────────────────────────────────────
|
||||
|
||||
@bp.route("/api/viewer/tokens", methods=["GET"])
def list_tokens():
    """List the viewer tokens after purging expired ones.

    Each entry carries a shortened ``token_hint`` and the full ``token``
    alongside label and timestamps.
    NOTE(review): the full token is returned to the caller -- confirm intended.
    """
    cleanup_expired_viewer_tokens()
    listing = []
    for entry in _load_viewer_tokens():
        full_token = entry["token"]
        listing.append({
            "token_hint": full_token[:8] + "…",
            "token": full_token,
            "label": entry.get("label", ""),
            "created_at": entry.get("created_at"),
            "expires_at": entry.get("expires_at"),
            "last_used_at": entry.get("last_used_at"),
        })
    return jsonify(listing)
|
||||
|
||||
|
||||
@bp.route("/api/viewer/tokens", methods=["POST"])
def create_token():
    """Create a viewer token from optional ``label`` and ``expires_days`` body fields.

    ``expires_days``, when given, must convert to a positive integer or the
    request is rejected with HTTP 400. Responds 201 with the new entry.
    """
    body = request.get_json(silent=True) or {}
    label = str(body.get("label", "")).strip()
    expires_days = body.get("expires_days")
    if expires_days is not None:
        try:
            expires_days = int(expires_days)
        except (TypeError, ValueError):
            # Unparsable values share the rejection path of non-positive ones.
            expires_days = 0
        if expires_days <= 0:
            return jsonify({"error": "expires_days must be a positive integer"}), 400
    new_entry = create_viewer_token(label=label, expires_days=expires_days)
    return jsonify(new_entry), 201
|
||||
|
||||
|
||||
@bp.route("/api/viewer/tokens/<token>", methods=["DELETE"])
def delete_token(token: str):
    """Revoke the token given in the URL; 400 when empty, 404 when unknown."""
    if not token:
        return jsonify({"error": "token required"}), 400
    if revoke_viewer_token(token):
        return jsonify({"ok": True})
    return jsonify({"error": "token not found"}), 404
|
||||
|
||||
|
||||
@bp.route("/api/viewer/tokens/validate", methods=["POST"])
def validate_token():
    """Check the posted ``token``; 401 with ``valid: False`` when validation returns None."""
    body = request.get_json(silent=True) or {}
    candidate = str(body.get("token", "")).strip()
    entry = validate_viewer_token(candidate)
    if entry is None:
        return jsonify({"valid": False}), 401
    reply = {
        "valid": True,
        "label": entry.get("label", ""),
        "expires_at": entry.get("expires_at"),
    }
    return jsonify(reply)
|
||||
|
||||
|
||||
# ── PIN endpoints ─────────────────────────────────────────────────────────────
|
||||
|
||||
@bp.route("/api/viewer/pin", methods=["GET"])
def pin_status():
    """Report via ``pin_set`` whether a viewer PIN hash is currently stored."""
    has_pin = bool(get_viewer_pin_hash())
    return jsonify({"pin_set": has_pin})
|
||||
|
||||
|
||||
@bp.route("/api/viewer/pin", methods=["POST"])
def pin_set():
    """Set or change the viewer PIN.

    Body: {pin: "...", current_pin: "..."}
    current_pin required only when a PIN is already set.

    Returns 400 for a missing or malformed new PIN, 403 for a wrong current
    PIN, and 429 when the caller's IP has too many recent PIN failures.
    """
    body = request.get_json(silent=True) or {}
    new_pin = str(body.get("pin", "")).strip()
    if not new_pin:
        return jsonify({"error": "pin required"}), 400
    if not new_pin.isdigit() or not (4 <= len(new_pin) <= 8):
        return jsonify({"error": "PIN must be 4–8 digits"}), 400
    if get_viewer_pin_hash():
        # Checking current_pin is a PIN guess like any other, so it goes
        # through the same per-IP brute-force guard as /api/viewer/pin/verify.
        ip = request.remote_addr or "unknown"
        if _pin_rate_limit(ip):
            return jsonify({"error": "Too many failed attempts. Try again later."}), 429
        if not verify_viewer_pin(str(body.get("current_pin", "")).strip()):
            _pin_record_failure(ip)
            return jsonify({"error": "current PIN is incorrect"}), 403
        _pin_clear_failures(ip)
    set_viewer_pin(new_pin)
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@bp.route("/api/viewer/pin", methods=["DELETE"])
def pin_clear():
    """Remove the viewer PIN. Requires current PIN if one is set.

    Returns 403 for a wrong current PIN and 429 when the caller's IP has too
    many recent PIN failures.
    """
    body = request.get_json(silent=True) or {}
    if get_viewer_pin_hash():
        # The current_pin check is throttled per IP exactly like
        # /api/viewer/pin/verify, so it cannot be used to brute-force the PIN.
        ip = request.remote_addr or "unknown"
        if _pin_rate_limit(ip):
            return jsonify({"error": "Too many failed attempts. Try again later."}), 429
        if not verify_viewer_pin(str(body.get("current_pin", "")).strip()):
            _pin_record_failure(ip)
            return jsonify({"error": "current PIN is incorrect"}), 403
        _pin_clear_failures(ip)
    clear_viewer_pin()
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@bp.route("/api/viewer/pin/verify", methods=["POST"])
def pin_verify():
    """Check a submitted PIN; on success mark the session with ``viewer_ok``.

    Rate-limited per remote IP: 429 once the failure limit is reached, 401
    (with the remaining attempt count) on a wrong PIN.
    """
    client_ip = request.remote_addr or "unknown"
    if _pin_rate_limit(client_ip):
        return jsonify({"error": "Too many failed attempts. Try again later."}), 429

    body = request.get_json(silent=True) or {}
    submitted = str(body.get("pin", "")).strip()

    if verify_viewer_pin(submitted):
        _pin_clear_failures(client_ip)
        session["viewer_ok"] = True
        return jsonify({"ok": True})

    _pin_record_failure(client_ip)
    attempts_left = _MAX_ATTEMPTS - len(_pin_attempts.get(client_ip, []))
    return jsonify({"error": "Incorrect PIN", "remaining": max(0, attempts_left)}), 401
|
||||
14
run_tests.sh
Executable file
14
run_tests.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Run the test suite with the project's virtualenv interpreter.
# All arguments are forwarded to pytest.
set -euo pipefail

# Work from the directory containing this script, regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Activate venv
if [[ ! -f venv/bin/activate ]]; then
    # The hint uses the venv's own pip: a bare `pip install` right after
    # creating the venv would install into the outer environment instead.
    echo "ERROR: venv not found. Run: python -m venv venv && venv/bin/pip install -r requirements.txt" >&2
    exit 1
fi
source venv/bin/activate

# Replace this shell with pytest so its exit status is the script's.
exec python -m pytest "$@"
|
||||
1161
scan_engine.py
Normal file
1161
scan_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
489
scan_scheduler.py
Normal file
489
scan_scheduler.py
Normal file
@ -0,0 +1,489 @@
|
||||
"""
|
||||
Scheduler — in-process APScheduler wrapper for automated GDPR scans.
|
||||
|
||||
Supports multiple independent named scan jobs.
|
||||
Config stored in ~/.gdprscanner/schedule.json as {"jobs": [...]}.
|
||||
Old single-job format is migrated automatically on first load.
|
||||
Run history persisted in the SQLite DB (schedule_runs table).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
APSCHEDULER_OK = True
|
||||
except ImportError:
|
||||
APSCHEDULER_OK = False
|
||||
|
||||
# ── Config file ───────────────────────────────────────────────────────────────
# Jobs live in schedule.json inside ~/.gdprscanner (created on import if absent).
_DATA_DIR = Path.home().joinpath(".gdprscanner")
_DATA_DIR.mkdir(exist_ok=True)
_SCHEDULE_PATH = _DATA_DIR.joinpath("schedule.json")

# Template for a job; stored jobs are overlaid on top of these defaults.
_DEFAULT_JOB: dict[str, Any] = dict(
    id="",
    name="Scheduled scan",
    enabled=False,
    frequency="daily",
    day_of_week="mon",
    day_of_month=1,
    hour=2,
    minute=0,
    profile_id="",
    auto_email=False,
    auto_retention=False,
    retention_years=None,
    fiscal_year_end=None,
)

# Backward-compat alias: the same dict object under its older name.
_DEFAULT_CONFIG = _DEFAULT_JOB
|
||||
|
||||
|
||||
def _new_job(overrides: dict | None = None) -> dict:
|
||||
job = dict(_DEFAULT_JOB)
|
||||
job["id"] = str(uuid.uuid4())
|
||||
if overrides:
|
||||
job.update(overrides)
|
||||
return job
|
||||
|
||||
|
||||
def load_jobs() -> list[dict]:
    """Return the list of configured job dicts.

    Reads ``_SCHEDULE_PATH``.  The current format is ``{"jobs": [...]}``; each
    entry is merged over ``_DEFAULT_JOB`` and given a UUID if its ``id`` is
    empty.  A legacy single-job dict (no ``"jobs"`` key) is migrated to a
    one-element list and written back.

    Returns:
        The job list, or ``[]`` when the file is missing, unreadable or not in
        a recognised format.  Problems are logged instead of being silently
        discarded.
    """
    try:
        if not _SCHEDULE_PATH.exists():
            return []
        data = json.loads(_SCHEDULE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        logger.warning("[scheduler] Could not read %s: %s", _SCHEDULE_PATH, exc)
        return []

    if not isinstance(data, dict):
        logger.warning("[scheduler] Ignoring %s: unexpected top-level JSON type %s",
                       _SCHEDULE_PATH, type(data).__name__)
        return []

    if "jobs" in data:
        raw = data["jobs"]
        if not isinstance(raw, list):
            logger.warning("[scheduler] Ignoring %s: 'jobs' is not a list", _SCHEDULE_PATH)
            return []
        # Skip malformed entries rather than losing every job because of one.
        jobs = [{**_DEFAULT_JOB, **entry} for entry in raw if isinstance(entry, dict)]
        if len(jobs) != len(raw):
            logger.warning("[scheduler] Skipped %d malformed job entries in %s",
                           len(raw) - len(jobs), _SCHEDULE_PATH)
        # Ensure every job has a non-empty id
        changed = False
        for job in jobs:
            if not job.get("id"):
                job["id"] = str(uuid.uuid4())
                changed = True
        migrated = jobs if changed else None
    else:
        # Old format: migrate to single-job list
        jobs = [_new_job({**data, "name": "Scheduled scan"})]
        migrated = jobs

    if migrated is not None:
        try:
            _save_jobs_file(migrated)
        except (OSError, TypeError, ValueError) as exc:
            # Persisting is best-effort; the in-memory result is still usable.
            logger.warning("[scheduler] Could not write %s: %s", _SCHEDULE_PATH, exc)
    return jobs
|
||||
|
||||
|
||||
def save_jobs(jobs: list[dict]) -> None:
    """Public entry point for persisting the job list.

    Thin wrapper that hands *jobs* to ``_save_jobs_file`` unchanged.
    """
    _save_jobs_file(jobs)
|
||||
|
||||
|
||||
def _save_jobs_file(jobs: list[dict]) -> None:
|
||||
tmp = _SCHEDULE_PATH.with_suffix(".tmp")
|
||||
tmp.write_text(json.dumps({"jobs": jobs}, indent=2), encoding="utf-8")
|
||||
tmp.replace(_SCHEDULE_PATH)
|
||||
try:
|
||||
_SCHEDULE_PATH.chmod(0o600)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
# Backward-compat shims
|
||||
def load_schedule_config() -> dict:
    """Backward-compat shim: return the first configured job.

    Falls back to a copy of ``_DEFAULT_JOB`` when ``load_jobs()`` yields
    nothing.
    """
    configured = load_jobs()
    if not configured:
        return dict(_DEFAULT_JOB)
    return configured[0]
|
||||
|
||||
|
||||
def save_schedule_config(cfg: dict) -> None:
    """Backward-compat shim: store *cfg* as the first (or only) job.

    When jobs already exist, ``jobs[0]`` is replaced by *cfg* merged over
    ``_DEFAULT_JOB``; otherwise a new single-job list is created.

    The job id is preserved: if *cfg* carries no (or an empty) ``id``, the id
    of the job being replaced is kept so that the stored job does not end up
    with an empty id.  When no job exists yet, an empty ``id`` in *cfg* is
    dropped so ``_new_job`` can assign a fresh one.
    """
    jobs = load_jobs()
    if jobs:
        merged = {**_DEFAULT_JOB, **cfg}
        if not merged.get("id"):
            merged["id"] = jobs[0].get("id", "")
        jobs[0] = merged
    else:
        # Strip a falsy id so it cannot override the generated UUID.
        cleaned = {k: v for k, v in cfg.items() if k != "id" or v}
        jobs = [_new_job(cleaned)]
    save_jobs(jobs)
|
||||
|
||||
|
||||
def _build_trigger(job: dict) -> "CronTrigger":
|
||||
freq = job.get("frequency", "daily")
|
||||
hour = int(job.get("hour", 2))
|
||||
minute = int(job.get("minute", 0))
|
||||
if freq == "weekly":
|
||||
return CronTrigger(day_of_week=job.get("day_of_week", "mon"),
|
||||
hour=hour, minute=minute)
|
||||
elif freq == "monthly":
|
||||
return CronTrigger(day=int(job.get("day_of_month", 1)),
|
||||
hour=hour, minute=minute)
|
||||
return CronTrigger(hour=hour, minute=minute)
|
||||
|
||||
|
||||
def _ap_id(job_id: str) -> str:
|
||||
return f"gdpr_scan_{job_id}"
|
||||
|
||||
|
||||
# ── Scheduler class ───────────────────────────────────────────────────────────
|
||||
|
||||
class ScanScheduler:
|
||||
|
||||
def __init__(self):
|
||||
self._scheduler: BackgroundScheduler | None = None
|
||||
self._lock = threading.Lock()
|
||||
self._last_runs: dict[str, dict] = {}
|
||||
self._running_jobs: set[str] = set()
|
||||
|
||||
# ── Lifecycle ─────────────────────────────────────────────────────────
|
||||
|
||||
def start(self) -> bool:
|
||||
if not APSCHEDULER_OK:
|
||||
return False
|
||||
self._scheduler = BackgroundScheduler(
|
||||
daemon=True,
|
||||
job_defaults={"coalesce": True, "max_instances": 1,
|
||||
"misfire_grace_time": 3600},
|
||||
)
|
||||
self._scheduler.start()
|
||||
self.reload()
|
||||
return True
|
||||
|
||||
def stop(self):
|
||||
if self._scheduler:
|
||||
self._scheduler.shutdown(wait=False)
|
||||
self._scheduler = None
|
||||
|
||||
def reload(self):
|
||||
if not self._scheduler:
|
||||
return
|
||||
for job in self._scheduler.get_jobs():
|
||||
if job.id.startswith("gdpr_scan_"):
|
||||
self._scheduler.remove_job(job.id)
|
||||
for job_cfg in load_jobs():
|
||||
if job_cfg.get("enabled"):
|
||||
self._scheduler.add_job(
|
||||
self._execute_scan,
|
||||
trigger=_build_trigger(job_cfg),
|
||||
id=_ap_id(job_cfg["id"]),
|
||||
name=job_cfg.get("name", "GDPR scheduled scan"),
|
||||
args=[job_cfg["id"]],
|
||||
replace_existing=True,
|
||||
)
|
||||
|
||||
def next_run_time(self, job_id: str | None = None) -> str | None:
|
||||
if not self._scheduler:
|
||||
return None
|
||||
if job_id:
|
||||
job = self._scheduler.get_job(_ap_id(job_id))
|
||||
if job and job.next_run_time:
|
||||
return job.next_run_time.isoformat()
|
||||
return None
|
||||
times = [j.next_run_time for j in self._scheduler.get_jobs()
|
||||
if j.id.startswith("gdpr_scan_") and j.next_run_time]
|
||||
return min(times).isoformat() if times else None
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
return bool(self._running_jobs)
|
||||
|
||||
def get_status(self) -> dict:
|
||||
jobs = load_jobs()
|
||||
job_statuses = []
|
||||
for j in jobs:
|
||||
jid = j["id"]
|
||||
job_statuses.append({
|
||||
"id": jid,
|
||||
"name": j.get("name", ""),
|
||||
"enabled": j.get("enabled", False),
|
||||
"next_run": self.next_run_time(jid),
|
||||
"is_running": jid in self._running_jobs,
|
||||
"last_run": self._last_runs.get(jid),
|
||||
})
|
||||
return {
|
||||
"available": APSCHEDULER_OK,
|
||||
"jobs": job_statuses,
|
||||
"enabled": any(j.get("enabled") for j in jobs),
|
||||
"next_run": self.next_run_time(),
|
||||
"is_running": bool(self._running_jobs),
|
||||
}
|
||||
|
||||
# ── Execute scan ──────────────────────────────────────────────────────
|
||||
|
||||
def _execute_scan(self, job_id: str | None = None):
|
||||
jobs = load_jobs()
|
||||
if not jobs:
|
||||
return
|
||||
if job_id:
|
||||
job_cfg = next((j for j in jobs if j["id"] == job_id), None)
|
||||
if not job_cfg:
|
||||
return
|
||||
else:
|
||||
job_cfg = jobs[0]
|
||||
job_id = job_cfg["id"]
|
||||
|
||||
if job_id in self._running_jobs:
|
||||
return
|
||||
with self._lock:
|
||||
if job_id in self._running_jobs:
|
||||
return
|
||||
self._running_jobs.add(job_id)
|
||||
|
||||
run = {
|
||||
"started_at": time.time(), "finished_at": None,
|
||||
"status": "running",
|
||||
"job_id": job_id, "job_name": job_cfg.get("name", ""),
|
||||
"profile_id": job_cfg.get("profile_id", ""),
|
||||
"flagged": 0, "scanned": 0, "emailed": 0, "error": "",
|
||||
}
|
||||
self._last_runs[job_id] = run
|
||||
db_run_id: int | None = None
|
||||
_m = None
|
||||
logger.info("[scheduler] Starting job '%s'", job_cfg.get("name", ""))
|
||||
|
||||
try:
|
||||
import gdpr_scanner as _m
|
||||
try:
|
||||
db = _m._get_db()
|
||||
if db:
|
||||
try:
|
||||
db_run_id = db.begin_schedule_run(
|
||||
profile_id=job_cfg.get("profile_id", ""),
|
||||
job_id=job_id,
|
||||
job_name=job_cfg.get("name", ""),
|
||||
)
|
||||
except TypeError:
|
||||
db_run_id = db.begin_schedule_run(
|
||||
profile_id=job_cfg.get("profile_id", ""))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_m.broadcast("scheduler_started", {
|
||||
"time": datetime.now(timezone.utc).isoformat(),
|
||||
"job_name": job_cfg.get("name", ""),
|
||||
})
|
||||
|
||||
from routes import state
|
||||
# If connector not set, attempt to restore from saved config
|
||||
if not state.connector or not state.connector.is_authenticated():
|
||||
try:
|
||||
cfg_saved = _m._load_config()
|
||||
cid = cfg_saved.get("client_id", "")
|
||||
tid = cfg_saved.get("tenant_id", "")
|
||||
secret = cfg_saved.get("client_secret", "")
|
||||
if cid and tid:
|
||||
from m365_connector import M365Connector
|
||||
conn = M365Connector(cid, tid, client_secret=secret)
|
||||
if conn.is_app_mode:
|
||||
conn.authenticate_app_mode()
|
||||
if conn.is_authenticated():
|
||||
state.connector = conn
|
||||
except Exception as _e:
|
||||
pass
|
||||
if not state.connector or not state.connector.is_authenticated():
|
||||
raise RuntimeError("Not authenticated")
|
||||
|
||||
if not _m._scan_lock.acquire(blocking=False):
|
||||
logger.info("[scheduler] Scan already running — skipping job '%s'", job_cfg.get("name", job_id))
|
||||
_m.broadcast("scheduler_debug", {"msg": f"Skipped — a scan is already running"})
|
||||
return
|
||||
|
||||
try:
|
||||
# Sync connector into gdpr_scanner's module global —
|
||||
# run_scan() reads _connector directly, not state.connector
|
||||
_m._connector = state.connector
|
||||
_m._scan_abort.clear()
|
||||
options = self._build_options(job_cfg)
|
||||
options.setdefault("options", {})["_scheduled"] = True
|
||||
# Fire M365 scan if M365 sources are included
|
||||
m365_sources = [s for s in options.get("sources", [])
|
||||
if s in ("email","onedrive","sharepoint","teams")]
|
||||
if m365_sources:
|
||||
opts_m365 = dict(options, sources=m365_sources)
|
||||
_m.run_scan(opts_m365)
|
||||
# Fire file scan for each file source in the profile
|
||||
# file_sources may be IDs (strings) or full dicts — resolve either
|
||||
_all_file_sources = {s["id"]: s for s in (_m._load_file_sources() or []) if isinstance(s, dict)}
|
||||
for fs in options.get("file_sources", []):
|
||||
# Resolve string IDs to full source dicts
|
||||
if isinstance(fs, str):
|
||||
fs = _all_file_sources.get(fs, {"path": fs, "label": fs})
|
||||
if not isinstance(fs, dict) or not fs.get("path"):
|
||||
logger.warning("[scheduler] skipping invalid file source: %r", fs)
|
||||
continue
|
||||
try:
|
||||
_m.run_file_scan(fs)
|
||||
except Exception as _fse:
|
||||
import traceback as _tb2
|
||||
_label = fs.get('label', fs.get('path', str(fs)))
|
||||
logger.error("[scheduler] file scan error (%s): %s\n%s", _label, _fse, _tb2.format_exc())
|
||||
finally:
|
||||
_m._scan_lock.release()
|
||||
|
||||
# Fire Google scan if Google sources are in the profile and
|
||||
# a Google connector is available.
|
||||
google_sources = options.get("google_sources", [])
|
||||
if not google_sources:
|
||||
# Legacy profiles store everything in sources[]
|
||||
google_sources = [s for s in options.get("sources", [])
|
||||
if s in ("gmail", "gdrive")]
|
||||
if google_sources and state.google_connector:
|
||||
from routes.google_scan import (
|
||||
_run_google_scan as _rgs,
|
||||
_scan_lock as _gsl,
|
||||
_scan_abort as _gsa,
|
||||
)
|
||||
if _gsl.acquire(blocking=False):
|
||||
try:
|
||||
_gsa.clear()
|
||||
logger.info("[scheduler] Starting Google scan — sources=%s", google_sources)
|
||||
_rgs({
|
||||
"sources": google_sources,
|
||||
"user_emails": [], # empty → scan all workspace users
|
||||
"options": options.get("options", {}),
|
||||
})
|
||||
except Exception as _ge:
|
||||
import traceback as _tb3
|
||||
logger.error("[scheduler] Google scan error: %s\n%s", _ge, _tb3.format_exc())
|
||||
finally:
|
||||
_gsl.release()
|
||||
else:
|
||||
logger.info("[scheduler] Google scan already running — skipping")
|
||||
|
||||
run["flagged"] = len(_m.flagged_items)
|
||||
run["scanned"] = _m.scan_meta.get("total_scanned", 0)
|
||||
run["status"] = "completed"
|
||||
logger.info("[scheduler] Job '%s' completed — %d flagged, %d scanned",
|
||||
job_cfg.get("name", ""), run["flagged"], run["scanned"])
|
||||
|
||||
if job_cfg.get("auto_email") and state.flagged_items:
|
||||
try:
|
||||
self._send_email_report(job_cfg)
|
||||
run["emailed"] = 1
|
||||
except Exception as e:
|
||||
run["error"] = f"Scan OK, email failed: {e}"
|
||||
|
||||
if job_cfg.get("auto_retention") and job_cfg.get("retention_years"):
|
||||
try:
|
||||
self._run_retention(job_cfg)
|
||||
except Exception as e:
|
||||
err = f"Retention failed: {e}"
|
||||
run["error"] = f"{run['error']} | {err}" if run["error"] else err
|
||||
|
||||
_m.broadcast("scheduler_done", {
|
||||
"flagged": run["flagged"], "scanned": run["scanned"],
|
||||
"emailed": run["emailed"], "job_name": job_cfg.get("name", ""),
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
import traceback as _tb
|
||||
_tb_str = _tb.format_exc()
|
||||
logger.error("[scheduler] Job failed:\n%s", _tb_str)
|
||||
run["status"] = "failed"
|
||||
run["error"] = str(e)
|
||||
try:
|
||||
if _m:
|
||||
# Include last 3 lines of traceback in UI for diagnosis
|
||||
_tb_lines = _tb_str.strip().splitlines()
|
||||
_tb_short = ' | '.join(_tb_lines[-4:]) if len(_tb_lines) >= 4 else _tb_str
|
||||
_m.broadcast("scheduler_error", {"error": str(e) + ' | ' + _tb_short})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
finally:
|
||||
run["finished_at"] = time.time()
|
||||
self._last_runs[job_id] = run
|
||||
self._running_jobs.discard(job_id)
|
||||
if db_run_id and _m:
|
||||
try:
|
||||
db = _m._get_db()
|
||||
if db:
|
||||
db.finish_schedule_run(db_run_id, **{
|
||||
k: run[k] for k in
|
||||
("status", "flagged", "scanned", "emailed", "error")
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────
|
||||
|
||||
def _build_options(self, job_cfg: dict) -> dict:
|
||||
import gdpr_scanner as _m
|
||||
pid = job_cfg.get("profile_id", "")
|
||||
logger.info("[scheduler] Job '%s' — profile_id='%s'", job_cfg.get("name", ""), pid)
|
||||
if pid:
|
||||
p = _m._profile_get(pid)
|
||||
if p:
|
||||
# Derive google_sources from dedicated field; fall back to
|
||||
# filtering the combined sources array for legacy profiles.
|
||||
_all_src = p.get("sources", [])
|
||||
_gs_fallback = [s for s in _all_src if s in ("gmail", "gdrive")]
|
||||
opts = {"sources": _all_src,
|
||||
"user_ids": p.get("user_ids", []),
|
||||
"options": p.get("options", {}),
|
||||
"file_sources": p.get("file_sources", []),
|
||||
"google_sources": p.get("google_sources", _gs_fallback)}
|
||||
logger.info("[scheduler] Profile '%s': sources=%s, users=%d",
|
||||
p.get("name", pid), opts["sources"], len(opts.get("user_ids", [])))
|
||||
_m.broadcast("scheduler_debug", {
|
||||
"msg": f"Using profile '{p.get('name',pid)}': sources={opts['sources']}, users={len(opts.get("user_ids",[]))}"})
|
||||
return opts
|
||||
logger.info("[scheduler] Profile '%s' not found — using saved settings", pid)
|
||||
_m.broadcast("scheduler_debug", {"msg": f"Profile id '{pid}' not found — falling back to saved settings"})
|
||||
saved = _m._load_settings()
|
||||
if saved:
|
||||
logger.info("[scheduler] Saved settings: sources=%s, users=%d",
|
||||
saved.get("sources"), len(saved.get("user_ids", [])))
|
||||
_m.broadcast("scheduler_debug", {
|
||||
"msg": f"Using saved settings: sources={saved.get('sources')}, users={len(saved.get('user_ids',[]))}"})
|
||||
return saved or {"sources": ["email", "onedrive"], "user_ids": [], "options": {}}
|
||||
|
||||
def _send_email_report(self, job_cfg: dict):
|
||||
import gdpr_scanner as _m
|
||||
xl_bytes, fname = _m._build_excel_bytes()
|
||||
smtp_cfg = _m._load_smtp_config()
|
||||
recipients = smtp_cfg.get("recipients", [])
|
||||
if isinstance(recipients, str):
|
||||
recipients = [r.strip() for r in recipients.replace(";", ",").split(",") if r.strip()]
|
||||
if not recipients:
|
||||
raise RuntimeError("No email recipients configured")
|
||||
job_name = job_cfg.get("name", "scheduled scan")
|
||||
subject = f"GDPR Scanner — {job_name} {datetime.now().strftime('%Y-%m-%d %H:%M')}"
|
||||
body = (
|
||||
"<html><body style='font-family:Arial,sans-serif;color:#333;padding:24px'>"
|
||||
"<h2 style='color:#1F3864'>🕐 GDPR Scanner — scheduled scan report</h2>"
|
||||
f"<p>Job: <strong>{job_name}</strong></p>"
|
||||
f"<p>Scan completed. {len(_m.flagged_items)} item(s) flagged.</p>"
|
||||
f"<p>Report attached: {fname}</p></body></html>")
|
||||
from routes.email import _send_email_graph
|
||||
from routes import state
|
||||
if state.connector and state.connector.is_authenticated():
|
||||
try:
|
||||
_send_email_graph(subject, body, recipients,
|
||||
attachment_bytes=xl_bytes, attachment_name=fname)
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
_m._send_report_email(xl_bytes, fname, smtp_cfg, recipients)
|
||||
|
||||
def _run_retention(self, job_cfg: dict):
|
||||
import gdpr_scanner as _m
|
||||
if not _m.DB_OK:
|
||||
return
|
||||
db = _m._get_db()
|
||||
if not db:
|
||||
return
|
||||
overdue = db.get_overdue_items(int(job_cfg["retention_years"]),
|
||||
fiscal_year_end=job_cfg.get("fiscal_year_end"))
|
||||
if overdue:
|
||||
_m._do_retention_delete(overdue)
|
||||
|
||||
|
||||
# ── Module-level singleton ────────────────────────────────────────────────────
|
||||
scan_scheduler = ScanScheduler()
|
||||
50
skus/education.json
Normal file
50
skus/education.json
Normal file
@ -0,0 +1,50 @@
|
||||
{
|
||||
"_description": "Microsoft Education SKU classification for GDPR Scanner role detection.",
|
||||
"_source": "https://learn.microsoft.com/en-us/entra/identity/users/licensing-service-plan-reference",
|
||||
"_note": "student_ids and staff_ids MUST be disjoint. student is checked first — any overlap causes Faculty users to be misclassified as students. Add new SKUs here; no code change required.",
|
||||
"student_ids": {
|
||||
"314c4481-f395-4525-be8b-2ec4bb1e9d91": "Microsoft 365 A1 for students (STANDARDWOFFPACK_STUDENT)",
|
||||
"c32f9321-a627-406d-a114-1f9c81aaafac": "Microsoft 365 A1 for students (OFFICESUBSCRIPTION_STUDENT / new commerce CSP)",
|
||||
"e82ae690-a2d5-4d76-8d30-7c6e01e6022e": "Microsoft 365 A3 for students",
|
||||
"98b6e773-24d4-4c0d-a968-6e787a1f8204": "Microsoft 365 A5 for students",
|
||||
"46c119d4-0379-4a9d-85e4-97c66d3f909e": "Microsoft 365 A1 for students (student use benefit)",
|
||||
"e960f18a-dd80-4a07-82aa-1744b52d22ba": "Office 365 A1 for students",
|
||||
"78e66a63-337a-4a9a-8959-41c6654dfb56": "Office 365 A3 for students",
|
||||
"8fc2205d-4e51-4401-97f0-8c895b11bed4": "Office 365 A5 for students",
|
||||
"12b8c807-2e20-48fc-b453-542b6ee9d171": "Microsoft 365 A1 for students (device)",
|
||||
"d37cc85e-b4c5-4e39-b1d3-e54fb6dd5d63": "Office 365 A1 for students (device)",
|
||||
"160d616a-4b30-4c5a-9a0b-e06b31a82b4b": "Office 365 A3 for students (device)",
|
||||
"8a89b70c-9c52-4e4a-ab05-5a5e14c6c4f4": "Microsoft Teams Essentials (EDU Student)",
|
||||
"a4e376bd-c61c-4517-878d-55e43f5fc13b": "Microsoft 365 A1 for students (new commerce)"
|
||||
},
|
||||
"staff_ids": {
|
||||
"94763226-9b3c-4e75-a931-5c89701abe66": "Microsoft 365 A1 for faculty",
|
||||
"f30db892-07e9-47e9-837c-80727f46fd3d": "Microsoft Power Automate Free (assigned to faculty)",
|
||||
"4b590615-0888-425a-a965-b3bf7789848d": "Microsoft 365 A3 for faculty",
|
||||
"e578b273-6db4-4691-bba0-8d691f4da603": "Microsoft 365 A5 for faculty",
|
||||
"2d61d025-d6aa-49aa-b8f9-ca2ebb63e3ab": "Microsoft 365 A1 for faculty (faculty use benefit)",
|
||||
"a4585165-0533-458a-97e3-c400570268c4": "Office 365 A1 for faculty",
|
||||
"0c266dff-15dd-4b49-8397-2bb16070ed52": "Office 365 A3 for faculty",
|
||||
"1e7e1070-8ccb-4aca-b470-d7cb538cb70e": "Office 365 A5 for faculty",
|
||||
"15b1d32e-5f65-4a21-a4c4-d1a0e2ee3f8e": "Office 365 A3 for faculty (device)",
|
||||
"ba04c29e-5b81-4f69-a5f6-c4c7d6bade97": "Microsoft 365 A1 for faculty (new commerce)",
|
||||
"c2273bd0-dff7-4215-9ef5-2c7bcfb06425": "Microsoft 365 Apps for Faculty"
|
||||
},
|
||||
"student_fragments": [
|
||||
"STUDENT",
|
||||
"STU_",
|
||||
"_STU",
|
||||
"STANDARDWOFFPACK_STUDENT",
|
||||
"STANDARDWOFFPACK_IW_STUDENT",
|
||||
"OFFICESUBSCRIPTION_STUDENT"
|
||||
],
|
||||
"staff_fragments": [
|
||||
"FACULTY",
|
||||
"FAC_",
|
||||
"_FAC",
|
||||
"TEACHER",
|
||||
"STANDARDWOFFPACK_FACULTY",
|
||||
"STANDARDWOFFPACK_IW_FACULTY",
|
||||
"OFFICESUBSCRIPTION_FACULTY"
|
||||
]
|
||||
}
|
||||
26
skus/google_ou_roles.json
Normal file
26
skus/google_ou_roles.json
Normal file
@ -0,0 +1,26 @@
|
||||
{
|
||||
"_description": "Google Workspace Organizational Unit (OU) path → role mapping for GDPRScanner.",
|
||||
"_note": "orgUnitPath values from Google Admin Console → Directory → Organisational units. Matching is prefix-based and case-insensitive — '/Elever/Indskoling' matches the '/Elever' student rule. Rules are evaluated top-to-bottom; first match wins. Edit this file to match your school's OU structure — no code change required.",
|
||||
"_source": "Google Admin Console → Directory → Administrer organisationsenheder",
|
||||
"student_ou_prefixes": [
|
||||
"/Elever",
|
||||
"/Students",
|
||||
"/Elev",
|
||||
"/Pupils"
|
||||
],
|
||||
"staff_ou_prefixes": [
|
||||
"/Personale",
|
||||
"/Staff",
|
||||
"/Lærere",
|
||||
"/Ansatte",
|
||||
"/Teachers",
|
||||
"/Admin"
|
||||
],
|
||||
"_examples": {
|
||||
"gudenaaskolen.dk example": {
|
||||
"student": "/Elever → student",
|
||||
"staff": "/Personale → staff",
|
||||
"admin": "/Admin → staff"
|
||||
}
|
||||
}
|
||||
}
|
||||
54
sse.py
Normal file
54
sse.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""
|
||||
sse.py — Server-Sent Events for GDPRScanner.
|
||||
|
||||
Provides:
|
||||
broadcast(event, data) — push an event to all connected browsers
|
||||
_sse_queues — list of per-connection Queue objects
|
||||
_sse_buffer — deque replay buffer for late-connecting browsers
|
||||
_current_scan_id — injected into every broadcast message
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import queue
|
||||
from collections import deque
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── SSE state ─────────────────────────────────────────────────────────────────
|
||||
_sse_queues: list = []
|
||||
_sse_buffer: deque = deque(maxlen=500)
|
||||
_current_scan_id: str = ""
|
||||
|
||||
def broadcast(event: str, data: dict):
    """Push one SSE message to every connected queue and the replay buffer.

    While ``_current_scan_id`` is set it is added to the payload as
    ``scan_id``; a ``scan_done`` event clears it afterwards.  Queues that are
    full are skipped.  When no queue is registered, key events are written to
    the log instead.
    """
    global _current_scan_id

    payload = {**data, "scan_id": _current_scan_id} if _current_scan_id else data
    frame = f"event: {event}\ndata: {json.dumps(payload)}\n\n"

    # Keep a copy for replay on reconnect, then fan out to the listeners.
    _sse_buffer.append(frame)
    for listener in list(_sse_queues):
        try:
            listener.put_nowait(frame)
        except queue.Full:
            pass

    # Clear scan_id after scan_done so replay knows the scan is finished
    if event == "scan_done" and _current_scan_id:
        _current_scan_id = ""

    # Someone is watching: nothing to log.
    if _sse_queues:
        return

    # When no browser is watching (e.g. scheduled scan), log key events
    if event == "scan_phase":
        logger.info("[scan] %s", payload.get("phase", ""))
    elif event == "scan_progress":
        label = payload.get("file") or payload.get("name", "")
        if label:
            logger.info("[scan] %s/%s — %s", payload.get("completed", ""), payload.get("total", ""), label)
    elif event in ("scan_error", "scheduler_error"):
        logger.error("[scan] %s", payload.get("error", "") or payload.get("file", ""))
    elif event == "scan_done":
        logger.info("[scan] Done — %d flagged, %d scanned",
                    payload.get("flagged_count", 0), payload.get("total_scanned", 0))
    elif event == "scheduler_started":
        logger.info("[scan] Scheduler started — %s", payload.get("job_name", ""))
    elif event == "scheduler_done":
        logger.info("[scan] Scheduler done — %d flagged", payload.get("flagged", 0))
|
||||
|
||||
5
start_gdpr.sh
Executable file
5
start_gdpr.sh
Executable file
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash
# GDPRScanner — launch script (uses ./venv)
#
# Activates the virtualenv that lives next to this script and replaces the
# shell with gdpr_scanner.py, forwarding all command-line arguments.

# Directory containing this script, so it can be launched from anywhere.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Fail early with a clear message instead of silently falling through to the
# system python3 when the virtualenv has not been created yet.
if [ ! -f "$SCRIPT_DIR/venv/bin/activate" ]; then
    echo "ERROR: venv not found. Run: python -m venv venv && pip install -r requirements.txt" >&2
    exit 1
fi

source "$SCRIPT_DIR/venv/bin/activate"

exec python3 "$SCRIPT_DIR/gdpr_scanner.py" "$@"
|
||||
28
static/js/CLAUDE.md
Normal file
28
static/js/CLAUDE.md
Normal file
@ -0,0 +1,28 @@
|
||||
# static/js — JS Rules
|
||||
|
||||
## Profile dropdown — loader model
|
||||
Profiles are **loaders**, not persistent modes. Selecting one pushes settings into the sidebar; the sidebar is always the live state.
|
||||
|
||||
- `_setProfileClearBtn(visible)` must be called alongside every assignment to `S._activeProfileId`.
|
||||
- **Do not re-add a selectable `value=""` option to `#profileSelect`** — deliberately removed in v1.6.6.
|
||||
|
||||
## Profile editor source panel race condition
|
||||
`_pmgmtSaveFullEdit` detects whether Google/file checkboxes have rendered by querying the DOM directly:
|
||||
```javascript
|
||||
const googleRendered = !!document.querySelector('#peSourcesPanel input[data-source-type="google"]');
|
||||
const fileRendered = !!document.querySelector('#peSourcesPanel input[data-source-type="file"]');
|
||||
```
|
||||
Never revert to `!!window._googleConnected` / `_fileSources.length > 0` — those async proxies can be `true` before the panel has rendered, silently clearing the user's source selection on save.
|
||||
|
||||
## Progress bar phase parsing
|
||||
`_setProgressPhase(phase)` in `scan.js` parses the phase string against `_PHASE_SOURCE_MAP`:
|
||||
1. Source found **and** ` — ` (em-dash) present → split, resolve via `_resolveDisplayName()`, update `S._progressCurrentUser`.
|
||||
2. Source found **but no dash** → show pill + `S._progressCurrentUser` (handles sub-phases like folder counts).
|
||||
3. No source match → plain text fallback.
|
||||
|
||||
`_PHASE_SOURCE_MAP` ordering matters — `Google Workspace` must appear before `Gmail` in the map. The email regex uses `/iu` flags — do not drop the `i`.
|
||||
|
||||
## Gotchas
|
||||
|
||||
- **Profile editor accounts** — default to unchecked. Only explicitly saved `user_ids` are checked.
|
||||
- **Date presets** — stored as `years * 365` (integer days). Do not use `* 365.25`.
|
||||
198
static/js/auth.js
Normal file
198
static/js/auth.js
Normal file
@ -0,0 +1,198 @@
|
||||
import { S } from './state.js';
|
||||
// ── Auth ─────────────────────────────────────────────────────────────────────
|
||||
// Click handler for the sign-in button: kicks off startAuth() and shows an
// alert for both a synchronous throw and a rejected promise.
function handleSignIn() {
  const report = (err) => {
    alert('Sign-in error: ' + (err.message || String(err)));
  };
  try {
    startAuth().catch(report);
  } catch (err) {
    report(err);
  }
}
|
||||
|
||||
// Reads the credential fields, saves them via /api/auth/config, then starts
// authentication via /api/auth/start. In application mode the token is
// already available; otherwise the device code is shown and polling begins.
async function startAuth() {
  const byId = (id) => document.getElementById(id);
  const [clientId, tenantId, clientSecret] =
    ['clientId', 'tenantId', 'clientSecret'].map((id) => byId(id).value.trim());
  if (!clientId || !tenantId) {
    alert('Enter Client ID and Tenant ID');
    return;
  }

  // The same JSON request is sent to both endpoints.
  const request = {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({client_id: clientId, tenant_id: tenantId, client_secret: clientSecret}),
  };

  // Persist credentials first so they survive restarts regardless of auth outcome
  await fetch('/api/auth/config', request);

  const response = await fetch('/api/auth/start', request);
  const result = await response.json();
  if (result.error) {
    alert(result.error);
    return;
  }

  // Both modes swap the form for the device-code dialog.
  byId('configForm').style.display = 'none';
  byId('deviceCodeBackdrop').classList.add('open');

  if (result.mode !== 'application') {
    byId('deviceCode').textContent = result.user_code;
    pollAuth();
    return;
  }

  // App mode — token acquired immediately, no device code step needed
  byId('deviceCode').textContent = '—';
  const status = byId('authStatus');
  status.className = 'auth-status success';
  status.textContent = '✓ Connected (Application mode — org-wide access)';
  setTimeout(onAuthenticated, 900);
}
|
||||
|
||||
// Polls /api/auth/poll until the device-code sign-in completes.
//   'pending' → poll again in 3 s
//   'ok'      → show success, then continue to onAuthenticated()
//   otherwise → show the error and return to the credentials form
// A network failure or a non-JSON response is treated like an error reply, so
// the dialog never stays stuck on an unhandled promise rejection.
async function pollAuth() {
  let d;
  try {
    const r = await fetch('/api/auth/poll', {method: 'POST'});
    d = (await r.json()) || {};
  } catch (e) {
    d = {status: 'error', error: (e && e.message) || String(e)};
  }

  if (d.status === 'pending') {
    setTimeout(pollAuth, 3000);
    return;
  }

  const status = document.getElementById('authStatus');
  if (d.status === 'ok') {
    status.className = 'auth-status success';
    status.textContent = '✓ Signed in!';
    setTimeout(onAuthenticated, 800);
  } else {
    status.className = 'auth-status error';
    status.textContent = '✗ ' + (d.error || 'Sign-in failed');
    document.getElementById('configForm').style.display = 'block';
    document.getElementById('deviceCodeBackdrop').classList.remove('open');
  }
}
|
||||
|
||||
// Closes the device-code dialog and shows the credentials form again.
function cancelAuth() {
  const form = document.getElementById('configForm');
  const backdrop = document.getElementById('deviceCodeBackdrop');
  form.style.display = 'block';
  backdrop.classList.remove('open');
}
|
||||
|
||||
// Display name of the signed-in account ('' when unknown).
let _currentDisplayName = '';

// Records the auth mode and display name, and turns the Sources-modal status
// dot green when a mode is known (neither null nor undefined), grey otherwise.
function _setModeBadge(isAppMode, displayName) {
  S._currentAppMode = isAppMode;
  _currentDisplayName = displayName || '';

  // Keep Sources modal status dot in sync if it's open
  const dot = document.getElementById('srcM365StatusDot');
  if (!dot) return;
  const modeKnown = isAppMode != null;
  dot.className = `srcmgmt-status ${modeKnown ? 'green' : 'grey'}`;
}
|
||||
|
||||
// Called once sign-in has succeeded: refreshes the mode badge from
// /api/auth/status, switches to the scanner screen and loads its data.
async function onAuthenticated() {
  const response = await fetch('/api/auth/status');
  const info = await response.json();

  const who = info.display_name || info.displayName || info.email;
  if (who) {
    _setModeBadge(info.app_mode, who);
  }

  document.getElementById('authScreen').style.display = 'none';
  document.getElementById('scannerScreen').style.display = 'flex';

  loadUsers();
  loadTrend();     // show existing trend if DB has history
  loadProfiles();  // populate profile dropdown (15c)
}
|
||||
|
||||
// Returns to the auth screen with the credentials form visible, so the user
// can update the client secret without losing client_id / tenant_id.
function reconfigure() {
  const show = (id, display) => {
    document.getElementById(id).style.display = display;
  };
  show('scannerScreen', 'none');
  show('authScreen', 'flex');
  show('configForm', 'block');
  document.getElementById('deviceCodeBackdrop').classList.remove('open');
}
|
||||
|
||||
// Signs out on the server, returns to the auth screen and clears every
// result currently shown in the grid.
async function signOut() {
  await fetch('/api/auth/signout', {method: 'POST'});

  const el = (id) => document.getElementById(id);

  // Back to the credentials form.
  el('scannerScreen').style.display = 'none';
  el('authScreen').style.display = 'flex';
  el('configForm').style.display = 'block';
  el('deviceCodeBackdrop').classList.remove('open');

  // Drop the in-memory results and reset the grid to its empty state.
  S.flaggedData = [];
  S.filteredData = [];
  const grid = el('grid');
  grid.innerHTML = '';
  grid.style.display = 'none';
  const summary = el('lastScanSummary');
  if (summary) summary.style.display = 'none';
  el('emptyState').style.display = 'flex';
}
|
||||
|
||||
// ── Check auth on load ────────────────────────────────────────────────────────
|
||||
|
||||
// Date presets
|
||||
// Wires the "older than" controls: the preset buttons (.date-preset), the
// hidden #olderThan day count and the #olderThanDate picker are kept in sync.
(function() {
  const presets = document.querySelectorAll('.date-preset');
  const hidden  = document.getElementById('olderThan');
  const dateIn  = document.getElementById('olderThanDate');

  // Select a preset button and derive both the day count and the cut-off date.
  function setPreset(btn) {
    const years = parseInt(btn.dataset.years, 10);
    // A button without a numeric data-years would otherwise produce an
    // invalid Date and make toISOString() throw.
    if (Number.isNaN(years)) return;

    presets.forEach(p => p.classList.remove('selected'));
    btn.classList.add('selected');

    if (years === 0) {
      hidden.value = '0';
      dateIn.value = new Date().toISOString().slice(0, 10);
    } else {
      const d = new Date();
      d.setFullYear(d.getFullYear() - years);
      // Presets are stored as years * 365 (integer days) — the project rule
      // in static/js/CLAUDE.md explicitly forbids * 365.25.
      hidden.value = (years * 365).toString();
      dateIn.value = d.toISOString().slice(0, 10);
    }
  }

  presets.forEach(btn => btn.addEventListener('click', () => setPreset(btn)));

  // A manually picked date deselects the presets and stores the distance
  // from now in whole days.
  dateIn.addEventListener('change', () => {
    presets.forEach(p => p.classList.remove('selected'));
    if (dateIn.value) {
      const diffDays = Math.round((Date.now() - new Date(dateIn.value)) / 86400000);
      hidden.value = diffDays.toString();
    } else {
      hidden.value = '0';
    }
  });

  // Trigger default (2yr selected)
  const def = document.querySelector('.date-preset.selected');
  if (def) setPreset(def);

  // Toggle attach size row visibility
  document.getElementById('optAttachments').addEventListener('change', function() {
    document.getElementById('attachSizeRow').style.opacity = this.checked ? '1' : '0.4';
  });
})();
|
||||
|
||||
// ── Viewer mode bootstrap ─────────────────────────────────────────────────────
|
||||
// Startup routing: viewer mode goes straight to the scanner screen; otherwise
// the server is asked for the auth status and the UI is set up accordingly.
if (!window.VIEWER_MODE) {
  (async () => {
    try {
      const resp = await fetch('/api/auth/status');
      const status = await resp.json();

      // Saved credentials are copied into the sign-in form whether or not a
      // session is currently authenticated.
      const prefill = [['client_id', 'clientId'], ['tenant_id', 'tenantId'], ['client_secret', 'clientSecret']];
      for (const [field, elementId] of prefill) {
        if (status[field]) document.getElementById(elementId).value = status[field];
      }

      if (status.authenticated) {
        _setModeBadge(status.app_mode, status.display_name || status.email || '');
        document.getElementById('authScreen').style.display = 'none';
        document.getElementById('scannerScreen').style.display = 'flex';
        // Each loader is best-effort: a failure in one must not block the others.
        try { loadUsers(); } catch (ignored) {}
        try { loadProfiles(); } catch (ignored) {}
        try { loadTrend(); } catch (ignored) {}
      }
    } catch (err) {
      console.error('Auth status check failed:', err);
    }
  })();
} else {
  document.body.classList.add('viewer-mode');
  document.getElementById('authScreen').style.display = 'none';
  document.getElementById('scannerScreen').style.display = 'flex';
  try { loadTrend(); } catch (ignored) {}
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
|
||||
// Publish the auth entry points on window so inline HTML handlers and other
// modules can reach them.
// NOTE(review): _currentDisplayName is copied by value at this point — confirm
// that consumers do not expect it to track later reassignments.
Object.assign(window, {
  handleSignIn,
  startAuth,
  pollAuth,
  cancelAuth,
  _setModeBadge,
  onAuthenticated,
  reconfigure,
  signOut,
  _currentDisplayName,
});
|
||||
684
static/js/connector.js
Normal file
684
static/js/connector.js
Normal file
@ -0,0 +1,684 @@
|
||||
import { S } from './state.js';
|
||||
// ── Unified Source Management (#17) ──────────────────────────────────────────
|
||||
|
||||
// Opens the unified source-management modal on the requested tab
// ('m365' when none is given) and refreshes the content of all three panes.
function openSourcesMgmt(tab) {
  const backdrop = document.getElementById('srcMgmtBackdrop');
  backdrop.classList.add('open');

  const initialTab = tab ? tab : 'm365';
  switchSrcTab(initialTab);

  // Refresh every pane, not just the visible one.
  smRefreshStatus();
  smGoogleRefreshStatus();
  srcFileRenderList();
}
|
||||
|
||||
// Hides the source-management modal by dropping the 'open' class from its backdrop.
function closeSourcesMgmt() {
  const backdrop = document.getElementById('srcMgmtBackdrop');
  backdrop.classList.remove('open');
}
|
||||
|
||||
// Activates one tab of the source-management modal.
//   tab — 'm365' | 'google' | 'files'; any other value deactivates all tabs.
// For each known tab the pane (#srcPane<Suffix>) and the tab button
// (#srcTab<Suffix>) get the 'active' class toggled. Elements missing from the
// page are skipped instead of throwing.
function switchSrcTab(tab) {
  // Element-id suffix per tab key: srcPaneM365, srcPaneGoogle, srcPaneFiles.
  const suffixes = { m365: 'M365', google: 'Google', files: 'Files' };

  Object.keys(suffixes).forEach(function(key) {
    const isActive = key === tab;
    const pane = document.getElementById('srcPane' + suffixes[key]);
    if (pane) pane.classList.toggle('active', isActive);
    const btn = document.getElementById('srcTab' + suffixes[key]);
    if (btn) btn.classList.toggle('active', isActive);
  });
}
|
||||
|
||||
// ── M365 pane ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// Refreshes the M365 pane: pre-fills the credential inputs from
// /api/auth/status and updates the status dot, label, sub-label and the
// disconnect button. Does nothing when the status dot is not on the page.
function smRefreshStatus() {
  const byId = (id) => document.getElementById(id);
  const dot   = byId('srcM365StatusDot');
  const label = byId('srcM365StatusLabel');
  const sub   = byId('srcM365StatusSub');
  const disc  = byId('smDisconnectBtn');
  const st    = byId('smConnStatus');
  if (!dot) return;

  fetch('/api/auth/status')
    .then((resp) => resp.json())
    .then((info) => {
      // Pre-fill credential fields
      const clientIdInput = byId('smClientId');
      const tenantIdInput = byId('smTenantId');
      const secretInput   = byId('smClientSecret');
      if (clientIdInput && info.client_id) clientIdInput.value = info.client_id;
      if (tenantIdInput && info.tenant_id) tenantIdInput.value = info.tenant_id;
      if (secretInput && info.client_secret) {
        // The saved secret is never shown: bullets stand in for it when it is
        // longer than four characters, otherwise the field is cleared.
        if (info.client_secret.length > 4) {
          secretInput.value = '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022';
        } else {
          secretInput.value = '';
        }
      }

      if (!info.authenticated) {
        dot.className = 'srcmgmt-status grey';
        label.textContent = t('m365_srcmgmt_not_connected','Not connected');
        sub.textContent = '';
        if (disc) disc.style.display = 'none';
        if (st) st.textContent = '';
        return;
      }

      dot.className = 'srcmgmt-status green';
      const who = info.display_name || info.email || '';
      const modeText = info.app_mode
        ? t('m365_mode_app_short','App mode')
        : t('m365_mode_delegated_short','Delegated');
      label.textContent = who || t('m365_srcmgmt_connected','Connected');
      // The e-mail is appended only when a display name is shown as the label.
      const emailSuffix = (info.email && info.display_name) ? ' \u00b7 ' + info.email : '';
      sub.textContent = modeText + emailSuffix;
      if (disc) disc.style.display = '';
      if (st) st.textContent = '';
    })
    .catch(() => {
      // Any failure (network, JSON, missing element) leaves the dot grey.
      if (dot) dot.className = 'srcmgmt-status grey';
    });
}
|
||||
|
||||
// Connects Microsoft 365 from the source-management modal.
// Reads client id / tenant id / secret from the pane, persists them via
// /api/auth/config, then starts authentication via /api/auth/start.
//   - mode 'application': closes the modal and calls onAuthenticated shortly after.
//   - otherwise (delegated): closes the modal, mirrors the credentials into
//     the auth screen and starts the device-code flow with polling.
// Every failure, including a failure to persist the credentials, is reported
// in #smConnStatus instead of escaping as an unhandled rejection.
async function smConnect() {
  const SECRET_PLACEHOLDER = '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022';
  const cid    = document.getElementById('smClientId').value.trim();
  const tid    = document.getElementById('smTenantId').value.trim();
  const rawSec = document.getElementById('smClientSecret').value;
  // If field shows placeholder dots and user hasn't changed it, use saved secret (send empty to keep it)
  const sec    = (rawSec === SECRET_PLACEHOLDER) ? '' : rawSec.trim();
  const st     = document.getElementById('smConnStatus');

  if (!cid || !tid) {
    st.style.color = 'var(--danger)';
    st.textContent = t('m365_err_creds_required','Client ID and Tenant ID required');
    return;
  }
  st.style.color = 'var(--muted)';
  st.textContent = t('m365_connecting','Connecting...');

  const postJson = (url, payload) => fetch(url, {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(payload)
  });
  const creds = {client_id: cid, tenant_id: tid, client_secret: sec};

  try {
    // Persist credentials. Kept inside the try so a network error is shown
    // to the user rather than leaving the status stuck on "Connecting...".
    await postJson('/api/auth/config', creds);

    // Start auth — same as the auth screen flow
    const r = await postJson('/api/auth/start', creds);
    const d = await r.json();
    if (d.error) { st.style.color = 'var(--danger)'; st.textContent = d.error; return; }

    if (d.mode === 'application') {
      // App mode — no device code needed
      st.style.color = 'var(--accent)';
      st.textContent = '\u2714 ' + t('m365_connected','Connected');
      closeSourcesMgmt();
      setTimeout(onAuthenticated, 400);
    } else {
      // Delegated — show device code flow, close modal
      closeSourcesMgmt();
      document.getElementById('clientId').value = cid;
      document.getElementById('tenantId').value = tid;
      document.getElementById('clientSecret').value = sec;
      document.getElementById('configForm').style.display = 'none';
      document.getElementById('authScreen').style.display = 'flex';
      document.getElementById('deviceCodeBackdrop').classList.add('open');
      document.getElementById('deviceCode').textContent = d.user_code || '\u2014';
      pollAuth();
    }
  } catch (e) {
    st.style.color = 'var(--danger)';
    st.textContent = e.message;
  }
}
|
||||
|
||||
// Asks for confirmation, then signs out on the server and, once that request
// resolves, closes the modal and runs the client-side signOut().
function smDisconnect() {
  const confirmed = confirm(t('m365_signout_confirm','Disconnect and clear credentials?'));
  if (!confirmed) return;

  fetch('/api/auth/signout', {method:'POST'}).then(() => {
    closeSourcesMgmt();
    signOut();
  });
}
|
||||
|
||||
// ── Google Workspace pane ─────────────────────────────────────────────────────
|
||||
|
||||
// Parsed key dict held in memory while the pane is open — cleared on disconnect
|
||||
// Set by the key-file input handler after a successful JSON.parse; reset to
// null on a parse failure, when no file is selected, and on disconnect.
var _googleKeyDict = null;
|
||||
// Last mode passed to smGoogleSetMode(); the visible callers pass
// 'workspace' or 'personal'.
var _googleAuthMode = 'workspace';
|
||||
|
||||
// Switches the Google pane between the two auth modes.
//   mode — 'personal' selects the personal layout; any other value selects
//          the workspace (service-account) layout.
// Records the mode, shows/hides the matching sections and highlights the
// matching mode button. Elements that are not on the page are skipped.
function smGoogleSetMode(mode) {
  _googleAuthMode = mode;

  const byId = (id) => document.getElementById(id);
  const saSection       = byId('smGoogleSaSection');
  const personalSection = byId('smGooglePersonalSection');
  const wsSetup         = byId('smGoogleWorkspaceSetup');
  const workspaceBtn    = byId('smGoogleModeWorkspace');
  const personalBtn     = byId('smGoogleModePersonal');

  const isPersonal = (mode === 'personal');
  const shownIf = (visible) => (visible ? '' : 'none');

  if (saSection) saSection.style.display = shownIf(!isPersonal);
  if (personalSection) personalSection.style.display = shownIf(isPersonal);
  if (wsSetup) wsSetup.style.display = shownIf(!isPersonal);

  // The button of the active mode gets the accent background with white text.
  const paint = (btn, selected) => {
    if (!btn) return;
    btn.style.background = selected ? 'var(--accent)' : 'var(--surface)';
    btn.style.color = selected ? '#fff' : 'var(--text)';
  };
  paint(workspaceBtn, !isPersonal);
  paint(personalBtn, isPersonal);
}
|
||||
|
||||
// Refreshes the Google pane from the workspace and personal status endpoints.
// Both requests are started together; a failed request is replaced by a
// neutral object ({} / {connected:false}) so the other result is still used.
// Afterwards the sources panel is re-rendered, an open profile editor without
// Google checkboxes is re-rendered, and the account list is merged with or
// stripped of Google users depending on the connection state.
function smGoogleRefreshStatus() {
  const toJson = (resp) => resp.json();
  const wsRequest = fetch('/api/google/auth/status').then(toJson).catch(() => ({}));
  const personalRequest = fetch('/api/google/personal/status').then(toJson).catch(() => ({connected: false}));

  Promise.all([wsRequest, personalRequest]).then((results) => {
    const ws = results[0];
    const personal = results[1];
    const byId = (id) => document.getElementById(id);
    const dot        = byId('srcGoogleStatusDot');
    const label      = byId('srcGoogleStatusLabel');
    const sub        = byId('srcGoogleStatusSub');
    const disc       = byId('smGoogleDisconnectBtn');
    const srcs       = byId('smGoogleSourcesGroup');
    const signOutBtn = byId('smGooglePersonalSignOutBtn');
    const signInBtn  = byId('smGooglePersonalSignInBtn');
    if (!dot) return;

    // Shows or hides an optional element; absent elements are ignored.
    const setShown = (el, shown) => { if (el) el.style.display = shown ? '' : 'none'; };

    // Server-side Google libraries missing: show the install hint and stop here.
    if (ws.libs_ok === false) {
      dot.className = 'srcmgmt-status amber';
      label.textContent = t('m365_google_libs_missing', 'Libraries not installed');
      sub.textContent = 'pip install google-auth google-auth-httplib2 google-api-python-client';
      setShown(disc, false);
      setShown(srcs, false);
      return;
    }

    if (personal.connected) {
      // A personal connection takes precedence over a workspace one.
      smGoogleSetMode('personal');
      window._googleConnected = true;
      dot.className = 'srcmgmt-status green';
      label.textContent = personal.email || personal.displayName || t('m365_srcmgmt_connected', 'Connected');
      sub.textContent = t('m365_google_mode_personal', 'Personal account');
      setShown(disc, false);
      setShown(srcs, true);
      setShown(signOutBtn, true);
      setShown(signInBtn, false);
    } else if (ws.connected) {
      smGoogleSetMode('workspace');
      window._googleConnected = true;
      dot.className = 'srcmgmt-status green';
      label.textContent = ws.sa_email || t('m365_srcmgmt_connected', 'Connected');
      const projectPrefix = ws.project_id ? ws.project_id + ' · ' : '';
      sub.textContent = projectPrefix + (ws.admin_email || '');
      setShown(disc, true);
      setShown(srcs, true);
      setShown(signOutBtn, false);
      setShown(signInBtn, true);

      // Fill the admin e-mail only when the field is still empty.
      const adminInput = byId('smGoogleAdminEmail');
      if (adminInput && ws.admin_email && !adminInput.value) adminInput.value = ws.admin_email;

      // Mirror the saved source selection when the server reports one.
      const gmailBox = byId('smGoogleSrcGmail');
      const driveBox = byId('smGoogleSrcDrive');
      if (gmailBox && ws.src_gmail !== undefined) gmailBox.checked = !!ws.src_gmail;
      if (driveBox && ws.src_drive !== undefined) driveBox.checked = !!ws.src_drive;
    } else {
      window._googleConnected = false;
      dot.className = 'srcmgmt-status grey';
      label.textContent = t('m365_srcmgmt_not_connected', 'Not connected');
      sub.textContent = ws.error || personal.error || '';
      setShown(disc, false);
      setShown(srcs, false);
      setShown(signOutBtn, false);
      setShown(signInBtn, true);
    }

    renderSourcesPanel();

    // If the profile editor is open and its source panel has no Google checkboxes yet,
    // re-render it now that connection status is known.
    const editorOpen = document.getElementById('pmgmtEditor')?.classList.contains('open');
    if (editorOpen && !document.querySelector('#peSourcesPanel input[data-source-type="google"]')) {
      const checkedIds = Array.from(document.querySelectorAll('#peSourcesPanel input[type=checkbox]'))
        .filter((cb) => cb.checked)
        .map((cb) => cb.dataset.sourceId);
      let profile;
      if (window._pmgmtEditId) {
        profile = S._profiles.find((p) => p.id === window._pmgmtEditId) || window._pmgmtNewDraft;
      } else {
        profile = window._pmgmtNewDraft;
      }
      if (profile) {
        const savedIds = (profile.sources||[]).concat(profile.google_sources||[]).concat(profile.file_sources||[]);
        _renderEditorSources(checkedIds.concat(savedIds));
      }
    }

    if (!window._googleConnected) {
      // Remove standalone Google users; reset merged 'both' users back to M365
      S._allUsers = S._allUsers.filter((u) => (u.platform||'m365') !== 'google');
      S._allUsers.forEach((u) => {
        if (u.platform === 'both') { u.platform = 'm365'; delete u.googleEmail; }
      });
      renderAccountList();
    } else {
      _mergeGoogleUsers();
    }
  }).catch(() => {
    // Anything thrown while updating the pane leaves the dot grey.
    const dot = document.getElementById('srcGoogleStatusDot');
    if (dot) dot.className = 'srcmgmt-status grey';
  });
}
|
||||
|
||||
// Wire up file input to read + validate JSON immediately
|
||||
// Once the DOM is ready, hook the service-account key file input: the chosen
// file is read as text and parsed as JSON immediately, so an invalid file is
// reported before the user presses Connect.
document.addEventListener('DOMContentLoaded', () => {
  const input = document.getElementById('smGoogleKeyFile');
  if (!input) return;

  input.addEventListener('change', () => {
    const file = input.files && input.files[0];
    if (!file) {
      // Selection cleared: forget any previously parsed key.
      _googleKeyDict = null;
      return;
    }

    const reader = new FileReader();
    reader.onload = (evt) => {
      try {
        _googleKeyDict = JSON.parse(evt.target.result);
        const nameEl = document.getElementById('smGoogleKeyName');
        if (nameEl) {
          // Show the part of the key's client_email before the '@', if present.
          if (_googleKeyDict.client_email) {
            nameEl.textContent = '✔ ' + _googleKeyDict.client_email.split('@')[0];
          } else {
            nameEl.textContent = '✔ loaded';
          }
        }
      } catch (parseErr) {
        _googleKeyDict = null;
        const st = document.getElementById('smGoogleConnStatus');
        if (st) {
          st.style.color = 'var(--danger)';
          st.textContent = t('m365_google_invalid_json','Invalid JSON file');
        }
      }
    };
    reader.readAsText(file);
  });
});
|
||||
|
||||
// Connects Google Workspace with the service-account key parsed earlier.
// Posts the key and the admin e-mail to /api/google/auth/connect and reports
// progress, success or the error in #smGoogleConnStatus (when present).
async function smGoogleConnect() {
  const st = document.getElementById('smGoogleConnStatus');
  const adminField = document.getElementById('smGoogleAdminEmail');
  const adminEmail = (adminField && adminField.value) || '';

  // Writes a coloured message into the status element, if the page has one.
  const report = (color, message) => {
    if (st) { st.style.color = color; st.textContent = message; }
  };

  if (!_googleKeyDict) {
    report('var(--danger)', t('m365_google_key_required','Select a service account JSON key file'));
    return;
  }
  report('var(--muted)', t('m365_connecting','Connecting...'));

  try {
    const resp = await fetch('/api/google/auth/connect', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify({key_json: _googleKeyDict, admin_email: adminEmail})
    });
    const result = await resp.json();
    if (result.error) {
      report('var(--danger)', result.error);
      return;
    }
    report('var(--accent)', '✔ ' + t('m365_connected','Connected'));
    smGoogleRefreshStatus();
  } catch (err) {
    report('var(--danger)', err.message);
  }
}
|
||||
|
||||
// After confirmation, disconnects the workspace connection on the server,
// then clears the in-memory key, the file input and the status texts and
// refreshes the pane.
function smGoogleDisconnect() {
  const confirmed = confirm(t('m365_signout_confirm','Disconnect and clear credentials?'));
  if (!confirmed) return;

  fetch('/api/google/auth/disconnect', {method:'POST'}).then(() => {
    _googleKeyDict = null;

    const keyInput = document.getElementById('smGoogleKeyFile');
    if (keyInput) keyInput.value = '';

    // Blank both informational texts, skipping any that are not on the page.
    const keyName = document.getElementById('smGoogleKeyName');
    if (keyName) keyName.textContent = '';
    const status = document.getElementById('smGoogleConnStatus');
    if (status) status.textContent = '';

    smGoogleRefreshStatus();
  });
}
|
||||
|
||||
// Starts the device-code sign-in for a personal Google account.
// Requires client id and secret from the pane, posts them to
// /api/google/personal/start, shows the returned user code and verification
// link, and begins polling for completion.
async function smGooglePersonalStart() {
  const valueOf = (id) => {
    const el = document.getElementById(id);
    return (el && el.value) || '';
  };
  const clientId = valueOf('smGooglePersonalClientId');
  const clientSecret = valueOf('smGooglePersonalClientSecret');
  const st = document.getElementById('smGooglePersonalConnStatus');

  // Writes a coloured message into the status element, if the page has one.
  const report = (color, message) => {
    if (st) { st.style.color = color; st.textContent = message; }
  };

  if (!clientId || !clientSecret) {
    report('var(--danger)', t('m365_google_personal_creds_required', 'Client ID and secret required'));
    return;
  }
  report('var(--muted)', t('m365_connecting', 'Connecting...'));

  try {
    const resp = await fetch('/api/google/personal/start', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({client_id: clientId, client_secret: clientSecret})
    });
    const result = await resp.json();
    if (result.error) {
      report('var(--danger)', result.error);
      return;
    }

    const box    = document.getElementById('smGoogleDeviceBox');
    const codeEl = document.getElementById('smGoogleDeviceCode');
    const urlEl  = document.getElementById('smGoogleDeviceUrl');
    const pollSt = document.getElementById('smGooglePollStatus');

    if (box) box.style.display = '';
    if (codeEl) codeEl.textContent = result.user_code || '—';
    if (urlEl) {
      // Fall back to the default device page; the link text omits the scheme.
      const verifyUrl = result.verification_url || 'https://google.com/device';
      urlEl.href = verifyUrl;
      urlEl.textContent = verifyUrl.replace('https://', '');
    }
    if (pollSt) {
      pollSt.style.color = 'var(--muted)';
      pollSt.textContent = '⏳ ' + t('m365_auth_waiting', 'Waiting for sign-in…');
    }
    if (st) st.textContent = '';

    smGooglePersonalPoll();
  } catch (err) {
    report('var(--danger)', err.message);
  }
}
|
||||
|
||||
// Polls /api/google/personal/poll for the outcome of the device-code sign-in.
//   'pending' — polls again after 3 s.
//   'ok'      — shows success, then hides the device box and refreshes the pane after 1 s.
//   otherwise — shows the error and hides the device box after 3 s.
// A failed request (or an exception while handling the reply) retries after 5 s.
function smGooglePersonalPoll() {
  const hideDeviceBox = () => {
    const box = document.getElementById('smGoogleDeviceBox');
    if (box) box.style.display = 'none';
  };

  fetch('/api/google/personal/poll', {method: 'POST'})
    .then((resp) => resp.json())
    .then((result) => {
      const pollSt = document.getElementById('smGooglePollStatus');
      switch (result.status) {
        case 'pending':
          setTimeout(smGooglePersonalPoll, 3000);
          break;
        case 'ok':
          if (pollSt) {
            pollSt.style.color = 'var(--success)';
            pollSt.textContent = '✓ ' + t('m365_connected', 'Connected');
          }
          setTimeout(() => {
            hideDeviceBox();
            smGoogleRefreshStatus();
          }, 1000);
          break;
        default:
          if (pollSt) {
            pollSt.style.color = 'var(--danger)';
            pollSt.textContent = '✗ ' + (result.error || 'Sign-in failed');
          }
          setTimeout(() => { hideDeviceBox(); }, 3000);
      }
    })
    .catch(() => { setTimeout(smGooglePersonalPoll, 5000); });
}
|
||||
|
||||
// After confirmation, signs the personal Google account out on the server and
// refreshes the Google pane once the request resolves.
function smGooglePersonalSignOut() {
  const confirmed = confirm(t('m365_signout_confirm', 'Disconnect and clear credentials?'));
  if (!confirmed) return;

  fetch('/api/google/personal/signout', {method: 'POST'}).then(() => {
    smGoogleRefreshStatus();
  });
}
|
||||
|
||||
// Returns {sources, options} reflecting current Google pane state — used by scan launcher
|
||||
// Builds the Google part of a scan request from the pane's checkboxes.
// Returns {sources, options}: sources lists 'gmail' and/or 'gdrive' (in that
// order) for each checkbox that exists and is checked; options is always {}.
function getGoogleScanOptions() {
  const checkboxToSource = [
    ['smGoogleSrcGmail', 'gmail'],
    ['smGoogleSrcDrive', 'gdrive'],
  ];
  const sources = checkboxToSource
    .filter((pair) => {
      const box = document.getElementById(pair[0]);
      return Boolean(box && box.checked);
    })
    .map((pair) => pair[1]);
  return {sources: sources, options: {}};
}
|
||||
|
||||
// ── File sources pane ─────────────────────────────────────────────────────────
|
||||
|
||||
// Renders the saved file sources into #srcFileList (no-op when it is absent).
// Each row shows an icon (network share vs. local folder), the label, the
// path with the optional SMB user, and Scan / Edit / Delete buttons.
//
// Rows are built as DOM nodes with listeners rather than HTML strings with
// inline onclick attributes: HTML-escaping a label does not make it safe
// inside a JavaScript string in an attribute (entities are decoded before
// the script runs), so a label containing a quote could break or inject
// into the handler. Handlers now receive the raw id and label.
// NOTE(review): button captions are set via textContent, which assumes t()
// returns plain text — the same assumption made where t() feeds textContent
// elsewhere in this file.
function srcFileRenderList() {
  const list = document.getElementById('srcFileList');
  if (!list) return;
  if (!S._fileSources.length) {
    list.innerHTML = '<div class="fsrc-empty">'+t('m365_file_sources_empty','No file sources yet.')+'</div>';
    return;
  }

  // Creates an element with a class name and optional text content.
  const make = function(tag, className, text) {
    const el = document.createElement(tag);
    el.className = className;
    if (text !== undefined) el.textContent = text;
    return el;
  };
  // Creates a button wired to a click handler.
  const makeButton = function(className, text, onClick) {
    const btn = make('button', className, text);
    btn.addEventListener('click', onClick);
    return btn;
  };

  const rows = S._fileSources.map(function(s) {
    const path = s.path || '';
    // Paths starting with // or \\ are treated as network shares.
    const isSmb = path.startsWith('//') || path.startsWith('\\\\');
    const icon = isSmb ? '\uD83C\uDF10' : '\uD83D\uDCC1';
    const id = s.id || '';
    const labelText = s.label || s.path || '';

    const actions = make('div', 'fsrc-actions');
    actions.appendChild(makeButton('btn-scan', '▶ ' + t('m365_fsrc_scan_btn','Scan'), function() { srcFileScan(id); }));
    const editBtn = makeButton('btn-edit', t('m365_fsrc_edit_btn','Edit'), function() { srcFileEdit(id); });
    editBtn.style.cssText = 'background:none;border:1px solid var(--border);color:var(--muted);padding:2px 7px;border-radius:4px;font-size:10px;cursor:pointer';
    actions.appendChild(editBtn);
    actions.appendChild(makeButton('btn-del', t('m365_profile_delete','Delete'), function() { srcFileDelete(id, labelText); }));

    const head = make('div', 'fsrc-row-head');
    head.appendChild(make('span', 'fsrc-row-label', icon + ' ' + labelText));
    head.appendChild(actions);

    const userPart = s.smb_user ? ' \u00b7 \uD83D\uDC64 ' + s.smb_user : '';
    const row = make('div', 'fsrc-row');
    row.appendChild(head);
    row.appendChild(make('div', 'fsrc-row-path', path + userPart));
    return row;
  });

  list.innerHTML = '';
  rows.forEach(function(row) { list.appendChild(row); });
}
|
||||
|
||||
// Shows the SMB credential fields when the entered path starts with // or \\,
// and suggests the host (first path segment) if the host field is still empty.
function srcFileDetectSmb() {
  const rawPath = document.getElementById('srcFilePath').value;
  const looksLikeShare = ['//', '\\\\'].some((prefix) => rawPath.startsWith(prefix));

  document.getElementById('srcFileSmbFields').style.display = looksLikeShare ? 'flex' : 'none';
  if (!looksLikeShare) return;

  // Never overwrite a host the user (or an earlier call) already provided.
  const hostField = document.getElementById('srcFileSmbHost');
  if (hostField.value) return;
  hostField.value = rawPath.replace(/^[\/\\]+/,'').split(/[\/\\]/)[0];
}
|
||||
|
||||
// Suggests a label from the entered path unless the label is marked as edited
// by the user. Network shares with at least host and share become
// "host / share"; every other path uses its last segment (or the path itself
// when that segment is empty).
function srcFileAutoName() {
  const labelEl = document.getElementById('srcFileLabel');
  if (labelEl._userEdited) return;

  const path = document.getElementById('srcFilePath').value.trim();
  if (path === '') {
    labelEl.value = '';
    return;
  }

  // Drop trailing separators, then split on either separator kind.
  const pieces = path.replace(/[\/\\]+$/,'').split(/[\/\\]/);
  const named = pieces.filter(Boolean);
  const isShare = path.startsWith('//') || path.startsWith('\\\\');

  if (isShare && named.length >= 2) {
    labelEl.value = named[0] + ' / ' + named[1];
    return;
  }
  labelEl.value = pieces[pieces.length - 1] || path;
}
|
||||
|
||||
// Saves the file source described by the form (creating it, or updating the
// one whose id is in #srcFileEditId), then resets the form, reloads the
// sources and re-renders the list. Progress and errors go to #srcFileStatus.
//
// SMB credentials are stored first, best-effort, and only when the user
// actually typed a password: srcFileEdit() fills the password field with a
// bullet placeholder, and sending that placeholder would overwrite the
// stored password with bullets.
async function srcFileAdd() {
  const PW_PLACEHOLDER = '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022';
  const label   = document.getElementById('srcFileLabel').value.trim();
  const path    = document.getElementById('srcFilePath').value.trim();
  const smbHost = document.getElementById('srcFileSmbHost').value.trim();
  const smbUser = document.getElementById('srcFileSmbUser').value.trim();
  const smbPw   = document.getElementById('srcFileSmbPw').value;
  const stat    = document.getElementById('srcFileStatus');

  if (!label) {
    stat.style.color = 'var(--danger)';
    stat.textContent = t('m365_fsrc_name_required','Name is required.');
    document.getElementById('srcFileLabel').focus();
    return;
  }
  if (!path) {
    stat.style.color = 'var(--danger)';
    stat.textContent = t('m365_fsrc_path_required','Path is required.');
    return;
  }
  stat.style.color = 'var(--muted)';
  stat.textContent = t('m365_fsrc_saving','Saving...');

  const hasNewPassword = !!smbPw && smbPw !== PW_PLACEHOLDER;
  if (hasNewPassword && smbUser) {
    try {
      await fetch('/api/file_sources/store_creds', {
        method: 'POST',
        headers: {'Content-Type':'application/json'},
        body: JSON.stringify({smb_host: smbHost, smb_user: smbUser, password: smbPw})
      });
    } catch (e) {
      // Still best-effort (the source itself is saved regardless), but leave a trace.
      console.warn('Storing SMB credentials failed:', e);
    }
  }

  try {
    const editId = document.getElementById('srcFileEditId');
    const existingId = editId ? editId.value : '';
    const body = {label, path, smb_host: smbHost, smb_user: smbUser};
    if (existingId) body.id = existingId;   // presence of an id turns the save into an update

    const r = await fetch('/api/file_sources/save', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify(body)
    });
    const d = await r.json();
    if (d.error) { stat.style.color = 'var(--danger)'; stat.textContent = d.error; return; }

    // Reset the form back to "add" mode.
    ['srcFileLabel','srcFilePath','srcFileSmbHost','srcFileSmbUser','srcFileSmbPw'].forEach(function(id) {
      const el = document.getElementById(id);
      if (el) { el.value = ''; el._userEdited = false; }
    });
    if (editId) editId.value = '';
    const addBtn = document.getElementById('srcFileAddBtn');
    if (addBtn) addBtn.textContent = t('m365_fsrc_add_btn','Add');
    document.getElementById('srcFileSmbFields').style.display = 'none';

    stat.style.color = 'var(--accent)';
    stat.textContent = '\u2714 ' + t('m365_fsrc_saved','Source saved');
    await _loadFileSources();
    srcFileRenderList();
    log(t('m365_fsrc_saved','Source saved') + ': ' + label);
  } catch (e) {
    stat.style.color = 'var(--danger)';
    stat.textContent = e.message;
  }
}
|
||||
|
||||
// Loads the file source with the given id into the add/edit form and switches
// the form to edit mode. Does nothing when no source has that id.
//   id — identifier of an entry in S._fileSources (compared with ===).
function srcFileEdit(id) {
  const s = S._fileSources.find(function(x) { return x.id === id; });
  if (!s) return;

  const labelEl = document.getElementById('srcFileLabel');
  const pathEl  = document.getElementById('srcFilePath');
  const hostEl  = document.getElementById('srcFileSmbHost');
  const userEl  = document.getElementById('srcFileSmbUser');
  const pwEl    = document.getElementById('srcFileSmbPw');
  const editId  = document.getElementById('srcFileEditId');

  // Mark the label as user-edited so srcFileAutoName() leaves it alone.
  if (labelEl) { labelEl.value = s.label || ''; labelEl._userEdited = true; }
  if (pathEl)  pathEl.value = s.path || '';
  if (hostEl)  hostEl.value = s.smb_host || '';
  if (userEl)  userEl.value = s.smb_user || '';
  // Bullets are shown in place of a password whenever an SMB user is set;
  // the real password is not available on the client.
  if (pwEl)    pwEl.value = s.smb_user ? '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022' : '';
  if (editId)  editId.value = id;

  const isSmb = (s.path || '').startsWith('//') || (s.path || '').startsWith('\\\\');
  const smbFields = document.getElementById('srcFileSmbFields');
  if (smbFields) smbFields.style.display = isSmb ? 'flex' : 'none';

  const btn = document.getElementById('srcFileAddBtn');
  if (btn) btn.textContent = t('m365_fsrc_save_changes','Save changes');

  const stat = document.getElementById('srcFileStatus');
  if (stat) {
    stat.style.color = 'var(--muted)';
    // textContent never parses HTML, so the label is assigned unescaped;
    // escaping it first would display entities such as "&amp;" literally.
    stat.textContent = 'Editing: ' + (s.label || s.path || '');
  }
}
|
||||
|
||||
// After confirmation (the prompt quotes the label), deletes the file source
// with the given id on the server, then reloads and re-renders the list.
async function srcFileDelete(id, label) {
  const question = t('m365_profile_delete_confirm','Delete')+' "'+label+'"?';
  if (!confirm(question)) return;

  const request = {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify({id: id})
  };
  await fetch('/api/file_sources/delete', request);
  await _loadFileSources();
  srcFileRenderList();
}
|
||||
|
||||
// Starts a file scan for the source with the given id: closes the modal, logs
// the start, and posts the whole source object to /api/file_scan/start.
// Errors reported by the server or raised by the request are logged as 'err'.
async function srcFileScan(id) {
  const source = S._fileSources.find((entry) => entry.id === id);
  if (!source) return;

  closeSourcesMgmt();
  log(t('m365_fsrc_scan_start','Starting file scan')+': '+(source.label||source.path));

  const reportFailure = (detail) => log('File scan error: '+detail,'err');
  try {
    const resp = await fetch('/api/file_scan/start', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify(source)
    });
    const result = await resp.json();
    if (result.error) reportFailure(result.error);
  } catch (err) {
    reportFailure(err.message);
  }
}
|
||||
|
||||
// Redirect old openFileSourcesModal() to the new unified modal
|
||||
// Legacy entry point: opens the unified modal on its Files tab.
function openFileSourcesModal() {
  openSourcesMgmt('files');
}

// Legacy entry point: closes the unified modal.
function closeFileSourcesModal() {
  closeSourcesMgmt();
}
|
||||
|
||||
// ── File Sources (#8) ─────────────────────────────────────────────────────────
|
||||
|
||||
// Fetches the saved file sources into S._fileSources and refreshes everything
// that displays them: the legacy list, the sources panel, any pending profile
// selection, and an open profile editor that has no file checkboxes yet.
// Failures are written to #fsrcStatus when that element exists.
async function _loadFileSources() {
  try {
    const resp = await fetch('/api/file_sources');
    const data = await resp.json();
    S._fileSources = data.sources || [];
    _renderFileSources(data.smb_available);
    renderSourcesPanel();

    // Re-apply any pending profile source selection (file sources render after load)
    if (S._pendingProfileSources.length) {
      const fileBoxes = document.querySelectorAll('#sourcesPanel input[data-source-type="file"]');
      fileBoxes.forEach((cb) => {
        cb.checked = S._pendingProfileSources.includes(cb.dataset.sourceId);
      });
      S._pendingProfileSources = [];
    }

    // If the profile editor is open and has no file checkboxes yet, re-render it now.
    const editorOpen = document.getElementById('pmgmtEditor')?.classList.contains('open');
    if (editorOpen &&
        !document.querySelector('#peSourcesPanel input[data-source-type="file"]') &&
        S._fileSources.length > 0) {
      const checkedIds = Array.from(document.querySelectorAll('#peSourcesPanel input[type=checkbox]'))
        .filter((cb) => cb.checked)
        .map((cb) => cb.dataset.sourceId);
      let profile;
      if (window._pmgmtEditId) {
        profile = S._profiles.find((p) => p.id === window._pmgmtEditId) || window._pmgmtNewDraft;
      } else {
        profile = window._pmgmtNewDraft;
      }
      if (profile) {
        // Keep what is ticked right now plus everything saved on the profile.
        const savedIds = (profile.sources||[]).concat(profile.google_sources||[]).concat(profile.file_sources||[]);
        _renderEditorSources(checkedIds.concat(savedIds));
      }
    }
  } catch (err) {
    const status = document.getElementById('fsrcStatus');
    if (status) {
      status.style.color = 'var(--danger)';
      status.textContent = 'Error: ' + err.message;
    }
  }
}
|
||||
|
||||
// Renders the saved file sources into the legacy list #fsrcList (no-op when
// it is absent). Each row shows an icon, the label, the path with the
// optional SMB user, and Scan / Delete buttons. Takes no parameters; an
// argument passed by a caller is ignored.
//
// Rows are built as DOM nodes with listeners rather than HTML strings with
// inline onclick attributes: HTML-escaping a label does not make it safe
// inside a JavaScript string in an attribute (entities are decoded before
// the script runs), so a label containing a quote could break or inject
// into the handler. The handlers are still looked up on window at click
// time, exactly where the inline attributes resolved them.
// NOTE(review): button captions are set via textContent, which assumes t()
// returns plain text.
function _renderFileSources() {
  const list = document.getElementById('fsrcList');
  if (!list) return;
  if (!S._fileSources.length) {
    list.innerHTML = '<div class="fsrc-empty">' + t('m365_file_sources_empty','No file sources yet.') + '</div>';
    return;
  }

  // Creates an element with a class name and optional text content.
  const make = function(tag, className, text) {
    const el = document.createElement(tag);
    el.className = className;
    if (text !== undefined) el.textContent = text;
    return el;
  };

  const rows = S._fileSources.map(function(s) {
    const path = s.path || '';
    // Paths starting with // or \\ are treated as network shares.
    const isSmb = path.startsWith('//') || path.startsWith('\\\\');
    const icon = isSmb ? '\uD83C\uDF10' : '\uD83D\uDCC1';
    const userPart = s.smb_user ? ' \u00b7 \uD83D\uDC64 ' + s.smb_user : '';
    const id = s.id || '';
    const labelText = s.label || s.path || '';

    const scanBtn = make('button', 'btn-scan', '▶ ' + t('m365_fsrc_scan_btn','Scan'));
    scanBtn.addEventListener('click', function() { window.fsrcScan(id); });
    const delBtn = make('button', 'btn-del', t('m365_profile_delete','Delete'));
    delBtn.addEventListener('click', function() { window.fsrcDelete(id, labelText); });

    const actions = make('div', 'fsrc-actions');
    actions.appendChild(scanBtn);
    actions.appendChild(delBtn);

    const head = make('div', 'fsrc-row-head');
    head.appendChild(make('span', 'fsrc-row-label', icon + ' ' + labelText));
    head.appendChild(actions);

    const row = make('div', 'fsrc-row');
    row.appendChild(head);
    row.appendChild(make('div', 'fsrc-row-path', path + userPart));
    return row;
  });

  list.innerHTML = '';
  rows.forEach(function(row) { list.appendChild(row); });
}
|
||||
|
||||
// Legacy form: shows the SMB credential fields when the entered path starts
// with // or \\, and suggests the host (first path segment) if the host field
// is still empty.
function fsrcDetectSmb() {
  const rawPath = document.getElementById('fsrcPath').value;
  const looksLikeShare = ['//', '\\\\'].some((prefix) => rawPath.startsWith(prefix));

  document.getElementById('fsrcSmbFields').style.display = looksLikeShare ? 'flex' : 'none';
  if (!looksLikeShare) return;

  // Never overwrite a host that is already filled in.
  const hostField = document.getElementById('fsrcSmbHost');
  if (hostField.value) return;
  hostField.value = rawPath.replace(/^[\/\\]+/,'').split(/[\/\\]/)[0];
}
|
||||
|
||||
/**
 * Suggest a value for #fsrcLabel from the path in #fsrcPath.
 *
 * Does nothing once the label element carries a truthy _userEdited flag.
 * An empty path clears the label. SMB-style paths (// or \\ prefix) with at
 * least three split segments become "host / share" (or just "host" when there
 * is no share); every other path uses its last non-empty segment.
 */
function fsrcAutoName() {
  const labelEl = document.getElementById('fsrcLabel');
  if (labelEl._userEdited) return;

  const rawPath = document.getElementById('fsrcPath').value.trim();
  if (!rawPath) { labelEl.value = ''; return; }

  // Drop trailing separators, then split on either separator kind.
  const segments = rawPath.replace(/[/\\]+$/, '').split(/[/\\]/);
  const isSmb = rawPath.startsWith('//') || rawPath.startsWith('\\\\');

  if (isSmb && segments.length >= 3) {
    // Leading separators yield empty segments; host and share are the first
    // two non-empty ones.
    const nonEmpty = segments.filter(function(seg) { return seg.length > 0; });
    const host = nonEmpty[0] || '';
    const share = nonEmpty[1] || '';
    labelEl.value = share ? host + ' / ' + share : host;
    return;
  }

  labelEl.value = segments[segments.length - 1] || segments[segments.length - 2] || rawPath;
}
|
||||
|
||||
// Once the DOM is ready, track manual edits on the two source-name inputs.
// After every input event the element's _userEdited flag mirrors whether the
// field currently holds text; fsrcAutoName() stops suggesting names while the
// flag is set on #fsrcLabel. Inputs that are absent from the page are skipped.
document.addEventListener('DOMContentLoaded', function() {
  ['fsrcLabel', 'srcFileLabel'].forEach(function(fieldId) {
    const field = document.getElementById(fieldId);
    if (!field) return;
    field.addEventListener('input', function() { field._userEdited = !!field.value; });
  });
});
|
||||
|
||||
/**
 * Save the file source described by the "add source" form.
 *
 * Reads path, label (falls back to the path), SMB host/user/password from the
 * form, validates that a label and a path are present, optionally posts the
 * SMB credentials to /api/file_sources/store_creds, then posts the source to
 * /api/file_sources/save. On success the form is reset, the list reloaded and
 * a log line written; failures are shown in #fsrcStatus.
 *
 * Storing credentials stays best-effort (the source is saved regardless), but
 * a failure is now written to the log as a warning instead of being swallowed
 * silently, so the user can tell the password was not stored.
 */
async function fsrcAddSource() {
  const path    = document.getElementById('fsrcPath').value.trim();
  const label   = document.getElementById('fsrcLabel').value.trim() || path;
  const smbHost = document.getElementById('fsrcSmbHost').value.trim();
  const smbUser = document.getElementById('fsrcSmbUser').value.trim();
  const smbPw   = document.getElementById('fsrcSmbPw').value; // not trimmed: whitespace may be part of the password
  const stat    = document.getElementById('fsrcStatus');
  if (!label) { stat.style.color='var(--danger)'; stat.textContent=t('m365_fsrc_name_required','Name is required.'); document.getElementById('fsrcLabel').focus(); return; }
  if (!path) { stat.style.color='var(--danger)'; stat.textContent=t('m365_fsrc_path_required','Path is required.'); return; }
  stat.style.color='var(--muted)'; stat.textContent=t('m365_fsrc_saving','Saving...');
  if (smbPw && smbUser) {
    try {
      const credResp = await fetch('/api/file_sources/store_creds',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({smb_host:smbHost,smb_user:smbUser,password:smbPw})});
      if (!credResp.ok) log('SMB credentials could not be stored (HTTP ' + credResp.status + ')', 'warn');
    } catch(e) {
      log('SMB credentials could not be stored: ' + e.message, 'warn');
    }
  }
  try {
    const r = await fetch('/api/file_sources/save',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({label,path,smb_host:smbHost,smb_user:smbUser})});
    const d = await r.json();
    if (d.error) { stat.style.color='var(--danger)'; stat.textContent=d.error; return; }
    // Reset the form, including the "user edited" flag consulted by fsrcAutoName().
    ['fsrcLabel','fsrcPath','fsrcSmbHost','fsrcSmbUser','fsrcSmbPw'].forEach(function(id){const el=document.getElementById(id);if(el){el.value='';el._userEdited=false;}});
    document.getElementById('fsrcSmbFields').style.display='none';
    stat.style.color='var(--accent)'; stat.textContent='\u2714 '+t('m365_fsrc_saved','Source saved');
    await _loadFileSources();
    log(t('m365_fsrc_saved','Source saved')+': '+label);
  } catch(e){ stat.style.color='var(--danger)'; stat.textContent=e.message; }
}
|
||||
|
||||
/**
 * Delete a file source after user confirmation.
 *
 * @param {string} id     Id of the source, sent to /api/file_sources/delete.
 * @param {string} label  Human-readable name used in the confirm dialog and log.
 *
 * The list is reloaded after the request either way. "Deleted" is logged only
 * when the server reports success: a JSON body with an `error` field or a
 * non-2xx status is shown in #fsrcStatus instead. The original ignored the
 * response entirely and always logged the deletion.
 */
async function fsrcDelete(id, label) {
  if (!confirm(t('m365_profile_delete_confirm','Delete')+' "'+label+'"?')) return;
  try {
    const r = await fetch('/api/file_sources/delete',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({id})});
    // The body may not be JSON; treat an unparsable body as "no error payload".
    const d = await r.json().catch(function() { return null; });
    await _loadFileSources();
    if (d && d.error) throw new Error(d.error);
    if (!r.ok) throw new Error('HTTP ' + r.status);
    log(t('m365_profile_deleted','Deleted')+': '+label);
  } catch(e) {
    const s = document.getElementById('fsrcStatus');
    if (s) { s.style.color = 'var(--danger)'; s.textContent = e.message; }
  }
}
|
||||
|
||||
/**
 * Start a scan of one saved file source.
 *
 * @param {string} id  Id of an entry in S._fileSources; unknown ids are ignored.
 *
 * Closes the file-sources modal, logs the start, and posts the whole source
 * object to /api/file_scan/start. An `error` field in the response, or any
 * thrown error, is written to the log with the 'err' class.
 */
async function fsrcScan(id) {
  const match = S._fileSources.find(function(entry) { return entry.id === id; });
  if (!match) return;

  closeFileSourcesModal();
  log(t('m365_fsrc_scan_start','Starting file scan')+': '+(match.label||match.path));

  try {
    const response = await fetch('/api/file_scan/start', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify(match)
    });
    const payload = await response.json();
    if (payload.error) log('File scan error: '+payload.error,'err');
  } catch (err) {
    log('File scan error: '+err.message,'err');
  }
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
// Everything below is published on `window` so inline HTML handlers and other
// modules can reach it by name.
Object.assign(window, {
  openSourcesMgmt,
  closeSourcesMgmt,
  switchSrcTab,
  smRefreshStatus,
  smConnect,
  smDisconnect,
  smGoogleSetMode,
  smGoogleRefreshStatus,
  smGoogleConnect,
  smGoogleDisconnect,
  smGooglePersonalStart,
  smGooglePersonalPoll,
  smGooglePersonalSignOut,
  getGoogleScanOptions,
  srcFileRenderList,
  srcFileDetectSmb,
  srcFileAutoName,
  srcFileAdd,
  srcFileEdit,
  srcFileDelete,
  srcFileScan,
  openFileSourcesModal,
  closeFileSourcesModal,
  _loadFileSources,
  _renderFileSources,
  fsrcDetectSmb,
  fsrcAutoName,
  fsrcAddSource,
  fsrcDelete,
  fsrcScan,
  _googleKeyDict,
  _googleAuthMode,
});
|
||||
341
static/js/log.js
Normal file
341
static/js/log.js
Normal file
@ -0,0 +1,341 @@
|
||||
import { S } from './state.js';
|
||||
// ── Log ──────────────────────────────────────────────────────────────────────
|
||||
// sessionStorage key under which log() persists the log lines.
const _LOG_SESSION_KEY = 'gdpr_log_session';
// Upper bound on persisted log lines; log() drops the oldest beyond this.
const _LOG_MAX_LINES   = 300;
let   _logFilter       = 'all';   // 'all' | 'err'

// Maps keywords found in phase strings → {label, pillClass}.
// Entries are tested in order and the first match wins, so specific sources
// must stay above the generic file entry at the bottom.
// Emoji patterns cover phases that have no source keyword in text
// (e.g. "📂 skolehaver: 1 msg(s)" — 📂 is only used for mail folders).
const _PHASE_SOURCE_MAP = [
  { re: /OneDrive/i,          label: 'OneDrive',   cls: 'progress-src-m365'   },
  { re: /SharePoint/i,        label: 'SharePoint', cls: 'progress-src-m365'   },
  { re: /\bTeams\b/i,         label: 'Teams',      cls: 'progress-src-m365'   },
  { re: /E-?mail|emails?|msg\(s\)|\uD83D\uDCC2/iu, label: 'Outlook', cls: 'progress-src-m365' },
  { re: /Google Workspace/i,  label: 'Gmail',      cls: 'progress-src-google' },
  { re: /Google Drive/i,      label: 'GDrive',     cls: 'progress-src-google' },
  { re: /Gmail/i,             label: 'Gmail',      cls: 'progress-src-google' },
  // Matches "fil", "filer" and "files". The previous alternative `S.es`
  // (which looks like a search-and-replace artifact of `es`) could never
  // match "files", so English file-scan phases were not tagged as Local.
  { re: /\bfil(er|es)?\b/i,   label: 'Local',      cls: 'progress-src-file'   },
];
|
||||
|
||||
/**
 * Escape a value for insertion into HTML text content.
 *
 * @param {*} s  Any value; it is converted with String() first.
 * @returns {string} The text with &, < and > replaced by their entities.
 *
 * The replacement strings previously mapped each character to itself, so
 * nothing was escaped. The ampersand of each entity is written as \x26 so
 * that HTML-aware tooling cannot decode the entities back into the raw
 * characters. `&` is replaced first so the entities produced for < and > are
 * not double-escaped.
 */
function _escHtml(s) {
  return String(s)
    .replace(/&/g, '\x26amp;')
    .replace(/</g, '\x26lt;')
    .replace(/>/g, '\x26gt;');
}
|
||||
|
||||
/**
 * Turn a phase fragment into a name suitable for display.
 *
 * A trailing count suffix such as ": 3 file(s)" or ": 5 msg(s)" is removed
 * first. If what remains (or the original text, when stripping leaves nothing)
 * contains an "@", it is looked up case-insensitively against the `email` and
 * `googleEmail` of every entry in S._allUsers, and the matching user's
 * displayName is returned. Otherwise the stripped text is returned.
 *
 * @param {string} text  Raw fragment; falsy values are returned unchanged.
 * @returns {string}
 */
function _resolveDisplayName(text) {
  if (!text) return text;

  const withoutCount = text
    .replace(/:\s*\d+\s*(file\(s\)|files?|filer|msg\(s\)|folders?)[\u2026\.]*\s*$/iu, '')
    .trim();
  const candidate = withoutCount || text;
  if (!candidate.includes('@')) return candidate;

  const wanted = candidate.toLowerCase();
  const match = S._allUsers.find(function(u) {
    return [u.email, u.googleEmail].some(function(addr) {
      return (addr || '').toLowerCase() === wanted;
    });
  });
  return match ? match.displayName : candidate;
}
|
||||
|
||||
/**
 * Render the current scan phase into #progressWho.
 *
 * The phase text is matched against _PHASE_SOURCE_MAP (first hit wins):
 *  - Source found and the text has the form "Left — Right" (em/en dash only;
 *    plain hyphens would cause false splits): the side that does not contain
 *    the source keyword is resolved to a display name, remembered in
 *    S._progressCurrentUser, and shown next to the source pill.
 *  - Source found but no dash split (e.g. "📂 Indbakke: 3 msg(s)"): the last
 *    remembered user is shown, since such sub-phases do not repeat the user
 *    name; only if none is known is the phase text (minus leading emoji and
 *    whitespace) used.
 *  - No source: informational phase (Auth mode, Delta mode, Resuming, …),
 *    shown as plain text without a pill.
 *
 * @param {string} phase  Phase text reported by the scan.
 */
function _setProgressPhase(phase) {
  const who = document.getElementById('progressWho');
  if (!who) return;

  // Shared markup for both "source identified" cases.
  const showPill = function(entry, name) {
    who.innerHTML =
      '<span class="progress-src-pill ' + entry.cls + '">' + entry.label + '</span>' +
      '<span class="progress-user">' + _escHtml(name) + '</span>';
  };

  const srcEntry = _PHASE_SOURCE_MAP.find(function(entry) { return entry.re.test(phase); }) || null;
  const dashMatch = phase.match(/^(.+?)\s+[\u2014\u2013]\s+(.+?)[\u2026\.]*\s*$/u);

  if (!srcEntry) {
    who.innerHTML = '<span class="progress-phase">' + _escHtml(phase) + '</span>';
    return;
  }

  if (dashMatch) {
    const left  = dashMatch[1].trim();
    const right = dashMatch[2].trim();
    // The name is whichever side does not carry the source keyword.
    const displayName = _resolveDisplayName(srcEntry.re.test(left) ? right : left);
    S._progressCurrentUser = displayName;
    showPill(srcEntry, displayName);
    return;
  }

  showPill(
    srcEntry,
    S._progressCurrentUser ||
      phase.replace(/^[\u{1F000}-\u{1FFFF}\u{2600}-\u{27FF}\s]+/u, '').trim()
  );
}
|
||||
|
||||
/**
 * Reset the progress bar texts: renders an empty phase and blanks the
 * #progressStats, #progressEta and #progressFile elements.
 */
function _clearProgressBar() {
  _setProgressPhase('');
  ['progressStats', 'progressEta', 'progressFile'].forEach(function(id) {
    document.getElementById(id).textContent = '';
  });
}
|
||||
|
||||
/**
 * Rebuild the segmented progress track inside #progressTrack.
 *
 * One segment is emitted per source whose scan is currently running
 * (S._m365ScanRunning, S._googleScanRunning, S._fileScanRunning). Each fill
 * element gets the id "progressFill_<key>" and a width taken from
 * S._srcPct[key] (0 when unset). With no running scan the track is emptied.
 */
function _renderProgressSegments() {
  const track = document.getElementById('progressTrack');
  if (!track) return;

  const running = [
    { key: 'm365',   active: S._m365ScanRunning,   color: 'var(--accent)', label: 'M365' },
    { key: 'google', active: S._googleScanRunning, color: '#3a7d44',       label: 'GWS' },
    { key: 'file',   active: S._fileScanRunning,   color: '#7a6a9e',       label: 'Files' },
  ].filter(function(seg) { return seg.active; });

  if (!running.length) { track.innerHTML = ''; return; }

  let html = '';
  for (const seg of running) {
    html += '<div class="progress-seg">' +
      '<div class="progress-seg-fill" id="progressFill_' + seg.key + '" style="background:' + seg.color + ';width:' + (S._srcPct[seg.key] || 0) + '%"></div>' +
      '</div>';
  }
  track.innerHTML = html;
}
|
||||
|
||||
/**
 * Report whether a scrollable element is scrolled (almost) to its end.
 *
 * @param {{scrollHeight:number, scrollTop:number, clientHeight:number}} p
 * @returns {boolean} true when fewer than 24px remain below the visible area.
 */
function _logAtBottom(p) {
  const remaining = p.scrollHeight - p.scrollTop - p.clientHeight;
  return remaining < 24;
}
|
||||
|
||||
/**
 * Append a timestamped line to the log panel and persist it for the session.
 *
 * @param {string} msg       Message text (rendered via textContent).
 * @param {string} [cls='']  Optional class suffix; the line gets "log-<cls>".
 *
 * Changes from the previous version:
 *  - While the errors-only filter is active, a new line is hidden unless it is
 *    an error or warning line, using the same predicate as setLogFilter().
 *    Before, only class-less lines were hidden, so a line with any other class
 *    stayed visible until the filter was re-applied.
 *  - A missing #logPanel no longer throws; the line is still persisted.
 */
function log(msg, cls='') {
  const timestamp = new Date().toLocaleTimeString();
  const p = document.getElementById('logPanel');
  if (p) {
    const live = document.getElementById('logLive');
    // Measure before inserting: only follow the log if the user was at the end.
    const atBottom = _logAtBottom(p);
    const d = document.createElement('div');
    d.className = 'log-line' + (cls ? ' log-' + cls : '');
    d.textContent = timestamp + '  ' + msg;
    // Insert before live indicator (always last)
    if (live) p.insertBefore(d, live); else p.appendChild(d);
    // Apply filter
    const isErr = d.classList.contains('log-err') || d.classList.contains('log-warn');
    if (_logFilter === 'err' && !isErr) d.classList.add('log-err-hidden');
    if (atBottom) p.scrollTop = p.scrollHeight;
  }
  // Persist to sessionStorage (best effort — storage may be unavailable or full)
  try {
    const lines = JSON.parse(sessionStorage.getItem(_LOG_SESSION_KEY) || '[]');
    lines.push({ t: timestamp, msg, cls });
    if (lines.length > _LOG_MAX_LINES) lines.splice(0, lines.length - _LOG_MAX_LINES);
    sessionStorage.setItem(_LOG_SESSION_KEY, JSON.stringify(lines));
  } catch(e) {}
}
|
||||
|
||||
/**
 * Show or hide the "live" status line (#logLive) at the end of the log.
 *
 * @param {string} msg  Text to show; a falsy value hides and empties the line.
 *
 * When showing, the log panel is scrolled to the end if it was already there.
 */
function setLogLive(msg) {
  const live = document.getElementById('logLive');
  if (!live) return;

  if (!msg) {
    live.style.display = 'none';
    live.textContent = '';
    return;
  }

  live.style.display = 'block';
  live.textContent = '▶ ' + msg;
  const panel = document.getElementById('logPanel');
  if (_logAtBottom(panel)) panel.scrollTop = panel.scrollHeight;
}
|
||||
|
||||
/**
 * Switch the log between showing all lines and errors/warnings only.
 *
 * @param {string} filter  'all' or 'err'.
 *
 * Stores the choice in _logFilter, updates the "active" state of the two
 * filter buttons and toggles "log-err-hidden" on every log line: with 'err',
 * lines lacking both "log-err" and "log-warn" are hidden.
 */
function setLogFilter(filter) {
  _logFilter = filter;
  const errorsOnly = filter === 'err';

  document.getElementById('logFilterAll').classList.toggle('active', filter === 'all');
  document.getElementById('logFilterErr').classList.toggle('active', errorsOnly);

  for (const line of document.querySelectorAll('#logPanel .log-line:not(#logLive)')) {
    const important = ['log-err', 'log-warn'].some(function(c) { return line.classList.contains(c); });
    line.classList.toggle('log-err-hidden', errorsOnly && !important);
  }
}
|
||||
|
||||
/**
 * Copy the text of every log line (excluding the live indicator) to the
 * clipboard, one line per row. On success the ".log-copy-btn" caption shows
 * a confirmation for 1.5 s; a rejected clipboard write is ignored.
 */
function copyLog() {
  const text = Array.from(
    document.querySelectorAll('#logPanel .log-line:not(#logLive)'),
    function(line) { return line.textContent; }
  ).join('\n');

  navigator.clipboard.writeText(text).then(function() {
    const btn = document.querySelector('.log-copy-btn');
    if (!btn) return;
    btn.textContent = '✓ Copied';
    setTimeout(function() { btn.textContent = '⎘ Copy'; }, 1500);
  }).catch(function() {});
}
|
||||
|
||||
/**
 * Re-create the log lines persisted in sessionStorage by log().
 *
 * Lines are inserted before the live indicator when it exists, otherwise
 * appended, and the panel is scrolled to the end afterwards. Any failure
 * (unavailable storage, malformed JSON, missing panel) is ignored.
 */
function _restoreLog() {
  try {
    const stored = JSON.parse(sessionStorage.getItem(_LOG_SESSION_KEY) || '[]');
    if (!stored.length) return;

    const panel = document.getElementById('logPanel');
    const live  = document.getElementById('logLive');

    for (const entry of stored) {
      const row = document.createElement('div');
      row.className = 'log-line' + (entry.cls ? ' log-' + entry.cls : '');
      row.textContent = entry.t + '  ' + entry.msg;
      if (live) {
        panel.insertBefore(row, live);
      } else {
        panel.appendChild(row);
      }
    }
    panel.scrollTop = panel.scrollHeight;
  } catch (e) {}
}
|
||||
|
||||
/**
 * Make the log panel vertically resizable by dragging #logResizeHandle.
 *
 * Dragging upwards makes the panel taller. The height is clamped to
 * 60–600px and then snapped to whole log rows (2–30 rows). Move/up/cancel
 * listeners are attached on pointerdown and removed again when the drag ends.
 * Does nothing unless the handle, #logWrap and #logPanel all exist.
 */
function _initLogResize() {
  const handle = document.getElementById('logResizeHandle');
  const wrap   = document.getElementById('logWrap');
  const panel  = document.getElementById('logPanel');
  if (!handle || !wrap || !panel) return;

  const ROW      = 18;  // 16px line-height + 2px margin-bottom
  const PAD      = 10;  // 6px padding-top + 6px padding-bottom - 2px (no margin on last line)
  const MIN_ROWS = 2;
  const MAX_ROWS = 30;
  const clamp = function(lo, value, hi) { return Math.max(lo, Math.min(hi, value)); };

  let startY, startH;

  const onDrag = function(e) {
    const rawH = clamp(60, startH + (startY - e.clientY), 600); // drag up = taller
    const rows = clamp(MIN_ROWS, Math.round((rawH - PAD) / ROW), MAX_ROWS);
    panel.style.height = (rows * ROW + PAD) + 'px';
  };

  const onUp = function(e) {
    document.body.style.cursor = '';
    document.body.style.userSelect = '';
    handle.releasePointerCapture(e.pointerId);
    handle.removeEventListener('pointermove', onDrag);
    handle.removeEventListener('pointerup', onUp);
    handle.removeEventListener('pointercancel', onUp);
  };

  handle.addEventListener('pointerdown', function(e) {
    startY = e.clientY;
    startH = panel.getBoundingClientRect().height;
    document.body.style.cursor = 'ns-resize';
    document.body.style.userSelect = 'none';
    // Capture so move/up events keep arriving while the pointer leaves the handle.
    handle.setPointerCapture(e.pointerId);
    handle.addEventListener('pointermove', onDrag);
    handle.addEventListener('pointerup', onUp);
    handle.addEventListener('pointercancel', onUp);
    e.preventDefault();
  });
}
|
||||
|
||||
/**
 * Make the preview panel horizontally resizable by dragging
 * #previewResizeHandle (drag left = wider). Width is clamped between 280px
 * and 70% of the window width; the final width is stored in sessionStorage
 * under 'gdpr_preview_width'. Drags are ignored while the panel has the
 * "hidden" class.
 *
 * Fixes over the previous version:
 *  - The maximum width is computed at the start of each drag rather than once
 *    at initialisation, so it follows later window resizes.
 *  - The width is persisted only when it parses to a finite number. A click
 *    without a drag leaves panel.style.width unset, which used to store "NaN".
 *  - The storage write is wrapped in try/catch, matching the localStorage
 *    handling in _initSourcesResize().
 */
function _initPreviewResize() {
  const handle = document.getElementById('previewResizeHandle');
  const panel  = document.getElementById('previewPanel');
  if (!handle || !panel) return;
  const MIN_W = 280;
  let startX, startW, maxW;
  handle.addEventListener('pointerdown', function(e) {
    if (panel.classList.contains('hidden')) return;
    startX = e.clientX;
    startW = panel.getBoundingClientRect().width;
    maxW = Math.round(window.innerWidth * 0.7);
    document.body.style.cursor = 'col-resize';
    document.body.style.userSelect = 'none';
    // Capture so move/up events keep arriving while the pointer leaves the handle.
    handle.setPointerCapture(e.pointerId);
    handle.addEventListener('pointermove', onDrag);
    handle.addEventListener('pointerup', onUp);
    handle.addEventListener('pointercancel', onUp);
    e.preventDefault();
  });
  function onDrag(e) {
    const delta = startX - e.clientX; // drag left = wider
    const w = Math.max(MIN_W, Math.min(maxW, startW + delta));
    panel.style.width = w + 'px';
  }
  function onUp(e) {
    document.body.style.cursor = '';
    document.body.style.userSelect = '';
    handle.releasePointerCapture(e.pointerId);
    handle.removeEventListener('pointermove', onDrag);
    handle.removeEventListener('pointerup', onUp);
    handle.removeEventListener('pointercancel', onUp);
    const finalW = parseInt(panel.style.width, 10);
    if (Number.isFinite(finalW)) {
      try { sessionStorage.setItem('gdpr_preview_width', finalW); } catch (err) {}
    }
  }
}
|
||||
|
||||
/**
 * Size #sourcesPanel after its content changed.
 * Called by renderSourcesPanel() after every re-render.
 *
 * The panel is pinned to its natural scroll height (all sources visible)
 * unless a smaller height was saved under localStorage 'gdpr_sources_h' by a
 * previous drag; that saved height is honoured only while it is still smaller
 * than the new content height. Storage errors fall back to the natural height.
 */
function _fitSourcesPanel() {
  const panel = document.getElementById('sourcesPanel');
  if (!panel) return;

  panel.style.height = '';              // clear to measure natural content height
  const natural = panel.scrollHeight;

  let target = natural;                 // default: show everything
  try {
    const saved = parseInt(localStorage.getItem('gdpr_sources_h'));
    if (saved && saved < natural) target = saved;   // honour user's smaller preference
  } catch (e) {}

  panel.style.height = target + 'px';
}
|
||||
|
||||
/**
 * Make #sourcesPanel vertically resizable by dragging #sourcesResizeHandle
 * (drag down = taller, drag up = shorter).
 *
 * The height snaps to multiples of one source row and is limited to the range
 * from two rows up to the panel's natural scroll height, measured at the
 * start of each drag. When the drag ends, the height is saved under
 * localStorage 'gdpr_sources_h' — or the saved value is removed when the
 * panel is back at full height. Storage errors are ignored.
 */
function _initSourcesResize() {
  const handle = document.getElementById('sourcesResizeHandle');
  const panel  = document.getElementById('sourcesPanel');
  if (!handle || !panel) return;

  const ROW   = 22;   // ~21px per .source-check row (padding:3px 0 + ~15px content)
  const MIN_H = ROW * 2;

  let startY, startH, maxH;
  const clampH = function(value) { return Math.max(MIN_H, Math.min(maxH, value)); };

  const onDrag = function(e) {
    const rawH = clampH(startH + (e.clientY - startY));
    // Snap to whole rows, then clamp again because snapping may overshoot maxH.
    panel.style.height = clampH(Math.round(rawH / ROW) * ROW) + 'px';
  };

  const onUp = function(e) {
    document.body.style.cursor = '';
    document.body.style.userSelect = '';
    handle.releasePointerCapture(e.pointerId);
    handle.removeEventListener('pointermove', onDrag);
    handle.removeEventListener('pointerup', onUp);
    handle.removeEventListener('pointercancel', onUp);

    const finalH = parseInt(panel.style.height);
    try {
      if (finalH >= maxH) {
        localStorage.removeItem('gdpr_sources_h');   // back to full — forget preference
      } else {
        localStorage.setItem('gdpr_sources_h', finalH);
      }
    } catch (err) {}
  };

  handle.addEventListener('pointerdown', function(e) {
    startY = e.clientY;
    startH = panel.getBoundingClientRect().height;
    // Max = natural scroll height (enough to show all sources — no more).
    // Clear the inline height to measure it, then restore the current height.
    panel.style.height = '';
    maxH = panel.scrollHeight;
    panel.style.height = startH + 'px';
    document.body.style.cursor = 'ns-resize';
    document.body.style.userSelect = 'none';
    handle.setPointerCapture(e.pointerId);
    handle.addEventListener('pointermove', onDrag);
    handle.addEventListener('pointerup', onUp);
    handle.addEventListener('pointercancel', onUp);
    e.preventDefault();
  });
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
// Everything below is published on `window` so inline HTML handlers and other
// modules can reach it by name. Note: window._logFilter receives the value
// _logFilter holds when this statement runs; setLogFilter() reassigns only
// the module-level binding.
Object.assign(window, {
  _escHtml,
  _resolveDisplayName,
  _setProgressPhase,
  _clearProgressBar,
  _renderProgressSegments,
  _logAtBottom,
  log,
  setLogLive,
  setLogFilter,
  copyLog,
  _restoreLog,
  _initLogResize,
  _initPreviewResize,
  _initSourcesResize,
  _fitSourcesPanel,
  _LOG_SESSION_KEY,
  _LOG_MAX_LINES,
  _logFilter,
  _PHASE_SOURCE_MAP,
});
|
||||
709
static/js/profiles.js
Normal file
709
static/js/profiles.js
Normal file
@ -0,0 +1,709 @@
|
||||
import { S } from './state.js';
|
||||
// ── Profiles (15c) ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
/**
 * Fetch the saved profiles from /api/profiles into S._profiles and refresh
 * the profile dropdown. A non-OK response or any thrown error is ignored,
 * because profiles are not critical for the page to work.
 */
async function loadProfiles() {
  try {
    const response = await fetch('/api/profiles');
    if (response.ok) {
      const data = await response.json();
      S._profiles = data.profiles || [];
      _renderProfileSelect();
    }
  } catch (e) {
    /* profiles not critical */
  }
}
|
||||
|
||||
/**
 * Rebuild the options of #profileSelect from S._profiles.
 *
 * The first option (the placeholder) is kept. Each profile becomes an option
 * labelled with its name plus, when present, the date part of last_run. The
 * previous selection is restored if that profile still exists; otherwise the
 * select falls back to the placeholder, S._activeProfileId is cleared and the
 * clear button is hidden.
 */
function _renderProfileSelect() {
  const sel = document.getElementById('profileSelect');
  if (!sel) return;

  const previous = sel.value;

  // Clear all except the placeholder option (first)
  while (sel.options.length > 1) sel.remove(1);

  S._profiles.forEach(function(p) {
    const opt = document.createElement('option');
    opt.value = p.id;
    opt.textContent = p.name + (p.last_run ? ' — ' + p.last_run.slice(0, 10) : '');
    opt.title = p.description || '';
    sel.appendChild(opt);
  });

  const stillExists = !!previous &&
    Array.from(sel.options).some(function(o) { return o.value === previous; });
  if (stillExists) {
    sel.value = previous;
    return;
  }

  sel.value = '';
  S._activeProfileId = null;
  const clearBtn = document.getElementById('profileClearBtn');
  if (clearBtn) clearBtn.style.display = 'none';
}
|
||||
|
||||
/**
 * Show or hide the #profileClearBtn button, if it exists.
 *
 * @param {boolean} visible  Truthy → display 'inline-block', falsy → 'none'.
 */
function _setProfileClearBtn(visible) {
  const clearBtn = document.getElementById('profileClearBtn');
  if (!clearBtn) return;
  clearBtn.style.display = visible ? 'inline-block' : 'none';
}
|
||||
|
||||
/**
 * Change handler for #profileSelect: makes the chosen profile the active one,
 * shows the clear button and applies the profile to the sidebar. An empty
 * selection or an id that is not in S._profiles is ignored.
 */
function onProfileChange() {
  const chosenId = document.getElementById('profileSelect').value;
  // placeholder can't be selected (disabled), guard anyway
  if (!chosenId) return;

  const chosen = S._profiles.find(function(p) { return p.id === chosenId; });
  if (!chosen) return;

  S._activeProfileId = chosenId;
  _setProfileClearBtn(true);
  _applyProfile(chosen);
}
|
||||
|
||||
/**
 * Forget which profile is active without touching the sidebar settings — the
 * sidebar already reflects the loaded (or manually adjusted) state.
 * Resets S._activeProfileId, puts #profileSelect back on its placeholder and
 * hides the clear button.
 */
function clearActiveProfile() {
  S._activeProfileId = null;

  const dropdown = document.getElementById('profileSelect');
  if (dropdown) {
    dropdown.value = '';
  }

  _setProfileClearBtn(false);
}
|
||||
|
||||
|
||||
/**
 * Apply a saved profile to the sidebar controls.
 *
 * Restores, in this order: source checkboxes (with deferred file and Google
 * source ids stored on S for later application), scan options, the date
 * filter, retention settings and the user selection, then logs that the
 * profile was loaded.
 *
 * @param {object} profile  Profile object as held in S._profiles. Fields read:
 *   sources, file_sources, google_sources, options, retention_years,
 *   fiscal_year_end, user_ids, name.
 *
 * Fix: the optional call to updateRetentionCutoffHint now uses a `typeof`
 * guard. The previous `fn && fn()` form throws a ReferenceError when the
 * identifier is not declared at all, which is the case it meant to tolerate.
 */
function _applyProfile(profile) {
  // ── Sources ──────────────────────────────────────────────────────────────
  // Restore source selections from profile — works for both M365 and file sources.
  // File sources may not be rendered yet (they load async), so store their IDs
  // in S._pendingProfileSources for renderSourcesPanel() to apply after re-render.
  const profileSources = profile.sources || [];
  document.querySelectorAll('#sourcesPanel input[data-source-id]').forEach(function(cb) {
    cb.checked = profileSources.includes(cb.dataset.sourceId);
  });
  _updateAccountsVisibility();
  // Deferred file sources — store IDs now, apply when _loadFileSources() resolves.
  // Don't filter against S._fileSources here — it may be empty at this point.
  const _knownSourceIds = new Set(['email', 'onedrive', 'sharepoint', 'teams', 'gmail', 'gdrive']);
  S._pendingProfileSources = (profile.file_sources && profile.file_sources.length)
    ? profile.file_sources.slice()
    : profileSources.filter(function(id) { return !_knownSourceIds.has(id); });
  // Deferred Google sources — store IDs now, apply when smGoogleRefreshStatus() resolves.
  const googleIds = profile.google_sources
    || profileSources.filter(function(id) { return id === 'gmail' || id === 'gdrive'; });
  S._pendingGoogleSources = googleIds.slice();

  // ── Options ───────────────────────────────────────────────────────────────
  // Only options present in the profile are applied; absent ones leave the
  // corresponding control untouched.
  const opts = profile.options || {};
  const setChecked = function(elId, value) {
    if (value === undefined) return;
    const el = document.getElementById(elId);
    if (el) el.checked = value;
  };
  const setValue = function(elId, value) {
    if (value === undefined) return;
    const el = document.getElementById(elId);
    if (el) el.value = value;
  };

  setChecked('optEmailBody', opts.email_body);

  if (opts.attachments !== undefined) {
    const el = document.getElementById('optAttachments');
    if (el) {
      el.checked = opts.attachments;
      // Update the size row opacity directly
      const sizeRow = document.getElementById('attachSizeRow');
      if (sizeRow) sizeRow.style.opacity = opts.attachments ? '1' : '0.4';
    }
  }

  setValue('optMaxAttachMB', opts.max_attach_mb);
  setValue('optMaxEmails', opts.max_emails);
  setChecked('optDelta', opts.delta);
  setChecked('optScanPhotos', opts.scan_photos);

  // ── Date filter ───────────────────────────────────────────────────────────
  const days = opts.older_than_days;
  if (days !== undefined) {
    const hidden  = document.getElementById('olderThan');
    const dateIn  = document.getElementById('olderThanDate');
    const presets = document.querySelectorAll('.date-preset');
    if (hidden) hidden.value = days;
    if (dateIn) {
      if (!days) {
        dateIn.value = '';
      } else {
        const d = new Date();
        d.setDate(d.getDate() - days);
        // NOTE(review): toISOString() formats in UTC while setDate() works in
        // local time, so the shown date can differ by one day near midnight —
        // confirm whether that matters to the code that reads this field.
        dateIn.value = d.toISOString().slice(0, 10);
      }
    }
    // Highlight matching preset button
    presets.forEach(p => {
      const y = parseInt(p.dataset.years || '0');
      const presetDays = y === 0 ? 0 : y * 365;
      if (y === 0) {
        p.classList.toggle('selected', !days);
      } else {
        p.classList.toggle('selected', days > 0 && presetDays === days);
      }
    });
  }

  // ── Retention ─────────────────────────────────────────────────────────────
  const retEnabled = !!(opts.retention_enabled || profile.retention_years);
  const retEl = document.getElementById('optRetention');
  if (retEl) {
    retEl.checked = retEnabled;
    // Show/hide panel directly
    const panel = document.getElementById('retentionPanel');
    if (panel) panel.style.display = retEnabled ? 'block' : 'none';
  }
  if (profile.retention_years) {
    const el = document.getElementById('optRetentionYears');
    if (el) el.value = profile.retention_years;
  }
  if (profile.fiscal_year_end) {
    const el = document.getElementById('optFiscalYearEnd');
    if (el) el.value = profile.fiscal_year_end;
  }
  // typeof is safe on undeclared identifiers; a bare reference is not.
  if (typeof updateRetentionCutoffHint === 'function') updateRetentionCutoffHint();

  // ── User selection ────────────────────────────────────────────────────────
  if (profile.user_ids === 'all') {
    S._allUsers.forEach(u => { u.selected = true; });
    if (S._allUsers.length) renderAccountList();
  } else if (Array.isArray(profile.user_ids) && profile.user_ids.length) {
    // Entries may be user objects or bare ids.
    window._pendingProfileUserIds = profile.user_ids.map(u => u.id || u);
    _applyPendingProfileUsers();
  } else if (Array.isArray(profile.user_ids) && profile.user_ids.length === 0) {
    // Explicitly empty list — deselect everyone so previous sidebar state doesn't persist
    S._allUsers.forEach(u => { u.selected = false; });
    if (S._allUsers.length) renderAccountList();
  }

  log(t('m365_profile_applied', 'Profile loaded') + ': ' + profile.name);
}
|
||||
|
||||
/**
 * Apply the user ids parked in window._pendingProfileUserIds.
 *
 * Does nothing while there are no pending ids or S._allUsers is still empty.
 * Otherwise exactly the listed users are marked selected, the account list is
 * re-rendered and the pending ids are cleared.
 */
function _applyPendingProfileUsers() {
  const pendingIds = window._pendingProfileUserIds;
  const nothingToDo = !pendingIds || !pendingIds.length || !S._allUsers.length;
  if (nothingToDo) return;

  // Select only the users listed in the profile
  for (const user of S._allUsers) {
    user.selected = pendingIds.includes(user.id);
  }
  renderAccountList();
  window._pendingProfileUserIds = null;
}
|
||||
|
||||
/**
 * Save the current sidebar settings as a profile.
 *
 * Prompts for a name (pre-filled with the active profile's name, if any).
 * A profile whose name matches case-insensitively is overwritten: its id and
 * description are reused. The payload is posted to /api/profiles/save; on
 * success the profile list is reloaded and the saved profile selected. A
 * server-reported error or a thrown error is shown with alert().
 */
async function saveCurrentAsProfile() {
  const suggested = S._activeProfileId
    ? (S._profiles.find(p => p.id === S._activeProfileId) || {}).name || ''
    : '';
  const name = prompt(t('m365_profile_save_prompt', 'Profile name:'), suggested);
  if (!name) return;

  const payload = buildScanPayload();
  const wantedName = name.toLowerCase();
  const existing = S._profiles.find(p => p.name.toLowerCase() === wantedName);

  const profile = {
    id:              existing?.id || '',
    name:            name,
    description:     existing?.description || '',
    sources:         payload.allSources,
    google_sources:  payload.googleSources,
    user_ids:        payload.user_ids,
    options:         payload.options,
    retention_years: parseInt(document.getElementById('optRetentionYears')?.value) || null,
    fiscal_year_end: document.getElementById('optFiscalYearEnd')?.value || '',
    email_to:        '',
    file_sources:    payload.fileSources,
  };

  try {
    const response = await fetch('/api/profiles/save', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify(profile)
    });
    const result = await response.json();
    if (result.error) {
      alert(result.error);
      return;
    }

    await loadProfiles();

    // Select the newly saved profile
    const dropdown = document.getElementById('profileSelect');
    if (dropdown) {
      dropdown.value = result.profile.id;
      S._activeProfileId = result.profile.id;
      _setProfileClearBtn(true);
    }
    log(t('m365_profile_saved', 'Profile saved') + ': ' + name);
  } catch (err) {
    alert('Save failed: ' + err.message);
  }
}
|
||||
|
||||
// ── Profile management modal (#15d) ──────────────────────────────────────────
|
||||
|
||||
/**
 * Open the profile management modal.
 *
 * Renders the profile list, shows the backdrop, and opens the editor for the
 * first profile when at least one exists. Rendering/editor failures are
 * logged to the console and do not prevent the modal from opening.
 */
function openProfileMgmtModal() {
  try {
    _renderProfileMgmt();
  } catch (err) {
    console.error('[profiles] _renderProfileMgmt threw:', err);
  }

  const backdrop = document.getElementById('pmgmtBackdrop');
  backdrop.classList.add('open');

  // Nothing to auto-open when there are no profiles.
  if (!(S._profiles.length > 0)) return;

  try {
    _pmgmtOpenEditor(S._profiles[0].id);
  } catch (err) {
    console.error('[profiles] _pmgmtOpenEditor threw:', err);
  }
}
|
||||
|
||||
/** Hide the profile management modal by removing `open` from its backdrop. */
function closeProfileMgmt() {
  const backdrop = document.getElementById('pmgmtBackdrop');
  backdrop.classList.remove('open');
}
|
||||
|
||||
/**
 * Map a source id to a display label.
 *
 * Built-in cloud sources have fixed labels. Any other id is looked up in
 * `S._fileSources`, preferring the entry's label, then its path. Unknown ids
 * are returned unchanged.
 *
 * @param {string} id  Source identifier.
 * @returns {string}   Label to display for that source.
 */
function _sourceLabel(id) {
  const builtin = {
    email: 'Outlook',
    onedrive: 'OneDrive',
    sharepoint: 'SharePoint',
    teams: 'Teams',
    gmail: 'Gmail',
    gdrive: 'Google Drive',
  };
  const builtinLabel = builtin[id];
  if (builtinLabel) return builtinLabel;

  const fileSource = S._fileSources.find(entry => entry.id === id);
  if (!fileSource) return id;
  return fileSource.label || fileSource.path || id;
}
|
||||
|
||||
/**
 * Render the list of profiles inside the management modal (`#pmgmtList`).
 *
 * Each profile becomes a clickable row (opens the editor) with Use /
 * Duplicate / Delete buttons. Button handlers are attached as closures
 * instead of inline `onclick="...('${id}','${name}')"` attributes: HTML
 * escaping does not protect a JavaScript string context, so a profile name
 * containing an apostrophe used to break the delete button (and allowed
 * script injection through profile names).
 *
 * Does nothing when the list element is absent.
 */
function _renderProfileMgmt() {
  const list = document.getElementById('pmgmtList');
  if (!list) return;

  const saved = S._profiles.filter(p => p.name !== 'Default' || S._profiles.length === 1);
  if (!saved.length) {
    list.innerHTML = `<div class="pmgmt-empty">${t('m365_profile_no_profiles','No saved profiles yet. Use 💾 to save the current sidebar settings as a profile.')}</div>`;
    return;
  }

  list.innerHTML = '';
  // NOTE(review): the loop iterates every profile, not the filtered `saved`
  // list used for the empty check above — kept as-is; confirm this is intended.
  for (const p of S._profiles) {
    const sources = (p.sources || []).map(_sourceLabel).join(', ') || '—';
    // last_run comes from stored profile data, so escape it before insertion.
    const lastRun = p.last_run
      ? _esc(p.last_run.slice(0,16).replace('T',' '))
      : t('m365_profile_never','never');
    const isActive = p.id === S._activeProfileId;

    const row = document.createElement('div');
    row.className = 'pmgmt-row';
    row.dataset.id = p.id;
    row.onclick = function() { _pmgmtOpenEditor(p.id); };
    row.innerHTML = `
      <div class="pmgmt-row-head">
        <span class="pmgmt-name">${_esc(p.name)}${isActive ? ' <span style="color:var(--accent);font-weight:400;font-size:10px">● activ</span>' : ''}</span>
        <div class="pmgmt-actions">
          <div style="display:flex;border:1px solid var(--border);border-radius:5px;overflow:hidden">
            <button class="btn-use" data-action="use" style="border-radius:0;border:none;border-right:1px solid var(--border)" data-i18n="m365_profile_use">Brug</button>
            <button data-action="duplicate" style="border-radius:0;border:none" data-i18n="m365_profile_duplicate">Kopier</button>
          </div>
          <button class="btn-del" data-action="delete" data-i18n="m365_profile_delete">Slet</button>
        </div>
      </div>
      <div class="pmgmt-sources">${_esc(sources)}</div>
      ${p.description ? `<div class="pmgmt-desc">${_esc(p.description)}</div>` : ''}
      <div class="pmgmt-meta">${t('m365_profile_last_run','Last run')}: ${lastRun}</div>
    `;

    // Bind button actions with the real values captured in a closure, so no
    // profile data is ever parsed as JavaScript source.
    const actions = {
      use: function() { _pmgmtUse(p.id); },
      duplicate: function() { _pmgmtDuplicate(p.id); },
      delete: function() { _pmgmtDelete(p.id, p.name); },
    };
    row.querySelectorAll('button[data-action]').forEach(function(btn) {
      btn.onclick = function(ev) {
        // Keep the click from bubbling to the row, which would open the editor.
        ev.stopPropagation();
        actions[btn.dataset.action]();
      };
    });

    list.appendChild(row);
  }
}
|
||||
|
||||
/**
 * Escape a value for insertion into HTML text or a double-quoted attribute.
 *
 * The value is stringified first, then `&`, `<`, `>` and `"` are replaced by
 * their HTML entities. `&` is handled first so the entities produced by the
 * later replacements are not escaped again.
 *
 * Not sufficient for JavaScript-string or single-quoted attribute contexts:
 * apostrophes are left untouched.
 *
 * @param {*} s  Value to escape (converted with String()).
 * @returns {string} HTML-escaped text.
 */
function _esc(s) {
  return String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
|
||||
|
||||
/**
 * Activate the profile with the given id from the management modal.
 *
 * Marks it as the active profile, applies its settings, syncs the
 * `profileSelect` dropdown when present, and closes the modal. Unknown ids
 * are ignored.
 *
 * @param {string} id  Profile id.
 */
function _pmgmtUse(id) {
  const chosen = S._profiles.find(entry => entry.id === id);
  if (chosen) {
    S._activeProfileId = id;
    _setProfileClearBtn(true);
    _applyProfile(chosen);

    // Keep the topbar dropdown in step with the new active profile.
    const dropdown = document.getElementById('profileSelect');
    if (dropdown) {
      dropdown.value = id;
    }

    closeProfileMgmt();
  }
}
|
||||
|
||||
/**
 * Open the profile editor for the profile with the given id.
 * Does nothing when no profile in `S._profiles` has that id.
 *
 * @param {string} id  Profile id.
 */
function _pmgmtOpenEditor(id) {
  const match = S._profiles.find(candidate => candidate.id === id);
  if (match) {
    _openEditorForProfile(match);
  }
}
|
||||
|
||||
/**
 * Populate and show the profile editor pane for `profile`.
 *
 * Records the edited id in `window._pmgmtEditId`, resets the role filter,
 * highlights the matching list row, and rebuilds the editor body (name,
 * description, accounts, scan options, retention settings). The sources
 * panel is filled afterwards by `_renderEditorSources`.
 *
 * Accounts: every user in `S._allUsers` gets a row, checked only when its id
 * is listed in `profile.user_ids`. Saved ids with the `manual:` prefix that
 * are not in `S._allUsers` are rendered as extra checked rows; without this
 * they were invisible in the editor and silently dropped from the profile on
 * the next save (the save reads `user_ids` from the rendered checkboxes).
 *
 * @param {object} profile  Profile to edit; a blank draft has an empty id.
 */
function _openEditorForProfile(profile) {
  const id = profile.id || '';
  window._pmgmtEditId = id;
  _pmgmtRoleActive = '';

  // Highlight active row (no row is highlighted for an unsaved draft).
  document.querySelectorAll('.pmgmt-row').forEach(r => r.classList.toggle('active', Boolean(id) && r.dataset.id === id));
  document.getElementById('pmgmtEditorTitle').textContent = profile.name;

  const body = document.getElementById('pmgmtEditorBody');
  const opts = profile.options || {};

  // Coerce numeric options before interpolating them into attribute values,
  // so unexpected stored data cannot inject markup.
  const olderThanDays = Number(opts.older_than_days) || 0;
  const maxAttachMb = Number(opts.max_attach_mb) || 20;
  const maxEmails = Number(opts.max_emails) || 2000;
  const retentionYears = Number(profile.retention_years) || 5;

  // Cut-off date shown in the date input: today minus older_than_days.
  let dateValue = '';
  if (olderThanDays) {
    const cutoff = new Date();
    cutoff.setDate(cutoff.getDate() - olderThanDays);
    dateValue = cutoff.toISOString().slice(0,10);
  }
  const presetCls = (days) => olderThanDays === days ? 'selected' : '';

  // Build account list from S._allUsers. user_ids entries may be plain ids or
  // objects carrying an `id` field.
  const savedIds = new Set((profile.user_ids || []).map(u => u.id || u));
  const knownIds = new Set(S._allUsers.map(u => u.id));
  const accountRows = S._allUsers.map(u => {
    // Only check users that were explicitly saved — default to unchecked like the main window
    const checked = savedIds.has(u.id) ? 'checked' : '';
    const platBadge = u.platform === 'both' ? '<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:linear-gradient(90deg,#E6F1FB 50%,#EAF3DE 50%);color:#1a4a1a;font-weight:500;border:0.5px solid #b5d4b5">M365+GWS</span>'
      : (u.platform || 'm365') === 'google' ? '<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#EAF3DE;color:#3B6D11;font-weight:500">GWS</span>'
      : '<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#E6F1FB;color:#185FA5;font-weight:500">M365</span>';
    const roleBadge = u.userRole === 'student' ? t('role_student','Elev') : u.userRole === 'staff' ? t('role_staff','Ansat') : t('role_other','Anden');
    return `<label class="pmgmt-acct-row" data-uid="${_esc(u.id)}"><input type="checkbox" ${checked} data-uid="${_esc(u.id)}"><span style="flex:1;color:var(--color-text-primary);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${_esc(u.displayName)}</span>${platBadge}<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#D3D1C7;color:#444441">${roleBadge}</span></label>`;
  }).join('');

  // Re-create rows for manually added accounts stored in the profile, using
  // the same markup as _pmgmtAddManual, so they survive an edit/save cycle.
  const manualPrefix = 'manual:';
  const manualRows = [...savedIds]
    .filter(uid => typeof uid === 'string' && uid.startsWith(manualPrefix) && !knownIds.has(uid))
    .map(uid => `<label class="pmgmt-acct-row"><input type="checkbox" checked data-uid="${_esc(uid)}"><span style="flex:1;color:var(--text);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${_esc(uid.slice(manualPrefix.length))}</span><span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#D3D1C7;color:#444441">Manuel</span></label>`)
    .join('');

  body.innerHTML = `
    <div>
      <div class="pmgmt-editor-section-title">Navn</div>
      <input id="pmgmtEditName" type="text" value="${_esc(profile.name)}" style="width:100%;margin-bottom:6px">
      <textarea id="pmgmtEditDesc" style="width:100%;font-size:12px;height:44px;resize:none" placeholder="Beskrivelse (valgfri)">${_esc(profile.description || '')}</textarea>
    </div>
    <div style="display:flex;gap:0;flex:1;min-height:0">
      <div style="flex:1;display:flex;flex-direction:column;gap:14px;overflow-y:auto;padding-right:16px">
        <div>
          <div class="pmgmt-editor-section-title">Kilder</div>
          <div id="peSourcesPanel"></div>
        </div>
        <div>
          <div class="pmgmt-editor-section-title">
            <span>Konti</span>
            <div style="display:flex;gap:4px;align-items:center">
              <div style="display:flex;background:var(--bg);border:1px solid var(--border);border-radius:6px;overflow:hidden">
                <button type="button" id="peRoleAll" onclick="_pmgmtRoleFilter('')" style="font-size:10px;height:22px;padding:0 7px;border:none;border-right:1px solid var(--border);background:var(--accent);color:#fff;cursor:pointer;box-sizing:border-box">${t('m365_filter_all','Alle')}</button>
                <button type="button" id="peRoleStaff" onclick="_pmgmtRoleFilter('staff')" style="font-size:10px;height:22px;padding:0 7px;border:none;border-right:1px solid var(--border);background:none;color:var(--muted);cursor:pointer;box-sizing:border-box">${t('role_staff','Ansat')}</button>
                <button type="button" id="peRoleStudent" onclick="_pmgmtRoleFilter('student')" style="font-size:10px;height:22px;padding:0 7px;border:none;background:none;color:var(--muted);cursor:pointer;box-sizing:border-box">${t('role_student','Elev')}</button>
              </div>
              <div style="display:flex;background:var(--bg);border:1px solid var(--border);border-radius:6px;overflow:hidden">
                <button type="button" onclick="_pmgmtSelectAllAccounts(true)" style="font-size:10px;height:22px;padding:0 7px;border:none;border-right:1px solid var(--border);background:none;color:var(--muted);cursor:pointer;box-sizing:border-box">${t('btn_all','Alle')}</button>
                <button type="button" onclick="_pmgmtSelectAllAccounts(false)" style="font-size:10px;height:22px;padding:0 7px;border:none;background:none;color:var(--muted);cursor:pointer;box-sizing:border-box">${t('btn_none','Ingen')}</button>
              </div>
            </div>
          </div>
          <div style="display:flex;gap:6px;margin-bottom:4px">
            <input type="text" id="pmgmtAcctSearch" placeholder="Søg konti…" style="flex:1;font-size:12px" oninput="_pmgmtFilterAccounts(this.value)">
            <button type="button" onclick="_pmgmtAddManual()" style="font-size:11px;padding:3px 10px;border-radius:5px;border:1px solid var(--border);background:none;color:var(--muted);cursor:pointer;white-space:nowrap">+ Tilføj konto</button>
          </div>
          <div class="pmgmt-account-list" id="pmgmtAcctList">${accountRows}${manualRows}</div>
        </div>
      </div>
      <div class="pmgmt-settings-col" style="overflow-y:auto">
        <div class="pmgmt-editor-section-title">Indstillinger</div>
        <div style="display:flex;flex-direction:column;gap:6px;font-size:12px">
          <label style="font-size:11px;color:var(--muted)">${t('m365_opt_date_from','Scan e-mails/filer fra')}</label>
          <div class="datepicker-wrap">
            <input type="date" id="peOptDate" autocomplete="off" value="${dateValue}" onchange="_peSetDate(this.value)">
            <div class="date-presets">
              <button type="button" class="date-preset peYearBtn ${presetCls(365)}" data-years="1" onclick="_peSetYear(1)">${t('m365_preset_1yr','1 år')}</button>
              <button type="button" class="date-preset peYearBtn ${presetCls(730)}" data-years="2" onclick="_peSetYear(2)">${t('m365_preset_2yr','2 år')}</button>
              <button type="button" class="date-preset peYearBtn ${presetCls(1825)}" data-years="5" onclick="_peSetYear(5)">${t('m365_preset_5yr','5 år')}</button>
              <button type="button" class="date-preset peYearBtn ${presetCls(3650)}" data-years="10" onclick="_peSetYear(10)">${t('m365_preset_10yr','10 år')}</button>
              <button type="button" class="date-preset peYearBtn ${presetCls(0)}" data-years="0" onclick="_peSetYear(0)">${t('m365_preset_any','Alle')}</button>
            </div>
          </div>
          <input type="hidden" id="peOptDays" value="${olderThanDays}">
          <hr style="border:none;border-top:1px solid var(--pmgmt-divider);margin:2px 0">
          <div class="pmgmt-opt-row"><span>${t('m365_opt_email_body','Scan e-mailindhold')}</span><label class="toggle"><input type="checkbox" id="peOptBody" ${opts.email_body !== false ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
          <div class="pmgmt-opt-row"><span>${t('m365_opt_attachments','Scan vedhæftede filer')}</span><label class="toggle"><input type="checkbox" id="peOptAtt" ${opts.attachments !== false ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
          <div class="pmgmt-opt-row"><span style="color:var(--muted)">${t('m365_opt_max_attach','Maks. vedhæftet filstørrelse (MB)')}</span><input type="number" id="peOptMaxAttach" value="${maxAttachMb}" min="1" max="100" style="width:46px;padding:3px 6px;font-size:11px;text-align:right"></div>
          <div class="pmgmt-opt-row"><span>${t('m365_opt_max_emails','Maks. e-mails pr. bruger')}</span><input type="number" id="peOptMaxEmails" value="${maxEmails}" min="10" max="50000" style="width:56px;padding:3px 6px;font-size:11px;text-align:right"></div>
          <div class="pmgmt-opt-row"><span>${t('m365_opt_delta','Delta-scanning')}</span><label class="toggle"><input type="checkbox" id="peOptDelta" ${opts.delta ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
          <div class="pmgmt-opt-row"><span>${t('m365_opt_scan_photos','Søg efter ansigter i billeder')}</span><label class="toggle"><input type="checkbox" id="peOptPhotos" ${opts.scan_photos ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
          <hr style="border:none;border-top:1px solid var(--pmgmt-divider);margin:2px 0">
          <div class="pmgmt-opt-row"><span>${t('m365_opt_retention','Opbevaringspolitik')}</span><label class="toggle"><input type="checkbox" id="peOptRetention" ${profile.retention_years ? 'checked' : ''}><span class="toggle-slider"></span></label></div>
          <div style="padding:7px 8px;background:var(--bg);border-radius:6px">
            <div class="pmgmt-opt-row" style="margin-bottom:5px"><span style="color:var(--muted)">${t('m365_ret_years','Opbevaringsår')}</span><input type="number" id="peOptRetYears" value="${retentionYears}" min="1" max="30" style="width:46px;padding:3px 6px;font-size:11px;text-align:right"></div>
            <div style="display:flex;flex-direction:column;gap:3px">
              <label style="font-size:11px;color:var(--muted)">${t('m365_ret_fy_end','Regnskabsår slut')}</label>
              <select id="peOptFiscalYearEnd" style="font-size:11px;padding:3px 6px;width:100%">
                <option value="" ${!profile.fiscal_year_end ? 'selected' : ''}>${t('m365_ret_fy_rolling','Rullende (i dag)')}</option>
                <option value="12-31" ${profile.fiscal_year_end==='12-31' ? 'selected' : ''}>${t('m365_ret_fy_dec','31 dec (Bogføringsloven)')}</option>
                <option value="06-30" ${profile.fiscal_year_end==='06-30' ? 'selected' : ''}>${t('m365_ret_fy_jun','30 jun')}</option>
                <option value="03-31" ${profile.fiscal_year_end==='03-31' ? 'selected' : ''}>${t('m365_ret_fy_mar','31 mar')}</option>
              </select>
            </div>
          </div>
        </div>
      </div>
    </div>
  `;

  document.getElementById('pmgmtEditorPlaceholder')?.remove();
  document.getElementById('pmgmtEditor').classList.add('open');
  // The sources panel is rendered separately so it can reflect which
  // connectors are currently enabled.
  _renderEditorSources((profile.sources || []).concat(profile.google_sources || []).concat(profile.file_sources || []));
}
|
||||
|
||||
/**
 * Handle a manual change of the editor's cut-off date input.
 *
 * Converts the picked date into a whole number of days before now (rounded),
 * stores it in the hidden `peOptDays` field, and clears the year-preset
 * selection because the date is now custom. An empty value is ignored.
 *
 * @param {string} val  Value of the date input.
 */
function _peSetDate(val) {
  if (val) {
    const MS_PER_DAY = 86400000;
    const elapsedMs = new Date() - new Date(val);
    const elapsedDays = Math.round(elapsedMs / MS_PER_DAY);

    const daysField = document.getElementById('peOptDays');
    if (daysField) {
      daysField.value = elapsedDays;
    }

    // A custom date matches none of the presets.
    for (const presetBtn of document.querySelectorAll('.peYearBtn')) {
      presetBtn.classList.remove('selected');
    }
  }
}
|
||||
|
||||
/**
 * Apply a year preset in the profile editor.
 *
 * Stores `years * 365` (or 0 for "any") in the hidden `peOptDays` field,
 * marks the matching preset button as selected, and mirrors the resulting
 * cut-off date into the `peOptDate` input (cleared when the preset is 0).
 *
 * @param {number} years  Preset in years; 0 means no age limit.
 */
function _peSetYear(years) {
  const days = years === 0 ? 0 : years * 365;

  const daysField = document.getElementById('peOptDays');
  if (daysField) {
    daysField.value = days;
  }

  for (const presetBtn of document.querySelectorAll('.peYearBtn')) {
    const btnYears = parseInt(presetBtn.dataset.years);
    let isSelected;
    if (years === 0) {
      isSelected = btnYears === 0;
    } else {
      isSelected = years > 0 && btnYears === years;
    }
    presetBtn.classList.toggle('selected', isSelected);
  }

  // Sync the date input
  const dateInput = document.getElementById('peOptDate');
  if (!dateInput) return;
  if (days === 0) {
    dateInput.value = '';
    return;
  }
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - days);
  dateInput.value = cutoff.toISOString().slice(0,10);
}
|
||||
|
||||
/**
 * Fill the editor's sources panel (`#peSourcesPanel`) with checkboxes.
 *
 * Three groups are rendered in order:
 *  - M365 sources from `_M365_SOURCES`, skipping any whose toggle element
 *    exists and is unchecked;
 *  - Gmail / Google Drive, only when `window._googleConnected` is truthy and
 *    the corresponding settings toggle is absent or checked;
 *  - every entry of `S._fileSources`.
 * A checkbox is pre-checked when its source id is in `checkedIds`.
 *
 * @param {string[]} checkedIds  Source ids that should start checked.
 */
function _renderEditorSources(checkedIds) {
  const panel = document.getElementById('peSourcesPanel');
  if (!panel) return;

  const DIVIDER = '<hr style="border:none;border-top:1px solid var(--border);margin:4px 0">';
  const checkedAttr = (sourceId) => (checkedIds.includes(sourceId) ? ' checked' : '');
  const parts = [];

  for (const src of _M365_SOURCES) {
    const toggle = src.toggleId ? document.getElementById(src.toggleId) : null;
    if (toggle && !toggle.checked) continue;
    parts.push(
      '<label class="source-check">',
      '<input type="checkbox" data-source-id="' + src.id + '" data-source-type="m365"' + checkedAttr(src.id) + '>',
      '<span class="source-icon">' + src.icon + '</span>',
      '<span class="source-label">' + t(src.labelKey, src.labelDefault) + '</span>',
      '</label>'
    );
  }

  if (window._googleConnected) {
    // A Google source counts as enabled when its settings toggle is missing or checked.
    const enabled = (toggleId) => {
      const el = document.getElementById(toggleId);
      return !el || el.checked;
    };
    const gmailOn = enabled('smGoogleSrcGmail');
    const driveOn = enabled('smGoogleSrcDrive');

    if (gmailOn || driveOn) parts.push(DIVIDER);
    if (gmailOn) {
      parts.push('<label class="source-check"><input type="checkbox" data-source-id="gmail" data-source-type="google"' + checkedAttr('gmail') + '><span class="source-icon">📧</span><span class="source-label">Gmail</span></label>');
    }
    if (driveOn) {
      parts.push('<label class="source-check"><input type="checkbox" data-source-id="gdrive" data-source-type="google"' + checkedAttr('gdrive') + '><span class="source-icon">📁</span><span class="source-label">Google Drive</span></label>');
    }
  }

  if (S._fileSources.length > 0) {
    parts.push(DIVIDER);
    for (const fileSrc of S._fileSources) {
      // Paths starting with // or \\ are shown with the network icon.
      const isSmb = fileSrc.path && (fileSrc.path.startsWith('//') || fileSrc.path.startsWith('\\\\'));
      const icon = isSmb ? '🌐' : '📁';
      parts.push(
        '<label class="source-check"><input type="checkbox" data-source-id="' + _esc(fileSrc.id) + '" data-source-type="file"' + checkedAttr(fileSrc.id) + '>'
        + '<span class="source-icon">' + icon + '</span>'
        + '<span class="source-label" title="' + _esc(fileSrc.path||'') + '">' + _esc(fileSrc.label||fileSrc.path||fileSrc.id) + '</span></label>'
      );
    }
  }

  panel.innerHTML = parts.join('');
}
|
||||
|
||||
/**
 * Start editing a brand-new profile.
 *
 * Builds an empty profile shell (blank id and name, no sources, users or
 * options), keeps it in `window._pmgmtNewDraft` so the save routine can pick
 * it up, and opens the editor on it.
 */
function _pmgmtNewProfile() {
  const draft = {
    id: '', name: '', description: '',
    sources: [], google_sources: [],
    user_ids: [],
    options: {},
    file_sources: [],
  };
  // The draft has no id yet, so it is held on window rather than looked up by id.
  window._pmgmtNewDraft = draft;
  _openEditorForProfile(draft);
}
|
||||
|
||||
/**
 * Close the profile editor pane: hides it, removes the row highlight,
 * clears `window._pmgmtEditId`, and closes the management modal.
 */
function _pmgmtCloseEditor() {
  const editor = document.getElementById('pmgmtEditor');
  editor.classList.remove('open');

  for (const row of document.querySelectorAll('.pmgmt-row')) {
    row.classList.remove('active');
  }

  window._pmgmtEditId = null;
  closeProfileMgmt();
}
|
||||
|
||||
/**
 * Check or uncheck every account checkbox that is currently visible in the
 * editor's account list. Rows hidden by the search/role filter
 * (`display: none` on the label) are left untouched.
 *
 * @param {boolean} checked  New checked state for the visible rows.
 */
function _pmgmtSelectAllAccounts(checked) {
  const boxes = document.querySelectorAll('#pmgmtAcctList label input[type=checkbox]');
  for (const box of boxes) {
    const hidden = box.closest('label').style.display === 'none';
    if (!hidden) {
      box.checked = checked;
    }
  }
}
|
||||
|
||||
// Role currently selected in the editor's account filter ('' = all roles).
let _pmgmtRoleActive = '';

/**
 * Switch the account list's role filter.
 *
 * Remembers the role, restyles the three filter buttons so only the matching
 * one looks active, then re-applies the filter together with whatever text is
 * in the account search box.
 *
 * @param {string} role  '' for all, or 'staff' / 'student'.
 */
function _pmgmtRoleFilter(role) {
  _pmgmtRoleActive = role;

  // Button element id -> role value that button stands for.
  const roleByButton = { peRoleAll: '', peRoleStaff: 'staff', peRoleStudent: 'student' };
  for (const [btnId, btnRole] of Object.entries(roleByButton)) {
    const btn = document.getElementById(btnId);
    if (!btn) continue;
    if (role === btnRole) {
      btn.style.background = 'var(--accent)';
      btn.style.color = '#fff';
      btn.style.border = '1px solid var(--accent)';
    } else {
      btn.style.background = 'none';
      btn.style.color = 'var(--muted)';
      btn.style.border = '1px solid var(--border)';
    }
  }

  // Apply filter combined with any active text search
  const searchBox = document.getElementById('pmgmtAcctSearch');
  _pmgmtFilterAccounts(searchBox?.value || '');
}
|
||||
|
||||
/**
 * Ask for an e-mail address and add it to the editor's account list as a
 * manually entered, pre-checked account.
 *
 * The row's id is `manual:` + the lower-cased, trimmed address. If a row with
 * that id already exists nothing is added. The duplicate check compares
 * `dataset.uid` values directly rather than building a CSS selector from the
 * input: an address containing `"` or `\` made the selector invalid (throwing
 * a SyntaxError) or match the wrong element.
 */
function _pmgmtAddManual() {
  const email = prompt('E-mail adresse:');
  if (!email || !email.trim()) return;

  const list = document.getElementById('pmgmtAcctList');
  if (!list) return;

  const address = email.trim();
  const id = 'manual:' + address.toLowerCase();

  const alreadyListed = Array.from(list.querySelectorAll('input[data-uid]'))
    .some(function(cb) { return cb.dataset.uid === id; });
  if (alreadyListed) return; // already exists

  const lbl = document.createElement('label');
  lbl.className = 'pmgmt-acct-row';
  lbl.innerHTML = `<input type="checkbox" checked data-uid="${_esc(id)}"><span style="flex:1;color:var(--text);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${_esc(address)}</span><span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#D3D1C7;color:#444441">Manuel</span>`;
  list.appendChild(lbl);
}
|
||||
|
||||
/**
 * Show or hide rows of the editor's account list.
 *
 * A row stays visible when its display name (text of the row's first span)
 * contains the query, case-insensitively, and — if a role filter is active —
 * the row's user is found in `S._allUsers` with that role. Rows whose uid is
 * not in `S._allUsers` are therefore hidden while a role filter is active.
 *
 * @param {string} q  Search text; empty or missing matches every name.
 */
function _pmgmtFilterAccounts(q) {
  const needle = (q || '').toLowerCase();

  for (const row of document.querySelectorAll('#pmgmtAcctList label')) {
    const firstSpan = row.querySelector('span');
    const displayName = (firstSpan || {}).textContent || '';
    const uid = row.querySelector('input')?.dataset?.uid || '';
    const account = S._allUsers.find(u => u.id === uid);

    const matchesRole = !_pmgmtRoleActive || (account && account.userRole === _pmgmtRoleActive);
    const matchesText = !needle || displayName.toLowerCase().includes(needle);

    row.style.display = (matchesRole && matchesText) ? '' : 'none';
  }
}
|
||||
|
||||
/**
 * Save the profile currently open in the editor pane.
 *
 * Reads name, description, sources, accounts, scan options and retention
 * settings from the editor DOM, merges them over the stored profile (or the
 * unsaved draft in `window._pmgmtNewDraft`), and POSTs the result to
 * `/api/profiles/save`.
 *
 * After a successful save the profiles are reloaded, the management list is
 * re-rendered (as the sibling save/duplicate/delete actions do — previously a
 * new or renamed profile did not show up until the modal was reopened), the
 * edited row is re-highlighted, and a short "saved" note is shown in the
 * editor footer. The modal stays open.
 *
 * @returns {Promise<void>}
 */
async function _pmgmtSaveFullEdit() {
  const id = window._pmgmtEditId;
  const profile = (id ? S._profiles.find(p => p.id === id) : null) || window._pmgmtNewDraft || {};

  const name = document.getElementById('pmgmtEditName')?.value?.trim();
  if (!name) { alert(t('m365_profile_name_required','Profile name is required.')); return; }

  const peSources = Array.from(document.querySelectorAll('#peSourcesPanel input[type=checkbox]:checked'));
  const idsOfType = (type) => peSources.filter(cb => cb.dataset.sourceType === type).map(cb => cb.dataset.sourceId);
  const m365Sources = idsOfType('m365');
  const googleSources = idsOfType('google');
  const fileSources = idsOfType('file');

  // Check whether the checkboxes were actually rendered in the editor DOM —
  // NOT whether Google is connected or file sources are loaded. Those are async
  // and may not have resolved when the editor first opened, leaving the panel
  // without checkboxes even though the connection exists. Using the DOM as the
  // source of truth avoids a race condition that silently cleared google/file sources.
  const googleRendered = !!document.querySelector('#peSourcesPanel input[data-source-type="google"]');
  const fileRendered = !!document.querySelector('#peSourcesPanel input[data-source-type="file"]');
  const effectiveGoogleSources = googleRendered ? googleSources : (profile.google_sources || []);
  const effectiveFileSources = fileRendered ? fileSources : (profile.file_sources || []);
  const allSources = m365Sources.concat(effectiveGoogleSources).concat(effectiveFileSources);

  const user_ids = Array.from(document.querySelectorAll('#pmgmtAcctList input[type=checkbox]:checked'))
    .map(cb => cb.dataset.uid)
    .filter(Boolean);

  // Integer value of an editor input, or `fallback` when missing/unparseable/0.
  const intField = (elId, fallback) => parseInt(document.getElementById(elId)?.value, 10) || fallback;
  const retentionOn = document.getElementById('peOptRetention')?.checked;

  const updated = {
    ...profile,
    name,
    description: document.getElementById('pmgmtEditDesc')?.value?.trim() || '',
    sources: allSources,
    google_sources: effectiveGoogleSources,
    file_sources: effectiveFileSources,
    user_ids,
    options: {
      ...(profile.options || {}),
      older_than_days: intField('peOptDays', 0),
      email_body: document.getElementById('peOptBody')?.checked ?? true,
      attachments: document.getElementById('peOptAtt')?.checked ?? true,
      max_attach_mb: intField('peOptMaxAttach', 20),
      max_emails: intField('peOptMaxEmails', 2000),
      delta: document.getElementById('peOptDelta')?.checked ?? false,
      scan_photos: document.getElementById('peOptPhotos')?.checked ?? false,
    },
    // Retention settings are only stored while the retention toggle is on.
    retention_years: retentionOn ? intField('peOptRetYears', 5) : null,
    fiscal_year_end: retentionOn ? (document.getElementById('peOptFiscalYearEnd')?.value || '') : '',
  };

  try {
    const r = await fetch('/api/profiles/save', {
      method: 'POST', headers: {'Content-Type':'application/json'},
      body: JSON.stringify(updated)
    });
    const d = await r.json();
    if (d.error) { alert(d.error); return; }
    await loadProfiles();
    window._pmgmtNewDraft = null;
    log(t('m365_profile_saved','Profile saved') + ': ' + name);

    // Show inline saved feedback without closing the modal
    const footer = document.querySelector('#pmgmtEditor > div:last-child');
    if (footer) {
      const fb = document.createElement('span');
      fb.textContent = '✓ ' + t('m365_profile_saved', 'Saved');
      fb.style.cssText = 'font-size:11px;color:var(--success);margin-right:auto';
      footer.prepend(fb);
      setTimeout(function() { fb.remove(); }, 2000);
    }

    // Point the editor at the saved profile: prefer the id returned by the
    // server, fall back to a name match when the response carries no id.
    const savedId = d.profile && d.profile.id;
    const saved = (savedId && S._profiles.find(function(p) { return p.id === savedId; }))
      || S._profiles.find(function(p) { return p.name === name; });
    if (saved) { window._pmgmtEditId = saved.id; }

    // Refresh the list so new/renamed profiles appear, then restore the
    // highlight that the re-render discarded. A render failure must not be
    // reported as a failed save, so it is only logged.
    try { _renderProfileMgmt(); } catch(e) { console.error('[profiles] _renderProfileMgmt threw:', e); }
    const editId = window._pmgmtEditId;
    document.querySelectorAll('.pmgmt-row').forEach(function(row) {
      row.classList.toggle('active', Boolean(editId) && row.dataset.id === editId);
    });
  } catch(e) { alert('Save failed: ' + e.message); }
}
|
||||
|
||||
|
||||
/**
 * Save an inline name/description edit for the profile with the given id.
 *
 * Reads the values from the `pmgmt-edit-name-<id>` and `pmgmt-edit-desc-<id>`
 * inputs, requires a non-empty name, posts the merged profile to
 * `/api/profiles/save`, then reloads and re-renders the profile list.
 *
 * @param {string} id  Profile id.
 * @returns {Promise<void>}
 */
async function _pmgmtSaveEdit(id) {
  const trimmedValue = (prefix) => document.getElementById(`${prefix}-${id}`)?.value?.trim();
  const name = trimmedValue('pmgmt-edit-name');
  const desc = trimmedValue('pmgmt-edit-desc');

  if (!name) {
    alert(t('m365_profile_name_required','Profile name is required.'));
    return;
  }

  const current = S._profiles.find(entry => entry.id === id);
  if (!current) return;

  const payload = { ...current, name, description: desc || '' };
  const request = {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(payload),
  };

  try {
    const response = await fetch('/api/profiles/save', request);
    const result = await response.json();
    if (result.error) {
      alert(result.error);
      return;
    }
    await loadProfiles();
    _renderProfileMgmt();
    log(t('m365_profile_saved','Profile saved') + ': ' + name);
  } catch (err) {
    alert('Save failed: ' + err.message);
  }
}
|
||||
|
||||
/**
 * Duplicate the profile with the given id.
 *
 * The copy gets an empty id, cleared `last_run` / `last_scan_id`, and a
 * unique name: "<base> (copy)", then "<base> (copy 2)", "(copy 3)", … where
 * <base> is the original name with any existing " (copy)" / " (copy N)"
 * suffix removed. The list is reloaded and re-rendered on success.
 *
 * @param {string} id  Id of the profile to copy.
 * @returns {Promise<void>}
 */
async function _pmgmtDuplicate(id) {
  const original = S._profiles.find(entry => entry.id === id);
  if (!original) return;

  const base = original.name.replace(/ \(copy( \d+)?\)$/, '');

  // Find a unique name
  const takenNames = new Set(S._profiles.map(entry => entry.name));
  let name = base + ' (copy)';
  for (let n = 2; takenNames.has(name); n++) {
    name = `${base} (copy ${n})`;
  }

  const copy = { ...original, id: '', name, last_run: null, last_scan_id: null };
  const request = {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(copy),
  };

  try {
    const response = await fetch('/api/profiles/save', request);
    const result = await response.json();
    if (result.error) {
      alert(result.error);
      return;
    }
    await loadProfiles();
    _renderProfileMgmt();
    log(t('m365_profile_duplicated','Profile duplicated') + ': ' + name);
  } catch (err) {
    alert('Duplicate failed: ' + err.message);
  }
}
|
||||
|
||||
/**
 * Delete a profile after user confirmation.
 *
 * Posts the id to `/api/profiles/delete`. If the deleted profile was the
 * active one, the active selection is cleared. The list is then reloaded and
 * re-rendered.
 *
 * @param {string} id    Profile id.
 * @param {string} name  Profile name, used in the confirmation and log text.
 * @returns {Promise<void>}
 */
async function _pmgmtDelete(id, name) {
  const question = t('m365_profile_delete_confirm','Delete profile') + ' "' + name + '"?';
  const confirmed = confirm(question);
  if (!confirmed) return;

  const request = {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify({ id }),
  };

  try {
    const response = await fetch('/api/profiles/delete', request);
    const result = await response.json();
    if (result.error) {
      alert(result.error);
      return;
    }
    if (S._activeProfileId === id) {
      S._activeProfileId = null;
      _setProfileClearBtn(false);
    }
    await loadProfiles();
    _renderProfileMgmt();
    log(t('m365_profile_deleted','Profile deleted') + ': ' + name);
  } catch (err) {
    alert('Delete failed: ' + err.message);
  }
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
// Inline HTML event handlers and other modules reach these through `window`.
// Note: `_pmgmtRoleActive` is copied by value at this point; later changes to
// the module-level variable are not reflected on `window._pmgmtRoleActive`.
Object.assign(window, {
  loadProfiles,
  _renderProfileSelect,
  _setProfileClearBtn,
  onProfileChange,
  clearActiveProfile,
  _applyProfile,
  _applyPendingProfileUsers,
  saveCurrentAsProfile,
  openProfileMgmtModal,
  closeProfileMgmt,
  _sourceLabel,
  _renderProfileMgmt,
  _esc,
  _pmgmtUse,
  _pmgmtOpenEditor,
  _openEditorForProfile,
  _peSetDate,
  _peSetYear,
  _renderEditorSources,
  _pmgmtNewProfile,
  _pmgmtCloseEditor,
  _pmgmtSelectAllAccounts,
  _pmgmtRoleFilter,
  _pmgmtAddManual,
  _pmgmtFilterAccounts,
  _pmgmtSaveFullEdit,
  _pmgmtSaveEdit,
  _pmgmtDuplicate,
  _pmgmtDelete,
  _pmgmtRoleActive,
});
|
||||
886
static/js/results.js
Normal file
886
static/js/results.js
Normal file
@ -0,0 +1,886 @@
|
||||
import { S } from './state.js';
|
||||
// ── Cards ─────────────────────────────────────────────────────────────────────
|
||||
// Badge data per source type: source_type -> [icon, CSS class, short label].
// Every CSS class is 'badge-' + the source type, so only icon and label are
// listed and the class is derived.
const SOURCE_BADGES = Object.fromEntries(
  [
    ['email', '📧', 'Outlook'],
    ['gmail', '📧', 'Gmail'],
    ['gdrive', '📁', 'GDrive'],
    ['onedrive', '💾', 'OneDrive'],
    ['sharepoint', '🌐', 'SharePoint'],
    ['teams', '💬', 'Teams'],
    ['local', '📁', 'Local'],
    ['smb', '🌐', 'Network'],
  ].map(([sourceType, icon, label]) => [sourceType, [icon, 'badge-' + sourceType, label]])
);
|
||||
|
||||
/**
 * Build one result card for a flagged item and append it to #grid.
 *
 * The card is skipped when it does not match the current search text or
 * source filter. Layout depends on S.isListView (list row vs. thumbnail card).
 *
 * All values that originate from scanned data (file name, folder, source,
 * account name, ...) are HTML-escaped before being placed into innerHTML.
 * The delete button is wired with addEventListener instead of an inline
 * onclick attribute carrying a JSON copy of the item, which could not be
 * quoted safely inside an HTML attribute.
 *
 * @param {object} f  Flagged item as held in S.flaggedData.
 */
function appendCard(f) {
  // Escape a value for interpolation into HTML text or a quoted attribute.
  const esc = (v) => String(v).replace(/[&<>"']/g, (ch) => (
    { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
  ));

  const search = document.getElementById('filterSearch').value.trim().toLowerCase();
  const srcVal = document.getElementById('filterSource').value;
  if (search && !f.name.toLowerCase().includes(search)) return;
  if (srcVal && f.source_type !== srcVal) return;

  const grid = document.getElementById('grid');
  const [icon, badgeCls, label] = SOURCE_BADGES[f.source_type] || ['📄', '', f.source_type];
  const src = f.thumb_b64
    ? 'data:' + f.thumb_mime + ';base64,' + f.thumb_b64
    : '/api/thumb?name=' + encodeURIComponent(f.name) + '&type=' + encodeURIComponent(f.source_type);

  const card = document.createElement('div');
  card.className = 'card' + (S.isListView ? ' list-view' : '');
  card.dataset.id = f.id;
  card.onclick = () => openPreview(f);

  // No inline handler here: the click listener is attached after innerHTML is set.
  const delBtn = window.VIEWER_MODE
    ? ''
    : `<button class="card-delete-btn" title="${t('m365_delete_confirm','Delete')}">🗑</button>`;

  const name = esc(f.name);

  // Role badge shown inside the account pill (student / staff / none).
  const roleBadge = () => {
    if (f.user_role === 'student') return '<span class="role-badge">' + t('role_student','Elev') + '</span>';
    if (f.user_role === 'staff') return '<span class="role-badge">' + t('role_staff','Ansat') + '</span>';
    return '';
  };
  // Account pill; `sep` is the view-specific separator placed before it.
  const accountPill = (sep) => (f.account_name
    ? sep + '<span class="account-pill" title="' + esc(f.account_name) + '">' + roleBadge() + esc(f.account_name) + '</span>'
    : '');
  const transferPill = f.transfer_risk === 'external-recipient'
    ? ' <span class="role-pill" style="background:#7B2D00;color:#FFD0B0">⚠ Ext.</span>'
    : f.transfer_risk
      ? ' <span class="role-pill" style="background:#003D7B;color:#B0D4FF">🔗</span>'
      : '';
  const facesText = () => esc(f.face_count) + ' ' + t('m365_badge_faces', f.face_count === 1 ? 'face' : 'faces');

  if (S.isListView) {
    card.innerHTML = `
      <div style="font-size:24px; flex-shrink:0">${icon}</div>
      <div class="card-info list-info">
        <div class="card-name" title="${name}">${name}</div>
        <div class="card-meta">${esc(f.size_kb)} KB · ${esc(f.modified || '')}${f.folder ? ' · 📂 ' + esc(f.folder) : ''}</div>
        <div class="card-source"><span class="source-badge ${badgeCls}">${esc(label)}</span> ${esc(f.source || '')}${accountPill(' · ')}${transferPill}</div>
      </div>
      <span class="cpr-badge">${esc(f.cpr_count)} CPR</span>
      ${f.face_count > 0 ? '<span class="photo-face-badge">' + facesText() + '</span> ' : ''}
      ${f.exif && f.exif.gps ? '<span class="photo-face-badge" style="background:#0a3a5a;color:#7ec8d0">🌍 GPS</span> ' : ''}
      ${f.special_category && f.special_category.length ? '<span class="special-cat-badge">⚠ Art.9 — ' + esc(f.special_category.filter(function(s){return s !== 'gps_location' && s !== 'exif_pii';}).join(', ')) + '</span> ' : ''}${f.overdue ? '<span class="overdue-badge">🗓 Overdue</span>' : ''}
      ${delBtn}`;
  } else {
    card.innerHTML = `
      <div class="thumb-wrap"><img src="${esc(src)}" alt="${name}" loading="lazy"></div>
      <div class="card-info">
        <div class="card-name" title="${name}">${name}</div>
        <div class="card-meta">${esc(f.size_kb)} KB · ${esc(f.modified || '')}</div>
        ${f.folder ? `<div class="card-meta" style="font-size:10px" title="${esc(f.folder)}">📂 ${esc(f.folder)}</div>` : ''}
        <div class="card-source"><span class="source-badge ${badgeCls}">${esc(label)}</span>${accountPill(' ')}${transferPill}</div>
        <span class="cpr-badge">${esc(f.cpr_count)} CPR</span>${f.face_count > 0 ? ' <span class="photo-face-badge">' + facesText() + '</span>' : ''}${f.exif && f.exif.gps ? ' <span class="photo-face-badge" style="background:#0a3a5a;color:#7ec8d0">🌍 GPS</span>' : ''}${f.overdue ? ' <span class="overdue-badge">🗓 Overdue</span>' : ''}
      </div>
      ${delBtn}`;
  }

  // Wire the delete button (absent in viewer mode). stopPropagation keeps the
  // click from also opening the preview via card.onclick.
  const delEl = card.querySelector('.card-delete-btn');
  if (delEl) {
    delEl.addEventListener('click', (ev) => {
      ev.stopPropagation();
      deleteItem(f, card);
    });
  }

  grid.appendChild(card);
}
|
||||
|
||||
/**
 * Clear #grid and render one card per item.
 *
 * @param {Array<object>} [files]  Items to render. When omitted, falls back to
 *   the current filtered set, or to all flagged items if the filtered set is
 *   empty. Some callers in this file invoke renderGrid() without arguments,
 *   which previously threw on `files.forEach`.
 */
function renderGrid(files) {
  const items = files == null
    ? (S.filteredData.length ? S.filteredData : S.flaggedData)
    : files;
  const grid = document.getElementById('grid');
  grid.innerHTML = '';
  items.forEach(f => appendCard(f));
}
|
||||
|
||||
// ── Preview panel ─────────────────────────────────────────────────────────────
|
||||
// Id of the item currently shown in the preview panel (null when closed).
// Used to discard responses that arrive after the user selected another card.
let _previewItemId = null;

/**
 * Open the preview panel for an item and load its preview from
 * /api/preview/<id>.
 *
 * The response `type` selects the rendering: 'local' (placeholder page),
 * 'html' / 'info' (server-provided HTML rendered through srcdoc) or
 * 'iframe' (external URL). Anything else shows a "no preview" message.
 *
 * Item fields shown in the meta row and the local placeholder are
 * HTML-escaped; the "open" button is wired with a listener rather than an
 * inline onclick containing the URL.
 *
 * @param {object} f  Flagged item (id, name, source_type, account_id, url, ...).
 */
async function openPreview(f) {
  // Escape a value for interpolation into HTML text or a quoted attribute.
  const esc = (v) => String(v).replace(/[&<>"']/g, (ch) => (
    { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
  ));

  // Highlight selected card
  document.querySelectorAll('.card.selected').forEach(c => c.classList.remove('selected'));
  const cardEl = document.querySelector(`.card[data-id="${CSS.escape(f.id)}"]`);
  if (cardEl) cardEl.classList.add('selected');

  const panel   = document.getElementById('previewPanel');
  const frame   = document.getElementById('previewFrame');
  const loading = document.getElementById('previewLoading');
  const title   = document.getElementById('previewTitle');
  const meta    = document.getElementById('previewMeta');

  panel.classList.remove('hidden');
  const _savedW = sessionStorage.getItem('gdpr_preview_width');
  if (_savedW) panel.style.width = _savedW + 'px';
  title.textContent = f.name;
  frame.style.display = 'none';
  loading.style.display = 'flex';
  loading.textContent = 'Loading preview…';
  // Drop any load handler left over from a previous preview so a late load
  // event cannot reveal the frame while this preview is still loading.
  frame.onload = null;

  meta.innerHTML = [
    f.account_name ? `<span style="font-weight:500">👤 ${esc(f.account_name)}</span>` : '',
    f.source       ? `<span>${esc(f.source)}</span>` : '',
    f.size_kb      ? `<span>${esc(f.size_kb)} KB</span>` : '',
    f.modified     ? `<span>${esc(f.modified)}</span>` : '',
    f.cpr_count    ? `<span style="color:var(--danger)">${esc(f.cpr_count)} CPR</span>` : '',
    f.url          ? `<button class="preview-open-btn">${t("m365_preview_open","Open in M365 ↗")}</button>` : '',
  ].filter(Boolean).join('');
  const openBtn = meta.querySelector('.preview-open-btn');
  if (openBtn) openBtn.addEventListener('click', () => window.open(f.url, '_blank'));

  _previewItemId = f.id;
  loadDisposition(f.id);   // load disposition for this item (#6)

  const showFrame = () => {
    loading.style.display = 'none';
    frame.style.display = 'block';
  };

  try {
    const r = await fetch('/api/preview/' + encodeURIComponent(f.id)
      + '?source_type=' + encodeURIComponent(f.source_type || '')
      + '&account_id=' + encodeURIComponent(f.account_id || ''));
    const d = await r.json();

    if (_previewItemId !== f.id) return;   // stale — user clicked another card

    if (d.error) {
      loading.textContent = d.error;
      return;
    }

    const dark = document.body.dataset.theme === 'dark';
    const theme = dark ? '#1e1e1e' : '#ffffff';

    if (d.type === 'local') {
      showFrame();
      frame.srcdoc = `<html><body style="font-family:sans-serif;color:#ccc;background:#1e1e1e;padding:24px;display:flex;flex-direction:column;align-items:center;justify-content:center;height:80vh;gap:12px">
        <div style="font-size:40px">📁</div>
        <div style="font-size:14px;font-weight:600">${esc(d.name || f.name)}</div>
        <div style="font-size:11px;color:#888">${t('m365_preview_local_file','Local file — no cloud preview available')}</div>
        <div style="font-size:10px;color:#666;word-break:break-all;max-width:400px;text-align:center">${esc(d.path || '')}</div>
      </body></html>`;
      return;
    }

    if (d.type === 'html' && d.html) {
      showFrame();
      const textColor  = dark ? '#e0e0e0' : '#111111';
      const mutedColor = dark ? '#888' : '#666';
      // d.html is server-rendered markup and is inserted as-is.
      frame.srcdoc = `<html><body style="margin:0;background:${theme};color:${textColor};font-family:sans-serif;--muted:${mutedColor};--text:${textColor};--mono:monospace">${d.html}</body></html>`;
      return;
    }

    if (d.type === 'info' && d.html) {
      showFrame();
      frame.srcdoc = `<html><body style="margin:0;padding:20px;background:${theme};color:#888;font-family:sans-serif">${d.html}</body></html>`;
      return;
    }

    if (d.type === 'iframe' && d.url) {
      // A srcdoc attribute left by an earlier preview takes precedence over
      // src, so it must be removed before navigating the frame by URL.
      frame.removeAttribute('srcdoc');
      frame.onload = showFrame;
      frame.src = d.url;
    } else {
      // Also covers type 'html' with an empty body, which previously rendered
      // the literal text "undefined" through an unreleased Blob URL.
      loading.textContent = t('m365_preview_open','Open in M365') + ' — No preview available.';
    }
  } catch(e) {
    if (_previewItemId !== f.id) return;   // do not clobber a newer preview
    loading.textContent = 'Preview failed: ' + e.message;
  }
}
|
||||
|
||||
// ── Retention policy (#1) ────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Show or hide the retention settings panel to match the #optRetention
 * checkbox, refreshing the cutoff hint whenever the panel is shown.
 */
function toggleRetentionPanel() {
  const isOn = document.getElementById('optRetention').checked;
  const panelStyle = document.getElementById('retentionPanel').style;
  if (!isOn) {
    panelStyle.display = 'none';
    return;
  }
  panelStyle.display = 'block';
  updateRetentionCutoffHint();
}
|
||||
|
||||
/**
 * Recompute the retention cutoff date client-side and show it in
 * #retentionCutoffHint.
 *
 * With a fiscal-year end ("MM-DD") the cutoff is the most recent fiscal-year
 * end that is strictly before now, minus the retention years. Otherwise it is
 * today minus the retention years (rolling).
 *
 * The date is formatted from local calendar components. Using
 * Date#toISOString() here converts to UTC first, which shifts a local-midnight
 * date to the previous day in time zones east of UTC.
 */
function updateRetentionCutoffHint() {
  const years = parseInt(document.getElementById('optRetentionYears')?.value, 10) || 5;
  const fyEnd = document.getElementById('optFiscalYearEnd')?.value || '';
  const hint  = document.getElementById('retentionCutoffHint');
  if (!hint) return;

  // Compute cutoff client-side for instant feedback
  const today = new Date();
  let cutoff = null;
  if (fyEnd) {
    const [mm, dd] = fyEnd.split('-').map(Number);
    // A malformed value would produce an Invalid Date; fall back to rolling mode.
    if (Number.isInteger(mm) && Number.isInteger(dd)) {
      let fyEndDate = new Date(today.getFullYear(), mm - 1, dd);
      if (fyEndDate >= today) fyEndDate = new Date(today.getFullYear() - 1, mm - 1, dd);
      cutoff = new Date(fyEndDate);
      cutoff.setFullYear(cutoff.getFullYear() - years);
    }
  }
  const fiscal = cutoff !== null;
  if (!fiscal) {
    cutoff = new Date(today);
    cutoff.setFullYear(cutoff.getFullYear() - years);
  }

  // Local YYYY-MM-DD
  const pad = (n) => String(n).padStart(2, '0');
  const iso = cutoff.getFullYear() + '-' + pad(cutoff.getMonth() + 1) + '-' + pad(cutoff.getDate());
  const mode = fiscal ? t('m365_ret_mode_fiscal', 'fiscal year') : t('m365_ret_mode_rolling', 'rolling');
  hint.textContent = t('m365_ret_cutoff_hint', 'Items modified before') + ' ' + iso + ' (' + mode + ') ' + t('m365_ret_cutoff_flagged', 'will be flagged');
}
|
||||
|
||||
// Mark cards as overdue after scan completes or on load
|
||||
/**
 * Ask /api/db/overdue which items are past the retention cutoff, set the
 * `overdue` flag on every entry in S.flaggedData and re-render the grid so
 * the overdue badges appear. Does nothing when retention is disabled; any
 * failure while talking to the endpoint is ignored.
 */
async function markOverdueCards() {
  if (!document.getElementById('optRetention')?.checked) return;

  const years = parseInt(document.getElementById('optRetentionYears')?.value) || 5;
  const fyEnd = document.getElementById('optFiscalYearEnd')?.value || '';

  try {
    const query = new URLSearchParams({years});
    if (fyEnd) query.set('fiscal_year_end', fyEnd);

    const resp = await fetch('/api/db/overdue?' + query);
    const payload = await resp.json();
    if (!payload.items) return;

    const overdueIds = new Set(payload.items.map(entry => entry.id));
    // Flag each known item, then redraw so the badges show up
    S.flaggedData.forEach(item => { item.overdue = overdueIds.has(item.id); });
    renderGrid(S.filteredData.length ? S.filteredData : S.flaggedData);

    if (payload.count > 0) {
      log('🗓 ' + payload.count + ' ' + t('m365_overdue_found', 'overdue item(s) found') + ' (cutoff: ' + payload.cutoff_date + ')', 'warn');
    }
  } catch(e) {
    /* DB not available -- skip */
  }
}
|
||||
|
||||
// Pre-filter bulk delete to overdue items
|
||||
/**
 * Pre-fill the bulk-delete "older than" field with the retention cutoff.
 *
 * The cutoff is taken from /api/db/overdue when available. If the request
 * fails, a rolling cutoff (today minus the retention years) is computed
 * client-side. That fallback is formatted from local calendar components;
 * Date#toISOString() would convert to UTC and can yield the previous day.
 */
async function preFilterOverdue() {
  const years = parseInt(document.getElementById('optRetentionYears')?.value, 10) || 5;
  const fyEnd = document.getElementById('optFiscalYearEnd')?.value || '';
  try {
    const params = new URLSearchParams({years});
    if (fyEnd) params.set('fiscal_year_end', fyEnd);
    const r = await fetch('/api/db/overdue?' + params);
    const d = await r.json();
    if (d.cutoff_date) {
      document.getElementById('bdOlderThan').value = d.cutoff_date;
      updateBdPreview();
    }
  } catch(e) {
    // Fallback: compute client-side (rolling mode only)
    const cutoff = new Date();
    cutoff.setFullYear(cutoff.getFullYear() - years);
    const pad = (n) => String(n).padStart(2, '0');
    document.getElementById('bdOlderThan').value =
      cutoff.getFullYear() + '-' + pad(cutoff.getMonth() + 1) + '-' + pad(cutoff.getDate());
    updateBdPreview();
  }
}
|
||||
|
||||
/**
 * Reset the bulk-delete filter fields to their defaults and refresh the
 * match preview.
 */
function clearBdFilters() {
  const defaults = [
    ['bdSource', ''],
    ['bdMinCpr', '1'],
    ['bdOlderThan', ''],
  ];
  for (const [fieldId, value] of defaults) {
    document.getElementById(fieldId).value = value;
  }
  updateBdPreview();
}
|
||||
|
||||
// ── Data subject lookup (#4) ──────────────────────────────────────────────
|
||||
|
||||
// Items returned by the most recent data-subject lookup; consumed by the
// bulk delete action of the same modal.
let _dsubItems = [];

/**
 * Open the data-subject lookup modal in a pristine state (empty input,
 * status and results, delete button hidden) and focus the input shortly after.
 */
function openSubjectModal() {
  const byId = (id) => document.getElementById(id);

  byId("dsubBackdrop").classList.add("open");
  byId("dsubInput").value = "";
  byId("dsubStatus").textContent = "";
  byId("dsubResults").innerHTML = "";
  byId("dsubDeleteBtn").style.display = "none";
  _dsubItems = [];

  // Focus is deferred by 80 ms
  setTimeout(() => byId("dsubInput").focus(), 80);
}
|
||||
|
||||
/** Close the data-subject lookup modal. */
function closeDsubModal() {
  const backdrop = document.getElementById("dsubBackdrop");
  backdrop.classList.remove("open");
}
|
||||
|
||||
/**
 * Look up all flagged items for the CPR number typed into #dsubInput via
 * POST /api/db/subject and list them in the modal.
 *
 * Result fields are HTML-escaped before being written with innerHTML. The
 * hits are kept in _dsubItems so deleteSubjectItems() can act on them.
 */
async function runSubjectLookup() {
  const cpr = document.getElementById("dsubInput").value.trim();
  if (!cpr) return;

  // Escape a value for interpolation into HTML text or a quoted attribute.
  const esc = (v) => String(v).replace(/[&<>"']/g, (ch) => (
    { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
  ));

  const statusEl  = document.getElementById("dsubStatus");
  const resultsEl = document.getElementById("dsubResults");
  const deleteBtn = document.getElementById("dsubDeleteBtn");
  statusEl.textContent = t("m365_subject_searching", "Searching…");
  resultsEl.innerHTML  = "";
  deleteBtn.style.display = "none";
  _dsubItems = [];
  try {
    const r = await fetch("/api/db/subject", {
      method: "POST", headers: {"Content-Type":"application/json"},
      body: JSON.stringify({cpr})
    });
    const d = await r.json();
    if (d.error) { statusEl.textContent = d.error; return; }
    if (!d.count) {
      statusEl.textContent = t("m365_subject_not_found", "No flagged items found for this CPR number.");
      return;
    }
    statusEl.textContent = d.count + " " + t("m365_subject_found", "item(s) found");
    // Tolerate a response that reports a count but omits the item list.
    const items = Array.isArray(d.items) ? d.items : [];
    _dsubItems = items;
    resultsEl.innerHTML = items.map(item => `
      <div class="dsub-result-row">
        <div class="dsub-result-name" title="${esc(item.name)}">${esc(item.name)}</div>
        <div class="dsub-result-meta">${esc(item.source_type || "")}</div>
        <div class="dsub-result-meta">${esc(item.modified || "")}</div>
        <div class="dsub-result-meta" style="color:var(--danger)">${esc(item.cpr_count)} CPR</div>
      </div>
    `).join("");
    if (d.count > 0) deleteBtn.style.display = "block";
  } catch(e) {
    statusEl.textContent = "Error: " + e.message;
  }
}
|
||||
|
||||
/**
 * Permanently delete every item found by the last data-subject lookup
 * (after user confirmation) via POST /api/delete_bulk, then drop them from
 * the local result state and re-render.
 *
 * Fixes relative to the earlier version:
 *  - renderGrid() was called without arguments, which threw and made a
 *    successful delete report "Delete failed"; the item list is now passed.
 *  - A server-reported error no longer clears the results as if it succeeded.
 *  - Id membership uses a Set instead of Array#includes inside filter().
 */
async function deleteSubjectItems() {
  if (!_dsubItems.length) return;
  const count = _dsubItems.length;
  if (!confirm(`${count} ${t("m365_subject_delete_confirm", "item(s) will be permanently deleted. Continue?")}`))
    return;
  const ids = _dsubItems.map(i => i.id);
  const statusEl = document.getElementById("dsubStatus");
  statusEl.textContent = t("m365_bulk_deleting", "Deleting…");
  try {
    const r = await fetch("/api/delete_bulk", {
      method: "POST", headers: {"Content-Type":"application/json"},
      body: JSON.stringify({ids, reason: "data-subject-request"})
    });
    const d = await r.json();
    if (d.error && !d.ok) {
      // Keep the results and the delete button so the user can retry.
      statusEl.textContent = d.error;
      return;
    }
    statusEl.textContent = `${d.deleted || 0} ${t("m365_bulk_deleted","deleted")}`;
    document.getElementById("dsubDeleteBtn").style.display = "none";
    document.getElementById("dsubResults").innerHTML = "";
    _dsubItems = [];
    // Refresh grid
    const removed = new Set(ids);
    S.flaggedData  = S.flaggedData.filter(f => !removed.has(f.id));
    S.filteredData = S.filteredData.filter(f => !removed.has(f.id));
    renderGrid(S.filteredData.length ? S.filteredData : S.flaggedData);
    updateStats();
  } catch(e) {
    statusEl.textContent = "Delete failed: " + e.message;
  }
}
|
||||
|
||||
// ── Disposition tagging (#6) ───────────────────────────────────────────────
|
||||
|
||||
// Id of the item whose disposition is currently shown in the preview panel.
let _dispositionItemId = null;

/**
 * Load the stored disposition for an item from /api/db/disposition/<id> and
 * reflect it in the disposition select of the preview panel.
 *
 * The row is hidden when the endpoint reports an error or the request fails.
 * A response that arrives after another item has been selected is ignored,
 * so it cannot overwrite the newer item's select value.
 *
 * @param {string} itemId  Id of the item being previewed.
 */
async function loadDisposition(itemId) {
  _dispositionItemId = itemId;
  const row    = document.getElementById("dispositionRow");
  const sel    = document.getElementById("dispositionSelect");
  const saved  = document.getElementById("dispositionSaved");
  row.style.display = "flex";
  saved.textContent = "";
  try {
    const r = await fetch("/api/db/disposition/" + encodeURIComponent(itemId));
    const d = await r.json();
    if (_dispositionItemId !== itemId) return;  // stale — another item was opened meanwhile
    if (d.error) {
      // DB not available -- hide row
      row.style.display = "none";
      return;
    }
    const status = d.status || "unreviewed";
    sel.value = status;
    // Cache on S.flaggedData item so the filter bar works without extra API calls
    const item = S.flaggedData.find(f => f.id === itemId);
    if (item) item.disposition = status;
  } catch(e) {
    if (_dispositionItemId === itemId) row.style.display = "none";
  }
}
|
||||
|
||||
/**
 * Persist the disposition chosen in #dispositionSelect for the item that is
 * currently previewed (POST /api/db/disposition).
 *
 * The item id is captured before the request so the cached value is written
 * to the right item even if another preview is opened while the request is
 * in flight. A non-2xx response is reported as "Error" instead of "Saved".
 */
async function saveDisposition() {
  const itemId = _dispositionItemId;
  if (!itemId) return;
  const status = document.getElementById("dispositionSelect").value;
  const savedEl = document.getElementById("dispositionSaved");
  savedEl.textContent = "";
  try {
    const r = await fetch("/api/db/disposition", {
      method: "POST", headers: {"Content-Type":"application/json"},
      body: JSON.stringify({item_id: itemId, status})
    });
    if (!r.ok) throw new Error("HTTP " + r.status);
    savedEl.textContent = t("m365_disp_saved", "✓ Saved");
    setTimeout(() => { savedEl.textContent = ""; }, 2000);
    // Update cached value on the S.flaggedData item
    const item = S.flaggedData.find(f => f.id === itemId);
    if (item) item.disposition = status;
    // Re-apply the filters when a disposition filter is active, since the
    // item may no longer match it
    const dispFilter = document.getElementById("filterDisposition")?.value;
    if (dispFilter) applyFilters();
  } catch(e) {
    savedEl.textContent = "Error";
  }
}
|
||||
|
||||
/**
 * Close the preview panel, unload the preview frame and clear the card
 * selection.
 *
 * Previews rendered through `srcdoc` are cleared as well: a srcdoc attribute
 * takes precedence over src, so resetting only src would leave the previous
 * document in place.
 */
function closePreview() {
  const panel = document.getElementById('previewPanel');
  panel.style.width = '';  // clear inline width so CSS .hidden { width:0 } takes effect
  panel.classList.add('hidden');
  const frame = document.getElementById('previewFrame');
  frame.onload = null;               // a pending load must not reveal the frame later
  frame.removeAttribute('srcdoc');
  frame.src = '';
  document.querySelectorAll('.card.selected').forEach(c => c.classList.remove('selected'));
  _previewItemId = null;
}
|
||||
|
||||
// Escape closes every modal and panel; the close functions are invoked in a
// fixed order regardless of which one is actually open.
document.addEventListener('keydown', (evt) => {
  if (evt.key !== 'Escape') return;
  closeAbout();
  closeModeInfo();
  closeBulkDelete();
  closePreview();
  closeDsubModal();
  closeSmtpModal();
  closeProfileMgmt();
  closeImportDBModal();
  closeFileSourcesModal();
  closeSourcesMgmt();
  closeSettings();
  closePinPrompt();
});
|
||||
|
||||
// ── Delete ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Delete a single item after user confirmation (POST /api/delete_item).
 * On success the item is removed from the local result state, its card is
 * removed from the DOM, the stats are refreshed and the preview is closed if
 * it was showing this item. Failures are written to the log.
 *
 * @param {object} f             Item to delete (id, name, source_type, account_id, drive_id).
 * @param {Element|null} cardEl  Card element to remove, if any.
 */
async function deleteItem(f, cardEl) {
  const question = `${t('m365_delete_confirm', 'Delete')} "${f.name}"?\n\n${t('m365_delete_warning', 'This cannot be undone.')}`;
  if (!confirm(question)) return;

  const payload = {
    id: f.id,
    source_type: f.source_type,
    account_id: f.account_id,
    drive_id: f.drive_id,
  };

  try {
    const resp = await fetch('/api/delete_item', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify(payload),
    });
    const result = await resp.json();

    if (!result.ok) {
      log(t('m365_log_delete_failed', 'Delete failed:') + ' ' + (result.error || '?'), 'err');
      return;
    }

    const keep = (entry) => entry.id !== f.id;
    S.flaggedData  = S.flaggedData.filter(keep);
    S.filteredData = S.filteredData.filter(keep);
    if (cardEl) cardEl.remove();
    updateStats();
    log(t('m365_log_deleted', 'Deleted:') + ' ' + f.name, 'ok');
    if (_previewItemId === f.id) closePreview();
  } catch(err) {
    log(t('m365_log_delete_failed', 'Delete failed:') + ' ' + err.message, 'err');
  }
}
|
||||
|
||||
// ── Bulk delete modal ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Open the bulk-delete modal: re-apply translations, refresh the match
 * preview, then show the backdrop.
 */
function openBulkDelete() {
  applyI18n();
  updateBdPreview();
  const backdrop = document.getElementById('bulkDeleteBackdrop');
  backdrop.classList.add('open');
}
|
||||
/** Hide the bulk-delete modal and clear its progress line. */
function closeBulkDelete() {
  const backdrop = document.getElementById('bulkDeleteBackdrop');
  backdrop.classList.remove('open');
  const progress = document.getElementById('bdProgress');
  progress.textContent = '';
}
|
||||
|
||||
/**
 * Read the bulk-delete filter fields.
 *
 * @returns {{source_type: string, min_cpr: number, older_than_date: string}}
 *   min_cpr falls back to 1 when the field is empty, non-numeric or 0.
 */
function _bdFilters() {
  const fieldValue = (id) => document.getElementById(id).value;
  const source_type = fieldValue('bdSource');
  const min_cpr = parseInt(fieldValue('bdMinCpr')) || 1;
  const older_than_date = fieldValue('bdOlderThan');
  return { source_type, min_cpr, older_than_date };
}
|
||||
|
||||
/**
 * Return the flagged items that match the current bulk-delete filters
 * (source type, minimum CPR count, "older than" date).
 *
 * Items without a `modified` value never match an "older than" filter.
 * Previously `undefined > date` evaluated to false, so items of unknown age
 * passed the date filter and were queued for permanent deletion.
 *
 * @returns {Array<object>} Matching entries of S.flaggedData.
 */
function _bdMatches() {
  const f = _bdFilters();
  return S.flaggedData.filter(x => {
    if (f.source_type && x.source_type !== f.source_type) return false;
    if (x.cpr_count < f.min_cpr) return false;
    // String comparison; NOTE(review): assumes `modified` starts with an
    // ISO-like YYYY-MM-DD date, same as the filter value — confirm.
    if (f.older_than_date && (!x.modified || x.modified > f.older_than_date)) return false;
    return true;
  });
}
|
||||
|
||||
/**
 * Refresh the bulk-delete preview line with the number of matching items and
 * enable the confirm button only when at least one item matches.
 */
function updateBdPreview() {
  const hits = _bdMatches();
  const previewEl = document.getElementById('bdPreview');
  if (!previewEl) return;

  const nothingToDelete = hits.length === 0;
  if (nothingToDelete) {
    previewEl.textContent = t('m365_bulk_no_match', 'No items match these criteria.');
  } else {
    previewEl.innerHTML = `<strong style="color:var(--danger)">${hits.length}</strong> ${t('m365_bulk_match_count', 'item(s) will be deleted')}`;
  }
  document.getElementById('bdConfirmBtn').disabled = nothingToDelete;
}
|
||||
|
||||
|
||||
// ── Auto-connect SSE on page load (#21) ──────────────────────────────────────
|
||||
// ── SSE connection management ────────────────────────────────────────────────
|
||||
// The browser keeps an SSE connection to /api/scan/stream for live scan events.
|
||||
// Problem: idle SSE connections silently die (Flask/Werkzeug threading, proxies,
|
||||
// OS TCP keepalive). EventSource auto-reconnects, but during the reconnect
|
||||
// window a scheduled scan's events are lost.
|
||||
//
|
||||
// Solution: a polling watchdog checks /api/scan/status every few seconds.
|
||||
// When it detects a running scan (manual or scheduled), it ensures the SSE
|
||||
// connection is alive and the progress UI is visible.
|
||||
|
||||
// Milliseconds between /api/scan/status polls.
const _SSE_POLL_INTERVAL = 4000;
// Interval handle of the polling watchdog; null until it has been started.
let _sseWatchdogTimer = null;
// Set to true once the first status poll has completed.
let _initialStatusChecked = false;
|
||||
|
||||
/**
 * Make sure S.es holds a usable EventSource for /api/scan/stream.
 * An existing connection is kept unless its readyState is CLOSED; a closed
 * one is discarded and replaced, with the scan and scheduler listeners
 * attached to the new connection.
 */
function _ensureSSE() {
  const existing = S.es;
  if (existing) {
    if (existing.readyState !== EventSource.CLOSED) return;
    try { existing.close(); } catch(_){}
  }

  console.log('[SSE] Opening connection to /api/scan/stream');
  S.es = new EventSource('/api/scan/stream');
  S.es.onopen = () => {
    console.log('[SSE] Connection established');
  };
  S.es.onerror = (e) => {
    console.warn('[SSE] Connection error (will auto-reconnect)', e);
  };
  _attachScanListeners(S.es);
  _attachSchedulerListeners(S.es);
}
|
||||
|
||||
/**
 * One watchdog tick: poll /api/scan/status and, when a scan is running,
 * ensure the SSE connection exists and the progress UI reflects the scan.
 * On the first completed poll with no scan running, the last-scan summary is
 * loaded. Poll failures are only logged to the console.
 */
function _sseWatchdog() {
  fetch('/api/scan/status')
    .then((resp) => resp.json())
    .then((status) => {
      if (status.running) {
        // A scan is in progress — make sure SSE is connected and progress UI is visible
        _ensureSSE();
        const uiKnowsAboutScan = S._m365ScanRunning || S._googleScanRunning || S._fileScanRunning;
        if (!uiKnowsAboutScan) {
          document.getElementById('scanBtn').disabled = true;
          document.getElementById('stopBtn').style.display = 'inline-block';
          // /api/scan/status checks the M365 lock — if running=true it's an M365 scan
          S._m365ScanRunning = true;
          _renderProgressSegments();
          document.getElementById('progressFile').textContent = t('m365_sse_reconnecting', 'Reconnecting to running scan…');
          log(t('m365_sse_reconnecting', 'Reconnecting to running scan…'));
        }
      }
      if (!_initialStatusChecked) {
        _initialStatusChecked = true;
        if (!status.running) loadLastScanSummary();
      }
      // Polling continues even when no scan is running: the SSE connection
      // may have died and the *next* scheduled scan still has to be detected.
      // The SSE itself is only opened/reopened when a scan is detected.
    })
    .catch((err) => {
      // Status endpoint unavailable — server might be restarting
      console.warn('[SSE] status poll failed:', err);
    });
}
|
||||
|
||||
/**
 * Page-load entry point for live scan updates: open the SSE connection, run
 * one status check right away, and start the polling watchdog unless it is
 * already running.
 */
function _autoConnectSSEIfRunning() {
  // Initial connection
  _ensureSSE();
  // Immediate check — a scheduled scan may have started before the page loaded
  _sseWatchdog();
  // Periodic check — catches scheduled scans that start later
  if (_sseWatchdogTimer) return;
  _sseWatchdogTimer = setInterval(_sseWatchdog, _SSE_POLL_INTERVAL);
}
|
||||
|
||||
// ── Viewer mode result loader ─────────────────────────────────────────────────
|
||||
/**
 * Viewer mode: load the flagged items from /api/db/flagged and render them.
 * When there are none, show a summary card built from /api/db/stats instead
 * (only if the stats contain a scan_id and both target elements exist).
 * Errors are logged to the console.
 */
async function _loadViewerResults() {
  // Render the "last scan" summary card (stats only, no items yet)
  const showSummaryCard = async () => {
    const panel = document.getElementById('lastScanSummary');
    const empty = document.getElementById('emptyState');
    const statsResp = await fetch('/api/db/stats');
    const stats = await statsResp.json();
    if (!(stats.scan_id && panel && empty)) return;

    let dateStr = '—';
    if (stats.finished_at) {
      dateStr = new Date(stats.finished_at * 1000).toLocaleDateString('da-DK', {day:'numeric', month:'short', year:'numeric'});
    }
    const srcLabels = {
      email: 'Outlook', onedrive: 'OneDrive', sharepoint: 'SharePoint', teams: 'Teams',
      gmail: 'Gmail', gdrive: 'Drive', local: 'Lokale filer', smb: 'SMB',
    };
    const srcStr = Object.keys(stats.by_source || {}).map(s => srcLabels[s] || s).join(' · ') || '—';

    panel.innerHTML = [
      '<div class="last-scan-card">',
      '<h3>', t('last_scan_title', 'Seneste scanning'), '</h3>',
      '<div class="last-scan-stats">',
      '<div class="last-scan-stat"><span class="val">', (stats.flagged_count || 0), '</span><span class="lbl">', t('last_scan_hits', 'Fund'), '</span></div>',
      '<div class="last-scan-stat"><span class="val">', (stats.unique_subjects || 0), '</span><span class="lbl">', t('last_scan_subjects', 'Unikke CPR'), '</span></div>',
      '<div class="last-scan-stat"><span class="val">', (stats.total_scanned || 0), '</span><span class="lbl">', t('last_scan_scanned', 'Scannet'), '</span></div>',
      '</div>',
      '<div style="margin-top:12px;font-size:11px;color:var(--muted)">', dateStr, ' · ', srcStr, '</div>',
      '</div>',
    ].join('');
    empty.style.display = 'none';
    panel.style.display = 'flex';
  };

  try {
    const resp = await fetch('/api/db/flagged');
    const items = await resp.json();

    const hasItems = Array.isArray(items) && items.length !== 0;
    if (!hasItems) {
      await showSummaryCard();
      return;
    }

    S.flaggedData = items;
    S.filteredData = [];

    const grid       = document.getElementById('grid');
    const emptyState = document.getElementById('emptyState');
    const lastScan   = document.getElementById('lastScanSummary');
    if (emptyState) emptyState.style.display = 'none';
    if (lastScan)   lastScan.style.display   = 'none';
    if (grid)       grid.style.display       = 'grid';

    renderGrid(items);
    try { loadTrend(); } catch(_) {}
  } catch(err) {
    console.error('[viewer] failed to load results:', err);
  }
}
|
||||
|
||||
// Page initialisation. Viewer mode only restores UI state and loads stored
// results; the full app additionally sets up sources, SSE and form listeners.
document.addEventListener('DOMContentLoaded', function onReady() {
  _restoreLog();
  _initLogResize();
  _initPreviewResize();
  _initSourcesResize();
  restoreSectionStates();

  if (window.VIEWER_MODE) {
    _loadViewerResults();
    return;
  }

  // NOTE(review): the original trailing comment "populates S._fileSources then
  // calls renderSourcesPanel()" sat on the _autoConnectSSEIfRunning() line but
  // appears to describe _loadFileSources() — confirm.
  _loadFileSources();
  _autoConnectSSEIfRunning();
  smGoogleRefreshStatus();   // sets _googleConnected and re-renders sources panel

  // Restore all source toggle states
  fetch('/api/src_toggles')
    .then((resp) => resp.json())
    .then((toggles) => {
      _restoreM365SourceToggles(toggles);
      const gmailBox = document.getElementById('smGoogleSrcGmail');
      const driveBox = document.getElementById('smGoogleSrcDrive');
      if (gmailBox && toggles.src_gmail !== undefined) gmailBox.checked = !!toggles.src_gmail;
      if (driveBox && toggles.src_drive !== undefined) driveBox.checked = !!toggles.src_drive;
    })
    .catch(() => {});

  // ── macOS pywebview: push content below traffic-light buttons ─────────────
  // In frameless pywebview windows on macOS the content starts at y=0, behind
  // the system close/minimise/maximise buttons (~28px). Padding is applied only
  // when running inside pywebview AND navigator.platform contains "mac".
  const onMacWebview = window.pywebview && navigator.platform.toLowerCase().includes('mac');
  if (onMacWebview) {
    document.body.style.paddingTop = '30px';
  }

  // Live-update the bulk-delete preview while its filters are edited
  for (const id of ['bdSource', 'bdMinCpr', 'bdOlderThan']) {
    const field = document.getElementById(id);
    if (field) field.addEventListener('input', updateBdPreview);
  }

  // Recompute the retention cutoff hint when its inputs change
  for (const id of ['optRetentionYears', 'optFiscalYearEnd']) {
    const field = document.getElementById(id);
    if (field) field.addEventListener('change', updateRetentionCutoffHint);
  }

  // Reload the trend chart on resize while its panel is visible
  window.addEventListener('resize', function onResize() {
    const trendPanel = document.getElementById('trendPanel');
    if (trendPanel && trendPanel.style.display !== 'none') loadTrend();
  });

  const deltaCb = document.getElementById('optDelta');
  if (deltaCb) {
    deltaCb.addEventListener('change', function onDeltaToggle() {
      if (deltaCb.checked) {
        checkDeltaStatus();
      } else {
        document.getElementById('deltaStatusRow').style.display = 'none';
      }
    });
  }
});
|
||||
|
||||
/**
 * Permanently delete all items matching the bulk-delete filters (after user
 * confirmation) via POST /api/delete_bulk, then update local state, the grid,
 * the stats and the modal's progress line.
 *
 * Changes relative to the earlier version:
 *  - Items the server reports as failed stay in the UI when the error entries
 *    carry an `id`. NOTE(review): only `name` and `error` are read from
 *    d.errors elsewhere in this function; `id` is assumed — confirm against
 *    the API. Without ids the behaviour is unchanged.
 *  - The preview count is refreshed after deletion, and the confirm button is
 *    re-enabled only while there is still something to delete.
 */
async function executeBulkDelete() {
  const matches = _bdMatches();
  if (!matches.length) return;
  const confirmMsg = matches.length + ' ' + t('m365_bulk_confirm_q', 'item(s) will be permanently deleted. Continue?');
  if (!confirm(confirmMsg)) return;

  const btn  = document.getElementById('bdConfirmBtn');
  const prog = document.getElementById('bdProgress');
  btn.disabled = true;
  prog.textContent = t('m365_bulk_deleting', 'Deleting…');

  try {
    const r = await fetch('/api/delete_bulk', {
      method: 'POST', headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({ ids: matches.map(x => x.id), filters: {} })
    });
    const d = await r.json();
    if (d.ok) {
      const errors = Array.isArray(d.errors) ? d.errors : [];
      const failedIds = new Set(
        errors.map(err => (err ? err.id : undefined)).filter(id => id !== undefined && id !== null)
      );
      const deletedSet = new Set(matches.map(x => x.id).filter(id => !failedIds.has(id)));
      S.flaggedData  = S.flaggedData.filter(x => !deletedSet.has(x.id));
      S.filteredData = S.filteredData.filter(x => !deletedSet.has(x.id));
      renderGrid(S.filteredData.length ? S.filteredData : S.flaggedData);
      updateStats();
      updateBdPreview();   // the match count shown in the modal is stale after deletion
      prog.innerHTML = `<span style="color:var(--ok,#4c4)">✓ ${d.deleted} ${t('m365_bulk_deleted', 'deleted')}</span>` +
        (d.failed ? ` · <span style="color:var(--danger)">${d.failed} ${t('m365_bulk_failed', 'failed')}</span>` : '');
      errors.forEach(err => log('✗ ' + err.name + ': ' + err.error, 'err'));
      log(t('m365_log_bulk_done', 'Bulk delete:') + ' ' + d.deleted + ' deleted, ' + d.failed + ' failed', d.failed ? 'err' : 'ok');
      if (d.failed === 0) setTimeout(closeBulkDelete, 1800);
    } else {
      prog.textContent = d.error || 'Error';
    }
  } catch(e) {
    prog.textContent = e.message;
  } finally {
    // Re-enable only if a retry would still have items to act on.
    btn.disabled = _bdMatches().length === 0;
  }
}
|
||||
|
||||
/**
 * Recompute S.filteredData from S.flaggedData using the filter bar (search
 * text, source, disposition, transfer risk, special category) and update the
 * grid. When items exist but none match, the grid is hidden and a
 * "no matches" empty state is shown instead.
 */
function applyFilters() {
  const search      = document.getElementById('filterSearch').value.trim().toLowerCase();
  const srcVal      = document.getElementById('filterSource').value;
  const dispVal     = document.getElementById('filterDisposition')?.value || '';
  const transferVal = document.getElementById('filterTransfer')?.value || '';
  const specialVal  = document.getElementById('filterSpecial')?.value || '';

  // True when the item passes every active filter.
  const passes = (item) => {
    if (search && !item.name.toLowerCase().includes(search)) return false;
    if (srcVal && item.source_type !== srcVal) return false;
    if (dispVal && (item.disposition || 'unreviewed') !== dispVal) return false;
    if (transferVal && (item.transfer_risk || '') !== transferVal) return false;
    if (specialVal === '1' && !(item.special_category && item.special_category.length)) return false;
    if (specialVal === 'photo' && !(item.face_count > 0)) return false;
    return true;
  };
  S.filteredData = S.flaggedData.filter(passes);

  const grid = document.getElementById('grid');
  const nothingMatches = S.filteredData.length === 0 && S.flaggedData.length > 0;

  if (nothingMatches) {
    grid.style.display = 'none';
    document.getElementById('emptyState').innerHTML =
      `<div class="empty-icon">🔍</div><div class="empty-text">${t('m365_no_matches','No matches')}</div>`;
    document.getElementById('emptyState').style.display = 'flex';
    return;
  }

  document.getElementById('emptyState').style.display = 'none';
  grid.style.display = S.isListView ? 'block' : 'grid';
  renderGrid(S.filteredData);
}
|
||||
|
||||
/**
 * Export the current scan results as an Excel workbook.
 *
 * Does nothing (apart from a log line) when S.flaggedData is empty.
 * Inside pywebview the native save dialog exposed as
 * window.pywebview.api.save_excel() is used, because blob URL downloads do
 * not work there. In a plain browser the workbook is fetched from
 * /api/export_excel and downloaded through a temporary <a download> element.
 */
async function exportExcel() {
  if (!S.flaggedData || S.flaggedData.length === 0) {
    log(t('m365_export_no_data', 'No results to export.'));
    return;
  }

  // pywebview app: hand over to the native save dialog and stop here.
  if (window.pywebview && window.pywebview.api && window.pywebview.api.save_excel) {
    try {
      const r = await window.pywebview.api.save_excel();
      if (r && r.ok) { log('Excel exported: ' + r.path); }
      // A result of 'cancelled' is not reported as a failure.
      else if (r && r.error && r.error !== 'cancelled') { alert('Export failed: ' + r.error); }
    } catch(e) { alert('Export failed: ' + e.message); }
    return;
  }

  // Browser / localhost fallback: fetch as blob and trigger download.
  const btn = document.getElementById('exportBtn');
  if (btn) { btn.disabled = true; btn.textContent = '⏳'; }
  try {
    const r = await fetch('/api/export_excel');
    if (!r.ok) {
      const err = await r.json().catch(() => ({error: 'Export failed'}));
      log('Export error: ' + (err.error || r.status), 'err');
      return;
    }
    const blob = await r.blob();
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    const disp = r.headers.get('Content-Disposition') || '';
    // Accept both filename=name.xlsx and filename="name.xlsx"; the quotes
    // must not end up in the suggested download name.
    const match = disp.match(/filename="?([^";]+)"?/);
    a.href = url;
    a.download = (match && match[1].trim()) || 'export.xlsx';
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
    log(t('m365_export_done', 'Excel export ready.'), 'ok');
  } catch(e) {
    log('Export error: ' + e.message, 'err');
  } finally {
    // Restore the button whether the export succeeded or failed.
    if (btn) { btn.disabled = false; btn.innerHTML = '⬇ Excel'; }
  }
}
|
||||
|
||||
/**
 * Export the Article 30 report for the current scan results.
 *
 * Does nothing (apart from a log line) when S.flaggedData is empty.
 * Inside pywebview the native dialog window.pywebview.api.save_article30()
 * is used; otherwise the document is fetched from /api/export_article30 and
 * downloaded through a temporary <a download> element.
 */
async function exportArticle30() {
  if (!S.flaggedData || S.flaggedData.length === 0) {
    log(t('m365_export_no_data', 'No results to export.'));
    return;
  }

  // pywebview app: use the native save dialog and stop here.
  if (window.pywebview && window.pywebview.api && window.pywebview.api.save_article30) {
    try {
      const r = await window.pywebview.api.save_article30();
      if (r && r.ok) { log('Article 30 exported: ' + r.path); }
      // A result of 'cancelled' is not reported as a failure.
      else if (r && r.error && r.error !== 'cancelled') { alert('Export failed: ' + r.error); }
    } catch(e) { alert('Export failed: ' + e.message); }
    return;
  }

  // Browser fallback: fetch as blob and trigger download.
  const btn = document.getElementById('exportA30Btn');
  if (btn) { btn.disabled = true; btn.textContent = '⏳'; }
  try {
    const r = await fetch('/api/export_article30');
    if (!r.ok) {
      const err = await r.json().catch(() => ({error: 'Export failed'}));
      log('Article 30 export error: ' + (err.error || r.status), 'err');
      return;
    }
    const blob = await r.blob();
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    const disp = r.headers.get('Content-Disposition') || '';
    // Accept both filename=name.docx and filename="name.docx"; the quotes
    // must not end up in the suggested download name.
    const match = disp.match(/filename="?([^";]+)"?/);
    a.href = url;
    a.download = (match && match[1].trim()) || 'article30.docx';
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
    log(t('m365_article30_done', 'Article 30 report ready.'), 'ok');
  } catch(e) {
    log('Article 30 export error: ' + e.message, 'err');
  } finally {
    // Restore the button whether the export succeeded or failed.
    if (btn) { btn.disabled = false; btn.innerHTML = '📋 Art.30'; }
  }
}
|
||||
|
||||
/**
 * Reset every filter control to its empty value and re-apply the filters.
 * #filterSearch and #filterSource are accessed unconditionally; the
 * disposition, transfer and special selects are reset only when present.
 */
function clearFilters() {
  document.getElementById('filterSearch').value = '';
  document.getElementById('filterSource').value = '';

  ['filterDisposition', 'filterTransfer', 'filterSpecial'].forEach(function (id) {
    const control = document.getElementById(id);
    if (control) control.value = '';
  });

  applyFilters();
}
|
||||
|
||||
/**
 * Switch between list view and grid view.
 * Flips S.isListView, relabels #listViewBtn with the name of the *other*
 * view, updates the class and display mode of #grid, and re-renders using
 * the filtered data when it is non-empty, otherwise all flagged data.
 */
function toggleView() {
  const listMode = !S.isListView;
  S.isListView = listMode;

  const viewBtn = document.getElementById('listViewBtn');
  if (listMode) {
    viewBtn.textContent = t('m365_btn_grid_view', '⊞ Grid');
  } else {
    viewBtn.textContent = t('m365_btn_list_view', '☰ List');
  }

  const grid = document.getElementById('grid');
  grid.className = listMode ? '' : 'grid';
  grid.style.display = listMode ? 'block' : 'grid';

  const rows = S.filteredData.length ? S.filteredData : S.flaggedData;
  renderGrid(rows);
}
|
||||
|
||||
// ── Hint tooltips ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Toggle the hint bubble that follows a `.hint-icon` element.
 *
 * Any open hint is closed first. If `icon` was not already active it is
 * activated, its `.hint-bubble` sibling is shown and positioned to the right
 * of the icon, vertically centred on it, and a document-level click handler
 * is installed that closes the hint on the next click outside a hint icon.
 *
 * At most one outside-click handler is ever registered: the handler (and the
 * timer that installs it) is remembered on the function object and discarded
 * on every call, so repeated toggling does not pile up listeners.
 *
 * @param {Element} icon The clicked `.hint-icon` element.
 */
function toggleHint(icon) {
  const isActive = icon.classList.contains('active');

  // Discard the outside-click handler (or its pending installation) left
  // over from a previously opened hint.
  if (toggleHint._pendingTimer) {
    clearTimeout(toggleHint._pendingTimer);
    toggleHint._pendingTimer = null;
  }
  if (toggleHint._outsideHandler) {
    document.removeEventListener('click', toggleHint._outsideHandler);
    toggleHint._outsideHandler = null;
  }

  // Close all open hints first
  document.querySelectorAll('.hint-icon.active').forEach(function(el) {
    el.classList.remove('active');
    const b = el.nextElementSibling;
    if (b && b.classList.contains('hint-bubble')) b.style.display = '';
  });

  if (isActive) return;

  icon.classList.add('active');
  // Position bubble using fixed coords so it escapes sidebar stacking context
  const bubble = icon.nextElementSibling;
  if (bubble && bubble.classList.contains('hint-bubble')) {
    // Show first: offsetHeight is only meaningful once the bubble is displayed.
    bubble.style.display = 'block';
    const rect = icon.getBoundingClientRect();
    bubble.style.top = Math.round(rect.top + rect.height / 2 - bubble.offsetHeight / 2) + 'px';
    bubble.style.left = Math.round(rect.right + 8) + 'px';
  }

  // Close when clicking anywhere else. Registration is deferred so the click
  // that opened the hint does not immediately close it again.
  toggleHint._pendingTimer = setTimeout(function() {
    toggleHint._pendingTimer = null;
    function closeHint(e) {
      // Clicks on hint icons are handled by toggleHint itself.
      if (e.target.classList.contains('hint-icon')) return;
      document.querySelectorAll('.hint-icon.active').forEach(function(el) {
        el.classList.remove('active');
      });
      document.querySelectorAll('.hint-bubble').forEach(function(el) {
        el.style.display = '';
      });
      document.removeEventListener('click', closeHint);
      toggleHint._outsideHandler = null;
    }
    toggleHint._outsideHandler = closeHint;
    document.addEventListener('click', closeHint);
  }, 0);
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
|
||||
// Publish this module's functions and values on `window` so they can be
// reached from HTML handlers and from other modules.
// NOTE(review): entries such as _previewItemId or _sseWatchdogTimer are copied
// by value at this point; if they are primitives that get reassigned later,
// the window copy will not follow — confirm against their declarations.
Object.assign(window, {
  appendCard: appendCard,
  renderGrid: renderGrid,
  openPreview: openPreview,
  toggleRetentionPanel: toggleRetentionPanel,
  updateRetentionCutoffHint: updateRetentionCutoffHint,
  markOverdueCards: markOverdueCards,
  preFilterOverdue: preFilterOverdue,
  clearBdFilters: clearBdFilters,
  openSubjectModal: openSubjectModal,
  closeDsubModal: closeDsubModal,
  runSubjectLookup: runSubjectLookup,
  deleteSubjectItems: deleteSubjectItems,
  loadDisposition: loadDisposition,
  saveDisposition: saveDisposition,
  closePreview: closePreview,
  deleteItem: deleteItem,
  openBulkDelete: openBulkDelete,
  closeBulkDelete: closeBulkDelete,
  _bdFilters: _bdFilters,
  _bdMatches: _bdMatches,
  updateBdPreview: updateBdPreview,
  _ensureSSE: _ensureSSE,
  _sseWatchdog: _sseWatchdog,
  _autoConnectSSEIfRunning: _autoConnectSSEIfRunning,
  _loadViewerResults: _loadViewerResults,
  executeBulkDelete: executeBulkDelete,
  applyFilters: applyFilters,
  exportExcel: exportExcel,
  exportArticle30: exportArticle30,
  clearFilters: clearFilters,
  toggleView: toggleView,
  toggleHint: toggleHint,
  SOURCE_BADGES: SOURCE_BADGES,
  _previewItemId: _previewItemId,
  _dsubItems: _dsubItems,
  _dispositionItemId: _dispositionItemId,
  _sseWatchdogTimer: _sseWatchdogTimer,
  _initialStatusChecked: _initialStatusChecked,
  _SSE_POLL_INTERVAL: _SSE_POLL_INTERVAL,
});
|
||||
730
static/js/scan.js
Normal file
730
static/js/scan.js
Normal file
@ -0,0 +1,730 @@
|
||||
import { S } from './state.js';
|
||||
// ── DB Export / Import (#11) ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Export the scanner database as an archive.
 *
 * Inside pywebview the native dialog window.pywebview.api.save_db_export()
 * is used. In a plain browser the archive is fetched from /api/db/export and
 * downloaded through a temporary <a download> element. Failures are reported
 * with alert(); success is written to the log.
 */
async function exportDB() {
  // In pywebview app, use native save dialog; in browser, use blob download
  if (window.pywebview && window.pywebview.api && window.pywebview.api.save_db_export) {
    try {
      const r = await window.pywebview.api.save_db_export();
      if (r && r.ok) { log(t('m365_db_exported','Database exported') + ': ' + r.path); }
      // A result of 'cancelled' is not reported as a failure.
      else if (r && r.error && r.error !== 'cancelled') { alert(t('m365_db_export_error','Export failed') + ': ' + r.error); }
    } catch(e) { alert(t('m365_db_export_error','Export failed') + ': ' + e.message); }
    return;
  }

  // Browser fallback
  try {
    const res = await fetch('/api/db/export');
    if (!res.ok) {
      // The error body may not be JSON; fall back to the HTTP status text.
      const d = await res.json().catch(() => ({}));
      alert(t('m365_db_export_error','Export failed') + ': ' + (d.error || res.statusText));
      return;
    }
    const blob = await res.blob();
    const cd = res.headers.get('Content-Disposition') || '';
    // Accept both filename="name.zip" and the unquoted filename=name.zip form.
    const m = cd.match(/filename="?([^";]+)"?/);
    const name = (m && m[1].trim()) || 'gdpr_export.zip';
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = name;
    // Attach the link while clicking, as the other export functions do.
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
    log(t('m365_db_exported','Database exported') + ': ' + name);
  } catch(e) {
    alert(t('m365_db_export_error','Export failed') + ': ' + e.message);
  }
}
|
||||
|
||||
/**
 * Open the database-import dialog in its initial state: no file chosen,
 * mode 'merge', replace warning hidden and status line empty.
 */
function openImportDBModal() {
  const byId = function (id) { return document.getElementById(id); };

  // The file input and mode select are optional; reset them when present.
  const fileInput = byId('importDbFile');
  if (fileInput) fileInput.value = '';
  const modeSelect = byId('importDbMode');
  if (modeSelect) modeSelect.value = 'merge';

  byId('importDbReplaceWarn').style.display = 'none';
  byId('importDbStatus').textContent = '';
  byId('importDbBackdrop').classList.add('open');
}
|
||||
|
||||
/** Close the database-import dialog by dropping the 'open' class from its backdrop. */
function closeImportDBModal() {
  const backdrop = document.getElementById('importDbBackdrop');
  backdrop.classList.remove('open');
}
|
||||
|
||||
// Show/hide the replace warning when mode changes
|
||||
// Once the DOM is ready, keep the replace warning in sync with the import
// mode select: visible for 'replace', hidden for every other mode.
document.addEventListener('DOMContentLoaded', function () {
  const modeSelect = document.getElementById('importDbMode');
  if (!modeSelect) return;

  modeSelect.addEventListener('change', function () {
    const warning = document.getElementById('importDbReplaceWarn');
    if (modeSelect.value === 'replace') {
      warning.style.display = 'block';
    } else {
      warning.style.display = 'none';
    }
  });
});
|
||||
|
||||
/**
 * Upload the archive chosen in the import dialog to /api/db/import.
 *
 * Reads the file from #importDbFile and the mode from #importDbMode
 * (default 'merge'). Replace mode asks for confirmation first and adds
 * confirm=yes to the form data. Progress and the outcome are shown in
 * #importDbStatus; #importDbBtn is disabled while the request runs.
 */
async function doImportDB() {
  const fileInput = document.getElementById('importDbFile');
  const modeSelect = document.getElementById('importDbMode');
  const mode = (modeSelect && modeSelect.value) || 'merge';
  const statusEl = document.getElementById('importDbStatus');
  const importBtn = document.getElementById('importDbBtn');

  const hasFile = Boolean(fileInput && fileInput.files && fileInput.files.length);
  if (!hasFile) {
    statusEl.textContent = t('m365_db_import_no_file','Please select a ZIP file first.');
    statusEl.style.color = 'var(--danger)';
    return;
  }

  const replacing = mode === 'replace';
  if (replacing) {
    const confirmed = confirm(t('m365_db_import_replace_confirm',
      'Replace mode will erase ALL existing scan data and restore from the archive.\n\nMake sure you have a manual backup of ~/.gdpr_scanner.db.\n\nProceed?'));
    if (!confirmed) return;
  }

  importBtn.disabled = true;
  statusEl.style.color = 'var(--muted)';
  statusEl.textContent = t('m365_db_importing','Importing…');

  const form = new FormData();
  form.append('file', fileInput.files[0]);
  form.append('mode', mode);
  if (replacing) form.append('confirm', 'yes');

  try {
    const response = await fetch('/api/db/import', { method: 'POST', body: form });
    const result = await response.json();
    const failed = !response.ok || result.error;
    if (failed) {
      statusEl.style.color = 'var(--danger)';
      statusEl.textContent = '✖ ' + (result.error || response.statusText);
    } else {
      // Summarise the per-key import counts as "key: n, key: n".
      const parts = [];
      for (const [key, value] of Object.entries(result.imported || {})) {
        parts.push(`${key}: ${value}`);
      }
      const counts = parts.join(', ');
      statusEl.style.color = 'var(--accent)';
      statusEl.textContent = '✔ ' + t('m365_db_imported','Imported') + (counts ? ' (' + counts + ')' : '');
      log(t('m365_db_imported','Imported') + ' [' + mode + '] ' + fileInput.files[0].name);
    }
  } catch (err) {
    statusEl.style.color = 'var(--danger)';
    statusEl.textContent = '✖ ' + err.message;
  } finally {
    importBtn.disabled = false;
  }
}
|
||||
|
||||
// ── Scan ─────────────────────────────────────────────────────────────────────
|
||||
/**
 * Gather the current scan configuration from the page.
 *
 * @returns {{sources: string[], fileSources: string[], allSources: string[],
 *            googleSources: string[], user_ids: *, options: Object}}
 *   `sources` holds the checked M365 source ids, `fileSources` the checked
 *   file source ids, `googleSources` the checked Google source ids and
 *   `allSources` all three concatenated in that order. `user_ids` is whatever
 *   getSelectedUsers() returns; `options` mirrors the option controls.
 */
function buildScanPayload() {
  const el = function (id) { return document.getElementById(id); };

  // Ids of the checked checkboxes of one source type inside #sourcesPanel.
  const checkedIds = function (type) {
    const boxes = document.querySelectorAll('#sourcesPanel input[data-source-type="' + type + '"]:checked');
    return Array.from(boxes).map(function (cb) { return cb.dataset.sourceId; });
  };

  // `checked` state of an optional checkbox; false when the element is absent.
  const optionalChecked = function (id) {
    const box = el(id);
    return box ? box.checked : false;
  };

  const sources = checkedIds('m365');
  // File sources (local/smb) are handled separately in startScan() but are
  // included here so profiles and checkpoint checks are aware of them.
  const fileSources = checkedIds('file');
  const googleSources = checkedIds('google');

  const user_ids = getSelectedUsers();

  // Single merged list of every source type, for profiles.
  const allSources = [...sources, ...fileSources, ...googleSources];

  // Numeric fields fall back to their default when empty, unparsable or 0.
  const options = {
    older_than_days: parseInt(el('olderThan').value) || 0,
    email_body: el('optEmailBody').checked,
    attachments: el('optAttachments').checked,
    max_attach_mb: parseInt(el('optMaxAttachMB').value) || 20,
    max_emails: parseInt(el('optMaxEmails').value) || 200,
    delta: optionalChecked('optDelta'),
    scan_photos: optionalChecked('optScanPhotos'),
    retention_enabled: optionalChecked('optRetention'),
    retention_years: parseInt(el('optRetentionYears')?.value) || 5,
    fiscal_year_end: el('optFiscalYearEnd')?.value || '',
  };

  return { sources, fileSources, allSources, googleSources, user_ids, options };
}
|
||||
|
||||
/**
 * Ask the server whether a checkpoint exists for the current scan payload
 * and show or hide #resumeBanner accordingly.
 *
 * Nothing is requested when neither M365 nor file sources are selected, or
 * when M365 sources are selected without any user. Request and parsing
 * errors are ignored.
 */
async function checkCheckpoint() {
  const payload = buildScanPayload();
  const hasM365 = payload.sources.length > 0;
  const hasFiles = payload.fileSources.length > 0;
  if (!hasM365 && !hasFiles) return;
  if (hasM365 && !payload.user_ids.length) return;

  try {
    const response = await fetch('/api/scan/checkpoint', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify(payload)
    });
    const info = await response.json();
    const banner = document.getElementById('resumeBanner');

    if (!info.exists) {
      banner.style.display = 'none';
      return;
    }

    // started_at is multiplied by 1000 before being passed to Date.
    let ts = '';
    if (info.started_at) {
      ts = new Date(info.started_at * 1000).toLocaleString([], {dateStyle:'short', timeStyle:'short'});
    }
    const fallbackText = `Previous scan interrupted (${info.scanned_count} scanned, ${info.flagged_count} found${ts ? ' — ' + ts : ''})`;
    document.getElementById('resumeBannerText').textContent = t('m365_resume_banner', fallbackText);
    banner.style.display = 'flex';
  } catch (err) { /* ignore */ }
}
|
||||
|
||||
/**
 * Discard the stored checkpoint on the server, hide the resume banner and
 * start a scan with resume = false.
 */
async function clearCheckpointAndScan() {
  const clearRequest = fetch('/api/scan/clear_checkpoint', {method:'POST'});
  await clearRequest;

  const banner = document.getElementById('resumeBanner');
  banner.style.display = 'none';

  startScan(false);
}
|
||||
|
||||
/**
 * Query /api/delta/status and show in #deltaStatusRow for how many sources
 * tokens are saved. The row is hidden when the response reports none.
 * Does nothing when the #optDelta checkbox is not on the page; request and
 * parsing errors are ignored.
 */
async function checkDeltaStatus() {
  if (!document.getElementById('optDelta')) return;

  try {
    const response = await fetch('/api/delta/status');
    const status = await response.json();
    const row = document.getElementById('deltaStatusRow');
    const label = document.getElementById('deltaStatusText');

    if (!status.exists) {
      row.style.display = 'none';
      return;
    }

    let src;
    if (status.count === 1) {
      src = '1 source';
    } else {
      src = `${status.count} sources`;
    }
    label.textContent = t('m365_delta_tokens_saved', `Tokens saved for ${src}`);
    row.style.display = 'flex';
    row.style.alignItems = 'center';
  } catch (err) { /* ignore */ }
}
|
||||
|
||||
/**
 * Ask the server to clear the saved delta tokens via POST /api/delta/clear.
 *
 * Only when the request succeeds is #deltaStatusRow hidden and the
 * "cleared" message logged. A network error or a non-OK HTTP status is
 * logged as an error instead, and the status row is left as it was.
 */
async function clearDeltaTokens() {
  try {
    const r = await fetch('/api/delta/clear', {method:'POST'});
    if (!r.ok) {
      log('Failed to clear delta tokens: ' + (r.statusText || r.status), 'err');
      return;
    }
  } catch(e) {
    log('Failed to clear delta tokens: ' + e.message, 'err');
    return;
  }
  document.getElementById('deltaStatusRow').style.display = 'none';
  log(t('m365_delta_cleared', 'Delta tokens cleared — next scan will be a full scan.'));
}
|
||||
|
||||
// ── SMTP / Email report modal ─────────────────────────────────────────────────
|
||||
|
||||
/**
 * Open the SMTP / email report dialog, clear its status line and load the
 * saved configuration.
 *
 * @param {*} focusSend When truthy, the recipients field is focused shortly
 *   (120 ms) after the dialog opens.
 */
function openSmtpModal(focusSend) {
  const backdrop = document.getElementById('smtpBackdrop');
  backdrop.classList.add('open');

  const statusLine = document.getElementById('smtpStatus');
  statusLine.textContent = '';

  loadSmtpConfig();

  if (!focusSend) return;
  setTimeout(function () {
    document.getElementById('smtpRecipients').focus();
  }, 120);
}
|
||||
|
||||
/** Close the SMTP dialog by dropping the 'open' class from its backdrop. */
function closeSmtpModal() {
  const backdrop = document.getElementById('smtpBackdrop');
  backdrop.classList.remove('open');
}
|
||||
|
||||
/**
 * Fetch the saved SMTP settings from /api/smtp/config and copy them into the
 * dialog fields. Text fields are only overwritten when the response carries
 * a truthy value; the TLS/SSL checkboxes only when the key is present.
 * Request and parsing errors are ignored.
 */
async function loadSmtpConfig() {
  try {
    const response = await fetch('/api/smtp/config');
    const cfg = await response.json();
    const field = function (id) { return document.getElementById(id); };

    // [response key, input element id]
    const textFields = [
      ['host', 'smtpHost'],
      ['port', 'smtpPort'],
      ['username', 'smtpUser'],
      ['from_addr', 'smtpFrom'],
    ];
    for (const [key, id] of textFields) {
      if (cfg[key]) field(id).value = cfg[key];
    }

    if (cfg.recipients) {
      // Recipients may arrive as an array or as a ready-made string.
      let joined = cfg.recipients;
      if (Array.isArray(cfg.recipients)) joined = cfg.recipients.join(', ');
      field('smtpRecipients').value = joined;
    }
    // The password itself is never filled in; only a placeholder hint is set.
    if (cfg.password_saved) field('smtpPass').placeholder = '(password saved)';
    if (cfg.use_tls !== undefined) field('smtpTLS').checked = cfg.use_tls;
    if (cfg.use_ssl !== undefined) field('smtpSSL').checked = cfg.use_ssl;
  } catch (err) { /* ignore */ }
}
|
||||
|
||||
/**
 * Save the SMTP settings currently entered in the dialog via
 * POST /api/smtp/config and report the outcome in #smtpStatus.
 *
 * A response with status 'saved' shows the success message; any other
 * response shows its `error` field (or a generic message). Network or
 * parsing failures are also shown in the status line instead of surfacing
 * as an unhandled promise rejection.
 */
async function saveSmtpConfig() {
  const cfg = _smtpFields();
  const el = document.getElementById('smtpStatus');
  try {
    const r = await fetch('/api/smtp/config', {
      method: 'POST', headers: {'Content-Type':'application/json'},
      body: JSON.stringify(cfg)
    });
    const d = await r.json();
    if (d.status === 'saved') {
      el.style.color = 'var(--success)';
      el.textContent = t('m365_smtp_saved', 'Settings saved.');
      // Mirror the hint loadSmtpConfig() shows when a password is stored.
      if (cfg.password) document.getElementById('smtpPass').placeholder = '(password saved)';
    } else {
      el.style.color = 'var(--danger)';
      el.textContent = d.error || 'Error saving';
    }
  } catch(e) {
    el.style.color = 'var(--danger)';
    el.textContent = e.message || 'Error saving';
  }
}
|
||||
|
||||
/**
 * Email the report to the recipients entered in the SMTP dialog via
 * POST /api/send_report.
 *
 * Recipients are split on ',' or ';', trimmed, and empty entries dropped.
 * With no recipients entered, an error is shown and the field focused.
 * The outcome is shown in #smtpStatus and written to the log; network or
 * parsing failures are reported the same way as a failed send, so the
 * status line never stays on "Sending…".
 */
async function sendReport() {
  const cfg = _smtpFields();
  const recipStr = document.getElementById('smtpRecipients').value.trim();
  if (!recipStr) {
    document.getElementById('smtpStatus').style.color = 'var(--danger)';
    document.getElementById('smtpStatus').textContent = t('m365_smtp_no_recipients', 'Enter at least one recipient.');
    document.getElementById('smtpRecipients').focus();
    return;
  }
  const recipients = recipStr.split(/[,;]/).map(s => s.trim()).filter(Boolean);
  const statusEl = document.getElementById('smtpStatus');
  statusEl.style.color = 'var(--muted)';
  statusEl.textContent = t('m365_smtp_sending', 'Sending…');

  try {
    const r = await fetch('/api/send_report', {
      method: 'POST', headers: {'Content-Type':'application/json'},
      body: JSON.stringify({recipients, smtp: cfg})
    });
    const d = await r.json();
    if (d.status === 'sent') {
      statusEl.style.color = 'var(--success)';
      statusEl.textContent = t('m365_smtp_sent', 'Sent to ' + recipients.join(', '));
      log('Report emailed to ' + recipients.join(', '), 'ok');
    } else {
      statusEl.style.color = 'var(--danger)';
      statusEl.textContent = d.error || 'Send failed';
      log('Email send failed: ' + (d.error || ''), 'err');
    }
  } catch(e) {
    statusEl.style.color = 'var(--danger)';
    statusEl.textContent = e.message || 'Send failed';
    log('Email send failed: ' + (e.message || ''), 'err');
  }
}
|
||||
|
||||
/**
 * Read the SMTP dialog inputs into a plain settings object.
 *
 * @returns {{host: string, port: number, username: string, password: string,
 *            from_addr: string, use_tls: boolean, use_ssl: boolean,
 *            recipients: string}}
 *   host, username and from_addr are trimmed; port falls back to 587 when
 *   empty, unparsable or 0; password and recipients are passed through raw.
 */
function _smtpFields() {
  const input = function (id) { return document.getElementById(id); };
  const trimmed = function (id) { return input(id).value.trim(); };

  const host = trimmed('smtpHost');
  const port = parseInt(input('smtpPort').value) || 587;
  const username = trimmed('smtpUser');
  const password = input('smtpPass').value;
  const from_addr = trimmed('smtpFrom');
  const use_tls = input('smtpTLS').checked;
  const use_ssl = input('smtpSSL').checked;
  const recipients = input('smtpRecipients').value;

  return { host, port, username, password, from_addr, use_tls, use_ssl, recipients };
}
|
||||
|
||||
|
||||
|
||||
// ── Shared SSE event listeners (#21) ─────────────────────────────────────────
|
||||
// Extracted so both startScan() and _autoConnectSSEIfRunning() share identical
|
||||
// handlers — fixes the bug where replayed events from a scheduled scan were
|
||||
// silently ignored because the page-load SSE only had scheduler_* listeners.
|
||||
|
||||
/**
 * Register the scan-related SSE event handlers on `source`.
 *
 * Handled events: scan_phase, scan_progress, scan_file, scan_file_flagged,
 * scan_error, scan_cancelled, scan_done, google_scan_done, file_scan_done
 * and sse_replay_done. The handlers keep the progress UI, the running flags
 * on S (S._m365ScanRunning, S._googleScanRunning, S._fileScanRunning), the
 * flagged-item list and the stats in sync with the events.
 *
 * @param {EventTarget} source Object exposing addEventListener; handlers
 *   read the JSON payload from `event.data`.
 */
function _attachScanListeners(source) {
  // Source tag used in event payloads -> name of the running flag on S.
  var runningFlag = {
    m365: '_m365ScanRunning',
    google: '_googleScanRunning',
    file: '_fileScanRunning'
  };

  function byId(id) { return document.getElementById(id); }

  // Flag `src` as running and put the controls into "scan in progress" mode.
  function markRunning(src) {
    S[runningFlag[src]] = true;
    byId('scanBtn').disabled = true;
    byId('stopBtn').style.display = 'inline-block';
    _renderProgressSegments();
  }

  // Make the filter bar and grid visible and re-apply the filters.
  function revealResults() {
    byId('filterBar').style.display = 'flex';
    byId('grid').style.display = S.isListView ? 'block' : 'grid';
    applyFilters();
  }

  // Close the SSE stream only if the user started this scan via the Scan
  // button. For scheduled scans the connection is kept alive so future
  // scheduler events are still received.
  function closeUserStartedStream() {
    if (!S._userStartedScan) return;
    S._userStartedScan = false;
    if (S.es) { S.es.close(); S.es = null; }
  }

  // Build the "done" handler shared by the Google and file scans. The log
  // line is prefix + flagged_count + joiner + total_scanned.
  function secondaryDoneHandler(src, eventName, prefix, joiner) {
    return function(e) {
      var d = JSON.parse(e.data);
      console.log('[SSE] ' + eventName + ':', d);
      S._srcPct[src] = 100;
      S[runningFlag[src]] = false;
      _renderProgressSegments();

      // Reset the shared UI only once no other source is still running.
      var othersIdle = Object.keys(runningFlag).every(function(key) {
        return key === src || !S[runningFlag[key]];
      });
      if (othersIdle) {
        setLogLive('');
        byId('scanBtn').disabled = false;
        byId('stopBtn').style.display = 'none';
        _clearProgressBar();
        byId('statsSection').style.display = 'block';
        byId('statsPill').style.display = 'block';
        updateStats();
        if (S.flaggedData.length) revealResults();
      }

      log(prefix + d.flagged_count + joiner + d.total_scanned, 'ok');
      markOverdueCards();
      loadTrend();
    };
  }

  source.addEventListener('scan_phase', function(e) {
    var d = JSON.parse(e.data);
    console.log('[SSE] scan_phase:', d.phase);
    // Ensure a progress segment exists before rendering phase text.
    // scan_phase can arrive before scan_progress (or before scan_start on
    // replay if scan_start has been pushed out of the 500-event SSE buffer).
    var anyRunning = S._m365ScanRunning || S._googleScanRunning || S._fileScanRunning;
    if (!anyRunning) {
      // Guess the source from the phase text; anything else counts as M365.
      var phaseText = (d.phase || '').toLowerCase();
      var phaseSrc;
      if (/google|gmail|gdrive/.test(phaseText)) {
        phaseSrc = 'google';
      } else if (/^files\s*[—\-–]/.test(phaseText)) {
        phaseSrc = 'file';
      } else {
        phaseSrc = 'm365';
      }
      markRunning(phaseSrc);
    }
    _setProgressPhase(d.phase);
    log(d.phase);
  });

  source.addEventListener('scan_progress', function(e) {
    var d = JSON.parse(e.data);
    var src = d.source || 'm365';

    // Prefer an explicit percentage; otherwise derive it from index/total.
    var pct;
    if (d.pct !== undefined) {
      pct = d.pct;
    } else if (d.total > 0) {
      pct = Math.round((d.index || d.completed || 0) / d.total * 100);
    } else {
      pct = 0;
    }
    S._srcPct[src] = pct;

    // If reconnecting mid-scan the running flag may not be set yet — ensure
    // the segment exists.
    var known = Object.prototype.hasOwnProperty.call(runningFlag, src);
    if (known && !S[runningFlag[src]]) markRunning(src);

    var fill = byId('progressFill_' + src);
    if (fill) fill.style.width = pct + '%';
    byId('progressFile').textContent = d.file || '';

    // Only update stats/ETA from M365 (has meaningful totals and ETA)
    if (src !== 'm365') return;
    var statsEl = byId('progressStats');
    if (statsEl && d.total) {
      statsEl.textContent = (d.index || 0) + ' / ' + d.total;
    }
    var etaEl = byId('progressEta');
    if (etaEl && d.eta !== undefined) {
      etaEl.textContent = d.eta ? ('ETA ' + d.eta) : '';
    }
  });

  source.addEventListener('scan_file', function(e) {
    var d = JSON.parse(e.data);
    setLogLive(d.file || '');
  });

  source.addEventListener('scan_file_flagged', function(e) {
    var card = JSON.parse(e.data);
    console.log('[SSE] scan_file_flagged:', card.name || card.id);
    // Skip cards whose id is already in the list.
    var known = S.flaggedData.find(function(x) { return x.id === card.id; });
    if (known) return;
    S.flaggedData.push(card);
    S.totalCPR += (card.cpr_count || 0);
    revealResults();
  });

  source.addEventListener('scan_error', function(e) {
    var d = JSON.parse(e.data);
    var prefix = d.file ? d.file + ': ' : '';
    log(prefix + d.error, 'err');
  });

  source.addEventListener('scan_cancelled', function() {
    closeUserStartedStream();
    byId('scanBtn').disabled = false;
    byId('stopBtn').style.display = 'none';
    _clearProgressBar();
    setLogLive('');
    log('Scan stopped.', 'warn');
  });

  source.addEventListener('scan_done', function(e) {
    var d = JSON.parse(e.data);
    console.log('[SSE] scan_done:', d);
    closeUserStartedStream();

    S._srcPct.m365 = 100;
    S._m365ScanRunning = false;
    _renderProgressSegments();

    // Google and file scans may still be running after M365 finishes.
    var othersRunning = S._googleScanRunning || S._fileScanRunning;
    if (!othersRunning) setLogLive('');
    byId('scanBtn').disabled = othersRunning;
    byId('stopBtn').style.display = othersRunning ? 'inline-block' : 'none';
    if (!othersRunning) _clearProgressBar();

    byId('statsSection').style.display = 'block';
    byId('statScanned').textContent = d.total_scanned;
    byId('statFlagged').textContent = d.flagged_count;
    byId('statCPR').textContent = S.totalCPR;
    byId('statsPill').style.display = 'block';
    updateStats();

    if (S.flaggedData.length) {
      revealResults();
    } else {
      byId('emptyState').style.display = 'flex';
      byId('emptyState').innerHTML = '<div class="empty-icon">\u2705</div><div class="empty-text">' + t('m365_no_cpr_found','No CPR numbers found.') + '</div>';
    }

    var deltaNote = '';
    if (d.delta) {
      deltaNote = ' (\u0394 delta \u2014 ' + (d.delta_sources||0) + ' source(s) indexed)';
    }
    log('Scan complete \u2014 ' + d.flagged_count + ' flagged of ' + d.total_scanned + deltaNote, 'ok');
    if (d.delta) checkDeltaStatus();
    markOverdueCards();
    loadTrend();
  });

  source.addEventListener('google_scan_done',
    secondaryDoneHandler('google', 'google_scan_done', 'Google scan complete \u2014 ', ' flagged of '));

  source.addEventListener('file_scan_done',
    secondaryDoneHandler('file', 'file_scan_done', 'Bestandsscan fuldført \u2014 ', ' flagget af '));

  // sse_replay_done marks end of buffer replay — log a note so the user
  // knows earlier events above were replayed from an already-running scan
  source.addEventListener('sse_replay_done', function() {
    log(t('m365_sse_replay_note', 'Live log resumed \u2014 earlier entries replayed from running scan.'));
  });
}
|
||||
|
||||
/**
 * Register the scheduler-related SSE event handlers on `source`:
 * scheduler_started, scan_start, scheduler_done and scheduler_error.
 * They toggle the Scan/Stop controls and the progress UI and write the
 * outcome of a scheduled scan to the log.
 *
 * @param {EventTarget} source Object exposing addEventListener; handlers
 *   read the JSON payload from `event.data`.
 */
function _attachSchedulerListeners(source) {
  // Scan button is disabled and Stop shown while running; reversed otherwise.
  function setControls(running) {
    document.getElementById('scanBtn').disabled = running;
    document.getElementById('stopBtn').style.display = running ? 'inline-block' : 'none';
  }

  function schedTitle() {
    return t('m365_sched_title','Scheduled scan');
  }

  source.addEventListener('scheduler_started', function(e) {
    var d = JSON.parse(e.data);
    var jobName = d.job_name || '';
    console.log('[SSE] scheduler_started received:', d);
    log('\uD83D\uDD50 ' + schedTitle() + ': ' + jobName + '\u2026');
    // Show progress UI so scan_phase / scan_progress events are visible
    setControls(true);
    S._srcPct = { m365: 0, google: 0, file: 0 };
    S._m365ScanRunning = true;
    _renderProgressSegments();
    _setProgressPhase(jobName + '\u2026');
    document.getElementById('progressFile').textContent = '';
  });

  source.addEventListener('scan_start', function(e) {
    // Scheduled scans also emit scan_start — show progress UI in case
    // scheduler_started was missed (e.g. browser reconnected mid-scan)
    console.log('[SSE] scan_start received');
    setControls(true);
    // Ensure at least the M365 segment is rendered (scan_start is M365-only)
    if (S._m365ScanRunning) return;
    S._m365ScanRunning = true;
    _renderProgressSegments();
  });

  source.addEventListener('scheduler_done', function(e) {
    var d = JSON.parse(e.data);
    console.log('[SSE] scheduler_done received:', d);
    setControls(false);
    _clearProgressBar();
    log('\u2713 ' + schedTitle() + ' ' + (d.job_name||'') + ' \u2014 ' + (d.flagged||0) + ' flagged', 'ok');
    markOverdueCards();
    loadTrend();
  });

  source.addEventListener('scheduler_error', function(e) {
    var d = JSON.parse(e.data);
    console.log('[SSE] scheduler_error received:', d);
    setControls(false);
    _clearProgressBar();
    log('\u26A0 ' + schedTitle() + ' failed: ' + (d.error||''), 'err');
  });
}
|
||||
|
||||
|
||||
// Start (or resume) a scan for every source currently selected in the UI.
//
// Steps:
//   1. Read the selection from buildScanPayload() and bail out with an alert
//      when nothing (or no account) is selected.
//   2. Unless `resume` is truthy, clear the previous results and counters.
//   3. Switch the controls and progress display into "scanning" state.
//   4. Reopen the SSE stream, then (after 300 ms) POST one start request per
//      selected source type: M365, each checked file source, Google.
//
// A start request that fails (network error, or an HTTP error other than
// 409 "already running") is logged and its "running" flag is released, so
// the controls are not left locked waiting for a *_done event that will
// never arrive.
//
// resume: truthy to keep the cards already on screen and send resume=true to
//         /api/scan/start; falsy to start from a clean slate.
function startScan(resume) {
  const { sources, fileSources, googleSources, user_ids, options } = buildScanPayload();
  if (!sources.length && !fileSources.length && !googleSources.length) { alert(t('m365_no_sources','No sources selected — nothing to scan.')); return; }
  if (sources.length && !user_ids.length && !googleSources.length) { alert('Select at least one account to scan.'); return; }

  // When resuming, keep existing cards; otherwise clear everything
  if (!resume) {
    S.flaggedData = []; S.filteredData = []; S.totalCPR = 0;
    document.getElementById('grid').innerHTML = '';
    document.getElementById('grid').style.display = 'none';
    document.getElementById('emptyState').style.display = 'none';
    const _lss = document.getElementById('lastScanSummary'); if (_lss) _lss.style.display = 'none';
    document.getElementById('statsSection').style.display = 'none';
    document.getElementById('statsPill').style.display = 'none';
  }
  document.getElementById('resumeBanner').style.display = 'none';
  document.getElementById('logPanel').innerHTML = '<div class="log-line log-live" id="logLive" style="display:none"></div>';
  try { sessionStorage.removeItem(_LOG_SESSION_KEY); } catch(e) {}
  S._m365ScanRunning = sources.length > 0;
  S._googleScanRunning = googleSources.length > 0;
  S._fileScanRunning = fileSources.length > 0;
  S._srcPct = { m365: 0, google: 0, file: 0 };
  S._progressCurrentUser = '';
  _renderProgressSegments();
  document.getElementById('scanBtn').disabled = true;
  document.getElementById('stopBtn').style.display = 'inline-block';
  // progress segments rendered by _renderProgressSegments() called above
  document.getElementById('progressFile').textContent = '';
  _setProgressPhase(t('scan_preparing', 'Preparing…'));

  const dateLabel = options.older_than_days > 0 ? ', ' + t('m365_log_older_than', 'older than') + ' ' + document.getElementById('olderThanDate').value : '';
  const modeLabel = resume ? t('m365_log_resuming', 'Resuming scan:') : t('m365_log_starting_scan', 'Starting scan:');
  var googleCount = googleSources.length > 0 ? S._allUsers.filter(function(u) {
    return u.selected !== false && (u.platform === 'google' || u.platform === 'both');
  }).length : 0;
  var totalAccounts = (sources.length > 0 ? user_ids.length : 0) + (googleSources.length > 0 && sources.length === 0 ? googleCount : 0);
  var allSourceLabels = sources.concat(googleSources);
  log(modeLabel + ' ' + allSourceLabels.join(', ') + ' — ' + (totalAccounts || googleCount) + ' ' + t('m365_log_accounts', 'account(s)') + dateLabel + '…');

  // Always close and reopen SSE — ensures a fresh queue is registered
  // before the scan fires events (prevents missed events on the server side)
  if (S.es) { S.es.close(); S.es = null; }
  S._userStartedScan = true;
  _ensureSSE();

  // Mark one source type as no longer running after its start request
  // failed; if nothing else is running, return the controls to idle.
  function releaseSource(flag) {
    S[flag] = false;
    _renderProgressSegments();
    if (S._m365ScanRunning || S._googleScanRunning || S._fileScanRunning) return;
    document.getElementById('scanBtn').disabled = false;
    document.getElementById('stopBtn').style.display = 'none';
    _clearProgressBar();
  }

  setTimeout(() => {
    // Fire M365 scan if any M365 sources are selected
    if (sources.length > 0) {
      fetch('/api/scan/start', {
        method: 'POST', headers: {'Content-Type':'application/json'},
        body: JSON.stringify({sources, user_ids, options, resume: !!resume,
          profile_id: S._activeProfileId || null})
      }).then(r => {
        if (r.status === 409) {
          // A scan is already in progress, so its events will still arrive.
          log('Scan already running', 'err');
        } else if (!r.ok) {
          log('Scan start failed: HTTP ' + r.status, 'err');
          releaseSource('_m365ScanRunning');
        }
      }).catch(e => {
        log('Scan start failed: ' + e, 'err');
        releaseSource('_m365ScanRunning');
      });
    }

    // Fire file scans for each checked file source (local/smb)
    const checkedFileIds = [];
    document.querySelectorAll('#sourcesPanel input[data-source-type="file"]:checked').forEach(function(cb) {
      checkedFileIds.push(cb.dataset.sourceId);
    });
    // Number of file start requests that have not failed. All increments
    // happen synchronously in the loop below, before any response callback.
    let liveFileStarts = 0;
    const fileStartFailed = () => {
      liveFileStarts -= 1;
      if (liveFileStarts === 0) releaseSource('_fileScanRunning');
    };
    checkedFileIds.forEach(function(id) {
      const source = S._fileSources.find(function(s) { return s.id === id; });
      if (!source) return;
      liveFileStarts += 1;
      fetch('/api/file_scan/start', {
        method: 'POST', headers: {'Content-Type':'application/json'},
        body: JSON.stringify(Object.assign({}, source, {scan_photos: options.scan_photos || false}))
      }).then(r => {
        // 409 is treated like the other endpoints: a scan is already running.
        if (r.status !== 409 && !r.ok) {
          log('File scan error: HTTP ' + r.status, 'err');
          fileStartFailed();
        }
      }).catch(e => {
        log('File scan error: ' + e, 'err');
        fileStartFailed();
      });
    });
    // File sources were selected but no request could be issued for them.
    if (fileSources.length > 0 && liveFileStarts === 0) releaseSource('_fileScanRunning');

    // Fire Google Workspace scan if any Google sources are selected
    const checkedGoogleIds = [];
    document.querySelectorAll('#sourcesPanel input[data-source-type="google"]:checked').forEach(function(cb) {
      checkedGoogleIds.push(cb.dataset.sourceId);
    });
    if (checkedGoogleIds.length > 0) {
      // Collect selected Google user emails from the account list
      var selectedGoogleEmails = S._allUsers
        .filter(function(u) { return u.selected !== false && (u.platform === 'google' || u.platform === 'both'); })
        .map(function(u) { return u.platform === 'both' ? u.googleEmail : u.email; })
        .filter(Boolean);
      fetch('/api/google/scan/start', {
        method: 'POST', headers: {'Content-Type':'application/json'},
        body: JSON.stringify({
          sources: checkedGoogleIds,
          user_emails: selectedGoogleEmails,
          options: options
        })
      }).then(r => {
        if (r.status === 409) {
          log('Google scan already running', 'err');
        } else if (!r.ok) {
          log('Google scan error: HTTP ' + r.status, 'err');
          releaseSource('_googleScanRunning');
        }
      }).catch(e => {
        log('Google scan error: ' + e, 'err');
        releaseSource('_googleScanRunning');
      });
    } else if (googleSources.length > 0) {
      // Google sources were selected but no request was issued for them.
      releaseSource('_googleScanRunning');
    }

    // All scan types fired above — no fallback error needed
  }, 300);
}
|
||||
|
||||
// Ask the backend to stop the running scan (POST /api/scan/stop).
// Failures are written to the log panel instead of being dropped as an
// unhandled promise rejection, so the user can tell the stop did not go out.
function stopScan() {
  fetch('/api/scan/stop', {method:'POST'})
    .then(function(r) {
      if (!r.ok) log('Stop request failed: HTTP ' + r.status, 'err');
    })
    .catch(function(e) {
      log('Stop request failed: ' + e, 'err');
    });
}
|
||||
|
||||
// ── Trend sparkline (#7) ──────────────────────────────────────────────────────
|
||||
|
||||
// Draw the trend sparkline on #sparkCanvas and refresh its companion widgets
// (#sparkLabels, #trendChange, #sparkTip).
//
// data: array of records, oldest first, each read for `scan_date` (string
//       whose characters from index 5 are shown as the axis label),
//       `flagged_count` and `overdue_count`.
//
// Does nothing when the canvas is missing or `data` is empty or not an
// array. A single record is drawn as one centred point.
function drawSparkline(data) {
  const canvas = document.getElementById('sparkCanvas');
  if (!canvas) return;
  // Nothing to plot: bail out before data[0] is dereferenced below.
  if (!Array.isArray(data) || data.length === 0) return;

  const dpr = window.devicePixelRatio || 1;
  const W = canvas.offsetWidth || 220;
  const H = 60;
  // Size the backing store in device pixels, then draw in CSS pixels.
  canvas.width = W * dpr;
  canvas.height = H * dpr;
  const ctx = canvas.getContext('2d');
  ctx.scale(dpr, dpr);

  const flagged = data.map(d => d.flagged_count);
  const overdue = data.map(d => d.overdue_count);
  // 20 % headroom above the tallest flagged value; at least 1 so an all-zero
  // series does not divide by zero in yPos.
  const maxVal = Math.max(...flagged, 1) * 1.2;
  const n = data.length;
  // With a single point (n - 1 === 0) centre it instead of dividing by zero.
  const xPos = i => (n > 1 ? (i / (n - 1)) * (W - 8) + 4 : W / 2);
  const yPos = v => H - 4 - (v / maxVal) * (H - 10);

  const isDark = document.body.getAttribute('data-theme') !== 'light';
  const cBlue = '#378ADD';
  const cAmber = '#BA7517';
  const cFill = isDark ? 'rgba(55,138,221,0.12)' : 'rgba(55,138,221,0.08)';

  // Fill under flagged line
  ctx.beginPath();
  ctx.moveTo(xPos(0), yPos(flagged[0]));
  for (let i = 1; i < n; i++) ctx.lineTo(xPos(i), yPos(flagged[i]));
  ctx.lineTo(xPos(n - 1), H);
  ctx.lineTo(xPos(0), H);
  ctx.closePath();
  ctx.fillStyle = cFill;
  ctx.fill();

  // Flagged line
  ctx.beginPath();
  ctx.moveTo(xPos(0), yPos(flagged[0]));
  for (let i = 1; i < n; i++) ctx.lineTo(xPos(i), yPos(flagged[i]));
  ctx.strokeStyle = cBlue; ctx.lineWidth = 1.5; ctx.lineJoin = 'round';
  ctx.stroke();

  // Overdue dashed line
  ctx.beginPath();
  ctx.moveTo(xPos(0), yPos(overdue[0]));
  for (let i = 1; i < n; i++) ctx.lineTo(xPos(i), yPos(overdue[i]));
  ctx.strokeStyle = cAmber; ctx.lineWidth = 1;
  ctx.setLineDash([3, 3]); ctx.stroke(); ctx.setLineDash([]);

  // Dot on latest point
  ctx.beginPath();
  ctx.arc(xPos(n - 1), yPos(flagged[n - 1]), 3, 0, Math.PI * 2);
  ctx.fillStyle = cBlue; ctx.fill();

  // Labels: first, middle, last date (scan_date from index 5 onward)
  const lblEl = document.getElementById('sparkLabels');
  if (lblEl) {
    const fmt = d => d.scan_date.slice(5);
    lblEl.innerHTML = `<span>${fmt(data[0])}</span><span>${fmt(data[Math.floor(n/2)])}</span><span>${fmt(data[n-1])}</span>`;
  }

  // Trend change label: latest value compared with the one before it.
  const last = flagged[n - 1];
  const prior = n > 1 ? flagged[n - 2] : undefined;
  // Only fall back to `last` when there is no previous value at all. A real
  // previous value of 0 must be kept, otherwise a rise from 0 reads as "no
  // change".
  const prev = prior == null ? last : prior;
  const diff = last - prev;
  const arrow = diff < 0 ? '↓' : diff > 0 ? '↑' : '→';
  const color = diff < 0 ? 'var(--success)' : diff > 0 ? 'var(--danger)' : 'var(--muted)';
  let change;
  if (prev) {
    change = Math.round(Math.abs(diff / prev) * 100) + '%';
  } else {
    // A percentage against a zero baseline is undefined, so show the
    // absolute increase instead.
    change = diff > 0 ? '+' + diff : '0%';
  }
  const chEl = document.getElementById('trendChange');
  if (chEl) chEl.innerHTML = `<span style="color:${color}">${arrow} ${change}</span>`;

  // Hover tooltip
  canvas.onmousemove = e => {
    const rect = canvas.getBoundingClientRect();
    const mx = e.clientX - rect.left;
    // Inverse of xPos: map the pointer's x back to the nearest data index.
    const idx = Math.round(((mx - 4) / (W - 8)) * (n - 1));
    if (idx < 0 || idx >= n) return;
    const d = data[idx];
    const tip = document.getElementById('sparkTip');
    if (!tip) return;
    tip.style.display = 'block';
    tip.textContent = `${d.scan_date} ${d.flagged_count} / ${d.overdue_count} overdue`;
    // Keep the tooltip from overflowing the right edge of the canvas.
    tip.style.left = Math.min(mx, W - tip.offsetWidth - 4) + 'px';
  };
  canvas.onmouseleave = () => {
    const tip = document.getElementById('sparkTip');
    if (tip) tip.style.display = 'none';
  };
}
|
||||
|
||||
// Fetch the last 10 trend records from /api/db/trend and, when at least two
// are returned, reveal #trendPanel and draw the sparkline. Any failure
// (HTTP error, bad JSON, missing panel) leaves the panel untouched.
async function loadTrend() {
  try {
    const resp = await fetch('/api/db/trend?n=10');
    if (!resp.ok) return;

    const points = await resp.json();
    const enoughPoints = Array.isArray(points) && points.length >= 2;
    if (!enoughPoints) return;

    document.getElementById('trendPanel').style.display = 'block';
    // Defer draw until canvas has layout width
    setTimeout(function () { drawSparkline(points); }, 60);
  } catch (err) {
    /* DB not available */
  }
}
|
||||
|
||||
// Refresh the two counters in the stats pill:
//   #pillFlagged – number of entries in S.flaggedData
//   #pillScanned – the number after the "/" in #progressStats, or 0 when
//                  that part is missing or not numeric
function updateStats() {
  const flaggedPill = document.getElementById('pillFlagged');
  flaggedPill.textContent = S.flaggedData.length;

  const scannedPill = document.getElementById('pillScanned');
  const progressText = document.getElementById('progressStats').textContent;
  const afterSlash = progressText.split('/')[1] || '0';
  scannedPill.textContent = parseInt(afterSlash) || 0;
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
|
||||
// Publish the functions that inline HTML handlers and other modules call by
// name. Properties are assigned in the order listed.
Object.assign(window, {
  exportDB,
  openImportDBModal,
  closeImportDBModal,
  doImportDB,
  buildScanPayload,
  checkCheckpoint,
  clearCheckpointAndScan,
  checkDeltaStatus,
  clearDeltaTokens,
  openSmtpModal,
  closeSmtpModal,
  loadSmtpConfig,
  saveSmtpConfig,
  sendReport,
  _smtpFields,
  _attachScanListeners,
  _attachSchedulerListeners,
  startScan,
  stopScan,
  drawSparkline,
  loadTrend,
  updateStats,
});
|
||||
439
static/js/scheduler.js
Normal file
439
static/js/scheduler.js
Normal file
@ -0,0 +1,439 @@
|
||||
// ── Scheduler — multi-job (#19) ─────────────────────────────────────────────
|
||||
|
||||
// Job definitions last loaded from /api/scheduler/jobs (see schedLoad).
// The render, edit, toggle, delete and run helpers look jobs up here by id.
var _schedJobs = [];
|
||||
|
||||
// Reload the scheduler panel.
//
// First fetches the job list (stored in _schedJobs, then rendered together
// with the run history), and only afterwards fetches /api/scheduler/status so
// that the per-job description and run-button elements already exist when
// the live status is applied to them. Errors are reported with console.warn.
function schedLoad() {
  var RUNNING_COLOR = '#22c55e';
  var IDLE_BORDER = 'var(--border)';
  var IDLE_TEXT = 'var(--muted)';

  // Apply one job's live status to its description line and run button.
  function applyJobStatus(status) {
    var descEl = document.getElementById('schedDesc_' + status.id);
    if (!descEl) return;

    var job = _schedJobs.find(function (candidate) { return candidate.id === status.id; });
    var freqLabel = '';
    var timeStr = '';
    if (job) {
      if (job.frequency === 'weekly') freqLabel = 'Weekly';
      else if (job.frequency === 'monthly') freqLabel = 'Monthly';
      else freqLabel = 'Daily';
      timeStr = String(job.hour||0).padStart(2,'0') + ':' + String(job.minute||0).padStart(2,'0');
    }
    var base = freqLabel + ' ' + timeStr;
    var runBtn = document.getElementById('schedRunBtn_' + status.id);

    var suffix;
    var running = false;
    if (status.is_running) {
      running = true;
      suffix = ' \u00b7 Running...';
    } else if (status.next_run) {
      var when = new Date(status.next_run);
      suffix = ' \u00b7 Next: ' + when.toLocaleString(undefined,{month:'short',day:'numeric',hour:'2-digit',minute:'2-digit'});
    } else {
      suffix = status.enabled ? '' : ' \u00b7 Disabled';
    }

    descEl.textContent = base + suffix;
    if (runBtn) {
      runBtn.style.borderColor = running ? RUNNING_COLOR : IDLE_BORDER;
      runBtn.style.color = running ? RUNNING_COLOR : IDLE_TEXT;
    }
  }

  fetch('/api/scheduler/jobs')
    .then(function (resp) { return resp.json(); })
    .then(function (payload) {
      _schedJobs = payload.jobs || [];
      schedRenderJobs();
      schedLoadHistory();
      // Fetch status AFTER rendering so run buttons exist in the DOM
      return fetch('/api/scheduler/status').then(function (resp) { return resp.json(); });
    })
    .then(function (status) {
      if (!status) return;
      var noAps = document.getElementById('schedNoAps');
      if (noAps) noAps.style.display = status.available ? 'none' : 'block';
      schedUpdateSidebarIndicator(status);
      (status.jobs || []).forEach(applyJobStatus);
    })
    .catch(function (err) { console.warn('schedLoad:', err); });
}
|
||||
|
||||
// Render every job in _schedJobs as a row inside #schedJobList: an enable
// toggle, the job name, a "<frequency> <HH:MM>" description and the
// run / edit / delete buttons. Shows a placeholder when there are no jobs.
// Job id and name are passed through _esc() before being placed in markup.
function schedRenderJobs() {
  var list = document.getElementById('schedJobList');
  if (!list) return;

  if (!_schedJobs.length) {
    list.innerHTML = '<div style="font-size:11px;color:var(--muted);padding:4px 0">No scheduled scans yet.</div>';
    return;
  }

  var rows = [];
  for (var i = 0; i < _schedJobs.length; i++) {
    var job = _schedJobs[i];
    var sid = _esc(job.id);
    var sname = _esc(job.name || 'Unnamed');

    var freqLabel = 'Daily';
    if (job.frequency === 'weekly') freqLabel = 'Weekly';
    else if (job.frequency === 'monthly') freqLabel = 'Monthly';

    var hh = String(job.hour||0).padStart(2,'0');
    var mm = String(job.minute||0).padStart(2,'0');
    var desc = freqLabel + ' ' + hh + ':' + mm;
    var chk = job.enabled ? ' checked' : '';

    rows.push([
      '<div style="display:flex;align-items:center;gap:6px;padding:5px 6px;border:1px solid var(--border);border-radius:6px;background:var(--surface)">',
      '<label class="toggle" style="flex:unset;margin:0"><input type="checkbox"' + chk + ' onchange="schedToggleEnabled(\'' + sid + '\',this.checked)"><span class="toggle-slider"></span></label>',
      '<div style="flex:1;min-width:0">',
      '<div style="font-size:12px;font-weight:600;white-space:nowrap;overflow:hidden;text-overflow:ellipsis">' + sname + '</div>',
      '<div id="schedDesc_' + sid + '" style="font-size:10px;color:var(--muted)">' + desc + '</div>',
      '</div>',
      '<button onclick="schedRunJob(\'' + sid + '\')" id="schedRunBtn_' + sid + '" style="background:none;border:1px solid var(--border);color:var(--muted);padding:2px 7px;border-radius:4px;font-size:10px;cursor:pointer" title="Run now">▶</button>',
      '<button onclick="schedEditJob(\'' + sid + '\')" style="background:none;border:1px solid var(--border);color:var(--muted);padding:2px 7px;border-radius:4px;font-size:10px;cursor:pointer" title="Edit">✎</button>',
      '<button onclick="schedDeleteJob(\'' + sid + '\')" style="background:none;border:1px solid var(--danger);color:var(--danger);padding:2px 7px;border-radius:4px;font-size:10px;cursor:pointer" title="Delete">✕</button>',
      '</div>'
    ].join(''));
  }
  list.innerHTML = rows.join('');
}
|
||||
|
||||
// Persist a job's enabled/disabled switch.
//
// id:      id of the job in _schedJobs (unknown ids are ignored).
// enabled: new state taken from the toggle checkbox.
//
// The cached job is only updated after the server accepts the change. When
// the save fails, the list is re-rendered from the cache so the checkbox
// snaps back to the state that is actually stored, instead of showing a
// change that never happened.
function schedToggleEnabled(id, enabled) {
  var j = _schedJobs.find(function(x){ return x.id === id; });
  if (!j) return;
  var updated = Object.assign({}, j, {enabled: enabled});

  // Restore the toggle from the unchanged cache, then refresh the status
  // text best-effort (schedLoad reports its own errors via console.warn).
  function revertToggle() {
    schedRenderJobs();
    schedLoad();
  }

  fetch('/api/scheduler/jobs/save', {
    method: 'POST', headers: {'Content-Type':'application/json'},
    body: JSON.stringify(updated)
  }).then(function(r){ return r.json(); }).then(function(d) {
    if (d.error) {
      revertToggle();
      alert('Error: ' + d.error);
      return;
    }
    j.enabled = enabled;
    schedLoad();
  }).catch(function(e){
    revertToggle();
    alert('Error: ' + e);
  });
}
|
||||
|
||||
// Open the job editor in "new job" mode: blank id and name, enabled, daily
// at 02:00, both automation options off, then focus the name field.
function schedAddJob() {
  var el = function (id) { return document.getElementById(id); };

  el('schedEditId').value = '';
  el('schedName').value = '';
  el('schedEnabled').checked = true;
  el('schedFrequency').value = 'daily';
  el('schedDow').value = 'mon';
  el('schedDom').value = 1;
  el('schedHour').value = 2;
  el('schedMinute').value = 0;
  el('schedAutoEmail').checked = false;
  el('schedAutoRetention').checked = false;

  var heading = el('schedEditorTitle');
  if (heading) {
    heading.textContent = t('m365_sched_editor_new', 'New scheduled scan');
  }

  schedPopulateProfiles('');
  // Frequency was just set above; show/hide the weekday and day-of-month rows.
  schedToggleFreqRows();

  el('schedJobEditor').style.display = 'block';
  el('schedSaveStatus').textContent = '';
  el('schedName').focus();
}
|
||||
|
||||
// Open the job editor pre-filled with the cached job whose id is `id`.
// Unknown ids are ignored. Missing fields fall back to the same defaults the
// "new job" form uses (daily, Monday, day 1, 02:00).
function schedEditJob(id) {
  var job = _schedJobs.find(function (candidate) { return candidate.id === id; });
  if (!job) return;

  var el = function (elementId) { return document.getElementById(elementId); };

  el('schedEditId').value = job.id;
  el('schedName').value = job.name || '';
  el('schedEnabled').checked = !!job.enabled;
  el('schedFrequency').value = job.frequency || 'daily';
  el('schedDow').value = job.day_of_week || 'mon';
  el('schedDom').value = job.day_of_month || 1;
  // hour/minute may legitimately be 0, so test for null/undefined explicitly.
  el('schedHour').value = job.hour != null ? job.hour : 2;
  el('schedMinute').value = job.minute != null ? job.minute : 0;
  el('schedAutoEmail').checked = !!job.auto_email;
  el('schedAutoRetention').checked = !!job.auto_retention;

  var heading = el('schedEditorTitle');
  if (heading) {
    heading.textContent = t('m365_sched_editor_edit', 'Edit scheduled scan');
  }

  schedPopulateProfiles(job.profile_id || '');
  schedToggleFreqRows();

  el('schedJobEditor').style.display = 'block';
  el('schedSaveStatus').textContent = '';
}
|
||||
|
||||
// Close the job editor without saving; field values are left as they are.
function schedCancelEdit() {
  var editor = document.getElementById('schedJobEditor');
  editor.style.display = 'none';
}
|
||||
|
||||
// Validate the job editor form and POST it to /api/scheduler/jobs/save.
//
// A blank name is rejected locally with a message in #schedSaveStatus.
// On success the editor is closed and the job list reloaded; a server-side
// `error` or a request failure is shown in the status line instead.
function schedSaveJob() {
  var el = function (id) { return document.getElementById(id); };
  var st;

  var name = el('schedName').value.trim();
  if (!name) {
    st = el('schedSaveStatus');
    st.textContent = t('m365_sched_name_required', 'Name is required');
    st.style.color = 'var(--danger)';
    el('schedName').focus();
    return;
  }

  // An empty id tells the server nothing about an existing job.
  // NOTE(review): presumably that makes it create a new one — confirm.
  var job = {
    id: el('schedEditId').value || '',
    name: name,
    enabled: el('schedEnabled').checked,
    frequency: el('schedFrequency').value,
    day_of_week: el('schedDow').value,
    day_of_month: parseInt(el('schedDom').value) || 1,
    hour: parseInt(el('schedHour').value) || 0,
    minute: parseInt(el('schedMinute').value) || 0,
    profile_id: el('schedProfile').value,
    auto_email: el('schedAutoEmail').checked,
    auto_retention: el('schedAutoRetention').checked,
  };

  st = el('schedSaveStatus');
  st.style.color = 'var(--muted)';
  st.textContent = 'Saving...';

  fetch('/api/scheduler/jobs/save', {
    method: 'POST', headers: {'Content-Type':'application/json'},
    body: JSON.stringify(job)
  })
    .then(function (resp) { return resp.json(); })
    .then(function (result) {
      if (result.error) {
        st.style.color = 'var(--danger)';
        st.textContent = result.error;
        return;
      }
      st.style.color = 'var(--accent)';
      st.textContent = '\u2713 Saved';
      setTimeout(function () { st.textContent = ''; }, 1500);
      el('schedJobEditor').style.display = 'none';
      schedLoad();
    })
    .catch(function (err) {
      st.style.color = 'var(--danger)';
      st.textContent = err.message;
    });
}
|
||||
|
||||
// Delete a scheduled job after the user confirms.
// The prompt shows the cached job's name, or the raw id when the job is not
// in the cache. The list is reloaded on success; failures are alerted.
function schedDeleteJob(id) {
  var job = _schedJobs.find(function (candidate) { return candidate.id === id; });
  var label = job ? job.name : id;

  var confirmed = confirm('Delete "' + label + '"?');
  if (!confirmed) return;

  fetch('/api/scheduler/jobs/delete', {
    method: 'POST', headers: {'Content-Type':'application/json'},
    body: JSON.stringify({id: id})
  })
    .then(function (resp) { return resp.json(); })
    .then(function (result) {
      if (result.error) {
        alert('Delete failed: ' + result.error);
        return;
      }
      schedLoad();
    })
    .catch(function (err) { alert('Delete error: ' + err); });
}
|
||||
|
||||
// Trigger a scheduled job immediately after the user confirms.
// The prompt shows the cached job's name, or "this scan" when the job is not
// in the cache. The list is reloaded on success. A server-side `error` or a
// failed request is alerted, matching how schedDeleteJob reports failures.
function schedRunJob(id) {
  var j = _schedJobs.find(function(x){ return x.id === id; });
  var name = j ? j.name : 'this scan';
  if (!confirm('Run "' + name + '" now?')) return;
  fetch('/api/scheduler/jobs/run_now', {
    method: 'POST', headers: {'Content-Type':'application/json'},
    body: JSON.stringify({id: id})
  }).then(function(r){ return r.json(); }).then(function(d) {
    if (d.error) alert(d.error);
    else schedLoad();
  }).catch(function(e){
    // Network failure or a non-JSON response: tell the user instead of
    // leaving an unhandled rejection.
    alert('Run error: ' + e);
  });
}
|
||||
|
||||
// Show only the extra form row that matches the selected frequency:
// the weekday row for "weekly", the day-of-month row for "monthly",
// neither for anything else. Missing elements are skipped.
function schedToggleFreqRows() {
  var freqSelect = document.getElementById('schedFrequency');
  if (!freqSelect) return;

  var chosen = freqSelect.value;
  var rows = [
    { frequency: 'weekly', elementId: 'schedDowRow' },
    { frequency: 'monthly', elementId: 'schedDomRow' }
  ];
  rows.forEach(function (row) {
    var rowEl = document.getElementById(row.elementId);
    if (!rowEl) return;
    rowEl.style.display = (chosen === row.frequency) ? 'flex' : 'none';
  });
}
|
||||
|
||||
// Rebuild the options of #schedProfile from /api/profiles.
//
// selectedId: profile id (or name, for profiles without an id) to
//             pre-select; pass '' to leave the first option selected.
//
// The select's existing first option is kept as the leading entry, when
// there is one. Request or parsing failures are reported with console.warn.
function schedPopulateProfiles(selectedId) {
  fetch('/api/profiles').then(function(r){ return r.json(); }).then(function(d) {
    var sel = document.getElementById('schedProfile');
    if (!sel) return;
    var firstOpt = sel.options[0];
    sel.innerHTML = '';
    // An empty select has no first option; appendChild(undefined) would throw.
    if (firstOpt) sel.appendChild(firstOpt);
    (d.profiles || []).forEach(function(p) {
      var o = document.createElement('option');
      // Profiles without an id are keyed by their name.
      var key = p.id || p.name;
      o.value = key;
      o.textContent = p.name;
      if (key === selectedId) o.selected = true;
      sel.appendChild(o);
    });
  }).catch(function(e){ console.warn('schedPopulateProfiles:', e); });
}
|
||||
|
||||
// Fill #schedHistory with the last 10 scheduler runs from
// /api/scheduler/history: status icon, job name, start time, flagged count,
// an envelope when the report was emailed, and the first 60 characters of
// any error. Every server-supplied value placed in the markup goes through
// _esc(). Request failures are reported with console.warn.
function schedLoadHistory() {
  var el = document.getElementById('schedHistory');
  if (!el) return;
  fetch('/api/scheduler/history?limit=10').then(function(r){ return r.json(); }).then(function(d) {
    var runs = d.runs || [];
    if (!runs.length) { el.innerHTML = '<em>No scheduled runs yet</em>'; return; }
    var html = '';
    runs.forEach(function(r) {
      // started_at is multiplied by 1000 before being handed to Date.
      // NOTE(review): presumably epoch seconds — confirm against the API.
      var ts = r.started_at ? new Date(r.started_at * 1000).toLocaleString() : '-';
      var icon = r.status === 'completed' ? '\u2713' : r.status === 'failed' ? '\u2716' : '\u23f3';
      var jname = r.job_name ? '<strong>' + _esc(r.job_name) + '</strong> - ' : '';
      html += icon + ' ' + jname + ts + ' - ' + _esc(String(r.flagged || 0)) + ' flagged';
      if (r.emailed) html += ' \u2709';
      // String() so a non-string error (object, number) cannot throw on
      // .substring.
      if (r.error) html += ' <span style="color:var(--danger)">' + _esc(String(r.error).substring(0,60)) + '</span>';
      html += '<br>';
    });
    el.innerHTML = html;
  }).catch(function(e){ console.warn('schedLoadHistory:', e); });
}
|
||||
|
||||
// Show or hide the sidebar "next scheduled run" indicator.
//
// d: scheduler status object; the indicator is shown only when `d.enabled`
//    and `d.next_run` are truthy and `next_run` parses to a valid date.
//
// new Date() never throws on bad input (it yields an Invalid Date), so the
// date is validated explicitly. Otherwise the sidebar would read
// "Next: Invalid Date".
function schedUpdateSidebarIndicator(d) {
  var wrap = document.getElementById('schedNextIndicator');
  var txt = document.getElementById('schedNextText');
  if (!wrap || !txt) return;
  if (d && d.enabled && d.next_run) {
    var dt = new Date(d.next_run);
    if (isNaN(dt.getTime())) {
      wrap.style.display = 'none';
      return;
    }
    try {
      txt.textContent = t('m365_sched_next', 'Next') + ': ' + dt.toLocaleString(undefined, {month:'short',day:'numeric',hour:'2-digit',minute:'2-digit'});
      wrap.style.display = 'inline-flex';
    } catch(e) { wrap.style.display = 'none'; }
  } else {
    wrap.style.display = 'none';
  }
}
|
||||
|
||||
// Keep the sidebar "next run" indicator current: refresh it once the DOM is
// ready and then every 60 s. Failed requests are ignored; the next tick
// simply tries again.
(function () {
  function refreshIndicator() {
    fetch('/api/scheduler/status')
      .then(function (resp) { return resp.json(); })
      .then(function (status) { schedUpdateSidebarIndicator(status); })
      .catch(function () {});
  }

  setInterval(function () { refreshIndicator(); }, 60000);
  document.addEventListener('DOMContentLoaded', function () { refreshIndicator(); });
})();
|
||||
|
||||
// ── General tab ───────────────────────────────────────────────────────────────
|
||||
|
||||
// Populate the Settings → General tab: refresh the PIN status, mirror the
// language selector, and fill the "About" version rows from /api/about.
function stPopulateGeneral() {
  stLoadPinStatus();

  // Language selector: #langSelectSettings mirrors the hidden #langSelect.
  // Options are copied the first time only; afterwards just the current
  // value is synchronised.
  const src = document.getElementById('langSelect');
  const dst = document.getElementById('langSelectSettings');
  if (src && dst) {
    if (dst.options.length === 0) {
      for (const opt of Array.from(src.options)) {
        const copy = document.createElement('option');
        copy.value = opt.value;
        copy.textContent = opt.textContent;
        if (opt.selected) copy.selected = true;
        dst.appendChild(copy);
      }
    } else {
      dst.value = src.value;
    }
  }

  // About rows: element id → field of the /api/about response.
  // Missing values are shown as an em dash.
  const aboutRows = [
    ['st-about-python', 'python'],
    ['st-about-msal', 'msal'],
    ['st-about-requests', 'requests'],
    ['st-about-openpyxl', 'openpyxl'],
  ];
  fetch('/api/about')
    .then(function (resp) { return resp.json(); })
    .then(function (about) {
      aboutRows.forEach(function (row) {
        const value = about[row[1]];
        const cell = document.getElementById(row[0]);
        if (cell) cell.textContent = value || '\u2014';
      });
    })
    .catch(function () {});
}
|
||||
|
||||
// ── Email tab ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// Load the stored SMTP settings from /api/smtp/config into the Email tab.
// The password itself is never returned: when `has_password` is set the
// field is filled with eight bullet characters as a placeholder. Request
// failures are ignored and leave the form unchanged.
function stLoadSmtp() {
  const MASK = '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022';

  // Write `val` (or '' when falsy) into the input with the given id, if present.
  function fill(id, val) {
    const input = document.getElementById(id);
    if (input) input.value = val || '';
  }

  fetch('/api/smtp/config')
    .then(function (resp) { return resp.json(); })
    .then(function (cfg) {
      fill('st-smtpHost', cfg.host);
      fill('st-smtpPort', cfg.port || 587);
      fill('st-smtpUser', cfg.user);
      fill('st-smtpFrom', cfg.from_addr);

      // recipients may arrive as a list or as a ready-made string.
      let recipientText;
      if (Array.isArray(cfg.recipients)) {
        recipientText = cfg.recipients.join(', ');
      } else {
        recipientText = cfg.recipients || '';
      }
      fill('st-smtpTo', recipientText);

      // STARTTLS is on unless explicitly stored as false.
      const tlsBox = document.getElementById('st-smtpTls');
      if (tlsBox) tlsBox.checked = cfg.starttls !== false;

      const pwInput = document.getElementById('st-smtpPw');
      if (pwInput) pwInput.value = cfg.has_password ? MASK : '';
    })
    .catch(function () {});
}
|
||||
|
||||
// Save the Email tab's SMTP settings via POST /api/smtp/config.
//
// The password is only sent when the user typed one; the eight-bullet
// placeholder shown for an already-stored password is not sent back.
// Progress and the outcome are shown in #st-smtpStatus when that element
// exists.
//
// Resolves to true when the server accepted the settings and to false when
// it reported an error or the request failed, so callers that chain further
// work on the saved settings can tell whether the save went through.
async function stSmtpSave() {
  const st = document.getElementById('st-smtpStatus');
  // Status line updates are skipped when the element is absent, matching the
  // `if (st)` guards used by the other SMTP actions.
  const setStatus = function(color, text) {
    if (st) { st.style.color = color; st.textContent = text; }
  };
  const rawPw = document.getElementById('st-smtpPw').value;
  const pw = rawPw === '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022' ? null : rawPw;
  const body = {
    host: document.getElementById('st-smtpHost').value.trim(),
    // Explicit radix so the port is always read as decimal.
    port: parseInt(document.getElementById('st-smtpPort').value, 10) || 587,
    user: document.getElementById('st-smtpUser').value.trim(),
    from_addr: document.getElementById('st-smtpFrom').value.trim(),
    recipients: document.getElementById('st-smtpTo').value.split(/[,;]/).map(function(s){return s.trim();}).filter(Boolean),
    starttls: document.getElementById('st-smtpTls').checked,
  };
  if (pw !== null) body.password = pw;
  setStatus('var(--muted)', t('m365_smtp_saving','Saving...'));
  try {
    const r = await fetch('/api/smtp/config', {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(body)});
    const d = await r.json();
    if (d.error) { setStatus('var(--danger)', d.error); return false; }
    setStatus('var(--accent)', '\u2714 '+t('m365_smtp_saved','Saved'));
    return true;
  } catch(e){
    setStatus('var(--danger)', e.message);
    return false;
  }
}
|
||||
|
||||
// Save the current SMTP form, then ask the server to test the connection
// (POST /api/smtp/test with an empty body, so the server works from its
// stored settings). The result is shown in #st-smtpStatus when present.
async function stSmtpTest() {
  const st = document.getElementById('st-smtpStatus');
  // Stop when the save step explicitly reports failure by resolving to
  // false: the test would otherwise run against the previously stored
  // settings and overwrite the save error in the status line. Any other
  // result (including undefined) is treated as "saved".
  const saved = await stSmtpSave();
  if (saved === false) return;
  if (st) { st.style.color='var(--muted)'; st.textContent=t('m365_smtp_testing','Testing connection\u2026'); }
  try {
    const r = await fetch('/api/smtp/test', {method:'POST', headers:{'Content-Type':'application/json'},
      body:JSON.stringify({})});
    const d = await r.json();
    if (d.ok) {
      if (st) { st.style.color='var(--accent)'; st.textContent='\u2714 ' + (d.message || t('m365_smtp_test_ok','Connection successful')); }
    } else {
      if (st) { st.style.color='var(--danger)'; st.textContent='\u2717 ' + (d.error || t('m365_smtp_test_fail','Connection failed')); }
    }
  } catch(e) {
    if (st) { st.style.color='var(--danger)'; st.textContent='\u2717 ' + e.message; }
  }
}
|
||||
|
||||
// Save the SMTP form, then email the report to the recipients entered in
// the Email tab (POST /api/send_report with the recipients and the SMTP
// settings read from the form). The outcome is shown in #st-smtpStatus when
// present and written to the log panel.
//
// NOTE(review): the translation key 'm365_smtp_sent' is used below with two
// different fallback texts (status line vs. log line) — confirm that a
// single translation reads correctly in both places.
async function stSmtpSend() {
  const st = document.getElementById('st-smtpStatus');
  const MASK = '\u2022\u2022\u2022\u2022\u2022\u2022\u2022\u2022';

  // Update the status line; a missing element is tolerated.
  function showStatus(color, text) {
    if (!st) return;
    st.style.color = color;
    st.textContent = text;
  }
  function fieldValue(id) {
    return document.getElementById(id).value;
  }

  // First save current field values
  await stSmtpSave();

  // Check we have recipients
  const recipStr = fieldValue('st-smtpTo').trim();
  if (!recipStr) {
    if (st) showStatus('var(--danger)', t('m365_smtp_no_recipients','Enter at least one recipient.'));
    return;
  }
  const recipients = recipStr
    .split(/[,;]/)
    .map(function (part) { return part.trim(); })
    .filter(Boolean);

  // The bullet placeholder means "password unchanged"; send null instead.
  const rawPw = fieldValue('st-smtpPw');
  const cfg = {
    host: fieldValue('st-smtpHost').trim(),
    port: parseInt(fieldValue('st-smtpPort')) || 587,
    username: fieldValue('st-smtpUser').trim(),
    password: rawPw === MASK ? null : rawPw,
    from_addr: fieldValue('st-smtpFrom').trim(),
    use_tls: document.getElementById('st-smtpTls').checked,
    use_ssl: false,
  };

  if (st) showStatus('var(--muted)', t('m365_smtp_sending','Sending\u2026'));

  try {
    const resp = await fetch('/api/send_report', {
      method:'POST', headers:{'Content-Type':'application/json'},
      body:JSON.stringify({recipients, smtp:cfg})
    });
    const result = await resp.json();
    if (result.status === 'sent') {
      if (st) showStatus('var(--accent)', t('m365_smtp_sent','\u2714 Sent'));
      log(t('m365_smtp_sent','Report sent to') + ' ' + recipients.join(', '), 'ok');
      return;
    }
    showStatus('var(--danger)', result.error||'Send failed');
    log('Email send failed: '+(result.error||''),'err');
  } catch (err) {
    showStatus('var(--danger)', err.message);
  }
}
|
||||
|
||||
// ── Database tab ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Fetch /api/db/stats and render the three counters (scanned items, flagged
 * items, scans) into #st-dbStats. Fetch/parse failures are ignored.
 */
function stLoadDbStats() {
  fetch('/api/db/stats')
    .then(function(response) { return response.json(); })
    .then(function(stats) {
      const target = document.getElementById('st-dbStats');
      if (!target) return;
      if (stats.error) {
        target.textContent = stats.error;
        return;
      }
      // One [label, value] pair per counter; rows are joined with <br>.
      const rows = [
        [t('m365_stat_scanned','Scanned items'), stats.total_items || 0],
        [t('m365_stat_flagged','Flagged items'), stats.flagged_items || 0],
        [t('m365_db_scans','Scans'), stats.total_scans || 0],
      ];
      target.innerHTML = rows.map(function(row) {
        return '<span>' + row[0] + '</span>: <strong>' + row[1] + '</strong>';
      }).join('<br>');
    })
    .catch(function() { });
}
|
||||
|
||||
/**
 * Reset the scan database after a confirm() dialog and an admin PIN prompt.
 * On success the DB statistics are reloaded and a log line is written.
 */
function stResetDB() {
  const confirmed = confirm(t('m365_db_reset_confirm','Reset database? All scan results will be deleted.'));
  if (!confirmed) return;

  // Runs once requirePin() has obtained the PIN (or '' when none is set).
  const performReset = function(pin) {
    const request = {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify({confirm:'yes', pin:pin}),
    };
    fetch('/api/db/reset', request)
      .then(function(response) { return response.json(); })
      .then(function(result) {
        if (result.error === 'incorrect_pin') {
          log(t('m365_settings_pin_wrong','Incorrect PIN \u2014 reset cancelled.'), 'err');
          return;
        }
        if (result.error) {
          log('Reset failed: '+result.error, 'err');
          return;
        }
        stLoadDbStats();
        log(t('m365_db_reset_done','Database reset'));
      })
      .catch(function(err) { log('Reset failed: '+err,'err'); });
  };

  requirePin(t('m365_settings_enter_pin_reset','Enter admin PIN to reset the database.'), performReset);
}
|
||||
|
||||
// Redirect old openSmtpModal to Settings email tab
|
||||
/**
 * Legacy entry point kept for existing callers: opens the Settings dialog on
 * the email tab. The `send` argument is accepted but not used.
 */
function openSmtpModal(send) {
  const targetTab = 'email';
  openSettings(targetTab);
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
|
||||
// Publish the functions on window so inline HTML handlers and other modules
// can reach them.
Object.assign(window, {
  // Scheduler
  schedLoad,
  schedRenderJobs,
  schedToggleEnabled,
  schedAddJob,
  schedEditJob,
  schedCancelEdit,
  schedSaveJob,
  schedDeleteJob,
  schedRunJob,
  schedToggleFreqRows,
  schedPopulateProfiles,
  schedLoadHistory,
  schedUpdateSidebarIndicator,
  // Settings: general / SMTP / database
  stPopulateGeneral,
  stLoadSmtp,
  stSmtpSave,
  stSmtpTest,
  stSmtpSend,
  stLoadDbStats,
  stResetDB,
  openSmtpModal,
});
// NOTE(review): this copies the current value of _schedJobs once; if the
// binding is reassigned elsewhere, window._schedJobs will not follow — confirm.
window._schedJobs = _schedJobs;
|
||||
269
static/js/sources.js
Normal file
269
static/js/sources.js
Normal file
@ -0,0 +1,269 @@
|
||||
import { S } from './state.js';
|
||||
// ── Dynamic sources panel ─────────────────────────────────────────────────────
|
||||
|
||||
// Fixed M365 sources — always present when authenticated
|
||||
// Fixed M365 source descriptors. Each entry carries the source id, its icon,
// the i18n key plus fallback label, and the id of the toggle element that
// renderSourcesPanel() consults to decide whether to show it.
const _M365_SOURCES = [
  ['email',      '\uD83D\uDCE7', 'm365_src_email',      'Exchange / Outlook', 'smSrcEmail'],
  ['onedrive',   '\uD83D\uDCBE', 'm365_src_onedrive',   'OneDrive',           'smSrcOneDrive'],
  ['sharepoint', '\uD83C\uDF10', 'm365_src_sharepoint', 'SharePoint',         'smSrcSharePoint'],
  ['teams',      '\uD83D\uDCAC', 'm365_src_teams',      'Teams',              'smSrcTeams'],
].map(function(row) {
  return { id: row[0], icon: row[1], labelKey: row[2], labelDefault: row[3], toggleId: row[4] };
});
|
||||
|
||||
// Future connector stubs — uncomment when implemented
|
||||
// const _GMAIL_SOURCE = { id: 'gmail', icon: '\uD83D\uDCE7', labelKey: 'm365_src_gmail', labelDefault: 'Gmail', type: 'm365' };
|
||||
// const _GDRIVE_SOURCE = { id: 'googledrive', icon: '\uD83D\uDCC1', labelKey: 'm365_src_googledrive', labelDefault: 'Google Drive', type: 'm365' };
|
||||
|
||||
/**
 * Rebuild the #sourcesPanel checkbox list from three groups:
 *   1. the fixed M365 sources (hidden when their Source Management toggle is off),
 *   2. Gmail / Google Drive (only when window._googleConnected is truthy),
 *   3. one row per entry in S._fileSources.
 *
 * The checked state of existing checkboxes is carried over across the
 * re-render; sources seen for the first time default to checked, except Google
 * sources, which honour S._pendingGoogleSources when it is set.
 */
function renderSourcesPanel() {
  const panel = document.getElementById('sourcesPanel');
  if (!panel) return;

  // Remember currently checked state before re-render. The map has no
  // prototype so the `in` checks below cannot be satisfied by inherited
  // Object properties (e.g. a file source whose id is "constructor").
  const checked = Object.create(null);
  panel.querySelectorAll('input[data-source-id]').forEach(function(cb) {
    checked[cb.dataset.sourceId] = cb.checked;
  });

  let html = '';

  // M365 fixed sources — only show if their toggle in Source Management is on
  _M365_SOURCES.forEach(function(s) {
    const toggle = s.toggleId ? document.getElementById(s.toggleId) : null;
    if (toggle && !toggle.checked) return; // hidden by user in Source Management
    const isChecked = (s.id in checked) ? checked[s.id] : true;
    html += '<label class="source-check">'
      + '<input type="checkbox" data-source-id="' + s.id + '" data-source-type="m365"' + (isChecked ? ' checked' : '') + ' onchange="_onSourceChange()">'
      + '<span class="source-icon">' + s.icon + '</span>'
      + '<span class="source-label" data-i18n="' + s.labelKey + '">' + t(s.labelKey, s.labelDefault) + '</span>'
      + '</label>';
  });

  // Google Workspace sources — only show if connected
  if (window._googleConnected) {
    var gmailToggle = document.getElementById('smGoogleSrcGmail');
    var driveToggle = document.getElementById('smGoogleSrcDrive');
    // A missing toggle element counts as "shown".
    var showGmail = !gmailToggle || gmailToggle.checked;
    var showDrive = !driveToggle || driveToggle.checked;
    if (showGmail || showDrive) {
      html += '<div style="margin:6px 0 2px"><hr style="border:none;border-top:1px solid var(--border);margin:1px 0 2px"></div>';
    }
    if (showGmail) {
      // Priority: previous checkbox state, then pending profile selection, then checked.
      var isCheckedG = ('gmail' in checked) ? checked['gmail']
        : S._pendingGoogleSources !== null ? S._pendingGoogleSources.includes('gmail')
        : true;
      html += '<label class="source-check"><input type="checkbox" data-source-id="gmail" data-source-type="google"' + (isCheckedG ? ' checked' : '') + ' onchange="_onSourceChange()"><span class="source-icon">📧</span><span class="source-label">Gmail</span></label>';
    }
    if (showDrive) {
      var isCheckedD = ('gdrive' in checked) ? checked['gdrive']
        : S._pendingGoogleSources !== null ? S._pendingGoogleSources.includes('gdrive')
        : true;
      html += '<label class="source-check"><input type="checkbox" data-source-id="gdrive" data-source-type="google"' + (isCheckedD ? ' checked' : '') + ' onchange="_onSourceChange()"><span class="source-icon">📁</span><span class="source-label">Google Drive</span></label>';
    }
    // Pending has been applied — clear it
    S._pendingGoogleSources = null;
  }

  // File sources (local / SMB) — one entry per saved source
  if (S._fileSources.length > 0) {
    // Separator only. The wrapper is closed here: it used to be left open, so
    // every file-source row below was nested inside it and inherited its
    // 10px / uppercase / muted styling.
    html += '<div style="margin:6px 0 2px;font-size:10px;color:var(--muted);text-transform:uppercase;letter-spacing:.04em">'
      + '<hr style="border:none;border-top:1px solid var(--border);margin:1px 0 2px">'
      + '</div>';
    S._fileSources.forEach(function(s) {
      // Paths starting with // or \\ are treated as SMB shares.
      const isSmb = s.path && (s.path.startsWith('//') || s.path.startsWith('\\\\'));
      const icon = isSmb ? '\uD83C\uDF10' : '\uD83D\uDCC1';
      const label = s.label || s.path || s.id;
      const isChecked = (s.id in checked) ? checked[s.id] : true;
      html += '<label class="source-check">'
        + '<input type="checkbox" data-source-id="' + _esc(s.id) + '" data-source-type="file"' + (isChecked ? ' checked' : '') + '>'
        + '<span class="source-icon">' + icon + '</span>'
        + '<span class="source-label" title="' + _esc(s.path || '') + '">' + _esc(label) + '</span>'
        + '</label>';
    });
  }

  panel.innerHTML = html;

  // Resize panel to fit all rendered sources (respects user's saved smaller preference)
  if (typeof _fitSourcesPanel === 'function') _fitSourcesPanel();

  // Grey out the accounts section when no source checkbox is selected
  _updateAccountsVisibility();
}
|
||||
|
||||
/**
 * onchange handler for the source checkboxes: refreshes the accounts-section
 * enabled state, then re-renders the account list.
 */
function _onSourceChange() {
  const steps = [_updateAccountsVisibility, renderAccountList];
  steps.forEach(function(step) { step(); });
}
|
||||
|
||||
/**
 * Handler for the Gmail / Drive toggles: re-renders the sources panel and the
 * account list, then persists both toggle states to /api/src_toggles.
 * A missing toggle element is persisted as `true`; POST errors are ignored.
 */
function _onGoogleSourceToggle() {
  // Re-render first so the visible checkboxes and accounts reflect the toggles.
  renderSourcesPanel();
  renderAccountList();

  // Reads a toggle's checked flag, defaulting to true when it is not in the DOM.
  const readToggle = function(id) {
    const el = document.getElementById(id);
    return el ? el.checked : true;
  };
  const payload = {
    src_gmail: readToggle('smGoogleSrcGmail'),
    src_drive: readToggle('smGoogleSrcDrive'),
  };

  fetch('/api/src_toggles', {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(payload),
  }).catch(function(){});
}
|
||||
/**
 * Persist the M365 source toggles: collects `src_toggle_<id>` for every
 * source whose toggle element exists and POSTs the map to /api/src_toggles.
 * POST errors are ignored.
 */
function _saveM365SourceToggles() {
  const payload = {};
  for (const source of _M365_SOURCES) {
    if (!source.toggleId) continue;
    const toggleEl = document.getElementById(source.toggleId);
    if (!toggleEl) continue;
    payload['src_toggle_' + source.id] = toggleEl.checked;
  }
  const request = {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(payload),
  };
  fetch('/api/src_toggles', request).catch(function(){});
}
|
||||
|
||||
/**
 * Apply saved `src_toggle_<id>` values to the M365 source toggle elements and
 * re-render the sources panel.
 *
 * @param {Object} [settings] Saved settings map. Keys that are absent leave
 *   the corresponding toggle untouched. A null/undefined map is treated as
 *   empty, so the panel is still re-rendered instead of a TypeError aborting
 *   the restore.
 */
function _restoreM365SourceToggles(settings) {
  var saved = settings || {};
  _M365_SOURCES.forEach(function(s) {
    var el = s.toggleId ? document.getElementById(s.toggleId) : null;
    var key = 'src_toggle_' + s.id;
    // Coerce to boolean: the stored value may be any truthy/falsy value.
    if (el && saved[key] !== undefined) el.checked = !!saved[key];
  });
  renderSourcesPanel();
}
|
||||
|
||||
/**
 * @returns {boolean} true when the Gmail or the Drive toggle element exists
 *   and is checked; a missing element counts as not enabled.
 */
function _googleSourcesEnabled() {
  return ['smGoogleSrcGmail', 'smGoogleSrcDrive'].some(function(id) {
    const toggleEl = document.getElementById(id);
    return !!(toggleEl && toggleEl.checked);
  });
}
|
||||
|
||||
|
||||
/**
 * Dim and disable #accountsSection when no checkbox carrying a
 * data-source-type attribute inside #sourcesPanel is checked; restore it
 * otherwise.
 */
function _updateAccountsVisibility() {
  const sourcesEl = document.getElementById('sourcesPanel');
  let anyActive = false;
  if (sourcesEl) {
    for (const box of sourcesEl.querySelectorAll('input[data-source-type]')) {
      if (box.checked) { anyActive = true; break; }
    }
  }

  const section = document.getElementById('accountsSection');
  if (!section) return;

  if (anyActive) {
    section.style.opacity = '1';
    section.style.pointerEvents = '';
    section.title = '';
  } else {
    section.style.opacity = '0.35';
    section.style.pointerEvents = 'none';
    section.title = t('m365_accounts_disabled_tip', 'Select a source to enable account selection');
  }
}
|
||||
|
||||
// ── Admin PIN ─────────────────────────────────────────────────────────────────
|
||||
|
||||
let _pinCallback = null;
|
||||
|
||||
/**
 * Query /api/admin/pin and update the PIN status text (#stPinStatus) and the
 * visibility of the "current PIN" row (#stCurrentPinRow).
 */
async function stLoadPinStatus() {
  const response = await fetch('/api/admin/pin');
  const info = await response.json();

  const statusEl = document.getElementById('stPinStatus');
  const currentRow = document.getElementById('stCurrentPinRow');
  const pinIsSet = Boolean(info.pin_set);

  if (statusEl) {
    statusEl.textContent = pinIsSet
      ? '\u2714 ' + t('m365_settings_pin_set', 'Admin PIN is set')
      : t('m365_settings_pin_not_set', 'No PIN set \u2014 Reset DB is unprotected');
  }
  // The current-PIN row is only shown when a PIN already exists.
  if (currentRow) currentRow.style.display = pinIsSet ? '' : 'none';
}
|
||||
|
||||
/**
 * Validate the new/confirm PIN fields and POST the change to /api/admin/pin.
 *
 * Feedback is written to #stPinSaveStatus. All DOM lookups are null-safe, so
 * a missing status element or input no longer raises a TypeError; a missing
 * input is treated as an empty value.
 */
async function stSavePin() {
  // Value of an input by id, or '' when the element is absent.
  const valueOf = function(id) { return document.getElementById(id)?.value || ''; };
  const newPin     = valueOf('stNewPin');
  const confirmPin = valueOf('stConfirmPin');
  const currentPin = valueOf('stCurrentPin');

  const st = document.getElementById('stPinSaveStatus');
  // Null-safe status writer, matching the `if (st)` guards used by the SMTP handlers.
  const setStatus = function(color, text) {
    if (st) { st.style.color = color; st.textContent = text; }
  };

  if (!newPin) { setStatus('var(--danger)', t('m365_settings_pin_required','New PIN is required.')); return; }
  if (newPin !== confirmPin) { setStatus('var(--danger)', t('m365_settings_pin_mismatch','PINs do not match.')); return; }

  setStatus('var(--muted)', t('m365_fsrc_saving','Saving...'));
  try {
    const r = await fetch('/api/admin/pin', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify({current_pin: currentPin, new_pin: newPin}),
    });
    const d = await r.json();
    if (d.error === 'incorrect_pin') { setStatus('var(--danger)', t('m365_settings_pin_wrong','Current PIN is incorrect.')); return; }
    if (d.error) { setStatus('var(--danger)', d.error); return; }

    setStatus('var(--accent)', '\u2714 '+t('m365_settings_pin_saved','PIN saved'));
    // Clear the PIN fields after a successful save.
    ['stNewPin','stConfirmPin','stCurrentPin'].forEach(function(id) {
      const el = document.getElementById(id);
      if (el) el.value = '';
    });
    stLoadPinStatus();
  } catch(e) {
    setStatus('var(--danger)', e.message);
  }
}
|
||||
|
||||
// PIN prompt — used for destructive actions
|
||||
/**
 * PIN prompt — used for destructive actions.
 *
 * Asks /api/admin/pin whether a PIN is set. If not, `callback('')` is invoked
 * immediately. Otherwise the callback is parked in _pinCallback and the PIN
 * prompt dialog is opened; confirmPinPrompt() later invokes it with the PIN.
 *
 * If the status request fails, the callback is NOT invoked (the protected
 * action does not proceed) and the failure is reported on the console rather
 * than being left as an unhandled rejection.
 *
 * @param {string} message   Prompt text; falls back to a generic message.
 * @param {function(string)} callback  Receives the entered PIN ('' when no PIN is set).
 */
function requirePin(message, callback) {
  fetch('/api/admin/pin').then(function(r){ return r.json(); }).then(function(d) {
    if (!d.pin_set) {
      // No PIN set — proceed directly
      callback('');
      return;
    }
    _pinCallback = callback;
    const msg = document.getElementById('pinPromptMsg');
    const inp = document.getElementById('pinPromptInput');
    const err = document.getElementById('pinPromptError');
    const backdrop = document.getElementById('pinPromptBackdrop');
    if (msg) msg.textContent = message || t('m365_settings_enter_pin','Enter admin PIN to continue.');
    if (inp) inp.value = '';
    if (err) err.textContent = '';
    if (backdrop) backdrop.classList.add('open');
    // Focus after a short delay, once the dialog has been opened.
    setTimeout(function(){ if (inp) inp.focus(); }, 100);
  }).catch(function(e) {
    // Drop any parked callback so a later prompt cannot fire a stale action.
    _pinCallback = null;
    console.error('PIN status check failed:', e);
  });
}
|
||||
|
||||
/** Hide the PIN prompt dialog and discard any parked callback. */
function closePinPrompt() {
  const backdrop = document.getElementById('pinPromptBackdrop');
  backdrop.classList.remove('open');
  _pinCallback = null;
}
|
||||
|
||||
/**
 * Confirm handler of the PIN prompt: requires a non-empty PIN, closes the
 * dialog and hands the PIN to the callback parked by requirePin().
 */
function confirmPinPrompt() {
  const enteredPin = document.getElementById('pinPromptInput').value;
  const errorEl = document.getElementById('pinPromptError');

  if (!enteredPin) {
    if (errorEl) errorEl.textContent = t('m365_settings_pin_required','PIN is required.');
    return;
  }

  // Capture the callback first: closePinPrompt() resets _pinCallback to null.
  const pending = _pinCallback;
  closePinPrompt();
  if (pending) pending(enteredPin);
}
|
||||
|
||||
// ── Settings modal ────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Open the Settings dialog on the given tab.
 *
 * switchSettingsTab() already triggers the data load for the email, database
 * and scheduler tabs, so those loaders are not called again here — calling
 * them a second time issued every request twice per open.
 *
 * @param {string} [tab] Tab name; defaults to 'general'.
 */
function openSettings(tab) {
  document.getElementById('settingsBackdrop').classList.add('open');
  switchSettingsTab(tab || 'general');
  stPopulateGeneral();
}
|
||||
|
||||
/** Hide the Settings dialog by removing the `open` class from its backdrop. */
function closeSettings() {
  const backdrop = document.getElementById('settingsBackdrop');
  backdrop.classList.remove('open');
}
|
||||
|
||||
/**
 * Activate one Settings tab: toggles the `active` class on every pane
 * (#stPane<Name>) and tab button (#stTab<Name>), then loads the data the
 * selected tab needs.
 *
 * @param {string} tab One of 'general', 'security', 'scheduler', 'email', 'database'.
 */
function switchSettingsTab(tab) {
  const tabNames = ['general', 'security', 'scheduler', 'email', 'database'];
  for (const name of tabNames) {
    // Element ids use the capitalised tab name, e.g. stPaneEmail / stTabEmail.
    const suffix = name.charAt(0).toUpperCase() + name.slice(1);
    const isActive = name === tab;
    const pane = document.getElementById('stPane' + suffix);
    const button = document.getElementById('stTab' + suffix);
    if (pane) pane.classList.toggle('active', isActive);
    if (button) button.classList.toggle('active', isActive);
  }

  switch (tab) {
    case 'security':
      stLoadPinStatus();
      if (typeof stLoadViewerPinStatus === 'function') stLoadViewerPinStatus();
      break;
    case 'email':
      stLoadSmtp();
      break;
    case 'database':
      stLoadDbStats();
      break;
    case 'scheduler':
      schedLoad();
      break;
  }
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
|
||||
// Publish the functions on window so inline HTML handlers and other modules
// can reach them.
window.renderSourcesPanel = renderSourcesPanel;
window._onSourceChange = _onSourceChange;
window._onGoogleSourceToggle = _onGoogleSourceToggle;
window._saveM365SourceToggles = _saveM365SourceToggles;
window._restoreM365SourceToggles = _restoreM365SourceToggles;
window._googleSourcesEnabled = _googleSourcesEnabled;
window._updateAccountsVisibility = _updateAccountsVisibility;
window.stLoadPinStatus = stLoadPinStatus;
window.stSavePin = stSavePin;
window.requirePin = requirePin;
window.closePinPrompt = closePinPrompt;
window.confirmPinPrompt = confirmPinPrompt;
window.openSettings = openSettings;
window.closeSettings = closeSettings;
window.switchSettingsTab = switchSettingsTab;
window._M365_SOURCES = _M365_SOURCES;

// _pinCallback is a `let` that requirePin()/closePinPrompt() reassign. A plain
// `window._pinCallback = _pinCallback` would copy the initial null once and
// never change again, so expose it as a live accessor instead.
Object.defineProperty(window, '_pinCallback', {
  configurable: true,
  enumerable: true,
  get: function() { return _pinCallback; },
  set: function(fn) { _pinCallback = fn; },
});
|
||||
31
static/js/state.js
Normal file
31
static/js/state.js
Normal file
@ -0,0 +1,31 @@
|
||||
// state.js — shared mutable state for GDPRScanner
|
||||
// Imported by every module that needs cross-module state.
|
||||
// Use S.varName everywhere instead of bare varName.
|
||||
|
||||
// Single shared, mutable state object. Modules import it and read/write
// S.<field> instead of keeping their own module-level globals.
export const S = {
  // ── Scan results ──
  flaggedData: [],
  filteredData: [],
  totalCPR: 0,
  isListView: false,

  // ── SSE connection ──
  es: null,
  _userStartedScan: false,

  // ── Scan running flags + progress ──
  _m365ScanRunning: false,
  _googleScanRunning: false,
  _fileScanRunning: false,
  _srcPct: { m365: 0, google: 0, file: 0 },
  _progressCurrentUser: '',

  // ── Users ──
  _allUsers: [],

  // ── Auth ──
  _currentAppMode: null,

  // ── Profiles ──
  _profiles: [],
  _activeProfileId: null,
  _pendingProfileSources: [],
  // null means "nothing pending"; renderSourcesPanel() resets it after use.
  _pendingGoogleSources: null,

  // ── Sources ──
  _fileSources: [],
};
|
||||
120
static/js/ui.js
Normal file
120
static/js/ui.js
Normal file
@ -0,0 +1,120 @@
|
||||
import { S } from './state.js';
|
||||
// Global error trap — logs JS errors to console without blocking the page
|
||||
// Log uncaught script errors with the file's base name and line number.
// Returning false leaves the browser's default error handling in place.
window.onerror = (msg, src, line, col, err) => {
  const fileName = (src||'').split('/').pop();
  console.error('JS Error [' + fileName + ':' + line + '] ' + msg, err);
  return false;
};

// Log promise rejections that no handler caught.
window.addEventListener('unhandledrejection', (event) => {
  console.error('Unhandled promise rejection:', event.reason);
});
|
||||
|
||||
// ── Theme ────────────────────────────────────────────────────────────────────
|
||||
/**
 * Open the About dialog and fill in the component versions reported by
 * /api/about. Missing values show an em dash; fetch errors are ignored.
 */
function openAbout() {
  document.getElementById('aboutBackdrop').classList.add('open');

  // Each key maps to the element #about-<key>.
  const versionKeys = ['python', 'msal', 'requests', 'openpyxl'];
  fetch('/api/about')
    .then(function(response) { return response.json(); })
    .then(function(info) {
      versionKeys.forEach(function(key) {
        document.getElementById('about-' + key).textContent = info[key] || '—';
      });
    })
    .catch(function() {});
}
|
||||
/** Hide the About dialog by removing the `open` class from its backdrop. */
function closeAbout() {
  const backdrop = document.getElementById('aboutBackdrop');
  backdrop.classList.remove('open');
}
|
||||
|
||||
// ── Mode info modal ───────────────────────────────────────────────────────────
|
||||
/**
 * Fill and open the auth-mode info dialog. The "App mode" variant is shown
 * when S._currentAppMode is strictly true; any other value shows the
 * "Delegated" variant.
 */
function openModeInfo() {
  const titleEl = document.getElementById('modeInfoTitle');
  const subEl   = document.getElementById('modeInfoSubtitle');
  const rowsEl  = document.getElementById('modeInfoRows');

  // Shared markup builders for a label/value row and the description paragraph.
  const row = (label, value) =>
    `<div class="about-row"><span>${label}</span><span>${value}</span></div>`;
  const description = (text) => `
      <div style="margin-top:12px;font-size:11px;color:var(--muted);line-height:1.6">
        ${text}
      </div>`;

  if (S._currentAppMode === true) {
    titleEl.textContent = t('m365_mode_app', '🔑 App mode — org-wide');
    subEl.textContent   = t('m365_auth_mode_app_short', 'Application permissions · client credentials');
    rowsEl.innerHTML = `
      ${row(t('m365_info_permissions','Permissions'), 'Application')}
      ${row(t('m365_info_signin','Sign-in required'), t('m365_info_no','No'))}
      ${row(t('m365_info_scope','Scope'), t('m365_info_scope_org','All users in tenant'))}
      ${row(t('m365_info_consent','Admin consent'), t('m365_info_required','Required'))}${description(t('m365_info_app_desc','The app authenticates with a Client Secret and accesses all users\' data directly via Microsoft Graph — no interactive sign-in needed. Ideal for automated or scheduled scans.'))}`;
  } else {
    titleEl.textContent = t('m365_mode_delegated', '👤 Delegated');
    subEl.textContent   = t('m365_auth_mode_delegated_short', 'Delegated permissions · device code flow');
    rowsEl.innerHTML = `
      ${row(t('m365_info_permissions','Permissions'), 'Delegated')}
      ${row(t('m365_info_signin','Sign-in required'), t('m365_info_yes','Yes'))}
      ${row(t('m365_info_scope','Scope'), t('m365_info_scope_user','Signed-in user only'))}
      ${row(t('m365_info_admin','Global Admin'), t('m365_info_expands_scope','Expands scope to all users'))}${description(t('m365_info_delegated_desc','The app acts on behalf of the signed-in user via the device code flow. By default only that user\'s data is accessible. A Global Admin can grant broader consent to scan all users.'))}`;
  }

  document.getElementById('modeInfoBackdrop').classList.add('open');
}
|
||||
/** Hide the mode info dialog by removing the `open` class from its backdrop. */
function closeModeInfo() {
  const backdrop = document.getElementById('modeInfoBackdrop');
  backdrop.classList.remove('open');
}
|
||||
|
||||
/**
 * Flip the theme stored in body[data-theme] between 'dark' and 'light',
 * update the theme button icon and remember the choice in localStorage.
 */
function toggleTheme() {
  const wasDark = document.body.dataset.theme === 'dark';
  const nextTheme = wasDark ? 'light' : 'dark';

  document.body.dataset.theme = nextTheme;
  document.getElementById('themeBtn').textContent = wasDark ? '☀️' : '🌙';

  // localStorage access can throw; persisting the choice is best-effort.
  try {
    localStorage.setItem('m365_theme', nextTheme);
  } catch (e) {}
}
|
||||
// Runs once at load: re-apply the theme saved under 'm365_theme' (if any) and
// sync the theme button icon. Any error is ignored.
(() => {
  try {
    const savedTheme = localStorage.getItem('m365_theme');
    if (!savedTheme) return;

    document.body.dataset.theme = savedTheme;
    const themeButton = document.getElementById('themeBtn');
    if (themeButton) {
      themeButton.textContent = savedTheme === 'dark' ? '🌙' : '☀️';
    }
  } catch (e) {}
})();
|
||||
|
||||
// ── Language selector ─────────────────────────────────────────────────────────
|
||||
// Populate #langSelect from /api/langs. The selector is hidden when fewer than
// two languages are available or when the request / population fails.
fetch('/api/langs')
  .then(function(response) { return response.json(); })
  .then(function(data) {
    const select = document.getElementById('langSelect');
    const usable = select && data.langs && data.langs.length >= 2;
    if (!usable) {
      if (select) select.style.display = 'none';
      return;
    }
    for (const lang of data.langs) {
      const option = document.createElement('option');
      option.value = lang.code;
      option.textContent = lang.name;
      if (lang.code === data.current) option.selected = true;
      select.appendChild(option);
    }
  })
  .catch(function() {
    const select = document.getElementById('langSelect');
    if (select) select.style.display = 'none';
  });
|
||||
|
||||
/**
 * Switch the UI language: POSTs the language code to /api/set_lang and, when
 * the response carries translations, merges them into LANG and re-applies
 * them in place so scan results and state are kept.
 *
 * @param {string} code Language code to activate.
 */
async function setLang(code) {
  const response = await fetch('/api/set_lang', {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({lang: code}),
  });
  const payload = await response.json();
  if (!payload.translations) return;

  Object.assign(LANG, payload.translations);
  applyI18n();

  // Re-render the grid so card text picks up the new strings; prefer the
  // filtered view when one is active.
  if (S.flaggedData.length) {
    const visibleRows = S.filteredData.length ? S.filteredData : S.flaggedData;
    renderGrid(visibleRows);
  }
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
|
||||
// Publish the UI handlers on window so inline HTML handlers and other modules
// can reach them.
Object.assign(window, {
  openAbout,
  closeAbout,
  openModeInfo,
  closeModeInfo,
  toggleTheme,
  setLang,
});
|
||||
475
static/js/users.js
Normal file
475
static/js/users.js
Normal file
@ -0,0 +1,475 @@
|
||||
import { S } from './state.js';
|
||||
// ── Accounts ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Load the M365 user list from /api/users into S._allUsers and render it.
 *
 * - Manually added users (u.manual) that are not in the fetched list are kept.
 * - A user's previous `selected` flag is preserved; users seen for the first
 *   time start unselected.
 * - Google users are merged afterwards by _mergeGoogleUsers(), without
 *   awaiting it.
 *
 * Failures show a message in #accountsLoading and are logged to the console.
 */
async function loadUsers() {
  const list = document.getElementById('accountsList');
  const loading = document.getElementById('accountsLoading');
  if (!list) return;
  if (loading) loading.textContent = t('lbl_loading', 'Loading…');

  // Ensure source panel checkboxes exist before we render the account list
  if (!document.querySelector('#sourcesPanel input') && typeof renderSourcesPanel === 'function') {
    renderSourcesPanel();
  }

  try {
    const r = await fetch('/api/users');
    if (!r.ok) { if (loading) loading.textContent = 'Could not load users'; return; }
    const d = await r.json();
    if (d.error) { if (loading) loading.textContent = d.error; return; }

    // Merge with any manually-added users, preserving them
    const fetched = d.users || [];
    const fetchedIds = new Set(fetched.map(u => u.id));
    const manualToKeep = S._allUsers.filter(u => u.manual && !fetchedIds.has(u.id));

    // Preserve the selected state of users already in S._allUsers;
    // users seen for the first time default to selected=false.
    const prevSelected = new Map(S._allUsers.map(u => [u.id, u.selected]));
    fetched.forEach(u => {
      u.platform = 'm365';
      u.selected = prevSelected.has(u.id) ? prevSelected.get(u.id) : false;
    });

    S._allUsers = [...fetched, ...manualToKeep];
    renderAccountList(fetched.length <= 1);

    // Merge Google users separately so they're not blocked by M365 auth timing
    _mergeGoogleUsers();
    checkCheckpoint();
    checkDeltaStatus();
    _applyPendingProfileUsers();

    // Show warning banner when no users could be classified
    const warn = document.getElementById('skuWarnBanner');
    if (warn) {
      const allOther = fetched.length > 0 && fetched.every(u => u.userRole === 'other');
      warn.style.display = allOther ? 'block' : 'none';
    }
  } catch(e) {
    // The try block also covers rendering and follow-up calls, so log the
    // actual error — the generic UI message alone hides the real cause.
    console.error('loadUsers failed:', e);
    if (loading) loading.textContent = 'Could not load users';
  }
}
|
||||
|
||||
/**
 * Merge Google Workspace users (from /api/google/scan/users) into S._allUsers.
 *
 * Non-Google users whose display name (trimmed, lower-cased) equals a Google
 * user's become platform 'both' and receive `googleEmail`. Every Google user
 * that was not merged is added as a standalone 'google:<email>' entry.
 * Does nothing when Google is not connected or the request fails.
 *
 * Fixes relative to the earlier behaviour:
 *  - Merges are tracked per Google user, not per name, so two Google accounts
 *    sharing a display name no longer make one of them disappear.
 *  - Users already marked 'both' are re-evaluated, so a repeated call does not
 *    re-add their Google account as a standalone duplicate and a stale merge
 *    can actually be cleared.
 *  - Names are looked up in a Map, so a display name such as "constructor"
 *    cannot match an inherited Object property.
 */
async function _mergeGoogleUsers() {
  if (!window._googleConnected) return;
  try {
    const gr = await fetch('/api/google/scan/users');
    if (!gr.ok) return;
    const gd = await gr.json();
    if (gd.error) return;

    const googleList = gd.users || [];
    const prevSelected = new Map(S._allUsers.map(function(u){ return [u.id, u.selected]; }));
    const normalise = function(name) { return (name || '').trim().toLowerCase(); };

    // Build displayName → Google user map for cross-platform matching.
    // When several Google users share a name, the last one wins the merge and
    // the others stay standalone.
    const googleByName = new Map();
    googleList.forEach(function(gu) {
      const name = normalise(gu.displayName);
      if (name) googleByName.set(name, gu);
    });

    // Merge onto non-Google users where the display name matches.
    const mergedGoogle = new Set();
    S._allUsers.forEach(function(u) {
      if (u.platform === 'google') return; // standalone entries are rebuilt below
      const gu = googleByName.get(normalise(u.displayName));
      if (gu) {
        u.platform = 'both';
        u.googleEmail = gu.email;
        // Keep the existing displayName; only the Google e-mail is attached.
        mergedGoogle.add(gu);
      } else {
        // No Google counterpart (any more): clear a previous merge.
        delete u.googleEmail;
        u.platform = 'm365';
      }
    });

    // Add unmatched Google users as standalone entries
    const googleUsers = [];
    googleList.forEach(function(gu) {
      if (mergedGoogle.has(gu)) return; // already merged onto another user
      const uid = 'google:' + gu.email;
      googleUsers.push({
        id: uid,
        displayName: gu.displayName || gu.email,
        email: gu.email,
        userRole: gu.userRole || 'other',
        platform: 'google',
        selected: prevSelected.has(uid) ? prevSelected.get(uid) : false,
      });
    });

    // Remove stale standalone Google users, add fresh unmatched ones
    S._allUsers = S._allUsers
      .filter(function(u){ return (u.platform||'m365') !== 'google'; })
      .concat(googleUsers);
    renderAccountList();
  } catch(e) { /* Google users unavailable */ }
}
|
||||
|
||||
let _activeRoleFilter = ''; // '' = all, 'staff', 'student'
|
||||
|
||||
// ── Sidebar section collapse ──────────────────────────────────────────────────
|
||||
const _COLLAPSE_SECTIONS = ['sourcesPanelSection', 'optionsSection', 'accountsSection', 'logSection'];
|
||||
|
||||
/**
 * Collapse or expand a sidebar section and remember the state in
 * localStorage under 'sc_<id>' ('1' = collapsed, '0' = expanded).
 *
 * @param {string} id Section id; the body is #<id>Body and the toggle button #<id>-btn.
 */
function toggleSection(id) {
  const bodyEl = document.getElementById(id + 'Body');
  if (!bodyEl) return;

  // A currently visible body means this call collapses it.
  const collapsing = bodyEl.style.display !== 'none';
  bodyEl.style.display = collapsing ? 'none' : '';

  const toggleBtn = document.getElementById(id + '-btn');
  if (toggleBtn) {
    toggleBtn.textContent = collapsing ? '▸' : '▾';
  }

  // The accounts section gets a different flex value while collapsed.
  if (id === 'accountsSection') {
    const section = document.getElementById('accountsSection');
    if (section) {
      section.style.flex = collapsing ? '0 0 auto' : '1';
    }
  }

  try {
    localStorage.setItem('sc_' + id, collapsing ? '1' : '0');
  } catch (e) {}
}
|
||||
|
||||
/**
 * Re-apply the collapsed state saved by toggleSection() to every section in
 * _COLLAPSE_SECTIONS. Errors are ignored per section.
 */
function restoreSectionStates() {
  for (const sectionId of _COLLAPSE_SECTIONS) {
    try {
      const wasCollapsed = localStorage.getItem('sc_' + sectionId) === '1';
      if (!wasCollapsed) continue;

      const bodyEl = document.getElementById(sectionId + 'Body');
      if (bodyEl) bodyEl.style.display = 'none';

      const toggleBtn = document.getElementById(sectionId + '-btn');
      if (toggleBtn) toggleBtn.textContent = '▸';

      if (sectionId === 'accountsSection') {
        const section = document.getElementById('accountsSection');
        if (section) section.style.flex = '0 0 auto';
      }
    } catch (e) {}
  }
}
|
||||
|
||||
// ── Role filter with counts ───────────────────────────────────────────────────
|
||||
/**
 * Refresh the labels of the All / Staff / Student role-filter buttons with
 * the number of non-manual users in each group. A zero count is shown as the
 * bare label.
 */
function updateRoleFilterCounts() {
  // Single pass over the users; manually added ones are not counted.
  const counts = { total: 0, staff: 0, student: 0 };
  S._allUsers.forEach(function(user) {
    if (user.manual) return;
    counts.total += 1;
    if (user.userRole === 'staff') counts.staff += 1;
    if (user.userRole === 'student') counts.student += 1;
  });

  const withCount = function(label, n) {
    return label + (n ? ' (' + n + ')' : '');
  };

  const allBtn     = document.getElementById('rfAll');
  const staffBtn   = document.getElementById('rfStaff');
  const studentBtn = document.getElementById('rfStudent');
  if (allBtn)     allBtn.textContent     = withCount(t('m365_role_all','All'), counts.total);
  if (staffBtn)   staffBtn.textContent   = withCount(t('role_staff','Ansat'), counts.staff);
  if (studentBtn) studentBtn.textContent = withCount(t('role_student','Elev'), counts.student);
}
|
||||
|
||||
/**
 * Select the active role filter, highlight the matching button, refresh the
 * button counts and re-filter the user list.
 *
 * @param {string} role '' for all users, or 'staff' / 'student'.
 */
function setRoleFilter(role) {
  _activeRoleFilter = role;

  // Button id → the role value that button represents.
  const buttonRoles = { rfAll: '', rfStaff: 'staff', rfStudent: 'student' };
  Object.keys(buttonRoles).forEach(function(buttonId) {
    const button = document.getElementById(buttonId);
    if (!button) return;
    if (role === buttonRoles[buttonId]) {
      button.style.background = 'var(--accent)';
      button.style.color = '#fff';
    } else {
      button.style.background = 'none';
      button.style.color = 'var(--muted)';
    }
  });

  updateRoleFilterCounts();
  filterUsers();
}
|
||||
|
||||
// ── Last scan summary (empty state) ──────────────────────────────────────────
|
||||
// Replace the empty state with a summary card of the most recent scan.
// Fetches /api/db/stats and does nothing when the response has no scan_id,
// when S.flaggedData already holds results, or when either target element is
// missing. Every error is swallowed: the summary is best-effort decoration.
async function loadLastScanSummary() {
  try {
    const resp = await fetch('/api/db/stats');
    const stats = await resp.json();
    if (!stats.scan_id || S.flaggedData.length > 0) return;

    const panel = document.getElementById('lastScanSummary');
    const empty = document.getElementById('emptyState');
    if (!panel || !empty) return;

    let dateStr = '—';
    if (stats.finished_at) {
      // finished_at is scaled by 1000 before being handed to Date.
      dateStr = new Date(stats.finished_at * 1000)
        .toLocaleDateString('da-DK', {day:'numeric', month:'short', year:'numeric'});
    }

    const srcLabels = {'email':'Outlook','onedrive':'OneDrive','sharepoint':'SharePoint','teams':'Teams',
      'gmail':'Gmail','gdrive':'Drive','local':'Lokale filer','smb':'SMB'};
    const srcStr = Object.keys(stats.by_source || {})
      .map(function (key) { return srcLabels[key] || key; })
      .join(' · ') || '—';

    // One "<value> / <label>" cell of the statistics row.
    const statCell = function (value, label) {
      return '<div class="last-scan-stat"><span class="val">' + value +
        '</span><span class="lbl">' + label + '</span></div>';
    };

    panel.innerHTML = [
      '<div class="last-scan-card">',
      '<h3>' + t('last_scan_title', 'Seneste scanning') + '</h3>',
      '<div class="last-scan-stats">',
      statCell(stats.flagged_count || 0, t('last_scan_hits', 'Fund')),
      statCell(stats.unique_subjects || 0, t('last_scan_subjects', 'Unikke CPR')),
      statCell(stats.total_scanned || 0, t('last_scan_scanned', 'Scannet')),
      '</div>',
      '<div style="margin-top:12px;font-size:11px;color:var(--muted)">' + dateStr + ' · ' + srcStr + '</div>',
      '</div>',
      '<div class="empty-text" style="font-size:12px">' + t('m365_empty_hint', 'Vælg kilder og klik på <strong>Scan</strong><br>for at starte en ny scanning') + '</div>',
    ].join('');

    empty.style.display = 'none';
    panel.style.display = 'flex';
  } catch (e) {}
}
|
||||
|
||||
// Render the account checklist into #accountsList.
//
// Pipeline: start from S._allUsers, keep only accounts whose platform matches
// the currently checked sources, apply the active role filter, apply the text
// search from #userSearch, update the "(visible / total)" badge, then write
// one <label> row per remaining user.
//
// showAdminNote: when true (and no search / role filter is active) a hint is
// shown explaining that only the signed-in account could be listed.
//
// Every user-derived value (id, display name, e-mail, role, search text) is
// HTML-escaped before it is placed in markup, and ids are handed to the
// inline handlers through data-* attributes rather than spliced into
// JavaScript source. Strings returned by t() are inserted as-is because the
// fallbacks visibly contain intentional markup (<strong>…</strong>).
function renderAccountList(showAdminNote = false) {
  updateRoleFilterCounts();
  const list = document.getElementById('accountsList');
  if (!list) return;
  const q = (document.getElementById('userSearch')?.value || '').toLowerCase().trim();

  // Minimal HTML escaper, safe for both text nodes and quoted attribute values.
  const esc = function (value) {
    return String(value == null ? '' : value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  };

  let visible = S._allUsers;

  // Filter by platform: only show accounts relevant to checked sources.
  // If the sources panel hasn't been rendered yet (no checkboxes at all), treat M365 as active.
  var panelHasAny = !!document.querySelector('#sourcesPanel input[data-source-type]');
  var hasM365Src = panelHasAny
    ? !!document.querySelector('#sourcesPanel input[data-source-type="m365"]:checked')
    : S._allUsers.some(function(u){ return !u.platform || u.platform === 'm365' || u.platform === 'both'; });
  var hasGoogleSrc = !!document.querySelector('#sourcesPanel input[data-source-type="google"]:checked');
  // Google only counts when it is also enabled in Source Management
  // (not just selected in the sources panel).
  var googleEnabled = !!(document.getElementById('smGoogleSrcGmail') && document.getElementById('smGoogleSrcGmail').checked)
    || !!(document.getElementById('smGoogleSrcDrive') && document.getElementById('smGoogleSrcDrive').checked);
  var effectiveGws = hasGoogleSrc && googleEnabled;

  // Always filter — if neither platform is active, nothing is shown.
  visible = visible.filter(function(u) {
    var plat = u.platform || 'm365';
    if (plat === 'both') return hasM365Src || effectiveGws;
    return (plat === 'm365' && hasM365Src) || (plat === 'google' && effectiveGws);
  });

  // Apply role filter first
  if (_activeRoleFilter) {
    visible = visible.filter(u => (u.userRole || 'other') === _activeRoleFilter);
  }

  // Then apply text search
  if (q) {
    visible = visible.filter(u =>
      (u.displayName || '').toLowerCase().includes(q) ||
      (u.email || '').toLowerCase().includes(q));
  }

  _updateUserCountBadge(visible.length, S._allUsers.length);

  const note = (!q && !_activeRoleFilter && showAdminNote)
    ? `<div style="font-size:10px;color:var(--muted);padding:4px 0 6px;line-height:1.4">${t('m365_admin_note','Only showing your account. To list all users, an admin must grant <strong>User.Read.All</strong> consent.')}</div>`
    : '';

  const noMatch = (q || _activeRoleFilter) && !visible.length
    ? `<div style="padding:4px 0;color:var(--muted);font-size:11px">${t('m365_no_users_match','No users match')} "${esc(q || _activeRoleFilter)}"</div>`
    : '';

  // [css class, caption] of the platform badge. A user on both platforms is
  // labelled according to which sources are actually active right now.
  const platformBadge = function (u) {
    if (u.platform === 'both') {
      if (hasM365Src && effectiveGws) return ['plat-badge-both', 'M365 + GWS'];
      return effectiveGws ? ['plat-badge-google', 'GWS'] : ['plat-badge-m365', 'M365'];
    }
    return (u.platform || 'm365') === 'google'
      ? ['plat-badge-google', 'GWS']
      : ['plat-badge-m365', 'M365'];
  };

  const rows = visible.map(function (u) {
    const uid = esc(u.id);
    const name = esc(u.displayName);
    const badge = platformBadge(u);
    const youTag = u.isMe ? ' <span style=\'color:var(--accent);font-size:10px\'>(you)</span>' : '';
    const manualTag = u.manual ? ' <span style=\'color:var(--muted);font-size:10px\'>(manual)</span>' : '';
    const roleLabel = u.userRole === 'student' ? t('role_student','Elev')
      : u.userRole === 'staff' ? t('role_staff','Ansat')
      : t('role_other','Anden');
    // Only manually added users can be removed from the list.
    const removeBtn = u.manual
      ? `<button onclick="removeUser(this.getAttribute('data-uid'))" data-uid="${uid}" style="background:none;border:none;color:var(--muted);cursor:pointer;font-size:13px;padding:0;flex-shrink:0" title="Remove">×</button>`
      : '';

    return `
<label style="display:flex;align-items:center;gap:7px;padding:2px 0;cursor:pointer">
<input type="checkbox" class="account-check" data-id="${uid}" data-name="${name}" data-role="${esc(u.userRole || 'other')}"
${u.selected !== false ? 'checked' : ''}
onchange="onAccountCheckChange(this.getAttribute('data-id'), this.checked)">
<span style="flex:1;overflow:hidden">
<span style="display:block;font-weight:500;white-space:nowrap;overflow:hidden;text-overflow:ellipsis">${name}${youTag}${manualTag}</span>
<span style="color:var(--muted);font-size:10px;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;display:block">${esc(u.email)}</span>
</span>
<span style="font-size:9px;padding:1px 5px;border-radius:10px;flex-shrink:0;"
class="${badge[0]}">
${badge[1]}
</span>
<button type="button" onclick="cycleUserRole(this.getAttribute('data-uid'))"
data-uid="${uid}"
title="${t('m365_role_cycle_tip','Click to change role')}"
class="role-badge" style="font-size:9px;padding:1px 5px;cursor:pointer;flex-shrink:0;white-space:nowrap;border:none;${u.roleOverride ? 'color:var(--color-text-info);outline:1px solid var(--color-border-info)' : ''}">
${roleLabel}${u.roleOverride ? ' ✎' : ''}
</button>
${removeBtn}
</label>`;
  });

  list.innerHTML = note + noMatch + rows.join('');
}
|
||||
|
||||
// Write the user count into #userCountBadge:
//   total === 0      -> empty text
//   visible < total  -> "(visible / total)"
//   otherwise        -> "(total)"
// Does nothing when the badge element is absent.
function _updateUserCountBadge(visible, total) {
  const el = document.getElementById('userCountBadge');
  if (!el) return;

  let caption;
  if (total === 0) {
    caption = '';
  } else if (visible < total) {
    caption = '(' + visible + ' / ' + total + ')';
  } else {
    caption = '(' + total + ')';
  }
  el.textContent = caption;
}
|
||||
|
||||
// ── SKU debug — surface unknown tenant SKU IDs so they can be added to m365_skus.json ──
|
||||
// SKU debug modal: lists every license SKU id found on the tenant's users so
// that unknown ids can be copied into classification/m365_skus.json.
//
// Creates (or reuses) #skuDebugModal, fills it with a static frame, then
// fetches /api/users/license_debug and renders one row per unique SKU id,
// sorted by how many users carry it. Each row is tagged student / staff /
// unknown based on the student_ids and staff_ids lists in the response.
// SKU ids and names come from the server response and are HTML-escaped
// before being placed into markup.
async function showSkuDebug() {
  // Minimal HTML escaper for values that originate outside this file.
  const esc = function (value) {
    return String(value == null ? '' : value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  };

  let modal = document.getElementById('skuDebugModal');
  if (!modal) {
    modal = document.createElement('div');
    modal.id = 'skuDebugModal';
    modal.style.cssText = 'position:fixed;inset:0;background:rgba(0,0,0,.55);z-index:1000;display:flex;align-items:center;justify-content:center';
    // Clicking the dimmed backdrop (but not the dialog itself) closes the modal.
    modal.onclick = e => { if (e.target === modal) modal.remove(); };
    document.body.appendChild(modal);
  }
  modal.innerHTML = `<div style="background:var(--surface);border:1px solid var(--border);border-radius:10px;padding:22px 26px;width:min(520px,95vw);max-height:80vh;display:flex;flex-direction:column;gap:12px;font-size:12px">
<div style="display:flex;align-items:center;justify-content:space-between">
<strong style="font-size:13px">${t('m365_sku_debug_title','🔍 Tenant SKU IDs')}</strong>
<button onclick="document.getElementById('skuDebugModal').remove()" style="background:none;border:none;color:var(--muted);cursor:pointer;font-size:16px">×</button>
</div>
<div style="color:var(--muted);font-size:11px;line-height:1.5">${t('m365_sku_debug_desc','These are the raw SKU IDs assigned to your users. Any marked <b>❓ unknown</b> are not in <code>classification/m365_skus.json</code> — copy them in under <code>student_ids</code> or <code>staff_ids</code> and restart.')}</div>
<div id="skuDebugList" style="overflow-y:auto;flex:1;font-family:var(--mono);font-size:11px">Loading…</div>
<div style="display:flex;justify-content:flex-end;gap:8px;padding-top:4px;border-top:1px solid var(--border)">
<button onclick="document.getElementById('skuDebugModal').remove()" style="background:none;border:1px solid var(--border);color:var(--muted);padding:4px 14px;border-radius:6px;cursor:pointer">${t('btn_close','Close')}</button>
</div>
</div>`;

  const listEl = document.getElementById('skuDebugList');
  try {
    const r = await fetch('/api/users/license_debug');
    const d = await r.json();
    if (d.error) { listEl.textContent = 'Error: ' + d.error; return; }

    // Collect unique SKUs across all users: skuId -> {name, count}.
    // A prototype-less object keeps odd ids from colliding with Object.prototype.
    const skuSeen = Object.create(null);
    for (const u of (d.users || [])) {
      const ids = u.skuIds || [];
      const names = u.skuNames || [];
      ids.forEach(function (id, i) {
        // The first name seen for an id wins; skuNames is read in parallel to skuIds.
        if (!skuSeen[id]) skuSeen[id] = { name: names[i] || '', count: 0 };
        skuSeen[id].count++;
      });
    }

    const rows = Object.entries(skuSeen).sort((a,b) => b[1].count - a[1].count);
    if (!rows.length) { listEl.textContent = t('m365_sku_debug_none','No license data returned — check that the app has User.Read.All permission.'); return; }

    const knownStudent = new Set((d.student_ids || []));
    const knownStaff = new Set((d.staff_ids || []));

    listEl.innerHTML = rows.map(([id, info]) => {
      const known = knownStudent.has(id) ? '🎓 student'
        : knownStaff.has(id) ? '👔 staff'
        : '❓ unknown';
      const color = known.startsWith('❓') ? 'var(--danger)' : 'var(--accent)';
      return `<div style="display:flex;align-items:baseline;gap:8px;padding:3px 0;border-bottom:1px solid var(--border)">
<code style="flex:1;color:var(--text);user-select:all">${esc(id)}</code>
<span style="color:var(--muted);font-size:10px;white-space:nowrap">${esc(info.name) || '—'}</span>
<span style="color:${color};font-size:10px;white-space:nowrap;flex-shrink:0">${known} (${info.count})</span>
</div>`;
    }).join('');
  } catch(e) {
    listEl.textContent = 'Error: ' + e.message;
  }
}
|
||||
|
||||
// Re-render the account list. The admin note is requested when at most one
// non-manual user is known.
function filterUsers() {
  let directoryCount = 0;
  for (const entry of S._allUsers) {
    if (!entry.manual) directoryCount += 1;
  }
  renderAccountList(directoryCount <= 1);
}
|
||||
|
||||
// Advance a user's role override by one step and persist it on the server.
//
// Cycle: student → staff → other → (clear override, back to auto).
// The first click on a user without an override remembers the automatic role
// and pins the next role in the cycle; once a full cycle has been walked the
// override is cleared by sending role '' to /api/users/role_override.
//
// Local bookkeeping (_autoRole, _cycleSteps, userRole, roleOverride) is only
// committed after the server accepted the change, so a failed request leaves
// the user exactly where they were and the next click retries the same step.
async function cycleUserRole(id) {
  if (!id) { console.warn('cycleUserRole: no id'); return; }
  const u = S._allUsers.find(u => u.id === id);
  if (!u) { console.warn('cycleUserRole: user not found for id', id); return; }

  const cycle = ['student', 'staff', 'other'];
  const firstClick = !u.roleOverride;
  // Number of steps taken since the override was first pinned.
  const steps = firstClick ? 0 : (u._cycleSteps || 0) + 1;
  // '' means: full cycle completed — clear the override.
  const next = steps >= cycle.length
    ? ''
    : cycle[(cycle.indexOf(u.userRole) + 1) % cycle.length];

  try {
    const r = await fetch('/api/users/role_override', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({user_id: id, role: next})
    });
    const d = await r.json();
    if (d.error) { log('Role override failed: ' + d.error, 'err'); return; }

    // Server accepted — now update local state.
    if (next) {
      if (firstClick) u._autoRole = u.userRole; // remember original for clear
      u.userRole = next;
      u.roleOverride = true;
      u._cycleSteps = steps;
    } else {
      u.userRole = u._autoRole || u.userRole;
      u.roleOverride = false;
      u._autoRole = undefined;
      u._cycleSteps = 0;
    }

    // Update the role filter count badges and re-render
    renderAccountList(S._allUsers.filter(u => !u.manual).length <= 1);
    log((next ? t('m365_role_set', 'Role set') + ': ' + next : t('m365_role_cleared', 'Role override cleared')) + ' — ' + (u.displayName || id));
  } catch(e) {
    log('Role override error: ' + e.message, 'err');
  }
}
|
||||
|
||||
// Drop the user with the given id from S._allUsers and re-render the list.
// The admin note is requested when at most one non-manual user remains.
function removeUser(id) {
  const remaining = S._allUsers.filter(function (entry) { return entry.id !== id; });
  S._allUsers = remaining;

  const directoryCount = remaining.reduce(function (n, entry) {
    return entry.manual ? n : n + 1;
  }, 0);
  renderAccountList(directoryCount <= 1);
}
|
||||
|
||||
// Add the user typed into #addUserInput to the account list.
// The trimmed value is resolved through /api/users/lookup; a successful,
// not-yet-listed result is appended to S._allUsers flagged as manual and the
// list is re-rendered. The button next to the input is disabled and shows '…'
// while the lookup runs and is always restored to '+' afterwards.
async function addUserManually() {
  const field = document.getElementById('addUserInput');
  const upn = field.value.trim();
  if (!upn) return;

  // The trigger button is the element directly after the input.
  const trigger = field.nextElementSibling;
  trigger.disabled = true;
  trigger.textContent = '…';

  try {
    const resp = await fetch('/api/users/lookup?upn=' + encodeURIComponent(upn));
    const found = await resp.json();

    if (found.error) {
      alert('User not found: ' + found.error);
      return;
    }
    const alreadyListed = S._allUsers.some(function (entry) { return entry.id === found.id; });
    if (alreadyListed) {
      alert('User already in list.');
      return;
    }

    S._allUsers.push({...found, manual: true});
    field.value = '';
    const directoryCount = S._allUsers.filter(function (entry) { return !entry.manual; }).length;
    renderAccountList(directoryCount <= 1);
  } catch (e) {
    alert('Lookup failed: ' + e.message);
  } finally {
    trigger.disabled = false;
    trigger.textContent = '+';
  }
}
|
||||
|
||||
// Checkbox handler: record the checked state on the first entry of
// S._allUsers whose id matches. Unknown ids are ignored.
function onAccountCheckChange(id, checked) {
  for (const entry of S._allUsers) {
    if (entry.id === id) {
      entry.selected = checked;
      break;
    }
  }
}
|
||||
|
||||
// Check or uncheck every account that is currently rendered in #accountsList
// (so the search text and role filter are respected), both in S._allUsers and
// on the checkboxes themselves.
function selectAllAccounts(checked) {
  const boxes = Array.from(document.querySelectorAll('#accountsList .account-check'));
  const shownIds = new Set(boxes.map(function (box) { return box.dataset.id; }));

  for (const entry of S._allUsers) {
    if (shownIds.has(entry.id)) entry.selected = checked;
  }
  for (const box of boxes) {
    box.checked = checked;
  }
}
|
||||
|
||||
// Return the users that should be scanned, as {id, displayName, userRole}.
//
// Only M365 users are returned — Google users are handled separately via
// selectedGoogleEmails. A user without a platform field counts as M365, the
// same default renderAccountList applies, so such users (e.g. ones added by
// hand) are not dropped from the scan. Users whose `selected` flag is
// explicitly false are excluded, and the active role filter is respected so
// hidden users cannot sneak into the scan even if they were checked before
// the filter was applied.
//
// When that yields nothing, the checked checkboxes in the DOM are used as a
// fallback (covers the case where S._allUsers is not populated yet).
function getSelectedUsers() {
  let selected = S._allUsers.filter(u => {
    const plat = u.platform || 'm365';
    return u.selected !== false && (plat === 'm365' || plat === 'both');
  });

  if (_activeRoleFilter) {
    selected = selected.filter(u => (u.userRole || 'other') === _activeRoleFilter);
  }

  if (selected.length) {
    return selected.map(u => ({
      id: u.id, displayName: u.displayName, userRole: u.userRole || 'other'
    }));
  }

  // Fallback to DOM if S._allUsers not yet populated
  return Array.from(document.querySelectorAll('.account-check:checked')).map(cb => ({
    id: cb.dataset.id, displayName: cb.dataset.name, userRole: cb.dataset.role || 'other'
  }));
}
|
||||
|
||||
// ── Window exports (HTML handlers + cross-module calls) ─────────────────────
|
||||
// Expose the handlers referenced from HTML attributes and from other modules.
window.loadUsers = loadUsers;
window._mergeGoogleUsers = _mergeGoogleUsers;
window.toggleSection = toggleSection;
window.restoreSectionStates = restoreSectionStates;
window.updateRoleFilterCounts = updateRoleFilterCounts;
window.setRoleFilter = setRoleFilter;
window.loadLastScanSummary = loadLastScanSummary;
window.renderAccountList = renderAccountList;
window._updateUserCountBadge = _updateUserCountBadge;
window.showSkuDebug = showSkuDebug;
window.filterUsers = filterUsers;
window.cycleUserRole = cycleUserRole;
window.removeUser = removeUser;
window.addUserManually = addUserManually;
window.onAccountCheckChange = onAccountCheckChange;
window.selectAllAccounts = selectAllAccounts;
window.getSelectedUsers = getSelectedUsers;
// _activeRoleFilter is a primitive that setRoleFilter() reassigns, so a plain
// `window._activeRoleFilter = _activeRoleFilter` would freeze whatever value
// it had at load time. Expose a live accessor instead so readers always see
// the current filter and writers update the real variable.
// NOTE(review): assumes this file is loaded as an ES module (no pre-existing
// non-configurable window._activeRoleFilter) — confirm.
Object.defineProperty(window, '_activeRoleFilter', {
  configurable: true,
  enumerable: true,
  get: function () { return _activeRoleFilter; },
  set: function (value) { _activeRoleFilter = value; },
});
window._COLLAPSE_SECTIONS = _COLLAPSE_SECTIONS;
|
||||
225
static/js/viewer.js
Normal file
225
static/js/viewer.js
Normal file
@ -0,0 +1,225 @@
|
||||
// ── Viewer token management (#33) ─────────────────────────────────────────────
|
||||
// Share button → modal to create, copy, and revoke read-only viewer links.
|
||||
|
||||
// Open the share modal in its initial state: hide the "new link" row, clear
// the label, preset the expiry to '30', reload the token list, and show
// whether a viewer PIN is set (failures of that status request are ignored).
function openShareModal() {
  const byId = function (id) { return document.getElementById(id); };

  byId('shareBackdrop').classList.add('open');
  byId('shareNewLinkRow').style.display = 'none';
  byId('shareLabel').value = '';
  byId('shareExpiry').value = '30';
  _renderTokenList();

  fetch('/api/viewer/pin')
    .then((resp) => resp.json())
    .then((info) => {
      const statusEl = byId('sharePinStatus');
      if (!statusEl) return;
      statusEl.textContent = info.pin_set
        ? t('share_pin_set', 'Set')
        : t('share_pin_not_set', 'Not set');
    })
    .catch(() => {});
}
|
||||
|
||||
// Close the share modal by removing the 'open' class from its backdrop.
function closeShareModal() {
  const backdrop = document.getElementById('shareBackdrop');
  backdrop.classList.remove('open');
}
|
||||
|
||||
// Load the viewer tokens from /api/viewer/tokens and render one row per token
// into #shareTokenList, each with a Copy and a Revoke button.
//
// The token label is user-entered text and is HTML-escaped before it is
// inserted. The token value is never spliced into markup: the buttons get
// their click handlers attached in code. A non-OK or non-array response is
// reported as a load error instead of being shown as an empty list.
async function _renderTokenList() {
  const list = document.getElementById('shareTokenList');

  // Single-line status message in the given theme colour variable.
  const statusRow = function (colorVar, html) {
    return '<div style="font-size:12px;color:var(--' + colorVar + ');padding:4px 0">' + html + '</div>';
  };
  // Minimal HTML escaper for user-entered text.
  const esc = function (value) {
    return String(value == null ? '' : value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  };

  list.innerHTML = statusRow('muted', t('lbl_loading', 'Loading…'));
  try {
    const r = await fetch('/api/viewer/tokens');
    if (!r.ok) throw new Error('Server error ' + r.status);
    const tokens = await r.json();
    if (!Array.isArray(tokens)) throw new Error('Unexpected response');
    if (!tokens.length) {
      list.innerHTML = statusRow('muted', t('share_no_links', 'No active links.'));
      return;
    }

    list.innerHTML = '';
    tokens.forEach(tok => {
      // expires_at / last_used_at are scaled by 1000 before being handed to Date.
      const expires = tok.expires_at
        ? new Date(tok.expires_at * 1000).toLocaleDateString(undefined, {day:'numeric', month:'short', year:'numeric'})
        : t('share_expires_never', 'Never');
      const lastUsed = tok.last_used_at
        ? new Date(tok.last_used_at * 1000).toLocaleDateString(undefined, {day:'numeric', month:'short'})
        : '—';

      const labelHtml = tok.label
        ? esc(tok.label)
        : '<span style="color:var(--muted);font-style:italic">' + t('share_unlabelled', 'Unlabelled') + '</span>';

      const row = document.createElement('div');
      row.style.cssText = 'display:flex;align-items:center;gap:8px;padding:6px 10px;background:var(--bg);border:1px solid var(--border);border-radius:6px;font-size:12px';
      row.innerHTML =
        '<div style="flex:1;min-width:0">' +
          '<div style="font-weight:500;color:var(--text);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">' +
            labelHtml +
          '</div>' +
          '<div style="font-size:10px;color:var(--muted);margin-top:1px">' +
            t('share_expires_prefix', 'Expires:') + ' ' + expires + ' · ' + t('share_last_used', 'Last used:') + ' ' + lastUsed +
          '</div>' +
        '</div>' +
        '<button title="' + t('share_copy_link_prompt', 'Copy link:') + '" ' +
          'style="height:24px;padding:0 8px;background:none;border:1px solid var(--border);color:var(--muted);border-radius:4px;font-size:11px;cursor:pointer;flex-shrink:0">' + t('log_copy', 'Copy') + '</button>' +
        '<button title="' + t('share_revoke', 'Revoke') + '" ' +
          'style="height:24px;padding:0 8px;background:none;border:1px solid var(--danger);color:var(--danger);border-radius:4px;font-size:11px;cursor:pointer;flex-shrink:0">' + t('share_revoke', 'Revoke') + '</button>';

      // Wire the buttons in code so the token never passes through HTML/JS source.
      const buttons = row.querySelectorAll('button');
      const copyBtn = buttons[0];
      const revokeBtn = buttons[1];
      copyBtn.addEventListener('click', function () { copyTokenLink(tok.token, copyBtn); });
      revokeBtn.addEventListener('click', function () { revokeToken(tok.token, row); });

      list.appendChild(row);
    });
  } catch(e) {
    list.innerHTML = statusRow('danger', t('share_load_error', 'Failed to load links.'));
  }
}
|
||||
|
||||
// Create a new viewer link from the label / expiry inputs of the share modal.
// POSTs to /api/viewer/tokens, then shows the resulting /view?token=… URL in
// the "new link" row, resets the copy button caption and the label input, and
// reloads the token list. Failures are reported with alert().
async function createShareLink() {
  const labelInput = document.getElementById('shareLabel');
  const expiry = document.getElementById('shareExpiry').value;

  const payload = { label: labelInput.value.trim() };
  // An empty expiry value means no expires_days is sent.
  if (expiry) payload.expires_days = parseInt(expiry);

  try {
    const resp = await fetch('/api/viewer/tokens', {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify(payload),
    });
    if (!resp.ok) throw new Error('Server error ' + resp.status);

    const created = await resp.json();
    const link = window.location.origin + '/view?token=' + encodeURIComponent(created.token);

    document.getElementById('shareNewLinkUrl').value = link;
    document.getElementById('shareNewLinkRow').style.display = 'block';
    document.getElementById('shareCopyBtn').textContent = t('log_copy', 'Copy');
    document.getElementById('shareLabel').value = '';
    _renderTokenList();
  } catch (e) {
    alert(t('share_create_error', 'Failed to create link:') + ' ' + e.message);
  }
}
|
||||
|
||||
// Copy the URL shown in the "new link" row, using #shareCopyBtn for feedback.
function copyShareLink() {
  const urlField = document.getElementById('shareNewLinkUrl');
  const feedbackBtn = document.getElementById('shareCopyBtn');
  _copyText(urlField.value, feedbackBtn);
}
|
||||
|
||||
// Copy the /view?token=… URL for an existing token; btn receives the feedback.
function copyTokenLink(token, btn) {
  const query = '/view?token=' + encodeURIComponent(token);
  _copyText(window.location.origin + query, btn);
}
|
||||
|
||||
// Copy text to the clipboard and flash "Copied!" on btn for 1.8 s.
//
// Uses the async Clipboard API when it exists and falls back to a hidden
// <textarea> + document.execCommand('copy') otherwise. navigator.clipboard is
// undefined on insecure (plain HTTP) origins, so its presence must be checked
// up front: calling .writeText on it would throw synchronously and the
// promise-based fallback would never run. The fallback is also used when the
// Clipboard API rejects (e.g. permission denied).
//
// text: string to copy.
// btn:  element whose textContent is temporarily replaced as feedback.
function _copyText(text, btn) {
  // Temporarily swap the button caption, then restore it.
  const flashCopied = function () {
    const orig = btn.textContent;
    btn.textContent = t('share_copied', 'Copied!');
    setTimeout(() => { btn.textContent = orig; }, 1800);
  };

  // Legacy path; best effort, never throws.
  const legacyCopy = function () {
    let ta = null;
    try {
      ta = document.createElement('textarea');
      ta.value = text;
      ta.style.position = 'fixed'; ta.style.opacity = '0';
      document.body.appendChild(ta);
      ta.select();
      // execCommand returns false when the command is unsupported or disabled;
      // only claim success when it actually reported one.
      if (document.execCommand('copy')) flashCopied();
    } catch (_) {
      // Nothing more to try.
    } finally {
      // Always take the helper element out of the page again.
      if (ta && ta.parentNode) ta.parentNode.removeChild(ta);
    }
  };

  const clip = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
  if (clip && typeof clip.writeText === 'function') {
    clip.writeText(text).then(flashCopied).catch(legacyCopy);
  } else {
    legacyCopy();
  }
}
|
||||
|
||||
// Revoke a viewer token after user confirmation.
// On success the token's row is removed, the "No active links." placeholder
// is shown when the list became empty, and the "new link" row is hidden if
// the URL it displays contains the revoked token. Failures are reported with
// alert().
async function revokeToken(token, rowEl) {
  const confirmed = confirm(t('share_revoke_confirm', 'Revoke this link? Anyone using it will immediately lose access.'));
  if (!confirmed) return;

  try {
    const resp = await fetch('/api/viewer/tokens/' + encodeURIComponent(token), {method: 'DELETE'});
    if (!resp.ok) throw new Error('Server error ' + resp.status);

    rowEl.remove();

    const tokenList = document.getElementById('shareTokenList');
    if (tokenList.children.length === 0) {
      tokenList.innerHTML = '<div style="font-size:12px;color:var(--muted);padding:4px 0">' + t('share_no_links', 'No active links.') + '</div>';
    }

    // Hide the copy row if the just-revoked token was the last created
    const newLinkRow = document.getElementById('shareNewLinkRow');
    if (!newLinkRow) return;
    const shownUrl = document.getElementById('shareNewLinkUrl')?.value || '';
    if (shownUrl.includes(token)) {
      newLinkRow.style.display = 'none';
    }
  } catch (e) {
    alert(t('share_revoke_error', 'Failed to revoke:') + ' ' + e.message);
  }
}
|
||||
|
||||
// ── Viewer PIN — Settings UI ──────────────────────────────────────────────────
|
||||
|
||||
// Settings UI: fetch /api/viewer/pin and reflect whether a viewer PIN is set.
// When set, the status line shows a check mark and the "current PIN" row and
// the clear button are made visible; otherwise both are hidden. Missing
// elements are skipped and any error is ignored.
async function stLoadViewerPinStatus() {
  try {
    const resp = await fetch('/api/viewer/pin');
    const info = await resp.json();

    const statusEl = document.getElementById('stViewerPinStatus');
    const currentRow = document.getElementById('stViewerCurrentPinRow');
    const clearBtn = document.getElementById('stViewerPinClearBtn');

    const pinSet = Boolean(info.pin_set);
    if (statusEl) {
      statusEl.textContent = pinSet
        ? '\u2714 ' + t('viewer_pin_is_set', 'Viewer PIN is set')
        : t('viewer_pin_not_set_msg', 'No PIN set \u2014 /view requires a token link');
    }

    // '' restores the stylesheet's display value, 'none' hides the element.
    const display = pinSet ? '' : 'none';
    if (currentRow) currentRow.style.display = display;
    if (clearBtn) clearBtn.style.display = display;
  } catch (e) {}
}
|
||||
|
||||
// Settings UI: validate and save a new viewer PIN.
// The new PIN must be non-empty and consist of 4–8 digits. It is POSTed to
// /api/viewer/pin together with the current PIN; the outcome is written to
// #stViewerPinSaveStatus. On success both PIN inputs are cleared and the PIN
// status is reloaded.
async function stSaveViewerPin() {
  const newPin = (document.getElementById('stViewerNewPin')?.value || '').trim();
  const currentPin = (document.getElementById('stViewerCurrentPin')?.value || '').trim();
  const st = document.getElementById('stViewerPinSaveStatus');

  // Write a coloured status message; the text is only built when the status
  // element exists.
  const report = function (color, makeText) {
    if (!st) return;
    st.style.color = color;
    st.textContent = makeText();
  };

  if (!newPin) {
    report('var(--danger)', () => t('m365_settings_pin_required', 'PIN is required.'));
    return;
  }
  if (!/^\d{4,8}$/.test(newPin)) {
    report('var(--danger)', () => t('viewer_pin_format', 'PIN must be 4\u20138 digits.'));
    return;
  }

  report('var(--muted)', () => t('viewer_pin_saving', 'Saving\u2026'));
  try {
    const resp = await fetch('/api/viewer/pin', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({pin: newPin, current_pin: currentPin}),
    });
    const result = await resp.json();
    if (!resp.ok) {
      report('var(--danger)', () => result.error || 'Error.');
      return;
    }

    report('var(--accent)', () => '\u2714 ' + t('viewer_pin_saved', 'PIN saved'));
    for (const fieldId of ['stViewerNewPin', 'stViewerCurrentPin']) {
      if (document.getElementById(fieldId)) document.getElementById(fieldId).value = '';
    }
    stLoadViewerPinStatus();
  } catch (e) {
    report('var(--danger)', () => e.message);
  }
}
|
||||
|
||||
// Settings UI: remove the viewer PIN.
// Requires the current PIN (the input is focused when it is empty) and a
// confirmation, then sends DELETE /api/viewer/pin. The outcome is written to
// #stViewerPinSaveStatus and the PIN status is reloaded on success.
async function stClearViewerPin() {
  const currentPin = (document.getElementById('stViewerCurrentPin')?.value || '').trim();
  const st = document.getElementById('stViewerPinSaveStatus');

  // Write a coloured status message; the text is only built when the status
  // element exists.
  const report = function (color, makeText) {
    if (!st) return;
    st.style.color = color;
    st.textContent = makeText();
  };

  if (!currentPin) {
    report('var(--danger)', () => t('m365_settings_pin_required', 'PIN is required.'));
    document.getElementById('stViewerCurrentPin')?.focus();
    return;
  }

  const confirmed = confirm(t('viewer_pin_clear_confirm', 'Remove the viewer PIN? /view will require a token link again.'));
  if (!confirmed) return;

  try {
    const resp = await fetch('/api/viewer/pin', {
      method: 'DELETE',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({current_pin: currentPin}),
    });
    const result = await resp.json();
    if (!resp.ok) {
      report('var(--danger)', () => result.error || 'Error.');
      return;
    }

    report('var(--muted)', () => t('viewer_pin_cleared', 'PIN cleared'));
    stLoadViewerPinStatus();
  } catch (e) {
    report('var(--danger)', () => e.message);
  }
}
|
||||
|
||||
// ── Window exports ────────────────────────────────────────────────────────────
|
||||
// Publish the share-modal and viewer-PIN handlers on window.
Object.assign(window, {
  openShareModal: openShareModal,
  closeShareModal: closeShareModal,
  createShareLink: createShareLink,
  copyShareLink: copyShareLink,
  copyTokenLink: copyTokenLink,
  revokeToken: revokeToken,
  stLoadViewerPinStatus: stLoadViewerPinStatus,
  stSaveViewerPin: stSaveViewerPin,
  stClearViewerPin: stClearViewerPin,
});
|
||||
616
static/style.css
Normal file
616
static/style.css
Normal file
@ -0,0 +1,616 @@
|
||||
/* Default theme tokens. [data-theme="light"] overrides a subset of these. */
:root {
  /* Surfaces and lines */
  --pmgmt-divider: #484850;
  --bg: #0f0f11;
  --surface: #18181c;
  --border: #2a2a30;

  /* Accents */
  --accent: #0078d4;
  --accent2: #f5a623;

  /* Text */
  --text: #e8e6e1;
  --muted: #6b6970;

  /* Status colours */
  --success: #2ecc71;
  --danger: #e74c3c;

  /* Font stacks */
  --mono: 'IBM Plex Mono', monospace;
  --sans: 'IBM Plex Sans', sans-serif;
}
|
||||
/* Light theme overrides. Tokens not listed here (--accent2, --mono, --sans)
   keep their :root values. */
[data-theme="light"] {
  /* Surfaces and lines */
  --pmgmt-divider: #b0b0bc;
  --bg: #f5f5f7;
  --surface: #ffffff;
  --border: #d8d8df;

  /* Accent */
  --accent: #0060b0;

  /* Text */
  --text: #1a1a1f;
  --muted: #888891;

  /* Status colours */
  --success: #1a9952;
  --danger: #c0392b;
}
|
||||
/* Reset: border-box sizing, no default spacing, and a short colour
   transition on every element and its ::before/::after pseudo-elements. */
*,
*::before,
*::after {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
  transition: background-color .15s, border-color .15s, color .1s;
}

/* Page canvas */
html,
body {
  height: 100%;
  margin: 0;
  background: var(--bg);
  color: var(--text);
  font-family: var(--sans);
  box-sizing: border-box;
}

/* Form controls */
input,
select,
textarea {
  background: var(--surface);
  color: var(--text);
  border: 1px solid var(--border);
  border-radius: 6px;
  padding: 7px 10px;
  font-family: var(--sans);
  font-size: 13px;
}

input:focus,
select:focus {
  outline: none;
  border-color: var(--accent);
}

button {
  cursor: pointer;
  font-family: var(--sans);
  font-size: 13px;
}
|
||||
|
||||
/* Layout */
.layout {
  display: flex;
  height: 100%;
  overflow: hidden;
}

/* Thin scrollbars for the two scrolling sidebar panes (WebKit pseudo-elements) */
#sourcesPanel::-webkit-scrollbar,
#accountsList::-webkit-scrollbar {
  width: 4px;
}

#sourcesPanel::-webkit-scrollbar-track,
#accountsList::-webkit-scrollbar-track {
  background: transparent;
}

#sourcesPanel::-webkit-scrollbar-thumb,
#accountsList::-webkit-scrollbar-thumb {
  background: var(--border);
  border-radius: 2px;
}

/* Same look through the standard scrollbar properties */
#sourcesPanel,
#accountsList {
  scrollbar-width: thin;
  scrollbar-color: var(--border) transparent;
}

/* Sidebar */
.sidebar {
  width: 260px;
  min-width: 260px;
  display: flex;
  flex-direction: column;
  background: var(--surface);
  border-right: 1px solid var(--border);
  overflow: hidden;
}

.sidebar-header {
  padding: 10px 12px 8px;
  border-bottom: 1px solid var(--border);
}

.sidebar-title {
  font-size: 15px;
  font-weight: 600;
}

.sidebar-section {
  padding: 7px 12px;
  border-bottom: 1px solid var(--border);
}

.section-label {
  font-size: 10px;
  font-weight: 600;
  color: var(--muted);
  text-transform: uppercase;
  letter-spacing: .05em;
  margin-bottom: 4px;
}

/* Collapse toggle; rotated while its parent carries .section-collapsed */
.section-collapse-btn {
  background: none;
  border: none;
  color: var(--muted);
  cursor: pointer;
  font-size: 10px;
  padding: 0;
  line-height: 1;
  transition: transform .15s;
}

.section-collapsed > .section-collapse-btn {
  transform: rotate(-90deg);
}

/* Last-scan summary card */
.last-scan-summary {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 14px;
  padding: 24px 16px;
}

.last-scan-card {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 10px;
  padding: 16px 24px;
  min-width: 280px;
  text-align: center;
}

.last-scan-card h3 {
  font-size: 12px;
  font-weight: 600;
  color: var(--muted);
  text-transform: uppercase;
  letter-spacing: .05em;
  margin: 0 0 12px;
}

.last-scan-stats {
  display: flex;
  gap: 24px;
  justify-content: center;
}

.last-scan-stat {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 2px;
}

.last-scan-stat .val {
  font-size: 22px;
  font-weight: 700;
  color: var(--text);
}

.last-scan-stat .lbl {
  font-size: 10px;
  color: var(--muted);
}
|
||||
input[type="text"], input[type="number"], input[type="date"] {
|
||||
width: 100%; background: var(--bg); border: 1px solid var(--border); border-radius: 6px;
|
||||
color: var(--text); font-family: var(--mono); font-size: 12px; padding: 0 10px;
|
||||
height: 26px; box-sizing: border-box; outline: none;
|
||||
}
|
||||
[data-theme="dark"] input[type="date"] { color-scheme: dark; }
|
||||
[data-theme="light"] input[type="date"] { color-scheme: light; }
|
||||
input[type="text"]:focus, input[type="number"]:focus, input[type="date"]:focus {
|
||||
border-color: var(--accent);
|
||||
}
|
||||
.datepicker-wrap { margin-bottom: 4px; }
|
||||
.date-presets { display: flex; margin-top: 6px; background: var(--bg); border: 1px solid var(--border); border-radius: 6px; overflow: hidden; }
|
||||
.date-preset {
|
||||
flex: 1; min-width: 0; background: none; border: none; border-right: 1px solid var(--border);
|
||||
color: var(--muted); font-family: var(--mono); font-size: 10px; padding: 0 2px; height: 26px;
|
||||
cursor: pointer; transition: background .15s, color .15s; letter-spacing: 0.05em;
|
||||
}
|
||||
.date-preset:last-child { border-right: none; }
|
||||
.date-preset:hover { background: var(--surface); color: var(--text); }
|
||||
.date-preset.selected { background: var(--accent); color: #fff; }
|
||||
.role-filter-btn { flex: 1; font-size: 10px; height: 26px; padding: 0 4px; cursor: pointer; border: none; background: none; color: var(--muted); }
|
||||
.role-filter-btn:hover { background: var(--surface); color: var(--text); }
|
||||
.role-filter-btn.rf-sep { border-right: 1px solid var(--border); }
|
||||
.toggle-row { display: flex; align-items: center; justify-content: space-between; margin-bottom: 5px; }
|
||||
.toggle-label { font-size: 12px; color: var(--text); }
|
||||
/* Hint icon + speech bubble tooltip */
|
||||
.hint-wrap { position:relative; display:inline-flex; align-items:center; margin-left:5px; }
|
||||
.hint-icon {
|
||||
display:inline-flex; align-items:center; justify-content:center;
|
||||
width:14px; height:14px; border-radius:50%;
|
||||
border:1px solid var(--muted); color:var(--muted);
|
||||
font-size:9px; font-weight:700; cursor:pointer;
|
||||
flex-shrink:0; user-select:none; line-height:1;
|
||||
transition:border-color .15s, color .15s;
|
||||
}
|
||||
.hint-icon:hover, .hint-icon.active { border-color:var(--accent); color:var(--accent); }
|
||||
.hint-bubble {
|
||||
display:none; position:fixed;
|
||||
background:var(--surface); border:1px solid var(--border);
|
||||
border-radius:8px; padding:7px 10px;
|
||||
font-size:10px; color:var(--muted); line-height:1.5;
|
||||
width:200px; z-index:9999; box-shadow:0 4px 16px rgba(0,0,0,.35);
|
||||
pointer-events:none;
|
||||
}
|
||||
.hint-bubble::before {
|
||||
content:''; position:absolute; right:100%; top:50%; transform:translateY(-50%);
|
||||
border:5px solid transparent; border-right-color:var(--border);
|
||||
}
|
||||
.hint-bubble::after {
|
||||
content:''; position:absolute; right:calc(100% - 1px); top:50%; transform:translateY(-50%);
|
||||
border:5px solid transparent; border-right-color:var(--surface);
|
||||
}
|
||||
/* bubble display controlled by toggleHint() JS */
|
||||
.toggle { position: relative; width: 32px; height: 18px; flex-shrink: 0; }
|
||||
.toggle input { opacity: 0; width: 0; height: 0; }
|
||||
.toggle-slider {
|
||||
position: absolute; inset: 0; background: var(--border); border-radius: 18px;
|
||||
cursor: pointer; transition: 0.2s;
|
||||
}
|
||||
.toggle-slider::before {
|
||||
content: ''; position: absolute; width: 14px; height: 14px; left: 2px; top: 2px;
|
||||
background: var(--muted); border-radius: 50%; transition: 0.2s;
|
||||
}
|
||||
.toggle input:checked + .toggle-slider { background: var(--accent); }
|
||||
.toggle input:checked + .toggle-slider::before { transform: translateX(14px); background: #fff; }
|
||||
.main { flex: 1; display: flex; flex-direction: column; overflow: hidden; }
|
||||
|
||||
/* Auth panel */
|
||||
.auth-panel { flex: 1; display: flex; align-items: center; justify-content: center; padding: 40px; }
|
||||
.auth-card { background: var(--surface); border: 1px solid var(--border); border-radius: 12px; padding: 32px; width: 100%; max-width: 480px; }
|
||||
.auth-title { font-size: 20px; font-weight: 600; margin-bottom: 6px; }
|
||||
.auth-sub { font-size: 13px; color: var(--muted); margin-bottom: 24px; line-height: 1.5; }
|
||||
.form-row { margin-bottom: 14px; }
|
||||
.form-label { font-size: 12px; color: var(--muted); margin-bottom: 4px; display: block; }
|
||||
.form-row input { width: 100%; }
|
||||
.btn-primary { background: var(--accent); color: #fff; border: none; padding: 9px 20px; border-radius: 7px; font-weight: 500; }
|
||||
.btn-primary:hover { filter: brightness(1.1); }
|
||||
|
||||
/* Device code flow */
|
||||
.device-code-box { background: var(--bg); border: 1px solid var(--border); border-radius: 8px; padding: 20px; margin: 16px 0; text-align: center; }
|
||||
.device-code { font-family: var(--mono); font-size: 28px; font-weight: 600; letter-spacing: .15em; color: var(--accent); margin: 10px 0; }
|
||||
.device-url { font-size: 13px; color: var(--muted); }
|
||||
.device-url a { color: var(--accent); }
|
||||
.auth-status { font-size: 13px; margin-top: 12px; padding: 8px 12px; border-radius: 6px; }
|
||||
.auth-status.waiting { background: rgba(0,120,212,.1); color: var(--accent); }
|
||||
.auth-status.success { background: rgba(46,204,113,.1); color: var(--success); }
|
||||
.auth-status.error { background: rgba(231,76,60,.1); color: var(--danger); }
|
||||
|
||||
/* Source selector */
|
||||
.source-check { display: flex; align-items: center; gap: 6px; padding: 3px 0; cursor: pointer; }
|
||||
.source-check input[type=checkbox] { width: 15px; height: 15px; accent-color: var(--accent); cursor: pointer; }
|
||||
.account-check { width: 14px; height: 14px; accent-color: var(--accent); cursor: pointer; flex-shrink: 0; margin: 0; }
|
||||
.source-icon { font-size: 13px; }
|
||||
.source-label { font-size: 12px; }
|
||||
|
||||
/* Topbar */
|
||||
.topbar { display: flex; align-items: center; gap: 10px; padding: 10px 16px;
|
||||
position: sticky; top: 0; z-index: 20;
|
||||
border-bottom: 1px solid var(--border); background: var(--surface); flex-shrink: 0; }
|
||||
.scan-btn { background: var(--accent); color: #fff; border: none; height: 26px; padding: 0 16px; border-radius: 7px; font-weight: 500; font-size: 13px; cursor: pointer; }
|
||||
.scan-btn:hover:not(:disabled) { filter: brightness(1.1); }
|
||||
.scan-btn:disabled { opacity: .5; cursor: default; }
|
||||
.stop-btn { background: transparent; color: var(--danger); border: 1px solid var(--danger); height: 26px; padding: 0 12px; border-radius: 7px; font-size: 13px; cursor: pointer; }
|
||||
.stats-pill { background: var(--bg); border: 1px solid var(--border); border-radius: 20px; height: 26px; padding: 0 12px; font-size: 12px; color: var(--muted); display: flex; align-items: center; }
|
||||
.stats-pill span { color: var(--text); font-weight: 600; }
|
||||
.spacer { flex: 1; }
|
||||
.theme-btn { background: none; border: 1px solid var(--border); color: var(--muted); height: 26px; padding: 0 10px; border-radius: 7px; font-size: 14px; cursor: pointer; }
|
||||
.theme-btn:hover { border-color: var(--accent); color: var(--accent); }
|
||||
.topbar-sep { width: 1px; height: 20px; background: var(--border); flex-shrink: 0; margin: 0 2px; }
|
||||
.config-group { display: flex; align-items: center; background: var(--bg); border: 1px solid var(--border); border-radius: 7px; overflow: hidden; }
|
||||
.config-group button { background: none; border: none; border-right: 1px solid var(--border); color: var(--muted); padding: 0 11px; height: 26px; font-size: 11px; cursor: pointer; white-space: nowrap; }
|
||||
.config-group button:last-child { border-right: none; }
|
||||
.config-group button:hover { background: var(--surface); color: var(--text); }
|
||||
|
||||
/* Progress bar */
|
||||
.progress-bar { display: flex; align-items: center; gap: 10px; padding: 0 16px;
|
||||
height: 32px; min-height: 32px; max-height: 32px;
|
||||
background: var(--bg); border-top: 1px solid var(--border); font-size: 12px; color: var(--muted); flex-shrink: 0;
|
||||
line-height: 1; overflow: hidden; }
|
||||
.progress-who { display:flex; align-items:center; gap:5px; flex-shrink:0; min-width:0; max-width:45%; overflow:hidden; }
|
||||
.progress-src-pill { font-size:9px; font-weight:500; padding:1px 5px; border-radius:10px; flex-shrink:0; white-space:nowrap; }
|
||||
.progress-src-m365 { background:#E6F1FB; color:#185FA5; }
|
||||
.progress-src-google { background:#EAF3DE; color:#3B6D11; }
|
||||
.progress-src-file { background:#EDE8F5; color:#5a4080; }
|
||||
.progress-user { font-size:11px; color:var(--muted); white-space:nowrap; overflow:hidden; text-overflow:ellipsis; }
|
||||
.progress-file { flex:1; font-size:11px; color:var(--muted); white-space:nowrap; overflow:hidden; text-overflow:ellipsis; opacity:0.7; text-align:right; }
|
||||
.progress-track { width: 180px; height: 6px; background: var(--border); border-radius: 3px; flex-shrink: 0; display: flex; overflow: hidden; }
|
||||
.progress-seg { height: 100%; flex: 1; background: var(--border); position: relative; }
|
||||
.progress-seg + .progress-seg { border-left: 1px solid var(--bg); }
|
||||
.progress-seg-fill { height: 100%; width: 0; transition: width .3s ease; }
|
||||
.progress-phase { font-size: 11px; color: var(--accent); flex-shrink:0; }
|
||||
|
||||
/* Filter bar */
|
||||
.filter-bar { display: flex; align-items: center; gap: 8px; padding: 6px 16px;
|
||||
border-bottom: 1px solid var(--border); background: var(--surface); flex-shrink: 0; }
|
||||
.filter-bar input, .filter-bar select { height: 26px; box-sizing: border-box; padding: 0 8px; font-size: 12px; }
|
||||
.filter-bar input { width: 180px; }
|
||||
.filter-bar select { width: 130px; }
|
||||
.filter-bar button { height: 26px; padding: 0 10px; border-radius: 5px; font-size: 12px; cursor: pointer; box-sizing: border-box; }
|
||||
.filter-clear { background: none; border: 1px solid var(--border); color: var(--muted); font-size: 12px; height: 26px; padding: 0 10px; border-radius: 5px; cursor: pointer; box-sizing: border-box; }
|
||||
.filter-clear:hover { border-color: var(--danger); color: var(--danger); }
|
||||
|
||||
/* Grid */
|
||||
.grid-area { flex: 1; overflow-y: auto; padding: 24px; min-width: 0; scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
|
||||
.grid-area::-webkit-scrollbar { width: 4px; }
|
||||
.grid-area::-webkit-scrollbar-track { background: transparent; }
|
||||
.grid-area::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
|
||||
.grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(200px,1fr)); gap: 14px; }
|
||||
/* Preview panel */
|
||||
.content-area { flex: 1; display: flex; overflow: hidden; min-height: 0; }
|
||||
.preview-panel {
|
||||
width: 420px; flex-shrink: 0;
|
||||
display: flex; flex-direction: row;
|
||||
background: var(--surface);
|
||||
overflow: hidden;
|
||||
transition: none;
|
||||
}
|
||||
.preview-panel.hidden { width: 0; }
|
||||
.preview-resize-handle {
|
||||
width: 8px; flex-shrink: 0; cursor: col-resize; position: relative;
|
||||
background: transparent; border-left: 1px solid var(--border);
|
||||
}
|
||||
.preview-resize-handle::after {
|
||||
content: ''; position: absolute; inset: 0 -4px; /* extend hit area 4px each side */
|
||||
}
|
||||
.preview-resize-handle:hover { background: var(--accent); opacity: 0.35; }
|
||||
.preview-inner { flex: 1; display: flex; flex-direction: column; overflow: hidden; }
|
||||
.preview-header {
|
||||
display: flex; align-items: center; justify-content: space-between;
|
||||
padding: 10px 14px; border-bottom: 1px solid var(--border); flex-shrink: 0;
|
||||
}
|
||||
.preview-title { font-size: 12px; font-weight: 600; color: var(--text); overflow: hidden; text-overflow: ellipsis; white-space: nowrap; flex: 1; margin-right: 8px; }
|
||||
.preview-close { background: none; border: none; color: var(--muted); font-size: 18px; cursor: pointer; padding: 0; line-height: 1; }
|
||||
.preview-close:hover { color: var(--text); }
|
||||
.preview-body { flex: 1; overflow: hidden; position: relative; }
|
||||
.preview-body iframe { width: 100%; height: 100%; border: none; display: block; overflow-x: hidden; }
|
||||
.preview-loading { position: absolute; inset: 0; display: flex; align-items: center; justify-content: center; color: var(--muted); font-size: 12px; }
|
||||
.preview-meta { padding: 10px 14px; border-top: 1px solid var(--border); font-size: 11px; color: var(--muted); display: flex; gap: 10px; flex-wrap: wrap; flex-shrink: 0; }
|
||||
.preview-open-btn { margin-left: auto; background: var(--accent); color: #fff; border: none; border-radius: 5px; padding: 4px 10px; font-size: 11px; cursor: pointer; white-space: nowrap; }
|
||||
.card.selected { outline: 2px solid var(--accent); outline-offset: 2px; }
|
||||
/* position: relative makes the card the containing block for the absolutely positioned .card-delete-btn (its top/right offsets are meant to be measured from the card). */
.card { position: relative; background: var(--surface); border: 1px solid var(--border); border-radius: 10px; overflow: hidden; cursor: pointer; transition: border-color .15s, box-shadow .15s; }
|
||||
.card:hover { border-color: var(--accent); box-shadow: 0 0 0 1px var(--accent); }
|
||||
.card.list-view { display: flex; align-items: center; gap: 12px; padding: 10px 14px; border-radius: 8px; }
|
||||
.thumb-wrap { aspect-ratio: 7/9; overflow: hidden; background: var(--bg); }
|
||||
.thumb-wrap img { width: 100%; height: 100%; object-fit: cover; }
|
||||
.card-info { padding: 10px 12px; }
|
||||
.card-info.list-info { flex: 1; padding: 0; }
|
||||
.card-name { font-size: 12px; font-weight: 500; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; margin-bottom: 3px; }
|
||||
.card-meta { font-size: 11px; color: var(--muted); }
|
||||
.plat-badge-m365 { background:#E6F1FB; color:#185FA5; font-weight:500; }
|
||||
.plat-badge-google { background:#EAF3DE; color:#3B6D11; font-weight:500; }
|
||||
.plat-badge-both { background:linear-gradient(90deg,#E6F1FB 50%,#EAF3DE 50%); color:#1a4a1a; font-weight:500; border:0.5px solid #b5d4b5; }
|
||||
.role-badge { font-size:9px; padding:1px 6px; border-radius:10px; background:#D3D1C7; color:#444441; font-weight:500; margin-right:3px; }
|
||||
.card-source { font-size: 10px; color: var(--muted); margin-top: 2px; display: flex; align-items: center; gap: 4px; flex-wrap: wrap; }
|
||||
.source-badge { font-size: 9px; padding: 1px 5px; border-radius: 10px; font-weight: 500; white-space: nowrap; }
|
||||
.cpr-badge { background: var(--danger); color: #fff; font-size: 9px; font-weight: 600; padding: 1px 5px; border-radius: 10px; display: inline-block; }
|
||||
.card-delete-btn { position:absolute; top:6px; right:6px; background:rgba(0,0,0,0.45); color:#fff; border:none; border-radius:50%; width:22px; height:22px; font-size:13px; line-height:22px; text-align:center; cursor:pointer; opacity:0.35; transition:opacity .15s; padding:0; z-index:1; }
|
||||
.card:hover .card-delete-btn { opacity:1; }
|
||||
.card.list-view .card-delete-btn { position:static; opacity:1; background:transparent; color:var(--muted); flex-shrink:0; }
|
||||
.bulk-delete-modal { max-width:460px; }
|
||||
.bulk-criteria-row { display:flex; align-items:center; gap:8px; margin-bottom:8px; font-size:12px; }
|
||||
.bulk-criteria-row label { flex:0 0 130px; color:var(--muted); }
|
||||
/* --bg2 is not declared in the :root or light-theme variable blocks; the var(--bg) fallback keeps the background declaration valid when it is missing. */
.bulk-criteria-row select, .bulk-criteria-row input { flex:1; font-size:12px; padding:4px 7px; background:var(--bg2, var(--bg)); border:1px solid var(--border); border-radius:5px; color:var(--text); }
|
||||
.delete-progress { font-size:12px; color:var(--muted); margin-top:10px; min-height:18px; }
|
||||
.btn-danger { background:var(--danger); color:#fff; border:none; padding:7px 16px; border-radius:6px; font-size:12px; cursor:pointer; font-weight:600; }
|
||||
.btn-danger:disabled { opacity:.5; cursor:not-allowed; }
|
||||
|
||||
/* Profile management modal (#15d) */
|
||||
.pmgmt-backdrop {
|
||||
display: none; position: fixed; inset: 0;
|
||||
background: rgba(0,0,0,0.55); z-index: 1000;
|
||||
align-items: center; justify-content: center;
|
||||
}
|
||||
.pmgmt-backdrop.open { display: flex; }
|
||||
.pmgmt-modal {
|
||||
display: flex; gap: 0; width: min(820px,96vw); max-height: 80vh; overflow: hidden;
|
||||
background: var(--surface); border-radius: 10px;
|
||||
border: 1px solid var(--border); box-shadow: 0 8px 32px rgba(0,0,0,.4); font-size: 12px; color: var(--text);
|
||||
|
||||
}
|
||||
.pmgmt-panel-list { width: 260px; min-width: 260px; display: flex; flex-direction: column; border-right: 1px solid var(--pmgmt-divider); }
|
||||
.pmgmt-panel-editor { flex: 1; display: none; flex-direction: column; overflow: hidden; }
|
||||
.pmgmt-panel-editor.open { display: flex; }
|
||||
.pmgmt-list { overflow-y: auto; flex: 1; display: flex; flex-direction: column; }
|
||||
.pmgmt-row { cursor: pointer; padding: 8px 12px; border-bottom: 1px solid var(--pmgmt-divider); }
|
||||
.pmgmt-row:hover { background: var(--bg); }
|
||||
.pmgmt-row.active { background: rgba(99,126,210,.15); }
|
||||
.pmgmt-row.active .pmgmt-name { color: var(--accent); }
|
||||
.pmgmt-row.active .pmgmt-sources, .pmgmt-row.active .pmgmt-meta { opacity: .7; }
|
||||
.pmgmt-row-head { display: flex; align-items: center; gap: 8px; }
|
||||
.pmgmt-name { font-weight: 500; flex: 1; font-size: 13px; color: var(--text); }
|
||||
.pmgmt-meta { font-size: 10px; color: var(--muted); margin-top: 1px; }
|
||||
.pmgmt-desc { font-size: 11px; color: var(--muted); margin-top: 2px; font-style: italic; }
|
||||
.pmgmt-sources { font-size: 11px; color: var(--muted); margin-top: 1px; }
|
||||
.pmgmt-actions { display: flex; gap: 5px; flex-shrink: 0; }
|
||||
.pmgmt-actions button { border: 1px solid var(--border); background: none; color: var(--muted); border-radius: 5px; height: 26px; padding: 0 8px; font-size: 11px; cursor: pointer; box-sizing: border-box; }
|
||||
.pmgmt-actions button:hover { color: var(--text); border-color: var(--text); }
|
||||
.pmgmt-actions button.btn-use { background: var(--accent); color: #fff; border-color: var(--accent); }
|
||||
.pmgmt-actions button.btn-del { color: var(--danger); border-color: var(--danger); }
|
||||
.pmgmt-empty { color: var(--muted); font-size: 12px; text-align: center; padding: 24px 12px; }
|
||||
.pmgmt-editor-body { flex: 1; overflow: hidden; padding: 14px 16px; display: flex; flex-direction: column; gap: 14px; }
|
||||
.pmgmt-editor-section-title { font-size: 10px; font-weight: 600; color: var(--text); text-transform: uppercase; letter-spacing: .07em; margin-bottom: 8px; padding-bottom: 5px; border-bottom: 1px solid var(--pmgmt-divider); display: flex; align-items: center; justify-content: space-between; opacity: .9; }
|
||||
.pmgmt-account-list { display: flex; flex-direction: column; gap: 3px; max-height: 160px; overflow-y: auto; margin-top: 4px; scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
|
||||
.pmgmt-account-list::-webkit-scrollbar { width: 4px; }
|
||||
.pmgmt-account-list::-webkit-scrollbar-track { background: transparent; }
|
||||
.pmgmt-account-list::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
|
||||
#peSourcesPanel { max-height: 130px; overflow-y: auto; scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
|
||||
#peSourcesPanel::-webkit-scrollbar { width: 4px; }
|
||||
#peSourcesPanel::-webkit-scrollbar-track { background: transparent; }
|
||||
#peSourcesPanel::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
|
||||
.pmgmt-acct-row { display: flex; align-items: center; gap: 6px; font-size: 12px; cursor: pointer; padding: 2px 0; }
|
||||
.pmgmt-opt-row { display: flex; align-items: center; justify-content: space-between; font-size: 12px; color: var(--text); }
|
||||
.pmgmt-settings-col { flex: 1; padding-left: 16px; border-left: 1px solid var(--pmgmt-divider); }
|
||||
.pmgmt-opt-row input[type=number] { width: 60px; font-size: 12px; height: 26px; box-sizing: border-box; }
|
||||
.pmgmt-acct-row span:first-of-type { flex:1; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
||||
/* Profile list column. Border colour uses the app's own --border variable
   (templates/CLAUDE.md: never use claude.ai --color-* system variables). */
.pmgmt-panel-list { width: 260px; min-width: 260px; display: flex; flex-direction: column; border-right: 0.5px solid var(--border); }
|
||||
.pmgmt-panel-editor { flex: 1; display: none; flex-direction: column; }
|
||||
.pmgmt-panel-editor.open { display: flex; }
|
||||
/* Profile list rows and their hover / selected states.
   All colours use the app's own variables (templates/CLAUDE.md forbids the
   claude.ai --color-* system variables that were used here before).
   The selected row is marked by accent-coloured text on the --bg2 background. */
.pmgmt-row { cursor: pointer; padding: 8px 12px; border-radius: 0; border-bottom: 0.5px solid var(--border); }
.pmgmt-row:hover { background: var(--bg2); }
.pmgmt-row.active { background: var(--bg2); }
.pmgmt-row.active .pmgmt-name { color: var(--accent); }
.pmgmt-row.active .pmgmt-sources, .pmgmt-row.active .pmgmt-meta { color: var(--accent); opacity: .7; }
|
||||
/* NOTE(review): every selector below except `.pmgmt-account-list label` is
   also defined earlier in this stylesheet. Because these rules come later in
   source order, any property set in both places takes its value from here.
   Colours use the app's own variables (templates/CLAUDE.md forbids the
   claude.ai --color-* system variables) and match the earlier definitions of
   the same selectors. Consider merging the two sets. */
.pmgmt-editor-body { flex: 1; overflow: hidden; padding: 14px 16px; display: flex; flex-direction: column; gap: 14px; }
.pmgmt-editor-section-title { font-size: 10px; font-weight: 500; color: var(--text); text-transform: uppercase; letter-spacing: .06em; margin-bottom: 8px; }
.pmgmt-account-list { display: flex; flex-direction: column; gap: 3px; max-height: 160px; overflow-y: auto; margin-top: 6px; }
.pmgmt-account-list label { display: flex; align-items: center; gap: 6px; font-size: 12px; cursor: pointer; padding: 2px 0; }
.pmgmt-opt-row { display: flex; align-items: center; justify-content: space-between; font-size: 12px; color: var(--text); }
.pmgmt-opt-row input[type=number] { width: 60px; font-size: 12px; }
|
||||
|
||||
|
||||
|
||||
|
||||
/* Settings modal */
|
||||
.settings-backdrop {
|
||||
display:none; position:fixed; inset:0;
|
||||
background:rgba(0,0,0,.55); z-index:1200;
|
||||
align-items:center; justify-content:center;
|
||||
}
|
||||
.settings-backdrop.open { display:flex; }
|
||||
.settings-modal {
|
||||
background:var(--surface); border:1px solid var(--border);
|
||||
border-radius:10px; width:min(540px,96vw);
|
||||
display:flex; flex-direction:column; overflow:hidden;
|
||||
font-size:12px; color:var(--text);
|
||||
}
|
||||
.settings-header { padding:16px 20px 0; display:flex; align-items:center; justify-content:space-between; }
|
||||
.settings-header h2 { font-size:14px; font-weight:700; margin:0; }
|
||||
.settings-tabs { display:flex; border-bottom:1px solid var(--border); padding:0 20px; margin-top:12px; }
|
||||
.settings-tab {
|
||||
height:36px; padding:0 14px; font-size:12px; cursor:pointer; border:none;
|
||||
background:none; color:var(--muted); border-bottom:2px solid transparent;
|
||||
margin-bottom:-1px; font-weight:500;
|
||||
}
|
||||
.settings-tab.active { color:var(--accent); border-bottom-color:var(--accent); font-weight:600; }
|
||||
.settings-body { padding:16px 20px; overflow-y:auto; max-height:65vh; display:flex; flex-direction:column; gap:14px; }
|
||||
.settings-pane { display:none; flex-direction:column; gap:12px; }
|
||||
.settings-pane.active { display:flex; }
|
||||
.settings-group { display:flex; flex-direction:column; gap:6px; }
|
||||
.settings-group-title { font-size:10px; font-weight:700; color:var(--muted); text-transform:uppercase; letter-spacing:.05em; }
|
||||
.settings-row { display:flex; align-items:center; gap:10px; }
|
||||
.settings-row label { flex:0 0 110px; font-size:11px; color:var(--muted); }
|
||||
.settings-row input, .settings-row select { flex:1; font-size:12px; height:26px; padding:0 8px; background:var(--bg); border:1px solid var(--border); border-radius:5px; color:var(--text); box-sizing:border-box; }
|
||||
.settings-footer { padding:10px 20px; border-top:1px solid var(--border); display:flex; justify-content:flex-end; gap:8px; }
|
||||
.settings-about-row { display:flex; justify-content:space-between; font-size:11px; padding:3px 0; border-bottom:1px solid var(--border); }
|
||||
.settings-about-row:last-child { border-bottom:none; }
|
||||
|
||||
/* Unified Source Management modal (#17) */
|
||||
.srcmgmt-backdrop {
|
||||
display: none; position: fixed; inset: 0;
|
||||
background: rgba(0,0,0,0.55); z-index: 1100;
|
||||
align-items: center; justify-content: center;
|
||||
}
|
||||
.srcmgmt-backdrop.open { display: flex; }
|
||||
.srcmgmt-modal {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: 10px; width: min(620px, 96vw);
|
||||
display: flex; flex-direction: column;
|
||||
font-size: 12px; color: var(--text); overflow: hidden;
|
||||
}
|
||||
.srcmgmt-header { padding: 18px 22px 0; display: flex; align-items: center; justify-content: space-between; }
|
||||
.srcmgmt-header h2 { font-size: 14px; font-weight: 700; margin: 0; }
|
||||
.srcmgmt-tabs { display: flex; gap: 0; border-bottom: 1px solid var(--border); padding: 0 22px; margin-top: 14px; }
|
||||
.srcmgmt-tab {
|
||||
height: 36px; padding: 0 16px; font-size: 12px; cursor: pointer; border: none;
|
||||
background: none; color: var(--muted); border-bottom: 2px solid transparent;
|
||||
margin-bottom: -1px; font-weight: 500; transition: color .15s;
|
||||
}
|
||||
.srcmgmt-tab:hover { color: var(--text); }
|
||||
.srcmgmt-tab.active { color: var(--accent); border-bottom-color: var(--accent); font-weight: 600; }
|
||||
.srcmgmt-tab.stub { opacity: .45; cursor: default; }
|
||||
.srcmgmt-body { padding: 18px 22px; overflow-y: auto; max-height: 65vh; display: flex; flex-direction: column; gap: 14px; }
|
||||
.srcmgmt-pane { display: none; flex-direction: column; gap: 14px; }
|
||||
.srcmgmt-pane.active { display: flex; }
|
||||
.srcmgmt-group { display: flex; flex-direction: column; gap: 6px; }
|
||||
.srcmgmt-group-title { font-size: 10px; font-weight: 700; color: var(--muted); text-transform: uppercase; letter-spacing: .05em; }
|
||||
.srcmgmt-row { display: flex; align-items: center; gap: 10px; padding: 7px 10px; border-radius: 7px; background: var(--bg2); border: 1px solid var(--border); }
|
||||
.srcmgmt-row-icon { font-size: 16px; flex-shrink: 0; width: 22px; text-align: center; }
|
||||
.srcmgmt-row-label { flex: 1; font-size: 12px; font-weight: 500; }
|
||||
.srcmgmt-row-sub { font-size: 10px; color: var(--muted); }
|
||||
.srcmgmt-status { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
|
||||
.srcmgmt-status.green { background: #3fb950; }
|
||||
.srcmgmt-status.amber { background: #d29922; }
|
||||
.srcmgmt-status.grey { background: var(--border); }
|
||||
.srcmgmt-cred-form { display: flex; flex-direction: column; gap: 8px; padding: 10px; border: 1px solid var(--border); border-radius: 8px; background: var(--bg); }
|
||||
.srcmgmt-cred-row { display: flex; align-items: center; gap: 8px; }
|
||||
.srcmgmt-cred-row label { flex: 0 0 110px; font-size: 11px; color: var(--muted); }
|
||||
.srcmgmt-cred-row input { flex: 1; font-size: 12px; height: 26px; padding: 0 8px; background: var(--surface); border: 1px solid var(--border); border-radius: 5px; color: var(--text); box-sizing: border-box; }
|
||||
.srcmgmt-footer { padding: 12px 22px; border-top: 1px solid var(--border); display: flex; justify-content: flex-end; gap: 8px; }
|
||||
|
||||
/* File Sources modal (#8) */
|
||||
.fsrc-backdrop {
|
||||
display: none; position: fixed; inset: 0;
|
||||
background: rgba(0,0,0,0.55); z-index: 1000;
|
||||
align-items: center; justify-content: center;
|
||||
}
|
||||
.fsrc-backdrop.open { display: flex; }
|
||||
.fsrc-modal {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: 10px; padding: 22px 26px;
|
||||
width: min(560px, 95vw);
|
||||
display: flex; flex-direction: column; gap: 12px;
|
||||
font-size: 12px; color: var(--text);
|
||||
}
|
||||
.fsrc-modal h2 { font-size: 14px; font-weight: 700; margin: 0; }
|
||||
.fsrc-list { overflow-y: auto; flex-shrink: 0; display: flex; flex-direction: column; gap: 8px; height: calc(5 * 58px); min-height: 58px; border: 1px solid var(--border); border-radius: 7px; padding: 6px; background: var(--bg); }
|
||||
.fsrc-row { border: 1px solid var(--border); border-radius: 8px; padding: 10px 12px; background: var(--bg2); display: flex; flex-direction: column; gap: 4px; }
|
||||
.fsrc-row-head { display: flex; align-items: center; gap: 8px; }
|
||||
.fsrc-row-label { font-weight: 600; flex: 1; font-size: 12px; }
|
||||
.fsrc-row-path { font-size: 10px; color: var(--muted); font-family: var(--mono); }
|
||||
.fsrc-actions { display: flex; gap: 5px; flex-shrink: 0; }
|
||||
.fsrc-actions button { border: 1px solid var(--border); background: none; color: var(--muted); border-radius: 5px; padding: 2px 8px; font-size: 11px; cursor: pointer; }
|
||||
.fsrc-actions button:hover { color: var(--text); border-color: var(--text); }
|
||||
.fsrc-actions button.btn-scan { background: var(--accent); color: #fff; border-color: var(--accent); }
|
||||
.fsrc-actions button.btn-del { color: var(--danger); border-color: var(--danger); }
|
||||
.fsrc-form { display: flex; flex-direction: column; gap: 8px; padding: 10px; border: 1px dashed var(--border); border-radius: 8px; }
|
||||
.fsrc-form-row { display: flex; align-items: center; gap: 8px; }
|
||||
.fsrc-form-row label { flex: 0 0 120px; font-size: 11px; color: var(--muted); }
|
||||
.fsrc-form-row input { flex: 1; font-size: 12px; padding: 4px 8px; background: var(--bg); border: 1px solid var(--border); border-radius: 5px; color: var(--text); }
|
||||
.fsrc-smb-fields { display: none; }
|
||||
.fsrc-empty { color: var(--muted); font-size: 12px; text-align: center; padding: 20px 0; }
|
||||
.fsrc-footer { display: flex; justify-content: flex-end; gap: 8px; padding-top: 4px; border-top: 1px solid var(--border); }
|
||||
|
||||
/* Import DB modal (#11) */
|
||||
.import-db-backdrop {
|
||||
display: none; position: fixed; inset: 0;
|
||||
background: rgba(0,0,0,0.55); z-index: 1000;
|
||||
align-items: center; justify-content: center;
|
||||
}
|
||||
.import-db-backdrop.open { display: flex; }
|
||||
.import-db-modal {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: 10px; padding: 24px 28px;
|
||||
width: min(420px, 95vw);
|
||||
display: flex; flex-direction: column; gap: 14px;
|
||||
font-size: 12px; color: var(--text);
|
||||
}
|
||||
.import-db-modal h2 { font-size: 14px; font-weight: 700; margin: 0; }
|
||||
.import-db-modal p { margin: 0; color: var(--muted); line-height: 1.5; }
|
||||
.account-pill { font-size: 10px; color: var(--muted); white-space: nowrap; overflow: hidden;
|
||||
text-overflow: ellipsis; max-width: 140px; display: inline-block; vertical-align: middle; }
|
||||
.role-pill { font-size: 9px; padding: 1px 5px; border-radius: 10px; font-weight: 500; white-space: nowrap; }
|
||||
.role-pill.student { background: #0f3d6e; color: #7ec8ff; }
|
||||
.role-pill.staff { background: #1a3a1a; color: #7ed07e; }
|
||||
[data-theme="light"] .role-pill.student { background: #dbeeff; color: #0054a6; }
|
||||
[data-theme="light"] .role-pill.staff { background: #dff0df; color: #1a6e1a; }
|
||||
.special-cat-badge { font-size: 9px; padding: 1px 5px; border-radius: 10px;
|
||||
background: #4B0082; color: #E0B0FF; font-weight: 500; white-space: nowrap; }
|
||||
[data-theme="light"] .special-cat-badge { background: #EDE0FF; color: #5A007A; }
|
||||
.photo-face-badge { font-size: 9px; padding: 1px 5px; border-radius: 10px;
|
||||
background: #005060; color: #80E8FF; font-weight: 500; white-space: nowrap; }
|
||||
[data-theme="light"] .photo-face-badge { background: #D0F4FF; color: #00505F; }
|
||||
.overdue-badge { font-size: 9px; padding: 1px 5px; border-radius: 10px;
|
||||
background: #7c3200; color: #ffb347; font-weight: 600; white-space: nowrap; }
|
||||
[data-theme="light"] .overdue-badge { background: #fff3e0; color: #c55a00; }
|
||||
.badge-email { background: rgba(139,68,173,.2); color: #b87fd8; }
|
||||
.badge-onedrive { background: rgba(0,120,212,.2); color: #5ba4e8; }
|
||||
.badge-sharepoint { background: rgba(0,160,100,.2); color: #2ecc71; }
|
||||
.badge-teams { background: rgba(88,101,242,.2); color: #9ba4ff; }
|
||||
.badge-local { background: rgba(40,120,40,.2); color: #7ec87e; }
|
||||
.badge-smb { background: rgba(20,100,140,.2); color: #7ec8d0; }
|
||||
.badge-gmail { background: rgba(234,67,53,.18); color: #ea4335; }
|
||||
.badge-gdrive { background: rgba(15,117,210,.18); color: #0f75d2; }
|
||||
|
||||
/* Empty state */
|
||||
.empty-state { display: flex; flex-direction: column; align-items: center; justify-content: center;
|
||||
height: 100%; color: var(--muted); text-align: center; gap: 12px; }
|
||||
.empty-icon { font-size: 48px; opacity: .3; }
|
||||
.empty-text { font-size: 14px; line-height: 1.6; }
|
||||
|
||||
/* Log panel */
|
||||
.log-wrap { display: flex; flex-direction: column; flex-shrink: 0; border-top: 1px solid var(--border); }
|
||||
.sources-resize-handle { height: 5px; cursor: ns-resize; background: transparent; flex-shrink: 0; }
|
||||
.sources-resize-handle:hover { background: var(--border); }
|
||||
.log-resize-handle { height: 5px; cursor: ns-resize; background: transparent; flex-shrink: 0; }
|
||||
.log-resize-handle:hover { background: var(--border); }
|
||||
.log-header { display: flex; align-items: center; gap: 6px; padding: 3px 10px; background: var(--bg); border-bottom: 1px solid var(--border); flex-shrink: 0; }
|
||||
.log-header-title { font-size: 10px; font-weight: 600; color: var(--muted); letter-spacing: 0.04em; text-transform: uppercase; flex: 1; }
|
||||
.log-filter-btn { font-size: 10px; height: 18px; padding: 0 6px; border: 1px solid var(--border); border-radius: 4px; background: none; color: var(--muted); cursor: pointer; }
|
||||
.log-filter-btn.active { background: var(--accent); color: #fff; border-color: var(--accent); }
|
||||
.log-copy-btn { font-size: 10px; height: 18px; padding: 0 6px; border: 1px solid var(--border); border-radius: 4px; background: none; color: var(--muted); cursor: pointer; }
|
||||
.log-copy-btn:hover { color: var(--text); }
|
||||
.log-panel { height: 154px; min-height: 60px; overflow-y: auto; background: var(--bg); padding: 6px 14px; font-family: var(--mono); font-size: 11px; line-height: 16px; color: var(--muted); flex: none; scrollbar-width: thin; scrollbar-color: var(--border) transparent; }
|
||||
.log-panel::-webkit-scrollbar { width: 4px; }
|
||||
.log-panel::-webkit-scrollbar-track { background: transparent; }
|
||||
.log-panel::-webkit-scrollbar-thumb { background: var(--border); border-radius: 2px; }
|
||||
.log-line { margin-bottom: 2px; white-space: pre-wrap; word-break: break-all; }
|
||||
.log-err { color: var(--danger); }
|
||||
.log-ok { color: var(--success); }
|
||||
.log-warn { color: #e0922a; }
|
||||
.log-live { color: var(--muted); opacity: 0.7; font-style: italic; }
|
||||
.log-line.log-err-hidden { display: none; }
|
||||
.sidebar-footer { padding: 6px 12px; border-top: 1px solid var(--border); display: flex; align-items: center; justify-content: space-between; margin-top: auto; flex-shrink: 0; }
|
||||
.sidebar-footer select { background: var(--surface); border: 1px solid var(--border); border-radius: 4px; color: var(--muted); font-size: 10px; padding: 2px 4px; cursor: pointer; }
|
||||
/* Data subject lookup modal */
|
||||
.dsub-modal-backdrop {
|
||||
display: none; position: fixed; inset: 0;
|
||||
background: rgba(0,0,0,0.45); z-index: 1000;
|
||||
align-items: center; justify-content: center;
|
||||
}
|
||||
.dsub-modal-backdrop.open { display: flex; }
|
||||
.dsub-modal {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: 10px; padding: 22px 26px;
|
||||
width: 500px; max-width: 95vw; max-height: 80vh;
|
||||
display: flex; flex-direction: column; gap: 12px;
|
||||
font-family: var(--sans); color: var(--text);
|
||||
}
|
||||
.dsub-modal h2 { font-size: 14px; font-weight: 600; margin: 0; }
|
||||
.dsub-input-row { display: flex; gap: 8px; }
|
||||
.dsub-input-row input { flex: 1; font-size: 13px; letter-spacing: .05em; }
|
||||
.dsub-results { flex: 1; overflow-y: auto; min-height: 0; }
|
||||
.dsub-result-row { display: flex; align-items: center; gap: 8px; padding: 7px 0;
|
||||
border-bottom: 1px solid var(--border); font-size: 12px; }
|
||||
.dsub-result-row:last-child { border-bottom: none; }
|
||||
.dsub-result-name { flex: 1; font-weight: 500; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
||||
.dsub-result-meta { color: var(--muted); font-size: 11px; white-space: nowrap; }
|
||||
.dsub-footer { display: flex; gap: 8px; padding-top: 4px; }
|
||||
.dsub-footer button { flex: 1; padding: 7px; border-radius: 7px; font-size: 12px; cursor: pointer; font-family: var(--sans); }
|
||||
/* Disposition widget */
|
||||
.disposition-row { display: flex; align-items: center; gap: 8px; padding: 8px 14px;
|
||||
border-top: 1px solid var(--border); flex-shrink: 0; }
|
||||
.disposition-label { font-size: 11px; color: var(--muted); white-space: nowrap; }
|
||||
.disposition-select { flex: 1; font-size: 11px; padding: 4px 6px; }
|
||||
.disposition-save { padding: 4px 10px; border-radius: 6px; font-size: 11px;
|
||||
background: var(--accent); color: #fff; border: none; cursor: pointer; white-space: nowrap; }
|
||||
.disposition-saved { font-size: 10px; color: var(--success); }
|
||||
|
||||
/* Trend sparkline */
|
||||
.spark-wrap { position: relative; height: 60px; margin: 6px 0 2px; }
|
||||
.spark-wrap canvas { width: 100%; height: 60px; }
|
||||
.spark-tip { display:none; position:absolute; background:var(--surface);
|
||||
border:1px solid var(--border); border-radius:5px; padding:3px 7px;
|
||||
font-size:10px; color:var(--text); pointer-events:none; white-space:nowrap;
|
||||
top:0; left:0; z-index:10; }
|
||||
.spark-labels { display:flex; justify-content:space-between;
|
||||
font-size:9px; color:var(--muted); margin-bottom:4px; }
|
||||
.spark-legend { display:flex; gap:10px; font-size:9px;
|
||||
color:var(--muted); margin-top:3px; }
|
||||
.spark-legend span { display:flex; align-items:center; gap:3px; }
|
||||
.spark-dot { width:8px; height:2px; border-radius:1px; }
|
||||
|
||||
.about-modal-backdrop {
|
||||
display: none; position: fixed; inset: 0;
|
||||
background: rgba(0,0,0,0.45); z-index: 1000;
|
||||
align-items: center; justify-content: center;
|
||||
}
|
||||
.about-modal-backdrop.open { display: flex; }
|
||||
.about-modal {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: 8px; padding: 32px 36px;
|
||||
max-width: 380px; width: 90%;
|
||||
font-family: var(--mono); font-size: 12px; color: var(--text);
|
||||
}
|
||||
.about-modal h2 { font-size: 16px; font-weight: 700; margin: 0 0 4px; color: var(--text); font-family: var(--mono); }
|
||||
.about-modal .about-version { color: var(--accent); font-size: 11px; margin-bottom: 20px; }
|
||||
.about-modal .about-row { display: flex; justify-content: space-between; padding: 5px 0; border-bottom: 1px solid var(--border); color: var(--muted); }
|
||||
.about-modal .about-row span:last-child { color: var(--text); }
|
||||
.about-close { margin-top: 20px; width: 100%; padding: 8px; background: var(--accent); color: #fff; border: none; border-radius: 6px; font-size: 13px; cursor: pointer; font-family: var(--mono); }
|
||||
|
||||
/* SMTP modal */
|
||||
.smtp-modal-backdrop {
|
||||
display: none; position: fixed; inset: 0;
|
||||
background: rgba(0,0,0,0.45); z-index: 1000;
|
||||
align-items: center; justify-content: center;
|
||||
}
|
||||
.smtp-modal-backdrop.open { display: flex; }
|
||||
.smtp-modal {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: 10px; padding: 24px 28px;
|
||||
width: 460px; max-width: 95vw; max-height: 90vh; overflow-y: auto;
|
||||
font-family: var(--sans); font-size: 12px; color: var(--text);
|
||||
}
|
||||
.smtp-modal h2 { font-size: 15px; font-weight: 600; margin: 0 0 4px; }
|
||||
.smtp-modal .smtp-subtitle { color: var(--muted); font-size: 11px; margin-bottom: 18px; }
|
||||
.smtp-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 10px 16px; }
|
||||
.smtp-grid .full { grid-column: 1 / -1; }
|
||||
.smtp-field label { display: block; font-size: 11px; color: var(--muted); margin-bottom: 3px; }
|
||||
.smtp-field input { width: 100%; padding: 6px 8px; font-size: 12px; }
|
||||
.smtp-toggle-row { display: flex; align-items: center; gap: 8px; margin-top: 4px; font-size: 11px; color: var(--muted); }
|
||||
.smtp-divider { grid-column: 1 / -1; border: none; border-top: 1px solid var(--border); margin: 4px 0; }
|
||||
.smtp-footer { display: flex; gap: 8px; margin-top: 18px; }
|
||||
.smtp-footer button { flex: 1; padding: 8px; border-radius: 7px; font-size: 12px; cursor: pointer; font-family: var(--sans); }
|
||||
.smtp-status { font-size: 11px; margin-top: 8px; min-height: 16px; text-align: center; }
|
||||
|
||||
/* ── Viewer mode — hide scan controls ──────────────────────────────────── */
|
||||
/* Elements that are force-hidden while <body> carries the viewer-mode class. */
body.viewer-mode #scanBtn,
body.viewer-mode #stopBtn,
body.viewer-mode #profileBar,
body.viewer-mode .topbar-sep,
body.viewer-mode .config-group,
body.viewer-mode #resumeBanner,
body.viewer-mode #bulkDeleteBtn,
body.viewer-mode .card-delete-btn,
body.viewer-mode #dsubDeleteBtn,
body.viewer-mode #shareBtn,
body.viewer-mode .sidebar,
body.viewer-mode #logWrap,
body.viewer-mode #progressBar {
  display: none !important;
}

/* The only element that viewer mode forces to be shown. */
body.viewer-mode #viewerBrand {
  display: inline !important;
}
|
||||
29
templates/CLAUDE.md
Normal file
29
templates/CLAUDE.md
Normal file
@ -0,0 +1,29 @@
|
||||
# templates/ — CSS & HTML Rules
|
||||
|
||||
## CSS variables
|
||||
Use the app's own variables (defined in `static/style.css`). Never use claude.ai system variables like `var(--color-background-primary)` — the app uses `var(--bg)`, `var(--surface)`, `var(--border)`, `var(--text)`, `var(--muted)`, `var(--accent)`, `var(--danger)`, `var(--success)`.
|
||||
|
||||
Theme is switched via `[data-theme="light"]` attribute on `<body>` — not `prefers-color-scheme`.
|
||||
|
||||
## Standard control height: 26px
|
||||
Applies to every interactive element in the topbar and sidebar. Exception: `.toggle` is `32×18px` — do not change it to 26px.
|
||||
|
||||
## Pill cluster container pattern
|
||||
```css
|
||||
display: flex; background: var(--bg); border: 1px solid var(--border);
|
||||
border-radius: 6px; overflow: hidden;
|
||||
```
|
||||
Buttons inside: `border-right: 1px solid var(--border)` as dividers; last child has none. Selected: `background: var(--accent); color: #fff`.
|
||||
|
||||
## Danger buttons
|
||||
Never place destructive actions (delete, reset, disconnect, sign out) inside a pill cluster. Standalone button with `border: 1px solid var(--danger); color: var(--danger)`, separated by a gap. Applies everywhere — topbar, sidebar, modals, list rows.
|
||||
|
||||
## Badge sizing standard
|
||||
All badges — platform, role, source, CPR, faces, Art.9, overdue, risk — use: `font-size: 9px; padding: 1px 5px; border-radius: 10px`. Never override with larger inline styles. New badge classes always start from this standard.
|
||||
|
||||
## No emojis in button labels
|
||||
All buttons use plain text — topbar, filter bar, modals, settings, and lang file values. No `▶ ■ 💾 ⚙ 🕐 ⬇ ⬆ 🗑 📋 ☰ ⊞`.
|
||||
|
||||
## Gotchas
|
||||
|
||||
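- **Label click forwarding** — a click on a clickable non-form element (e.g. a `<span>` or `<div>` with a click handler) inside a `<label>` is also forwarded to the label's checkbox and toggles it. Use a real `<button type="button">` for such controls; browsers do not forward clicks on a button to the label's control.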
|
||||
1276
templates/index.html
Normal file
1276
templates/index.html
Normal file
File diff suppressed because it is too large
Load Diff
28
templates/viewer_denied.html
Normal file
28
templates/viewer_denied.html
Normal file
@ -0,0 +1,28 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>GDPRScanner — Access denied</title>
|
||||
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
|
||||
<style>
|
||||
body { display: flex; align-items: center; justify-content: center; min-height: 100vh; margin: 0; }
|
||||
.denied-card {
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 32px 40px;
|
||||
text-align: center;
|
||||
max-width: 360px;
|
||||
}
|
||||
.denied-card h1 { font-size: 16px; font-weight: 600; margin: 0 0 8px; color: var(--text); }
|
||||
.denied-card p { font-size: 13px; color: var(--muted); margin: 0; }
|
||||
</style>
|
||||
</head>
|
||||
<body data-theme="dark">
|
||||
<div class="denied-card">
|
||||
<h1>Access denied</h1>
|
||||
<p>This link is invalid or has expired.<br>Ask the administrator for a new link.</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
82
templates/viewer_pin.html
Normal file
82
templates/viewer_pin.html
Normal file
@ -0,0 +1,82 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>GDPRScanner — Enter PIN</title>
|
||||
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
|
||||
<style>
|
||||
body { display: flex; align-items: center; justify-content: center; min-height: 100vh; margin: 0; }
|
||||
.pin-card {
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 32px 40px;
|
||||
width: min(340px, 92vw);
|
||||
box-sizing: border-box;
|
||||
}
|
||||
.pin-card h1 { font-size: 15px; font-weight: 600; margin: 0 0 6px; color: var(--text); }
|
||||
.pin-card p { font-size: 12px; color: var(--muted); margin: 0 0 18px; }
|
||||
.pin-input {
|
||||
width: 100%; box-sizing: border-box;
|
||||
font-size: 22px; letter-spacing: .3em; text-align: center;
|
||||
padding: 10px 12px; border-radius: 6px;
|
||||
border: 1px solid var(--border); background: var(--bg);
|
||||
color: var(--text); outline: none; margin-bottom: 12px;
|
||||
}
|
||||
.pin-input:focus { border-color: var(--accent); }
|
||||
.pin-btn {
|
||||
width: 100%; padding: 10px; border: none; border-radius: 6px;
|
||||
background: var(--accent); color: #fff; font-size: 13px;
|
||||
font-weight: 600; cursor: pointer; font-family: var(--sans);
|
||||
}
|
||||
.pin-btn:disabled { opacity: .5; cursor: default; }
|
||||
.pin-error { font-size: 12px; color: var(--danger); margin-top: 8px; min-height: 16px; text-align: center; }
|
||||
</style>
|
||||
</head>
|
||||
<body data-theme="dark">
|
||||
<div class="pin-card">
|
||||
<h1>GDPRScanner</h1>
|
||||
<p>Enter the viewer PIN to access results.</p>
|
||||
<input id="pinInput" class="pin-input" type="password" inputmode="numeric"
|
||||
maxlength="8" placeholder="••••" autocomplete="off"
|
||||
onkeydown="if(event.key==='Enter')submitPin()">
|
||||
<button class="pin-btn" id="pinBtn" onclick="submitPin()">Continue</button>
|
||||
<div class="pin-error" id="pinError"></div>
|
||||
</div>
|
||||
<script>
|
||||
/**
 * Send the entered viewer PIN to the server and react to the response.
 *
 * POSTs `{pin}` as JSON to /api/viewer/pin/verify. On an OK response the
 * page navigates to /view. On any other response the server's `error` text
 * (or a default message) is shown in #pinError; for non-429 failures the
 * input is also cleared and refocused. The button is disabled while the
 * request is in flight and re-enabled after a failure.
 */
async function submitPin() {
  const input = document.getElementById('pinInput');
  const pin = input.value.trim();
  if (!pin) return;

  const btn = document.getElementById('pinBtn');
  const err = document.getElementById('pinError');

  // Re-entrancy guard: the input's Enter-key handler calls this function
  // directly, so a disabled button alone does not stop a repeated or held
  // Enter from firing further verify requests while one is still pending.
  if (btn.disabled) return;

  btn.disabled = true;
  err.textContent = '';
  try {
    const r = await fetch('/api/viewer/pin/verify', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({pin})
    });
    if (r.ok) {
      // Leave the button disabled while the browser navigates away.
      window.location.href = '/view';
    } else {
      // The error body may be missing or not JSON; fall back to defaults.
      const d = await r.json().catch(() => ({}));
      if (r.status === 429) {
        err.textContent = d.error || 'Too many attempts. Try again later.';
      } else {
        err.textContent = d.error || 'Incorrect PIN.';
        input.value = '';
        input.focus();
      }
      btn.disabled = false;
    }
  } catch (e) {
    err.textContent = 'Network error. Please try again.';
    btn.disabled = false;
  }
}
|
||||
document.getElementById('pinInput').focus();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
113
tests/conftest.py
Normal file
113
tests/conftest.py
Normal file
@ -0,0 +1,113 @@
|
||||
"""
|
||||
conftest.py — shared fixtures for GDPRScanner test suite.
|
||||
"""
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Ensure the project root is on sys.path so all modules are importable
|
||||
ROOT = Path(__file__).parent.parent
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
|
||||
# ── File fixtures ─────────────────────────────────────────────────────────────
|
||||
|
||||
@pytest.fixture()
def tmp_dir(tmp_path):
    """Expose pytest's per-test ``tmp_path`` directory under the name ``tmp_dir``."""
    workdir = tmp_path
    return workdir
|
||||
|
||||
|
||||
@pytest.fixture()
def docx_with_cpr(tmp_path):
    """Create a .docx with three CPR numbers: two in paragraphs, one in a table cell.

    Returns the path of the saved document inside ``tmp_path``.
    """
    from docx import Document

    document = Document()
    for sentence in (
        "Elev 1: CPR 290472-1234 er registreret i systemet.",
        "Elev 2: personnummer 010185-4321.",
    ):
        document.add_paragraph(sentence)

    # 2x2 table: a header row, then one person whose CPR is in the second column.
    grid = document.add_table(rows=2, cols=2)
    cell_texts = (
        ("Navn", "CPR"),
        ("Anne Hansen", "CPR: 150364-5678"),
    )
    for row_idx, row_values in enumerate(cell_texts):
        for col_idx, value in enumerate(row_values):
            grid.cell(row_idx, col_idx).text = value

    target = tmp_path / "sample_with_cpr.docx"
    document.save(target)
    return target
|
||||
|
||||
|
||||
@pytest.fixture()
def docx_no_cpr(tmp_path):
    """Create a .docx whose two paragraphs contain no CPR numbers.

    Returns the path of the saved document inside ``tmp_path``.
    """
    from docx import Document

    document = Document()
    for sentence in (
        "Ingen personoplysninger her.",
        "Konto: 1234-5678 Telefon: 33 12 34 56",
    ):
        document.add_paragraph(sentence)

    target = tmp_path / "sample_no_cpr.docx"
    document.save(target)
    return target
|
||||
|
||||
|
||||
@pytest.fixture()
def xlsx_with_cpr(tmp_path):
    """Create an .xlsx workbook with a single CPR number in cell B2.

    Returns the path of the saved workbook inside ``tmp_path``.
    """
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active

    # Header row followed by one data row; assigned in the same order as listed.
    cell_values = (
        ("A1", "Navn"),
        ("B1", "CPR"),
        ("A2", "Test Person"),
        ("B2", "CPR: 290472-1234"),
    )
    for ref, value in cell_values:
        sheet[ref] = value

    target = tmp_path / "sample_with_cpr.xlsx"
    workbook.save(target)
    return target
|
||||
|
||||
|
||||
@pytest.fixture()
def xlsx_no_cpr(tmp_path):
    """Create an .xlsx workbook of CPR-like digit strings that are not CPR numbers.

    Returns the path of the saved workbook inside ``tmp_path``.
    """
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active

    cell_values = (
        ("A1", "Kontonummer"),
        ("B1", "Beløb"),
        ("A2", "12345678"),     # 8-digit — too short
        ("A3", "29047212345"),  # 11-digit — too long
        ("A4", "Reg: 2904"),
    )
    for ref, value in cell_values:
        sheet[ref] = value

    target = tmp_path / "sample_no_cpr.xlsx"
    workbook.save(target)
    return target
|
||||
|
||||
|
||||
@pytest.fixture()
def txt_with_art9(tmp_path):
    """Write a UTF-8 text file with a CPR number next to health-related wording.

    Returns the path of the written file inside ``tmp_path``.
    """
    target = tmp_path / "sample_art9.txt"
    # Two lines of text; the first ends with the newline embedded in the literal.
    body = (
        "Eleven CPR 290472-1234 har diagnosen diabetes og modtager behandling.\n"
        "Kontakt læge vedr. sygemelding."
    )
    target.write_text(body, encoding="utf-8")
    return target
|
||||
|
||||
|
||||
@pytest.fixture()
def binary_garbage(tmp_path):
    """Write a non-text binary file the scanner must survive; return its path."""
    # Every byte value 0..255 in order, repeated 100 times (25,600 bytes).
    payload = bytes(range(256)) * 100
    target = tmp_path / "sample_binary.bin"
    target.write_bytes(payload)
    return target
|
||||
|
||||
|
||||
@pytest.fixture()
def tmp_db(tmp_path):
    """Yield a ``ScanDB`` backed by a fresh database file for each test.

    The database lives at ``tmp_path / "test.db"`` (a real file on disk,
    not an in-memory database), so every test starts from an empty store.

    Yields:
        The ``ScanDB`` instance constructed on that path.
    """
    from gdpr_db import ScanDB

    db_path = tmp_path / "test.db"
    db = ScanDB(str(db_path))
    yield db

    # Best-effort cleanup only. The file may already be gone, or may still
    # be held open (e.g. on Windows). Only filesystem errors are ignored so
    # that unrelated failures are not silently hidden.
    try:
        db_path.unlink(missing_ok=True)
    except OSError:
        pass
|
||||
254
tests/test_app_config.py
Normal file
254
tests/test_app_config.py
Normal file
@ -0,0 +1,254 @@
|
||||
"""
|
||||
test_app_config.py — Tests for app_config.py.
|
||||
|
||||
Covers:
|
||||
- LANG loading and key access
|
||||
- Article 9 keyword detection (_check_special_category)
|
||||
- Config load/save round-trip
|
||||
- Admin PIN hash/verify
|
||||
- Profile CRUD (_profile_save, _profile_get, _profile_delete)
|
||||
- SMTP password encryption/decryption round-trip
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
import app_config
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 1. i18n
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestLang:
    """Smoke tests for the translation tables exposed by ``app_config``."""

    @staticmethod
    def _assert_populated_dict(value):
        """Fail unless ``value`` is a dict with at least one entry."""
        assert isinstance(value, dict)
        assert len(value) > 0

    def test_lang_dict_loaded(self):
        self._assert_populated_dict(app_config.LANG)

    def test_lang_has_lang_code(self):
        table = app_config.LANG
        assert "_lang_code" in table

    def test_load_lang_returns_dict(self):
        assert isinstance(app_config._load_lang(), dict)

    def test_load_lang_forced_en(self):
        self._assert_populated_dict(app_config._load_lang_forced("en"))

    def test_load_lang_forced_da(self):
        self._assert_populated_dict(app_config._load_lang_forced("da"))

    def test_load_lang_forced_de(self):
        self._assert_populated_dict(app_config._load_lang_forced("de"))

    def test_missing_lang_falls_back(self):
        # An unknown language code must not raise; only the type is checked.
        fallback = app_config._load_lang_forced("xx")
        assert isinstance(fallback, dict)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 2. Article 9 keyword detection
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestCheckSpecialCategory:
    """Tests for ``app_config._check_special_category`` keyword matching."""

    def _cats(self, text):
        """Run the detector on ``text`` together with one fixed CPR hit."""
        single_hit = [{"raw": "290472-1234"}]
        return app_config._check_special_category(text, single_hit)

    def test_health_keyword_detected(self):
        found = self._cats("CPR: 290472-1234 har diagnosen diabetes og behandling")
        assert "health" in found

    def test_trade_union_keyword_detected(self):
        found = self._cats("CPR: 290472-1234 er fagforeningsmedlem tillidsrepræsentant")
        assert "trade_union" in found

    def test_religion_keyword_detected(self):
        found = self._cats("CPR: 290472-1234 kirke konfirmation")
        assert "religion" in found

    def test_no_keyword_returns_empty(self):
        assert self._cats("CPR: 290472-1234 bor i Aarhus") == []

    def test_empty_text_returns_empty(self):
        assert app_config._check_special_category("", []) == []

    def test_keyword_without_cpr_still_detected(self):
        # With an empty CPR list the keywords alone must still be reported.
        found = app_config._check_special_category("diagnose sygemelding behandling", [])
        assert "health" in found

    def test_returns_sorted_list(self):
        found = self._cats("CPR 290472-1234 diabetes fagforening")
        assert found == sorted(found)

    def test_compiled_keywords_populated(self):
        compiled = app_config._compiled_keywords
        assert len(compiled) > 0

    def test_keyword_flat_has_entries(self):
        flat = app_config._keyword_flat
        assert len(flat) > 0
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 3. Config load / save
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestConfig:
    """Load/save tests for the config file, redirected into ``tmp_path``."""

    def test_load_config_returns_dict(self, tmp_path, monkeypatch):
        target = tmp_path / "config.json"
        monkeypatch.setattr(app_config, "_CONFIG_FILE", target)
        assert isinstance(app_config._load_config(), dict)

    def test_save_and_load_round_trip(self, tmp_path, monkeypatch):
        target = tmp_path / "config.json"
        monkeypatch.setattr(app_config, "_CONFIG_FILE", target)
        app_config._save_config({"client_id": "test-id", "tenant_id": "test-tid"})

        reloaded = app_config._load_config()
        assert reloaded["client_id"] == "test-id"
        assert reloaded["tenant_id"] == "test-tid"

    def test_save_config_creates_file(self, tmp_path, monkeypatch):
        target = tmp_path / "config.json"
        monkeypatch.setattr(app_config, "_CONFIG_FILE", target)
        app_config._save_config({"x": 1})
        assert target.exists()

    def test_load_missing_file_returns_empty(self, tmp_path, monkeypatch):
        # Nothing is ever written to this path, so loading must yield {}.
        absent = tmp_path / "nonexistent.json"
        monkeypatch.setattr(app_config, "_CONFIG_FILE", absent)
        assert app_config._load_config() == {}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 4. Admin PIN
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestAdminPin:
    """Tests for setting and verifying the admin PIN."""

    @pytest.fixture(autouse=True)
    def _fresh_config(self, tmp_path, monkeypatch):
        """Point the config at ``tmp_path`` and start from an empty config (no PIN)."""
        monkeypatch.setattr(app_config, "_CONFIG_FILE", tmp_path / "config.json")
        app_config._save_config({})

    def test_pin_not_set_initially(self):
        # Fresh config — no PIN
        assert app_config._admin_pin_is_set() is False

    def test_set_and_verify_pin(self):
        app_config._set_admin_pin("1234")
        assert app_config._verify_admin_pin("1234") is True

    def test_wrong_pin_fails(self):
        app_config._set_admin_pin("1234")
        assert app_config._verify_admin_pin("9999") is False

    def test_pin_is_set_after_setting(self):
        app_config._set_admin_pin("5678")
        assert app_config._admin_pin_is_set() is True
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 5. Profiles
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestProfiles:
    """CRUD tests for the profile helpers in ``app_config``."""

    @pytest.fixture(autouse=True)
    def _isolate(self, tmp_path, monkeypatch):
        """Redirect the settings file into ``tmp_path`` for every test."""
        settings_file = tmp_path / "settings.json"
        monkeypatch.setattr(app_config, "_SETTINGS_PATH", settings_file)

    @staticmethod
    def _minimal(profile_id, name):
        """Build a profile dict with empty sources and options."""
        return {"id": profile_id, "name": name, "sources": [], "options": {}}

    def test_profiles_load_returns_list(self):
        assert isinstance(app_config._profiles_load(), list)

    def test_save_and_get_profile(self):
        app_config._profile_save(
            {
                "id": "test-uuid-1",
                "name": "Test Profile",
                "sources": ["email"],
                "user_ids": "all",
                "options": {},
            }
        )
        fetched = app_config._profile_get("Test Profile")
        assert fetched is not None
        assert fetched["name"] == "Test Profile"

    def test_profile_get_by_id(self):
        # Lookup is done with the profile's id rather than its name.
        app_config._profile_save(self._minimal("uid-42", "By ID"))
        assert app_config._profile_get("uid-42") is not None

    def test_profile_delete(self):
        app_config._profile_save(self._minimal("del-1", "To Delete"))
        was_deleted = app_config._profile_delete("To Delete")
        assert was_deleted is True
        assert app_config._profile_get("To Delete") is None

    def test_delete_nonexistent_returns_false(self):
        outcome = app_config._profile_delete("Does Not Exist")
        assert outcome is False

    def test_profiles_load_after_save(self):
        for profile_id, name in (("p1", "P1"), ("p2", "P2")):
            app_config._profile_save(self._minimal(profile_id, name))

        stored_names = [entry["name"] for entry in app_config._profiles_load()]
        assert "P1" in stored_names
        assert "P2" in stored_names
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 6. SMTP password encryption
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestFernet:
    """Round-trip tests for SMTP password encryption."""

    @pytest.fixture(autouse=True)
    def _isolate(self, tmp_path, monkeypatch):
        """Redirect the machine-id file into ``tmp_path`` for every test."""
        monkeypatch.setattr(app_config, "_MACHINE_ID_PATH", tmp_path / "machine_id")

    @staticmethod
    def _skip_without_fernet():
        """Skip the calling test when ``_get_fernet()`` returns None."""
        if app_config._get_fernet() is None:
            pytest.skip("cryptography not installed")

    def test_encrypt_decrypt_round_trip(self):
        self._skip_without_fernet()
        secret = "my-secret-smtp-password"
        token = app_config._encrypt_password(secret)
        assert app_config._decrypt_password(token) == secret

    def test_encrypt_returns_string(self):
        self._skip_without_fernet()
        token = app_config._encrypt_password("test")
        assert isinstance(token, str)

    def test_encrypted_differs_from_plaintext(self):
        self._skip_without_fernet()
        token = app_config._encrypt_password("password123")
        assert token != "password123"

    def test_decrypt_empty_returns_empty(self):
        # Runs without the Fernet guard, exactly like the other tests do not.
        assert app_config._decrypt_password("") == ""
|
||||
147
tests/test_checkpoint.py
Normal file
147
tests/test_checkpoint.py
Normal file
@ -0,0 +1,147 @@
|
||||
"""
|
||||
test_checkpoint.py — Tests for checkpoint.py.
|
||||
|
||||
Covers:
|
||||
- _checkpoint_key: stable hashing of scan options
|
||||
- _save_checkpoint / _load_checkpoint / _clear_checkpoint
|
||||
- _load_delta_tokens / _save_delta_tokens
|
||||
"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
import checkpoint
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Fixtures
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolate(tmp_path, monkeypatch):
    """Point the checkpoint and delta-token paths at ``tmp_path`` for each test."""
    redirected = (
        ("_CHECKPOINT_PATH", "checkpoint.json"),
        ("_DELTA_PATH", "delta.json"),
    )
    for attr_name, file_name in redirected:
        monkeypatch.setattr(checkpoint, attr_name, tmp_path / file_name)
|
||||
|
||||
|
||||
_OPTS = {
|
||||
"sources": ["email", "onedrive"],
|
||||
"user_ids": [{"id": "user-1"}, {"id": "user-2"}],
|
||||
"options": {"older_than_days": 365},
|
||||
}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 1. _checkpoint_key
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestCheckpointKey:
    """Tests for ``checkpoint._checkpoint_key``."""

    def test_returns_string(self):
        assert isinstance(checkpoint._checkpoint_key(_OPTS), str)

    def test_key_is_hex(self):
        digest = checkpoint._checkpoint_key(_OPTS)
        int(digest, 16)  # raises ValueError if not hex

    def test_same_options_same_key(self):
        first = checkpoint._checkpoint_key(_OPTS)
        second = checkpoint._checkpoint_key(_OPTS)
        assert first == second

    def test_different_sources_different_key(self):
        baseline = checkpoint._checkpoint_key(_OPTS)
        changed = checkpoint._checkpoint_key({**_OPTS, "sources": ["sharepoint"]})
        assert baseline != changed

    def test_different_users_different_key(self):
        baseline = checkpoint._checkpoint_key(_OPTS)
        changed = checkpoint._checkpoint_key({**_OPTS, "user_ids": [{"id": "user-99"}]})
        assert baseline != changed

    def test_source_order_irrelevant(self):
        # The same two sources in either order must give the same key.
        forward = checkpoint._checkpoint_key({**_OPTS, "sources": ["email", "onedrive"]})
        reverse = checkpoint._checkpoint_key({**_OPTS, "sources": ["onedrive", "email"]})
        assert forward == reverse

    def test_empty_options(self):
        digest = checkpoint._checkpoint_key({})
        assert isinstance(digest, str) and len(digest) > 0
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 2. Save / load / clear
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestSaveLoadCheckpoint:
    """Tests for saving, loading and clearing a scan checkpoint.

    All disk access goes through ``checkpoint._CHECKPOINT_PATH``, which the
    module-level autouse fixture redirects into a per-test temp directory.
    """

    def test_load_returns_none_when_no_file(self):
        key = checkpoint._checkpoint_key(_OPTS)
        assert checkpoint._load_checkpoint(key) is None

    def test_save_then_load(self):
        key = checkpoint._checkpoint_key(_OPTS)
        checkpoint._save_checkpoint(
            key,
            scanned_ids={"id1", "id2", "id3"},
            flagged=[{"id": "c1", "name": "file.docx"}],
            meta={"started_at": 1700000000},
        )
        loaded = checkpoint._load_checkpoint(key)
        assert loaded is not None

    def test_scanned_ids_preserved(self):
        key = checkpoint._checkpoint_key(_OPTS)
        checkpoint._save_checkpoint(key, {"id1", "id2"}, [], {})
        loaded = checkpoint._load_checkpoint(key)
        # Compared as a set: the stored container type and order are not asserted.
        assert set(loaded["scanned_ids"]) == {"id1", "id2"}

    def test_flagged_items_preserved(self):
        key = checkpoint._checkpoint_key(_OPTS)
        cards = [{"id": "c1"}, {"id": "c2"}]
        checkpoint._save_checkpoint(key, set(), cards, {})
        loaded = checkpoint._load_checkpoint(key)
        assert len(loaded["flagged"]) == 2

    def test_wrong_key_returns_none(self):
        key = checkpoint._checkpoint_key(_OPTS)
        checkpoint._save_checkpoint(key, {"id1"}, [], {})
        other_opts = {**_OPTS, "sources": ["sharepoint"]}
        other_key = checkpoint._checkpoint_key(other_opts)
        assert checkpoint._load_checkpoint(other_key) is None

    def test_clear_removes_file(self):
        key = checkpoint._checkpoint_key(_OPTS)
        checkpoint._save_checkpoint(key, {"id1"}, [], {})
        checkpoint._clear_checkpoint()
        assert checkpoint._load_checkpoint(key) is None
        # Also check the file itself, as the test name promises. A loader
        # returning None does not by itself prove the file was removed.
        assert not checkpoint._CHECKPOINT_PATH.exists()

    def test_clear_on_missing_file_does_not_raise(self):
        checkpoint._clear_checkpoint()  # no file exists — must not raise
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 3. Delta tokens
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestDeltaTokens:
    """Tests for persisting and reloading delta tokens."""

    def test_load_returns_empty_when_no_file(self):
        loaded = checkpoint._load_delta_tokens()
        assert loaded == {}

    def test_save_then_load(self):
        saved = {
            "email:user1": "https://graph.microsoft.com/v1.0/me/mailFolders/delta?$deltaToken=abc",
            "onedrive:user1": "https://graph.microsoft.com/v1.0/me/drive/delta?token=xyz",
        }
        checkpoint._save_delta_tokens(saved)
        assert checkpoint._load_delta_tokens() == saved

    def test_overwrite_preserves_new_value(self):
        # The second save for the same key must win.
        for url in ("old_url", "new_url"):
            checkpoint._save_delta_tokens({"key": url})
        assert checkpoint._load_delta_tokens()["key"] == "new_url"

    def test_save_empty_dict(self):
        checkpoint._save_delta_tokens({})
        loaded = checkpoint._load_delta_tokens()
        assert loaded == {}
|
||||
267
tests/test_db.py
Normal file
267
tests/test_db.py
Normal file
@ -0,0 +1,267 @@
|
||||
"""
|
||||
test_db.py — Tests for gdpr_db.py (ScanDB).
|
||||
|
||||
Covers:
|
||||
- begin_scan / finish_scan round-trip
|
||||
- save_item and retrieval
|
||||
- CPR index stores hash, never plaintext
|
||||
- lookup_data_subject returns matching items
|
||||
- set_disposition / get_disposition
|
||||
- Deletion log
|
||||
- Export / import cycle (merge and replace modes)
|
||||
"""
|
||||
import sys
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from gdpr_db import ScanDB
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Helpers
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _make_card(item_id="abc123", cpr_count=1, source_type="email", role="staff"):
|
||||
return {
|
||||
"id": item_id,
|
||||
"name": f"{item_id}.docx",
|
||||
"source": "email",
|
||||
"source_type": source_type,
|
||||
"cpr_count": cpr_count,
|
||||
"url": "https://example.com/item",
|
||||
"size_kb": 12.5,
|
||||
"modified": "2024-03-01",
|
||||
"thumb_b64": "",
|
||||
"thumb_mime": "image/svg+xml",
|
||||
"risk": None,
|
||||
"account_id": "user-1",
|
||||
"account_name": "Test User",
|
||||
"user_role": role,
|
||||
"drive_id": "",
|
||||
"attachments": [],
|
||||
"folder": "",
|
||||
"transfer_risk": "",
|
||||
"special_category": [],
|
||||
"face_count": 0,
|
||||
"exif": {},
|
||||
}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 1. Scan lifecycle
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestScanLifecycle:
    """Tests for ``begin_scan`` / ``finish_scan`` on ``ScanDB``."""

    @staticmethod
    def _opts(source):
        """Return fresh scan options for a single source and no users."""
        return {"sources": [source], "user_ids": []}

    def test_begin_scan_returns_int(self, tmp_db):
        new_id = tmp_db.begin_scan(self._opts("email"))
        assert isinstance(new_id, int)
        assert new_id > 0

    def test_begin_scan_increments(self, tmp_db):
        first = tmp_db.begin_scan(self._opts("email"))
        second = tmp_db.begin_scan(self._opts("onedrive"))
        assert second > first

    def test_finish_scan_does_not_raise(self, tmp_db):
        new_id = tmp_db.begin_scan(self._opts("email"))
        tmp_db.finish_scan(new_id, 42)  # must not raise

    def test_multiple_scans_independent(self, tmp_db):
        # Two scans are open at once; each gets one item before both finish.
        first = tmp_db.begin_scan(self._opts("email"))
        tmp_db.save_item(first, _make_card("item-a"), ["290472-1234"])
        second = tmp_db.begin_scan(self._opts("onedrive"))
        tmp_db.save_item(second, _make_card("item-b"), ["010185-4321"])
        for open_scan in (first, second):
            tmp_db.finish_scan(open_scan, 1)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 2. save_item
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestSaveItem:
    """Smoke tests: ``save_item`` must accept these inputs without raising."""

    @staticmethod
    def _begin(db):
        """Open an email scan with no users and return its id."""
        return db.begin_scan({"sources": ["email"], "user_ids": []})

    def test_save_item_does_not_raise(self, tmp_db):
        tmp_db.save_item(self._begin(tmp_db), _make_card(), ["290472-1234"])

    def test_save_item_without_cprs(self, tmp_db):
        empty_card = _make_card(cpr_count=0)
        tmp_db.save_item(self._begin(tmp_db), empty_card, [])

    def test_save_multiple_items(self, tmp_db):
        open_scan = self._begin(tmp_db)
        for item_id in (f"item-{n}" for n in range(5)):
            tmp_db.save_item(open_scan, _make_card(item_id), ["290472-1234"])

    def test_save_item_with_pii_counts(self, tmp_db):
        open_scan = self._begin(tmp_db)
        counts = {"cpr": 1, "name": 2, "email": 0}
        tmp_db.save_item(open_scan, _make_card(), ["290472-1234"], pii_counts=counts)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 3. CPR index — hash only, never plaintext
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestCprIndex:
    """Tests that the CPR index stores SHA-256 digests and supports lookup."""

    @staticmethod
    def _stored_hashes(db):
        """Return every ``cpr_hash`` in ``cpr_index``, read over a raw connection.

        A ``sqlite3.Connection`` used directly as a context manager only
        commits or rolls back; it does not close the connection. Wrapping
        it in ``closing`` releases the handle as soon as the query is done,
        so the database file is not kept open after the test.
        """
        import sqlite3
        from contextlib import closing

        with closing(sqlite3.connect(db._path)) as con:
            rows = con.execute("SELECT cpr_hash FROM cpr_index").fetchall()
        return [row[0] for row in rows]

    def test_cpr_not_stored_in_plaintext(self, tmp_db):
        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
        tmp_db.save_item(scan_id, _make_card(), ["290472-1234"])
        # Read the raw DB and confirm the index column holds no plaintext CPR.
        hashes = self._stored_hashes(tmp_db)
        assert len(hashes) == 1
        stored = hashes[0]
        assert stored != "290472-1234"
        assert "290472" not in stored

    def test_cpr_hash_is_sha256(self, tmp_db):
        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
        tmp_db.save_item(scan_id, _make_card(), ["290472-1234"])
        stored = self._stored_hashes(tmp_db)[0]
        # The digest is taken over the formatted CPR, hyphen included.
        expected = hashlib.sha256("290472-1234".encode()).hexdigest()
        assert stored == expected

    def test_lookup_finds_item(self, tmp_db):
        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
        tmp_db.save_item(scan_id, _make_card("item-x"), ["290472-1234"])
        results = tmp_db.lookup_data_subject("290472-1234")
        assert len(results) >= 1

    def test_lookup_returns_correct_item(self, tmp_db):
        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
        tmp_db.save_item(scan_id, _make_card("target-item"), ["290472-1234"])
        results = tmp_db.lookup_data_subject("290472-1234")
        # Accept either key name for the item identifier in the result rows.
        ids = [r.get("id") or r.get("item_id") for r in results]
        assert "target-item" in ids

    def test_lookup_different_cpr_returns_empty(self, tmp_db):
        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
        tmp_db.save_item(scan_id, _make_card(), ["290472-1234"])
        results = tmp_db.lookup_data_subject("010185-4321")
        assert results == []

    def test_lookup_multiple_items_for_same_cpr(self, tmp_db):
        scan_id = tmp_db.begin_scan({"sources": ["email"], "user_ids": []})
        tmp_db.save_item(scan_id, _make_card("item-a"), ["290472-1234"])
        tmp_db.save_item(scan_id, _make_card("item-b"), ["290472-1234"])
        results = tmp_db.lookup_data_subject("290472-1234")
        assert len(results) >= 2
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 4. Dispositions
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestDispositions:
    """Tests for ``set_disposition`` / ``get_disposition`` on ``ScanDB``."""

    @staticmethod
    def _begin(db):
        """Open an email scan with no users and return its id."""
        return db.begin_scan({"sources": ["email"], "user_ids": []})

    def test_get_disposition_returns_none_for_unknown(self, tmp_db):
        missing = tmp_db.get_disposition("nonexistent")
        assert missing is None

    def test_set_and_get_disposition(self, tmp_db):
        open_scan = self._begin(tmp_db)
        tmp_db.save_item(open_scan, _make_card("disp-item"), ["290472-1234"])
        tmp_db.set_disposition("disp-item", "retain-legal", "Bogfoeringsloven", "", "admin")

        record = tmp_db.get_disposition("disp-item")
        assert record is not None
        assert record["status"] == "retain-legal"

    def test_disposition_legal_basis_stored(self, tmp_db):
        open_scan = self._begin(tmp_db)
        tmp_db.save_item(open_scan, _make_card("disp-2"), [])
        tmp_db.set_disposition("disp-2", "delete-scheduled", "Data minimisation", "", "reviewer")

        record = tmp_db.get_disposition("disp-2")
        assert record["legal_basis"] == "Data minimisation"

    def test_disposition_overwrite(self, tmp_db):
        open_scan = self._begin(tmp_db)
        tmp_db.save_item(open_scan, _make_card("disp-3"), [])
        # The second call for the same item must replace the first status.
        tmp_db.set_disposition("disp-3", "unreviewed", "", "", "")
        tmp_db.set_disposition("disp-3", "deleted", "", "", "admin")

        record = tmp_db.get_disposition("disp-3")
        assert record["status"] == "deleted"

    def test_all_disposition_values_accepted(self, tmp_db):
        accepted = [
            "unreviewed",
            "retain-legal",
            "retain-legitimate",
            "retain-contract",
            "delete-scheduled",
            "deleted",
        ]
        open_scan = self._begin(tmp_db)
        for index, status in enumerate(accepted):
            item_id = f"disp-status-{index}"
            tmp_db.save_item(open_scan, _make_card(item_id), [])
            tmp_db.set_disposition(item_id, status, "", "", "test")
            record = tmp_db.get_disposition(item_id)
            assert record["status"] == status
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 5. Export / import
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestExportImport:
    """Export/import cycle tests; skipped when ``export_db`` is unavailable."""

    @staticmethod
    def _require_export(target):
        """Skip the calling test unless ``target`` exposes ``export_db``."""
        if not hasattr(target, "export_db"):
            pytest.skip("export_db not implemented")

    def _populate(self, db):
        """Store two items in one finished scan and set a disposition on ``exp-1``."""
        open_scan = db.begin_scan({"sources": ["email"], "user_ids": []})
        for item_id, cpr in (("exp-1", "290472-1234"), ("exp-2", "010185-4321")):
            db.save_item(open_scan, _make_card(item_id), [cpr])
        db.set_disposition("exp-1", "retain-legal", "Bogfoeringsloven", "", "admin")
        db.finish_scan(open_scan, 2)

    def test_export_creates_zip(self, tmp_db, tmp_path):
        self._require_export(tmp_db)
        self._populate(tmp_db)
        archive = tmp_path / "export.zip"
        tmp_db.export_db(str(archive))
        assert archive.exists()
        assert archive.stat().st_size > 0

    def test_export_zip_contains_expected_files(self, tmp_db, tmp_path):
        self._require_export(tmp_db)
        self._populate(tmp_db)
        archive = tmp_path / "export.zip"
        tmp_db.export_db(str(archive))

        import zipfile

        with zipfile.ZipFile(archive) as bundle:
            members = bundle.namelist()
        required = {"export_meta.json", "flagged_items.json", "dispositions.json"}
        assert required <= set(members)

    def test_import_merge_adds_dispositions(self, tmp_path):
        self._require_export(ScanDB)
        # Source DB
        source_db = ScanDB(str(tmp_path / "src.db"))
        self._populate(source_db)
        archive = tmp_path / "export.zip"
        source_db.export_db(str(archive))

        # Target DB (fresh)
        target_db = ScanDB(str(tmp_path / "tgt.db"))
        target_db.import_db(str(archive), mode="merge")
        # Disposition for exp-1 should now exist in the target.
        assert target_db.get_disposition("exp-1") is not None

    def test_import_replace_restores_items(self, tmp_path):
        self._require_export(ScanDB)
        source_db = ScanDB(str(tmp_path / "src2.db"))
        self._populate(source_db)
        archive = tmp_path / "export2.zip"
        source_db.export_db(str(archive))

        target_db = ScanDB(str(tmp_path / "tgt2.db"))
        target_db.import_db(str(archive), mode="replace")
        matches = target_db.lookup_data_subject("290472-1234")
        assert len(matches) >= 1
|
||||
224
tests/test_document_scanner.py
Normal file
224
tests/test_document_scanner.py
Normal file
@ -0,0 +1,224 @@
|
||||
"""
|
||||
test_document_scanner.py — Tests for CPR detection in document_scanner.py.
|
||||
|
||||
Covers:
|
||||
- extract_matches: context-gated CPR detection
|
||||
- is_valid_cpr: date validation and modulo-11
|
||||
- scan_docx: CPR detection in Word documents (including table cells)
|
||||
- scan_xlsx: CPR detection in Excel cells with context
|
||||
- False-positive suppression (invoices, phone numbers, account numbers)
|
||||
"""
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
import document_scanner as ds
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Helpers
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _cprs(text: str) -> list:
    """Run ``ds.extract_matches`` on ``text`` and return only its first result.

    The call uses the fixed arguments ``1`` and ``"test"``; the second
    element of the returned pair is discarded.
    """
    matches, _discarded = ds.extract_matches(text, 1, "test")
    return matches
|
||||
|
||||
|
||||
def _has_cpr(text: str) -> bool:
    """Return True when ``_cprs`` finds at least one match in ``text``."""
    hits = _cprs(text)
    return True if hits else False
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 1. Date validation — is_valid_cpr
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestIsValidCpr:
    """Tests for ``ds.is_valid_cpr(day, month, year, serial)``.

    The function returns a pair; the first element is the date-validity
    flag checked by most tests here, the second is the mod-11 flag.
    """

    def test_valid_date_returns_true(self):
        date_ok, _mod11 = ds.is_valid_cpr("29", "04", "72", "1234")
        assert date_ok is True

    def test_invalid_month_returns_false(self):
        date_ok, _mod11 = ds.is_valid_cpr("01", "13", "70", "1234")
        assert date_ok is False

    def test_invalid_day_zero_returns_false(self):
        date_ok, _mod11 = ds.is_valid_cpr("00", "01", "70", "1234")
        assert date_ok is False

    def test_invalid_day_32_returns_false(self):
        date_ok, _mod11 = ds.is_valid_cpr("32", "01", "70", "1234")
        assert date_ok is False

    def test_february_31_invalid(self):
        # The day is in 1..31 but does not exist in February.
        date_ok, _mod11 = ds.is_valid_cpr("31", "02", "90", "1234")
        assert date_ok is False

    def test_returns_tuple_of_two(self):
        outcome = ds.is_valid_cpr("01", "01", "70", "1234")
        assert isinstance(outcome, tuple)
        assert len(outcome) == 2

    def test_mod11_field_is_bool(self):
        _date_ok, mod11_ok = ds.is_valid_cpr("01", "01", "70", "1234")
        assert isinstance(mod11_ok, bool)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 2. extract_matches — context-gated detection
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestExtractMatches:
    """Tests for context-gated CPR detection through ``_cprs`` / ``_has_cpr``."""

    # ── Should detect ─────────────────────────────────────────────────────────

    def test_detects_cpr_with_label(self):
        detected = _has_cpr("CPR: 290472-1234")
        assert detected

    def test_detects_cpr_uppercase_label(self):
        detected = _has_cpr("CPR-nummer: 290472-1234")
        assert detected

    def test_detects_personnummer_keyword(self):
        detected = _has_cpr("personnummer 010185-4321")
        assert detected

    def test_detects_no_separator(self):
        detected = _has_cpr("cpr nummer 2904721234")
        assert detected

    def test_detects_space_separator(self):
        detected = _has_cpr("CPR 290472 1234")
        assert detected

    def test_result_contains_formatted_field(self):
        first_hit = _cprs("CPR: 290472-1234")[0]
        assert first_hit["formatted"] == "290472-1234"

    def test_result_contains_raw_field(self):
        first_hit = _cprs("CPR: 290472-1234")[0]
        assert "raw" in first_hit

    def test_multiple_cprs_returned(self):
        hits = _cprs("CPR: 290472-1234 og personnummer 010185-4321")
        assert len(hits) == 2

    # ── Should NOT detect ─────────────────────────────────────────────────────

    def test_rejects_naked_number_without_context(self):
        # No context keyword and no mod-11 — should be suppressed
        detected = _has_cpr("2904721234")
        assert not detected

    def test_rejects_phone_number_8_digits(self):
        detected = _has_cpr("ring 12345678 for info")
        assert not detected

    def test_rejects_invoice_context(self):
        detected = _has_cpr("faktura nr 290472-1234")
        assert not detected

    def test_rejects_part_number_context(self):
        detected = _has_cpr("del nr. 290472-1234")
        assert not detected

    def test_rejects_invalid_date(self):
        # Month 13 — date invalid, should not appear
        detected = _has_cpr("CPR: 011370-1234")
        assert not detected

    def test_empty_string(self):
        detected = _has_cpr("")
        assert not detected

    def test_plain_prose_no_numbers(self):
        detected = _has_cpr("Ingen personoplysninger i denne tekst.")
        assert not detected
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 3. scan_docx
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestScanDocx:
    """``ds.scan_docx`` run against the ``docx_with_cpr`` / ``docx_no_cpr`` fixtures."""

    def test_detects_cpr_in_paragraph(self, docx_with_cpr):
        found = ds.scan_docx(docx_with_cpr)["cprs"]
        assert len(found) >= 1

    def test_detects_multiple_cprs(self, docx_with_cpr):
        found = ds.scan_docx(docx_with_cpr)["cprs"]
        assert len(found) >= 2

    def test_detects_cpr_in_table_cell(self, docx_with_cpr):
        found = ds.scan_docx(docx_with_cpr)["cprs"]
        # Fixture: 2 CPRs in paragraphs + 1 in a table cell (with context)
        assert len(found) >= 3

    def test_no_false_positive_on_clean_doc(self, docx_no_cpr):
        found = ds.scan_docx(docx_no_cpr)["cprs"]
        assert found == []

    def test_returns_cprs_key(self, docx_with_cpr):
        scan_result = ds.scan_docx(docx_with_cpr)
        assert "cprs" in scan_result

    def test_no_error_on_clean_doc(self, docx_no_cpr):
        # A clean document must either omit "error" or carry it as None.
        scan_result = ds.scan_docx(docx_no_cpr)
        error_value = scan_result.get("error")
        assert error_value is None
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 4. scan_xlsx
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestScanXlsx:
    """``ds.scan_xlsx`` run against the ``xlsx_with_cpr`` / ``xlsx_no_cpr`` fixtures."""

    def test_detects_cpr_in_cell_with_context(self, xlsx_with_cpr):
        found = ds.scan_xlsx(xlsx_with_cpr)["cprs"]
        assert len(found) >= 1

    def test_no_false_positive_on_account_numbers(self, xlsx_no_cpr):
        found = ds.scan_xlsx(xlsx_no_cpr)["cprs"]
        assert found == []

    def test_returns_cprs_key(self, xlsx_with_cpr):
        scan_result = ds.scan_xlsx(xlsx_with_cpr)
        assert "cprs" in scan_result
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 5. Binary / edge cases via cpr_detector._scan_bytes
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestScanBytes:
    """Edge cases fed through ``cpr_detector._scan_bytes`` as raw bytes plus a filename."""

    def test_binary_garbage_does_not_crash(self, binary_garbage):
        import cpr_detector

        payload = binary_garbage.read_bytes()
        outcome = cpr_detector._scan_bytes(payload, "sample.bin")
        assert isinstance(outcome, dict)
        assert "cprs" in outcome

    def test_empty_bytes_returns_empty(self):
        import cpr_detector

        outcome = cpr_detector._scan_bytes(b"", "empty.txt")
        assert outcome["cprs"] == []

    def test_txt_with_cpr_detected(self, txt_with_art9):
        import cpr_detector
        import document_scanner as ds

        # scan_text in document_scanner calls undefined extract_cpr_and_dates;
        # test the underlying extract_matches directly on the file content.
        content = txt_with_art9.read_text(encoding='utf-8')
        matches, _ = ds.extract_matches(content, 1, 'test')
        assert len(matches) >= 1

    def test_docx_with_cpr_via_scan_bytes(self, docx_with_cpr):
        import cpr_detector

        payload = docx_with_cpr.read_bytes()
        outcome = cpr_detector._scan_bytes(payload, "sample.docx")
        assert len(outcome["cprs"]) >= 1

    def test_xlsx_with_cpr_via_scan_bytes(self, xlsx_with_cpr):
        import cpr_detector

        payload = xlsx_with_cpr.read_bytes()
        outcome = cpr_detector._scan_bytes(payload, "sample.xlsx")
        assert len(outcome["cprs"]) >= 1

    def test_unsupported_extension_does_not_crash(self):
        import cpr_detector

        outcome = cpr_detector._scan_bytes(b"some bytes", "file.xyz")
        assert isinstance(outcome, dict)
|
||||
277
tests/test_routes.py
Normal file
277
tests/test_routes.py
Normal file
@ -0,0 +1,277 @@
|
||||
"""
|
||||
Integration tests for Flask routes — uses the real Flask test client.
|
||||
|
||||
Strategy
|
||||
--------
|
||||
- ``flask_app`` (module-scope) — imports gdpr_scanner once, enables TESTING mode.
|
||||
- ``client`` (function-scope) — fresh test_client() per test.
|
||||
- ``db_patch`` (function-scope) — replaces ``_get_db`` in both routes.database and
routes.export with a ScanDB backed by tmp_path so tests never touch ~/.gdprscanner.
Also sets ``DB_OK = True`` in both modules.
|
||||
- ``mock_connector`` — sets routes.state.connector to a MagicMock so routes
|
||||
that require authentication pass the ``if not state.connector``
|
||||
guard.
|
||||
- ``clean_state`` — autouse, clears routes.state.flagged_items after each test and
checks the scan lock without force-releasing it if it is still held.
|
||||
"""
|
||||
import io
|
||||
import threading
|
||||
import time
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture(scope="module")
def flask_app():
    """Import ``gdpr_scanner`` and return its ``app`` with testing flags set.

    Sets ``TESTING`` to True and ``WTF_CSRF_ENABLED`` to False on the app config.
    """
    import gdpr_scanner

    app = gdpr_scanner.app
    settings = app.config
    settings["TESTING"] = True
    settings["WTF_CSRF_ENABLED"] = False
    return app
|
||||
|
||||
|
||||
@pytest.fixture()
def client(flask_app):
    """Yield a fresh test client; its context manager is exited after the test."""
    with flask_app.test_client() as test_client:
        yield test_client
|
||||
|
||||
|
||||
@pytest.fixture()
def db_patch(tmp_path, monkeypatch):
    """Redirect database access in routes.database and routes.export to a temp ScanDB.

    Both modules get ``_get_db`` replaced by a callable returning the same
    ScanDB (stored at ``tmp_path / "test.db"``) and ``DB_OK`` forced to True.
    Returns the ScanDB so tests can inspect it directly.
    """
    from gdpr_db import ScanDB
    import routes.database
    import routes.export

    db = ScanDB(str(tmp_path / "test.db"))
    for module in (routes.database, routes.export):
        monkeypatch.setattr(module, "_get_db", lambda: db)
        monkeypatch.setattr(module, "DB_OK", True)
    return db
|
||||
|
||||
|
||||
@pytest.fixture()
def mock_connector(monkeypatch):
    """Install a ``MagicMock`` as ``routes.state.connector`` and return it.

    /api/scan/start is now handled exclusively by the blueprint (routes/scan.py),
    which checks ``state.connector``, so patching that attribute is enough to
    get past the connector guard in scan routes.
    """
    from routes import state

    fake_connector = MagicMock()
    monkeypatch.setattr(state, "connector", fake_connector)
    return fake_connector
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clean_state():
    """Wipe in-memory scan state after each test and wait briefly for the scan lock.

    Teardown only (everything runs after the ``yield``):

    * ``state.flagged_items`` is cleared so export tests don't bleed into each
      other.
    * The scan lock is given a bounded amount of time to become free, so a
      background scan thread that is still finishing cannot leak into the next
      test. The lock is never force-released: if it is still held when the
      timeout expires, it is left alone, because releasing a lock owned by
      another thread would make that thread's own release fail.
    """
    from routes import state

    yield

    # Clear in-memory results so export tests don't bleed into each other
    state.flagged_items.clear()

    # Wait (bounded) for whoever holds the lock to let go. When the lock is
    # already free this returns immediately; we then hand it straight back.
    if state._scan_lock.acquire(timeout=2.0):
        state._scan_lock.release()
    # else: still held — leave it; the test that set it is responsible
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/scan/status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestScanStatus:
    """GET /api/scan/status while no scan is in progress."""

    def test_idle_returns_not_running(self, client):
        response = client.get("/api/scan/status")
        assert response.status_code == 200
        payload = response.get_json()
        assert payload["running"] is False

    def test_scan_id_is_none_when_idle(self, client):
        # The key must be present and explicitly None, not merely absent.
        response = client.get("/api/scan/status")
        payload = response.get_json()
        assert "scan_id" in payload
        assert payload["scan_id"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/scan/start
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestScanStart:
    """POST /api/scan/start: connector guard, scan-lock conflict, and successful start."""

    def test_unauthenticated_returns_401(self, client, monkeypatch):
        """With ``state.connector`` set to None the route must answer 401."""
        from routes import state

        monkeypatch.setattr(state, "connector", None)
        r = client.post("/api/scan/start", json={})
        assert r.status_code == 401
        assert "not authenticated" in r.get_json()["error"]

    def test_lock_held_returns_409(self, client, mock_connector):
        """While the scan lock is held the route must answer 409."""
        from routes import state

        # Hold the lock as if a scan were already running
        acquired = state._scan_lock.acquire(blocking=False)
        assert acquired, "Lock should be free at test start"
        try:
            r = client.post("/api/scan/start", json={})
            assert r.status_code == 409
            assert "already running" in r.get_json()["error"]
        finally:
            # Always hand the lock back, even when an assertion above fails.
            state._scan_lock.release()

    def test_authenticated_returns_started(self, client, mock_connector, monkeypatch):
        """With a connector present the route must answer 200 / "started"."""
        import scan_engine
        from routes import state

        # Stub run_scan so the background thread finishes instantly
        monkeypatch.setattr(scan_engine, "run_scan", lambda opts: None)
        r = client.post("/api/scan/start", json={"sources": ["email"]})
        assert r.status_code == 200
        assert r.get_json()["status"] == "started"

        # Give the background thread up to 2 s to release the lock. A timed
        # acquire replaces a sleep/poll loop and is immune to wall-clock jumps.
        released = state._scan_lock.acquire(timeout=2.0)
        assert released, "scan lock was never released"
        state._scan_lock.release()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/scan/stop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestScanStop:
    """POST /api/scan/stop."""

    def test_stop_always_returns_200(self, client):
        response = client.post("/api/scan/stop")
        assert response.status_code == 200
        payload = response.get_json()
        assert payload["status"] == "stopping"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/db/stats
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDbStats:
    """GET /api/db/stats with the database flagged unavailable and available."""

    def test_without_db_returns_503(self, client, monkeypatch):
        import routes.database

        monkeypatch.setattr(routes.database, "DB_OK", False)
        response = client.get("/api/db/stats")
        assert response.status_code == 503

    def test_with_db_returns_200(self, client, db_patch):
        # The direct route in gdpr_scanner.py (which takes precedence over the
        # blueprint) returns get_stats() directly — an empty dict for a fresh DB.
        response = client.get("/api/db/stats")
        assert response.status_code == 200
        payload = response.get_json()
        assert isinstance(payload, dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/db/disposition
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDisposition:
    """POST /api/db/disposition and GET /api/db/disposition/<item_id>."""

    def test_set_disposition_missing_item_id_returns_400(self, client, db_patch):
        body = {"status": "retain-legal"}
        response = client.post("/api/db/disposition", json=body)
        assert response.status_code == 400
        assert "item_id" in response.get_json()["error"]

    def test_set_disposition_saves_and_get_returns_it(self, client, db_patch):
        item_id = "test-item-abc123"

        # Set
        body = {
            "item_id": item_id,
            "status": "retain-legal",
            "legal_basis": "GDPR Art. 6(1)(c)",
            "notes": "Required by law",
        }
        saved = client.post("/api/db/disposition", json=body)
        assert saved.status_code == 200
        assert saved.get_json()["status"] == "saved"

        # Get
        fetched = client.get(f"/api/db/disposition/{item_id}")
        assert fetched.status_code == 200
        stored = fetched.get_json()
        assert stored["status"] == "retain-legal"

    def test_get_disposition_unknown_id_returns_unreviewed(self, client, db_patch):
        response = client.get("/api/db/disposition/no-such-item")
        assert response.status_code == 200
        assert response.get_json()["status"] == "unreviewed"

    def test_without_db_returns_503(self, client, monkeypatch):
        import routes.database

        monkeypatch.setattr(routes.database, "DB_OK", False)
        body = {"item_id": "x", "status": "retain-legal"}
        response = client.post("/api/db/disposition", json=body)
        assert response.status_code == 503
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/export_excel
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExportExcel:
    """GET /api/export_excel with and without flagged items held in memory."""

    XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    def test_empty_db_returns_workbook(self, client, db_patch):
        response = client.get("/api/export_excel")
        assert response.status_code == 200
        assert self.XLSX_MIME in response.content_type
        # Must be a valid zip/xlsx (PK magic bytes)
        magic = response.data[:2]
        assert magic == b"PK"

    def test_with_items_in_memory_includes_data(self, client, db_patch):
        from routes import state

        flagged = {
            "id": "item-001",
            "name": "test_file.docx",
            "source": "onedrive",
            "cpr_count": 2,
            "face_count": 0,
            "account_name": "Anna Hansen",
            "user_role": "staff",
            "modified": "2025-01-15T10:00:00",
            "size_kb": 42,
            "url": "https://example.com/file",
        }
        state.flagged_items.append(flagged)

        response = client.get("/api/export_excel")
        assert response.status_code == 200
        workbook_bytes = response.data
        assert workbook_bytes[:2] == b"PK"
        # Workbook with data is larger than a skeleton workbook
        assert len(workbook_bytes) > 4096
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/export_article30
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExportArticle30:
    """GET /api/export_article30 with and without flagged items held in memory."""

    DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    def test_no_items_returns_400(self, client, db_patch):
        """Article 30 export requires at least one flagged item."""
        response = client.get("/api/export_article30")
        assert response.status_code == 400
        message = response.get_json()["error"]
        assert "scan first" in message.lower()

    def test_with_items_returns_docx(self, client, db_patch):
        from routes import state

        flagged = {
            "id": "item-002",
            "name": "payroll.xlsx",
            "source": "email",
            "cpr_count": 1,
            "account_name": "Test User",
            "user_role": "staff",
            "modified": "2025-03-01T09:00:00",
            "size_kb": 10,
        }
        state.flagged_items.append(flagged)

        response = client.get("/api/export_article30")
        assert response.status_code == 200
        assert self.DOCX_MIME in response.content_type
        # DOCX is a zip — check PK magic bytes
        magic = response.data[:2]
        assert magic == b"PK"
|
||||
Loading…
x
Reference in New Issue
Block a user