From b6d2915d497e2ebb4f1f66fb829c7a0fbe06878b Mon Sep 17 00:00:00 2001 From: StyxX65 <150797939+StyxX65@users.noreply.github.com> Date: Wed, 10 Jun 2026 11:06:36 +0200 Subject: [PATCH] Harden XSS escaping and encrypt Claude API key at rest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - results.js: add esc() helper and apply to all scan-derived fields (name, account_name, folder, source, modified, label, img alt) across card/list/preview/subject-lookup/related views. Scan-derived strings can carry attacker-controlled markup (e.g. a OneDrive file named with HTML), so they must be escaped before innerHTML/attribute embedding. Also escape the related-docs onclick JSON to match the delete/redact &quot; pattern. - cpr_detector._placeholder_svg: escape label/name before embedding — served as image/svg+xml via /api/thumb?name=, so an unescaped value was a reflected-XSS vector when the URL is opened directly. - cpr_detector: remove 44-line unreachable duplicate of the face-detection body left inside _extract_audio_metadata after its return. - app_config: encrypt claude_api_key at rest with the machine-keyed Fernet (same as the SMTP password); add get_claude_api_key() for decryption. Legacy plaintext keys are still read and re-encrypted on next save. Update readers in document_scanner.py and routes/app_routes.py. 201 tests pass. 
Co-Authored-By: Claude Fable 5 --- app_config.py | 9 ++++++++- cpr_detector.py | 48 +++++--------------------------------------- document_scanner.py | 4 ++-- routes/app_routes.py | 4 ++-- static/js/results.js | 48 ++++++++++++++++++++++++++++---------------- 5 files changed, 48 insertions(+), 65 deletions(-) diff --git a/app_config.py b/app_config.py index d9cca5a..22ab906 100644 --- a/app_config.py +++ b/app_config.py @@ -343,10 +343,17 @@ def save_claude_config(enabled: bool, api_key: "str | None" = None) -> None: cfg = _load_config() cfg["claude_ner"] = bool(enabled) if api_key is not None: - cfg["claude_api_key"] = api_key + # Encrypt at rest with the machine-keyed Fernet (same as the SMTP + # password). Falls back to plaintext only if cryptography is missing. + cfg["claude_api_key"] = _encrypt_password(api_key) if api_key else "" _save_config(cfg) +def get_claude_api_key() -> str: + """Return the decrypted Claude API key (handles legacy plaintext).""" + return _decrypt_password(_load_config().get("claude_api_key", "")) + + # ── Profile storage (15a) ───────────────────────────────────────────────────── _SETTINGS_PATH = _DATA_DIR / "settings.json" _SRC_TOGGLES_PATH = _DATA_DIR / "src_toggles.json" diff --git a/cpr_detector.py b/cpr_detector.py index e210240..b2dc210 100644 --- a/cpr_detector.py +++ b/cpr_detector.py @@ -420,49 +420,6 @@ def _extract_audio_metadata(content: bytes, filename: str) -> dict: return result - """Detect faces in an image file using OpenCV Haar cascades. - - Returns the number of faces detected, or 0 if cv2 is unavailable, - the file is not a supported image format, or decoding fails. - Face detection is intentionally strict (minNeighbors=8, min_size=80px) to - reduce false positives on background textures, labels, and artwork. - Haar cascades are tuned for compliance flagging, not exhaustive detection. 
(#9) - """ - if not SCANNER_OK: - return 0 - try: - cv2_mod = getattr(ds, "_get_cv2", None) - if cv2_mod is None: - return 0 - cv2, np = ds._get_cv2() - if cv2 is None or np is None: - return 0 - except Exception: - return 0 - - try: - # Decode image bytes → cv2 BGR array - arr = np.frombuffer(content, dtype=np.uint8) - img = cv2.imdecode(arr, cv2.IMREAD_COLOR) - if img is None: - # imdecode failed (e.g. HEIC without codec) — try PIL fallback - if PIL_OK: - try: - from PIL import Image as _PILImg - import io as _io - pil_img = _PILImg.open(_io.BytesIO(content)).convert("RGB") - pil_arr = np.array(pil_img) - img = cv2.cvtColor(pil_arr, cv2.COLOR_RGB2BGR) - except Exception: - return 0 - else: - return 0 - - faces = ds.detect_faces_cv2(img, min_size=80, neighbors=8) - return len(faces) - except Exception: - return 0 - def _detect_photo_faces(content: bytes, filename: str) -> int: """Detect faces in an image file using OpenCV Haar cascades. @@ -749,6 +706,11 @@ def _placeholder_svg(ext: str, name: str) -> str: } bg, label = colors.get(ext, ("#9CA3AF", ext.upper().lstrip("."))) short = name[:22] + "…" if len(name) > 22 else name + # Escape label/name before embedding — served as image/svg+xml, so an + # unescaped value (from the ?name= query param via /api/thumb) would be a + # reflected-XSS vector when the URL is opened directly. + label = _html_esc(label) + short = _html_esc(short) svg = f""" diff --git a/document_scanner.py b/document_scanner.py index 8bd9862..66407d9 100644 --- a/document_scanner.py +++ b/document_scanner.py @@ -243,9 +243,9 @@ def load_nlp(): def _get_claude_ner_config() -> "tuple[bool, str]": """Read Claude NER settings from config.json. 
Small file — OS-cached.""" try: - from app_config import _load_config + from app_config import _load_config, get_claude_api_key cfg = _load_config() - return bool(cfg.get("claude_ner")), str(cfg.get("claude_api_key", "") or "") + return bool(cfg.get("claude_ner")), get_claude_api_key() except Exception: return False, "" diff --git a/routes/app_routes.py b/routes/app_routes.py index fb53c8f..3c7fb7b 100644 --- a/routes/app_routes.py +++ b/routes/app_routes.py @@ -99,8 +99,8 @@ def claude_settings(): @bp.route("/api/settings/claude/test", methods=["POST"]) def claude_test(): - from app_config import _load_config - api_key = _load_config().get("claude_api_key", "") + from app_config import get_claude_api_key + api_key = get_claude_api_key() if not api_key: return jsonify({"ok": False, "error": "No API key saved"}), 400 try: diff --git a/static/js/results.js b/static/js/results.js index a6b52aa..d3bcdf2 100644 --- a/static/js/results.js +++ b/static/js/results.js @@ -1,4 +1,18 @@ import { S } from './state.js'; + +// Escape untrusted strings (filenames, account/display names, folders) before +// embedding them in innerHTML / title attributes. Scan-derived values can come +// from attacker-controlled content (e.g. a OneDrive file named with markup), +// so every such field must pass through esc() to prevent stored XSS. +function esc(s) { + return String(s == null ? '' : s) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +} + // ── Cards ───────────────────────────────────────────────────────────────────── const SOURCE_BADGES = { email: ['📧', 'badge-email', 'Outlook'], @@ -52,9 +66,9 @@ function appendCard(f) { card.innerHTML = `
${icon}
-
${f.name}
-
${f.size_kb} KB · ${f.modified || ''}${f.folder ? ' · 📂 ' + f.folder : ''}
-
${label} ${f.source || ''}${f.account_name ? ' · ' : ''}${f.transfer_risk === 'external-recipient' ? ' ⚠ Ext.' : f.transfer_risk ? ' 🔗' : ''}
+
${esc(f.name)}
+
${f.size_kb} KB · ${esc(f.modified || '')}${f.folder ? ' · 📂 ' + esc(f.folder) : ''}
+
${esc(label)} ${esc(f.source || '')}${f.account_name ? ' · ' : ''}${f.transfer_risk === 'external-recipient' ? ' ⚠ Ext.' : f.transfer_risk ? ' 🔗' : ''}
${f.cpr_count} CPR ${f.email_count > 0 ? '' + f.email_count + ' ' + t('m365_badge_emails', 'e-mail') + ' ' : ''} @@ -65,12 +79,12 @@ function appendCard(f) { ${delBtn}${redactBtn}`; } else { card.innerHTML = ` -
${f.name}
+
${esc(f.name)}
-
${f.name}
-
${f.size_kb} KB · ${f.modified || ''}
- ${f.folder ? `
📂 ${f.folder}
` : ''} -
${label}${f.account_name ? ' ' : ''}${f.transfer_risk === "external-recipient" ? ' ⚠ Ext.' : f.transfer_risk ? ' 🔗' : ''}
+
${esc(f.name)}
+
${f.size_kb} KB · ${esc(f.modified || '')}
+ ${f.folder ? `
📂 ${esc(f.folder)}
` : ''} +
${esc(label)}${f.account_name ? ' ' : ''}${f.transfer_risk === "external-recipient" ? ' ⚠ Ext.' : f.transfer_risk ? ' 🔗' : ''}
${f.cpr_count} CPR${f.email_count > 0 ? ' ' : ''}${f.phone_count > 0 ? ' ' + f.phone_count + ' ' + t('m365_badge_phones', 'tlf.') + '' : ''}${f.face_count > 0 ? ' ' + f.face_count + ' ' + t('m365_badge_faces', f.face_count === 1 ? 'face' : 'faces') + '' : ''}${f.exif && f.exif.gps ? ' 🌍 GPS' : ''}${f._resolved ? ' ✓ ' + t('history_resolved_badge', 'Resolved') + '' : ''}${f.overdue ? ' 🗓 Overdue' : ''}
${delBtn}${redactBtn}`; @@ -111,10 +125,10 @@ async function openPreview(f) { loading.textContent = 'Loading preview…'; meta.innerHTML = [ - f.account_name ? `👤 ${f.account_name}` : '', - f.source ? `${f.source}` : '', + f.account_name ? `👤 ${esc(f.account_name)}` : '', + f.source ? `${esc(f.source)}` : '', f.size_kb ? `${f.size_kb} KB` : '', - f.modified ? `${f.modified}` : '', + f.modified ? `${esc(f.modified)}` : '', f.cpr_count ? `${f.cpr_count} CPR` : '', f.email_count ? `${f.email_count} ${t('m365_badge_emails','e-mail')}` : '', f.phone_count ? `${f.phone_count} ${t('m365_badge_phones','tlf.')}` : '', @@ -206,11 +220,11 @@ async function _loadRelated(f) { const rows = items.map(item => { const shared = item.shared_cprs ?? ''; const badge = shared ? `${shared} CPR` : ''; - const src = item.source ? `${item.source}` : ''; - return `
${esc(item.source)}` : ''; + return `
- ${item.name} + ${esc(item.name)} ${src}${badge}
`; }).join(''); @@ -351,9 +365,9 @@ async function runSubjectLookup() { _dsubItems = d.items; resultsEl.innerHTML = d.items.map(item => `
-
${item.name}
-
${item.source_type || ""}
-
${item.modified || ""}
+
${esc(item.name)}
+
${esc(item.source_type || "")}
+
${esc(item.modified || "")}
${item.cpr_count} CPR
`).join("");