diff --git a/CLAUDE.md b/CLAUDE.md index 21567c9..e80e1cd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -93,7 +93,8 @@ All options live in the profile `options` dict and apply to **all three scan eng - **`get_sessions(limit=50, window_seconds=300)`** — groups `scans` rows by 300 s window. Groups built ascending, returned descending. `ref_scan_id` is the highest `scan_id` in each group. Do not change window size independently of `get_session_items`. - **`get_session_items(ref_scan_id=N)`** — anchors 300 s window to that scan's `started_at`. Window is **symmetric**: `started_at BETWEEN ref.started_at - 300 AND ref.started_at + 300`. Do not revert to a one-sided lower bound. - **`get_related_items(item_id, ref_scan_id, window_seconds=300)`** — self-joins `cpr_index` to find items sharing ≥1 CPR hash. Uses same 300 s symmetric window — do not change independently. -- **`GET /api/db/flagged?ref=N`** — passes `ref_scan_id` to `get_session_items`; viewer scope enforcement still applies. +- **`get_open_items()`** — returns every flagged item with **no action taken**, across **all** scans (not just the latest session window). "Open" = no `dispositions` row, or one whose `status='unreviewed'`. Because `flagged_items` PK is `(id, scan_id)`, the same item recurs per scan; the query dedupes by `id`, keeping the row from the highest finished `scan_id`. This powers the **default landing view** so items don't drop out of sight once a newer scan opens a fresh session. +- **`GET /api/db/flagged`** — **with `?ref=N`** → `get_session_items(ref_scan_id=N)` (history mode); **without ref** → `get_open_items()` (default + viewer). Viewer scope enforcement applies to both. Do not change the no-ref `get_session_items()` default elsewhere (`export.py`, `scan_scheduler.py` still rely on latest-session for the current scan's report/email). - See `static/js/CLAUDE.md` for the frontend history browser behaviour and `sse_replay_done` retry fix. ## Global gotchas diff --git a/gdpr_db.py b/gdpr_db.py index 603a3b8..3eb275b 100644 --- a/gdpr_db.py +++ b/gdpr_db.py @@ -536,6 +536,40 @@ class ScanDB: result.append(d) return result + def get_open_items(self) -> list[dict]: + """Return every flagged item across all scans that has no action taken. + + "Open" means the item has no disposition row (or a row whose status is + still 'unreviewed'). Unlike get_session_items this is NOT limited to the + latest scan window — it surfaces all outstanding items so nothing slips + out of view once a newer scan starts a fresh session. + + flagged_items has a composite PK of (id, scan_id), so the same logical + item appears once per scan that flagged it. We deduplicate by id, keeping + the row from the most recent finished scan, so each open item shows once. + """ + rows = self._connect().execute( + """SELECT fi.*, COALESCE(d.status, 'unreviewed') AS disposition + FROM flagged_items fi + JOIN scans s ON fi.scan_id = s.id + LEFT JOIN dispositions d ON d.item_id = fi.id + WHERE s.finished_at IS NOT NULL + AND (d.item_id IS NULL OR d.status = 'unreviewed') + AND fi.scan_id = ( + SELECT MAX(fi2.scan_id) + FROM flagged_items fi2 + JOIN scans s2 ON fi2.scan_id = s2.id + WHERE fi2.id = fi.id AND s2.finished_at IS NOT NULL + ) + ORDER BY fi.cpr_count DESC""", + ).fetchall() + result = [] + for r in rows: + d = dict(r) + d["attachments"] = json.loads(d.get("attachments") or "[]") + result.append(d) + return result + def get_related_items(self, item_id: str, ref_scan_id: int | None = None, window_seconds: int = 300) -> list[dict]: """Return flagged items from the same session that share at least one CPR diff --git a/lang/da.json b/lang/da.json index 753c692..0a20a36 100644 --- a/lang/da.json +++ b/lang/da.json @@ -106,7 +106,7 @@ "history_lbl": "Historik", "history_items": "fund", "history_btn_sessions": "Sessioner", - "history_btn_latest": "Seneste scanning", + "history_btn_latest": "Åbne fund", "history_picker_empty": "Ingen tidligere scanninger", "history_delta_badge": "Delta", "history_latest_badge": "Seneste", diff --git a/lang/de.json b/lang/de.json index dcb3b1f..2cc7a5a 100644 --- a/lang/de.json +++ b/lang/de.json @@ -167,7 +167,7 @@ "history_lbl": "Verlauf", "history_items": "Treffer", "history_btn_sessions": "Sessionen", - "history_btn_latest": "Letzter Scan", + "history_btn_latest": "Offene Einträge", "history_picker_empty": "Keine früheren Scans", "history_delta_badge": "Delta", "history_latest_badge": "Aktuell", diff --git a/lang/en.json b/lang/en.json index e6dbbcc..2360725 100644 --- a/lang/en.json +++ b/lang/en.json @@ -106,7 +106,7 @@ "history_lbl": "History", "history_items": "items", "history_btn_sessions": "Sessions", - "history_btn_latest": "Latest scan", + "history_btn_latest": "Open items", "history_picker_empty": "No past scans", "history_delta_badge": "Delta", "history_latest_badge": "Latest", diff --git a/routes/database.py b/routes/database.py index 38195c0..72bfc1c 100644 --- a/routes/database.py +++ b/routes/database.py @@ -180,7 +180,11 @@ def db_get_disposition(item_id): @bp.route("/api/db/flagged") def db_flagged_items(): - """Return flagged items from the most recent completed scan session. + """Return flagged items for the results grid. + + With ?ref=N, returns the items from that specific past scan session (history + mode). Without ref, returns every item still awaiting action across all + scans (the default landing view) — not just the latest session window. Used by the read-only viewer to load results without an active SSE connection. Respects viewer_scope.role stored in the session for scoped tokens. """ @@ -197,7 +201,13 @@ def db_flagged_items(): else: user_filt = {raw_user.lower()} if raw_user else set() ref_scan_id = request.args.get("ref", type=int) - items = _get_db().get_session_items(ref_scan_id=ref_scan_id) + if ref_scan_id: + # History mode — a specific past session was requested. + items = _get_db().get_session_items(ref_scan_id=ref_scan_id) + else: + # Default landing / viewer — show every item still awaiting action, + # across all scans, not just the latest session window. + items = _get_db().get_open_items() # Normalise JSON-encoded columns the same way scan_engine does for SSE cards import json as _json out = [] diff --git a/static/js/CLAUDE.md b/static/js/CLAUDE.md index 68d85ad..f8b8f5a 100644 --- a/static/js/CLAUDE.md +++ b/static/js/CLAUDE.md @@ -40,7 +40,8 @@ Never revert to `!!window._googleConnected` / `_fileSources.length > 0` — thos ## Scan history browser — history.js + results.js -- **`S._historyRefScanId`** — `null` = live/SSE mode; positive int = viewing a past session. Set by `loadHistorySession()`; cleared by `exitHistoryMode()`. +- **`S._historyRefScanId`** — `null` = live/SSE mode **or** the default open-items view; positive int = viewing a past session. Set by `loadHistorySession()`; cleared by `exitHistoryMode()`. +- **`loadHistorySession(null)` → `loadOpenItems()`** — passing `null` no longer resolves to the latest session. It now loads **all open (unactioned) items across every scan** via `GET /api/db/flagged` (no `ref`), leaves `_historyRefScanId` null, and shows no history banner. The "Open items" banner button (`onclick="loadHistorySession(null)"`, key `history_btn_latest`) therefore returns to this open-items view. Specific sessions are still loaded with a positive `ref`, which keeps the re-scan resolved-diff. Do not revert `null` to "resolve latest ref" — that reintroduces the "only the last scan is shown" complaint. - **Auto-load on page load** — `_sseWatchdog()` in `results.js` calls `window.loadHistorySession?.(null)` whenever `/api/scan/status` reports neither `running` (M365 + file lock) nor `google_running` (Google lock) **and** nothing is shown yet (`!S._historyRefScanId && !S.flaggedData.length`). This is **not one-shot** — it retries on every 4s poll until a session is restored, because (a) the replay buffer is empty after a server restart so `sse_replay_done` never fires, and (b) a completed scan's replayed `scan_phase` can leave a running flag set that would otherwise block the load forever. Because both locks are confirmed free, the watchdog clears the stale `_m365/_google/_fileScanRunning` flags before calling. Do not revert to a one-shot `_initialStatusChecked` gate — that reintroduces the "blank grid after refresh/restart" bug. `/api/scan/status` **must** report `google_running` separately; `running` alone misses live Google scans. The `sse_replay_done` handler in `scan.js` still retries for the non-empty-buffer (no-restart) case. - **History banner** (`#historyBanner`) — shown when `S._historyRefScanId` is set. Do not hide/show from outside `history.js`. - **Session picker** (`#historyDropdown`) — rendered inside `[data-history-wrap]` so the outside-click handler works correctly. Do not move the picker outside this wrapper. diff --git a/static/js/history.js b/static/js/history.js index 58bf8bb..eb320cf 100644 --- a/static/js/history.js +++ b/static/js/history.js @@ -38,20 +38,50 @@ function invalidateHistoryCache() { // ── Load a session into the results grid ────────────────────────────────────── -async function loadHistorySession(refScanId) { - // refScanId: null → latest session, positive int → specific session - let resolvedRef = refScanId; - if (resolvedRef === null) { - const sessions = _sessions !== null ? _sessions : await _fetchSessions(); - // Bail if a scan started while we were fetching sessions +// Default landing view: every flagged item still awaiting action, across all +// scans (not just the latest session). Leaves S._historyRefScanId null (live +// mode) and shows no history banner — this is "now", not a past session. +async function loadOpenItems() { + // Bail if a scan is running — live SSE owns the grid then. + if (S._m365ScanRunning || S._googleScanRunning || S._fileScanRunning) return; + try { + const r = await fetch('/api/db/flagged'); + const items = await r.json(); if (S._m365ScanRunning || S._googleScanRunning || S._fileScanRunning) return; - if (!sessions.length) { - // No scans in DB — nothing to show + closeHistoryPicker(); + + if (!Array.isArray(items) || items.length === 0) { + S._historyRefScanId = null; + _setHistoryBanner(false); window.loadLastScanSummary?.(); return; } - resolvedRef = sessions[0].ref_scan_id; + + S._historyRefScanId = null; + S.flaggedData = items; + S.filteredData = []; + + const grid = document.getElementById('grid'); + const emptyState = document.getElementById('emptyState'); + const lastScan = document.getElementById('lastScanSummary'); + if (emptyState) emptyState.style.display = 'none'; + if (lastScan) lastScan.style.display = 'none'; + if (grid) { grid.innerHTML = ''; grid.style.display = 'grid'; } + + window.renderGrid(items); + try { window.markOverdueCards(); } catch(_) {} + try { window.loadTrend(); } catch(_) {} + _setHistoryBanner(false); + } catch(e) { + console.error('[history] failed to load open items:', e); } +} + +async function loadHistorySession(refScanId) { + // refScanId: null → all open (unreviewed) items across every scan, + // positive int → a specific past session + if (refScanId === null) return loadOpenItems(); + const resolvedRef = refScanId; try { const r = await fetch('/api/db/flagged?ref=' + resolvedRef); diff --git a/templates/index.html b/templates/index.html index 329888a..5f6fa9c 100644 --- a/templates/index.html +++ b/templates/index.html @@ -375,7 +375,7 @@ document.addEventListener('DOMContentLoaded', applyI18n); - + diff --git a/tests/test_route_integration.py b/tests/test_route_integration.py index 48645d8..4cadfef 100644 --- a/tests/test_route_integration.py +++ b/tests/test_route_integration.py @@ -270,6 +270,49 @@ class TestFlaggedScopeEnforcement: ids = {row["id"] for row in r.get_json()} assert "ci1" in ids + def test_no_ref_returns_open_items_across_all_sessions(self, client, db_patch): + # Two scans in separate session windows. The default (no-ref) view must + # surface unactioned items from BOTH, not just the latest session. + old_id = _seed_scan(db_patch, [_item("o1")]) + db_patch._connect().execute( + "UPDATE scans SET started_at = started_at - 400 WHERE id = ?", (old_id,) + ) + db_patch._connect().commit() + _seed_scan(db_patch, [_item("o2")]) + + r = client.get("/api/db/flagged") + ids = {row["id"] for row in r.get_json()} + assert ids == {"o1", "o2"} + + def test_no_ref_excludes_items_with_a_disposition(self, client, db_patch): + _seed_scan(db_patch, [_item("d1"), _item("d2")]) + db_patch.set_disposition("d1", "kept") + + r = client.get("/api/db/flagged") + ids = {row["id"] for row in r.get_json()} + assert "d2" in ids # untouched → still open + assert "d1" not in ids # action taken → hidden + + def test_no_ref_unreviewed_disposition_stays_open(self, client, db_patch): + _seed_scan(db_patch, [_item("u1")]) + db_patch.set_disposition("u1", "unreviewed") + + r = client.get("/api/db/flagged") + ids = {row["id"] for row in r.get_json()} + assert "u1" in ids # 'unreviewed' status is not an action + + def test_no_ref_dedupes_rescanned_item_to_latest(self, client, db_patch): + # Same item flagged by two scans → appears once. + old_id = _seed_scan(db_patch, [_item("k1")]) + db_patch._connect().execute( + "UPDATE scans SET started_at = started_at - 400 WHERE id = ?", (old_id,) + ) + db_patch._connect().commit() + _seed_scan(db_patch, [_item("k1")]) + + rows = [row for row in client.get("/api/db/flagged").get_json() if row["id"] == "k1"] + assert len(rows) == 1 + def test_ref_param_loads_historical_session(self, client, db_patch): # Push first scan >300 s into the past so it occupies its own session window. old_id = _seed_scan(db_patch, [_item("h1")])