Show all open (unactioned) items by default, not just the last scan

The default results view loaded only the latest scan session (±300s window), so items dropped out of sight once a newer scan started — and a long scheduled scan could show little or nothing on browser open. Add get_open_items(), which returns every flagged item with no disposition (or a disposition whose status is 'unreviewed') across all scans, deduplicated by id so that only the row from the latest finished scan is kept. GET /api/db/flagged now serves this list when no ?ref is given; ?ref=N still loads a specific past session. The frontend's loadHistorySession(null) now routes to a new loadOpenItems() loader. Rename the banner button to "Open items" (da/de/en). The get_session_items() default is unchanged — export.py and scan_scheduler.py still rely on the latest session for the current scan's report/email. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-22 09:19:55 +02:00 · 2026-06-22 09:19:55 +02:00 · 68076eba52
commit 68076eba52
parent 67f66c8441
10 changed files with 136 additions and 17 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -93,7 +93,8 @@ All options live in the profile `options` dict and apply to **all three scan eng
 - **`get_sessions(limit=50, window_seconds=300)`** — groups `scans` rows by 300 s window. Groups built ascending, returned descending. `ref_scan_id` is the highest `scan_id` in each group. Do not change window size independently of `get_session_items`.
 - **`get_session_items(ref_scan_id=N)`** — anchors 300 s window to that scan's `started_at`. Window is **symmetric**: `started_at BETWEEN ref.started_at - 300 AND ref.started_at + 300`. Do not revert to a one-sided lower bound.
 - **`get_related_items(item_id, ref_scan_id, window_seconds=300)`** — self-joins `cpr_index` to find items sharing ≥1 CPR hash. Uses same 300 s symmetric window — do not change independently.
- **`GET /api/db/flagged?ref=N`** — passes `ref_scan_id` to `get_session_items`; viewer scope enforcement still applies.
+- **`get_open_items()`** — returns every flagged item with **no action taken**, across **all** scans (not just the latest session window). "Open" = no `dispositions` row, or one whose `status='unreviewed'`. Because `flagged_items` PK is `(id, scan_id)`, the same item recurs per scan; the query dedupes by `id`, keeping the row from the highest finished `scan_id`. This powers the **default landing view** so items don't drop out of sight once a newer scan opens a fresh session.
+- **`GET /api/db/flagged`** — **with `?ref=N`** → `get_session_items(ref_scan_id=N)` (history mode); **without ref** → `get_open_items()` (the default landing view and the read-only viewer). Viewer scope enforcement applies to both. Do not change the no-ref default of `get_session_items()` itself: `export.py` and `scan_scheduler.py` still rely on it returning the latest session for the current scan's report/email.
 - See `static/js/CLAUDE.md` for the frontend history browser behaviour and `sse_replay_done` retry fix.

 ## Global gotchas
--- a/gdpr_db.py
+++ b/gdpr_db.py
@ -536,6 +536,40 @@ class ScanDB:
            result.append(d)
        return result

+    def get_open_items(self) -> list[dict]:
+        """Return every flagged item across all scans that has no action taken.
+
+        "Open" means the item has no disposition row (or a row whose status is
+        still 'unreviewed').  Unlike get_session_items this is NOT limited to the
+        latest scan window — it surfaces all outstanding items so nothing slips
+        out of view once a newer scan starts a fresh session.
+
+        flagged_items has a composite PK of (id, scan_id), so the same logical
+        item appears once per scan that flagged it.  We deduplicate by id, keeping
+        the row from the most recent finished scan, so each open item shows once.
+
+        Only scans with a non-NULL finished_at are considered, both for the
+        returned row and when picking the "most recent" scan, so an item that
+        was flagged only by a scan that has not finished is not returned.
+
+        Returns:
+            A list of dicts, one per open item, ordered by cpr_count descending.
+            Each dict holds the flagged_items columns plus a 'disposition' key
+            (always 'unreviewed' here, given the WHERE filter).  The
+            'attachments' value is decoded from JSON; a NULL or empty stored
+            value becomes an empty list.
+
+        NOTE(review): a dispositions row whose status is NULL does not satisfy
+        ``d.status = 'unreviewed'``, so such an item is excluded even though the
+        COALESCE in the SELECT would label it 'unreviewed' — confirm that
+        dispositions.status cannot be NULL.
+
+        NOTE(review): the ORDER BY has no tiebreaker, so items with equal
+        cpr_count are returned in an unspecified relative order.
+        """
+        rows = self._connect().execute(
+            """SELECT fi.*, COALESCE(d.status, 'unreviewed') AS disposition
+               FROM flagged_items fi
+               JOIN scans s ON fi.scan_id = s.id
+               LEFT JOIN dispositions d ON d.item_id = fi.id
+               WHERE s.finished_at IS NOT NULL
+                 AND (d.item_id IS NULL OR d.status = 'unreviewed')
+                 AND fi.scan_id = (
+                       SELECT MAX(fi2.scan_id)
+                       FROM flagged_items fi2
+                       JOIN scans s2 ON fi2.scan_id = s2.id
+                       WHERE fi2.id = fi.id AND s2.finished_at IS NOT NULL
+                 )
+               ORDER BY fi.cpr_count DESC""",
+        ).fetchall()
+        result = []
+        for r in rows:
+            d = dict(r)
+            d["attachments"] = json.loads(d.get("attachments") or "[]")
+            result.append(d)
+        return result
+
    def get_related_items(self, item_id: str, ref_scan_id: int | None = None,
                          window_seconds: int = 300) -> list[dict]:
        """Return flagged items from the same session that share at least one CPR
--- a/lang/da.json
+++ b/lang/da.json
@ -106,7 +106,7 @@
  "history_lbl": "Historik",
  "history_items": "fund",
  "history_btn_sessions": "Sessioner",
-  "history_btn_latest": "Seneste scanning",
+  "history_btn_latest": "Åbne fund",
  "history_picker_empty": "Ingen tidligere scanninger",
  "history_delta_badge": "Delta",
  "history_latest_badge": "Seneste",
--- a/lang/de.json
+++ b/lang/de.json
@ -167,7 +167,7 @@
  "history_lbl": "Verlauf",
  "history_items": "Treffer",
  "history_btn_sessions": "Sessionen",
-  "history_btn_latest": "Letzter Scan",
+  "history_btn_latest": "Offene Einträge",
  "history_picker_empty": "Keine früheren Scans",
  "history_delta_badge": "Delta",
  "history_latest_badge": "Aktuell",
--- a/lang/en.json
+++ b/lang/en.json
@ -106,7 +106,7 @@
  "history_lbl": "History",
  "history_items": "items",
  "history_btn_sessions": "Sessions",
-  "history_btn_latest": "Latest scan",
+  "history_btn_latest": "Open items",
  "history_picker_empty": "No past scans",
  "history_delta_badge": "Delta",
  "history_latest_badge": "Latest",
--- a/routes/database.py
+++ b/routes/database.py
@ -180,7 +180,11 @@ def db_get_disposition(item_id):

@bp.route("/api/db/flagged")
 def db_flagged_items():
-    """Return flagged items from the most recent completed scan session.
+    """Return flagged items for the results grid.
+
+    With ?ref=N, returns the items from that specific past scan session (history
+    mode).  Without ref, returns every item still awaiting action across all
+    scans (the default landing view) — not just the latest session window.
    Used by the read-only viewer to load results without an active SSE connection.
    Respects viewer_scope.role stored in the session for scoped tokens.
    """
@ -197,7 +201,13 @@ def db_flagged_items():
    else:
        user_filt = {raw_user.lower()} if raw_user else set()
    ref_scan_id = request.args.get("ref", type=int)
+    if ref_scan_id:
+        # History mode — a specific past session was requested.
        items = _get_db().get_session_items(ref_scan_id=ref_scan_id)
+    else:
+        # Default landing / viewer — show every item still awaiting action,
+        # across all scans, not just the latest session window.
+        items = _get_db().get_open_items()
    # Normalise JSON-encoded columns the same way scan_engine does for SSE cards
    import json as _json
    out = []
--- a/static/js/CLAUDE.md
+++ b/static/js/CLAUDE.md
@ -40,7 +40,8 @@ Never revert to `!!window._googleConnected` / `_fileSources.length > 0` — thos

 ## Scan history browser — history.js + results.js

- **`S._historyRefScanId`** — `null` = live/SSE mode; positive int = viewing a past session. Set by `loadHistorySession()`; cleared by `exitHistoryMode()`.
+- **`S._historyRefScanId`** — `null` = live/SSE mode **or** the default open-items view; positive int = viewing a past session. Set by `loadHistorySession()`; reset to `null` by `exitHistoryMode()` and by `loadOpenItems()`.
+- **`loadHistorySession(null)` → `loadOpenItems()`** — passing `null` no longer resolves to the latest session. It now loads **all open (unactioned) items across every scan** via `GET /api/db/flagged` (no `ref`), resets `_historyRefScanId` to `null`, and shows no history banner. The "Open items" banner button (`onclick="loadHistorySession(null)"`, key `history_btn_latest`) therefore returns to this open-items view. Specific sessions are still loaded with a positive `ref`, which keeps the re-scan resolved-diff. Do not revert `null` to "resolve latest ref" — that reintroduces the "only the last scan is shown" complaint.
 - **Auto-load on page load** — `_sseWatchdog()` in `results.js` calls `window.loadHistorySession?.(null)` whenever `/api/scan/status` reports neither `running` (M365 + file lock) nor `google_running` (Google lock) **and** nothing is shown yet (`!S._historyRefScanId && !S.flaggedData.length`). This is **not one-shot** — it retries on every 4s poll until a session is restored, because (a) the replay buffer is empty after a server restart so `sse_replay_done` never fires, and (b) a completed scan's replayed `scan_phase` can leave a running flag set that would otherwise block the load forever. Because both locks are confirmed free, the watchdog clears the stale `_m365/_google/_fileScanRunning` flags before calling. Do not revert to a one-shot `_initialStatusChecked` gate — that reintroduces the "blank grid after refresh/restart" bug. `/api/scan/status` **must** report `google_running` separately; `running` alone misses live Google scans. The `sse_replay_done` handler in `scan.js` still retries for the non-empty-buffer (no-restart) case.
 - **History banner** (`#historyBanner`) — shown when `S._historyRefScanId` is set. Do not hide/show from outside `history.js`.
 - **Session picker** (`#historyDropdown`) — rendered inside `[data-history-wrap]` so the outside-click handler works correctly. Do not move the picker outside this wrapper.
--- a/static/js/history.js
+++ b/static/js/history.js
@ -38,20 +38,50 @@ function invalidateHistoryCache() {

 // ── Load a session into the results grid ──────────────────────────────────────

-async function loadHistorySession(refScanId) {
-  // refScanId: null → latest session, positive int → specific session
-  let resolvedRef = refScanId;
-  if (resolvedRef === null) {
-    const sessions = _sessions !== null ? _sessions : await _fetchSessions();
-    // Bail if a scan started while we were fetching sessions
+// Default landing view: every flagged item still awaiting action, across all
+// scans (not just the latest session). Resets S._historyRefScanId to null (live
+// mode) and shows no history banner — this is "now", not a past session.
+async function loadOpenItems() {
+  // Bail if a scan is running — live SSE owns the grid then.
  if (S._m365ScanRunning || S._googleScanRunning || S._fileScanRunning) return;
-    if (!sessions.length) {
-      // No scans in DB — nothing to show
+  try {
+    const r     = await fetch('/api/db/flagged');
+    const items = await r.json();
+    if (S._m365ScanRunning || S._googleScanRunning || S._fileScanRunning) return;
+    closeHistoryPicker();
+
+    if (!Array.isArray(items) || items.length === 0) {
+      S._historyRefScanId = null;
+      _setHistoryBanner(false);
      window.loadLastScanSummary?.();
      return;
    }
-    resolvedRef = sessions[0].ref_scan_id;
+
+    S._historyRefScanId = null;
+    S.flaggedData  = items;
+    S.filteredData = [];
+
+    const grid       = document.getElementById('grid');
+    const emptyState = document.getElementById('emptyState');
+    const lastScan   = document.getElementById('lastScanSummary');
+    if (emptyState) emptyState.style.display = 'none';
+    if (lastScan)   lastScan.style.display   = 'none';
+    if (grid) { grid.innerHTML = ''; grid.style.display = 'grid'; }
+
+    window.renderGrid(items);
+    try { window.markOverdueCards(); } catch(_) {}
+    try { window.loadTrend();        } catch(_) {}
+    _setHistoryBanner(false);
+  } catch(e) {
+    console.error('[history] failed to load open items:', e);
  }
+}
+
+async function loadHistorySession(refScanId) {
+  // refScanId: null → all open (unreviewed) items across every scan,
+  //            positive int → a specific past session
+  if (refScanId === null) return loadOpenItems();
+  const resolvedRef = refScanId;

  try {
    const r     = await fetch('/api/db/flagged?ref=' + resolvedRef);
--- a/templates/index.html
+++ b/templates/index.html
@ -375,7 +375,7 @@ document.addEventListener('DOMContentLoaded', applyI18n);
          <button id="historyPickerBtn" type="button" onclick="openHistoryPicker()" style="height:24px;padding:0 10px;background:none;border:1px solid var(--border);color:var(--muted);border-radius:4px;font-size:11px;cursor:pointer" data-i18n="history_btn_sessions">Sessions</button>
          <div id="historyDropdown" style="display:none;position:absolute;right:0;top:calc(100% + 4px);background:var(--surface);border:1px solid var(--border);border-radius:6px;z-index:9999;width:300px;max-height:260px;overflow-y:auto;box-shadow:0 4px 12px rgba(0,0,0,.25)"></div>
        </div>
-        <button id="historyLatestBtn" type="button" onclick="loadHistorySession(null)" style="display:none;height:24px;padding:0 10px;background:none;border:1px solid var(--accent);color:var(--accent);border-radius:4px;font-size:11px;cursor:pointer;flex-shrink:0" data-i18n="history_btn_latest">Latest scan</button>
+        <button id="historyLatestBtn" type="button" onclick="loadHistorySession(null)" style="display:none;height:24px;padding:0 10px;background:none;border:1px solid var(--accent);color:var(--accent);border-radius:4px;font-size:11px;cursor:pointer;flex-shrink:0" data-i18n="history_btn_latest">Open items</button>
      </div>

      <!-- Filter bar — full width, above grid + preview -->
--- a/tests/test_route_integration.py
+++ b/tests/test_route_integration.py
@ -270,6 +270,49 @@ class TestFlaggedScopeEnforcement:
        ids = {row["id"] for row in r.get_json()}
        assert "ci1" in ids

+    def test_no_ref_returns_open_items_across_all_sessions(self, client, db_patch):
+        """GET /api/db/flagged without ?ref returns open items from every session."""
+        # Two scans in separate session windows. The default (no-ref) view must
+        # surface unactioned items from BOTH, not just the latest session.
+        old_id = _seed_scan(db_patch, [_item("o1")])
+        # Shift the first scan's started_at back by 400 s before seeding the second scan.
+        db_patch._connect().execute(
+            "UPDATE scans SET started_at = started_at - 400 WHERE id = ?", (old_id,)
+        )
+        db_patch._connect().commit()
+        _seed_scan(db_patch, [_item("o2")])
+
+        r = client.get("/api/db/flagged")
+        ids = {row["id"] for row in r.get_json()}
+        assert ids == {"o1", "o2"}
+
+    def test_no_ref_excludes_items_with_a_disposition(self, client, db_patch):
+        """An item given the 'kept' disposition is absent from the no-ref response."""
+        _seed_scan(db_patch, [_item("d1"), _item("d2")])
+        db_patch.set_disposition("d1", "kept")
+
+        r = client.get("/api/db/flagged")
+        ids = {row["id"] for row in r.get_json()}
+        assert "d2" in ids        # untouched → still open
+        assert "d1" not in ids    # action taken → hidden
+
+    def test_no_ref_unreviewed_disposition_stays_open(self, client, db_patch):
+        """An item whose disposition is explicitly 'unreviewed' is still returned."""
+        _seed_scan(db_patch, [_item("u1")])
+        db_patch.set_disposition("u1", "unreviewed")
+
+        r = client.get("/api/db/flagged")
+        ids = {row["id"] for row in r.get_json()}
+        assert "u1" in ids        # 'unreviewed' status is not an action
+
+    def test_no_ref_dedupes_rescanned_item_to_latest(self, client, db_patch):
+        """An item seeded into two scans appears exactly once in the no-ref response."""
+        # Same item flagged by two scans → appears once.
+        old_id = _seed_scan(db_patch, [_item("k1")])
+        # Shift the first scan's started_at back by 400 s before seeding the second scan.
+        db_patch._connect().execute(
+            "UPDATE scans SET started_at = started_at - 400 WHERE id = ?", (old_id,)
+        )
+        db_patch._connect().commit()
+        _seed_scan(db_patch, [_item("k1")])
+
+        rows = [row for row in client.get("/api/db/flagged").get_json() if row["id"] == "k1"]
+        assert len(rows) == 1
+
    def test_ref_param_loads_historical_session(self, client, db_patch):
        # Push first scan >300 s into the past so it occupies its own session window.
        old_id = _seed_scan(db_patch, [_item("h1")])