Fix: macOS runner, scan hang, export sources, profile role filter/badge
This commit is contained in:
parent
9e940cd60a
commit
6e0aab788a
6
.github/workflows/build.yml
vendored
6
.github/workflows/build.yml
vendored
@ -24,7 +24,7 @@ jobs:
|
||||
name: windows
|
||||
- os: ubuntu-22.04
|
||||
name: linux
|
||||
- os: macos-13
|
||||
- os: macos-15
|
||||
name: macos
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
@ -93,7 +93,7 @@ jobs:
|
||||
if: runner.os == 'macOS'
|
||||
run: |
|
||||
cd dist
|
||||
zip -r "GDPRScanner_macos_x86_64.zip" "GDPRScanner.app"
|
||||
zip -r "GDPRScanner_macos_arm64.zip" "GDPRScanner.app"
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
@ -103,7 +103,7 @@ jobs:
|
||||
path: |
|
||||
dist/GDPRScanner_linux_x86_64.zip
|
||||
dist/GDPRScanner_windows_x64.zip
|
||||
dist/GDPRScanner_macos_x86_64.zip
|
||||
dist/GDPRScanner_macos_arm64.zip
|
||||
|
||||
# ── Release ───────────────────────────────────────────────────────────────
|
||||
# • version tag (v*) → proper versioned release with generated notes
|
||||
|
||||
@ -11,7 +11,7 @@ Version numbers follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html
|
||||
|
||||
### Added
|
||||
|
||||
- **GitHub Actions CI/CD — macOS build** — `.github/workflows/build.yml` now also builds a macOS `.app` bundle (`macos-13`, Intel x86_64 / Rosetta) on every push to `main` and on `v*` tags. Released as `GDPRScanner_macos_x86_64.zip`.
|
||||
- **GitHub Actions CI/CD — macOS build** — `.github/workflows/build.yml` now also builds a macOS `.app` bundle (`macos-15`, Apple Silicon ARM64) on every push to `main` and on `v*` tags. Released as `GDPRScanner_macos_arm64.zip`. (Originally `macos-13` / Intel, changed when GitHub retired that runner.)
|
||||
|
||||
### Fixed
|
||||
|
||||
@ -19,12 +19,11 @@ Version numbers follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html
|
||||
- **`EFFORT_ESTIMATE.md`** — build effort estimate document covering component-by-component hour breakdowns and complexity drivers for the project.
|
||||
- **Settings → Security tab** — new dedicated pane in the Settings modal. Admin PIN and Viewer PIN groups moved here from the General tab, which now contains only Appearance and About. The Share modal's **Configure** button navigates directly to the Security tab.
|
||||
- **Viewer mode layout** — the sidebar, log panel, and progress bar are now hidden in viewer mode so results fill the full window width. The `🔍 GDPRScanner` brand is shown in the top-left of the topbar (replacing the sidebar header) at the same size and weight as the normal sidebar title.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Share modal — Revoke / Copy buttons broken** — `JSON.stringify(token)` produced a double-quoted string that terminated the surrounding `onclick="…"` HTML attribute early, so neither button fired its handler. Both now pass the token as a single-quoted JS string literal, which is safe for the hex token format.
|
||||
- **Viewer PIN — Clear PIN rejected with "current PIN is incorrect"** — clicking **Clear PIN** without first typing in the Current PIN field sent an empty string to the server, which correctly rejected it. A client-side guard now validates the field is non-empty before sending the request, and focuses the input with an inline error message if it is empty.
|
||||
- **Share modal — all UI strings now translated** — the Share results modal and Viewer PIN settings group were fully hardcoded in English. All visible strings are now backed by i18n keys (`share_*`, `viewer_pin_*`) in `en.json`, `da.json`, and `de.json`.
|
||||
- **Excel / ART.30 export — Gmail and Google Drive missing from summary** — `by_source` was built from flagged items only, so sources that produced zero hits were silently skipped. Both the Excel Summary sheet and the ART.30 "Breakdown by source" table now include every source that was actually scanned, showing `0` items and `0` CPR hits where nothing was found. New `GDPRDb.get_session_sources()` method reads the `sources` JSON column from all scans in the current session window to determine which sources ran.
|
||||
- **Scan never finishes when M365 + Google run concurrently** — `scan_done` (M365 finished) was closing the SSE connection immediately via `S.es.close()`, even when `S._googleScanRunning` or `S._fileScanRunning` was still true. The `google_scan_done` / `file_scan_done` events therefore never arrived, leaving the progress bar stuck at 100% indefinitely. SSE teardown is now deferred until the last concurrent scan completes: `scan_done` only closes the connection if neither Google nor File is still running; `google_scan_done` and `file_scan_done` close it when they are the final scan to finish.
|
||||
|
||||
---
|
||||
|
||||
|
||||
12
CLAUDE.md
12
CLAUDE.md
@ -72,6 +72,18 @@ Large M365 tenants can generate enormous memory pressure. Key rules to preserve:
|
||||
- **PDF OCR images freed page-by-page** — in `document_scanner.scan_pdf`, `images[page_num-1] = None` immediately after OCR. Do not cache or accumulate page images.
|
||||
- **Memory guard** — `psutil.virtual_memory().available` checked before each M365 file download; scan skips the file if < 300 MB free.
|
||||
|
||||
## Export — routes/export.py
|
||||
|
||||
- **`GDPRDb.get_session_sources()`** — returns a `set` of source-key strings (e.g. `{"gmail", "gdrive", "email"}`) for every scan in the current session window. Used by both `_build_excel_bytes()` and `_build_article30_docx()` to include zero-hit sources in summary tables. Do not derive the scanned-source set from `by_source` alone — that dict only contains sources with flagged items.
|
||||
- **Excel Summary sheet vs. per-source tabs** — the Summary sheet shows all scanned sources (even with 0 items). Per-source tabs are only created for sources with items; an empty tab has no value.
|
||||
- **ART.30 breakdown table** — iterates `scanned_sources` (not `by_source`) so Gmail, Google Drive, etc. appear with `0 | 0 | 0 | —` when the scan found nothing.
|
||||
|
||||
## SSE teardown — static/js/scan.js
|
||||
|
||||
- **Do not close `S.es` in `scan_done` if other scans are still running** — M365 (`scan_done`), Google (`google_scan_done`), and File (`file_scan_done`) each emit their own done event. If M365 finishes first and the SSE is closed, the remaining done events are never received and the UI hangs at 100% indefinitely.
|
||||
- **Rule:** close `S.es` (and reset `S._userStartedScan`) only inside the branch where *all* concurrent scans have finished: `scan_done` checks `!S._googleScanRunning && !S._fileScanRunning`; `google_scan_done` checks `!S._m365ScanRunning && !S._fileScanRunning`; `file_scan_done` checks `!S._m365ScanRunning && !S._googleScanRunning`.
|
||||
- **Scheduled scans** — `S._userStartedScan` is false for scheduler-triggered runs, so the SSE connection is never closed and future scheduler events continue to arrive.
|
||||
|
||||
## Global gotchas
|
||||
|
||||
- **Pattern matching in Python** — when using `str.replace()` to patch JS/HTML, whitespace and quote style must match exactly. Use `in` check first and print if not found.
|
||||
|
||||
8
TODO.md
8
TODO.md
@ -41,6 +41,14 @@ Full spec in SUGGESTIONS.md §29.
|
||||
A shareable URL (token-protected) or numeric PIN that gives a DPO, school principal, or compliance coordinator read-only access to the results grid — with disposition tagging but without scan controls, credentials, or delete access. Full spec in SUGGESTIONS.md §33.
|
||||
**Size:** Medium · **Priority:** Medium
|
||||
|
||||
### OneDrive 404 errors — investigate and handle appropriately
|
||||
Every student is supposed to have a OneDrive licence, so 404s on `drive/root/delta` are unexpected. A 404 can mean: no licence assigned, licence assigned but OneDrive service plan disabled, drive not yet provisioned (account never signed in), or account suspended/deleted. These 404s are currently broadcast as a red `scan_error` entry in the log.
|
||||
|
||||
**Action:** Check affected users in the M365 admin centre (Licences + OneDrive status). Once root cause is confirmed, decide whether to suppress, log at lower severity, or show a specific "OneDrive not provisioned" message instead of the raw HTTP error.
|
||||
**Size:** Small · **Priority:** Medium
|
||||
|
||||
---
|
||||
|
||||
### #32 — Windowed mode for Profiles, Sources, and Settings ✗ Won't do
|
||||
The workflow is sequential (configure → scan → review), not parallel — there is no realistic scenario where a modal and the results grid need to be open simultaneously. The Sources panel is already visible in the sidebar. Option A (the least-work path) still loads the full 3800-line JS stack twice. Closed.
|
||||
|
||||
|
||||
27
gdpr_db.py
27
gdpr_db.py
@ -472,6 +472,33 @@ class ScanDB:
|
||||
result.append(d)
|
||||
return result
|
||||
|
||||
def get_session_sources(self, window_seconds: int = 300) -> set:
    """Return the union of all source keys scanned in the current session.

    Reads the ``sources`` JSON array stored in each finished scan record
    whose ``started_at`` is no earlier than ``window_seconds`` before the
    start of the latest finished scan (latest = highest ``id``).  This is
    used by the export builders so they can show every scanned source in
    summary tables even when a source produced zero flagged items.

    Args:
        window_seconds: How far before the latest finished scan's
            ``started_at`` a scan may have started and still be counted.
            ``started_at`` is treated as a number (it is subtracted from
            directly) — presumably epoch seconds; confirm against the
            code that writes the ``scans`` table.

    Returns:
        A set of source-key strings.  Empty when there is no finished scan
        or the latest finished scan has no ``started_at``.
    """
    conn = self._connect()
    latest = conn.execute(
        "SELECT started_at FROM scans WHERE finished_at IS NOT NULL ORDER BY id DESC LIMIT 1"
    ).fetchone()
    # No finished scan yet, or a NULL start time we cannot do arithmetic on.
    if not latest or latest[0] is None:
        return set()
    latest_start = latest[0]
    rows = conn.execute(
        """SELECT sources FROM scans
        WHERE started_at >= ? AND finished_at IS NOT NULL""",
        (latest_start - window_seconds,),
    ).fetchall()
    result: set = set()
    for (raw_sources,) in rows:
        try:
            decoded = json.loads(raw_sources or "[]")
        except (TypeError, ValueError):
            # Best effort: one corrupt record must not break the export.
            continue
        # Only a JSON array is a valid ``sources`` value.  Feeding a bare
        # JSON string to ``set.update`` would add its individual characters
        # as bogus source keys, so anything that is not a list is skipped.
        if isinstance(decoded, list):
            result.update(key for key in decoded if isinstance(key, str))
    return result
|
||||
|
||||
def lookup_data_subject(self, cpr: str) -> list[dict]:
|
||||
"""Find all flagged items containing a given CPR number (by hash)."""
|
||||
cpr_hash = hashlib.sha256(str(cpr).encode()).hexdigest()
|
||||
|
||||
@ -171,11 +171,23 @@ def _build_excel_bytes() -> tuple[bytes, str]:
|
||||
for item in state.flagged_items:
|
||||
by_source.setdefault(item.get("source_type", "other"), []).append(item)
|
||||
|
||||
# Determine which sources were actually scanned (even if they found nothing)
|
||||
scanned_sources: set = set()
|
||||
if DB_OK:
|
||||
try:
|
||||
_db_tmp = _get_db()
|
||||
if _db_tmp:
|
||||
scanned_sources = _db_tmp.get_session_sources()
|
||||
except Exception:
|
||||
pass
|
||||
# Fall back: treat any source that has items as scanned
|
||||
scanned_sources |= set(by_source.keys())
|
||||
|
||||
sum_row = 7
|
||||
for src_key, (label, tab_bg) in SOURCE_MAP.items():
|
||||
items = by_source.get(src_key, [])
|
||||
if not items:
|
||||
if src_key not in scanned_sources:
|
||||
continue
|
||||
items = by_source.get(src_key, [])
|
||||
ws_sum.cell(row=sum_row, column=1, value=label).font = Font(name="Arial", size=10)
|
||||
ws_sum.cell(row=sum_row, column=2, value=len(items)).font = Font(name="Arial", size=10)
|
||||
ws_sum.cell(row=sum_row, column=3, value=sum(i.get("cpr_count", 0) for i in items)).font = Font(name="Arial", size=10)
|
||||
@ -353,6 +365,15 @@ def _build_article30_docx() -> tuple[bytes, str]:
|
||||
st = item.get("source_type", "other")
|
||||
by_source.setdefault(st, []).append(item)
|
||||
|
||||
# Determine which sources were actually scanned (may be empty-hit)
|
||||
scanned_sources: set = set()
|
||||
if db:
|
||||
try:
|
||||
scanned_sources = db.get_session_sources()
|
||||
except Exception:
|
||||
pass
|
||||
scanned_sources |= set(by_source.keys())
|
||||
|
||||
SOURCE_LABELS = {
|
||||
"email": "Exchange (Outlook)",
|
||||
"onedrive": "OneDrive",
|
||||
@ -557,9 +578,9 @@ def _build_article30_docx() -> tuple[bytes, str]:
|
||||
r.font.size = Pt(10); r.font.color.rgb = WHITE
|
||||
|
||||
for src_key in ("email", "onedrive", "sharepoint", "teams", "gmail", "gdrive", "local", "smb"):
|
||||
src_items = by_source.get(src_key, [])
|
||||
if not src_items:
|
||||
if src_key not in scanned_sources:
|
||||
continue
|
||||
src_items = by_source.get(src_key, [])
|
||||
row = src_tbl.add_row().cells
|
||||
n_ov = sum(1 for i in src_items if i.get("id") in overdue_ids)
|
||||
n_cpr = sum(i.get("cpr_count", 0) for i in src_items)
|
||||
|
||||
@ -335,7 +335,8 @@ function _openEditorForProfile(profile) {
|
||||
: (u.platform || 'm365') === 'google' ? '<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#EAF3DE;color:#3B6D11;font-weight:500">GWS</span>'
|
||||
: '<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#E6F1FB;color:#185FA5;font-weight:500">M365</span>';
|
||||
const roleBadge = u.userRole === 'student' ? t('role_student','Elev') : u.userRole === 'staff' ? t('role_staff','Ansat') : t('role_other','Anden');
|
||||
return `<label class="pmgmt-acct-row" data-uid="${_esc(u.id)}"><input type="checkbox" ${checked} data-uid="${_esc(u.id)}"><span style="flex:1;color:var(--color-text-primary);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${_esc(u.displayName)}</span>${platBadge}<span style="font-size:9px;padding:1px 5px;border-radius:10px;background:#D3D1C7;color:#444441">${roleBadge}</span></label>`;
|
||||
const roleOverrideStyle = u.roleOverride ? 'color:var(--color-text-info);outline:1px solid var(--color-border-info);' : '';
|
||||
return `<label class="pmgmt-acct-row" data-uid="${_esc(u.id)}" data-role="${_esc(u.userRole || 'other')}"><input type="checkbox" ${checked} data-uid="${_esc(u.id)}"><span style="flex:1;color:var(--color-text-primary);overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${_esc(u.displayName)}</span>${platBadge}<button type="button" class="pmgmt-role-badge" data-uid="${_esc(u.id)}" onclick="_pmgmtCycleRole(this.getAttribute('data-uid'),event)" style="font-size:9px;padding:1px 5px;border-radius:10px;background:#D3D1C7;border:none;cursor:pointer;${roleOverrideStyle}">${roleBadge}</button></label>`;
|
||||
}).join('');
|
||||
|
||||
body.innerHTML = `
|
||||
@ -503,6 +504,26 @@ function _pmgmtCloseEditor() {
|
||||
closeProfileMgmt();
|
||||
}
|
||||
|
||||
/**
 * Cycle one account's role from the profile editor and refresh its badge.
 *
 * Delegates the role change to the global `cycleUserRole(uid)` (defined
 * outside this block; presumably it updates the matching entry in
 * `S._allUsers` — confirm against its implementation), then re-reads that
 * entry and updates the row's badge text, `data-role` attribute and
 * override highlight in place.
 *
 * @param {string} uid - Account id, as stored in the row's `data-uid`.
 * @param {Event} event - The click event from the role badge button.
 * @returns {Promise<void>}
 */
async function _pmgmtCycleRole(uid, event) {
  // Keep the click from bubbling out of the badge button to its ancestors.
  event.stopPropagation();
  if (typeof cycleUserRole !== 'function') return;
  await cycleUserRole(uid);

  // Refresh the badge inside the profile modal to reflect the new role.
  const user = S._allUsers.find((candidate) => candidate.id === uid);
  if (!user) return;

  // Inside a double-quoted CSS string both `"` and `\` must be escaped;
  // escaping only the quote breaks the selector for ids with a backslash.
  const escapedUid = String(uid).replace(/["\\]/g, '\\$&');
  const row = document.querySelector(`#pmgmtAcctList label[data-uid="${escapedUid}"]`);
  if (!row) return;
  const badge = row.querySelector('.pmgmt-role-badge');
  if (!badge) return;

  let roleText;
  if (user.userRole === 'student') {
    roleText = t('role_student', 'Elev');
  } else if (user.userRole === 'staff') {
    roleText = t('role_staff', 'Ansat');
  } else {
    roleText = t('role_other', 'Anden');
  }
  badge.textContent = roleText;

  // Keep data-role in step with the user record.
  row.dataset.role = user.userRole || 'other';

  // Highlight the badge only while the role is a manual override.
  badge.style.color = user.roleOverride ? 'var(--color-text-info)' : '';
  badge.style.outline = user.roleOverride ? '1px solid var(--color-border-info)' : '';
}
|
||||
|
||||
function _pmgmtSelectAllAccounts(checked) {
|
||||
document.querySelectorAll('#pmgmtAcctList label input[type=checkbox]').forEach(function(cb) {
|
||||
if (cb.closest('label').style.display !== 'none') cb.checked = checked;
|
||||
@ -542,9 +563,8 @@ function _pmgmtFilterAccounts(q) {
|
||||
q = (q || '').toLowerCase();
|
||||
document.querySelectorAll('#pmgmtAcctList label').forEach(function(lbl) {
|
||||
var name = (lbl.querySelector('span') || {}).textContent || '';
|
||||
var uid = lbl.querySelector('input')?.dataset?.uid || '';
|
||||
var user = S._allUsers.find(u => u.id === uid);
|
||||
var roleOk = !_pmgmtRoleActive || (user && user.userRole === _pmgmtRoleActive);
|
||||
var role = lbl.dataset.role || 'other';
|
||||
var roleOk = !_pmgmtRoleActive || role === _pmgmtRoleActive;
|
||||
var nameOk = !q || name.toLowerCase().includes(q);
|
||||
lbl.style.display = (roleOk && nameOk) ? '' : 'none';
|
||||
});
|
||||
@ -698,6 +718,7 @@ window._peSetYear = _peSetYear;
|
||||
window._renderEditorSources = _renderEditorSources;
|
||||
window._pmgmtNewProfile = _pmgmtNewProfile;
|
||||
window._pmgmtCloseEditor = _pmgmtCloseEditor;
|
||||
window._pmgmtCycleRole = _pmgmtCycleRole;
|
||||
window._pmgmtSelectAllAccounts = _pmgmtSelectAllAccounts;
|
||||
window._pmgmtRoleFilter = _pmgmtRoleFilter;
|
||||
window._pmgmtAddManual = _pmgmtAddManual;
|
||||
|
||||
@ -363,17 +363,17 @@ function _attachScanListeners(source) {
|
||||
source.addEventListener('scan_done', function(e) {
|
||||
var d = JSON.parse(e.data);
|
||||
console.log('[SSE] scan_done:', d);
|
||||
// Only close SSE if the user started this scan via the Scan button.
|
||||
// For scheduled scans, keep the SSE connection alive so future
|
||||
// scheduler events are still received.
|
||||
if (S._userStartedScan) {
|
||||
S._userStartedScan = false;
|
||||
if (S.es) { S.es.close(); S.es = null; }
|
||||
}
|
||||
S._srcPct.m365 = 100;
|
||||
S._m365ScanRunning = false;
|
||||
_renderProgressSegments();
|
||||
var _anyRunning = S._googleScanRunning || S._fileScanRunning;
|
||||
// Only close SSE once all concurrent scans have finished.
|
||||
// Closing early would drop google_scan_done / file_scan_done events and
|
||||
// leave the UI stuck in scanning state.
|
||||
if (S._userStartedScan && !_anyRunning) {
|
||||
S._userStartedScan = false;
|
||||
if (S.es) { S.es.close(); S.es = null; }
|
||||
}
|
||||
if (!_anyRunning) setLogLive('');
|
||||
document.getElementById('scanBtn').disabled = _anyRunning;
|
||||
document.getElementById('stopBtn').style.display = _anyRunning ? 'inline-block' : 'none';
|
||||
@ -405,6 +405,10 @@ function _attachScanListeners(source) {
|
||||
S._googleScanRunning = false;
|
||||
_renderProgressSegments();
|
||||
if (!S._m365ScanRunning && !S._fileScanRunning) {
|
||||
if (S._userStartedScan) {
|
||||
S._userStartedScan = false;
|
||||
if (S.es) { S.es.close(); S.es = null; }
|
||||
}
|
||||
setLogLive('');
|
||||
document.getElementById('scanBtn').disabled = false;
|
||||
document.getElementById('stopBtn').style.display = 'none';
|
||||
@ -429,6 +433,10 @@ function _attachScanListeners(source) {
|
||||
S._fileScanRunning = false;
|
||||
_renderProgressSegments();
|
||||
if (!S._m365ScanRunning && !S._googleScanRunning) {
|
||||
if (S._userStartedScan) {
|
||||
S._userStartedScan = false;
|
||||
if (S.es) { S.es.close(); S.es = null; }
|
||||
}
|
||||
setLogLive('');
|
||||
document.getElementById('scanBtn').disabled = false;
|
||||
document.getElementById('stopBtn').style.display = 'none';
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user