"""
Database stats, disposition, export/import, admin PIN, preview, thumbnail
"""
|
|
from __future__ import annotations
|
|
import base64
|
|
from pathlib import Path
|
|
from flask import Blueprint, Response, jsonify, request
|
|
from routes import state
|
|
from app_config import _set_admin_pin, _verify_admin_pin, _admin_pin_is_set
|
|
from checkpoint import _clear_checkpoint, _DELTA_PATH
|
|
from cpr_detector import _extract_exif, _html_esc, _placeholder_svg
|
|
|
|
try:
|
|
from gdpr_db import get_db as _get_db
|
|
DB_OK = True
|
|
except ImportError:
|
|
DB_OK = False
|
|
def _get_db(*a, **kw): return None # type: ignore[misc]
|
|
|
|
# Optional document scanner: SCANNER_OK records whether the module imports.
# The module object itself is not used in this file (hence the noqa).
try:
    import document_scanner as _ds  # noqa: F401
except ImportError:
    SCANNER_OK = False
else:
    SCANNER_OK = True
|
|
|
|
# Flask blueprint named "database"; every route in this module is attached to it.
bp = Blueprint("database", __name__)
|
|
|
|
|
|
@bp.route("/api/db/stats")
def db_stats():
    """Return stats for the latest (or specified) scan, plus aggregate counts.

    Query params:
        scan_id  int, optional — forwarded unchanged to ``get_stats``.

    Returns:
        JSON object based on ``get_stats(scan_id)`` with ``total_items``,
        ``flagged_items``, ``total_scans`` and ``finished_scans`` added, and
        ``flagged_count`` / ``total_scanned`` filled in when missing or falsy.
        Responds with HTTP 503 when the database module is unavailable.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    scan_id = request.args.get("scan_id", type=int)
    db = _get_db()
    data = db.get_stats(scan_id) or {}

    # Add aggregate counts the Settings panel needs — query directly so they
    # are correct even if no scan has finished_at set yet.
    try:
        import sqlite3 as _sq
        from contextlib import closing

        # closing() releases the connection even when a query raises; the
        # previous explicit close() was skipped on any error and leaked it.
        with closing(_sq.connect(db._path)) as con:

            def _scalar(sql):
                """Run a single-value query and return that value."""
                return con.execute(sql).fetchone()[0]

            total_items = _scalar("SELECT COUNT(*) FROM flagged_items")
            data["total_items"] = total_items
            data["flagged_items"] = total_items
            data["total_scans"] = _scalar("SELECT COUNT(*) FROM scans")
            data["finished_scans"] = _scalar(
                "SELECT COUNT(*) FROM scans WHERE finished_at IS NOT NULL"
            )
            if not data.get("flagged_count"):
                data["flagged_count"] = total_items
            if not data.get("total_scanned"):
                data["total_scanned"] = _scalar(
                    "SELECT COALESCE(SUM(total_scanned),0) FROM scans"
                )
    except Exception:
        # Best effort: the aggregates are a convenience for the UI, so a
        # failing direct query degrades to zeros instead of failing the call.
        data.setdefault("total_items", 0)
        data.setdefault("flagged_items", 0)
        data.setdefault("total_scans", 0)
        data.setdefault("finished_scans", 0)
    return jsonify(data)
|
|
|
|
|
|
@bp.route("/api/db/trend")
def db_trend():
    """Serve the scan history that feeds the trend chart.

    The ``n`` query parameter (int, default 20) is passed to ``get_trend``.
    Responds with HTTP 503 when the database module is unavailable.
    """
    if DB_OK:
        limit = request.args.get("n", default=20, type=int)
        history = _get_db().get_trend(limit)
        return jsonify(history)
    return jsonify({"error": "database not available"}), 503
|
|
|
|
|
|
@bp.route("/api/db/scans")
def db_scans():
    """Return the database's ``scans_list()`` result as JSON.

    Responds with HTTP 503 when the database module is unavailable.
    """
    if DB_OK:
        scans = _get_db().scans_list()
        return jsonify(scans)
    return jsonify({"error": "database not available"}), 503
|
|
|
|
|
|
@bp.route("/api/db/sessions")
def db_sessions():
    """Return the database's ``get_sessions()`` result as JSON.

    Unlike most routes here, a missing database yields an empty list (HTTP
    200) rather than a 503. Ordering is whatever ``get_sessions`` provides
    (newest first, per the original note — not verifiable from this file).
    """
    sessions = _get_db().get_sessions() if DB_OK else []
    return jsonify(sessions)
|
|
|
|
|
|
@bp.route("/api/db/subject", methods=["POST"])
def db_subject_lookup():
    """Look up every stored item that matches a given CPR number.

    Body: {cpr: "DDMMYY-XXXX"}

    Surrounding whitespace, dashes and spaces are removed before the value is
    handed to ``lookup_data_subject``. Any hashing of the CPR happens in that
    method, not here. Returns ``{"count", "items"}``; HTTP 400 when no CPR is
    supplied and 503 when the database module is unavailable.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    payload = request.get_json() or {}
    supplied = payload.get("cpr", "")
    # Trim the ends, then drop the separators people type inside a CPR.
    cpr = "".join(ch for ch in supplied.strip() if ch not in "- ")
    if cpr:
        matches = _get_db().lookup_data_subject(cpr)
        return jsonify({"count": len(matches), "items": matches})
    return jsonify({"error": "cpr required"}), 400
|
|
|
|
|
|
@bp.route("/api/db/overdue")
def db_overdue():
    """List items that fall outside the retention window.

    Query params:
        years            int, default 5
        fiscal_year_end  MM-DD string, e.g. 12-31; when omitted the response
                         reports ``cutoff_mode`` "rolling", otherwise "fiscal"
        scan_id          int, optional — forwarded to ``get_overdue_items``

    A ``ValueError`` raised while computing the cutoff or fetching the items
    is reported as HTTP 400; a missing database module as HTTP 503.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    args = request.args
    years = args.get("years", default=5, type=int)
    fiscal_year_end = args.get("fiscal_year_end", default=None)
    scan_id = args.get("scan_id", type=int)

    try:
        from gdpr_db import overdue_cutoff

        cutoff = overdue_cutoff(years, fiscal_year_end)
        overdue = _get_db().get_overdue_items(years, scan_id, fiscal_year_end)
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400

    if fiscal_year_end:
        mode = "fiscal"
    else:
        mode = "rolling"
    response = {
        "count": len(overdue),
        "cutoff_date": cutoff,
        "cutoff_mode": mode,
        "fiscal_year_end": fiscal_year_end,
        "years": years,
        "items": overdue,
    }
    return jsonify(response)
|
|
|
|
|
|
@bp.route("/api/db/disposition", methods=["POST"])
def db_set_disposition():
    """Store a compliance disposition for one flagged item.

    Body: {item_id, status, legal_basis?, notes?, reviewed_by?}
    Status values: unreviewed | retain-legal | retain-legitimate | retain-contract |
                   delete-scheduled | deleted | personal-use

    ``status`` defaults to "unreviewed" and the other optional fields to "".
    The status value is not validated here. Returns HTTP 400 without an
    ``item_id`` and 503 when the database module is unavailable.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    payload = request.get_json() or {}
    item_id = payload.get("item_id", "")
    if not item_id:
        return jsonify({"error": "item_id required"}), 400

    # Field name -> value used when the request body omits it.
    fallbacks = {
        "status": "unreviewed",
        "legal_basis": "",
        "notes": "",
        "reviewed_by": "",
    }
    fields = {key: payload.get(key, default) for key, default in fallbacks.items()}
    _get_db().set_disposition(item_id, **fields)
    return jsonify({"status": "saved"})
|
|
|
|
|
|
@bp.route("/api/db/disposition/bulk", methods=["POST"])
def db_set_disposition_bulk():
    """Apply one disposition to several items in a single request.

    Body: {item_ids: [...], status, legal_basis?, notes?, reviewed_by?}

    Each id gets its own ``set_disposition`` call with the same values.
    Returns ``{"saved": <number of ids>}``; HTTP 400 when ``item_ids`` or
    ``status`` is missing and 503 when the database module is unavailable.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    payload = request.get_json() or {}
    item_ids = payload.get("item_ids", [])
    status = payload.get("status", "")
    if not (item_ids and status):
        return jsonify({"error": "item_ids and status required"}), 400

    # The optional fields are identical for every item, so read them once.
    shared = {
        "legal_basis": payload.get("legal_basis", ""),
        "notes": payload.get("notes", ""),
        "reviewed_by": payload.get("reviewed_by", ""),
    }
    db = _get_db()
    for one_id in item_ids:
        db.set_disposition(one_id, status, **shared)
    return jsonify({"saved": len(item_ids)})
|
|
|
|
|
|
@bp.route("/api/db/disposition/<item_id>")
def db_get_disposition(item_id):
    """Return the stored disposition for *item_id*.

    When ``get_disposition`` yields nothing, ``{"status": "unreviewed"}`` is
    returned instead. Responds with HTTP 503 without a database module.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    stored = _get_db().get_disposition(item_id)
    if stored:
        return jsonify(stored)
    return jsonify({"status": "unreviewed"})
|
|
|
|
|
|
@bp.route("/api/db/flagged")
def db_flagged_items():
    """Return flagged items from ``get_session_items`` for the viewer.

    The optional ``ref`` query parameter (int) is passed as ``ref_scan_id``.
    Rows are filtered by the ``viewer_scope`` stored in the Flask session:
    ``role`` must equal the row's ``user_role`` and ``user`` (a list of
    e-mail addresses, or a legacy single string) must contain the row's
    ``account_id``, compared case-insensitively. An empty filter matches all.

    JSON-encoded columns are decoded in place: ``special_category`` becomes a
    list and ``exif_json`` is replaced by a decoded ``exif`` key. Returns an
    empty list when the database module is unavailable.
    """
    if not DB_OK:
        return jsonify([])

    import json as _json
    from flask import session as _session

    scope = _session.get("viewer_scope", {})
    if isinstance(scope, dict):
        role_filt = scope.get("role", "")
        raw_user = scope.get("user", "")
    else:
        role_filt = raw_user = ""

    # user may be a list of emails (current) or a legacy single string
    if isinstance(raw_user, list):
        allowed_users = {addr.lower() for addr in raw_user if addr}
    elif raw_user:
        allowed_users = {raw_user.lower()}
    else:
        allowed_users = set()

    ref_scan_id = request.args.get("ref", type=int)
    rows = _get_db().get_session_items(ref_scan_id=ref_scan_id)

    visible = []
    for row in rows:
        if role_filt and row.get("user_role", "") != role_filt:
            continue
        if allowed_users and (row.get("account_id", "") or "").lower() not in allowed_users:
            continue

        # Normalise JSON-encoded columns the same way scan_engine does for SSE cards
        special = row.get("special_category")
        if isinstance(special, str):
            row["special_category"] = _json.loads(special or "[]")
        else:
            row["special_category"] = row.get("special_category", [])

        exif_raw = row.get("exif_json")
        if isinstance(exif_raw, str):
            row["exif"] = _json.loads(exif_raw or "{}")
        else:
            row["exif"] = row.get("exif", {})
        row.pop("exif_json", None)

        visible.append(row)
    return jsonify(visible)
|
|
|
|
|
|
@bp.route("/api/db/related/<item_id>")
def db_related_items(item_id):
    """Return the rows ``get_related_items`` reports for *item_id*.

    The optional ``ref`` query parameter (int) is passed as ``ref_scan_id``.
    What counts as "related" (a shared CPR hash, per the original note) is
    decided by the database layer. ``special_category`` and ``exif_json`` are
    decoded exactly as in the flagged-items route. Returns an empty list when
    the database module is unavailable.
    """
    if not DB_OK:
        return jsonify([])

    import json as _json

    ref = request.args.get("ref", type=int)
    related = []
    for row in _get_db().get_related_items(item_id, ref_scan_id=ref):
        special = row.get("special_category")
        if isinstance(special, str):
            row["special_category"] = _json.loads(special or "[]")
        else:
            row["special_category"] = row.get("special_category", [])

        exif_raw = row.get("exif_json")
        if isinstance(exif_raw, str):
            row["exif"] = _json.loads(exif_raw or "{}")
        else:
            row["exif"] = row.get("exif", {})
        row.pop("exif_json", None)

        related.append(row)
    return jsonify(related)
|
|
|
|
|
|
@bp.route("/api/db/deletion_log")
def db_deletion_log():
    """Return the deletion audit log together with its summary stats.

    Query params: limit (int, default 500), reason (str filter, optional).
    Both are forwarded to ``get_deletion_log``. The response has the shape
    ``{"stats": ..., "entries": ...}``; HTTP 503 without a database module.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    args = request.args
    entries = _get_db().get_deletion_log(
        limit=args.get("limit", default=500, type=int),
        reason=args.get("reason", default=None),
    )
    summary = _get_db().deletion_log_stats()
    return jsonify({"stats": summary, "entries": entries})
|
|
|
|
|
|
@bp.route("/api/db/reset", methods=["POST"])
def db_reset():
    """Wipe the database and the in-memory scan results.

    Body: {confirm: "yes", pin: "<admin_pin>"}. The PIN is only checked when
    an admin PIN has been set. Checks run in this order: confirmation (400),
    PIN (403), database availability (503).

    On success the database is reset, ``state.flagged_items`` and
    ``state.scan_meta`` are emptied, the checkpoint is cleared and the delta
    file is removed if present. Any exception during that sequence is
    reported as HTTP 500 with its message.
    """
    payload = request.get_json() or {}

    if payload.get("confirm") != "yes":
        return jsonify({"error": "confirm=yes required"}), 400
    if _admin_pin_is_set() and not _verify_admin_pin(payload.get("pin", "")):
        return jsonify({"error": "incorrect_pin"}), 403
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    try:
        _get_db().reset()
        state.flagged_items = []
        state.scan_meta = {}
        _clear_checkpoint()
        if _DELTA_PATH.exists():
            _DELTA_PATH.unlink()
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
    return jsonify({"ok": True, "message": "Database reset. All scan results cleared."})
|
|
|
|
|
|
@bp.route("/api/admin/pin", methods=["GET"])
def admin_pin_status():
    """Report, as ``{"pin_set": bool}``, whether an admin PIN exists."""
    pin_set = _admin_pin_is_set()
    return jsonify({"pin_set": pin_set})
|
|
|
|
|
|
@bp.route("/api/admin/pin", methods=["POST"])
def admin_pin_set():
    """Create or replace the admin PIN.

    Body: {current_pin: "..", new_pin: ".."}

    ``new_pin`` is stripped of surrounding whitespace and must be non-empty
    (HTTP 400 otherwise). ``current_pin`` is only verified when a PIN already
    exists; a mismatch yields HTTP 403.
    """
    payload = request.get_json() or {}
    new_pin = payload.get("new_pin", "").strip()
    if not new_pin:
        return jsonify({"error": "new_pin required"}), 400

    pin_exists = _admin_pin_is_set()
    if pin_exists and not _verify_admin_pin(payload.get("current_pin", "")):
        return jsonify({"error": "incorrect_pin"}), 403

    _set_admin_pin(new_pin)
    return jsonify({"ok": True})
|
|
|
|
|
|
@bp.route("/api/db/export")
def db_export():
    """Export the database to a ZIP archive and send it as a download.

    ``export_db`` writes the archive to a temporary file, which is read into
    memory and deleted before the response is built. The download is named
    ``gdpr_export_<YYYYmmdd_HHMMSS>.zip``. The archive contents (JSON files,
    hashed CPR numbers, stripped thumbnails — see ScanDB.export_db, #11) are
    determined by the database layer and are not visible here.

    Returns HTTP 503 without a database module and HTTP 500 on any failure.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    import datetime as _dt
    import tempfile
    import traceback

    try:
        stamp = _dt.datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"gdpr_export_{stamp}.zip"

        # Only the path is needed: the handle is closed straight away and
        # export_db writes to the file by name.
        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as handle:
            archive = Path(handle.name)

        try:
            _get_db().export_db(archive)
            payload = archive.read_bytes()
        finally:
            # Cleanup is best effort; a leftover temp file must not fail the export.
            try:
                archive.unlink()
            except Exception:
                pass

        disposition = f'attachment; filename="{filename}"'
        return Response(
            payload,
            mimetype="application/zip",
            headers={"Content-Disposition": disposition},
        )
    except Exception as exc:
        # NOTE(review): the full traceback is returned to the client in
        # "detail" — confirm this endpoint is never exposed to untrusted users.
        return jsonify({"error": str(exc), "detail": traceback.format_exc()}), 500
|
|
|
|
|
|
@bp.route("/api/db/import", methods=["POST"])
def db_import():
    """Import a previously exported ZIP archive into the database. (#11)

    Multipart form:
        file     — the export ZIP
        mode     — "merge" (default) or "replace"
        confirm  — must be "yes" when mode == "replace"

    The upload is spooled to a temporary file, handed to ``import_db`` and
    always removed afterwards, whether the import succeeds or fails.

    Returns:
        ``{"ok": True, "mode": ..., "imported": <import_db result>}`` on
        success; HTTP 400 for a missing file, an unconfirmed replace, or a
        ``ValueError`` / ``FileNotFoundError`` from the import; HTTP 500 for
        any other failure; HTTP 503 when the database module is unavailable.
    """
    if not DB_OK:
        return jsonify({"error": "database not available"}), 503

    import tempfile
    from contextlib import suppress

    f = request.files.get("file")
    if not f:
        return jsonify({"error": "no file uploaded"}), 400

    mode = request.form.get("mode", "merge")
    confirm = request.form.get("confirm", "")
    if mode == "replace" and confirm != "yes":
        return jsonify({"error": "confirm=yes required for replace mode"}), 400

    tmp = None
    try:
        # NamedTemporaryFile creates the file atomically; the deprecated
        # tempfile.mktemp only returned a name another process could claim
        # first. The upload is written through the open handle and the file
        # is closed before import_db re-opens it by path.
        with tempfile.NamedTemporaryFile(
            suffix=".zip", prefix="gdpr_import_", delete=False
        ) as handle:
            tmp = Path(handle.name)
            f.save(handle)
        result = _get_db().import_db(tmp, mode=mode)
        return jsonify({"ok": True, "mode": mode, "imported": result})
    except (ValueError, FileNotFoundError) as e:
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Remove the upload on every path; it used to be left behind whenever
        # import_db raised. A cleanup failure must not mask the response.
        if tmp is not None:
            with suppress(OSError):
                tmp.unlink(missing_ok=True)
|
|
|
|
|
|
def _excerpt_page(excerpt: str, item_meta: dict) -> str:
|
|
"""Minimal HTML page showing a stored body excerpt as a preview fallback."""
|
|
import html as _html
|
|
subject = _html.escape(item_meta.get("name", ""))
|
|
modified = item_meta.get("modified", "")
|
|
account = _html.escape(item_meta.get("account_name", ""))
|
|
body = "<pre style='white-space:pre-wrap;font-family:sans-serif;margin:0'>" + _html.escape(excerpt) + "</pre>"
|
|
note = "<p style='font-size:11px;color:#888;margin-top:12px'>Stored excerpt — connect to reload the full message.</p>"
|
|
return (
|
|
"<!DOCTYPE html><html><head><meta charset='utf-8'>"
|
|
"<style>body{font-family:-apple-system,sans-serif;font-size:13px;"
|
|
"padding:12px 16px;background:#fff;color:#111;word-break:break-word}"
|
|
".hdr{border-bottom:1px solid #eee;margin-bottom:12px;padding-bottom:10px}"
|
|
".hdr-row{color:#555;font-size:12px;margin-bottom:3px}"
|
|
".hdr-row b{color:#111}</style></head><body>"
|
|
f"<div class='hdr'>"
|
|
+ (f"<div class='hdr-row'><b>From:</b> {account}</div>" if account else "")
|
|
+ (f"<div class='hdr-row'><b>Date:</b> {_html.escape(modified)}</div>" if modified else "")
|
|
+ (f"<div class='hdr-row'><b>Subject:</b> {subject}</div>" if subject else "")
|
|
+ f"</div>{body}{note}</body></html>"
|
|
)
|
|
|
|
|
|
# Image extensions previewed inline as a data URI, mapped to their MIME type.
_PREVIEW_IMAGE_MIME = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
}

# Extensions rendered as escaped plain text with CPR-like numbers highlighted.
_PREVIEW_TEXT_EXTS = {".txt", ".csv", ".eml", ".md", ".log", ".xml", ".json", ".html", ".htm"}

# Inline style shared by every cell of the CSV / spreadsheet preview tables.
_PREVIEW_CELL_STYLE = (
    "padding:3px 8px;border:1px solid #333;max-width:160px;"
    "overflow:hidden;text-overflow:ellipsis;white-space:nowrap"
)

# Stylesheet of the standalone e-mail preview page.
_EMAIL_PREVIEW_CSS = """
  *, *::before, *::after { box-sizing: border-box; max-width: 100%; }
  html, body { margin: 0; padding: 0; overflow-x: hidden; }
  body { font-family: -apple-system, sans-serif; font-size: 13px; padding: 12px 16px;
         background: #fff; color: #111; word-break: break-word; }
  img { max-width: 100% !important; height: auto !important; }
  table { max-width: 100% !important; table-layout: fixed; word-break: break-word; }
  .hdr { border-bottom: 1px solid #eee; margin-bottom: 12px; padding-bottom: 10px; }
  .hdr-row { color: #555; font-size: 12px; margin-bottom: 3px; }
  .hdr-row b { color: #111; }
  .att-section { margin-top: 16px; border-top: 1px solid #eee; padding-top: 10px; }
  .att-header { font-size: 12px; font-weight: 600; color: #555; margin-bottom: 6px; }
  .att-row { display: flex; align-items: center; gap: 8px; font-size: 12px;
             padding: 4px 0; border-bottom: 1px solid #f0f0f0; }
  .att-name { flex: 1; color: #333; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
  .att-cpr { background: #fff0f0; color: #c00; font-size: 11px; padding: 1px 6px;
             border-radius: 10px; font-weight: 600; white-space: nowrap; }
  ::-webkit-scrollbar { width: 4px; height: 4px; }
  ::-webkit-scrollbar-track { background: transparent; }
  ::-webkit-scrollbar-thumb { background: #aaa; border-radius: 2px; }
  * { scrollbar-width: thin; scrollbar-color: #aaa transparent; }
"""


def _mark_cpr_html(text):
    """HTML-escape *text* and wrap CPR-shaped digit runs (6 digits, optional
    dash/space, 4 digits) in a highlighting <mark> element."""
    import html as _html
    import re as _re

    return _re.sub(
        r"(\d{6}[-\s]?\d{4})",
        r'<mark style="background:#ff444455;color:#ff8888;border-radius:2px">\1</mark>',
        _html.escape(text),
    )


def _file_info_card(name, size, full_path, detail=""):
    """Return the metadata-only fallback card (name, size in KB, path).

    *detail* is appended verbatim after the size and must already be HTML-safe.
    """
    return (
        '<div style="padding:24px;text-align:center;font-family:sans-serif">'
        '<div style="font-size:40px">📄</div>'
        f'<div style="font-size:13px;font-weight:600;margin:8px 0">{_html_esc(name)}</div>'
        f'<div style="font-size:11px;color:var(--muted)">{round(size/1024,1)} KB{detail}</div>'
        f'<div style="margin-top:12px;font-size:11px;color:var(--muted)">{_html_esc(full_path)}</div>'
        '</div>'
    )


def _preview_table_rows(rows):
    """Render an iterable of rows (iterables of cell values) as striped <tr>s.

    Each cell is truncated to 80 characters and escaped; None renders empty.
    """
    import html as _html

    rendered = []
    for i, row in enumerate(rows):
        style = "background:#2a2a2a" if i % 2 == 0 else ""
        cells = "".join(
            f'<td style="{_PREVIEW_CELL_STYLE}">'
            f'{_html.escape(str(c)[:80]) if c is not None else ""}</td>'
            for c in row
        )
        rendered.append(f'<tr style="{style}">{cells}</tr>')
    return "".join(rendered)


def _image_preview_html(file_path, name, ext, item_meta):
    """Return the inline image preview (data URI) plus an optional EXIF table.

    EXIF data comes from ``item_meta["exif"]`` when present, otherwise it is
    extracted from the file bytes.
    """
    import html as _html

    raw = file_path.read_bytes()  # read once; used for both base64 and EXIF
    encoded = base64.b64encode(raw).decode()
    exif = item_meta.get("exif") or _extract_exif(raw, name)

    pairs = []  # (label_html, value_html) — both already safe to embed
    if exif:
        gps = exif.get("gps")
        if gps:
            # The URL and coordinates originate from file metadata: escape
            # them so they cannot break out of the attribute or the anchor.
            href = _html.escape(str(gps["maps_url"]), quote=True)
            coords = _html.escape(f'{gps["lat"]}, {gps["lon"]}')
            pairs.append((
                "📍 GPS",
                f'<a href="{href}" target="_blank" style="color:#7ec8d0">{coords}</a>',
            ))
        for key, label in (("author", "👤 Author"), ("datetime", "📅 Date"), ("device", "📷 Device")):
            if exif.get(key):
                pairs.append((label, _html_esc(exif[key])))
        for field, val in (exif.get("pii_fields") or {}).items():
            if field != "Artist":  # "Artist" entries are deliberately skipped here
                pairs.append((_html_esc(field), _html_esc(str(val)[:200])))

    exif_html = ""
    if pairs:
        table_rows = "".join(
            '<tr style="border-top:1px solid #333">'
            f'<td style="padding:4px 8px;color:#888;width:120px;white-space:nowrap">{label}</td>'
            f'<td style="padding:4px 8px;word-break:break-all">{value}</td></tr>'
            for label, value in pairs
        )
        exif_html = (
            '<details style="margin:8px 12px;font-size:11px">'
            '<summary style="cursor:pointer;color:#888">EXIF data</summary>'
            '<table style="border-collapse:collapse;width:100%;margin-top:6px">'
            + table_rows
            + '</table></details>'
        )

    mime = _PREVIEW_IMAGE_MIME[ext]
    return (
        f'<div style="text-align:center;padding:12px"><img src="data:{mime};base64,{encoded}" '
        f'style="max-width:100%;max-height:60vh;border-radius:6px"></div>{exif_html}'
    )


def _pdf_preview_html(file_path):
    """Return the extracted text of the first five PDF pages as HTML.

    Raises whatever pdfplumber raises (including ImportError when it is not
    installed); the caller falls back to the metadata card.
    """
    import io as _io

    import pdfplumber as _plumber

    pages_html = []
    with _plumber.open(_io.BytesIO(file_path.read_bytes())) as pdf:
        total = len(pdf.pages)
        for i, page in enumerate(pdf.pages[:5]):
            text = page.extract_text() or ""
            if not text.strip():
                text = f"[Page {i+1}: image-only or OCR required]"
            pages_html.append(
                '<div style="border-bottom:1px solid #333;padding:10px 0;margin-bottom:8px">'
                f'<div style="font-size:9px;color:#666;margin-bottom:4px">Page {i+1}</div>'
                '<pre style="font-size:11px;white-space:pre-wrap;word-break:break-all;margin:0;line-height:1.6">'
                f'{_mark_cpr_html(text)}</pre>'
                '</div>'
            )
    note = (
        f'<div style="font-size:10px;color:#666;padding:6px 0">Showing {min(5,total)} of {total} page(s)</div>'
        if total > 5 else ""
    )
    return f'<div style="padding:10px">{note}{"".join(pages_html)}</div>'


def _spreadsheet_preview_html(file_path, ext):
    """Return an HTML table preview: first 50 rows of a CSV, or the first 50
    rows of up to three sheets of an Excel workbook."""
    import html as _html
    import io as _io

    if ext == ".csv":
        raw = file_path.read_bytes().decode("utf-8", errors="replace")
        table_rows = _preview_table_rows(line.split(",") for line in raw.splitlines()[:50])
        return (
            '<div style="padding:8px;overflow-x:auto">'
            f'<table style="border-collapse:collapse;font-size:11px;color:var(--text)">{table_rows}</table></div>'
        )

    import openpyxl as _xl

    wb = _xl.load_workbook(_io.BytesIO(file_path.read_bytes()), read_only=True, data_only=True)
    tabs = []
    for sheet_name in wb.sheetnames[:3]:
        table_rows = _preview_table_rows(wb[sheet_name].iter_rows(max_row=50, values_only=True))
        tabs.append(
            '<div style="margin-bottom:12px">'
            f'<div style="font-size:10px;color:#888;margin-bottom:4px">📋 {_html.escape(sheet_name)}</div>'
            f'<div style="overflow-x:auto"><table style="border-collapse:collapse;font-size:11px;color:var(--text)">{table_rows}</table></div>'
            '</div>'
        )
    return '<div style="padding:8px">' + "".join(tabs) + '</div>'


def _docx_preview_html(file_path):
    """Return the first 80 non-empty paragraphs of a Word document as HTML."""
    import io as _io

    from docx import Document as _Doc

    doc = _Doc(_io.BytesIO(file_path.read_bytes()))
    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()][:80]
    escaped = _mark_cpr_html("\n".join(paragraphs))
    return (
        '<div style="padding:12px"><pre style="font-size:11px;white-space:pre-wrap;'
        f'word-break:break-all;line-height:1.7">{escaped}</pre></div>'
    )


def _preview_local_file(item_meta, source_type):
    """Build the preview response for a ``local`` or ``smb`` item.

    Local files are re-read from ``item_meta["full_path"]`` and rendered by
    type (image, text, PDF, spreadsheet, Word), falling back to a metadata
    card. SMB items only get an informational message. Failures are returned
    as ``{"error": ...}`` JSON with the default status code.
    """
    import html as _html

    full_path = item_meta.get("full_path", "")
    name = item_meta.get("name", "")
    ext = Path(name).suffix.lower() if name else ""

    if not full_path:
        return jsonify({"error": "File path not available — rescan to enable preview"})

    if source_type == "smb":
        # The path is inserted into HTML, so it must be escaped; file names
        # may legally contain "<", "&" and quotes.
        return jsonify({
            "type": "info",
            "html": (
                "<p style='color:var(--muted);font-size:12px'>SMB preview requires re-reading "
                "the file over the network. Open the file directly: "
                f"<code>{_html.escape(str(full_path))}</code></p>"
            ),
        })

    try:
        file_path = Path(full_path).expanduser()
        if not file_path.exists():
            return jsonify({"error": f"File not found: {full_path}"})

        size = file_path.stat().st_size

        # Images — return as data URI
        if ext in _PREVIEW_IMAGE_MIME:
            return jsonify({"type": "html", "html": _image_preview_html(file_path, name, ext, item_meta)})

        # Text-based files — render with highlighted CPR numbers
        if ext in _PREVIEW_TEXT_EXTS:
            if size > 2 * 1024 * 1024:
                return jsonify({"error": "File too large for inline preview (>2 MB)"})
            raw = file_path.read_bytes().decode("utf-8", errors="replace")
            html_out = (
                '<pre style="font-family:var(--mono);font-size:11px;white-space:pre-wrap;'
                'word-break:break-all;padding:12px;color:var(--text);line-height:1.6">'
                + _mark_cpr_html(raw[:50000]) + "</pre>"
            )
            return jsonify({"type": "html", "html": html_out})

        # PDF — render first 5 pages as text using pdfplumber
        if ext == ".pdf":
            if size > 20 * 1024 * 1024:
                return jsonify({"error": "File too large for preview (>20 MB)"})
            if SCANNER_OK:
                try:
                    return jsonify({"type": "html", "html": _pdf_preview_html(file_path)})
                except Exception:
                    pass  # best effort: fall back to the metadata card below
            return jsonify({"type": "html", "html": _file_info_card(name, size, full_path)})

        # Excel/CSV — render content or show metadata
        # NOTE(review): ".csv" is also in _PREVIEW_TEXT_EXTS, so CSV files are
        # always answered by the text branch above and never reach the table
        # rendering here — confirm which presentation is intended.
        if SCANNER_OK and ext in {".xlsx", ".xlsm", ".csv"}:
            try:
                return jsonify({"type": "html", "html": _spreadsheet_preview_html(file_path, ext)})
            except Exception:
                pass  # best effort: fall back to the metadata card below

        if SCANNER_OK and ext in {".docx", ".doc"}:
            try:
                return jsonify({"type": "html", "html": _docx_preview_html(file_path)})
            except Exception:
                pass  # best effort: fall back to the metadata card below

        detail = f' · {ext.upper().lstrip(".")} file'
        return jsonify({"type": "html", "html": _file_info_card(name, size, full_path, detail)})

    except PermissionError:
        return jsonify({"error": f"Permission denied: {full_path}"})
    except Exception as e:
        return jsonify({"error": str(e)})


def _preview_graph_email(item_id, account_id, item_meta):
    """Build the preview response for an ``email`` item fetched via the connector.

    Falls back to the stored body excerpt when there is no connector or the
    fetch fails; without an excerpt those cases return 401 / an error object.
    """
    import html as _html

    excerpt = item_meta.get("body_excerpt", "")
    if not state.connector:
        if excerpt:
            return jsonify({"type": "html", "html": _excerpt_page(excerpt, item_meta)})
        return jsonify({"error": "not authenticated"}), 401

    mailbox = "me" if account_id == "me" else "users/" + account_id
    try:
        msg = state.connector._get(
            f"/{mailbox}/messages/{item_id}",
            {"$select": "subject,from,receivedDateTime,body"},
        )
    except Exception as e:
        if excerpt:
            return jsonify({"type": "html", "html": _excerpt_page(excerpt, item_meta)})
        return jsonify({"error": f"Could not load email: {e}"})

    # Fields may be present but null; treat null like a missing field.
    sender = (msg.get("from") or {}).get("emailAddress") or {}
    from_str = f"{sender.get('name', '')} <{sender.get('address', '')}>"
    date_str = (msg.get("receivedDateTime") or "")[:10]
    body = msg.get("body") or {}
    body_html = body.get("content", "") or ""
    if body.get("contentType", "text") == "text":
        body_html = (
            "<pre style='white-space:pre-wrap;font-family:sans-serif'>"
            + _html.escape(body_html) + "</pre>"
        )
    # NOTE(review): HTML bodies are embedded as received — presumably the
    # client renders this page in a sandboxed frame; confirm.

    att_list = item_meta.get("attachments", [])
    att_html = ""
    if att_list:
        att_rows = []
        for att in att_list:
            count = att.get("cpr_count")
            badge = f'<span class="att-cpr">{_html.escape(str(count))} CPR</span>' if count else ''
            att_name = _html.escape(str(att.get("name", "")))
            att_rows.append(
                f'<div class="att-row"><span class="att-name">{att_name}</span>{badge}</div>'
            )
        att_html = (
            '\n<div class="att-section">\n'
            f'  <div class="att-header">📎 Attachments ({len(att_list)})</div>\n'
            f'  {"".join(att_rows)}\n'
            '</div>'
        )

    # Sender, date and subject come from the message itself, so all three are
    # escaped. Unescaped, the "<address>" part was swallowed as an HTML tag.
    subject = _html.escape(str(msg.get("subject") or "(no subject)"))
    page = (
        '<!DOCTYPE html><html><head><meta charset="utf-8">\n'
        f'<style>{_EMAIL_PREVIEW_CSS}</style></head><body>\n'
        '<div class="hdr">\n'
        f'  <div class="hdr-row"><b>From:</b> {_html.escape(from_str)}</div>\n'
        f'  <div class="hdr-row"><b>Date:</b> {_html.escape(date_str)}</div>\n'
        f'  <div class="hdr-row"><b>Subject:</b> {subject}</div>\n'
        '</div>\n'
        f'{body_html}{att_html}\n'
        '</body></html>'
    )
    return jsonify({"type": "html", "html": page})


def _preview_google_item(source_type, item_meta):
    """Build the preview response for a ``gmail`` or ``gdrive`` item.

    Drive items with a URL are returned as an embeddable iframe URL. Gmail
    (and Drive items without a URL) get the stored excerpt page or a simple
    card, each with an "open" link when a URL is known.
    """
    import re as _re

    item_url = item_meta.get("url", "")
    name = item_meta.get("name", "")

    if source_type == "gdrive" and item_url:
        # Extract Drive file ID and use the embeddable /preview URL
        m = _re.search(r"/file/d/([^/]+)", item_url)
        if m:
            return jsonify({"type": "iframe", "url": f"https://drive.google.com/file/d/{m.group(1)}/preview"})
        # Fallback: generic Drive embed
        return jsonify({"type": "iframe", "url": item_url.replace("/view", "/preview")})

    # Gmail — not embeddable; show link card + stored body excerpt if available
    is_gmail = source_type == "gmail"
    icon = "✉️" if is_gmail else "☁️"
    label = "Open in Gmail" if is_gmail else "Open in Google Drive"
    excerpt = item_meta.get("body_excerpt", "")
    link_html = (
        f'<a href="{_html_esc(item_url)}" target="_blank" '
        f'style="display:inline-block;margin-top:12px;padding:8px 16px;'
        f'background:#3b7dd8;color:#fff;border-radius:6px;text-decoration:none;font-size:12px">'
        f'{label}</a>'
    ) if item_url else ""

    if excerpt and is_gmail:
        html_out = _excerpt_page(excerpt, item_meta)
        if item_url:
            # Inject the "Open in Gmail" link before </body>
            html_out = html_out.replace(
                "</body>",
                f'<div style="margin-top:12px">{link_html}</div></body>',
            )
    else:
        html_out = (
            '<div style="padding:24px;text-align:center;font-family:sans-serif">'
            f'<div style="font-size:40px">{icon}</div>'
            f'<div style="font-size:13px;font-weight:600;margin:8px 0">{_html_esc(name)}</div>'
            '<div style="font-size:11px;color:var(--muted)">No inline preview available for this item</div>'
            f'{link_html}'
            '</div>'
        )
    return jsonify({"type": "html", "html": html_out})


def _preview_graph_drive_item(item_id, account_id, drive_id):
    """Ask the connector's preview endpoints for an embeddable URL.

    Tries, in order, the item's drive (when known), the given user's drive
    and the signed-in user's drive; the first endpoint returning a URL wins.
    """
    # OneDrive / SharePoint / Teams — use Graph's embed preview API
    if not state.connector:
        return jsonify({"error": "not authenticated"}), 401

    endpoints_to_try = []
    if drive_id:
        endpoints_to_try.append(f"/drives/{drive_id}/items/{item_id}/preview")
    if account_id and account_id != "me":
        endpoints_to_try.append(f"/users/{account_id}/drive/items/{item_id}/preview")
    endpoints_to_try.append(f"/me/drive/items/{item_id}/preview")

    errors = []
    for ep in endpoints_to_try:
        try:
            data = state.connector._post(ep, {})
            preview_url = data.get("getUrl") or data.get("postUrl")
        except Exception as e:
            errors.append(str(e))
            continue
        if preview_url:
            return jsonify({"type": "iframe", "url": preview_url})

    # Only the first collected error is reported, to keep the message short.
    return jsonify({"error": "No preview available for this file type. " + "; ".join(errors[:1])})


@bp.route("/api/preview/<item_id>")
def get_preview(item_id):
    """Return a preview URL or HTML for a flagged item.

    Query params:
        source_type  "local" | "smb" | "email" | "gmail" | "gdrive"; any
                     other value is treated as a drive item reachable
                     through the connector
        account_id   mailbox / drive owner, defaults to "me"

    Returns:
        JSON of the form ``{"type": "html"|"iframe"|"info", ...}`` on success
        or ``{"error": ...}`` on failure. Most failures keep the default
        status code; a missing connector yields HTTP 401.
    """
    source_type = request.args.get("source_type", "")
    account_id = request.args.get("account_id", "me") or "me"
    # Metadata is taken from the in-memory results of the current scan.
    item_meta = next((x for x in state.flagged_items if x.get("id") == item_id), {})

    # Local and SMB file sources — re-read file and render preview
    if source_type in ("local", "smb"):
        return _preview_local_file(item_meta, source_type)

    drive_id = item_meta.get("drive_id", "")
    try:
        if source_type == "email":
            return _preview_graph_email(item_id, account_id, item_meta)
        if source_type in ("gmail", "gdrive"):
            return _preview_google_item(source_type, item_meta)
        return _preview_graph_drive_item(item_id, account_id, drive_id)
    except Exception as e:
        return jsonify({"error": str(e)})
|
|
|
|
|
|
@bp.route("/api/thumb")
def thumb():
    """Serve a placeholder SVG thumbnail for a file name.

    The ``name`` query parameter (default "file") and its lower-cased
    extension are passed to ``_placeholder_svg``. Its base64 result is
    decoded and returned as ``image/svg+xml``, cacheable for one hour.
    """
    file_name = request.args.get("name", "file")
    suffix = Path(file_name).suffix.lower()
    svg_bytes = base64.b64decode(_placeholder_svg(suffix, file_name))
    cache_headers = {"Cache-Control": "public, max-age=3600"}
    return Response(svg_bytes, mimetype="image/svg+xml", headers=cache_headers)
|