Share links copied from the Share modal were built with window.location.origin, producing 127.0.0.1 URLs that remote viewers could never reach.

- Bind Flask to 0.0.0.0 in gdpr_scanner.py (--host default), m365_launcher.py, and build_gdpr.py so the server is reachable on the local network. Internal loopback URLs (urllib exports, webview window, port probe) intentionally keep 127.0.0.1.
- Add /api/local_ip endpoint: UDP probe to 8.8.8.8 discovers the active LAN IP without sending real traffic.
- Add _getShareBaseUrl() in viewer.js: fetches /api/local_ip and substitutes the LAN IP; falls back to window.location.origin.
- createShareLink and copyTokenLink are now async and await _getShareBaseUrl() before building the viewer URL.
- Update CLAUDE.md and static/js/CLAUDE.md with the new invariants.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1096 lines
43 KiB
Python
Executable File
1096 lines
43 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
GDPRScanner — Self-Contained App Builder
|
|
==========================================
|
|
Packages gdpr_scanner.py + m365_connector.py + document_scanner.py into a
|
|
native desktop app:
|
|
macOS -> dist/GDPRScanner.app (double-click to run)
|
|
Windows -> dist/GDPRScanner.exe (double-click to run)
|
|
|
|
The app starts Flask on port 5100, opens the UI in a native webview window
|
|
(WKWebView on macOS, WebView2 on Windows), and quits cleanly when the window
|
|
is closed.
|
|
|
|
Usage:
|
|
python build_gdpr.py # build for current platform
|
|
python build_gdpr.py --clean # remove build/ and dist/ first
|
|
python build_gdpr.py --dmg # macOS: also wrap .app in a .dmg
|
|
python build_gdpr.py --installer # Windows: also build NSIS installer
|
|
|
|
Requirements (install once via pip):
|
|
pip install pyinstaller pyinstaller-hooks-contrib
|
|
pip install pywebview # native window (no browser chrome)
|
|
pip install pystray pillow # fallback tray icon when pywebview absent
|
|
|
|
Python version:
|
|
Requires 3.11 or 3.12. spaCy (used by document_scanner for NER) does not
|
|
support 3.13+. This script auto-relaunches with python3.12/python3.11 if
|
|
the current interpreter is incompatible.
|
|
"""
|
|
|
|
# ── Version guard ─────────────────────────────────────────────────────────────
# Runs before the regular imports: when the current interpreter is not
# 3.11/3.12, try to re-execute this script with a compatible one, otherwise
# print install hints and exit with status 1.
import sys as _sys

if _sys.version_info[:2] not in ((3, 11), (3, 12)):
    import os as _os
    import re as _re
    import subprocess as _sp

    _cur = f"{_sys.version_info.major}.{_sys.version_info.minor}"
    print(f" [!] Python {_cur} is not supported (need 3.11 or 3.12 — spaCy incompatible with 3.13+)")

    def _check_version(cmd: list) -> bool:
        """Return True when ``cmd --version`` reports Python 3.11 or 3.12."""
        try:
            out = _sp.check_output(cmd + ["--version"], stderr=_sp.STDOUT, text=True).strip()
        except (OSError, _sp.CalledProcessError):
            # Interpreter not on PATH, not executable, or exited non-zero.
            return False
        m = _re.search(r"Python (\d+)\.(\d+)", out)
        if m is None:
            return False
        return int(m.group(1)) == 3 and int(m.group(2)) in (11, 12)

    # Unix-style names first, then the Windows "py" launcher.
    _candidates = [["python3.12"], ["python3.11"], ["py", "-3.12"], ["py", "-3.11"]]
    _found = None
    for _candidate in _candidates:
        if _check_version(_candidate):
            _found = _candidate
            break

    if _found:
        print(f" [*] Re-launching with: {' '.join(_found)}")
        # Forward the original CLI arguments and propagate the child's exit code.
        _result = _sp.run(_found + [_os.path.abspath(__file__)] + _sys.argv[1:])
        _sys.exit(_result.returncode)

    print()
    print(" No compatible Python found on PATH.")
    print(" Install Python 3.12:")
    if _sys.platform == "darwin":
        print(" brew install python@3.12")
    elif _sys.platform == "win32":
        print(" winget install Python.Python.3.12")
    print()
    _sys.exit(1)
|
|
|
|
# ── Standard imports ──────────────────────────────────────────────────────────
|
|
import argparse
|
|
import platform
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import textwrap
|
|
from pathlib import Path
|
|
|
|
HERE = Path(__file__).parent.resolve()
SYSTEM = platform.system()  # "Darwin", "Windows", "Linux"

# ── App metadata ──────────────────────────────────────────────────────────────
APP_NAME = "GDPRScanner"
APP_PORT = 5100
BUNDLE_ID = "com.m365scanner.app"


def _read_app_version() -> str:
    """Return the version string from the VERSION file next to this script.

    Falls back to "1.0.0" when the file is missing, unreadable, not valid
    UTF-8, or empty. An empty string must never be returned because the
    Windows version-info generator parses this value into integers.
    """
    fallback = "1.0.0"
    try:
        # VERSION is the single source of truth for the app version.
        version = (HERE / "VERSION").read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):
        return fallback
    return version or fallback


APP_VERSION = _read_app_version()
ICON_MACOS = HERE / "icon_gdpr.icns"  # optional; falls back to icon.icns / icon.png
ICON_WIN = HERE / "icon_gdpr.ico"  # optional; falls back to icon.ico / icon.png

# ── Paths ─────────────────────────────────────────────────────────────────────
ENTRY_POINT = HERE / "m365_launcher.py"  # generated by this script
DIST_DIR = HERE / "dist"
BUILD_DIR = HERE / "build"
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# Step 1 — Generate the launcher entry point
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
LAUNCHER_CODE = '''\
|
|
"""
|
|
gdpr_launcher.py — entry point for the packaged GDPRScanner app.
|
|
|
|
Responsibilities:
|
|
1. Find a free port (default 5100)
|
|
2. Start Flask in a background thread
|
|
3. Open the UI in a native webview window (pywebview)
|
|
— falls back to the system browser if pywebview is unavailable
|
|
|
|
Generated by build_gdpr.py — do not edit manually.
|
|
"""
|
|
import os
|
|
os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
|
|
|
|
import subprocess
|
|
import sys
|
|
import socket
|
|
import threading
|
|
import time
|
|
import webbrowser
|
|
from pathlib import Path
|
|
|
|
if getattr(sys, "frozen", False):
|
|
BASE_DIR = Path(sys._MEIPASS)
|
|
else:
|
|
BASE_DIR = Path(__file__).parent
|
|
|
|
|
|
def _setup_external_tools():
|
|
"""
|
|
Locate Tesseract and Poppler regardless of how the app was launched.
|
|
GDPRScanner calls document_scanner for file content extraction, which
|
|
may need OCR for scanned PDFs — same setup as Document Scanner.
|
|
"""
|
|
extra_paths = []
|
|
|
|
if sys.platform == "darwin":
|
|
brew_prefix = None
|
|
for brew_candidate in ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]:
|
|
if Path(brew_candidate).exists():
|
|
try:
|
|
result = subprocess.run(
|
|
[brew_candidate, "--prefix"],
|
|
capture_output=True, text=True, timeout=5
|
|
)
|
|
if result.returncode == 0:
|
|
brew_prefix = result.stdout.strip()
|
|
break
|
|
except Exception:
|
|
pass
|
|
|
|
brew_candidates = []
|
|
if brew_prefix:
|
|
brew_candidates.append(brew_prefix)
|
|
brew_candidates += ["/opt/homebrew", "/usr/local", "/home/linuxbrew/.linuxbrew"]
|
|
|
|
for prefix in brew_candidates:
|
|
bin_dir = Path(prefix) / "bin"
|
|
if bin_dir.exists():
|
|
extra_paths.append(str(bin_dir))
|
|
tessdata = Path(prefix) / "share" / "tessdata"
|
|
if tessdata.exists():
|
|
os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
|
|
|
|
for t in ["/opt/homebrew/bin/tesseract", "/usr/local/bin/tesseract"]:
|
|
if Path(t).exists():
|
|
os.environ.setdefault("TESSERACT_CMD", t)
|
|
break
|
|
|
|
for p in ["/opt/homebrew/bin", "/usr/local/bin",
|
|
"/opt/homebrew/opt/poppler/bin", "/usr/local/opt/poppler/bin"]:
|
|
if (Path(p) / "pdftoppm").exists():
|
|
os.environ.setdefault("POPPLER_PATH", p)
|
|
extra_paths.insert(0, p)
|
|
break
|
|
|
|
elif sys.platform == "win32":
|
|
import winreg
|
|
tess_dir = None
|
|
try:
|
|
key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\\Tesseract-OCR")
|
|
tess_dir, _ = winreg.QueryValueEx(key, "InstallDir")
|
|
winreg.CloseKey(key)
|
|
except Exception:
|
|
pass
|
|
|
|
for d in ([tess_dir] if tess_dir else []) + [
|
|
r"C:\\Program Files\\Tesseract-OCR",
|
|
r"C:\\Program Files (x86)\\Tesseract-OCR",
|
|
r"C:\\Tesseract-OCR",
|
|
]:
|
|
if d and Path(d, "tesseract.exe").exists():
|
|
os.environ.setdefault("TESSERACT_CMD", str(Path(d) / "tesseract.exe"))
|
|
extra_paths.append(d)
|
|
tessdata = Path(d) / "tessdata"
|
|
if tessdata.exists():
|
|
os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
|
|
break
|
|
|
|
for d in [
|
|
r"C:\\poppler\\Library\\bin", r"C:\\poppler\\bin",
|
|
r"C:\\Program Files\\poppler\\Library\\bin",
|
|
r"C:\\Program Files\\poppler\\bin",
|
|
r"C:\\tools\\poppler\\Library\\bin",
|
|
]:
|
|
if (Path(d) / "pdftoppm.exe").exists():
|
|
os.environ.setdefault("POPPLER_PATH", d)
|
|
extra_paths.insert(0, d)
|
|
break
|
|
|
|
if getattr(sys, "frozen", False):
|
|
tess_bin = BASE_DIR / ("tesseract.exe" if sys.platform == "win32" else "tesseract")
|
|
if tess_bin.exists():
|
|
os.environ.setdefault("TESSERACT_CMD", str(tess_bin))
|
|
for sub in ["poppler/bin", "poppler/Library/bin", "."]:
|
|
pdftoppm = BASE_DIR / sub / ("pdftoppm.exe" if sys.platform == "win32" else "pdftoppm")
|
|
if pdftoppm.exists():
|
|
os.environ.setdefault("POPPLER_PATH", str(pdftoppm.parent))
|
|
extra_paths.insert(0, str(pdftoppm.parent))
|
|
break
|
|
extra_paths.insert(0, str(BASE_DIR))
|
|
|
|
if extra_paths:
|
|
current = os.environ.get("PATH", "")
|
|
additions = os.pathsep.join(p for p in extra_paths if p not in current)
|
|
if additions:
|
|
os.environ["PATH"] = additions + os.pathsep + current
|
|
|
|
cmd = os.environ.get("TESSERACT_CMD")
|
|
if cmd and Path(cmd).exists():
|
|
try:
|
|
import pytesseract
|
|
pytesseract.pytesseract.tesseract_cmd = cmd
|
|
except ImportError:
|
|
pass
|
|
|
|
poppler = os.environ.get("POPPLER_PATH")
|
|
if poppler:
|
|
try:
|
|
import pdf2image.pdf2image as _p2i
|
|
_orig = _p2i.convert_from_path
|
|
def _patched(pdf_path, *a, poppler_path=None, **kw):
|
|
return _orig(pdf_path, *a, poppler_path=poppler_path or poppler, **kw)
|
|
_p2i.convert_from_path = _patched
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
_setup_external_tools()
|
|
|
|
|
|
def find_free_port(start: int = 5100) -> int:
    """Return the first port in ``start .. start + 99`` that can be bound.

    Each candidate is probed by binding a throw-away TCP socket on the
    loopback address; the socket is closed again before the port is returned.

    Raises:
        RuntimeError: if none of the 100 candidate ports can be bound.
    """
    span = 100
    for port in range(start, start + span):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except OSError:
                # Port is taken (or not bindable) — try the next one.
                continue
            return port
    # Report the range that was actually probed rather than a fixed one.
    raise RuntimeError(f"No free port found in range {start}-{start + span - 1}")
|
|
|
|
|
|
# ── Single-instance lock ──────────────────────────────────────────────────────
# Handle of the lock file; kept at module level so the lock stays held for the
# lifetime of the process.
_LOCK_FH = None


def acquire_instance_lock() -> bool:
    """
    Acquire an exclusive process lock so only one instance runs at a time.

    The lock file is ~/.gdprscanner/app.lock. On success the current PID is
    written into it and True is returned; if the lock cannot be taken (another
    instance holds it) the file is left untouched and False is returned.
    Calling this again after a successful acquisition returns True.
    The lock is released when the handle is closed, i.e. at process exit.
    """
    global _LOCK_FH
    if _LOCK_FH is not None:
        # This process already holds the lock.
        return True

    lock_dir = Path.home() / ".gdprscanner"
    lock_dir.mkdir(parents=True, exist_ok=True)
    lock_path = lock_dir / "app.lock"

    fh = None
    try:
        # "a+" creates the file when missing but, unlike "w", does not
        # truncate it — the PID written by a running instance must survive a
        # failed attempt from a second instance.
        fh = open(lock_path, "a+")
        # msvcrt.locking() locks relative to the current file position, so
        # every process must lock from the same offset.
        fh.seek(0)
        if sys.platform == "win32":
            import msvcrt
            msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            import fcntl
            fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Lock is ours: replace any stale content with our PID.
        fh.seek(0)
        fh.truncate()
        fh.write(str(os.getpid()))
        fh.flush()
    except OSError:
        if fh is not None:
            fh.close()
        return False

    # Publish the handle only after the lock has actually been acquired.
    _LOCK_FH = fh
    return True
|
|
|
|
|
|
def _activate_venv():
|
|
if getattr(sys, "frozen", False):
|
|
return
|
|
for candidate in [BASE_DIR / "venv", Path(__file__).parent / "venv"]:
|
|
if sys.platform == "win32":
|
|
site_pkg = candidate / "Lib" / "site-packages"
|
|
else:
|
|
lib = candidate / "lib"
|
|
site_pkg = None
|
|
if lib.exists():
|
|
for d in lib.iterdir():
|
|
sp = d / "site-packages"
|
|
if sp.exists():
|
|
site_pkg = sp
|
|
break
|
|
if site_pkg and site_pkg.exists():
|
|
sys.path.insert(0, str(site_pkg))
|
|
os.environ["VIRTUAL_ENV"] = str(candidate)
|
|
os.environ.pop("PYTHONHOME", None)
|
|
break
|
|
|
|
|
|
_activate_venv()
|
|
|
|
|
|
def start_flask(port: int):
|
|
import gdpr_scanner as _app
|
|
_app.app.run(host="0.0.0.0", port=port, debug=False,
|
|
threaded=True, use_reloader=False)
|
|
|
|
|
|
def wait_for_flask(port: int, timeout: float = 20.0) -> bool:
|
|
deadline = time.monotonic() + timeout
|
|
while time.monotonic() < deadline:
|
|
try:
|
|
with socket.create_connection(("127.0.0.1", port), timeout=0.2):
|
|
return True
|
|
except OSError:
|
|
time.sleep(0.1)
|
|
return False
|
|
|
|
|
|
def _load_icon_image():
|
|
try:
|
|
from PIL import Image as PILImage
|
|
for name in ["icon_gdpr.ico", "icon_gdpr.icns", "icon_gdpr.png",
|
|
"icon.ico", "icon.icns", "icon.png",
|
|
"icon_m365.ico", "icon_m365.icns", "icon_m365.png"]: # legacy fallback
|
|
p = BASE_DIR / name
|
|
if p.exists():
|
|
return PILImage.open(p).convert("RGBA").resize((64, 64))
|
|
# Minimal fallback — blue square
|
|
img = PILImage.new("RGBA", (64, 64), (0, 114, 206, 255))
|
|
return img
|
|
except Exception:
|
|
return None
|
|
|
|
|
|
def run_webview(port: int):
|
|
"""
|
|
Open the app in a native webview window.
|
|
Returns True on success, False if pywebview is unavailable.
|
|
"""
|
|
try:
|
|
import webview
|
|
except ImportError:
|
|
return False
|
|
|
|
class Api:
|
|
def quit(self):
|
|
import webview as _wv
|
|
for w in _wv.windows:
|
|
w.destroy()
|
|
|
|
def save_excel(self):
|
|
"""Fetch the Excel export from Flask and save via native dialog."""
|
|
import urllib.request, datetime, os, webview as _wv
|
|
try:
|
|
url = f"http://127.0.0.1:{port}/api/export_excel"
|
|
with urllib.request.urlopen(url) as resp:
|
|
data = resp.read()
|
|
fname = f"gdpr_scan_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
|
|
win = _wv.windows[0] if _wv.windows else None
|
|
if win:
|
|
paths = win.create_file_dialog(
|
|
_wv.SAVE_DIALOG,
|
|
save_filename=fname,
|
|
file_types=("Excel Files (*.xlsx)",),
|
|
)
|
|
if paths:
|
|
dest = paths[0] if isinstance(paths, (list, tuple)) else paths
|
|
if not dest.endswith(".xlsx"):
|
|
dest += ".xlsx"
|
|
with open(dest, "wb") as f:
|
|
f.write(data)
|
|
return {"ok": True, "path": dest}
|
|
return {"ok": False, "error": "cancelled"}
|
|
except Exception as e:
|
|
return {"ok": False, "error": str(e)}
|
|
|
|
def save_db_export(self):
|
|
"""Fetch the DB export ZIP from Flask and save via native dialog."""
|
|
import urllib.request, datetime, webview as _wv
|
|
try:
|
|
url = f"http://127.0.0.1:{port}/api/db/export"
|
|
with urllib.request.urlopen(url) as resp:
|
|
data = resp.read()
|
|
fname = f"gdpr_export_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
|
|
win = _wv.windows[0] if _wv.windows else None
|
|
if win:
|
|
paths = win.create_file_dialog(
|
|
_wv.SAVE_DIALOG,
|
|
save_filename=fname,
|
|
file_types=("ZIP Archive (*.zip)",),
|
|
)
|
|
if paths:
|
|
dest = paths[0] if isinstance(paths, (list, tuple)) else paths
|
|
if not dest.endswith(".zip"):
|
|
dest += ".zip"
|
|
with open(dest, "wb") as f:
|
|
f.write(data)
|
|
return {"ok": True, "path": dest}
|
|
return {"ok": False, "error": "cancelled"}
|
|
except Exception as e:
|
|
return {"ok": False, "error": str(e)}
|
|
|
|
def save_article30(self):
|
|
"""Fetch the Article 30 Word doc from Flask and save via native dialog."""
|
|
import urllib.request, datetime, webview as _wv
|
|
try:
|
|
url = f"http://127.0.0.1:{port}/api/export_article30"
|
|
with urllib.request.urlopen(url) as resp:
|
|
data = resp.read()
|
|
fname = f"article30_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
|
|
win = _wv.windows[0] if _wv.windows else None
|
|
if win:
|
|
paths = win.create_file_dialog(
|
|
_wv.SAVE_DIALOG,
|
|
save_filename=fname,
|
|
file_types=("Word Document (*.docx)",),
|
|
)
|
|
if paths:
|
|
dest = paths[0] if isinstance(paths, (list, tuple)) else paths
|
|
if not dest.endswith(".docx"):
|
|
dest += ".docx"
|
|
with open(dest, "wb") as f:
|
|
f.write(data)
|
|
return {"ok": True, "path": dest}
|
|
return {"ok": False, "error": "cancelled"}
|
|
except Exception as e:
|
|
return {"ok": False, "error": str(e)}
|
|
|
|
def open_manual(self, lang: str):
|
|
"""Open the user manual in a new native webview window."""
|
|
import webview as _wv
|
|
url = f"http://127.0.0.1:{port}/manual?lang={lang}"
|
|
existing = next((w for w in _wv.windows if getattr(w, "_is_manual", False)), None)
|
|
if existing:
|
|
existing.load_url(url)
|
|
else:
|
|
mw = _wv.create_window(
|
|
title="GDPRScanner — Manual",
|
|
url=url,
|
|
width=960,
|
|
height=800,
|
|
resizable=True,
|
|
)
|
|
mw._is_manual = True
|
|
|
|
w = webview.create_window(
|
|
title="GDPRScanner",
|
|
url=f"http://127.0.0.1:{port}/",
|
|
width=1400,
|
|
height=900,
|
|
min_size=(900, 600),
|
|
js_api=Api(),
|
|
)
|
|
|
|
def _on_closed():
|
|
os._exit(0)
|
|
|
|
w.events.closed += _on_closed
|
|
webview.start(debug=False)
|
|
return True
|
|
|
|
|
|
def _run_browser_fallback(port: int):
    """Open the UI in the system browser and keep the process alive.

    When pystray is importable and an icon image can be loaded, a tray icon
    with "Open" and "Quit" entries is shown and its event loop blocks here.
    Otherwise the function sleeps until interrupted. Returning early would
    end the main thread and with it the daemon Flask thread, so every path
    without a tray icon ends in the keep-alive loop.
    """
    url = f"http://127.0.0.1:{port}/"
    webbrowser.open(url)

    try:
        import pystray
    except ImportError:
        pystray = None

    # _load_icon_image() returns None when Pillow is missing or the image
    # cannot be prepared.
    img = _load_icon_image() if pystray is not None else None

    if pystray is not None and img is not None:
        def _quit(icon, item):
            icon.stop()
            os._exit(0)

        def _open(icon, item):
            webbrowser.open(url)

        menu = pystray.Menu(
            pystray.MenuItem("Open GDPRScanner", _open, default=True),
            pystray.MenuItem("Quit", _quit),
        )
        icon = pystray.Icon("GDPRScanner", img, "GDPRScanner", menu)
        icon.run()
        return

    # No tray icon available — just keep the process alive
    try:
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        pass
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if not acquire_instance_lock():
|
|
print("GDPRScanner is already running.", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# On macOS, multiprocessing uses "fork" which is unsafe with some
|
|
# frameworks — use "spawn" to match PyInstaller's behaviour.
|
|
if sys.platform == "darwin":
|
|
import multiprocessing
|
|
multiprocessing.set_start_method("spawn", force=True)
|
|
|
|
port = find_free_port()
|
|
# Machine-readable port line — stdout pipe for any parent process.
|
|
print(f"GDPR_PORT={port}", flush=True)
|
|
|
|
# Pre-import on main thread so cv2 / numpy initialise safely
|
|
try:
|
|
import gdpr_scanner # noqa: F401 — side effect: loads Flask app
|
|
except Exception as e:
|
|
print(f"[!] Failed to import gdpr_scanner: {e}", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
flask_thread = threading.Thread(target=start_flask, args=(port,), daemon=True)
|
|
flask_thread.start()
|
|
|
|
if not wait_for_flask(port):
|
|
print("[!] Flask did not start in time", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
webview_ok = run_webview(port)
|
|
if not webview_ok:
|
|
_run_browser_fallback(port)
|
|
'''
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# Step 2 — Icon generation
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
def _load_icon_font(candidates, size):
    """Return the first loadable TrueType font from *candidates* at *size*.

    Falls back to Pillow's built-in default font when no candidate file
    exists or can be opened.
    """
    from PIL import ImageFont

    for font_path in candidates:
        if not Path(font_path).exists():
            continue
        try:
            return ImageFont.truetype(font_path, size=size)
        except (OSError, ValueError):
            # Unreadable or unsupported font file — try the next candidate.
            continue
    return ImageFont.load_default()


def make_icons():
    """Generate icon_gdpr.icns (macOS only), icon_gdpr.ico and icon_gdpr.png.

    All files are written next to this script. Does nothing except print a
    hint when Pillow is not installed.
    """
    try:
        from PIL import Image, ImageDraw
    except ImportError:
        print(" [!] Pillow not found — skipping icon generation")
        print(" Install with: pip install pillow")
        return

    # ── Draw the icon: dark background + "GDPR" text ──────────────────────────
    SIZE = 512
    img = Image.new("RGBA", (SIZE, SIZE), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Rounded-rect background
    R = 100
    BG = (31, 41, 64, 255)  # dark navy
    ACC = (255, 255, 255, 255)  # white text

    # Fill body: two overlapping rectangles leave the four corners open ...
    draw.rectangle([R, 0, SIZE - R, SIZE], fill=BG)
    draw.rectangle([0, R, SIZE, SIZE - R], fill=BG)
    # ... which are then closed with quarter circles.
    for cx, cy in [(R, R), (SIZE - R, R), (R, SIZE - R), (SIZE - R, SIZE - R)]:
        draw.ellipse([cx - R, cy - R, cx + R, cy + R], fill=BG)

    # Text "GDPR"
    font = _load_icon_font(
        [
            "/System/Library/Fonts/Helvetica.ttc",
            "/System/Library/Fonts/Arial.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "C:/Windows/Fonts/arialbd.ttf",
        ],
        size=160,
    )
    text = "GDPR"
    bbox = draw.textbbox((0, 0), text, font=font)
    tw = bbox[2] - bbox[0]
    th = bbox[3] - bbox[1]
    # Centre the glyph box, nudged 10 px up to leave room for the subtitle.
    draw.text(((SIZE - tw) / 2 - bbox[0], (SIZE - th) / 2 - bbox[1] - 10),
              text, fill=ACC, font=font)

    # Smaller "Scanner" subtitle
    sub_font = _load_icon_font(
        [
            "/System/Library/Fonts/Helvetica.ttc",
            "/System/Library/Fonts/Arial.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "C:/Windows/Fonts/arial.ttf",
        ],
        size=68,
    )
    sub = "Scanner"
    sbbox = draw.textbbox((0, 0), sub, font=sub_font)
    sw = sbbox[2] - sbbox[0]
    draw.text(((SIZE - sw) / 2 - sbbox[0], SIZE * 0.65),
              sub, fill=(200, 230, 255, 220), font=sub_font)

    # ── macOS .icns ────────────────────────────────────────────────────────────
    if SYSTEM == "Darwin":
        icns_path = HERE / "icon_gdpr.icns"
        iconset = HERE / "icon_gdpr.iconset"
        iconset.mkdir(exist_ok=True)
        # NOTE(review): 64 is not one of the documented .iconset base sizes —
        # confirm iconutil accepts the icon_64x64 entries.
        sizes = [16, 32, 64, 128, 256, 512]
        for s in sizes:
            img.resize((s, s), Image.LANCZOS).save(iconset / f"icon_{s}x{s}.png")
            img.resize((s * 2, s * 2), Image.LANCZOS).save(iconset / f"icon_{s}x{s}@2x.png")
        result = subprocess.run(
            ["iconutil", "-c", "icns", str(iconset), "-o", str(icns_path)],
            capture_output=True,
        )
        # The .iconset folder is only an intermediate artefact.
        shutil.rmtree(iconset, ignore_errors=True)
        if result.returncode == 0:
            print(f" [+] Icon: {icns_path.name}")
        else:
            print(" [!] iconutil failed — no .icns generated")

    # ── Windows .ico ───────────────────────────────────────────────────────────
    ico_path = HERE / "icon_gdpr.ico"
    ico_sizes = [16, 32, 48, 64, 128, 256]
    ico_imgs = [img.resize((s, s), Image.LANCZOS) for s in ico_sizes]
    # Pillow's ICO writer ignores every requested size that is larger than the
    # image save() is called on, so the LARGEST frame must be the base image;
    # the smaller pre-scaled frames are supplied via append_images.
    ico_imgs[-1].save(
        ico_path,
        format="ICO",
        sizes=[(s, s) for s in ico_sizes],
        append_images=ico_imgs[:-1],
    )
    print(f" [+] Icon: {ico_path.name}")

    # Save PNG fallback
    img.save(HERE / "icon_gdpr.png")
    print(" [+] Icon: icon_gdpr.png")
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# Step 3 — Build with PyInstaller
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
def get_pyinstaller_args() -> list:
    """Return the PyInstaller command-line arguments for the current platform.

    The list starts with the launcher entry point and the general build
    options, followed by one ``--hidden-import`` per module in ``hidden``,
    one ``--add-data`` per bundled file/directory, and finally the
    platform-specific icon / windowing options. Optional resources (VERSION,
    lang/, keywords/, classification/, templates/, static/, manuals, routes/,
    cv2 data, one spaCy model) are only added when they are found.
    """
    hidden = [
        # Flask / web
        "flask", "flask.templating", "jinja2", "jinja2.ext",
        "werkzeug", "werkzeug.serving", "werkzeug.routing",
        # M365 / auth
        "msal", "msal.application", "msal.authority",
        "requests", "requests.adapters", "urllib3",
        "cryptography", "cryptography.hazmat",
        # Document scanning (via document_scanner)
        "pdfplumber", "pdfplumber.page", "pdfminer", "pdfminer.high_level",
        "pdf2image", "pytesseract",
        "pypdf", "reportlab", "reportlab.pdfgen", "reportlab.lib",
        "spacy", "spacy.lang.da", "spacy.lang.en",
        "docx", "docx.oxml", "docx.styles",
        "openpyxl", "openpyxl.styles", "openpyxl.utils",
        "numpy", "PIL", "PIL.Image",
        # App window
        "pystray", "pystray._base",
        "webview", "webview.platforms",
        "webview.platforms.cocoa",
        "webview.platforms.winforms",
        "webview.platforms.gtk",
        "webview.platforms.qt",
        # Scheduler (#19)
        "apscheduler", "apscheduler.schedulers.background",
        "apscheduler.triggers.cron",
    ]

    # Project modules shipped as plain data files in the bundle root.
    # NOTE(review): "scheduler.py" was commented out of this list in the
    # original — confirm it is obsolete.
    module_names = [
        "gdpr_scanner.py",
        "m365_connector.py",
        "gdpr_db.py",
        "file_scanner.py",
        "document_scanner.py",
        # Modules split from gdpr_scanner.py in v1.6.1 (#25)
        "sse.py",
        "checkpoint.py",
        "app_config.py",
        "cpr_detector.py",
        "scan_engine.py",
        "google_connector.py",
        "scan_scheduler.py",
    ]
    datas = [(str(HERE / name), ".") for name in module_names]

    # Bundle VERSION file — read at startup by both scanners
    version_file = HERE / "VERSION"
    if version_file.exists():
        datas.append((str(version_file), "."))
        print(f" [+] Bundling VERSION: {version_file.read_text(encoding='utf-8').strip()}")

    lang_dir = HERE / "lang"
    if lang_dir.exists():
        datas.append((str(lang_dir), "lang"))
        # Reported here, under the same condition that bundles the directory.
        print(f" [+] Bundling lang files: {list(lang_dir.glob('*.json')) + list(lang_dir.glob('*.lang'))}")

    keywords_dir = HERE / "keywords"
    if keywords_dir.exists():
        datas.append((str(keywords_dir), "keywords"))
        print(f" [+] Bundling keywords: {list(keywords_dir.glob('*.json'))}")

    skus_dir = HERE / "classification"
    if skus_dir.exists():
        datas.append((str(skus_dir), "classification"))
        print(f" [+] Bundling classification files: {list(skus_dir.glob('*.json'))}")

    templates_dir = HERE / "templates"
    if templates_dir.exists():
        datas.append((str(templates_dir), "templates"))
        print(f" [+] Bundling templates: {list(templates_dir.glob('*.html'))}")

    static_dir = HERE / "static"
    if static_dir.exists():
        datas.append((str(static_dir), "static"))
        print(f" [+] Bundling static: {list(static_dir.iterdir())}")

    for manual_file in (HERE / "docs" / "manuals").glob("MANUAL-*.md"):
        datas.append((str(manual_file), "docs/manuals"))
        print(f" [+] Bundling manual: {manual_file.name}")

    # Bundle routes/ blueprints
    routes_dir = HERE / "routes"
    if routes_dir.exists():
        route_files = list(routes_dir.glob("*.py"))
        datas.extend((str(f), "routes") for f in route_files)
        print(f" [+] Bundling routes/: {[f.name for f in route_files]}")

    # cv2 cascade data — cv2 itself is excluded from analysis below
    # (--exclude-module) and its files are added by hand instead.
    try:
        import cv2 as _cv2
        cv2_data = Path(_cv2.__file__).parent / "data"
    except Exception:
        # Importing cv2 failed; locate the package on disk without importing it.
        import importlib.util
        spec = importlib.util.find_spec("cv2")
        cv2_data = Path(spec.origin).parent / "data" if spec and spec.origin else None
    if cv2_data and Path(cv2_data).exists():
        datas.append((str(cv2_data), "cv2/data"))
        print(" [+] Bundling cv2/data")
        cv2_pkg = Path(cv2_data).parent
        for so in cv2_pkg.glob("cv2*.so"):
            datas.append((str(so), "cv2"))
        dylibs = cv2_pkg / ".dylibs"
        if dylibs.exists():
            datas.append((str(dylibs), "cv2/.dylibs"))

    # spaCy models — bundle only the first model (in order of preference)
    # for which PyInstaller can collect data files or submodules.
    try:
        from PyInstaller.utils.hooks import collect_data_files as _cdf, collect_submodules as _csm
    except ImportError:
        _cdf = _csm = None
    if _cdf is not None:
        for model in ["da_core_news_lg", "da_core_news_md", "da_core_news_sm",
                      "xx_ent_wiki_sm", "en_core_web_sm"]:
            try:
                _md = _cdf(model)
                _mh = _csm(model)
            except Exception:
                # Best effort: a model that cannot be collected is skipped.
                continue
            if _md or _mh:
                datas += _md
                hidden += _mh
                print(f" [+] Bundling spaCy model: {model}")
                break

    args = [
        str(ENTRY_POINT),
        "--name", APP_NAME,
        "--onedir",
        "--noconfirm",
        "--clean",
        "--distpath", str(DIST_DIR),
        "--workpath", str(BUILD_DIR),
        "--specpath", str(HERE),
        "--exclude-module", "cv2",
    ]

    for h in hidden:
        args += ["--hidden-import", h]

    # --add-data uses "src;dst" on Windows and "src:dst" elsewhere.
    sep = ";" if SYSTEM == "Windows" else ":"
    for src, dst in datas:
        args += ["--add-data", f"{src}{sep}{dst}"]

    # Platform options
    if SYSTEM == "Darwin":
        icon = next(
            (p for p in [ICON_MACOS, HERE / "icon.icns", HERE / "icon_gdpr.png", HERE / "icon.png"]
             if p.exists()),
            None,
        )
        if icon:
            args += ["--icon", str(icon)]
        args += ["--windowed", "--osx-bundle-identifier", BUNDLE_ID]

    elif SYSTEM == "Windows":
        icon = next(
            (p for p in [ICON_WIN, HERE / "icon.ico", HERE / "icon_gdpr.png"]
             if p.exists()),
            None,
        )
        if icon:
            args += ["--icon", str(icon)]
        args += ["--windowed", "--version-file", str(_make_win_version_file())]

    return args
|
|
|
|
|
|
def _make_win_version_file() -> Path:
    """Write the PyInstaller version-info file for Windows and return its path.

    APP_VERSION is converted to the four-integer tuple used for ``filevers``
    / ``prodvers``: the leading digits of each of the first four
    dot-separated components are used, anything non-numeric counts as 0
    (so "1.6.1-beta" gives (1, 6, 1, 0)), and missing components are padded
    with zeros (so "2" gives (2, 0, 0, 0)).
    """
    numbers = []
    for piece in APP_VERSION.split(".")[:4]:
        m = re.match(r"\d+", piece.strip())
        numbers.append(int(m.group()) if m else 0)
    ver = tuple(numbers + [0] * (4 - len(numbers)))

    # NOTE(review): InternalName still carries the legacy "M365Scanner"
    # name — confirm whether it should follow APP_NAME.
    content = textwrap.dedent(f"""\
        VSVersionInfo(
          ffi=FixedFileInfo(
            filevers={ver}, prodvers={ver},
            mask=0x3f, flags=0x0, OS=0x4, fileType=0x1,
            subtype=0x0, date=(0, 0)
          ),
          kids=[
            StringFileInfo([StringTable('040904B0', [
              StringStruct('CompanyName', 'GDPRScanner'),
              StringStruct('FileDescription', '{APP_NAME}'),
              StringStruct('FileVersion', '{APP_VERSION}'),
              StringStruct('InternalName', 'M365Scanner'),
              StringStruct('LegalCopyright', ''),
              StringStruct('OriginalFilename', 'GDPRScanner.exe'),
              StringStruct('ProductName', '{APP_NAME}'),
              StringStruct('ProductVersion', '{APP_VERSION}'),
            ])]),
            VarFileInfo([VarStruct('Translation', [0x0409, 1200])])
          ]
        )
    """)
    path = HERE / "m365_win_version_info.txt"
    path.write_text(content, encoding="utf-8")
    return path
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# Step 4 — Post-build helpers
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
def create_dmg():
    """Wrap dist/<APP_NAME>.app in dist/<APP_NAME>-<APP_VERSION>.dmg.

    Uses the external ``create-dmg`` command. Prints a hint and returns
    without doing anything when the tool is not on PATH or the .app bundle
    has not been built; an existing .dmg with the same name is replaced.
    """
    if shutil.which("create-dmg") is None:
        print(" [!] create-dmg not found — skipping .dmg")
        print(" Install with: brew install create-dmg")
        return

    app_path = DIST_DIR / f"{APP_NAME}.app"
    if not app_path.exists():
        # Without this check create-dmg fails and the message below would
        # wrongly claim that the .app is usable.
        print(f" [!] {app_path.name} not found in dist/ — skipping .dmg")
        return

    dmg_path = DIST_DIR / f"{APP_NAME}-{APP_VERSION}.dmg"
    dmg_path.unlink(missing_ok=True)

    print(" Creating .dmg …")
    cmd = [
        "create-dmg",
        "--volname", APP_NAME,
        "--window-pos", "200", "120",
        "--window-size", "600", "400",
        "--icon-size", "100",
        "--icon", f"{APP_NAME}.app", "175", "190",
        "--hide-extension", f"{APP_NAME}.app",
        "--app-drop-link", "425", "190",
        str(dmg_path),
        str(app_path),
    ]
    result = subprocess.run(cmd)
    if result.returncode == 0:
        print(f" [+] DMG created: {dmg_path.name}")
    else:
        print(" [!] create-dmg failed — .app is still usable directly")
|
|
|
|
|
|
def create_nsis_installer():
    """Generate m365_installer.nsi and compile it with ``makensis``.

    Only runs on Windows with NSIS on PATH; otherwise a hint is printed and
    nothing is written. The generated installer copies dist/<APP_NAME> to
    Program Files, creates desktop and start-menu shortcuts, and writes an
    uninstaller that removes them again.
    """
    if SYSTEM != "Windows":
        print(" [!] NSIS installer only available on Windows")
        return
    if shutil.which("makensis") is None:
        print(" [!] NSIS not found — download from https://nsis.sourceforge.io")
        return

    nsi = HERE / "m365_installer.nsi"
    dist_folder = DIST_DIR / APP_NAME
    # WriteUninstaller is required for the "Uninstall" section to have any
    # effect: without it NSIS never emits an uninstaller executable.
    nsi.write_text(textwrap.dedent(f"""\
        !define APP_NAME "{APP_NAME}"
        !define APP_VERSION "{APP_VERSION}"
        !define DIST_FOLDER "{dist_folder}"
        !define INSTALL_DIR "$PROGRAMFILES64\\\\{APP_NAME}"

        Name "${{APP_NAME}}"
        OutFile "dist\\\\{APP_NAME}-{APP_VERSION}-Setup.exe"
        InstallDir "${{INSTALL_DIR}}"
        RequestExecutionLevel admin

        Section "Install"
          SetOutPath "${{INSTALL_DIR}}"
          File /r "${{DIST_FOLDER}}\\\\*.*"
          WriteUninstaller "${{INSTALL_DIR}}\\\\Uninstall.exe"
          CreateShortcut "$DESKTOP\\\\{APP_NAME}.lnk" "${{INSTALL_DIR}}\\\\{APP_NAME}.exe"
          CreateShortcut "$SMPROGRAMS\\\\{APP_NAME}.lnk" "${{INSTALL_DIR}}\\\\{APP_NAME}.exe"
        SectionEnd

        Section "Uninstall"
          Delete "$DESKTOP\\\\{APP_NAME}.lnk"
          Delete "$SMPROGRAMS\\\\{APP_NAME}.lnk"
          RMDir /r "${{INSTALL_DIR}}"
        SectionEnd
    """), encoding="utf-8")

    result = subprocess.run(["makensis", str(nsi)])
    if result.returncode == 0:
        print(f" [+] Installer: dist/{APP_NAME}-{APP_VERSION}-Setup.exe")
    else:
        print(" [!] NSIS compilation failed")
|
|
|
|
|
|
def print_next_steps():
    """Print a post-build summary telling the user where the artefact is.

    On macOS and Windows a boxed banner is printed containing the path of
    the built app / exe and short run and distribution hints.  On any other
    platform a single "build complete" line is printed instead.
    """
    if SYSTEM == "Darwin":
        bundle = DIST_DIR / f"{APP_NAME}.app"
        banner = f"""
╔══════════════════════════════════════════════════════════╗
║ Build complete! ║
╠══════════════════════════════════════════════════════════╣
║ App: {str(bundle):<51}║
╠══════════════════════════════════════════════════════════╣
║ To run: ║
║ open "{bundle}"
║ — or double-click in Finder ║
║ ║
║ Opens on http://127.0.0.1:5100 in a native WKWebView ║
║ window (no browser chrome). ║
║ If pywebview was not installed, falls back to browser. ║
║ ║
║ To distribute: ║
║ python build_gdpr.py --dmg (requires create-dmg) ║
╚══════════════════════════════════════════════════════════╝"""
        print(banner)
        return

    if SYSTEM == "Windows":
        binary = DIST_DIR / APP_NAME / f"{APP_NAME}.exe"
        banner = f"""
╔══════════════════════════════════════════════════════════╗
║ Build complete! ║
╠══════════════════════════════════════════════════════════╣
║ Exe: {str(binary):<51}║
╠══════════════════════════════════════════════════════════╣
║ To run: ║
║ Double-click "{APP_NAME}.exe" ║
║ ║
║ Opens on http://127.0.0.1:5100 in a native WebView2 ║
║ window (Edge engine, built into Windows 10/11). ║
║ If pywebview was not installed, falls back to browser. ║
║ ║
║ To distribute as installer: ║
║ Install NSIS: https://nsis.sourceforge.io ║
║ Then run: python build_gdpr.py --installer ║
╚══════════════════════════════════════════════════════════╝"""
        print(banner)
        return

    # Any other platform: no packaged artefact to point at, just confirm.
    print("\n [+] Build complete — see dist/")
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# Main
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
def main():
    """Command-line entry point: verify prerequisites, then build the app.

    Steps, in order:
      1. Parse ``--clean``, ``--dmg``, ``--installer`` and ``--icons-only``.
      2. Unless ``--icons-only`` was given, check that PyInstaller, msal and
         requests are importable and that the required source files exist
         in ``HERE``.  Any of these missing ends the process with exit
         status 1.  pywebview and pystray are optional and only produce a
         warning when absent.
      3. With ``--clean``, remove ``BUILD_DIR`` and ``DIST_DIR``.
      4. Regenerate icons; stop here when ``--icons-only`` was given.
      5. Write the launcher script, run PyInstaller, and run the optional
         platform-specific packaging step (``--dmg`` on macOS,
         ``--installer`` on Windows).
      6. Print the post-build summary.
    """
    parser = argparse.ArgumentParser(description="Build GDPRScanner app")
    parser.add_argument("--clean", action="store_true", help="Remove build/ and dist/ first")
    parser.add_argument("--dmg", action="store_true", help="macOS: wrap .app in .dmg after build")
    parser.add_argument("--installer", action="store_true", help="Windows: create NSIS installer")
    parser.add_argument("--icons-only", action="store_true", help="Only regenerate icons, don't build")
    args = parser.parse_args()

    print(f"\n GDPRScanner — App Builder v{APP_VERSION}")
    print(f" Platform: {SYSTEM} Python: {sys.version.split()[0]}")
    print(f" {'─' * 42}\n")

    if not args.icons_only:
        # Check PyInstaller (hard requirement)
        try:
            import PyInstaller
            print(f" [+] PyInstaller {PyInstaller.__version__}")
        except ImportError:
            print(" [!] PyInstaller not found. Install with:")
            print(" pip install pyinstaller pyinstaller-hooks-contrib")
            sys.exit(1)

        # Check pywebview (optional)
        try:
            import webview
            try:
                _wv_ver = webview.__version__
            except AttributeError:
                # The module has no __version__ attribute here; ask the
                # installed distribution metadata instead.
                import importlib.metadata
                _wv_ver = importlib.metadata.version("pywebview")
            print(f" [+] pywebview {_wv_ver} (native window — recommended)")
        except ImportError:
            print(" [!] pywebview not found — will fall back to system browser")
            print(" Install with: pip install pywebview")

        # Check pystray (optional)
        try:
            import pystray
            print(" [+] pystray available (browser-fallback tray icon)")
        except ImportError:
            print(" [!] pystray not found — no tray icon in browser-fallback mode")

        # Check MSAL (hard requirement)
        try:
            import msal
            print(f" [+] msal {msal.__version__}")
        except ImportError:
            print(" [!] msal not found — run: pip install msal")
            sys.exit(1)

        # Check requests (hard requirement)
        try:
            import requests
            print(f" [+] requests {requests.__version__}")
        except ImportError:
            print(" [!] requests not found — run: pip install requests")
            sys.exit(1)

        # Check source files
        for fname in ["gdpr_scanner.py", "gdpr_db.py", "m365_connector.py", "document_scanner.py",
                      "sse.py", "checkpoint.py", "app_config.py", "cpr_detector.py", "scan_engine.py"]:
            p = HERE / fname
            if not p.exists():
                print(f" [!] {fname} not found in {HERE}")
                sys.exit(1)
            print(f" [+] Found {fname}")

    # Clean — only when requested, as the --clean help text states.
    if args.clean:
        for d in [BUILD_DIR, DIST_DIR]:
            if d.exists():
                shutil.rmtree(d)
                print(f" [+] Removed {d.name}/")

    # Icons
    print("\n Generating icons …")
    make_icons()

    if args.icons_only:
        return

    # Write launcher
    print("\n Writing launcher …")
    ENTRY_POINT.write_text(LAUNCHER_CODE, encoding="utf-8")
    print(f" [+] {ENTRY_POINT.name}")

    # cv2 DLL check on Windows
    if SYSTEM == "Windows":
        try:
            import cv2  # noqa: F401
        except ImportError as e:
            # Only the DLL-load failure is repaired here; a plain
            # "module not found" is left alone.
            if "DLL load failed" in str(e):
                print(" [!] cv2 DLL load failed — reinstalling headless variant …")
                subprocess.run([sys.executable, "-m", "pip", "install",
                                "--force-reinstall", "opencv-python-headless", "-q"], check=False)

    # Run PyInstaller
    print("\n Running PyInstaller …")
    pyi_args = get_pyinstaller_args()

    import PyInstaller.__main__ as pyi
    pyi.run(pyi_args)

    # Post-build
    if args.dmg and SYSTEM == "Darwin":
        create_dmg()
    if args.installer and SYSTEM == "Windows":
        create_nsis_installer()

    print_next_steps()


if __name__ == "__main__":
    main()
|