GDPRScanner/build_gdpr.py
2026-04-11 04:38:11 +02:00

1096 lines
43 KiB
Python
Executable File

#!/usr/bin/env python3
"""
GDPRScanner — Self-Contained App Builder
==========================================
Packages gdpr_scanner.py + m365_connector.py + document_scanner.py into a
native desktop app:
macOS -> dist/GDPRScanner.app (double-click to run)
Windows -> dist/GDPRScanner.exe (double-click to run)
The app starts Flask on port 5100, opens the UI in a native webview window
(WKWebView on macOS, WebView2 on Windows), and quits cleanly when the window
is closed.
Usage:
python build_gdpr.py # build for current platform
python build_gdpr.py --clean # remove build/ and dist/ first
python build_gdpr.py --dmg # macOS: also wrap .app in a .dmg
python build_gdpr.py --installer # Windows: also build NSIS installer
python build_gdpr.py --icons-only # only regenerate icons, skip the build
Requirements (install once via pip):
pip install pyinstaller pyinstaller-hooks-contrib
pip install pywebview # native window (no browser chrome)
pip install pystray pillow # fallback tray icon when pywebview absent
Python version:
Requires 3.11 or 3.12. spaCy (used by document_scanner for NER) does not
support 3.13+. This script auto-relaunches with python3.12/python3.11 if
the current interpreter is incompatible.
"""
# ── Version guard ─────────────────────────────────────────────────────────────
# The build only works on CPython 3.11 / 3.12.  When started by any other
# interpreter, look for a suitable one on PATH and hand the build over to it;
# if none is found, print install hints and exit with status 1.
import sys as _sys

if _sys.version_info[:2] < (3, 11) or _sys.version_info[:2] > (3, 12):
    import os as _os
    import re as _re
    import subprocess as _sp

    _running = f"{_sys.version_info.major}.{_sys.version_info.minor}"
    print(f" [!] Python {_running} is not supported (need 3.11 or 3.12 — spaCy incompatible with 3.13+)")

    def _check_version(cmd: list) -> bool:
        """Return True when ``cmd --version`` reports Python 3.11 or 3.12."""
        try:
            banner = _sp.check_output(
                [*cmd, "--version"], stderr=_sp.STDOUT, text=True
            ).strip()
            parsed = _re.search(r"Python (\d+)\.(\d+)", banner)
            if not parsed:
                return False
            return int(parsed.group(1)) == 3 and int(parsed.group(2)) in (11, 12)
        except (FileNotFoundError, _sp.CalledProcessError, OSError):
            # Command missing or failing: simply not a usable interpreter.
            return False

    # First match wins: Unix-style names first, then the Windows "py" launcher.
    _launcher = None
    for _option in (["python3.12"], ["python3.11"], ["py", "-3.12"], ["py", "-3.11"]):
        if _check_version(_option):
            _launcher = _option
            break

    if _launcher is not None:
        print(f" [*] Re-launching with: {' '.join(_launcher)}")
        _child = _sp.run([*_launcher, _os.path.abspath(__file__), *_sys.argv[1:]])
        # Propagate the child's exit status so callers see the real build result.
        _sys.exit(_child.returncode)

    print()
    print(" No compatible Python found on PATH.")
    print(" Install Python 3.12:")
    _install_hints = {
        "darwin": " brew install python@3.12",
        "win32": " winget install Python.Python.3.12",
    }
    if _sys.platform in _install_hints:
        print(_install_hints[_sys.platform])
    print()
    _sys.exit(1)
# ── Standard imports ──────────────────────────────────────────────────────────
import argparse
import platform
import re
import shutil
import subprocess
import sys
import textwrap
from pathlib import Path
# Directory that contains this build script; all input and output paths used
# by the build are resolved relative to it.
HERE = Path(__file__).parent.resolve()
# Host operating system name, used to select platform-specific build steps.
SYSTEM = platform.system() # "Darwin", "Windows", "Linux"
# ── App metadata ──────────────────────────────────────────────────────────────
# Name passed to PyInstaller (--name) and reused for the .app/.exe, .dmg and
# installer file names.
APP_NAME = "GDPRScanner"
# Default Flask port.  NOTE(review): the launcher template below hard-codes
# its own 5100 default rather than reading this constant.
APP_PORT = 5100
# macOS bundle identifier, passed via --osx-bundle-identifier.
BUNDLE_ID = "com.m365scanner.app"
def _read_app_version() -> str:
# Read from VERSION file (single source of truth)
try:
return (HERE / "VERSION").read_text(encoding="utf-8").strip()
except Exception:
pass
return "1.0.0"
# Version string shown in the build banner and embedded in the .dmg name, the
# installer name and the Windows version resource.
APP_VERSION = _read_app_version()
# Preferred icon files.  get_pyinstaller_args() picks the first existing file
# from a platform-specific fallback list that starts with these.
ICON_MACOS = HERE / "icon_gdpr.icns" # optional; falls back to icon.icns / icon_gdpr.png / icon.png
ICON_WIN = HERE / "icon_gdpr.ico" # optional; falls back to icon.ico / icon_gdpr.png
# ── Paths ─────────────────────────────────────────────────────────────────────
# Launcher script written from LAUNCHER_CODE and handed to PyInstaller as the
# entry point.  The file name keeps an "m365_" prefix although the template's
# own docstring refers to it as gdpr_launcher.py.
ENTRY_POINT = HERE / "m365_launcher.py" # generated by this script
# PyInstaller --distpath (final artefacts) and --workpath (intermediate files).
DIST_DIR = HERE / "dist"
BUILD_DIR = HERE / "build"
# ═══════════════════════════════════════════════════════════════════════════════
# Step 1 — Generate the launcher entry point
# ═══════════════════════════════════════════════════════════════════════════════
LAUNCHER_CODE = '''\
"""
gdpr_launcher.py — entry point for the packaged GDPRScanner app.
Responsibilities:
1. Find a free port (default 5100)
2. Start Flask in a background thread
3. Open the UI in a native webview window (pywebview)
— falls back to the system browser if pywebview is unavailable
Generated by build_gdpr.py — do not edit manually.
"""
import os
os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
import subprocess
import sys
import socket
import threading
import time
import webbrowser
from pathlib import Path
if getattr(sys, "frozen", False):
BASE_DIR = Path(sys._MEIPASS)
else:
BASE_DIR = Path(__file__).parent
def _setup_external_tools():
"""
Locate Tesseract and Poppler regardless of how the app was launched.
GDPRScanner calls document_scanner for file content extraction, which
may need OCR for scanned PDFs — same setup as Document Scanner.
"""
extra_paths = []
if sys.platform == "darwin":
brew_prefix = None
for brew_candidate in ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]:
if Path(brew_candidate).exists():
try:
result = subprocess.run(
[brew_candidate, "--prefix"],
capture_output=True, text=True, timeout=5
)
if result.returncode == 0:
brew_prefix = result.stdout.strip()
break
except Exception:
pass
brew_candidates = []
if brew_prefix:
brew_candidates.append(brew_prefix)
brew_candidates += ["/opt/homebrew", "/usr/local", "/home/linuxbrew/.linuxbrew"]
for prefix in brew_candidates:
bin_dir = Path(prefix) / "bin"
if bin_dir.exists():
extra_paths.append(str(bin_dir))
tessdata = Path(prefix) / "share" / "tessdata"
if tessdata.exists():
os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
for t in ["/opt/homebrew/bin/tesseract", "/usr/local/bin/tesseract"]:
if Path(t).exists():
os.environ.setdefault("TESSERACT_CMD", t)
break
for p in ["/opt/homebrew/bin", "/usr/local/bin",
"/opt/homebrew/opt/poppler/bin", "/usr/local/opt/poppler/bin"]:
if (Path(p) / "pdftoppm").exists():
os.environ.setdefault("POPPLER_PATH", p)
extra_paths.insert(0, p)
break
elif sys.platform == "win32":
import winreg
tess_dir = None
try:
key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\\Tesseract-OCR")
tess_dir, _ = winreg.QueryValueEx(key, "InstallDir")
winreg.CloseKey(key)
except Exception:
pass
for d in ([tess_dir] if tess_dir else []) + [
r"C:\\Program Files\\Tesseract-OCR",
r"C:\\Program Files (x86)\\Tesseract-OCR",
r"C:\\Tesseract-OCR",
]:
if d and Path(d, "tesseract.exe").exists():
os.environ.setdefault("TESSERACT_CMD", str(Path(d) / "tesseract.exe"))
extra_paths.append(d)
tessdata = Path(d) / "tessdata"
if tessdata.exists():
os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
break
for d in [
r"C:\\poppler\\Library\\bin", r"C:\\poppler\\bin",
r"C:\\Program Files\\poppler\\Library\\bin",
r"C:\\Program Files\\poppler\\bin",
r"C:\\tools\\poppler\\Library\\bin",
]:
if (Path(d) / "pdftoppm.exe").exists():
os.environ.setdefault("POPPLER_PATH", d)
extra_paths.insert(0, d)
break
if getattr(sys, "frozen", False):
tess_bin = BASE_DIR / ("tesseract.exe" if sys.platform == "win32" else "tesseract")
if tess_bin.exists():
os.environ.setdefault("TESSERACT_CMD", str(tess_bin))
for sub in ["poppler/bin", "poppler/Library/bin", "."]:
pdftoppm = BASE_DIR / sub / ("pdftoppm.exe" if sys.platform == "win32" else "pdftoppm")
if pdftoppm.exists():
os.environ.setdefault("POPPLER_PATH", str(pdftoppm.parent))
extra_paths.insert(0, str(pdftoppm.parent))
break
extra_paths.insert(0, str(BASE_DIR))
if extra_paths:
current = os.environ.get("PATH", "")
additions = os.pathsep.join(p for p in extra_paths if p not in current)
if additions:
os.environ["PATH"] = additions + os.pathsep + current
cmd = os.environ.get("TESSERACT_CMD")
if cmd and Path(cmd).exists():
try:
import pytesseract
pytesseract.pytesseract.tesseract_cmd = cmd
except ImportError:
pass
poppler = os.environ.get("POPPLER_PATH")
if poppler:
try:
import pdf2image.pdf2image as _p2i
_orig = _p2i.convert_from_path
def _patched(pdf_path, *a, poppler_path=None, **kw):
return _orig(pdf_path, *a, poppler_path=poppler_path or poppler, **kw)
_p2i.convert_from_path = _patched
except Exception:
pass
_setup_external_tools()
def find_free_port(start: int = 5100) -> int:
for port in range(start, start + 100):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
try:
s.bind(("127.0.0.1", port))
return port
except OSError:
continue
raise RuntimeError("No free port found in range 5100-5200")
# ── Single-instance lock ──────────────────────────────────────────────────────
_LOCK_FH = None
def acquire_instance_lock() -> bool:
"""
Acquire an exclusive process lock so only one instance runs at a time.
Returns True if the lock was acquired, False if another instance holds it.
The lock is released automatically when the process exits.
"""
global _LOCK_FH
lock_dir = Path.home() / ".gdprscanner"
lock_dir.mkdir(parents=True, exist_ok=True)
lock_path = lock_dir / "app.lock"
try:
_LOCK_FH = open(lock_path, "w")
if sys.platform == "win32":
import msvcrt
msvcrt.locking(_LOCK_FH.fileno(), msvcrt.LK_NBLCK, 1)
else:
import fcntl
fcntl.flock(_LOCK_FH, fcntl.LOCK_EX | fcntl.LOCK_NB)
_LOCK_FH.write(str(os.getpid()))
_LOCK_FH.flush()
return True
except (IOError, OSError):
if _LOCK_FH:
_LOCK_FH.close()
_LOCK_FH = None
return False
def _activate_venv():
if getattr(sys, "frozen", False):
return
for candidate in [BASE_DIR / "venv", Path(__file__).parent / "venv"]:
if sys.platform == "win32":
site_pkg = candidate / "Lib" / "site-packages"
else:
lib = candidate / "lib"
site_pkg = None
if lib.exists():
for d in lib.iterdir():
sp = d / "site-packages"
if sp.exists():
site_pkg = sp
break
if site_pkg and site_pkg.exists():
sys.path.insert(0, str(site_pkg))
os.environ["VIRTUAL_ENV"] = str(candidate)
os.environ.pop("PYTHONHOME", None)
break
_activate_venv()
def start_flask(port: int):
import gdpr_scanner as _app
_app.app.run(host="127.0.0.1", port=port, debug=False,
threaded=True, use_reloader=False)
def wait_for_flask(port: int, timeout: float = 20.0) -> bool:
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
try:
with socket.create_connection(("127.0.0.1", port), timeout=0.2):
return True
except OSError:
time.sleep(0.1)
return False
def _load_icon_image():
try:
from PIL import Image as PILImage
for name in ["icon_gdpr.ico", "icon_gdpr.icns", "icon_gdpr.png",
"icon.ico", "icon.icns", "icon.png",
"icon_m365.ico", "icon_m365.icns", "icon_m365.png"]: # legacy fallback
p = BASE_DIR / name
if p.exists():
return PILImage.open(p).convert("RGBA").resize((64, 64))
# Minimal fallback — blue square
img = PILImage.new("RGBA", (64, 64), (0, 114, 206, 255))
return img
except Exception:
return None
def run_webview(port: int):
"""
Open the app in a native webview window.
Returns True on success, False if pywebview is unavailable.
"""
try:
import webview
except ImportError:
return False
class Api:
def quit(self):
import webview as _wv
for w in _wv.windows:
w.destroy()
def save_excel(self):
"""Fetch the Excel export from Flask and save via native dialog."""
import urllib.request, datetime, os, webview as _wv
try:
url = f"http://127.0.0.1:{port}/api/export_excel"
with urllib.request.urlopen(url) as resp:
data = resp.read()
fname = f"gdpr_scan_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
win = _wv.windows[0] if _wv.windows else None
if win:
paths = win.create_file_dialog(
_wv.SAVE_DIALOG,
save_filename=fname,
file_types=("Excel Files (*.xlsx)",),
)
if paths:
dest = paths[0] if isinstance(paths, (list, tuple)) else paths
if not dest.endswith(".xlsx"):
dest += ".xlsx"
with open(dest, "wb") as f:
f.write(data)
return {"ok": True, "path": dest}
return {"ok": False, "error": "cancelled"}
except Exception as e:
return {"ok": False, "error": str(e)}
def save_db_export(self):
"""Fetch the DB export ZIP from Flask and save via native dialog."""
import urllib.request, datetime, webview as _wv
try:
url = f"http://127.0.0.1:{port}/api/db/export"
with urllib.request.urlopen(url) as resp:
data = resp.read()
fname = f"gdpr_export_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
win = _wv.windows[0] if _wv.windows else None
if win:
paths = win.create_file_dialog(
_wv.SAVE_DIALOG,
save_filename=fname,
file_types=("ZIP Archive (*.zip)",),
)
if paths:
dest = paths[0] if isinstance(paths, (list, tuple)) else paths
if not dest.endswith(".zip"):
dest += ".zip"
with open(dest, "wb") as f:
f.write(data)
return {"ok": True, "path": dest}
return {"ok": False, "error": "cancelled"}
except Exception as e:
return {"ok": False, "error": str(e)}
def save_article30(self):
"""Fetch the Article 30 Word doc from Flask and save via native dialog."""
import urllib.request, datetime, webview as _wv
try:
url = f"http://127.0.0.1:{port}/api/export_article30"
with urllib.request.urlopen(url) as resp:
data = resp.read()
fname = f"article30_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
win = _wv.windows[0] if _wv.windows else None
if win:
paths = win.create_file_dialog(
_wv.SAVE_DIALOG,
save_filename=fname,
file_types=("Word Document (*.docx)",),
)
if paths:
dest = paths[0] if isinstance(paths, (list, tuple)) else paths
if not dest.endswith(".docx"):
dest += ".docx"
with open(dest, "wb") as f:
f.write(data)
return {"ok": True, "path": dest}
return {"ok": False, "error": "cancelled"}
except Exception as e:
return {"ok": False, "error": str(e)}
def open_manual(self, lang: str):
"""Open the user manual in a new native webview window."""
import webview as _wv
url = f"http://127.0.0.1:{port}/manual?lang={lang}"
existing = next((w for w in _wv.windows if getattr(w, "_is_manual", False)), None)
if existing:
existing.load_url(url)
else:
mw = _wv.create_window(
title="GDPRScanner — Manual",
url=url,
width=960,
height=800,
resizable=True,
)
mw._is_manual = True
w = webview.create_window(
title="GDPRScanner",
url=f"http://127.0.0.1:{port}/",
width=1400,
height=900,
min_size=(900, 600),
js_api=Api(),
)
def _on_closed():
os._exit(0)
w.events.closed += _on_closed
webview.start(debug=False)
return True
def _run_browser_fallback(port: int):
"""Open in system browser + optional tray icon."""
url = f"http://127.0.0.1:{port}/"
webbrowser.open(url)
try:
import pystray
from PIL import Image as PILImage
img = _load_icon_image()
if img is None:
return
def _quit(icon, item):
icon.stop()
os._exit(0)
def _open(icon, item):
webbrowser.open(url)
menu = pystray.Menu(
pystray.MenuItem("Open GDPRScanner", _open, default=True),
pystray.MenuItem("Quit", _quit),
)
icon = pystray.Icon("GDPRScanner", img, "GDPRScanner", menu)
icon.run()
except ImportError:
# No pystray — just keep the process alive
try:
while True:
time.sleep(60)
except KeyboardInterrupt:
pass
if __name__ == "__main__":
if not acquire_instance_lock():
print("GDPRScanner is already running.", file=sys.stderr)
sys.exit(1)
# On macOS, multiprocessing uses "fork" which is unsafe with some
# frameworks — use "spawn" to match PyInstaller's behaviour.
if sys.platform == "darwin":
import multiprocessing
multiprocessing.set_start_method("spawn", force=True)
port = find_free_port()
# Machine-readable port line — stdout pipe for any parent process.
print(f"GDPR_PORT={port}", flush=True)
# Pre-import on main thread so cv2 / numpy initialise safely
try:
import gdpr_scanner # noqa: F401 — side effect: loads Flask app
except Exception as e:
print(f"[!] Failed to import gdpr_scanner: {e}", file=sys.stderr)
sys.exit(1)
flask_thread = threading.Thread(target=start_flask, args=(port,), daemon=True)
flask_thread.start()
if not wait_for_flask(port):
print("[!] Flask did not start in time", file=sys.stderr)
sys.exit(1)
webview_ok = run_webview(port)
if not webview_ok:
_run_browser_fallback(port)
'''
# ═══════════════════════════════════════════════════════════════════════════════
# Step 2 — Icon generation
# ═══════════════════════════════════════════════════════════════════════════════
def _load_font(image_font, candidates, size):
    """Return the first usable TrueType font from *candidates* at *size*.

    Args:
        image_font: The ``PIL.ImageFont`` module.
        candidates: Font file paths to try, in order of preference.
        size: Point size to load the font at.

    Returns:
        A loaded TrueType font, or Pillow's built-in default font when no
        candidate exists or can be loaded.
    """
    for font_path in candidates:
        if not Path(font_path).exists():
            continue
        try:
            return image_font.truetype(font_path, size=size)
        except Exception:
            # Best effort: an unreadable font file just means we try the next.
            continue
    return image_font.load_default()


def make_icons():
    """Generate icon_gdpr.icns (macOS only), icon_gdpr.ico and icon_gdpr.png.

    Draws a 512x512 rounded navy square with a white "GDPR" title and a
    smaller "Scanner" subtitle, then writes the platform icon files next to
    this script.  Does nothing (apart from a hint) when Pillow is missing.
    """
    try:
        from PIL import Image, ImageDraw, ImageFont
    except ImportError:
        print(" [!] Pillow not found — skipping icon generation")
        print(" Install with: pip install pillow")
        return

    # ── Draw the icon: dark background + "GDPR" text ──────────────────────────
    SIZE = 512
    R = 100                      # corner radius of the rounded square
    BG = (31, 41, 64, 255)       # dark navy
    ACC = (255, 255, 255, 255)   # white text

    img = Image.new("RGBA", (SIZE, SIZE), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Rounded rectangle = two overlapping rectangles plus four corner discs.
    draw.rectangle([R, 0, SIZE - R, SIZE], fill=BG)
    draw.rectangle([0, R, SIZE, SIZE - R], fill=BG)
    for cx, cy in [(R, R), (SIZE - R, R), (R, SIZE - R), (SIZE - R, SIZE - R)]:
        draw.ellipse([cx - R, cy - R, cx + R, cy + R], fill=BG)

    # Title text "GDPR", centred and nudged 10px upwards.
    font = _load_font(
        ImageFont,
        [
            "/System/Library/Fonts/Helvetica.ttc",
            "/System/Library/Fonts/Arial.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "C:/Windows/Fonts/arialbd.ttf",
        ],
        160,
    )
    text = "GDPR"
    bbox = draw.textbbox((0, 0), text, font=font)
    tw = bbox[2] - bbox[0]
    th = bbox[3] - bbox[1]
    draw.text(((SIZE - tw) / 2 - bbox[0], (SIZE - th) / 2 - bbox[1] - 10),
              text, fill=ACC, font=font)

    # Smaller "Scanner" subtitle at 65% of the icon height.
    sub_font = _load_font(
        ImageFont,
        [
            "/System/Library/Fonts/Helvetica.ttc",
            "/System/Library/Fonts/Arial.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "C:/Windows/Fonts/arial.ttf",
        ],
        68,
    )
    sub = "Scanner"
    sbbox = draw.textbbox((0, 0), sub, font=sub_font)
    sw = sbbox[2] - sbbox[0]
    draw.text(((SIZE - sw) / 2 - sbbox[0], SIZE * 0.65),
              sub, fill=(200, 230, 255, 220), font=sub_font)

    # ── macOS .icns ────────────────────────────────────────────────────────────
    if SYSTEM == "Darwin":
        icns_path = HERE / "icon_gdpr.icns"
        iconset = HERE / "icon_gdpr.iconset"
        iconset.mkdir(exist_ok=True)
        for s in [16, 32, 64, 128, 256, 512]:
            img.resize((s, s), Image.LANCZOS).save(iconset / f"icon_{s}x{s}.png")
            img.resize((s * 2, s * 2), Image.LANCZOS).save(iconset / f"icon_{s}x{s}@2x.png")
        result = subprocess.run(
            ["iconutil", "-c", "icns", str(iconset), "-o", str(icns_path)],
            capture_output=True,
        )
        # The iconset folder is only an intermediate; remove it either way.
        shutil.rmtree(iconset, ignore_errors=True)
        if result.returncode == 0:
            print(f" [+] Icon: {icns_path.name}")
        else:
            print(" [!] iconutil failed — no .icns generated")

    # ── Windows .ico ───────────────────────────────────────────────────────────
    # Save from the full-size master image: Pillow's ICO writer drops every
    # requested size larger than the image being saved, so saving from a
    # 16x16 frame would produce an .ico with only the 16x16 entry.
    ico_path = HERE / "icon_gdpr.ico"
    ico_sizes = [(s, s) for s in [16, 32, 48, 64, 128, 256]]
    img.save(ico_path, format="ICO", sizes=ico_sizes)
    print(f" [+] Icon: {ico_path.name}")

    # Save PNG fallback
    img.save(HERE / "icon_gdpr.png")
    print(" [+] Icon: icon_gdpr.png")
# ═══════════════════════════════════════════════════════════════════════════════
# Step 3 — Build with PyInstaller
# ═══════════════════════════════════════════════════════════════════════════════
def get_pyinstaller_args() -> list:
    """Return the PyInstaller command-line arguments for the current platform.

    Collects hidden imports, the application's source modules and optional
    resource directories (only those that exist), OpenCV data files and the
    first spaCy model that can be collected, then appends platform-specific
    icon / windowing options.

    Returns:
        Argument list suitable for ``PyInstaller.__main__.run``.
    """
    hidden = [
        # Flask / web
        "flask", "flask.templating", "jinja2", "jinja2.ext",
        "werkzeug", "werkzeug.serving", "werkzeug.routing",
        # M365 / auth
        "msal", "msal.application", "msal.authority",
        "requests", "requests.adapters", "urllib3",
        "cryptography", "cryptography.hazmat",
        # Document scanning (via document_scanner)
        "pdfplumber", "pdfplumber.page", "pdfminer", "pdfminer.high_level",
        "pdf2image", "pytesseract",
        "pypdf", "reportlab", "reportlab.pdfgen", "reportlab.lib",
        "spacy", "spacy.lang.da", "spacy.lang.en",
        "docx", "docx.oxml", "docx.styles",
        "openpyxl", "openpyxl.styles", "openpyxl.utils",
        "numpy", "PIL", "PIL.Image",
        # App window
        "pystray", "pystray._base",
        "webview", "webview.platforms",
        "webview.platforms.cocoa",
        "webview.platforms.winforms",
        "webview.platforms.gtk",
        "webview.platforms.qt",
        # Scheduler (#19)
        "apscheduler", "apscheduler.schedulers.background",
        "apscheduler.triggers.cron",
    ]

    # Application modules shipped as plain data files in the bundle root.
    bundled_modules = [
        "gdpr_scanner.py",
        "m365_connector.py",
        "gdpr_db.py",
        "file_scanner.py",
        "document_scanner.py",
        # ── Modules split from gdpr_scanner.py in v1.6.1 (#25) ──────────────
        "sse.py",
        "checkpoint.py",
        "app_config.py",
        "cpr_detector.py",
        "scan_engine.py",
        "google_connector.py",
        "scan_scheduler.py",
    ]
    datas = [(str(HERE / name), ".") for name in bundled_modules]

    # Bundle VERSION file — read at startup by both scanners
    version_file = HERE / "VERSION"
    if version_file.exists():
        datas.append((str(version_file), "."))
        print(f" [+] Bundling VERSION: {version_file.read_text(encoding='utf-8').strip()}")

    # Optional resource directories: each is bundled only when present, and
    # each report line is printed only for the directory it describes.
    lang_dir = HERE / "lang"
    if lang_dir.exists():
        datas.append((str(lang_dir), "lang"))
        print(f" [+] Bundling lang files: {list(lang_dir.glob('*.json')) + list(lang_dir.glob('*.lang'))}")

    keywords_dir = HERE / "keywords"
    if keywords_dir.exists():
        datas.append((str(keywords_dir), "keywords"))
        print(f" [+] Bundling keywords: {list(keywords_dir.glob('*.json'))}")

    skus_dir = HERE / "classification"
    if skus_dir.exists():
        datas.append((str(skus_dir), "classification"))
        print(f" [+] Bundling classification files: {list(skus_dir.glob('*.json'))}")

    templates_dir = HERE / "templates"
    if templates_dir.exists():
        datas.append((str(templates_dir), "templates"))
        print(f" [+] Bundling templates: {list(templates_dir.glob('*.html'))}")

    static_dir = HERE / "static"
    if static_dir.exists():
        datas.append((str(static_dir), "static"))
        print(f" [+] Bundling static: {list(static_dir.iterdir())}")

    for manual_file in (HERE / "docs" / "manuals").glob("MANUAL-*.md"):
        datas.append((str(manual_file), "docs/manuals"))
        print(f" [+] Bundling manual: {manual_file.name}")

    # Bundle routes/ blueprints
    routes_dir = HERE / "routes"
    if routes_dir.exists():
        route_files = list(routes_dir.glob("*.py"))
        datas.extend((str(f), "routes") for f in route_files)
        print(f" [+] Bundling routes/: {[f.name for f in route_files]}")

    # cv2 cascade data: locate the package by importing it, or via its import
    # spec when the import itself fails.
    try:
        import cv2 as _cv2
        cv2_data = Path(_cv2.__file__).parent / "data"
    except Exception:
        import importlib.util
        spec = importlib.util.find_spec("cv2")
        cv2_data = Path(spec.origin).parent / "data" if spec and spec.origin else None
    if cv2_data and Path(cv2_data).exists():
        datas.append((str(cv2_data), "cv2/data"))
        print(" [+] Bundling cv2/data")
        cv2_pkg = Path(cv2_data).parent
        for so in cv2_pkg.glob("cv2*.so"):
            datas.append((str(so), "cv2"))
        dylibs = cv2_pkg / ".dylibs"
        if dylibs.exists():
            datas.append((str(dylibs), "cv2/.dylibs"))

    # spaCy models: bundle the first model in preference order that yields
    # any data files or submodules.  Entirely best-effort — a missing model
    # or missing PyInstaller hook utilities must not abort the build.
    try:
        from PyInstaller.utils.hooks import collect_data_files as _cdf, collect_submodules as _csm
        for model in ["da_core_news_lg", "da_core_news_md", "da_core_news_sm",
                      "xx_ent_wiki_sm", "en_core_web_sm"]:
            try:
                _md = _cdf(model)
                _mh = _csm(model)
                if _md or _mh:
                    datas += _md
                    hidden += _mh
                    print(f" [+] Bundling spaCy model: {model}")
                    break
            except Exception:
                pass
    except Exception:
        pass

    args = [
        str(ENTRY_POINT),
        "--name", APP_NAME,
        "--onedir",
        "--noconfirm",
        "--clean",
        "--distpath", str(DIST_DIR),
        "--workpath", str(BUILD_DIR),
        "--specpath", str(HERE),
        "--exclude-module", "cv2",
    ]
    for h in hidden:
        args += ["--hidden-import", h]

    # --add-data uses ";" as the source/destination separator on Windows and
    # ":" everywhere else.
    sep = ";" if SYSTEM == "Windows" else ":"
    for src, dst in datas:
        args += ["--add-data", f"{src}{sep}{dst}"]

    # Platform options
    if SYSTEM == "Darwin":
        icon = next(
            (p for p in [ICON_MACOS, HERE / "icon.icns", HERE / "icon_gdpr.png", HERE / "icon.png"]
             if p.exists()), None
        )
        if icon:
            args += ["--icon", str(icon)]
        args += ["--windowed", "--osx-bundle-identifier", BUNDLE_ID]
    elif SYSTEM == "Windows":
        icon = next(
            (p for p in [ICON_WIN, HERE / "icon.ico", HERE / "icon_gdpr.png"]
             if p.exists()), None
        )
        if icon:
            args += ["--icon", str(icon)]
        args += ["--windowed", "--version-file", str(_make_win_version_file())]
    return args
def _make_win_version_file() -> Path:
ver = tuple(int(x) for x in (APP_VERSION + ".0.0").split(".")[:4])
content = textwrap.dedent(f"""\
VSVersionInfo(
ffi=FixedFileInfo(
filevers={ver}, prodvers={ver},
mask=0x3f, flags=0x0, OS=0x4, fileType=0x1,
subtype=0x0, date=(0, 0)
),
kids=[
StringFileInfo([StringTable('040904B0', [
StringStruct('CompanyName', 'GDPRScanner'),
StringStruct('FileDescription', '{APP_NAME}'),
StringStruct('FileVersion', '{APP_VERSION}'),
StringStruct('InternalName', 'M365Scanner'),
StringStruct('LegalCopyright', ''),
StringStruct('OriginalFilename', 'GDPRScanner.exe'),
StringStruct('ProductName', '{APP_NAME}'),
StringStruct('ProductVersion', '{APP_VERSION}'),
])]),
VarFileInfo([VarStruct('Translation', [0x0409, 1200])])
]
)
""")
path = HERE / "m365_win_version_info.txt"
path.write_text(content, encoding="utf-8")
return path
# ═══════════════════════════════════════════════════════════════════════════════
# Step 4 — Post-build helpers
# ═══════════════════════════════════════════════════════════════════════════════
def create_dmg():
    """Wrap the built .app bundle in a .dmg using the ``create-dmg`` tool.

    Prints an install hint and returns when ``create-dmg`` is not on PATH.
    An existing .dmg for the same version is removed first.  A failing
    ``create-dmg`` run is reported but not raised.
    """
    if shutil.which("create-dmg") is None:
        print(" [!] create-dmg not found — skipping .dmg")
        print(" Install with: brew install create-dmg")
        return

    bundle_name = f"{APP_NAME}.app"
    source_app = DIST_DIR / bundle_name
    target_dmg = DIST_DIR / f"{APP_NAME}-{APP_VERSION}.dmg"
    if target_dmg.exists():
        target_dmg.unlink()

    print(" Creating .dmg …")
    # Window geometry and icon placement inside the mounted volume.
    layout_options = [
        "--volname", APP_NAME,
        "--window-pos", "200", "120",
        "--window-size", "600", "400",
        "--icon-size", "100",
        "--icon", bundle_name, "175", "190",
        "--hide-extension", bundle_name,
        "--app-drop-link", "425", "190",
    ]
    outcome = subprocess.run(
        ["create-dmg", *layout_options, str(target_dmg), str(source_app)]
    )
    if outcome.returncode != 0:
        print(" [!] create-dmg failed — .app is still usable directly")
    else:
        print(f" [+] DMG created: {target_dmg.name}")
def create_nsis_installer():
    """Generate an NSIS script for the built app and compile it with makensis.

    Only runs on Windows with ``makensis`` on PATH; otherwise prints a hint
    and returns.  The generated installer copies the PyInstaller output
    folder to Program Files, creates desktop and Start-menu shortcuts, and
    writes an uninstaller so the script's uninstall section is actually
    reachable.
    """
    if SYSTEM != "Windows":
        print(" [!] NSIS installer only available on Windows")
        return
    if shutil.which("makensis") is None:
        print(" [!] NSIS not found — download from https://nsis.sourceforge.io")
        return

    nsi = HERE / "m365_installer.nsi"
    dist_folder = DIST_DIR / APP_NAME
    # Doubled braces are literal NSIS ${...} references; single braces are
    # filled in by Python.
    nsi.write_text(textwrap.dedent(f"""\
        !define APP_NAME "{APP_NAME}"
        !define APP_VERSION "{APP_VERSION}"
        !define DIST_FOLDER "{dist_folder}"
        !define INSTALL_DIR "$PROGRAMFILES64\\\\{APP_NAME}"
        Name "${{APP_NAME}}"
        OutFile "dist\\\\{APP_NAME}-{APP_VERSION}-Setup.exe"
        InstallDir "${{INSTALL_DIR}}"
        RequestExecutionLevel admin
        Section "Install"
          SetOutPath "${{INSTALL_DIR}}"
          File /r "${{DIST_FOLDER}}\\\\*.*"
          WriteUninstaller "${{INSTALL_DIR}}\\\\Uninstall.exe"
          CreateShortcut "$DESKTOP\\\\{APP_NAME}.lnk" "${{INSTALL_DIR}}\\\\{APP_NAME}.exe"
          CreateShortcut "$SMPROGRAMS\\\\{APP_NAME}.lnk" "${{INSTALL_DIR}}\\\\{APP_NAME}.exe"
        SectionEnd
        Section "Uninstall"
          Delete "$DESKTOP\\\\{APP_NAME}.lnk"
          Delete "$SMPROGRAMS\\\\{APP_NAME}.lnk"
          RMDir /r "${{INSTALL_DIR}}"
        SectionEnd
        """), encoding="utf-8")

    result = subprocess.run(["makensis", str(nsi)])
    if result.returncode == 0:
        print(f" [+] Installer: dist/{APP_NAME}-{APP_VERSION}-Setup.exe")
    else:
        print(" [!] NSIS compilation failed")
def print_next_steps():
if SYSTEM == "Darwin":
app = DIST_DIR / f"{APP_NAME}.app"
print(f"""
╔══════════════════════════════════════════════════════════╗
║ Build complete! ║
╠══════════════════════════════════════════════════════════╣
║ App: {str(app):<51}
╠══════════════════════════════════════════════════════════╣
║ To run: ║
║ open "{app}"
║ — or double-click in Finder ║
║ ║
║ Opens on http://127.0.0.1:5100 in a native WKWebView ║
║ window (no browser chrome). ║
║ If pywebview was not installed, falls back to browser. ║
║ ║
║ To distribute: ║
║ python build_gdpr.py --dmg (requires create-dmg) ║
╚══════════════════════════════════════════════════════════╝""")
elif SYSTEM == "Windows":
exe = DIST_DIR / APP_NAME / f"{APP_NAME}.exe"
print(f"""
╔══════════════════════════════════════════════════════════╗
║ Build complete! ║
╠══════════════════════════════════════════════════════════╣
║ Exe: {str(exe):<51}
╠══════════════════════════════════════════════════════════╣
║ To run: ║
║ Double-click "{APP_NAME}.exe"
║ ║
║ Opens on http://127.0.0.1:5100 in a native WebView2 ║
║ window (Edge engine, built into Windows 10/11). ║
║ If pywebview was not installed, falls back to browser. ║
║ ║
║ To distribute as installer: ║
║ Install NSIS: https://nsis.sourceforge.io ║
║ Then run: python build_gdpr.py --installer ║
╚══════════════════════════════════════════════════════════╝""")
else:
print(f"\n [+] Build complete — see dist/")
# ═══════════════════════════════════════════════════════════════════════════════
# Main
# ═══════════════════════════════════════════════════════════════════════════════
def _verify_build_environment():
    """Check build prerequisites and exit with status 1 if a hard one is missing.

    PyInstaller, msal, requests and every source module that is bundled
    unconditionally are required.  pywebview and pystray are optional and
    only produce a notice when absent.
    """
    # Check PyInstaller
    try:
        import PyInstaller
        print(f" [+] PyInstaller {PyInstaller.__version__}")
    except ImportError:
        print(" [!] PyInstaller not found. Install with:")
        print(" pip install pyinstaller pyinstaller-hooks-contrib")
        sys.exit(1)

    # Check pywebview
    try:
        import webview
        try:
            _wv_ver = webview.__version__
        except AttributeError:
            # Fall back to the installed distribution's metadata when the
            # module exposes no __version__.
            import importlib.metadata
            _wv_ver = importlib.metadata.version("pywebview")
        print(f" [+] pywebview {_wv_ver} (native window — recommended)")
    except ImportError:
        print(" [!] pywebview not found — will fall back to system browser")
        print(" Install with: pip install pywebview")

    # Check pystray
    try:
        import pystray  # noqa: F401
        print(" [+] pystray available (browser-fallback tray icon)")
    except ImportError:
        print(" [!] pystray not found — no tray icon in browser-fallback mode")

    # Check MSAL
    try:
        import msal
        print(f" [+] msal {msal.__version__}")
    except ImportError:
        print(" [!] msal not found — run: pip install msal")
        sys.exit(1)

    # Check requests
    try:
        import requests
        print(f" [+] requests {requests.__version__}")
    except ImportError:
        print(" [!] requests not found — run: pip install requests")
        sys.exit(1)

    # Check source files.  The list covers every module that
    # get_pyinstaller_args() adds to the bundle unconditionally, so a missing
    # file is reported here instead of failing inside PyInstaller.
    for fname in ["gdpr_scanner.py", "gdpr_db.py", "m365_connector.py", "document_scanner.py",
                  "file_scanner.py", "sse.py", "checkpoint.py", "app_config.py",
                  "cpr_detector.py", "scan_engine.py", "google_connector.py",
                  "scan_scheduler.py"]:
        p = HERE / fname
        if not p.exists():
            print(f" [!] {fname} not found in {HERE}")
            sys.exit(1)
        print(f" [+] Found {fname}")


def main():
    """Command-line entry point: parse options and run the build pipeline.

    Steps: verify prerequisites, optionally clean build/ and dist/, generate
    icons, write the launcher script, run PyInstaller, then run the requested
    post-build packaging step and print usage hints.  With ``--icons-only``
    only the icons are regenerated.
    """
    parser = argparse.ArgumentParser(description="Build GDPRScanner app")
    parser.add_argument("--clean", action="store_true", help="Remove build/ and dist/ first")
    parser.add_argument("--dmg", action="store_true", help="macOS: wrap .app in .dmg after build")
    parser.add_argument("--installer", action="store_true", help="Windows: create NSIS installer")
    parser.add_argument("--icons-only", action="store_true", help="Only regenerate icons, don't build")
    args = parser.parse_args()

    print(f"\n GDPRScanner — App Builder v{APP_VERSION}")
    print(f" Platform: {SYSTEM} Python: {sys.version.split()[0]}")
    print(f" {'─' * 42}\n")

    if not args.icons_only:
        _verify_build_environment()

    # Clean — only when requested, so --clean actually controls the removal.
    if args.clean:
        for d in [BUILD_DIR, DIST_DIR]:
            if d.exists():
                shutil.rmtree(d)
                print(f" [+] Removed {d.name}/")

    # Icons
    print("\n Generating icons …")
    make_icons()
    if args.icons_only:
        return

    # Write launcher
    print("\n Writing launcher …")
    ENTRY_POINT.write_text(LAUNCHER_CODE, encoding="utf-8")
    print(f" [+] {ENTRY_POINT.name}")

    # cv2 DLL check on Windows: when the import fails with a DLL load error,
    # try to repair the installation by reinstalling the headless wheel.
    if SYSTEM == "Windows":
        try:
            import cv2  # noqa: F401
        except ImportError as e:
            if "DLL load failed" in str(e):
                print(" [!] cv2 DLL load failed — reinstalling headless variant …")
                subprocess.run([sys.executable, "-m", "pip", "install",
                                "--force-reinstall", "opencv-python-headless", "-q"],
                               check=False)

    # Run PyInstaller
    print("\n Running PyInstaller …")
    pyi_args = get_pyinstaller_args()
    import PyInstaller.__main__ as pyi
    pyi.run(pyi_args)

    # Post-build
    if args.dmg and SYSTEM == "Darwin":
        create_dmg()
    if args.installer and SYSTEM == "Windows":
        create_nsis_installer()
    print_next_steps()


if __name__ == "__main__":
    main()