#!/usr/bin/env python3
"""
GDPRScanner — Self-Contained App Builder
==========================================
Packages gdpr_scanner.py + m365_connector.py + document_scanner.py (and the
modules split out of them) into a native desktop app:

  macOS   -> dist/GDPRScanner.app               (double-click to run)
  Windows -> dist/GDPRScanner/GDPRScanner.exe   (double-click to run)

The app starts Flask on port 5100, opens the UI in a native webview window
(WKWebView on macOS, WebView2 on Windows), and quits cleanly when the window
is closed.

Usage:
  python build_gdpr.py               # build for current platform
  python build_gdpr.py --clean       # remove build/ and dist/ first
  python build_gdpr.py --dmg         # macOS: also wrap .app in a .dmg
  python build_gdpr.py --installer   # Windows: also build NSIS installer
  python build_gdpr.py --icons-only  # only regenerate the icon files

Requirements (install once via pip):
  pip install pyinstaller pyinstaller-hooks-contrib
  pip install pywebview          # native window (no browser chrome)
  pip install pystray pillow     # fallback tray icon when pywebview absent

Python version:
  Requires 3.11 or 3.12. spaCy (used by document_scanner for NER) does not
  support 3.13+. When run as a script, this file auto-relaunches with
  python3.12/python3.11 if the current interpreter is incompatible.
"""

# ── Version guard ─────────────────────────────────────────────────────────────
# Runs before the remaining imports, and only when executed as a script:
# importing this module (tooling, tests) must never spawn a subprocess or
# terminate the importing interpreter.
import sys as _sys

if __name__ == "__main__" and not (3, 11) <= _sys.version_info[:2] <= (3, 12):
    import os as _os
    import re as _re
    import subprocess as _sp

    _cur = f"{_sys.version_info.major}.{_sys.version_info.minor}"
    print(f"  [!] Python {_cur} is not supported "
          "(need 3.11 or 3.12 — spaCy incompatible with 3.13+)")

    def _check_version(cmd: list) -> bool:
        """Return True if running ``cmd --version`` reports Python 3.11/3.12."""
        try:
            out = _sp.check_output(cmd + ["--version"],
                                   stderr=_sp.STDOUT, text=True).strip()
        except (FileNotFoundError, _sp.CalledProcessError, OSError):
            return False
        m = _re.search(r"Python (\d+)\.(\d+)", out)
        return bool(m and int(m.group(1)) == 3 and int(m.group(2)) in (11, 12))

    _candidates = [["python3.12"], ["python3.11"], ["py", "-3.12"], ["py", "-3.11"]]
    _found = next((c for c in _candidates if _check_version(c)), None)
    if _found:
        print(f"  [*] Re-launching with: {' '.join(_found)}")
        _result = _sp.run(_found + [_os.path.abspath(__file__)] + _sys.argv[1:])
        _sys.exit(_result.returncode)

    print()
    print("  No compatible Python found on PATH.")
    print("  Install Python 3.12:")
    if _sys.platform == "darwin":
        print("    brew install python@3.12")
    elif _sys.platform == "win32":
        print("    winget install Python.Python.3.12")
    print()
    _sys.exit(1)

# ── Standard imports ──────────────────────────────────────────────────────────
import argparse
import platform
import re
import shutil
import subprocess
import sys
import textwrap
from pathlib import Path

HERE = Path(__file__).parent.resolve()
SYSTEM = platform.system()  # "Darwin", "Windows", "Linux"

# ── App metadata ──────────────────────────────────────────────────────────────
APP_NAME = "GDPRScanner"
APP_PORT = 5100
BUNDLE_ID = "com.m365scanner.app"


def _read_app_version() -> str:
    """Return the version from the VERSION file (single source of truth).

    Falls back to "1.0.0" when the file is missing, unreadable or empty.
    """
    try:
        version = (HERE / "VERSION").read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):
        return "1.0.0"
    return version or "1.0.0"


def version_tuple(version: str) -> tuple:
    """Convert a version string to the 4-int tuple Windows version info needs.

    Only the leading dotted-numeric part is used, so suffixes such as
    "-beta2" are ignored; missing components are padded with zeros and
    extra components are dropped.  A string with no leading number yields
    (0, 0, 0, 0).
    """
    m = re.match(r"\s*v?(\d+(?:\.\d+)*)", version)
    parts = [int(p) for p in m.group(1).split(".")[:4]] if m else []
    parts += [0] * (4 - len(parts))
    return tuple(parts)


APP_VERSION = _read_app_version()

ICON_MACOS = HERE / "icon_gdpr.icns"  # optional; falls back to icon.icns / icon.png
ICON_WIN = HERE / "icon_gdpr.ico"     # optional; falls back to icon.ico / icon.png

# ── Paths ─────────────────────────────────────────────────────────────────────
ENTRY_POINT = HERE / "m365_launcher.py"  # generated by this script
DIST_DIR = HERE / "dist"
BUILD_DIR = HERE / "build"

# Python source modules shipped next to the launcher.  Used both for the
# pre-flight existence check in main() and for PyInstaller's --add-data list,
# so the two can never drift apart.
SOURCE_MODULES = [
    "gdpr_scanner.py",
    "m365_connector.py",
    "gdpr_db.py",
    "file_scanner.py",
    "document_scanner.py",
    # Modules split from gdpr_scanner.py in v1.6.1 (#25)
    "sse.py",
    "checkpoint.py",
    "app_config.py",
    "cpr_detector.py",
    "scan_engine.py",
    "google_connector.py",
    "scan_scheduler.py",
]

# ═══════════════════════════════════════════════════════════════════════════════
# Step 1 — Generate the launcher entry point
# ═══════════════════════════════════════════════════════════════════════════════
# NOTE: this is a normal (non-raw) string, so every backslash that must reach
# the generated file is written doubled here.

LAUNCHER_CODE = '''\
"""
m365_launcher.py — entry point for the packaged GDPRScanner app.

Responsibilities:
  1. Find a free port (default 5100)
  2. Start Flask in a background thread
  3. Open the UI in a native webview window (pywebview) — falls back to
     the system browser if pywebview is unavailable

Generated by build_gdpr.py — do not edit manually.
"""
import os
os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")

import subprocess
import sys
import socket
import threading
import time
import webbrowser
from pathlib import Path

if getattr(sys, "frozen", False):
    BASE_DIR = Path(sys._MEIPASS)
else:
    BASE_DIR = Path(__file__).parent


def _setup_external_tools():
    """
    Locate Tesseract and Poppler regardless of how the app was launched.
    GDPRScanner calls document_scanner for file content extraction, which
    may need OCR for scanned PDFs — same setup as Document Scanner.
    """
    extra_paths = []

    if sys.platform == "darwin":
        brew_prefix = None
        for brew_candidate in ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]:
            if Path(brew_candidate).exists():
                try:
                    result = subprocess.run(
                        [brew_candidate, "--prefix"],
                        capture_output=True, text=True, timeout=5
                    )
                    if result.returncode == 0:
                        brew_prefix = result.stdout.strip()
                        break
                except Exception:
                    pass

        brew_candidates = []
        if brew_prefix:
            brew_candidates.append(brew_prefix)
        brew_candidates += ["/opt/homebrew", "/usr/local", "/home/linuxbrew/.linuxbrew"]

        for prefix in brew_candidates:
            bin_dir = Path(prefix) / "bin"
            if bin_dir.exists():
                extra_paths.append(str(bin_dir))
            tessdata = Path(prefix) / "share" / "tessdata"
            if tessdata.exists():
                os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))

        for t in ["/opt/homebrew/bin/tesseract", "/usr/local/bin/tesseract"]:
            if Path(t).exists():
                os.environ.setdefault("TESSERACT_CMD", t)
                break

        for p in ["/opt/homebrew/bin", "/usr/local/bin",
                  "/opt/homebrew/opt/poppler/bin", "/usr/local/opt/poppler/bin"]:
            if (Path(p) / "pdftoppm").exists():
                os.environ.setdefault("POPPLER_PATH", p)
                extra_paths.insert(0, p)
                break

    elif sys.platform == "win32":
        import winreg
        tess_dir = None
        try:
            key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\\Tesseract-OCR")
            tess_dir, _ = winreg.QueryValueEx(key, "InstallDir")
            winreg.CloseKey(key)
        except Exception:
            pass

        for d in ([tess_dir] if tess_dir else []) + [
            r"C:\\Program Files\\Tesseract-OCR",
            r"C:\\Program Files (x86)\\Tesseract-OCR",
            r"C:\\Tesseract-OCR",
        ]:
            if d and Path(d, "tesseract.exe").exists():
                os.environ.setdefault("TESSERACT_CMD", str(Path(d) / "tesseract.exe"))
                extra_paths.append(d)
                tessdata = Path(d) / "tessdata"
                if tessdata.exists():
                    os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
                break

        for d in [
            r"C:\\poppler\\Library\\bin",
            r"C:\\poppler\\bin",
            r"C:\\Program Files\\poppler\\Library\\bin",
            r"C:\\Program Files\\poppler\\bin",
            r"C:\\tools\\poppler\\Library\\bin",
        ]:
            if (Path(d) / "pdftoppm.exe").exists():
                os.environ.setdefault("POPPLER_PATH", d)
                extra_paths.insert(0, d)
                break

    if getattr(sys, "frozen", False):
        # Tools bundled inside the app take precedence on PATH.
        tess_bin = BASE_DIR / ("tesseract.exe" if sys.platform == "win32" else "tesseract")
        if tess_bin.exists():
            os.environ.setdefault("TESSERACT_CMD", str(tess_bin))
        for sub in ["poppler/bin", "poppler/Library/bin", "."]:
            pdftoppm = BASE_DIR / sub / ("pdftoppm.exe" if sys.platform == "win32" else "pdftoppm")
            if pdftoppm.exists():
                os.environ.setdefault("POPPLER_PATH", str(pdftoppm.parent))
                extra_paths.insert(0, str(pdftoppm.parent))
                break
        extra_paths.insert(0, str(BASE_DIR))

    if extra_paths:
        current = os.environ.get("PATH", "")
        # Compare against whole PATH entries (not substrings) and de-duplicate.
        existing = current.split(os.pathsep)
        fresh = []
        for p in extra_paths:
            if p not in existing and p not in fresh:
                fresh.append(p)
        if fresh:
            os.environ["PATH"] = os.pathsep.join(fresh) + os.pathsep + current

    cmd = os.environ.get("TESSERACT_CMD")
    if cmd and Path(cmd).exists():
        try:
            import pytesseract
            pytesseract.pytesseract.tesseract_cmd = cmd
        except ImportError:
            pass

    poppler = os.environ.get("POPPLER_PATH")
    if poppler:
        try:
            import pdf2image.pdf2image as _p2i
            _orig = _p2i.convert_from_path

            def _patched(pdf_path, *a, poppler_path=None, **kw):
                return _orig(pdf_path, *a, poppler_path=poppler_path or poppler, **kw)

            _p2i.convert_from_path = _patched
        except Exception:
            pass


_setup_external_tools()


def find_free_port(start: int = 5100) -> int:
    """Return the first bindable localhost port in [start, start + 100)."""
    for port in range(start, start + 100):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
                return port
            except OSError:
                continue
    raise RuntimeError(f"No free port found in range {start}-{start + 99}")


# ── Single-instance lock ──────────────────────────────────────────────────────
_LOCK_FH = None


def acquire_instance_lock() -> bool:
    """
    Acquire an exclusive process lock so only one instance runs at a time.
    Returns True if the lock was acquired, False if another instance holds it.
    The lock is released automatically when the process exits.
    """
    global _LOCK_FH
    lock_dir = Path.home() / ".gdprscanner"
    lock_dir.mkdir(parents=True, exist_ok=True)
    lock_path = lock_dir / "app.lock"
    try:
        _LOCK_FH = open(lock_path, "w")
        if sys.platform == "win32":
            import msvcrt
            msvcrt.locking(_LOCK_FH.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            import fcntl
            fcntl.flock(_LOCK_FH, fcntl.LOCK_EX | fcntl.LOCK_NB)
        _LOCK_FH.write(str(os.getpid()))
        _LOCK_FH.flush()
        return True
    except (IOError, OSError):
        if _LOCK_FH:
            _LOCK_FH.close()
            _LOCK_FH = None
        return False


def _activate_venv():
    """When running unfrozen, put a sibling venv's site-packages on sys.path."""
    if getattr(sys, "frozen", False):
        return
    for candidate in [BASE_DIR / "venv", Path(__file__).parent / "venv"]:
        if sys.platform == "win32":
            site_pkg = candidate / "Lib" / "site-packages"
        else:
            lib = candidate / "lib"
            site_pkg = None
            if lib.exists():
                for d in lib.iterdir():
                    sp = d / "site-packages"
                    if sp.exists():
                        site_pkg = sp
                        break
        if site_pkg and site_pkg.exists():
            sys.path.insert(0, str(site_pkg))
            os.environ["VIRTUAL_ENV"] = str(candidate)
            os.environ.pop("PYTHONHOME", None)
            break


_activate_venv()


def start_flask(port: int):
    import gdpr_scanner as _app
    _app.app.run(host="127.0.0.1", port=port, debug=False,
                 threaded=True, use_reloader=False)


def wait_for_flask(port: int, timeout: float = 20.0) -> bool:
    """Poll until the Flask port accepts connections or the timeout elapses."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection(("127.0.0.1", port), timeout=0.2):
                return True
        except OSError:
            time.sleep(0.1)
    return False


def _load_icon_image():
    """Return a 64x64 RGBA tray image, or None if Pillow is unusable."""
    try:
        from PIL import Image as PILImage
        for name in ["icon_gdpr.ico", "icon_gdpr.icns", "icon_gdpr.png",
                     "icon.ico", "icon.icns", "icon.png",
                     "icon_m365.ico", "icon_m365.icns", "icon_m365.png"]:  # legacy fallback
            p = BASE_DIR / name
            if p.exists():
                return PILImage.open(p).convert("RGBA").resize((64, 64))
        # Minimal fallback — blue square
        img = PILImage.new("RGBA", (64, 64), (0, 114, 206, 255))
        return img
    except Exception:
        return None


def run_webview(port: int):
    """
    Open the app in a native webview window.
    Returns True on success, False if pywebview is unavailable.
    """
    try:
        import webview
    except ImportError:
        return False

    def _save_export(endpoint, stem, suffix, label):
        """Fetch an export from Flask and save it via a native save dialog."""
        import datetime
        import urllib.request
        try:
            url = f"http://127.0.0.1:{port}{endpoint}"
            with urllib.request.urlopen(url) as resp:
                data = resp.read()
            stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            fname = f"{stem}_{stamp}{suffix}"
            win = webview.windows[0] if webview.windows else None
            if win:
                paths = win.create_file_dialog(
                    webview.SAVE_DIALOG,
                    save_filename=fname,
                    file_types=(label,),
                )
                if paths:
                    dest = paths[0] if isinstance(paths, (list, tuple)) else paths
                    if not dest.endswith(suffix):
                        dest += suffix
                    with open(dest, "wb") as f:
                        f.write(data)
                    return {"ok": True, "path": dest}
            return {"ok": False, "error": "cancelled"}
        except Exception as e:
            return {"ok": False, "error": str(e)}

    class Api:
        def quit(self):
            for w in list(webview.windows):
                w.destroy()

        def save_excel(self):
            """Fetch the Excel export from Flask and save via native dialog."""
            return _save_export("/api/export_excel", "gdpr_scan", ".xlsx",
                                "Excel Files (*.xlsx)")

        def save_db_export(self):
            """Fetch the DB export ZIP from Flask and save via native dialog."""
            return _save_export("/api/db/export", "gdpr_export", ".zip",
                                "ZIP Archive (*.zip)")

        def save_article30(self):
            """Fetch the Article 30 Word doc from Flask and save via native dialog."""
            return _save_export("/api/export_article30", "article30", ".docx",
                                "Word Document (*.docx)")

        def open_manual(self, lang: str):
            """Open the user manual in a new native webview window."""
            import urllib.parse
            url = f"http://127.0.0.1:{port}/manual?lang={urllib.parse.quote(str(lang))}"
            existing = next((w for w in webview.windows
                             if getattr(w, "_is_manual", False)), None)
            if existing:
                existing.load_url(url)
            else:
                mw = webview.create_window(
                    title="GDPRScanner — Manual",
                    url=url,
                    width=960,
                    height=800,
                    resizable=True,
                )
                mw._is_manual = True

    w = webview.create_window(
        title="GDPRScanner",
        url=f"http://127.0.0.1:{port}/",
        width=1400,
        height=900,
        min_size=(900, 600),
        js_api=Api(),
    )

    def _on_closed():
        # Hard exit: the Flask thread is a daemon and has nothing to flush.
        os._exit(0)

    w.events.closed += _on_closed
    webview.start(debug=False)
    return True


def _run_browser_fallback(port: int):
    """Open in system browser + optional tray icon."""
    url = f"http://127.0.0.1:{port}/"
    webbrowser.open(url)

    try:
        import pystray
    except ImportError:
        pystray = None
    img = _load_icon_image() if pystray is not None else None

    if img is not None:
        def _quit(icon, item):
            icon.stop()
            os._exit(0)

        def _open(icon, item):
            webbrowser.open(url)

        menu = pystray.Menu(
            pystray.MenuItem("Open GDPRScanner", _open, default=True),
            pystray.MenuItem("Quit", _quit),
        )
        icon = pystray.Icon("GDPRScanner", img, "GDPRScanner", menu)
        icon.run()
        return

    # No tray icon available — keep the process (and the daemon Flask
    # thread) alive until interrupted.
    try:
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    if not acquire_instance_lock():
        print("GDPRScanner is already running.", file=sys.stderr)
        sys.exit(1)

    # On macOS, multiprocessing uses "fork" which is unsafe with some
    # frameworks — use "spawn" to match PyInstaller's behaviour.
    if sys.platform == "darwin":
        import multiprocessing
        multiprocessing.set_start_method("spawn", force=True)

    port = find_free_port()
    # Machine-readable port line — stdout pipe for any parent process.
    print(f"GDPR_PORT={port}", flush=True)

    # Pre-import on main thread so cv2 / numpy initialise safely
    try:
        import gdpr_scanner  # noqa: F401 — side effect: loads Flask app
    except Exception as e:
        print(f"[!] Failed to import gdpr_scanner: {e}", file=sys.stderr)
        sys.exit(1)

    flask_thread = threading.Thread(target=start_flask, args=(port,), daemon=True)
    flask_thread.start()

    if not wait_for_flask(port):
        print("[!] Flask did not start in time", file=sys.stderr)
        sys.exit(1)

    webview_ok = run_webview(port)
    if not webview_ok:
        _run_browser_fallback(port)
'''


# ═══════════════════════════════════════════════════════════════════════════════
# Step 2 — Icon generation
# ═══════════════════════════════════════════════════════════════════════════════

def _load_font(image_font, candidates, size):
    """Return the first loadable TrueType font from *candidates*.

    Falls back to Pillow's built-in bitmap font when none can be loaded.
    """
    for font_path in candidates:
        if Path(font_path).exists():
            try:
                return image_font.truetype(font_path, size=size)
            except Exception:
                continue
    return image_font.load_default()


def make_icons():
    """Generate icon_gdpr.icns (macOS only), icon_gdpr.ico and icon_gdpr.png."""
    try:
        from PIL import Image, ImageDraw, ImageFont
    except ImportError:
        print("  [!] Pillow not found — skipping icon generation")
        print("      Install with: pip install pillow")
        return

    # ── Draw the icon: dark background + "GDPR" text ──────────────────────────
    SIZE = 512
    img = Image.new("RGBA", (SIZE, SIZE), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Rounded-rect background
    R = 100
    BG = (31, 41, 64, 255)       # dark navy
    ACC = (255, 255, 255, 255)   # white text

    # Fill body
    draw.rectangle([R, 0, SIZE - R, SIZE], fill=BG)
    draw.rectangle([0, R, SIZE, SIZE - R], fill=BG)
    # Rounded corners
    for cx, cy in [(R, R), (SIZE - R, R), (R, SIZE - R), (SIZE - R, SIZE - R)]:
        draw.ellipse([cx - R, cy - R, cx + R, cy + R], fill=BG)

    # Text "GDPR"
    font = _load_font(ImageFont, [
        "/System/Library/Fonts/Helvetica.ttc",
        "/System/Library/Fonts/Arial.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "C:/Windows/Fonts/arialbd.ttf",
    ], 160)
    text = "GDPR"
    bbox = draw.textbbox((0, 0), text, font=font)
    tw = bbox[2] - bbox[0]
    th = bbox[3] - bbox[1]
    draw.text(((SIZE - tw) / 2 - bbox[0], (SIZE - th) / 2 - bbox[1] - 10),
              text, fill=ACC, font=font)

    # Smaller "Scanner" subtitle
    sub_font = _load_font(ImageFont, [
        "/System/Library/Fonts/Helvetica.ttc",
        "/System/Library/Fonts/Arial.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "C:/Windows/Fonts/arial.ttf",
    ], 68)
    sub = "Scanner"
    sbbox = draw.textbbox((0, 0), sub, font=sub_font)
    sw = sbbox[2] - sbbox[0]
    draw.text(((SIZE - sw) / 2 - sbbox[0], SIZE * 0.65),
              sub, fill=(200, 230, 255, 220), font=sub_font)

    # ── macOS .icns ────────────────────────────────────────────────────────────
    if SYSTEM == "Darwin":
        icns_path = HERE / "icon_gdpr.icns"
        iconset = HERE / "icon_gdpr.iconset"
        iconset.mkdir(exist_ok=True)
        sizes = [16, 32, 64, 128, 256, 512]
        for s in sizes:
            img.resize((s, s), Image.LANCZOS).save(iconset / f"icon_{s}x{s}.png")
            img.resize((s * 2, s * 2), Image.LANCZOS).save(iconset / f"icon_{s}x{s}@2x.png")
        result = subprocess.run(
            ["iconutil", "-c", "icns", str(iconset), "-o", str(icns_path)],
            capture_output=True
        )
        shutil.rmtree(iconset, ignore_errors=True)
        if result.returncode == 0:
            print(f"  [+] Icon: {icns_path.name}")
        else:
            print("  [!] iconutil failed — no .icns generated")

    # ── Windows .ico ───────────────────────────────────────────────────────────
    # Pillow's ICO writer silently skips every requested size larger than the
    # image being saved, so the save must start from the LARGEST frame; it
    # then downsamples to each smaller size itself.
    ico_path = HERE / "icon_gdpr.ico"
    ico_sizes = [(s, s) for s in (16, 32, 48, 64, 128, 256)]
    img.resize((256, 256), Image.LANCZOS).save(ico_path, format="ICO", sizes=ico_sizes)
    print(f"  [+] Icon: {ico_path.name}")

    # Save PNG fallback
    img.save(HERE / "icon_gdpr.png")
    print("  [+] Icon: icon_gdpr.png")


# ═══════════════════════════════════════════════════════════════════════════════
# Step 3 — Build with PyInstaller
# ═══════════════════════════════════════════════════════════════════════════════

def get_pyinstaller_args() -> list:
    """Return the PyInstaller command-line arguments for the current platform.

    Side effects: prints what is being bundled and, on Windows, writes the
    version-info file via _make_win_version_file().
    """
    hidden = [
        # Flask / web
        "flask", "flask.templating", "jinja2", "jinja2.ext",
        "werkzeug", "werkzeug.serving", "werkzeug.routing",
        # M365 / auth
        "msal", "msal.application", "msal.authority",
        "requests", "requests.adapters", "urllib3",
        "cryptography", "cryptography.hazmat",
        # Document scanning (via document_scanner)
        "pdfplumber", "pdfplumber.page", "pdfminer", "pdfminer.high_level",
        "pdf2image", "pytesseract", "pypdf",
        "reportlab", "reportlab.pdfgen", "reportlab.lib",
        "spacy", "spacy.lang.da", "spacy.lang.en",
        "docx", "docx.oxml", "docx.styles",
        "openpyxl", "openpyxl.styles", "openpyxl.utils",
        "numpy", "PIL", "PIL.Image",
        # App window
        "pystray", "pystray._base",
        "webview", "webview.platforms",
        "webview.platforms.cocoa", "webview.platforms.winforms",
        "webview.platforms.gtk", "webview.platforms.qt",
        # Scheduler (#19)
        "apscheduler", "apscheduler.schedulers.background",
        "apscheduler.triggers.cron",
    ]

    datas = [(str(HERE / fname), ".") for fname in SOURCE_MODULES]

    # Bundle VERSION file — read at startup by both scanners
    version_file = HERE / "VERSION"
    if version_file.exists():
        datas.append((str(version_file), "."))
        print(f"  [+] Bundling VERSION: {version_file.read_text(encoding='utf-8').strip()}")

    lang_dir = HERE / "lang"
    if lang_dir.exists():
        datas.append((str(lang_dir), "lang"))
        lang_files = list(lang_dir.glob("*.json")) + list(lang_dir.glob("*.lang"))
        print(f"  [+] Bundling lang files: {lang_files}")

    keywords_dir = HERE / "keywords"
    if keywords_dir.exists():
        datas.append((str(keywords_dir), "keywords"))
        print(f"  [+] Bundling keywords: {list(keywords_dir.glob('*.json'))}")

    skus_dir = HERE / "classification"
    if skus_dir.exists():
        datas.append((str(skus_dir), "classification"))
        print(f"  [+] Bundling classification files: {list(skus_dir.glob('*.json'))}")

    templates_dir = HERE / "templates"
    if templates_dir.exists():
        datas.append((str(templates_dir), "templates"))
        print(f"  [+] Bundling templates: {list(templates_dir.glob('*.html'))}")

    static_dir = HERE / "static"
    if static_dir.exists():
        datas.append((str(static_dir), "static"))
        print(f"  [+] Bundling static: {list(static_dir.iterdir())}")

    for manual_file in (HERE / "docs" / "manuals").glob("MANUAL-*.md"):
        datas.append((str(manual_file), "docs/manuals"))
        print(f"  [+] Bundling manual: {manual_file.name}")

    # Bundle routes/ blueprints
    routes_dir = HERE / "routes"
    if routes_dir.exists():
        route_files = list(routes_dir.glob("*.py"))
        datas.extend((str(f), "routes") for f in route_files)
        print(f"  [+] Bundling routes/: {[f.name for f in route_files]}")

    # cv2 cascade data (cv2 itself is excluded from analysis below and its
    # files are copied in manually)
    try:
        import cv2 as _cv2
        cv2_data = Path(_cv2.__file__).parent / "data"
    except Exception:
        import importlib.util
        spec = importlib.util.find_spec("cv2")
        cv2_data = Path(spec.origin).parent / "data" if spec and spec.origin else None
    if cv2_data and Path(cv2_data).exists():
        datas.append((str(cv2_data), "cv2/data"))
        print("  [+] Bundling cv2/data")
        cv2_pkg = Path(cv2_data).parent
        for so in cv2_pkg.glob("cv2*.so"):
            datas.append((str(so), "cv2"))
        dylibs = cv2_pkg / ".dylibs"
        if dylibs.exists():
            datas.append((str(dylibs), "cv2/.dylibs"))

    # spaCy models — bundle the first (largest-preferred) model installed
    try:
        from PyInstaller.utils.hooks import collect_data_files as _cdf
        from PyInstaller.utils.hooks import collect_submodules as _csm
        for model in ["da_core_news_lg", "da_core_news_md", "da_core_news_sm",
                      "xx_ent_wiki_sm", "en_core_web_sm"]:
            try:
                _md = _cdf(model)
                _mh = _csm(model)
            except Exception:
                continue
            if _md or _mh:
                datas += _md
                hidden += _mh
                print(f"  [+] Bundling spaCy model: {model}")
                break
    except Exception:
        # PyInstaller hooks unavailable — build proceeds without a model.
        pass

    args = [
        str(ENTRY_POINT),
        "--name", APP_NAME,
        "--onedir",
        "--noconfirm",
        "--clean",
        "--distpath", str(DIST_DIR),
        "--workpath", str(BUILD_DIR),
        "--specpath", str(HERE),
        "--exclude-module", "cv2",
    ]
    for h in hidden:
        args += ["--hidden-import", h]

    # --add-data uses the platform's path-list separator
    sep = ";" if SYSTEM == "Windows" else ":"
    for src, dst in datas:
        args += ["--add-data", f"{src}{sep}{dst}"]

    # Platform options
    if SYSTEM == "Darwin":
        icon = next(
            (p for p in [ICON_MACOS, HERE / "icon.icns",
                         HERE / "icon_gdpr.png", HERE / "icon.png"] if p.exists()),
            None
        )
        if icon:
            args += ["--icon", str(icon)]
        args += ["--windowed", "--osx-bundle-identifier", BUNDLE_ID]
    elif SYSTEM == "Windows":
        icon = next(
            (p for p in [ICON_WIN, HERE / "icon.ico", HERE / "icon_gdpr.png"] if p.exists()),
            None
        )
        if icon:
            args += ["--icon", str(icon)]
        args += ["--windowed", "--version-file", str(_make_win_version_file())]

    return args


def _make_win_version_file() -> Path:
    """Write the Windows version-info file for PyInstaller and return its path."""
    ver = version_tuple(APP_VERSION)
    content = textwrap.dedent(f"""\
        VSVersionInfo(
          ffi=FixedFileInfo(
            filevers={ver},
            prodvers={ver},
            mask=0x3f,
            flags=0x0,
            OS=0x4,
            fileType=0x1,
            subtype=0x0,
            date=(0, 0)
          ),
          kids=[
            StringFileInfo([StringTable('040904B0', [
              StringStruct('CompanyName', 'GDPRScanner'),
              StringStruct('FileDescription', '{APP_NAME}'),
              StringStruct('FileVersion', '{APP_VERSION}'),
              StringStruct('InternalName', 'M365Scanner'),
              StringStruct('LegalCopyright', ''),
              StringStruct('OriginalFilename', 'GDPRScanner.exe'),
              StringStruct('ProductName', '{APP_NAME}'),
              StringStruct('ProductVersion', '{APP_VERSION}'),
            ])]),
            VarFileInfo([VarStruct('Translation', [0x0409, 1200])])
          ]
        )
        """)
    path = HERE / "m365_win_version_info.txt"
    path.write_text(content, encoding="utf-8")
    return path


# ═══════════════════════════════════════════════════════════════════════════════
# Step 4 — Post-build helpers
# ═══════════════════════════════════════════════════════════════════════════════

def create_dmg():
    """Wrap dist/<APP_NAME>.app in a .dmg using the create-dmg CLI (macOS)."""
    if shutil.which("create-dmg") is None:
        print("  [!] create-dmg not found — skipping .dmg")
        print("      Install with: brew install create-dmg")
        return

    app_path = DIST_DIR / f"{APP_NAME}.app"
    dmg_path = DIST_DIR / f"{APP_NAME}-{APP_VERSION}.dmg"
    if dmg_path.exists():
        dmg_path.unlink()

    print("  Creating .dmg …")
    cmd = [
        "create-dmg",
        "--volname", APP_NAME,
        "--window-pos", "200", "120",
        "--window-size", "600", "400",
        "--icon-size", "100",
        "--icon", f"{APP_NAME}.app", "175", "190",
        "--hide-extension", f"{APP_NAME}.app",
        "--app-drop-link", "425", "190",
        str(dmg_path),
        str(app_path),
    ]
    result = subprocess.run(cmd)
    if result.returncode == 0:
        print(f"  [+] DMG created: {dmg_path.name}")
    else:
        print("  [!] create-dmg failed — .app is still usable directly")


def create_nsis_installer():
    """Write an NSIS script for the built folder and compile it (Windows)."""
    if SYSTEM != "Windows":
        print("  [!] NSIS installer only available on Windows")
        return
    if shutil.which("makensis") is None:
        print("  [!] NSIS not found — download from https://nsis.sourceforge.io")
        return

    nsi = HERE / "m365_installer.nsi"
    dist_folder = DIST_DIR / APP_NAME
    nsi.write_text(textwrap.dedent(f"""\
        !define APP_NAME "{APP_NAME}"
        !define APP_VERSION "{APP_VERSION}"
        !define DIST_FOLDER "{dist_folder}"
        !define INSTALL_DIR "$PROGRAMFILES64\\\\{APP_NAME}"

        Name "${{APP_NAME}}"
        OutFile "dist\\\\{APP_NAME}-{APP_VERSION}-Setup.exe"
        InstallDir "${{INSTALL_DIR}}"
        RequestExecutionLevel admin

        Section "Install"
          SetOutPath "${{INSTALL_DIR}}"
          File /r "${{DIST_FOLDER}}\\\\*.*"
          CreateShortcut "$DESKTOP\\\\{APP_NAME}.lnk" "${{INSTALL_DIR}}\\\\{APP_NAME}.exe"
          CreateShortcut "$SMPROGRAMS\\\\{APP_NAME}.lnk" "${{INSTALL_DIR}}\\\\{APP_NAME}.exe"
        SectionEnd

        Section "Uninstall"
          Delete "$DESKTOP\\\\{APP_NAME}.lnk"
          Delete "$SMPROGRAMS\\\\{APP_NAME}.lnk"
          RMDir /r "${{INSTALL_DIR}}"
        SectionEnd
        """), encoding="utf-8")

    result = subprocess.run(["makensis", str(nsi)])
    if result.returncode == 0:
        print(f"  [+] Installer: dist/{APP_NAME}-{APP_VERSION}-Setup.exe")
    else:
        print("  [!] NSIS compilation failed")


def _print_box(rows, width: int = 58):
    """Print *rows* inside a box-drawing frame; a ``None`` row is a divider.

    Rows are left-padded to *width*; longer rows (e.g. deep paths) simply
    overflow the right border instead of being truncated.
    """
    bar = "═" * width
    print()
    print(f"  ╔{bar}╗")
    for row in rows:
        if row is None:
            print(f"  ╠{bar}╣")
        else:
            print(f"  ║{row:<{width}}║")
    print(f"  ╚{bar}╝")


def print_next_steps():
    """Print a platform-specific summary of the build output and how to run it."""
    url = f"http://127.0.0.1:{APP_PORT}"
    if SYSTEM == "Darwin":
        app = DIST_DIR / f"{APP_NAME}.app"
        _print_box([
            "  Build complete!",
            None,
            f"  App: {app}",
            None,
            "  To run:",
            f'    open "{app}"',
            "    — or double-click in Finder",
            "",
            f"  Opens on {url} in a native WKWebView",
            "  window (no browser chrome).",
            "  If pywebview was not installed, falls back to browser.",
            "",
            "  To distribute:",
            "    python build_gdpr.py --dmg   (requires create-dmg)",
        ])
    elif SYSTEM == "Windows":
        exe = DIST_DIR / APP_NAME / f"{APP_NAME}.exe"
        _print_box([
            "  Build complete!",
            None,
            f"  Exe: {exe}",
            None,
            "  To run:",
            f'    Double-click "{APP_NAME}.exe"',
            "",
            f"  Opens on {url} in a native WebView2",
            "  window (Edge engine, built into Windows 10/11).",
            "  If pywebview was not installed, falls back to browser.",
            "",
            "  To distribute as installer:",
            "    Install NSIS: https://nsis.sourceforge.io",
            "    Then run: python build_gdpr.py --installer",
        ])
    else:
        print("\n  [+] Build complete — see dist/")


# ═══════════════════════════════════════════════════════════════════════════════
# Main
# ═══════════════════════════════════════════════════════════════════════════════

def _check_build_dependencies():
    """Verify build-time packages and source files; exit(1) if a hard one is missing."""
    # Check PyInstaller
    try:
        import PyInstaller
        print(f"  [+] PyInstaller {PyInstaller.__version__}")
    except ImportError:
        print("  [!] PyInstaller not found. Install with:")
        print("      pip install pyinstaller pyinstaller-hooks-contrib")
        sys.exit(1)

    # Check pywebview (optional)
    try:
        import webview
        try:
            _wv_ver = webview.__version__
        except AttributeError:
            import importlib.metadata
            _wv_ver = importlib.metadata.version("pywebview")
        print(f"  [+] pywebview {_wv_ver} (native window — recommended)")
    except ImportError:
        print("  [!] pywebview not found — will fall back to system browser")
        print("      Install with: pip install pywebview")

    # Check pystray (optional)
    try:
        import pystray  # noqa: F401
        print("  [+] pystray available (browser-fallback tray icon)")
    except ImportError:
        print("  [!] pystray not found — no tray icon in browser-fallback mode")

    # Check MSAL
    try:
        import msal
        print(f"  [+] msal {msal.__version__}")
    except ImportError:
        print("  [!] msal not found — run: pip install msal")
        sys.exit(1)

    # Check requests
    try:
        import requests
        print(f"  [+] requests {requests.__version__}")
    except ImportError:
        print("  [!] requests not found — run: pip install requests")
        sys.exit(1)

    # Check source files — every module PyInstaller will be asked to bundle
    for fname in SOURCE_MODULES:
        if not (HERE / fname).exists():
            print(f"  [!] {fname} not found in {HERE}")
            sys.exit(1)
        print(f"  [+] Found {fname}")


def main():
    """Parse CLI options and run the icon → launcher → PyInstaller pipeline."""
    parser = argparse.ArgumentParser(description="Build GDPRScanner app")
    parser.add_argument("--clean", action="store_true",
                        help="Remove build/ and dist/ first")
    parser.add_argument("--dmg", action="store_true",
                        help="macOS: wrap .app in .dmg after build")
    parser.add_argument("--installer", action="store_true",
                        help="Windows: create NSIS installer")
    parser.add_argument("--icons-only", action="store_true",
                        help="Only regenerate icons, don't build")
    args = parser.parse_args()

    print(f"\n  GDPRScanner — App Builder v{APP_VERSION}")
    print(f"  Platform: {SYSTEM}   Python: {sys.version.split()[0]}")
    print(f"  {'─' * 42}\n")

    if not args.icons_only:
        _check_build_dependencies()

    # Clean — only when requested
    if args.clean:
        for d in [BUILD_DIR, DIST_DIR]:
            if d.exists():
                shutil.rmtree(d)
                print(f"  [+] Removed {d.name}/")

    # Icons
    print("\n  Generating icons …")
    make_icons()
    if args.icons_only:
        return

    # Write launcher
    print("\n  Writing launcher …")
    ENTRY_POINT.write_text(LAUNCHER_CODE, encoding="utf-8")
    print(f"  [+] {ENTRY_POINT.name}")

    # cv2 DLL check on Windows
    if SYSTEM == "Windows":
        try:
            import cv2  # noqa: F401
        except ImportError as e:
            if "DLL load failed" in str(e):
                print("  [!] cv2 DLL load failed — reinstalling headless variant …")
                subprocess.run([sys.executable, "-m", "pip", "install",
                                "--force-reinstall", "opencv-python-headless", "-q"],
                               check=False)

    # Run PyInstaller
    print("\n  Running PyInstaller …")
    pyi_args = get_pyinstaller_args()
    import PyInstaller.__main__ as pyi
    pyi.run(pyi_args)

    # Post-build
    if args.dmg:
        if SYSTEM == "Darwin":
            create_dmg()
        else:
            print("  [!] --dmg is only available on macOS — skipped")
    if args.installer:
        # create_nsis_installer() reports by itself when not on Windows
        create_nsis_installer()

    print_next_steps()


if __name__ == "__main__":
    main()