GDPRScanner/m365_launcher.py
2026-04-11 04:38:11 +02:00

447 lines
16 KiB
Python

"""
gdpr_launcher.py — entry point for the packaged GDPRScanner app.
Responsibilities:
1. Find a free port (default 5100)
2. Start Flask in a background thread
3. Open the UI in a native webview window (pywebview)
— falls back to the system browser if pywebview is unavailable
Generated by build_gdpr.py — do not edit manually.
"""
import os
os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
import subprocess
import sys
import socket
import threading
import time
import webbrowser
from pathlib import Path
# Directory that holds the app's resources: the PyInstaller extraction
# directory (sys._MEIPASS) when running frozen, otherwise the directory
# containing this source file.
BASE_DIR = (
    Path(sys._MEIPASS)
    if getattr(sys, "frozen", False)
    else Path(__file__).parent
)
def _discover_macos_tools(extra_paths):
    """Collect Homebrew bin directories and locate Tesseract/Poppler on macOS."""
    brew_prefix = None
    for brew_candidate in ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]:
        if not Path(brew_candidate).exists():
            continue
        try:
            result = subprocess.run(
                [brew_candidate, "--prefix"],
                capture_output=True, text=True, timeout=5,
            )
        except Exception:
            # Best effort: a broken or slow brew must not block start-up.
            continue
        if result.returncode == 0:
            brew_prefix = result.stdout.strip()
            break

    brew_candidates = [brew_prefix] if brew_prefix else []
    brew_candidates += ["/opt/homebrew", "/usr/local", "/home/linuxbrew/.linuxbrew"]
    for prefix in brew_candidates:
        bin_dir = Path(prefix) / "bin"
        if bin_dir.exists():
            extra_paths.append(str(bin_dir))
        tessdata = Path(prefix) / "share" / "tessdata"
        if tessdata.exists():
            os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))

    for t in ["/opt/homebrew/bin/tesseract", "/usr/local/bin/tesseract"]:
        if Path(t).exists():
            os.environ.setdefault("TESSERACT_CMD", t)
            break

    for p in ["/opt/homebrew/bin", "/usr/local/bin",
              "/opt/homebrew/opt/poppler/bin", "/usr/local/opt/poppler/bin"]:
        if (Path(p) / "pdftoppm").exists():
            os.environ.setdefault("POPPLER_PATH", p)
            extra_paths.insert(0, p)
            break


def _discover_windows_tools(extra_paths):
    """Locate Tesseract (registry, then well-known folders) and Poppler on Windows."""
    import winreg

    tess_dir = None
    try:
        # The context manager closes the key even when QueryValueEx raises.
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Tesseract-OCR") as key:
            tess_dir, _ = winreg.QueryValueEx(key, "InstallDir")
    except Exception:
        pass

    for d in ([tess_dir] if tess_dir else []) + [
        r"C:\Program Files\Tesseract-OCR",
        r"C:\Program Files (x86)\Tesseract-OCR",
        r"C:\Tesseract-OCR",
    ]:
        if d and Path(d, "tesseract.exe").exists():
            os.environ.setdefault("TESSERACT_CMD", str(Path(d) / "tesseract.exe"))
            extra_paths.append(d)
            tessdata = Path(d) / "tessdata"
            if tessdata.exists():
                os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
            break

    for d in [
        r"C:\poppler\Library\bin", r"C:\poppler\bin",
        r"C:\Program Files\poppler\Library\bin",
        r"C:\Program Files\poppler\bin",
        r"C:\tools\poppler\Library\bin",
    ]:
        if (Path(d) / "pdftoppm.exe").exists():
            os.environ.setdefault("POPPLER_PATH", d)
            extra_paths.insert(0, d)
            break


def _discover_bundled_tools(extra_paths):
    """Pick up Tesseract/Poppler binaries shipped inside the frozen bundle."""
    is_windows = sys.platform == "win32"
    tess_bin = BASE_DIR / ("tesseract.exe" if is_windows else "tesseract")
    if tess_bin.exists():
        # setdefault: a system install found earlier keeps precedence.
        os.environ.setdefault("TESSERACT_CMD", str(tess_bin))
    for sub in ["poppler/bin", "poppler/Library/bin", "."]:
        pdftoppm = BASE_DIR / sub / ("pdftoppm.exe" if is_windows else "pdftoppm")
        if pdftoppm.exists():
            os.environ.setdefault("POPPLER_PATH", str(pdftoppm.parent))
            extra_paths.insert(0, str(pdftoppm.parent))
            break
    extra_paths.insert(0, str(BASE_DIR))


def _prepend_to_path(extra_paths):
    """Prepend the entries of *extra_paths* that PATH does not already list."""
    current = os.environ.get("PATH", "")
    # Compare against whole PATH entries; a substring test on the raw string
    # would wrongly treat "/usr/local/bin" as present in "/usr/local/bin/foo".
    seen = set(current.split(os.pathsep)) if current else set()
    additions = []
    for entry in extra_paths:
        if entry and entry not in seen:
            seen.add(entry)  # also drops duplicates inside extra_paths
            additions.append(entry)
    if additions:
        # Skip an empty PATH so no trailing separator is produced.
        os.environ["PATH"] = os.pathsep.join(additions + ([current] if current else []))


def _configure_python_bindings():
    """Point pytesseract and pdf2image at the tools recorded in the environment."""
    cmd = os.environ.get("TESSERACT_CMD")
    if cmd and Path(cmd).exists():
        try:
            import pytesseract
            pytesseract.pytesseract.tesseract_cmd = cmd
        except ImportError:
            pass

    poppler = os.environ.get("POPPLER_PATH")
    if poppler:
        try:
            import pdf2image
            import pdf2image.pdf2image as _p2i

            _orig = _p2i.convert_from_path

            def _patched(pdf_path, *a, poppler_path=None, **kw):
                # Use the discovered Poppler unless the caller supplies one.
                return _orig(pdf_path, *a, poppler_path=poppler_path or poppler, **kw)

            _p2i.convert_from_path = _patched
            # The package re-exports the function; rebind that alias too so
            # `from pdf2image import convert_from_path` gets the default.
            if getattr(pdf2image, "convert_from_path", None) is _orig:
                pdf2image.convert_from_path = _patched
        except Exception:
            # Best effort: pdf2image is optional for the launcher.
            pass


def _setup_external_tools():
    """
    Locate Tesseract and Poppler regardless of how the app was launched.
    GDPRScanner calls document_scanner for file content extraction, which
    may need OCR for scanned PDFs — same setup as Document Scanner.

    Steps, in order:
    1. Platform discovery (macOS: Homebrew prefixes; Windows: registry and
       well-known install folders). Other platforms are left untouched.
    2. When frozen, binaries shipped next to the app (BASE_DIR).
    3. Newly found directories are prepended to PATH.
    4. pytesseract / pdf2image are configured from TESSERACT_CMD and
       POPPLER_PATH when those packages can be imported.

    TESSERACT_CMD, TESSDATA_PREFIX and POPPLER_PATH are written with
    setdefault, so values already present in the environment win, and a
    system install found in step 1 wins over a bundled one from step 2.
    """
    extra_paths = []
    if sys.platform == "darwin":
        _discover_macos_tools(extra_paths)
    elif sys.platform == "win32":
        _discover_windows_tools(extra_paths)
    if getattr(sys, "frozen", False):
        _discover_bundled_tools(extra_paths)
    if extra_paths:
        _prepend_to_path(extra_paths)
    _configure_python_bindings()


_setup_external_tools()
def find_free_port(start: int = 5100) -> int:
    """
    Return the first TCP port in [start, start + 99] that can be bound on
    127.0.0.1.

    Each candidate is probed by binding a throw-away socket, which is closed
    again before returning, so the port is not reserved for the caller.

    Raises:
        RuntimeError: if none of the 100 candidate ports could be bound.
    """
    last = start + 99
    for port in range(start, last + 1):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except OSError:
                continue
            return port
    # Report the range that was actually scanned, not a hard-coded one.
    raise RuntimeError(f"No free port found in range {start}-{last}")
# ── Single-instance lock ──────────────────────────────────────────────────────
# Open handle on the lock file; kept referenced for the life of the process
# because closing it releases the lock.
_LOCK_FH = None


def acquire_instance_lock() -> bool:
    """
    Acquire an exclusive process lock so only one instance runs at a time.
    Returns True if the lock was acquired, False if another instance holds it.
    The lock is released automatically when the process exits.

    The lock file is ~/.gdprscanner/app.lock and receives this process's PID
    once the lock is held. It is opened without truncation, so a failed
    attempt leaves the PID written by the running instance intact. Calling
    this again after a successful acquisition returns True without touching
    the held handle.
    """
    global _LOCK_FH
    if _LOCK_FH is not None:
        # Already held by this process; reopening would drop the old handle
        # and with it the lock.
        return True

    lock_dir = Path.home() / ".gdprscanner"
    lock_dir.mkdir(parents=True, exist_ok=True)
    lock_path = lock_dir / "app.lock"

    fh = None
    try:
        # "a+" creates the file if needed but never truncates on open.
        fh = open(lock_path, "a+")
        # msvcrt.locking locks from the current position; lock byte 0.
        fh.seek(0)
        if sys.platform == "win32":
            import msvcrt
            msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            import fcntl
            fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Only the lock holder replaces the file content.
        fh.truncate(0)
        fh.write(str(os.getpid()))
        fh.flush()
    except (IOError, OSError):
        if fh is not None:
            fh.close()
        return False

    _LOCK_FH = fh
    return True
def _activate_venv():
    """
    Put a local "venv" directory's site-packages at the front of sys.path.

    Does nothing when running frozen. Otherwise the first candidate venv
    (next to BASE_DIR, then next to this file) that has a site-packages
    directory is used: it is inserted at sys.path[0], VIRTUAL_ENV is set to
    the venv directory and PYTHONHOME is removed from the environment.
    """
    if getattr(sys, "frozen", False):
        return

    def _find_site_packages(venv_root):
        # Windows venvs use a fixed layout; elsewhere the directory sits
        # below lib/<something>/, so take the first match found.
        if sys.platform == "win32":
            return venv_root / "Lib" / "site-packages"
        lib_dir = venv_root / "lib"
        if not lib_dir.exists():
            return None
        for entry in lib_dir.iterdir():
            found = entry / "site-packages"
            if found.exists():
                return found
        return None

    for venv_root in (BASE_DIR / "venv", Path(__file__).parent / "venv"):
        site_dir = _find_site_packages(venv_root)
        if site_dir is None or not site_dir.exists():
            continue
        sys.path.insert(0, str(site_dir))
        os.environ["VIRTUAL_ENV"] = str(venv_root)
        os.environ.pop("PYTHONHOME", None)
        return


_activate_venv()
def start_flask(port: int):
    """
    Run the gdpr_scanner Flask app on 127.0.0.1:<port>.

    Blocks until the server stops. The reloader and debug mode are off and
    request handling is threaded.
    """
    from gdpr_scanner import app as flask_app

    flask_app.run(
        host="127.0.0.1",
        port=port,
        debug=False,
        use_reloader=False,
        threaded=True,
    )
def wait_for_flask(port: int, timeout: float = 20.0) -> bool:
    """
    Poll 127.0.0.1:<port> until a TCP connection succeeds.

    Returns True as soon as one connection attempt succeeds, or False once
    *timeout* seconds (monotonic clock) have elapsed. Failed attempts are
    retried after a 0.1 s pause; each attempt has a 0.2 s connect timeout.
    """
    address = ("127.0.0.1", port)
    give_up_at = time.monotonic() + timeout
    while True:
        if time.monotonic() >= give_up_at:
            return False
        try:
            probe = socket.create_connection(address, timeout=0.2)
        except OSError:
            time.sleep(0.1)
            continue
        probe.close()
        return True
def _load_icon_image():
    """
    Return a 64x64 RGBA PIL image to use as the application icon.

    The first readable icon file found in BASE_DIR is used. A file that
    exists but cannot be opened or converted is skipped rather than aborting
    the lookup. If no file yields an image, a solid blue square is returned.

    Returns None only when Pillow cannot be imported or the fallback image
    cannot be created.
    """
    try:
        from PIL import Image as PILImage
    except Exception:
        return None

    candidates = [
        "icon_gdpr.ico", "icon_gdpr.icns", "icon_gdpr.png",
        "icon.ico", "icon.icns", "icon.png",
        "icon_m365.ico", "icon_m365.icns", "icon_m365.png",  # legacy fallback
    ]
    for name in candidates:
        p = BASE_DIR / name
        if not p.exists():
            continue
        try:
            # convert() returns a new in-memory image, so the source file
            # can be closed as soon as the block exits.
            with PILImage.open(p) as src:
                return src.convert("RGBA").resize((64, 64))
        except Exception:
            # Unreadable or unsupported file: try the next candidate.
            continue

    try:
        # Minimal fallback — blue square
        return PILImage.new("RGBA", (64, 64), (0, 114, 206, 255))
    except Exception:
        return None
def run_webview(port: int):
    """
    Open the app in a native webview window.
    Returns True on success, False if pywebview is unavailable.

    The window loads http://127.0.0.1:<port>/ and exposes an ``Api`` object
    to the page through pywebview's ``js_api``. ``webview.start`` blocks
    until the GUI loop ends; closing the main window terminates the process
    via ``os._exit(0)``.
    """
    try:
        import webview
    except ImportError:
        return False

    base_url = f"http://127.0.0.1:{port}"

    def _save_export(endpoint, stem, extension, type_label):
        """
        Download *endpoint* from the local Flask app and save it through a
        native save dialog. Shared by all ``Api.save_*`` methods.

        Returns {"ok": True, "path": ...} on success, otherwise
        {"ok": False, "error": ...}; "cancelled" is reported when the dialog
        is dismissed or no window exists. Kept as a closure rather than an
        ``Api`` method so it is not part of the object handed to ``js_api``.
        """
        import datetime
        import urllib.request

        try:
            with urllib.request.urlopen(f"{base_url}{endpoint}") as resp:
                data = resp.read()
            stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            fname = f"{stem}_{stamp}{extension}"
            win = webview.windows[0] if webview.windows else None
            if win:
                paths = win.create_file_dialog(
                    webview.SAVE_DIALOG,
                    save_filename=fname,
                    file_types=(f"{type_label} (*{extension})",),
                )
                if paths:
                    dest = paths[0] if isinstance(paths, (list, tuple)) else paths
                    # Case-insensitive so "report.XLSX" is not turned into
                    # "report.XLSX.xlsx".
                    if not dest.lower().endswith(extension):
                        dest += extension
                    with open(dest, "wb") as f:
                        f.write(data)
                    return {"ok": True, "path": dest}
            return {"ok": False, "error": "cancelled"}
        except Exception as e:
            # Errors are returned to the JavaScript caller instead of raised.
            return {"ok": False, "error": str(e)}

    class Api:
        """Methods callable from the page's JavaScript."""

        def quit(self):
            """Destroy every open webview window."""
            # Iterate over a copy: destroying a window may change the list.
            for w in list(webview.windows):
                w.destroy()

        def save_excel(self):
            """Fetch the Excel export from Flask and save via native dialog."""
            return _save_export("/api/export_excel", "gdpr_scan", ".xlsx", "Excel Files")

        def save_db_export(self):
            """Fetch the DB export ZIP from Flask and save via native dialog."""
            return _save_export("/api/db/export", "gdpr_export", ".zip", "ZIP Archive")

        def save_article30(self):
            """Fetch the Article 30 Word doc from Flask and save via native dialog."""
            return _save_export("/api/export_article30", "article30", ".docx", "Word Document")

        def open_manual(self, lang: str):
            """Open the user manual in a new native webview window."""
            from urllib.parse import quote

            # Escape the value so it cannot alter the query string.
            url = f"{base_url}/manual?lang={quote(str(lang), safe='')}"
            existing = next(
                (w for w in webview.windows if getattr(w, "_is_manual", False)),
                None,
            )
            if existing:
                # Reuse the manual window that is already open.
                existing.load_url(url)
            else:
                mw = webview.create_window(
                    title="GDPRScanner — Manual",
                    url=url,
                    width=960,
                    height=800,
                    resizable=True,
                )
                # Marker used above to find this window again.
                mw._is_manual = True

    w = webview.create_window(
        title="GDPRScanner",
        url=f"{base_url}/",
        width=1400,
        height=900,
        min_size=(900, 600),
        js_api=Api(),
    )

    def _on_closed():
        # Hard exit: ends the process without waiting for other threads.
        os._exit(0)

    w.events.closed += _on_closed
    webview.start(debug=False)
    return True
def _run_browser_fallback(port: int):
    """
    Open in system browser + optional tray icon.

    The UI URL is opened with ``webbrowser``. If pystray can be imported and
    an icon image is available, a tray icon with "Open GDPRScanner" and
    "Quit" entries runs until the user quits (which ends the process with
    ``os._exit(0)``). Otherwise this function blocks in a sleep loop until
    interrupted, so the caller's thread stays alive in both cases instead of
    returning right after the browser opens.
    """
    url = f"http://127.0.0.1:{port}/"
    webbrowser.open(url)

    try:
        import pystray
    except Exception:
        # Missing package or a backend that fails on import: no tray icon.
        pystray = None

    img = _load_icon_image() if pystray is not None else None

    if pystray is not None and img is not None:
        def _quit(icon, item):
            icon.stop()
            os._exit(0)

        def _open(icon, item):
            webbrowser.open(url)

        menu = pystray.Menu(
            pystray.MenuItem("Open GDPRScanner", _open, default=True),
            pystray.MenuItem("Quit", _quit),
        )
        icon = pystray.Icon("GDPRScanner", img, "GDPRScanner", menu)
        icon.run()
        return

    # No tray icon — just keep the process alive
    try:
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        pass
def _main():
    """
    Launcher entry point: take the single-instance lock, pick a port, start
    Flask in a daemon thread and show the UI (webview, else browser).
    Exits with status 1 when another instance is running, when gdpr_scanner
    cannot be imported, or when Flask does not accept connections in time.
    """
    if not acquire_instance_lock():
        print("GDPRScanner is already running.", file=sys.stderr)
        sys.exit(1)

    if sys.platform == "darwin":
        # "fork" is unsafe with some macOS frameworks; "spawn" also matches
        # what PyInstaller does.
        import multiprocessing
        multiprocessing.set_start_method("spawn", force=True)

    port = find_free_port()
    # Machine-readable line on stdout so a parent process can learn the port.
    print(f"GDPR_PORT={port}", flush=True)

    # Import on the main thread first so cv2 / numpy initialise safely.
    try:
        import gdpr_scanner  # noqa: F401 — side effect: loads Flask app
    except Exception as e:
        print(f"[!] Failed to import gdpr_scanner: {e}", file=sys.stderr)
        sys.exit(1)

    server = threading.Thread(target=start_flask, args=(port,), daemon=True)
    server.start()
    if not wait_for_flask(port):
        print("[!] Flask did not start in time", file=sys.stderr)
        sys.exit(1)

    # Prefer the native window; fall back to the system browser.
    if not run_webview(port):
        _run_browser_fallback(port)


if __name__ == "__main__":
    _main()