447 lines
16 KiB
Python
447 lines
16 KiB
Python
"""
|
|
gdpr_launcher.py — entry point for the packaged GDPRScanner app.

Responsibilities:
  1. Find a free port (default 5100)
  2. Start Flask in a background thread
  3. Open the UI in a native webview window (pywebview)
     — falls back to the system browser if pywebview is unavailable

Generated by build_gdpr.py — do not edit manually.
|
|
"""
|
|
import os
|
|
os.environ.setdefault("OBJC_DISABLE_INITIALIZE_FORK_SAFETY", "YES")
|
|
|
|
import subprocess
|
|
import sys
|
|
import socket
|
|
import threading
|
|
import time
|
|
import webbrowser
|
|
from pathlib import Path
|
|
|
|
# Resource root: the PyInstaller unpack directory (sys._MEIPASS) when the app
# is frozen, otherwise the directory that contains this launcher script.
BASE_DIR = (
    Path(sys._MEIPASS)
    if getattr(sys, "frozen", False)
    else Path(__file__).parent
)
|
|
|
|
|
|
def _probe_homebrew_prefix():
    """Return the prefix printed by ``brew --prefix``, or None if unavailable."""
    for brew in ("/opt/homebrew/bin/brew", "/usr/local/bin/brew"):
        if not Path(brew).exists():
            continue
        try:
            result = subprocess.run(
                [brew, "--prefix"],
                capture_output=True, text=True, timeout=5,
            )
        except Exception:
            # Best effort: a broken or hanging brew must never stop the launcher.
            continue
        if result.returncode == 0:
            return result.stdout.strip()
    return None


def _collect_macos_tools(extra_paths):
    """Record Homebrew locations of Tesseract and Poppler (macOS branch)."""
    prefixes = []
    brew_prefix = _probe_homebrew_prefix()
    if brew_prefix:
        prefixes.append(brew_prefix)
    prefixes += ["/opt/homebrew", "/usr/local", "/home/linuxbrew/.linuxbrew"]

    for prefix in prefixes:
        bin_dir = Path(prefix) / "bin"
        if bin_dir.exists():
            extra_paths.append(str(bin_dir))
        tessdata = Path(prefix) / "share" / "tessdata"
        if tessdata.exists():
            os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))

    for tesseract in ("/opt/homebrew/bin/tesseract", "/usr/local/bin/tesseract"):
        if Path(tesseract).exists():
            os.environ.setdefault("TESSERACT_CMD", tesseract)
            break

    for candidate in ("/opt/homebrew/bin", "/usr/local/bin",
                      "/opt/homebrew/opt/poppler/bin", "/usr/local/opt/poppler/bin"):
        if (Path(candidate) / "pdftoppm").exists():
            os.environ.setdefault("POPPLER_PATH", candidate)
            extra_paths.insert(0, candidate)
            break


def _collect_windows_tools(extra_paths):
    """Record registry/default install locations of Tesseract and Poppler (Windows branch)."""
    import winreg

    tess_dir = None
    try:
        # The context manager closes the key even when QueryValueEx raises.
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Tesseract-OCR") as key:
            tess_dir, _ = winreg.QueryValueEx(key, "InstallDir")
    except OSError:
        # Key or value missing: fall back to the well-known directories below.
        pass

    for d in ([tess_dir] if tess_dir else []) + [
        r"C:\Program Files\Tesseract-OCR",
        r"C:\Program Files (x86)\Tesseract-OCR",
        r"C:\Tesseract-OCR",
    ]:
        if d and Path(d, "tesseract.exe").exists():
            os.environ.setdefault("TESSERACT_CMD", str(Path(d) / "tesseract.exe"))
            extra_paths.append(d)
            tessdata = Path(d) / "tessdata"
            if tessdata.exists():
                os.environ.setdefault("TESSDATA_PREFIX", str(tessdata))
            break

    for d in (
        r"C:\poppler\Library\bin", r"C:\poppler\bin",
        r"C:\Program Files\poppler\Library\bin",
        r"C:\Program Files\poppler\bin",
        r"C:\tools\poppler\Library\bin",
    ):
        if (Path(d) / "pdftoppm.exe").exists():
            os.environ.setdefault("POPPLER_PATH", d)
            extra_paths.insert(0, d)
            break


def _collect_bundled_tools(extra_paths):
    """Record Tesseract/Poppler binaries shipped inside the frozen bundle (BASE_DIR)."""
    exe = ".exe" if sys.platform == "win32" else ""

    tess_bin = BASE_DIR / f"tesseract{exe}"
    if tess_bin.exists():
        # setdefault: a value found earlier (or preset by the user) wins.
        os.environ.setdefault("TESSERACT_CMD", str(tess_bin))

    for sub in ("poppler/bin", "poppler/Library/bin", "."):
        pdftoppm = BASE_DIR / sub / f"pdftoppm{exe}"
        if pdftoppm.exists():
            os.environ.setdefault("POPPLER_PATH", str(pdftoppm.parent))
            extra_paths.insert(0, str(pdftoppm.parent))
            break

    extra_paths.insert(0, str(BASE_DIR))


def _prepend_to_path(dirs):
    """Prepend *dirs* to PATH, skipping entries already present and duplicates."""
    current = os.environ.get("PATH", "")
    # Compare against whole PATH entries: a substring test on the raw string
    # would wrongly skip "/x/app" when PATH contains "/x/app-other".
    known = {entry for entry in current.split(os.pathsep) if entry}
    fresh = []
    for d in dirs:
        if d and d not in known:
            known.add(d)
            fresh.append(d)
    if not fresh:
        return
    # Only join the old value when it is non-empty; a trailing separator
    # would add an empty PATH entry (the current directory on POSIX).
    os.environ["PATH"] = os.pathsep.join(fresh + ([current] if current else []))


def _configure_ocr_libraries():
    """Point pytesseract / pdf2image at the tools named by TESSERACT_CMD / POPPLER_PATH."""
    cmd = os.environ.get("TESSERACT_CMD")
    if cmd and Path(cmd).exists():
        try:
            import pytesseract
            pytesseract.pytesseract.tesseract_cmd = cmd
        except ImportError:
            pass  # pytesseract not installed: nothing to configure

    poppler = os.environ.get("POPPLER_PATH")
    if poppler:
        try:
            import pdf2image
            import pdf2image.pdf2image as _p2i

            _orig = _p2i.convert_from_path

            def _patched(pdf_path, *a, poppler_path=None, **kw):
                # Default poppler_path to the discovered directory; an explicit
                # value supplied by the caller still takes precedence.
                return _orig(pdf_path, *a, poppler_path=poppler_path or poppler, **kw)

            _p2i.convert_from_path = _patched
            # Also cover ``from pdf2image import convert_from_path`` when the
            # package re-exports the same function object.
            if getattr(pdf2image, "convert_from_path", None) is _orig:
                pdf2image.convert_from_path = _patched
        except Exception:
            # Best effort: pdf2image missing or laid out differently.
            pass


def _setup_external_tools():
    """
    Locate Tesseract and Poppler regardless of how the app was launched.

    GDPRScanner calls document_scanner for file content extraction, which
    may need OCR for scanned PDFs — same setup as Document Scanner.

    Side effects (all best-effort, never raises for a missing tool):
      * sets TESSERACT_CMD, TESSDATA_PREFIX and POPPLER_PATH if not already set
      * prepends the discovered directories to PATH (each at most once)
      * configures pytesseract and pdf2image when they are importable
    """
    extra_paths = []

    if sys.platform == "darwin":
        _collect_macos_tools(extra_paths)
    elif sys.platform == "win32":
        _collect_windows_tools(extra_paths)

    if getattr(sys, "frozen", False):
        _collect_bundled_tools(extra_paths)

    _prepend_to_path(extra_paths)
    _configure_ocr_libraries()
|
|
|
|
|
|
_setup_external_tools()
|
|
|
|
|
|
def find_free_port(start: int = 5100, span: int = 100) -> int:
    """Return the first TCP port in ``[start, start + span)`` that can be bound on 127.0.0.1.

    Args:
        start: First port to try.
        span: Number of consecutive ports to try.

    Returns:
        A port number that was bindable at the time of the check. The probe
        socket is closed again before returning, so another process can
        still grab the port before the caller binds it.

    Raises:
        RuntimeError: If no port in the range could be bound.
    """
    # Stay inside the valid TCP range: bind() raises OverflowError (not
    # OSError) for ports above 65535, and port 0 means "pick any port".
    first = max(start, 1)
    stop = min(start + span, 65536)
    for port in range(first, stop):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except OSError:
                continue
            return port
    raise RuntimeError(
        f"No free port found in range {start}-{start + span - 1}"
    )
|
|
|
|
|
|
# ── Single-instance lock ──────────────────────────────────────────────────────
# Open handle on the lock file. It must stay referenced for the lifetime of
# the process: closing the handle releases the lock.
_LOCK_FH = None


def acquire_instance_lock() -> bool:
    """
    Acquire an exclusive process lock so only one instance runs at a time.

    The lock file is ``~/.gdprscanner/app.lock``; on success the current PID
    is written into it. The file is opened WITHOUT truncation so that a
    second instance that fails to get the lock does not erase the PID
    written by the instance that holds it.

    Returns True if the lock was acquired (or is already held by this
    process), False if another instance holds it or the lock file cannot be
    opened. The lock is released automatically when the process exits.
    """
    global _LOCK_FH
    if _LOCK_FH is not None:
        # Already held by this process; re-opening would drop the old handle
        # and with it the lock.
        return True

    lock_dir = Path.home() / ".gdprscanner"
    lock_dir.mkdir(parents=True, exist_ok=True)
    lock_path = lock_dir / "app.lock"

    fh = None
    try:
        lock_path.touch(exist_ok=True)
        fh = open(lock_path, "r+")  # position 0, existing content preserved
        if sys.platform == "win32":
            import msvcrt
            # Locks 1 byte at the current file position (offset 0).
            msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            import fcntl
            fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Only the lock owner rewrites the file content.
        fh.seek(0)
        fh.truncate()
        fh.write(str(os.getpid()))
        fh.flush()
    except (IOError, OSError):
        if fh is not None:
            fh.close()
        return False

    _LOCK_FH = fh
    return True
|
|
|
|
|
|
def _venv_site_packages(venv):
    """Return the site-packages directory inside *venv*, or None if there is none."""
    if sys.platform == "win32":
        site_pkg = venv / "Lib" / "site-packages"
        return site_pkg if site_pkg.exists() else None

    lib = venv / "lib"
    if not lib.is_dir():
        return None
    # Prefer the directory built for the running interpreter; packages from a
    # different Python version may not import.
    version_dir = f"python{sys.version_info.major}.{sys.version_info.minor}"
    preferred = lib / version_dir / "site-packages"
    if preferred.exists():
        return preferred
    # Fallback: first match in sorted order so the choice is deterministic.
    for entry in sorted(lib.iterdir()):
        candidate = entry / "site-packages"
        if candidate.exists():
            return candidate
    return None


def _activate_venv():
    """Make packages from a ``venv`` directory next to the launcher importable.

    Does nothing in a frozen build. Otherwise looks for ``venv`` under
    BASE_DIR and under this file's directory; for the first one that has a
    site-packages directory it puts that directory at the front of
    ``sys.path``, sets VIRTUAL_ENV and removes PYTHONHOME from the environment.
    """
    if getattr(sys, "frozen", False):
        return

    tried = set()
    for candidate in (BASE_DIR / "venv", Path(__file__).parent / "venv"):
        # Both candidates are usually the same directory when not frozen.
        if candidate in tried:
            continue
        tried.add(candidate)

        site_pkg = _venv_site_packages(candidate)
        if site_pkg is None:
            continue

        sys.path.insert(0, str(site_pkg))
        os.environ["VIRTUAL_ENV"] = str(candidate)
        os.environ.pop("PYTHONHOME", None)
        break
|
|
|
|
|
|
_activate_venv()
|
|
|
|
|
|
def start_flask(port: int):
    """Serve the ``gdpr_scanner`` Flask app on 127.0.0.1:*port*.

    Used as the target of the background server thread. Debug mode and the
    reloader are switched off; request handling is threaded.
    """
    import gdpr_scanner as scanner_module

    run_options = {
        "host": "127.0.0.1",
        "port": port,
        "debug": False,
        "threaded": True,
        "use_reloader": False,
    }
    scanner_module.app.run(**run_options)
|
|
|
|
|
|
def wait_for_flask(port: int, timeout: float = 20.0) -> bool:
    """Poll 127.0.0.1:*port* until a TCP connection succeeds.

    Returns True as soon as one connection attempt succeeds, or False once
    *timeout* seconds have elapsed without success. Each attempt waits at
    most 0.2 s and failed attempts are followed by a 0.1 s pause.
    """
    address = ("127.0.0.1", port)
    give_up_at = time.monotonic() + timeout
    while True:
        if time.monotonic() >= give_up_at:
            return False
        try:
            probe = socket.create_connection(address, timeout=0.2)
        except OSError:
            time.sleep(0.1)
        else:
            probe.close()
            return True
|
|
|
|
|
|
def _load_icon_image():
    """Return a 64x64 RGBA PIL image for the tray icon, or None without Pillow.

    Icon files are looked up in BASE_DIR in order of preference. A file that
    exists but cannot be decoded is skipped instead of aborting the search,
    so a later candidate — or the plain blue square fallback — is still used.
    """
    try:
        from PIL import Image as PILImage
    except Exception:
        return None

    candidates = (
        "icon_gdpr.ico", "icon_gdpr.icns", "icon_gdpr.png",
        "icon.ico", "icon.icns", "icon.png",
        "icon_m365.ico", "icon_m365.icns", "icon_m365.png",  # legacy fallback
    )
    for name in candidates:
        icon_path = BASE_DIR / name
        if not icon_path.exists():
            continue
        try:
            # The context manager closes the file once the pixels are converted.
            with PILImage.open(icon_path) as source:
                return source.convert("RGBA").resize((64, 64))
        except Exception:
            # Unreadable or unsupported icon format: try the next candidate.
            continue

    try:
        # Minimal fallback — blue square
        return PILImage.new("RGBA", (64, 64), (0, 114, 206, 255))
    except Exception:
        return None
|
|
|
|
|
|
def run_webview(port: int):
    """
    Open the app in a native webview window.

    Creates the main window pointing at the local server on *port*, exposes
    an ``Api`` object to the page as ``js_api`` and runs the pywebview event
    loop. Closing the main window terminates the whole process via
    ``os._exit(0)``.

    Returns True on success, False if pywebview is unavailable.
    """
    try:
        import webview
    except ImportError:
        return False

    base_url = f"http://127.0.0.1:{port}"

    def _download_and_save(endpoint, stem, suffix, type_label):
        """Fetch *endpoint* from the local server and save it via a native dialog.

        Returns ``{"ok": True, "path": ...}`` on success, otherwise
        ``{"ok": False, "error": ...}`` ("cancelled" when no file was chosen).
        Kept outside ``Api`` so it is not one of the js_api object's methods.
        """
        import datetime
        import urllib.request

        try:
            with urllib.request.urlopen(f"{base_url}{endpoint}") as resp:
                data = resp.read()

            stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            win = webview.windows[0] if webview.windows else None
            if not win:
                return {"ok": False, "error": "cancelled"}

            paths = win.create_file_dialog(
                webview.SAVE_DIALOG,
                save_filename=f"{stem}_{stamp}{suffix}",
                file_types=(f"{type_label} (*{suffix})",),
            )
            if not paths:
                return {"ok": False, "error": "cancelled"}

            dest = paths[0] if isinstance(paths, (list, tuple)) else paths
            # Case-insensitive so "Report.XLSX" is not turned into
            # "Report.XLSX.xlsx".
            if not dest.lower().endswith(suffix):
                dest += suffix
            with open(dest, "wb") as f:
                f.write(data)
            return {"ok": True, "path": dest}
        except Exception as e:
            # Reported back to the page instead of raising into the JS bridge.
            return {"ok": False, "error": str(e)}

    class Api:
        def quit(self):
            """Destroy every open webview window."""
            # Iterate over a copy: destroying a window may change the list.
            for w in list(webview.windows):
                w.destroy()

        def save_excel(self):
            """Fetch the Excel export from Flask and save via native dialog."""
            return _download_and_save(
                "/api/export_excel", "gdpr_scan", ".xlsx", "Excel Files")

        def save_db_export(self):
            """Fetch the DB export ZIP from Flask and save via native dialog."""
            return _download_and_save(
                "/api/db/export", "gdpr_export", ".zip", "ZIP Archive")

        def save_article30(self):
            """Fetch the Article 30 Word doc from Flask and save via native dialog."""
            return _download_and_save(
                "/api/export_article30", "article30", ".docx", "Word Document")

        def open_manual(self, lang: str):
            """Open the user manual in a new native webview window.

            An already open manual window is reused and navigated to the
            requested language instead of opening a second one.
            """
            from urllib.parse import urlencode

            # urlencode escapes characters that would otherwise break or
            # extend the query string.
            url = f"{base_url}/manual?{urlencode({'lang': lang})}"
            existing = next(
                (w for w in webview.windows if getattr(w, "_is_manual", False)),
                None,
            )
            if existing:
                existing.load_url(url)
            else:
                mw = webview.create_window(
                    title="GDPRScanner — Manual",
                    url=url,
                    width=960,
                    height=800,
                    resizable=True,
                )
                # Marker attribute used above to find this window again.
                mw._is_manual = True

    w = webview.create_window(
        title="GDPRScanner",
        url=f"{base_url}/",
        width=1400,
        height=900,
        min_size=(900, 600),
        js_api=Api(),
    )

    def _on_closed():
        # Hard exit: ends the process immediately when the main window closes.
        os._exit(0)

    w.events.closed += _on_closed
    webview.start(debug=False)
    return True
|
|
|
|
|
|
def _run_browser_fallback(port: int):
    """Open in system browser + optional tray icon.

    Blocks for as long as the app should stay alive: either inside the tray
    icon's event loop or, when no tray icon can be shown (pystray missing,
    no icon image, tray backend failure), in a sleep loop that ends on
    Ctrl+C. Returning early would let the main thread finish and stop the
    server, which runs in a daemon thread.
    """
    url = f"http://127.0.0.1:{port}/"
    webbrowser.open(url)

    def _serve_tray_icon():
        """Run the tray icon until it stops; return False if it could not be shown."""
        try:
            import pystray
        except Exception:
            # Not installed, or no usable backend on this system.
            return False

        img = _load_icon_image()
        if img is None:
            return False

        def _quit(icon, item):
            icon.stop()
            os._exit(0)

        def _open(icon, item):
            webbrowser.open(url)

        try:
            menu = pystray.Menu(
                pystray.MenuItem("Open GDPRScanner", _open, default=True),
                pystray.MenuItem("Quit", _quit),
            )
            icon = pystray.Icon("GDPRScanner", img, "GDPRScanner", menu)
            icon.run()
        except Exception:
            return False
        return True

    if _serve_tray_icon():
        return

    # No tray icon — just keep the process alive
    try:
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        pass
|
|
|
|
|
|
if __name__ == "__main__":
    import multiprocessing

    # Must run before anything else in a frozen build: a process started by
    # multiprocessing re-executes this launcher, and freeze_support() is the
    # hook that lets it run the child's work instead of starting a second
    # app (which would fail on the instance lock). No-op otherwise.
    multiprocessing.freeze_support()

    if not acquire_instance_lock():
        print("GDPRScanner is already running.", file=sys.stderr)
        sys.exit(1)

    # On macOS, multiprocessing uses "fork" which is unsafe with some
    # frameworks — use "spawn" to match PyInstaller's behaviour.
    if sys.platform == "darwin":
        multiprocessing.set_start_method("spawn", force=True)

    port = find_free_port()
    # Machine-readable port line — stdout pipe for any parent process.
    print(f"GDPR_PORT={port}", flush=True)

    # Pre-import on main thread so cv2 / numpy initialise safely
    try:
        import gdpr_scanner  # noqa: F401 — side effect: loads Flask app
    except Exception as e:
        print(f"[!] Failed to import gdpr_scanner: {e}", file=sys.stderr)
        sys.exit(1)

    # Daemon thread: the server stops when the main thread exits.
    flask_thread = threading.Thread(target=start_flask, args=(port,), daemon=True)
    flask_thread.start()

    if not wait_for_flask(port):
        print("[!] Flask did not start in time", file=sys.stderr)
        sys.exit(1)

    # Native window first; system browser (+ tray icon) when pywebview is missing.
    webview_ok = run_webview(port)
    if not webview_ok:
        _run_browser_fallback(port)
|