569 lines
24 KiB
PowerShell
569 lines
24 KiB
PowerShell
#Requires -RunAsAdministrator

# Always run from the folder this script lives in.
# -LiteralPath is used so that a script folder whose name contains wildcard
# characters such as [ or ] is taken verbatim instead of being globbed.
Set-Location -LiteralPath $PSScriptRoot
|
|
<#
|
|
.SYNOPSIS
|
|
M365 GDPR Scanner -- Windows Installation Script
|
|
.DESCRIPTION
|
|
Installs all dependencies for gdpr_scanner.py and m365_connector.py:
|
|
- Python 3.11 or 3.12 (3.13+ blocked -- spaCy incompatible)
|
|
- Tesseract OCR 5.x with Danish + English language packs
|
|
- Poppler (required by pdfplumber for PDF rendering)
|
|
- All Python packages including pywebview, pystray
|
|
- spaCy Danish NER model (da_core_news_lg, ~500 MB)
|
|
Tesseract and Poppler are installed into the project's tools\ folder; the generated .bat launchers put them on PATH.
|
|
.NOTES
|
|
Run from an elevated PowerShell prompt:
|
|
PowerShell -ExecutionPolicy Bypass -File install_windows.ps1
|
|
#>
|
|
|
|
# Fail fast: reject uninitialised variables and treat every error as terminating.
$ErrorActionPreference = 'Stop'
Set-StrictMode -Version Latest
|
|
|
|
# -- Colours --------------------------------------------------------------------
|
|
# Prints a section heading ("==> message") in cyan, preceded by a blank line.
function Write-Step {
    param($msg)
    $heading = "`n==> $msg"
    Write-Host -Object $heading -ForegroundColor Cyan
}
|
|
# Prints a success line tagged [OK] in green.
function Write-OK {
    param($msg)
    $line = " [OK] $msg"
    Write-Host -Object $line -ForegroundColor Green
}
|
|
# Prints a warning line tagged [!!] in yellow; the script keeps running.
function Write-Warn {
    param($msg)
    $line = " [!!] $msg"
    Write-Host -Object $line -ForegroundColor Yellow
}
|
|
# Prints an error line tagged [XX] in red and terminates the script with exit code 1.
function Write-Fail {
    param($msg)
    $line = " [XX] $msg"
    Write-Host -Object $line -ForegroundColor Red
    exit 1
}
|
|
|
|
# Banner
Write-Host ""
Write-Host -Object " M365 GDPR Scanner - Windows Setup" -ForegroundColor White
Write-Host -Object " -----------------------------------------" -ForegroundColor DarkGray
Write-Host ""

# -- 0. Check architecture ------------------------------------------------------
# Anything other than an AMD64 process only produces a warning; setup continues.
$isAmd64 = ($env:PROCESSOR_ARCHITECTURE -eq "AMD64")
if (-not $isAmd64) {
    Write-Warn "This script targets 64-bit Windows. Proceeding anyway."
}
|
|
|
|
# -- 1. Install Chocolatey (if not present) -------------------------------------
Write-Step "Checking Chocolatey package manager"

$chocoCommand = Get-Command choco -ErrorAction SilentlyContinue
if ($chocoCommand) {
    Write-OK "Chocolatey already installed ($((choco --version)))"
} else {
    Write-Host " Installing Chocolatey..."
    Set-ExecutionPolicy Bypass -Scope Process -Force

    # 3072 is OR-ed into the enabled security protocols before the download.
    [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072

    # Download the official bootstrap script and run it in this session.
    $webClient = New-Object System.Net.WebClient
    $bootstrapScript = $webClient.DownloadString('https://community.chocolatey.org/install.ps1')
    Invoke-Expression $bootstrapScript

    # Rebuild the session PATH from the persisted Machine + User values so
    # choco.exe is usable without opening a new shell.
    $machinePath = [System.Environment]::GetEnvironmentVariable("PATH","Machine")
    $userPath = [System.Environment]::GetEnvironmentVariable("PATH","User")
    $env:PATH = $machinePath + ";" + $userPath

    Write-OK "Chocolatey installed"
}
|
|
|
|
# -- Virtualenv path -----------------------------------------------------------
# The virtualenv lives next to this script; $VenvPython is its interpreter.
$VenvDir = Join-Path -Path $PSScriptRoot -ChildPath "venv"
$VenvPython = Join-Path -Path $VenvDir -ChildPath "Scripts\python.exe"

# -- 2. Install / validate Python ---------------------------------------------------
# Compatible: 3.11.x or 3.12.x
# spaCy does not support 3.13+. pywebview requires 3.8+.
Write-Step "Checking Python (need 3.11 or 3.12 -- prefer 3.12, spaCy incompatible with 3.13+)"
|
|
|
|
function Get-PythonExe {
    # Returns the command of a compatible Python (3.11 or 3.12), or $null.
    # The result is either a py-launcher invocation ("py -3.12"), a command
    # name resolvable via PATH, or a full path to python.exe.
    $candidates = @()

    # py launcher -- wrap in try/catch so "No runtime found" exit codes don't bubble up
    if (Get-Command py -ErrorAction SilentlyContinue) {
        foreach ($v in @("3.12", "3.11")) {
            $test = $null
            $prev = $ErrorActionPreference
            try {
                $ErrorActionPreference = 'SilentlyContinue'
                $test = & py "-$v" --version 2>&1
            } catch {
                $test = $null
            } finally {
                $ErrorActionPreference = $prev
            }
            if ("$test" -match "^Python $v") { $candidates += "py -$v" }
        }
    }

    # Direct python / python3 commands
    foreach ($cmd in @("python3.12", "python3.11", "python", "python3")) {
        if (Get-Command $cmd -ErrorAction SilentlyContinue) {
            $candidates += $cmd
        }
    }

    # Well-known install locations (e.g. installed from python.org without PATH update)
    $wellKnown = @(
        "$env:LOCALAPPDATA\Programs\Python\Python312\python.exe",
        "$env:LOCALAPPDATA\Programs\Python\Python311\python.exe",
        "C:\Python312\python.exe",
        "C:\Python311\python.exe",
        "C:\Program Files\Python312\python.exe",
        "C:\Program Files\Python311\python.exe"
    )
    foreach ($p in $wellKnown) {
        if (Test-Path $p) { $candidates += $p }
    }

    foreach ($cmd in $candidates) {
        # A candidate that is an existing file is a full path and may contain
        # spaces (e.g. "C:\Program Files\..."), so it must not be split.
        # Only launcher-style candidates ("py -3.12") carry extra arguments.
        if (Test-Path -LiteralPath $cmd -PathType Leaf) {
            $exe = $cmd
            $exeArgs = @()
        } else {
            $parts = $cmd -split " "
            $exe = $parts[0]
            $exeArgs = @($parts | Select-Object -Skip 1)
        }

        # Probe the version with errors silenced: a broken candidate (for
        # example a stub that only writes to stderr) must not abort the whole
        # search while the script runs with $ErrorActionPreference = "Stop".
        $raw = $null
        $prev = $ErrorActionPreference
        try {
            $ErrorActionPreference = 'SilentlyContinue'
            $raw = & $exe @exeArgs --version 2>&1
        } catch {
            $raw = $null
        } finally {
            $ErrorActionPreference = $prev
        }

        if ("$raw" -match "Python (\d+)\.(\d+)") {
            $maj = [int]$Matches[1]; $min = [int]$Matches[2]
            if ($maj -eq 3 -and ($min -eq 11 -or $min -eq 12)) { return $cmd }
        }
    }
    return $null
}
|
|
|
|
function Get-PythonVersionStr {
    # Runs "<cmd> --version" and returns whatever the interpreter prints
    # (stdout and stderr merged).
    #   $cmd -- a launcher invocation such as "py -3.12", a command name,
    #           or a full path to python.exe.
    param($cmd)

    # A full path may contain spaces ("C:\Program Files\..."); only split on
    # spaces when $cmd is not an existing file, i.e. for "py -3.12" style values.
    if (Test-Path -LiteralPath $cmd -PathType Leaf) {
        $exe = $cmd
        $exeArgs = @()
    } else {
        $parts = $cmd -split " "
        $exe = $parts[0]
        $exeArgs = @($parts | Select-Object -Skip 1)
    }

    $raw = & $exe @exeArgs --version 2>&1
    return $raw
}
|
|
|
|
function Invoke-Py {
    # Runs the interpreter selected in $script:pythonCmd with the given arguments.
    # Output: whatever the interpreter writes to stdout, followed by its exit
    # code ($LASTEXITCODE) as the last pipeline object.
    param([string[]]$PyArgs)

    $cmd = $script:pythonCmd

    # A full path may contain spaces ("C:\Program Files\..."); only split on
    # spaces when the value is not an existing file, i.e. for "py -3.12".
    if (Test-Path -LiteralPath $cmd -PathType Leaf) {
        $exe = $cmd
        $launcherArgs = @()
    } else {
        $parts = $cmd -split " "
        $exe = $parts[0]
        $launcherArgs = @($parts | Select-Object -Skip 1)
    }

    & $exe @launcherArgs @PyArgs
    return $LASTEXITCODE
}
|
|
|
|
# Locate a compatible interpreter; install Python 3.12 when none is found.
$pythonCmd = Get-PythonExe

if ($pythonCmd) {
    $verStr = Get-PythonVersionStr $pythonCmd
    Write-OK "Compatible Python found: $verStr (using '$pythonCmd')"
} else {
    # Check if an incompatible version is present so we can warn clearly
    if (Get-Command python -ErrorAction SilentlyContinue) {
        # Errors are silenced for the probe: a stub that only writes to stderr
        # must not abort setup while $ErrorActionPreference is "Stop".
        $raw = $null
        $prevEap = $ErrorActionPreference
        try {
            $ErrorActionPreference = 'SilentlyContinue'
            $raw = & python --version 2>&1
        } catch {
            $raw = $null
        } finally {
            $ErrorActionPreference = $prevEap
        }
        if ("$raw" -match "Python (\d+)\.(\d+)") {
            $maj = [int]$Matches[1]; $min = [int]$Matches[2]
            if ($maj -eq 3 -and $min -ge 13) {
                Write-Warn "Python $maj.$min is installed but too new (spaCy needs <= 3.12)"
                # Both install paths below install 3.12, not 3.11.
                Write-Warn "Python 3.12 will be installed alongside it"
            } elseif ($maj -eq 3 -and $min -le 10) {
                Write-Warn "Python $maj.$min is installed but too old (need >= 3.11)"
            }
        }
    }

    # ---- Try Chocolatey first (fast, silent) ----
    $chocoOk = $false
    if (Get-Command choco -ErrorAction SilentlyContinue) {
        Write-Host " Installing Python 3.12 via Chocolatey..."
        choco install python312 -y --no-progress | Out-Null
        $env:PATH = [System.Environment]::GetEnvironmentVariable("PATH","Machine") + ";" +
            [System.Environment]::GetEnvironmentVariable("PATH","User")
        $pythonCmd = Get-PythonExe
        if ($pythonCmd) { $chocoOk = $true }
    }

    # ---- Direct download from python.org (works without Chocolatey) ----
    if (-not $chocoOk) {
        $PyVersion = "3.12.9"
        $PyInstaller = "$env:TEMP\python-$PyVersion-amd64.exe"
        $PyUrl = "https://www.python.org/ftp/python/$PyVersion/python-$PyVersion-amd64.exe"

        Write-Host " Downloading Python $PyVersion from python.org..."
        try {
            [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
            # --fail makes curl exit non-zero on HTTP errors instead of saving
            # the server's error page as the installer.
            & curl.exe -L --fail --silent --show-error -o $PyInstaller $PyUrl
            if ($LASTEXITCODE -ne 0) { throw "curl.exe download failed" }
        } catch {
            Write-Fail "Download failed: $_`nInstall Python 3.12 manually from https://www.python.org/downloads/ then re-run this script."
        }

        # InstallAllUsers=0 is a per-user install; PrependPath=0 leaves PATH alone.
        Write-Host " Installing Python $PyVersion (silent, current user)..."
        $installArgs = "/quiet InstallAllUsers=0 PrependPath=0 Include_test=0"
        $installProc = Start-Process -FilePath $PyInstaller -ArgumentList $installArgs -Wait -NoNewWindow -PassThru
        if ($installProc.ExitCode -ne 0) {
            Write-Warn "Python installer exited with code $($installProc.ExitCode)"
        }
        Remove-Item $PyInstaller -Force -ErrorAction SilentlyContinue

        # Reload PATH for this session. The installer does not modify PATH
        # (PrependPath=0), so discovery relies on Get-PythonExe's own search.
        $env:PATH = [System.Environment]::GetEnvironmentVariable("PATH","Machine") + ";" +
            [System.Environment]::GetEnvironmentVariable("PATH","User")

        $pythonCmd = Get-PythonExe
        if (-not $pythonCmd) {
            Write-Fail ("Python $PyVersion was installed but could not be found.`n" +
                " -- Open a NEW PowerShell window and re-run this script, or`n" +
                " -- Install manually from https://www.python.org/downloads/")
        }
    }

    $verStr = Get-PythonVersionStr $pythonCmd
    Write-OK "Python installed: $verStr"
}
|
|
|
|
# Final sanity check: the selected command must report Python 3.11 or 3.12.
# A full path may contain spaces ("C:\Program Files\..."); only split on
# spaces when the value is not an existing file, i.e. for "py -3.12".
if (Test-Path -LiteralPath $pythonCmd -PathType Leaf) {
    $parts = @($pythonCmd)
} else {
    $parts = $pythonCmd -split " "
}
$raw = & $parts[0] $(if ($parts.Count -gt 1) { $parts[1..($parts.Count-1)] }) --version 2>&1
# Compare against the flattened text so multi-line output is handled uniformly.
if ("$raw" -notmatch "Python 3\.(11|12)") {
    Write-Fail "Could not confirm a Python 3.11 or 3.12 interpreter. Got: $raw"
}
|
|
|
|
# -- Create / reuse virtualenv -------------------------------------------------
Write-Step "Setting up virtualenv at $VenvDir"
if (Test-Path $VenvPython) {
    Write-OK "Existing virtualenv found -- reusing"
} else {
    # A venv folder without python.exe is unusable; remove it and start clean.
    if (Test-Path $VenvDir) { Remove-Item $VenvDir -Recurse -Force }
    Write-Host " Creating virtualenv..."
    # Invoke-Py writes the exit code to the pipeline; discard it so it is not
    # echoed to the console, and check $LASTEXITCODE instead.
    Invoke-Py @("-m", "venv", $VenvDir) | Out-Null
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path $VenvPython)) {
        Write-Fail "Failed to create virtualenv at $VenvDir (python -m venv exit code $LASTEXITCODE)"
    }
    Write-OK "Virtualenv created: $VenvDir"
}
|
|
|
|
# Runs "python -m pip <args>" with the virtualenv interpreter ($VenvPython).
# Emits pip's stdout followed by the process exit code.
function Invoke-VenvPip {
    param([string[]]$PipArgs)
    $moduleSwitch = "-m"
    & $VenvPython $moduleSwitch pip @PipArgs
    $pipExit = $LASTEXITCODE
    return $pipExit
}
|
|
|
|
# Upgrade pip inside the virtualenv. A failed upgrade is reported but is not
# fatal: the pip version that ships with the venv remains in place.
Write-Host " Upgrading pip..."
Invoke-VenvPip @("install", "--upgrade", "pip", "--quiet") | Out-Null
if ($LASTEXITCODE -eq 0) {
    Write-OK "pip up to date"
} else {
    Write-Warn "pip upgrade failed (exit code $LASTEXITCODE) -- continuing with the existing pip"
}
|
|
|
|
# -- 3. Install Visual C++ Redistributable (required by OpenCV/cv2) -----------
Write-Step "Checking Visual C++ Redistributable 2015-2022"
# Presence of either registry key is taken as "already installed".
$vcKey = "HKLM:\SOFTWARE\Microsoft\VisualStudio\14.0\VC\Runtimes\x64"
$vcAlt = "HKLM:\SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\x64"
$vcInstalled = (Test-Path $vcKey) -or (Test-Path $vcAlt)
if ($vcInstalled) {
    Write-OK "Visual C++ Redistributable already installed"
} else {
    Write-Host " Downloading VC++ Redistributable..."
    $vcUrl = "https://aka.ms/vs/17/release/vc_redist.x64.exe"
    $vcInstaller = "$env:TEMP\vc_redist.x64.exe"
    # --fail makes curl exit non-zero on HTTP errors instead of saving an error page.
    & curl.exe -L --fail --silent --show-error -o $vcInstaller $vcUrl
    if ($LASTEXITCODE -ne 0) {
        # Really skip: running or deleting a missing installer would throw
        # under $ErrorActionPreference = "Stop".
        Write-Warn "VC++ download failed -- skipping (may already be installed)"
        Remove-Item $vcInstaller -Force -ErrorAction SilentlyContinue
    } else {
        Write-Host " Installing silently..."
        Start-Process -FilePath $vcInstaller -ArgumentList "/install", "/quiet", "/norestart" -Wait
        Remove-Item $vcInstaller -Force
        Write-OK "Visual C++ Redistributable installed"
    }
}
|
|
|
|
# -- 4. Install Tesseract OCR ---------------------------------------------------
# Tesseract is installed locally into <script folder>\tools\tesseract.
Write-Step "Installing Tesseract OCR"
$ToolsDir = Join-Path $PSScriptRoot "tools"
$TessDir = Join-Path $ToolsDir "tesseract"
$tessExe = Join-Path $TessDir "tesseract.exe"
New-Item -ItemType Directory -Force -Path $ToolsDir | Out-Null
if (Test-Path $tessExe) {
    $tessVer = & $tessExe --version 2>&1 | Select-Object -First 1
    Write-OK "Tesseract already installed: $tessVer"
} else {
    Write-Host " Downloading Tesseract 5.x installer..."
    # Download Tesseract installer -- try multiple mirrors
    $tessInstaller = "$env:TEMP\tesseract-setup.exe"
    $tessUrls = @(
        "https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w64-setup-5.3.4.20240503.exe",
        "https://github.com/UB-Mannheim/tesseract/releases/download/v5.3.4.20240503/tesseract-ocr-w64-setup-5.3.4.20240503.exe"
    )
    $downloaded = $false
    foreach ($tessUrl in $tessUrls) {
        Write-Host " Trying: $tessUrl"
        # Suppress NativeCommandError -- check exit code manually
        $prev = $ErrorActionPreference; $ErrorActionPreference = "SilentlyContinue"
        & curl.exe -L --fail --silent --show-error -o $tessInstaller $tessUrl 2>&1 | Out-Null
        $curlExit = $LASTEXITCODE
        $ErrorActionPreference = $prev
        # A download counts only if curl succeeded AND the file is larger than 1 MB.
        $sz = if (Test-Path $tessInstaller) { (Get-Item $tessInstaller).Length } else { 0 }
        if ($curlExit -eq 0 -and $sz -gt 1MB) {
            Write-OK "Downloaded ($([math]::Round($sz/1MB,1)) MB)"
            $downloaded = $true
            break
        }
        Write-Host " Failed (exit $curlExit, $sz bytes) -- trying next mirror..."
        if (Test-Path $tessInstaller) { Remove-Item $tessInstaller -Force }
    }
    if (-not $downloaded) {
        # Last resort: let the user fetch the installer by hand, then continue.
        Write-Host ""
        Write-Host " Automatic download failed." -ForegroundColor Yellow
        Write-Host " Please download the installer manually:" -ForegroundColor Yellow
        Write-Host " https://github.com/UB-Mannheim/tesseract/releases/tag/v5.3.4.20240503" -ForegroundColor Cyan
        Write-Host " Save it as: $tessInstaller" -ForegroundColor Cyan
        Write-Host " Then press Enter to continue..." -ForegroundColor Yellow
        Read-Host
        if (-not (Test-Path $tessInstaller) -or (Get-Item $tessInstaller).Length -lt 1MB) {
            Write-Fail "Installer not found at $tessInstaller"
        }
    }
    Write-Host " Running installer (silent)..."
    Start-Process -FilePath $tessInstaller -ArgumentList "/S /D=$TessDir" -Wait
    Remove-Item $tessInstaller -Force
    # Report success only if the executable actually exists afterwards.
    if (Test-Path $tessExe) {
        Write-OK "Tesseract installed in project tools\ folder"
    } else {
        Write-Warn "Tesseract installer finished but $tessExe was not found"
    }
}

# Tesseract is local in tools\ -- put it on the PATH of this session so later
# steps can call "tesseract" by name.
if (Test-Path $tessExe) {
    $env:PATH = "$TessDir;$env:PATH"
}
|
|
|
|
# -- 4. Install Tesseract language packs ---------------------------------------
Write-Step "Installing Tesseract language packs (Danish + English)"
$tessData = Join-Path $TessDir "tessdata"
New-Item -ItemType Directory -Force -Path $tessData | Out-Null
# Language code -> download URL of its .traineddata file.
$langFiles = @{
    "dan" = "https://github.com/tesseract-ocr/tessdata/raw/main/dan.traineddata"
    "eng" = "https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata"
}
foreach ($lang in $langFiles.Keys) {
    $dest = Join-Path $tessData "$lang.traineddata"
    if (Test-Path $dest) {
        Write-OK "'$lang' language pack already present"
    } else {
        Write-Host " Downloading $lang.traineddata..."
        # --fail: exit non-zero on HTTP errors instead of saving an error page
        # that a later run would mistake for an installed language pack.
        & curl.exe -L --fail --silent --show-error -o $dest $langFiles[$lang]
        if ($LASTEXITCODE -ne 0) {
            Write-Warn "Failed to download $lang language pack"
            # Remove any partial file so the next run retries the download.
            if (Test-Path $dest) { Remove-Item $dest -Force }
        } else {
            Write-OK "'$lang' installed"
        }
    }
}
|
|
|
|
# -- 5. Install Poppler --------------------------------------------------------
# Poppler is unpacked locally into <script folder>\tools\poppler.
Write-Step "Installing Poppler (required for PDF rendering)"
$PopplerDir = Join-Path $ToolsDir "poppler"
$popplerBin = Join-Path $PopplerDir "Library\bin"

# Look for an existing pdftoppm.exe anywhere below tools\poppler, the same way
# the post-extraction check does, so a previous install is recognised even
# when the archive unpacked into a sub-folder.
$existing = $null
if (Test-Path $PopplerDir) {
    $existing = Get-ChildItem -Path $PopplerDir -Recurse -Filter "pdftoppm.exe" -ErrorAction SilentlyContinue |
        Select-Object -First 1
}

if ($existing) {
    $popplerBin = $existing.DirectoryName
    Write-OK "Poppler already installed"
} else {
    Write-Host " Downloading Poppler for Windows..."
    $popplerUrl = "https://github.com/oschwartz10612/poppler-windows/releases/download/v24.07.0-0/Release-24.07.0-0.zip"
    $popplerZip = "$env:TEMP\poppler.zip"
    # --fail makes curl exit non-zero on HTTP errors instead of saving an error page.
    & curl.exe -L --fail --silent --show-error -o $popplerZip $popplerUrl
    if ($LASTEXITCODE -ne 0) { Write-Fail "Poppler download failed. Try re-running the script." }
    # The message must name a defined variable: under Set-StrictMode -Version Latest
    # an uninitialised variable inside a string is an error.
    Write-Host " Extracting to $PopplerDir..."
    Expand-Archive -Path $popplerZip -DestinationPath $PopplerDir -Force
    Remove-Item $popplerZip -Force
    $found = Get-ChildItem -Path $PopplerDir -Recurse -Filter "pdftoppm.exe" |
        Select-Object -First 1
    if ($found) {
        $popplerBin = $found.DirectoryName
        Write-OK "Poppler extracted: $popplerBin"
    } else {
        Write-Fail "Poppler extraction failed -- pdftoppm.exe not found"
    }
}

# Poppler is local in tools\ -- add its bin folder to the PATH of this session.
# NOTE(review): the generated .bat launchers assume tools\poppler\Library\bin;
# confirm that matches the folder detected here.
$env:PATH = "$env:PATH;$popplerBin"
|
|
|
|
# -- 6. Install Python packages -------------------------------------------------
Write-Step "Installing Python packages"

# Each entry is (pip package name, short description shown during install).
$packageSpecs = @(
    # Web server
    @("flask", "web server"),
    # PDF handling
    @("pdfplumber", "PDF text extraction"),
    @("pdf2image", "PDF to image (needs Poppler)"),
    @("pytesseract", "OCR wrapper (needs Tesseract)"),
    @("pypdf", "PDF read/write"),
    @("reportlab", "PDF generation for redaction"),
    # Document formats
    @("python-docx", "Word documents"),
    @("openpyxl", "Excel files"),
    @("img2pdf", "image to PDF"),
    # Image / CV
    @("opencv-python-headless", "face detection (headless, fewer DLL deps)"),
    @("numpy", "image processing"),
    @("Pillow", "image handling"),
    # NER / anonymisation
    @("spacy", "named entity recognition"),
    # PDF redaction and native app window
    @("pymupdf", "secure PDF redaction (physical text removal)"),
    @("pywebview", "native webview window"),
    @("pystray", "system tray icon (fallback)"),
    # App bundling
    @("pyinstaller", "app packager"),
    @("pyinstaller-hooks-contrib", "PyInstaller hooks"),
    # GDPRScanner
    @("msal", "Microsoft authentication"),
    @("requests", "HTTP client for Graph API"),
    # Optional -- File system scanning (#8)
    @("smbprotocol", "native SMB2/3 network share scanning (optional)"),
    @("keyring", "OS keychain credential storage for SMB (optional)"),
    @("python-dotenv", ".env file credential fallback (optional)"),
    # Scheduler (#19)
    @("APScheduler", "in-process scheduled scans (optional)"),
    # Google Workspace scanning (#10)
    @("google-auth", "Google service account auth (optional)"),
    @("google-auth-httplib2", "Google auth HTTP transport (optional)"),
    @("google-api-python-client", "Gmail + Drive + Admin APIs (optional)")
)

# Expand the pairs into the @{ name; desc } hashtables used by the install loop.
$packages = @(
    foreach ($spec in $packageSpecs) {
        @{ name = $spec[0]; desc = $spec[1] }
    }
)
|
|
|
|
# Install every package into the virtualenv, one pip call per package so a
# single failure does not stop the rest. Failed names are collected for the summary.
$failed = @()
foreach ($pkg in $packages) {
    Write-Host (" {0,-36} {1}" -f ($pkg.name + "..."), $pkg.desc) -NoNewline
    Invoke-VenvPip @("install", $pkg.name, "--quiet", "--disable-pip-version-check") | Out-Null
    if ($LASTEXITCODE -ne 0) {
        Write-Host " FAILED" -ForegroundColor Red
        $failed += $pkg.name
    } else {
        Write-Host " OK" -ForegroundColor Green
    }
}

# pywebview 5.x used a [win32] extra; 6.x+ ships WebView2 support built-in -- no extra needed.
# (No install step is required here, so no status line is printed.)

if ($failed.Count -gt 0) {
    Write-Warn "Failed to install: $($failed -join ', ')"
    # The retry hint targets the virtualenv interpreter, which is where the
    # packages above are installed.
    Write-Warn "Retry manually: & '$VenvPython' -m pip install $($failed -join ' ')"
}
|
|
|
|
# -- 7. Install spaCy language model -------------------------------------------
Write-Step "Installing spaCy Danish NER model (~500 MB, may take several minutes)"

# Check if any model is already installed. The probe loads every installed
# model and exits 0 only when at least one was found; with no model (or no
# spaCy at all) it exits non-zero so the download below runs.
$probe = "import sys, spacy; ms = [m for m in ['da_core_news_lg','da_core_news_md','da_core_news_sm'] if spacy.util.is_package(m)]; [spacy.load(m) for m in ms]; sys.exit(0 if ms else 1)"

# stderr is discarded; silence error handling around the native calls so
# stderr output cannot terminate the script while "Stop" is in effect.
$prevEap = $ErrorActionPreference
$ErrorActionPreference = "SilentlyContinue"
& $VenvPython -c $probe 2>$null | Out-Null
$probeExit = $LASTEXITCODE
$ErrorActionPreference = $prevEap

if ($probeExit -eq 0) {
    Write-OK "spaCy Danish model already installed"
} else {
    # Try the models from largest to smallest and stop at the first success.
    $models = @("da_core_news_lg", "da_core_news_md", "da_core_news_sm")
    $installed = $false
    foreach ($model in $models) {
        Write-Host " Trying $model..."
        $prevEap = $ErrorActionPreference
        $ErrorActionPreference = "SilentlyContinue"
        & $VenvPython -m spacy download $model --quiet 2>$null | Out-Null
        $downloadExit = $LASTEXITCODE
        $ErrorActionPreference = $prevEap
        if ($downloadExit -eq 0) {
            Write-OK "Installed: $model"
            $installed = $true
            break
        }
    }
    if (-not $installed) {
        Write-Warn "No spaCy Danish model installed -- anonymisation will be unavailable"
        # The retry hint targets the virtualenv interpreter, where spaCy is installed.
        Write-Warn "Retry manually: & '$VenvPython' -m spacy download da_core_news_sm"
    }
}
|
|
|
|
# -- 8. Verify installation -----------------------------------------------------
Write-Step "Verifying installation"

# Python
Write-OK "Python: $(Get-PythonVersionStr $pythonCmd)"

# Tesseract -- prefer the local copy in tools\ (it is not necessarily on PATH);
# fall back to whatever "tesseract" resolves to.
$tessCmd = if (Test-Path $tessExe) { $tessExe } else { "tesseract" }
try {
    $tessVer = & $tessCmd --version 2>&1 | Select-Object -First 1
    Write-OK "Tesseract: $tessVer"
    # Keep only the two language codes this project needs.
    $langs = & $tessCmd --list-langs 2>&1 | Where-Object { $_ -match "^(dan|eng)$" }
    Write-OK "OCR languages: $($langs -join ', ')"
} catch {
    Write-Warn "Tesseract not on PATH -- restart PowerShell and re-run if needed"
}

# Poppler
try {
    $pp = Get-Command pdftoppm -ErrorAction Stop
    Write-OK "Poppler: $($pp.Source)"
} catch {
    Write-Warn "Poppler not on PATH -- restart PowerShell and re-run if needed"
}
|
|
|
|
# All Python imports -- write to a temp file to avoid PowerShell expanding {vars} in f-strings.
# The Python script exits 1 when a required package cannot be imported; optional
# packages are only reported. $allOk records the result for the final summary.
$importScriptPath = Join-Path $env:TEMP "gdpr_verify.py"
Set-Content -Path $importScriptPath -Encoding UTF8 -Value @'
import sys

# (pip package name, module name to import)
checks = [
    ('flask', 'flask'),
    ('pdfplumber', 'pdfplumber'),
    ('pdf2image', 'pdf2image'),
    ('pytesseract', 'pytesseract'),
    ('pypdf', 'pypdf'),
    ('reportlab', 'reportlab'),
    ('python-docx', 'docx'),
    ('openpyxl', 'openpyxl'),
    ('opencv-python-headless', 'cv2'),
    ('numpy', 'numpy'),
    ('Pillow', 'PIL'),
    ('spacy', 'spacy'),
    ('img2pdf', 'img2pdf'),
    ('pymupdf', 'fitz'),
    ('pywebview', 'webview'),
    ('pystray', 'pystray'),
    ('PyInstaller', 'PyInstaller'),
    ('msal', 'msal'),
    ('requests', 'requests'),
]

# Optional packages: every package the installer marks "(optional)".
optional_checks = [
    ('smbprotocol', 'smbprotocol'),
    ('keyring', 'keyring'),
    ('python-dotenv', 'dotenv'),
    ('APScheduler', 'apscheduler'),
    ('google-auth', 'google.auth'),
    ('google-auth-httplib2', 'google_auth_httplib2'),
    ('google-api-python-client', 'googleapiclient'),
]

missing = []
for name, imp in checks:
    try:
        __import__(imp)
        print(" [OK] " + name)
    except ImportError:
        print(" [!!] " + name + " MISSING")
        missing.append(name)

print("\n Optional features:")
for name, imp in optional_checks:
    try:
        __import__(imp)
        print(" [OK] " + name)
    except ImportError:
        print(" [--] " + name + " (not installed)")

if missing:
    print("\nMissing required: " + ", ".join(missing))
    sys.exit(1)

print("\nAll required packages verified.")
sys.exit(0)
'@

try {
    & $VenvPython $importScriptPath
    $allOk = ($LASTEXITCODE -eq 0)
} finally {
    # Remove the temp script even if the interpreter could not be started.
    Remove-Item $importScriptPath -ErrorAction SilentlyContinue
}
|
|
|
|
# -- 9. Create launch scripts ---------------------------------------------------
# Writes two batch files into the current directory:
#   start_gdpr.bat  -- runs gdpr_scanner.py with the virtualenv interpreter
#   build_m365.bat  -- runs build_gdpr.py --clean with the virtualenv interpreter
# Both put the local Tesseract and Poppler folders on PATH first.
Write-Step "Creating launch scripts"

# The Poppler bin folder is detected at install time and may not be
# tools\poppler\Library\bin. Derive its path relative to the script folder so
# the launchers reference the folder that actually contains pdftoppm.exe;
# keep the default when it is not below the script folder.
$defaultPopplerRel = 'tools\poppler\Library\bin'
$popplerRel = $defaultPopplerRel
$rootPrefix = $PSScriptRoot.TrimEnd('\') + '\'
if ($popplerBin.StartsWith($rootPrefix, [System.StringComparison]::OrdinalIgnoreCase)) {
    $popplerRel = $popplerBin.Substring($rootPrefix.Length)
}

$startBat = @'
@echo off
:: GDPRScanner - Web UI
cd /d "%~dp0"
set PATH=%~dp0tools\tesseract;%~dp0tools\poppler\Library\bin;%PATH%
set TESSDATA_PREFIX=%~dp0tools\tesseract\tessdata
set PORT=5100
echo.
echo GDPRScanner
echo Open in browser: http://localhost:%PORT%
echo Press Ctrl+C to stop
echo.
"%~dp0venv\Scripts\python.exe" "%~dp0gdpr_scanner.py" --port %PORT%
pause
'@
Set-Content -Path "start_gdpr.bat" -Encoding ASCII -Value $startBat.Replace($defaultPopplerRel, $popplerRel)
Write-OK "Created: start_gdpr.bat"

$buildBat = @'
@echo off
:: GDPRScanner -- Build standalone .exe
cd /d "%~dp0"
set PATH=%~dp0tools\tesseract;%~dp0tools\poppler\Library\bin;%PATH%
set TESSDATA_PREFIX=%~dp0tools\tesseract\tessdata
echo Building GDPRScanner...
echo.
"%~dp0venv\Scripts\python.exe" "%~dp0build_gdpr.py" --clean %*
pause
'@
Set-Content -Path "build_m365.bat" -Encoding ASCII -Value $buildBat.Replace($defaultPopplerRel, $popplerRel)
Write-OK "Created: build_m365.bat"
|
|
|
|
|
|
# -- Done -----------------------------------------------------------------------
# Final summary. $allOk reflects only the Python import verification step.
Write-Host ""
Write-Host " -----------------------------------------" -ForegroundColor DarkGray
if ($allOk) {
    Write-Host " Installation complete!" -ForegroundColor Green
} else {
    Write-Host " Installation complete with warnings -- see above" -ForegroundColor Yellow
}
Write-Host ""
Write-Host " GDPRScanner:" -ForegroundColor White
Write-Host " Double-click start_gdpr.bat" -ForegroundColor Cyan
Write-Host " Web UI: http://localhost:5100" -ForegroundColor White
Write-Host ""
Write-Host " File system scanning (optional):" -ForegroundColor White
Write-Host " python gdpr_scanner.py --scan-path C:\Users\Me\Documents" -ForegroundColor Cyan
Write-Host " python gdpr_scanner.py --scan-path //nas/shares --smb-user DOMAIN\user" -ForegroundColor Cyan
Write-Host " Or use the File sources panel in the GDPRScanner UI" -ForegroundColor Gray
Write-Host ""
Write-Host " Build standalone app:" -ForegroundColor White
# The launcher created by this script is named build_m365.bat.
Write-Host " Double-click build_m365.bat -> dist\GDPRScanner.exe" -ForegroundColor Cyan
Write-Host " -----------------------------------------" -ForegroundColor DarkGray
Write-Host ""
|