diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 504e233..089a240 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -111,6 +111,8 @@ jobs: run: | sudo apt-get update -qq sudo apt-get install -y --no-install-recommends \ + tesseract-ocr tesseract-ocr-dan tesseract-ocr-deu \ + poppler-utils \ libgtk-3-dev libwebkit2gtk-4.0-dev \ libglib2.0-dev libcairo2-dev pkg-config \ python3-dev @@ -118,11 +120,11 @@ jobs: - name: Install Python dependencies run: | python -m pip install --upgrade pip - # GDPRScanner only needs a subset — skip OCR/CV heavy deps - pip install flask msal requests openpyxl pillow \ - python-docx \ - pywebview pystray \ - pyinstaller pyinstaller-hooks-contrib + pip install -r requirements.txt + + # Download the Danish spaCy model used for NER/anonymisation + - name: Download spaCy model + run: python -m spacy download da_core_news_sm - name: Build GDPRScanner run: python build_gdpr.py