diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd201c587..512d30d7c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,8 +36,6 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.11' - - run: pip install poetry - - run: poetry install - name: Restore cached tessdata uses: actions/cache/restore@v4 with: @@ -45,6 +43,8 @@ jobs: enableCrossOsArchive: true fail-on-cache-miss: true key: v1-tessdata-${{ hashFiles('./install/common/download-tessdata.py') }} + - run: pip install poetry + - run: poetry install - name: Run CLI tests run: poetry run make test # Taken from: https://github.com/orgs/community/discussions/27149#discussioncomment-3254829 diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 71a4cc5ca..697a1bd18 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -172,8 +172,14 @@ def pixels_to_pdf_page( page_pdf_bytes = self.ocr_page(pixmap, ocr_lang) else: # Don't OCR page_doc = fitz.Document() - page_doc.insert_file(pixmap) - page_pdf_bytes = page_doc.tobytes(deflate_images=True) + # Added in PyMuPDF 1.22.0 (commit 6a9c9d8175c307f7f3baf605c8632745f69a8b1b) + if hasattr(page_doc, "insert_file"): + page_doc.insert_file(pixmap) + page_pdf_bytes = page_doc.tobytes(deflate_images=True) + else: + page = page_doc.newPage() + page.insertImage(page.rect, pixmap=pixmap) + page_pdf_bytes = page_doc.write(deflate=True) return fitz.open("pdf", page_pdf_bytes) @@ -240,12 +246,27 @@ def _convert( # Ensure nothing else is read after all bitmaps are obtained p.stdout.close() - safe_doc.save(document.output_filename) + if hasattr(safe_doc, "tobytes"): + pdf_bytes = safe_doc.tobytes() + else: + pdf_bytes = safe_doc.write() + with open(document.output_filename, "wb+") as f: + f.write(pdf_bytes) # TODO handle leftover code input text = "Converted document" self.print_progress(document, False, text, percentage) + if getattr(sys, "dangerzone_dev", False): + assert p.stderr + debug_log = read_debug_text(p.stderr, MAX_CONVERSION_LOG_CHARS) + log.info( + "Conversion output (doc to pixels)\n" + f"{DOC_TO_PIXELS_LOG_START}\n" + f"{debug_log}" # no need for an extra newline here + f"{DOC_TO_PIXELS_LOG_END}" + ) + def print_progress( self, document: Document, error: bool, text: str, percentage: float ) -> None: @@ -354,13 +375,3 @@ def doc_to_pixels_proc( self.ensure_stop_doc_to_pixels_proc( document, p, timeout_grace=timeout_grace, timeout_force=timeout_force ) - - if getattr(sys, "dangerzone_dev", False): - assert p.stderr - debug_log = read_debug_text(p.stderr, MAX_CONVERSION_LOG_CHARS) - log.info( - "Conversion output (doc to pixels)\n" - f"{DOC_TO_PIXELS_LOG_START}\n" - f"{debug_log}" # no need for an extra newline here - f"{DOC_TO_PIXELS_LOG_END}" - ) diff --git a/poetry.lock b/poetry.lock index 381305090..1e8fec3b2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1001,13 +1001,13 @@ files = [ [[package]] name = "urllib3" -version = "2.2.1" +version = "2.2.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.8" files = [ - {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, - {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, ] [package.extras] @@ -1048,4 +1048,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "4b3f14e0c3a818fd7a45367131d27152e14f2ef2e7b3d95d9b5526cb43a2a204" +content-hash = "bcc5b2685dec5e49a31e5704131d315ff5f60f485ba7d4c91c5a8bf3b927dca8" diff --git a/pyproject.toml b/pyproject.toml index 11d02bc20..be193b0a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ python = ">=3.9,<3.13" click = "*" appdirs = "*" PySide6 = "^6.7.1" -PyMuPDF = "^1.23.8" +PyMuPDF = "^1.16.11" colorama = "*" pyxdg = {version = "*", platform = "linux"} requests = "*" diff --git a/stdeb.cfg b/stdeb.cfg index 38148dcbb..79df78dd3 100644 --- a/stdeb.cfg +++ b/stdeb.cfg @@ -1,6 +1,6 @@ [DEFAULT] Package3: dangerzone -Depends3: podman, python3, python3-pyside2.qtcore, python3-pyside2.qtgui, python3-pyside2.qtwidgets, python3-pyside2.qtsvg, python3-appdirs, python3-click, python3-xdg, python3-colorama, python3-requests, python3-markdown, python3-packaging, python3-fitz (>= 1.19.0) | pyocr +Depends3: podman, python3, python3-pyside2.qtcore, python3-pyside2.qtgui, python3-pyside2.qtwidgets, python3-pyside2.qtsvg, python3-appdirs, python3-click, python3-xdg, python3-colorama, python3-requests, python3-markdown, python3-packaging, python3-fitz (>= 1.19.0) | python3-pyocr Build-Depends: dh-python, python3, python3-setuptools, python3-stdeb Suite: bionic X-Python3-Version: >= 3.8