SubProcessError tesseract

Ich habe das Problem, dass ein Dokument, welches zugleich auch noch einigermaßen groß ist (23 MB), nicht verarbeitet wird.
Aus dem Error-Log werde ich aber nicht so richtig schlau, da mir der eigentliche Error dort fehlt.
Ich verstehe, dass es ein SubProcess Error ist und dass (so würde ich es deuten) tesseract mit dem „Signals.SIGKILL: 9“ die Verarbeitung abbricht.
Hatte dieses Phänomen schon mal jemand oder kann mir jemand sagen, wie ich das Problem umgehe?

Andere Dokumente können problemlos verarbeitet werden.
Die Hardware ist ein 224+ mit 2 GB RAM.

Im Folgenden das erstellte Log:
[2025-04-01 10:45:27,391] [ERROR] [paperless.consumer] Error occurred while consuming document Haftpflicht.pdf: SubprocessOutputError: . See logs for more information.

Traceback (most recent call last):

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_exec/tesseract.py“, line 313, in generate_hocr

p = run(args_tesseract, stdout=PIPE, stderr=STDOUT, timeout=timeout, check=True)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/subprocess/__init__.py“, line 62, in run

proc = subprocess_run(args, env=env, check=check, **kwargs)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/subprocess.py“, line 571, in run

raise CalledProcessError(retcode, process.args,

subprocess.CalledProcessError: Command ‚[‚tesseract‘, ‚-l‘, ‚deu‘, ‚/tmp/ocrmypdf.io.419a49u6/000003_ocr.png‘, ‚/tmp/ocrmypdf.io.419a49u6/000003_ocr_hocr‘, ‚hocr‘, ‚txt‘]‘ died with <Signals.SIGKILL: 9>.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):

File „/usr/src/paperless/src/paperless_tesseract/parsers.py“, line 382, in parse

ocrmypdf.ocr(**args)

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/api.py“, line 380, in ocr

return run_pipeline(options=options, plugin_manager=plugin_manager)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 214, in run_pipeline

return _run_pipeline(options, plugin_manager)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 181, in _run_pipeline

optimize_messages = exec_concurrent(context, executor)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 117, in exec_concurrent

executor(

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_concurrent.py“, line 78, in __call__

self._execute(

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/builtin_plugins/concurrency.py“, line 144, in _execute

result = future.result()

^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/concurrent/futures/_base.py“, line 449, in result

return self.__get_result()

^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/concurrent/futures/_base.py“, line 401, in __get_result

raise self._exception

File „/usr/local/lib/python3.12/concurrent/futures/thread.py“, line 59, in run

result = self.fn(*self.args, **self.kwargs)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 81, in _exec_page_sync

ocr_out, text_out = _image_to_ocr_text(page_context, ocr_image_out)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 62, in _image_to_ocr_text

hocr_out, text_out = ocr_engine_hocr(ocr_image_out, page_context)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipeline.py“, line 678, in ocr_engine_hocr

ocr_engine.generate_hocr(

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/builtin_plugins/tesseract_ocr.py“, line 268, in generate_hocr

tesseract.generate_hocr(

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_exec/tesseract.py“, line 327, in generate_hocr

raise SubprocessOutputError() from e

ocrmypdf.exceptions.SubprocessOutputError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):

File „/usr/local/lib/python3.12/site-packages/asgiref/sync.py“, line 327, in main_wrap

raise exc_info[1]

File „/usr/src/paperless/src/documents/consumer.py“, line 477, in run

document_parser.parse(self.working_copy, mime_type, self.filename)

File „/usr/src/paperless/src/paperless_tesseract/parsers.py“, line 405, in parse

raise ParseError(

documents.parsers.ParseError: SubprocessOutputError: . See logs for more information.

[2025-04-01 10:45:27,449] [ERROR] [paperless.tasks] ConsumeTaskPlugin failed: Haftpflicht.pdf: Error occurred while consuming document Haftpflicht.pdf: SubprocessOutputError: . See logs for more information.

Traceback (most recent call last):

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_exec/tesseract.py“, line 313, in generate_hocr

p = run(args_tesseract, stdout=PIPE, stderr=STDOUT, timeout=timeout, check=True)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/subprocess/__init__.py“, line 62, in run

proc = subprocess_run(args, env=env, check=check, **kwargs)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/subprocess.py“, line 571, in run

raise CalledProcessError(retcode, process.args,

subprocess.CalledProcessError: Command ‚[‚tesseract‘, ‚-l‘, ‚deu‘, ‚/tmp/ocrmypdf.io.419a49u6/000003_ocr.png‘, ‚/tmp/ocrmypdf.io.419a49u6/000003_ocr_hocr‘, ‚hocr‘, ‚txt‘]‘ died with <Signals.SIGKILL: 9>.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):

File „/usr/src/paperless/src/paperless_tesseract/parsers.py“, line 382, in parse

ocrmypdf.ocr(**args)

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/api.py“, line 380, in ocr

return run_pipeline(options=options, plugin_manager=plugin_manager)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 214, in run_pipeline

return _run_pipeline(options, plugin_manager)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 181, in _run_pipeline

optimize_messages = exec_concurrent(context, executor)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 117, in exec_concurrent

executor(

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_concurrent.py“, line 78, in __call__

self._execute(

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/builtin_plugins/concurrency.py“, line 144, in _execute

result = future.result()

^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/concurrent/futures/_base.py“, line 449, in result

return self.__get_result()

^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/concurrent/futures/_base.py“, line 401, in __get_result

raise self._exception

File „/usr/local/lib/python3.12/concurrent/futures/thread.py“, line 59, in run

result = self.fn(*self.args, **self.kwargs)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 81, in _exec_page_sync

ocr_out, text_out = _image_to_ocr_text(page_context, ocr_image_out)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipelines/ocr.py“, line 62, in _image_to_ocr_text

hocr_out, text_out = ocr_engine_hocr(ocr_image_out, page_context)

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_pipeline.py“, line 678, in ocr_engine_hocr

ocr_engine.generate_hocr(

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/builtin_plugins/tesseract_ocr.py“, line 268, in generate_hocr

tesseract.generate_hocr(

File „/usr/local/lib/python3.12/site-packages/ocrmypdf/_exec/tesseract.py“, line 327, in generate_hocr

raise SubprocessOutputError() from e

ocrmypdf.exceptions.SubprocessOutputError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):

File „/usr/local/lib/python3.12/site-packages/asgiref/sync.py“, line 327, in main_wrap

raise exc_info[1]

File „/usr/src/paperless/src/documents/consumer.py“, line 477, in run

document_parser.parse(self.working_copy, mime_type, self.filename)

File „/usr/src/paperless/src/paperless_tesseract/parsers.py“, line 405, in parse

raise ParseError(

documents.parsers.ParseError: SubprocessOutputError: . See logs for more information.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):

File „/usr/src/paperless/src/documents/tasks.py“, line 154, in consume_file

msg = plugin.run()

^^^^^^^^^^^^

File „/usr/src/paperless/src/documents/consumer.py“, line 509, in run

self._fail(

File „/usr/src/paperless/src/documents/consumer.py“, line 151, in _fail

raise ConsumerError(f"{self.filename}: {log_message or message}") from exception

documents.consumer.ConsumerError: Haftpflicht.pdf: Error occurred while consuming document Haftpflicht.pdf: SubprocessOutputError: . See logs for more information.