Skip to content

Commit b45a1d3

Browse files
committed
Add GVision OCR engine support
1 parent 6851ea7 commit b45a1d3

13 files changed

Lines changed: 1265 additions & 16 deletions

File tree

misc/pdf_text_diff.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ def main(
1818
engine: Annotated[str, typer.Option()] = 'pdftotext',
1919
):
2020
"""Compare text in PDFs."""
21-
2221
text1 = run(
2322
['pdftotext', '-layout', '-', '-'], stdin=pdf1, capture_output=True, check=True
2423
)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ dependencies = [
2121
"Pillow>=10.0.1",
2222
"pluggy>=1",
2323
"rich>=13",
24+
"google-cloud-vision>=3.4.2",
2425
]
2526
authors = [{ name = "James R. Barlow", email = "james@purplerock.ca" }]
2627
classifiers = [

src/ocrmypdf/_exec/ghostscript.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
import os
1010
import re
1111
from collections import deque
12-
from io import BytesIO
1312
from os import fspath
1413
from pathlib import Path
1514
from subprocess import PIPE, CalledProcessError

src/ocrmypdf/_metadata.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
from pikepdf import __version__ as PIKEPDF_VERSION
1616
from pikepdf.models.metadata import PdfMetadata, encode_pdf_date
1717

18-
from ocrmypdf._annots import remove_broken_goto_annotations
1918
from ocrmypdf._defaults import PROGRAM_NAME
2019
from ocrmypdf._jobcontext import PdfContext
2120
from ocrmypdf._version import __version__ as OCRMYPF_VERSION

src/ocrmypdf/_progressbar.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -48,9 +48,11 @@ class ProgressBar(Protocol):
4848
A brief description of the current step (e.g. "Scanning contents",
4949
"OCR", "PDF/A conversion"). OCRmyPDF updates this before each major step.
5050
unit (str | None):
51-
A short label for the type of work being tracked (e.g. "page", "%", "image").
51+
A short label for the type of work being tracked
52+
(e.g. "page", "%", "image").
5253
disable (bool):
53-
If ``True``, progress updates are suppressed (no output). Defaults to ``False``.
54+
If ``True``, progress updates are suppressed (no output).
55+
Defaults to ``False``.
5456
**kwargs:
5557
Future or extra parameters that OCRmyPDF might pass. Implementations
5658
should accept and ignore unrecognized keywords gracefully.
@@ -64,7 +66,9 @@ class ProgressBar(Protocol):
6466
from ocrmypdf import hookimpl
6567
6668
class ConsoleProgressBar(ProgressBar):
67-
def __init__(self, *, total=None, desc=None, unit=None, disable=False, **kwargs):
69+
def __init__(
70+
self, *, total=None, desc=None, unit=None, disable=False, **kwargs
71+
):
6872
self.total = total
6973
self.desc = desc
7074
self.unit = unit
@@ -73,7 +77,10 @@ def __init__(self, *, total=None, desc=None, unit=None, disable=False, **kwargs)
7377
7478
def __enter__(self):
7579
if not self.disable:
76-
print(f"Starting {self.desc or 'an OCR task'} (total={self.total} {self.unit})")
80+
print(
81+
f"Starting {self.desc or 'an OCR task'} "
82+
f"(total={self.total} {self.unit})"
83+
)
7784
return self
7885
7986
def __exit__(self, exc_type, exc_value, traceback):
@@ -86,15 +93,19 @@ def __exit__(self, exc_type, exc_value, traceback):
8693
8794
def update(self, n=1, *, completed=None):
8895
if completed is not None:
89-
# If 'completed' is given, you could set self.current = completed
96+
# If 'completed' is given, you could set
97+
# self.current = completed
9098
# but let's just read it to show usage
9199
print(f"Absolute completion reported: {completed}")
92100
# Otherwise, we increment by 'n'
93101
self.current += n
94102
if not self.disable:
95103
if self.total:
96104
percent = (self.current / self.total) * 100
97-
print(f"{self.desc}: {self.current}/{self.total} ({percent:.1f}%)")
105+
print(
106+
f"{self.desc}: {self.current}/{self.total}"
107+
f"({percent:.1f}%)"
108+
)
98109
else:
99110
print(f"{self.desc}: {self.current} units done")
100111

0 commit comments

Comments
 (0)