gingdev committed on
Commit
d9028d2
·
0 Parent(s):

chore: wip

Browse files
Files changed (13) hide show
  1. .gitattributes +1 -0
  2. .gitignore +4 -0
  3. Dockerfile +14 -0
  4. README.md +8 -0
  5. app/__init__.py +1 -0
  6. app/constants.py +4 -0
  7. app/utils.py +32 -0
  8. model/en_infer.onnx +3 -0
  9. pyproject.toml +19 -0
  10. run.py +51 -0
  11. tests/demo.jpg +0 -0
  12. tests/test_ocr.py +6 -0
  13. uv.lock +0 -0
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.onnx filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ /.gradio/
2
+ /.pytest_cache/
3
+ /.venv/
4
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3-bookworm
2
+
3
+ RUN apt-get update && \
4
+ apt-get install -y libmagickwand-dev ghostscript fonts-freefont-ttf curl libgl1-mesa-glx
5
+ RUN useradd -m -u 1000 user
6
+ USER user
7
+ ENV PATH="/home/user/.local/bin:$PATH"
8
+ WORKDIR /app
9
+ COPY --chown=user . /app
10
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
11
+ uv sync
12
+ ENV PATH="/app/.venv/bin:$PATH"
13
+
14
+ CMD ["python", "run.py"]
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Image translator
3
+ emoji: 🐳
4
+ colorFrom: purple
5
+ colorTo: gray
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .utils import ocr
app/constants.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ FONT_PATH = "/usr/share/fonts/truetype/freefont/FreeMono.ttf"
4
+ MODEL_PATH = str(Path(__file__).parent.parent / "model/en_infer.onnx")
app/utils.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .constants import MODEL_PATH
2
+ from rapidocr_onnxruntime import RapidOCR
3
+ from numpy.typing import NDArray
4
+ from os import PathLike
5
+ from shapely import Polygon
6
+ from shapely.geometry import box, MultiPolygon
7
+ from shapely.ops import unary_union
8
+ from typing import cast
9
+
10
# Shared OCR engine, created once at import time and configured with the
# recognition model located at MODEL_PATH.
_rapid_ocr = RapidOCR(rec_model_path=MODEL_PATH)


def ocr(
    image: NDArray | PathLike | str, esp: float = 0.5
) -> list[tuple[Polygon, str]]:
    """Detect text in *image* and group overlapping detections into regions.

    The bounding box of every detection is stretched downwards by ``esp``
    times its own height before all boxes are unioned, so boxes that overlap
    after stretching are fused into a single region.

    Args:
        image: Image array or path to an image file; forwarded unchanged to
            the RapidOCR engine.
        esp: Fraction of a box's height that is added below the box before
            merging.

    Returns:
        One ``(region, text)`` pair per merged region. ``text`` is the
        space-separated concatenation of the detections that intersect the
        region, ordered by the y coordinate of each detection's first corner
        point. An empty list is returned when the engine reports nothing.
    """
    result, _ = _rapid_ocr(image)
    # The engine hands back a falsy result (None) when it finds no text;
    # sorting or unioning that would raise.
    if not result:
        return []

    # Process detections top-to-bottom so merged text keeps line order.
    result.sort(key=lambda detection: detection[0][0][1])

    texts: list[str] = []
    shapes: list[Polygon] = []
    for points, text, _ in result:
        x, y, xmax, ymax = Polygon(points).bounds
        ymax += (ymax - y) * esp
        shapes.append(box(x, y, xmax, ymax))
        texts.append(text)

    # unary_union returns a plain Polygon (which has no ``.geoms``) when every
    # box ends up in one connected region, e.g. with a single detection.
    merged = unary_union(shapes)
    if isinstance(merged, Polygon):
        merged_shapes = [merged]
    else:
        merged_shapes = list(cast(MultiPolygon, merged).geoms)

    grouped: list[list[str]] = [[] for _ in merged_shapes]
    # A box may touch more than one region; ``visited`` assigns its text to
    # the first matching region only.
    visited = [False] * len(texts)
    for parts, region in zip(grouped, merged_shapes):
        for j, text in enumerate(texts):
            if not visited[j] and shapes[j].intersects(region):
                parts.append(text)
                visited[j] = True

    return [
        (region, " ".join(parts))
        for region, parts in zip(merged_shapes, grouped)
    ]
model/en_infer.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef7abd8bd3629ae57ea2c28b425c1bd258a871b93fd2fe7c433946ade9b5d9ea
3
+ size 8967018
pyproject.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "imgtran"
3
+ version = "0.0.1"
4
+ dependencies = [
5
+ "deep-translator>=1.11.4",
6
+ "fastapi[standard]>=0.115.6",
7
+ "gradio>=5.12.0",
8
+ "rapidocr-onnxruntime>=1.4.3",
9
+ "shapely>=2.0.6",
10
+ "wand>=0.6.13",
11
+ ]
12
+
13
+ [dependency-groups]
14
+ dev = [
15
+ "pytest>=8.3.4",
16
+ ]
17
+
18
+ [tool.pytest.ini_options]
19
+ pythonpath = '.'
run.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import io
3
+ import gradio as gr
4
+ from app import ocr
5
+ from app.constants import FONT_PATH
6
+ from numpy.typing import NDArray
7
+ from deep_translator import GoogleTranslator
8
+ from wand.color import Color
9
+ from wand.drawing import Drawing
10
+ from wand.image import Image
11
+ from wand.font import Font
12
+ from PIL import Image as PILImage
13
+
14
+
15
def handle_image(image: NDArray):
    """Paint over the text found in *image* and caption it in Vietnamese.

    The image is run through ``ocr``; every returned region is covered with a
    white rectangle, the region texts are translated in one batch, and each
    translation is drawn centred inside its rectangle.

    Args:
        image: Image as a NumPy array, as delivered by the Gradio image
            component (RGB or RGBA channel order).

    Returns:
        A PIL image holding the rendered PNG result.

    Raises:
        Exception: If OpenCV fails to encode the input as PNG.
    """
    result = ocr(image)

    # OpenCV encodes assuming BGR(A) channel order, whereas the Gradio image
    # component supplies RGB(A); convert first so red and blue are not swapped
    # in the output.
    if image.ndim == 3 and image.shape[2] == 3:
        encodable = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    elif image.ndim == 3 and image.shape[2] == 4:
        encodable = cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)
    else:
        encodable = image
    ret, encoded_image = cv2.imencode(".png", encodable)

    if not ret:
        raise Exception("Unknown error.")

    # Hand Wand real bytes rather than the NumPy buffer returned by imencode.
    with Image(blob=encoded_image.tobytes()) as img:
        texts: list[str] = []
        rects: list[tuple[int, int, int, int]] = []
        with Drawing() as draw:
            draw.fill_color = Color("white")
            for shape, text in result:
                x, y, xmax, ymax = map(int, shape.bounds)
                draw.rectangle(x, y, xmax, ymax)
                rects.append((x, y, xmax - x, ymax - y))
                texts.append(text)
            if rects:
                draw.draw(img)

        # translate_batch rejects an empty list, so skip translation entirely
        # when no text was detected and return the image unchanged.
        if texts:
            font = Font(FONT_PATH)
            translator = GoogleTranslator(source="auto", target="vi")
            translated = translator.translate_batch(texts)
            for (x, y, w, h), text in zip(rects, translated):
                img.caption(text, x, y, w, h, font=font, gravity="center")

        return PILImage.open(io.BytesIO(img.make_blob("png")))
42
+
43
+
44
# UI: an input image and an output image in one column; uploading a file to
# the input runs handle_image and shows its result in the output.
with gr.Blocks() as demo:
    with gr.Column():
        image = gr.Image()
        output = gr.Image()
    image.upload(fn=handle_image, inputs=image, outputs=output)


if __name__ == "__main__":
    # Listen on all interfaces.
    demo.launch(server_name="0.0.0.0")
tests/demo.jpg ADDED
tests/test_ocr.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from app import ocr
2
+ from pathlib import Path
3
+
4
def test_detection():
    """Check that OCR on the demo image next to this file yields 11 regions."""
    demo_image = Path(__file__).with_name("demo.jpg")
    regions = ocr(str(demo_image))
    assert len(regions) == 11
uv.lock ADDED
The diff for this file is too large to render. See raw diff