Spaces:
Sleeping
Sleeping
Futyn-Maker
committed on
Commit
Β·
2602ab3
1
Parent(s):
ba1b3f0
Add the app
Browse files- README.md +1 -1
- app.py +152 -0
- requirements.txt +111 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
title: Ocr Notebooks App
|
3 |
-
emoji:
|
4 |
colorFrom: gray
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
|
|
1 |
---
|
2 |
title: Ocr Notebooks App
|
3 |
+
emoji: π
|
4 |
colorFrom: gray
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
app.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import numpy as np
import cv2
import torch
import gradio as gr
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from kraken import blla, binarization

# Initialize the OCR model and its processor once at import time.
# The checkpoint is pulled from the Hugging Face Hub on first run.
print("ΠΠ°Π³ΡΡΠ·ΠΊΠ° ΠΌΠΎΠ΄Π΅Π»ΠΈ OCR...")
model_name = "Futyn-Maker/trocr-base-ru-notebooks"
processor = TrOCRProcessor.from_pretrained(model_name)
model = VisionEncoderDecoderModel.from_pretrained(model_name)

# Select GPU when available; all inference below runs on this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"ΠΡΠΏΠΎΠ»ΡΠ·ΠΎΠ²Π°Π½ΠΈΠ΅ ΡΡΡΡΠΎΠΉΡΡΠ²Π°: {device}")
21 |
+
def segment_image(image):
    """Split a page image into per-line crops using Kraken.

    Args:
        image: PIL.Image of the full page.

    Returns:
        list of PIL.Image crops, one per (merged) detected line, ordered
        top to bottom.
    """
    # Binarize first: Kraken's baseline segmenter expects b/w input.
    binary = binarization.nlbin(image, threshold=0.5, escale=2.0, border=0.1, high=0.9)

    # Detect baselines for left-to-right horizontal text.
    segmentation = blla.segment(binary, text_direction='horizontal-lr')

    # Order lines by the y-coordinate of their first baseline point.
    ordered = sorted(segmentation.lines, key=lambda ln: ln.baseline[0][1])

    # Fuse lines whose starting y-coordinates differ by less than 15 px —
    # they are treated as fragments of the same physical line.
    merged = []
    current = ordered[0] if ordered else None
    for candidate in ordered[1:]:
        if abs(candidate.baseline[0][1] - current.baseline[0][1]) < 15:
            # Same physical line: absorb the candidate's baseline points.
            current.baseline.extend(candidate.baseline)
        else:
            merged.append(current)
            current = candidate
    if current is not None:
        merged.append(current)

    # Crop each merged line out of the original (non-binarized) image.
    crops = []
    for line in merged:
        pts = np.array(line.baseline)
        left = int(pts[:, 0].min())
        top = int(pts[:, 1].min())
        right = int(pts[:, 0].max())
        bottom = int(pts[:, 1].max())

        # Pad vertically so ascenders/descenders are not clipped.
        padding = 30
        top = max(0, top - padding)
        bottom = min(image.height, bottom + padding)

        crops.append(image.crop((left, top, right, bottom)))

    return crops
69 |
+
|
70 |
+
def recognize_text(image):
    """Run OCR over a page image and return the recognized text.

    The page is segmented into line crops; each crop is decoded
    independently with the TrOCR model and the results are joined with
    newlines, top to bottom.
    """
    # Split the page into individual text lines.
    line_crops = segment_image(image)

    if not line_crops:
        return "ΠΠ΅ ΡΠ΄Π°Π»ΠΎΡΡ ΠΎΠ±Π½Π°ΡΡΠΆΠΈΡΡ ΡΡΡΠΎΠΊΠΈ ΡΠ΅ΠΊΡΡΠ° Π½Π° ΠΈΠ·ΠΎΠ±ΡΠ°ΠΆΠ΅Π½ΠΈΠΈ."

    def _decode(crop):
        # Preprocess the crop into model-ready pixel values on the right device.
        pixels = processor(crop, return_tensors="pt").pixel_values.to(device)
        # Beam-search decoding; gradients are not needed at inference time.
        with torch.no_grad():
            ids = model.generate(
                pixels,
                max_length=256,
                num_beams=4,
                early_stopping=True,
            )
        # Strip special tokens; batch size is 1, so take the single result.
        return processor.batch_decode(ids, skip_special_tokens=True)[0]

    # One recognized string per line, joined into the final text.
    return "\n".join(_decode(crop) for crop in line_crops)
|
105 |
+
|
106 |
+
def save_text_to_file(text):
    """Persist recognized text to a downloadable UTF-8 file.

    The file is written into a fresh temporary directory so that concurrent
    requests (e.g. several Gradio users at once) never overwrite each
    other's result, while keeping the friendly download name
    ``recognized_text.txt``. The previous version wrote a single fixed file
    in the working directory, which races under concurrent use.

    Args:
        text: The recognized text to save.

    Returns:
        str: Path to the written file.
    """
    import tempfile

    # A unique per-call directory avoids a shared, clobber-prone CWD file.
    output_path = os.path.join(tempfile.mkdtemp(), "recognized_text.txt")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)
    return output_path
|
113 |
+
|
114 |
+
def process_image(input_image):
    """Gradio callback: OCR an uploaded image end to end.

    Args:
        input_image: Uploaded image (PIL.Image or numpy array).

    Returns:
        tuple: (recognized text, path to the saved text file).
    """
    # Gradio may hand us a numpy array; normalize to PIL first.
    image = input_image if isinstance(input_image, Image.Image) else Image.fromarray(input_image)

    # Recognize, then persist so the UI can offer the text as a download.
    text = recognize_text(image)
    return text, save_text_to_file(text)
|
129 |
+
|
130 |
+
# --- Gradio UI ---------------------------------------------------------
with gr.Blocks(title="Π Π°ΡΠΏΠΎΠ·Π½Π°Π²Π°Π½ΠΈΠ΅ ΡΡΠΊΠΎΠΏΠΈΡΠ½ΠΎΠ³ΠΎ ΡΠ΅ΠΊΡΡΠ°") as demo:
    gr.Markdown("# Π Π°ΡΠΏΠΎΠ·Π½Π°Π²Π°Π½ΠΈΠ΅ ΡΡΠΊΠΎΠΏΠΈΡΠ½ΠΎΠ³ΠΎ ΡΠ΅ΠΊΡΡΠ°")
    gr.Markdown("ΠΠ°Π³ΡΡΠ·ΠΈΡΠ΅ ΠΈΠ·ΠΎΠ±ΡΠ°ΠΆΠ΅Π½ΠΈΠ΅ Ρ ΡΡΠΊΠΎΠΏΠΈΡΠ½ΡΠΌ ΡΠ΅ΠΊΡΡΠΎΠΌ Π΄Π»Ρ ΡΠ°ΡΠΏΠΎΠ·Π½Π°Π²Π°Π½ΠΈΡ.")

    # Input row: the page image to recognize.
    with gr.Row():
        image_input = gr.Image(type="pil", label="ΠΠ·ΠΎΠ±ΡΠ°ΠΆΠ΅Π½ΠΈΠ΅")

    # Action row: kicks off OCR.
    with gr.Row():
        recognize_button = gr.Button("Π Π°ΡΠΏΠΎΠ·Π½Π°ΡΡ ΡΠ΅ΠΊΡΡ")

    # Output row: recognized text plus a downloadable .txt file.
    with gr.Row():
        recognized_textbox = gr.Textbox(label="Π Π°ΡΠΏΠΎΠ·Π½Π°Π½Π½ΡΠΉ ΡΠ΅ΠΊΡΡ", lines=10)
        download_file = gr.File(label="Π‘ΠΊΠ°ΡΠ°ΡΡ ΡΠ΅ΠΊΡΡΠΎΠ²ΡΠΉ ΡΠ°ΠΉΠ»")

    recognize_button.click(
        fn=process_image,
        inputs=image_input,
        outputs=[recognized_textbox, download_file],
    )

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
aiohappyeyeballs==2.6.1
|
3 |
+
aiohttp==3.11.14
|
4 |
+
aiosignal==1.3.2
|
5 |
+
annotated-types==0.7.0
|
6 |
+
anyio==4.9.0
|
7 |
+
async-timeout==5.0.1
|
8 |
+
attrs==25.3.0
|
9 |
+
cattrs==24.1.2
|
10 |
+
certifi==2025.1.31
|
11 |
+
charset-normalizer==3.4.1
|
12 |
+
click==8.1.8
|
13 |
+
coremltools==8.2
|
14 |
+
exceptiongroup==1.2.2
|
15 |
+
fastapi==0.115.11
|
16 |
+
ffmpy==0.5.0
|
17 |
+
filelock==3.18.0
|
18 |
+
frozenlist==1.5.0
|
19 |
+
fsspec==2025.3.0
|
20 |
+
gradio==5.22.0
|
21 |
+
gradio_client==1.8.0
|
22 |
+
groovy==0.1.2
|
23 |
+
h11==0.14.0
|
24 |
+
httpcore==1.0.7
|
25 |
+
httpx==0.28.1
|
26 |
+
huggingface-hub==0.29.3
|
27 |
+
idna==3.10
|
28 |
+
imageio==2.37.0
|
29 |
+
importlib_resources==6.5.2
|
30 |
+
Jinja2==3.1.6
|
31 |
+
joblib==1.4.2
|
32 |
+
jsonschema==4.23.0
|
33 |
+
jsonschema-specifications==2024.10.1
|
34 |
+
kraken==5.3.0
|
35 |
+
lazy_loader==0.4
|
36 |
+
lightning==2.4.0
|
37 |
+
lightning-utilities==0.14.2
|
38 |
+
lxml==5.3.1
|
39 |
+
markdown-it-py==3.0.0
|
40 |
+
MarkupSafe==3.0.2
|
41 |
+
mdurl==0.1.2
|
42 |
+
mpmath==1.3.0
|
43 |
+
multidict==6.2.0
|
44 |
+
networkx==3.4.2
|
45 |
+
numpy==2.0.2
|
46 |
+
nvidia-cublas-cu12==12.1.3.1
|
47 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
48 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
49 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
50 |
+
nvidia-cudnn-cu12==9.1.0.70
|
51 |
+
nvidia-cufft-cu12==11.0.2.54
|
52 |
+
nvidia-curand-cu12==10.3.2.106
|
53 |
+
nvidia-cusolver-cu12==11.4.5.107
|
54 |
+
nvidia-cusparse-cu12==12.1.0.106
|
55 |
+
nvidia-nccl-cu12==2.20.5
|
56 |
+
nvidia-nvjitlink-cu12==12.8.93
|
57 |
+
nvidia-nvtx-cu12==12.1.105
|
58 |
+
opencv-python==4.11.0.86
|
59 |
+
orjson==3.10.15
|
60 |
+
packaging==24.2
|
61 |
+
pandas==2.2.3
|
62 |
+
pillow==11.1.0
|
63 |
+
propcache==0.3.0
|
64 |
+
protobuf==6.30.1
|
65 |
+
pyaml==25.1.0
|
66 |
+
pyarrow==19.0.1
|
67 |
+
pydantic==2.10.6
|
68 |
+
pydantic_core==2.27.2
|
69 |
+
pydub==0.25.1
|
70 |
+
Pygments==2.19.1
|
71 |
+
python-bidi==0.6.6
|
72 |
+
python-dateutil==2.9.0.post0
|
73 |
+
python-multipart==0.0.20
|
74 |
+
pytorch-lightning==2.5.1
|
75 |
+
pytz==2025.1
|
76 |
+
PyYAML==6.0.2
|
77 |
+
referencing==0.36.2
|
78 |
+
regex==2024.11.6
|
79 |
+
requests==2.32.3
|
80 |
+
rich==13.9.4
|
81 |
+
rpds-py==0.23.1
|
82 |
+
ruff==0.11.2
|
83 |
+
safehttpx==0.1.6
|
84 |
+
safetensors==0.5.3
|
85 |
+
scikit-image==0.24.0
|
86 |
+
scikit-learn==1.5.2
|
87 |
+
scipy==1.13.1
|
88 |
+
semantic-version==2.10.0
|
89 |
+
shapely==2.0.7
|
90 |
+
shellingham==1.5.4
|
91 |
+
six==1.17.0
|
92 |
+
sniffio==1.3.1
|
93 |
+
starlette==0.46.1
|
94 |
+
sympy==1.13.3
|
95 |
+
threadpoolctl==3.5.0
|
96 |
+
tifffile==2025.3.13
|
97 |
+
tokenizers==0.20.3
|
98 |
+
tomlkit==0.13.2
|
99 |
+
torch==2.4.1
|
100 |
+
torchmetrics==1.7.0
|
101 |
+
torchvision==0.19.1
|
102 |
+
tqdm==4.67.1
|
103 |
+
transformers==4.45.0
|
104 |
+
triton==3.0.0
|
105 |
+
typer==0.15.2
|
106 |
+
typing_extensions==4.12.2
|
107 |
+
tzdata==2025.2
|
108 |
+
urllib3==2.3.0
|
109 |
+
uvicorn==0.34.0
|
110 |
+
websockets==15.0.1
|
111 |
+
yarl==1.18.3
|