Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,187 +1,268 @@
|
|
|
|
1 |
import os
|
2 |
import re
|
3 |
import time
|
4 |
-
from typing import
|
5 |
|
6 |
import cv2
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
|
|
9 |
import requests
|
10 |
import streamlit as st
|
11 |
-
from paddleocr import PaddleOCR
|
12 |
-
from PIL import Image
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
# Use environment variable or Streamlit secrets. Do NOT hardcode keys in source.
|
20 |
-
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") or st.secrets.get("OPENROUTER_API_KEY", None)
|
21 |
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
|
|
|
|
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
# ---------------------------
|
27 |
-
# HELPERS
|
28 |
-
# ---------------------------
|
29 |
-
|
30 |
-
@st.cache_resource
def init_ocr(lang: List[str] = None, use_angle_cls: bool = True, det=False):
    """Create the PaddleOCR engine once and let Streamlit cache it.

    Args:
        lang: Language codes forwarded to ``PaddleOCR``. ``None`` selects the
            historical default ``["en", "id"]``; the default is built per call
            so that no list object is shared between invocations.
        use_angle_cls: Forwarded to ``PaddleOCR`` unchanged.
        det: Forwarded to ``PaddleOCR`` unchanged.

    Returns:
        The ``PaddleOCR`` instance, or ``None`` when construction fails (the
        error is shown to the user via ``st.error``).
    """
    if lang is None:
        lang = ["en", "id"]
    try:
        # NOTE(review): PaddleOCR's `lang` is documented as a single language
        # code; confirm that passing a list is accepted by the installed version.
        return PaddleOCR(lang=lang, use_angle_cls=use_angle_cls, det=det)
    except Exception as e:
        st.error(f"Gagal inisialisasi OCR: {e}")
        return None
|
39 |
-
|
40 |
-
|
41 |
-
def preprocess_variants(img: np.ndarray) -> List[np.ndarray]:
    """Build several pre-processed renditions of an image to give OCR more chances.

    The returned list holds, in order: the untouched colour image, its
    grayscale conversion, a bilateral-filtered grayscale, an adaptive-threshold
    binarisation and a CLAHE contrast-enhanced grayscale.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Bilateral filtering reduces noise while preserving edges.
    smoothed = cv2.bilateralFilter(gray, 9, 75, 75)
    binarised = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 7
    )
    # CLAHE raises local contrast.
    equaliser = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    contrasted = equaliser.apply(gray)
    return [img, gray, smoothed, binarised, contrasted]
|
57 |
-
|
58 |
-
|
59 |
-
def ocr_on_rotations(ocr, img: np.ndarray, try_rotations: List[int] = [0, 90, 180, 270]) -> List[Tuple]:
|
60 |
"""
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
63 |
"""
|
64 |
-
|
65 |
-
variants = preprocess_variants(img)
|
66 |
-
for rot in try_rotations:
|
67 |
-
# rotate image
|
68 |
-
if rot != 0:
|
69 |
-
h, w = img.shape[:2]
|
70 |
-
M = cv2.getRotationMatrix2D((w / 2, h / 2), rot, 1.0)
|
71 |
-
rotated = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
|
72 |
-
else:
|
73 |
-
rotated = img
|
74 |
-
for v in variants:
|
75 |
-
# PaddleOCR expects BGR or grayscale arrays; we pass rotated directly
|
76 |
-
try:
|
77 |
-
# returns list of lists: res[0] has lines; each line: [box, (text, conf)]
|
78 |
-
res = ocr.ocr(rotated, cls=True)
|
79 |
-
except Exception:
|
80 |
-
res = []
|
81 |
-
for page in res:
|
82 |
-
for ln in page:
|
83 |
-
box = ln[0]
|
84 |
-
text = ln[1][0]
|
85 |
-
conf = float(ln[1][1]) if ln[1][1] not in (None, "") else 0.0
|
86 |
-
results.append((box, text, conf, rot))
|
87 |
-
# deduplicate by text + approximate box center
|
88 |
-
dedup = {}
|
89 |
-
for box, text, conf, rot in results:
|
90 |
-
# normalize text
|
91 |
-
t = text.strip()
|
92 |
-
# compute center
|
93 |
-
cx = int((box[0][0] + box[2][0]) / 2)
|
94 |
-
cy = int((box[0][1] + box[2][1]) / 2)
|
95 |
-
key = (re.sub(r"\s+", " ", t.lower()), round(cx/50)*50, round(cy/50)*50)
|
96 |
-
if key not in dedup or conf > dedup[key][2]:
|
97 |
-
dedup[key] = (box, t, conf, rot)
|
98 |
-
return list(dedup.values())
|
99 |
-
|
100 |
-
|
101 |
-
def annotate_image(img: np.ndarray, ocr_items: List[Tuple]) -> np.ndarray:
    """Return a copy of ``img`` with every OCR hit outlined and labelled.

    Each item is a ``(box, text, conf, rot)`` tuple; the rotation is not used
    for drawing. The label shows at most 30 characters of text plus the
    confidence, placed just above the first corner of the box.
    """
    green = (0, 255, 0)
    canvas = img.copy()
    for item in ocr_items:
        box, text, conf, _rot = item
        outline = np.array(box, dtype=np.int32).reshape((-1, 1, 2))
        cv2.polylines(canvas, [outline], True, green, 1)
        # Anchor the caption 10 px above the first corner, clamped to stay on the image.
        label_x = int(box[0][0])
        label_y = int(box[0][1]) - 10
        caption = f"{text[:30]} ({conf:.2f})"
        cv2.putText(
            canvas,
            caption,
            (label_x, max(10, label_y)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.4,
            green,
            1,
            cv2.LINE_AA,
        )
    return canvas
|
111 |
-
|
112 |
-
|
113 |
-
def parse_numeric_value(text: str) -> float:
|
114 |
-
"""Parse float dari teks; mendukung koma desimal dan nilai negatif tak diharapkan."""
|
115 |
-
if text is None:
|
116 |
return 0.0
|
117 |
-
|
118 |
-
#
|
119 |
-
m = re.search(r"
|
120 |
if not m:
|
121 |
return 0.0
|
122 |
-
|
123 |
-
#
|
124 |
-
|
125 |
-
|
126 |
-
if
|
127 |
-
|
128 |
else:
|
129 |
-
|
130 |
else:
|
131 |
-
# only
|
132 |
-
if
|
133 |
-
|
134 |
-
else:
|
135 |
-
num = num.replace(",", "")
|
136 |
try:
|
137 |
-
return float(
|
138 |
except Exception:
|
139 |
return 0.0
|
140 |
|
141 |
|
142 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
"""
|
144 |
-
|
145 |
-
|
146 |
"""
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
return 0.0
|
177 |
-
#
|
178 |
-
return (value /
|
|
|
|
|
|
|
179 |
|
180 |
|
181 |
def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
|
182 |
"""
|
183 |
-
|
184 |
-
|
185 |
"""
|
186 |
try:
|
187 |
if value <= thresholds["A"]:
|
@@ -190,14 +271,9 @@ def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
|
|
190 |
return "Grade B"
|
191 |
if value <= thresholds["C"]:
|
192 |
return "Grade C"
|
193 |
-
return "Grade D"
|
194 |
except Exception:
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
def grade_order(grade: str) -> int:
    """Map a grade label to its severity rank (0 = best, 3 = worst).

    Unknown labels are ranked like "Grade D".
    """
    ranks = dict(zip(("Grade A", "Grade B", "Grade C", "Grade D"), range(4)))
    try:
        return ranks[grade]
    except KeyError:
        return 3
|
201 |
|
202 |
|
203 |
def get_grade_color(grade: str) -> Tuple[str, str]:
|
@@ -210,149 +286,192 @@ def get_grade_color(grade: str) -> Tuple[str, str]:
|
|
210 |
return colors.get(grade, ("#bdc3c7", "black"))
|
211 |
|
212 |
|
213 |
-
def call_openrouter_advice(
|
214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
payload = {
|
216 |
"model": "mistralai/mistral-7b-instruct:free",
|
217 |
"messages": [{"role": "user", "content": prompt}],
|
218 |
"max_tokens": 250,
|
219 |
"temperature": 0.7
|
220 |
}
|
|
|
221 |
try:
|
222 |
-
r = requests.post(f"{OPENROUTER_BASE_URL}/chat/completions",
|
223 |
r.raise_for_status()
|
224 |
data = r.json()
|
225 |
-
|
|
|
|
|
226 |
except Exception as e:
|
227 |
-
return f"
|
|
|
228 |
|
|
|
|
|
|
|
|
|
229 |
|
230 |
-
# ---------------------------
|
231 |
-
# UI
|
232 |
-
# ---------------------------
|
233 |
|
234 |
-
|
235 |
-
st.
|
|
|
236 |
|
237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
with col1:
|
244 |
-
st.
|
245 |
-
sugar_thr_a = st.number_input("Sugar A ≤", value=1.0, step=0.1)
|
246 |
-
sugar_thr_b = st.number_input("Sugar B ≤", value=5.0, step=0.1)
|
247 |
-
sugar_thr_c = st.number_input("Sugar C ≤", value=10.0, step=0.1)
|
248 |
with col2:
|
249 |
-
|
250 |
-
|
251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
st.markdown("---")
|
253 |
-
st.
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
st.
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
st.error("OCR model belum tersedia.")
|
269 |
-
st.stop()
|
270 |
-
with st.spinner("Menjalankan OCR pada beberapa varian gambar..."):
|
271 |
-
ocr_items = ocr_on_rotations(ocr, img)
|
272 |
-
st.success(f"OCR selesai — {len(ocr_items)} potongan teks terdeteksi (deduped).")
|
273 |
-
# show annotated image
|
274 |
-
annotated = annotate_image(img, ocr_items)
|
275 |
-
st.image(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB), caption="Overlay hasil OCR", width=min(display_w, img.shape[1]))
|
276 |
-
|
277 |
-
# collect texts sorted by confidence desc
|
278 |
-
ocr_texts = [t for _, t, _, _ in sorted(ocr_items, key=lambda x: -x[2])]
|
279 |
-
extracted = extract_nutrition_from_texts(ocr_texts)
|
280 |
-
|
281 |
-
# build editable DataFrame for user correction
|
282 |
-
rows = []
|
283 |
-
for nutr in ["serving", "sugar", "saturated_fat"]:
|
284 |
-
ent = extracted.get(nutr, {"value": 0.0, "unit": ("g" if nutr != "serving" else "g"), "per_100": False})
|
285 |
-
rows.append({
|
286 |
-
"nutrient": nutr,
|
287 |
-
"value": float(ent.get("value", 0.0)),
|
288 |
-
"unit": ent.get("unit", "g"),
|
289 |
-
"is_per_100": bool(ent.get("per_100", False))
|
290 |
-
})
|
291 |
-
df = pd.DataFrame(rows)
|
292 |
-
st.markdown("### Koreksi hasil ekstraksi (ubah nilai jika OCR keliru)")
|
293 |
-
edited = st.data_editor(df, num_rows="fixed", use_container_width=True)
|
294 |
-
|
295 |
-
if st.button("Hitung Grade dari nilai di atas"):
|
296 |
-
# read corrected
|
297 |
-
serving_val = float(edited.loc[edited['nutrient'] == 'serving', 'value'].values[0])
|
298 |
-
serving_unit = edited.loc[edited['nutrient'] == 'serving', 'unit'].values[0]
|
299 |
-
sugar_val = float(edited.loc[edited['nutrient'] == 'sugar', 'value'].values[0])
|
300 |
-
sugar_per100_flag = bool(edited.loc[edited['nutrient'] == 'sugar', 'is_per_100'].values[0])
|
301 |
-
satfat_val = float(edited.loc[edited['nutrient'] == 'saturated_fat', 'value'].values[0])
|
302 |
-
satfat_per100_flag = bool(edited.loc[edited['nutrient'] == 'saturated_fat', 'is_per_100'].values[0])
|
303 |
-
|
304 |
-
# Normalize serving unit: if ml vs g, we assume density ~1 g/ml (common for liquids). Warn if ml.
|
305 |
-
if isinstance(serving_unit, str) and "ml" in serving_unit.lower():
|
306 |
-
st.info("Takaran dalam ml terdeteksi — diasumsikan densitas 1 g/ml untuk perhitungan per 100. Koreksi manual jika perlu.")
|
307 |
-
|
308 |
-
# Normalize to per 100
|
309 |
-
sugar_per100 = normalize_to_per100(sugar_val, serving_val, sugar_per100_flag)
|
310 |
-
satfat_per100 = normalize_to_per100(satfat_val, serving_val, satfat_per100_flag)
|
311 |
-
|
312 |
-
# Grades
|
313 |
-
sugar_thresholds = {"A": sugar_thr_a, "B": sugar_thr_b, "C": sugar_thr_c}
|
314 |
-
fat_thresholds = {"A": fat_thr_a, "B": fat_thr_b, "C": fat_thr_c}
|
315 |
-
gs = get_grade_from_value(sugar_per100, sugar_thresholds)
|
316 |
-
gf = get_grade_from_value(satfat_per100, fat_thresholds)
|
317 |
-
# final grade = worse (higher order)
|
318 |
-
final = gs if grade_order(gs) >= grade_order(gf) else gf
|
319 |
-
|
320 |
-
# Show results nicely
|
321 |
-
st.header("Hasil Perhitungan")
|
322 |
-
c1, c2, c3 = st.columns(3)
|
323 |
-
bg_s, tc_s = get_grade_color(gs)
|
324 |
-
c1.markdown(f"<div style='background:{bg_s};padding:12px;border-radius:8px;text-align:center;color:{tc_s};'>"
|
325 |
-
f"<strong>Gula</strong><p style='font-size:22px'>{sugar_per100:.2f} g/100g</p><h3>{gs}</h3></div>", unsafe_allow_html=True)
|
326 |
-
bg_f, tc_f = get_grade_color(gf)
|
327 |
-
c2.markdown(f"<div style='background:{bg_f};padding:12px;border-radius:8px;text-align:center;color:{tc_f};'>"
|
328 |
-
f"<strong>Lemak Jenuh</strong><p style='font-size:22px'>{satfat_per100:.2f} g/100g</p><h3>{gf}</h3></div>", unsafe_allow_html=True)
|
329 |
-
bg_fin, tc_fin = get_grade_color(final)
|
330 |
-
c3.markdown(f"<div style='background:{bg_fin};padding:12px;border-radius:8px;text-align:center;color:{tc_fin};'>"
|
331 |
-
f"<strong>Grade Akhir</strong><p style='font-size:22px'>{final}</p></div>", unsafe_allow_html=True)
|
332 |
-
|
333 |
-
st.markdown("---")
|
334 |
-
st.write("Detail nilai (per 100):")
|
335 |
-
st.write(pd.DataFrame({
|
336 |
-
"nutrient": ["sugar", "saturated_fat"],
|
337 |
-
"per_100_g": [round(sugar_per100, 3), round(satfat_per100, 3)],
|
338 |
-
"grade": [gs, gf]
|
339 |
-
}))
|
340 |
-
|
341 |
-
# AI advice (optional)
|
342 |
-
if use_ai_advice and OPENROUTER_API_KEY:
|
343 |
-
prompt = (
|
344 |
-
f"Anda adalah ahli gizi Indonesia. "
|
345 |
-
f"Takaran sajian diasumsikan {serving_val} g/ml. "
|
346 |
-
f"Gula per 100g: {sugar_per100:.2f} g ({gs}). "
|
347 |
-
f"Lemak jenuh per 100g: {satfat_per100:.2f} g ({gf}). "
|
348 |
-
f"Grade akhir: {final}. Berikan saran singkat 40-80 kata, fokus pada kesehatan dan tips sederhana."
|
349 |
-
)
|
350 |
-
with st.spinner("Mengambil saran dari AI..."):
|
351 |
-
advice = call_openrouter_advice(OPENROUTER_API_KEY, prompt)
|
352 |
-
st.header("Saran Nutrisi (AI)")
|
353 |
-
st.info(advice)
|
354 |
-
elif use_ai_advice and not OPENROUTER_API_KEY:
|
355 |
-
st.warning("Anda memilih aktifkan saran AI tetapi API key tidak tersedia. Tambahkan OPENROUTER_API_KEY di Streamlit secrets atau environment variable.")
|
356 |
|
357 |
st.markdown("---")
|
358 |
-
st.
|
|
|
1 |
+
# app.py - Nutri-Grade Detection (improved)
|
2 |
import os
|
3 |
import re
|
4 |
import time
|
5 |
+
from typing import Optional, Tuple, List, Dict
|
6 |
|
7 |
import cv2
|
8 |
import numpy as np
|
9 |
import pandas as pd
|
10 |
+
from PIL import Image
|
11 |
import requests
|
12 |
import streamlit as st
|
|
|
|
|
13 |
|
14 |
+
# Try imports that may be optional at runtime
|
15 |
+
try:
|
16 |
+
from paddleocr import PaddleOCR
|
17 |
+
PADDLE_AVAILABLE = True
|
18 |
+
except Exception:
|
19 |
+
PADDLE_AVAILABLE = False
|
20 |
+
|
21 |
+
try:
|
22 |
+
import pytesseract
|
23 |
+
PYTESSERACT_AVAILABLE = True
|
24 |
+
except Exception:
|
25 |
+
PYTESSERACT_AVAILABLE = False
|
26 |
+
|
27 |
+
# ---------------- CONFIG ----------------
|
28 |
+
st.set_page_config(
|
29 |
+
page_title="Nutri-Grade Label Detection",
|
30 |
+
page_icon="🥗",
|
31 |
+
layout="wide",
|
32 |
+
initial_sidebar_state="expanded"
|
33 |
+
)
|
34 |
|
|
|
|
|
35 |
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
|
36 |
+
# Get API key from secrets or env (do NOT hardcode)
|
37 |
+
OPENROUTER_API_KEY = st.secrets.get("OPENROUTER_API_KEY") or os.environ.get("OPENROUTER_API_KEY")
|
38 |
|
39 |
+
# ---------------- HELPERS ----------------
|
40 |
+
def safe_float_from_str(s: str) -> float:
    """
    Parse the first number found in a free-form string such as OCR output.

    - Accepts both "1,234.56" and "1.234,56": when both separators occur, the
      last one is taken as the decimal mark and the other as grouping.
    - A separator that occurs more than once ("1,234,567") is treated as grouping.
    - A single lone comma is treated as a decimal mark ("12,5" -> 12.5).
    - Handles "1-2" by taking the first number.
    - Ignores surrounding noise like "g", "mg", "%".
    - Returns 0.0 for None or when no parseable number is found.
    """
    if s is None:
        return 0.0
    text = str(s).strip()
    # Match every digit group joined by "." or "," so grouping and decimal
    # separators end up in the same token (a single optional separator would
    # cut "1,234.56" down to "1,234").
    m = re.search(r"-?\d+(?:[.,]\d+)*(?:[eE][-+]?\d+)?", text)
    if not m:
        return 0.0
    token = m.group(0)
    last_dot = token.rfind(".")
    last_comma = token.rfind(",")
    if last_dot != -1 and last_comma != -1:
        # Both present: the right-most separator is the decimal mark.
        if last_dot > last_comma:
            token = token.replace(",", "")
        else:
            token = token.replace(".", "").replace(",", ".")
    elif token.count(",") > 1 or token.count(".") > 1:
        # A repeated separator can only be a grouping separator.
        token = token.replace(",", "").replace(".", "")
    elif last_comma != -1:
        # Lone comma: decimal mark, as is common in many locales.
        token = token.replace(",", ".")
    try:
        return float(token)
    except ValueError:
        return 0.0
|
71 |
|
72 |
|
73 |
+
def preprocess_for_ocr(image: np.ndarray, max_dim=1600) -> np.ndarray:
    """Preprocess an image to improve OCR: resize, grayscale, denoise, threshold.

    Args:
        image: Source image as a NumPy array. Colour input is treated as BGR
            (or BGRA when it has four channels); a 2-D or single-channel array
            is used as grayscale directly.
        max_dim: Longest side allowed; larger images are scaled down
            proportionally before further processing.

    Returns:
        A single-channel image combining the equalised grayscale with its
        adaptive-threshold mask.
    """
    img = image.copy()
    h, w = img.shape[:2]
    longest = max(h, w)
    if longest > max_dim:
        scale = max_dim / longest
        # Clamp to 1 px so an extreme aspect ratio never produces a zero dimension.
        new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        img = cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)

    # Converting an already-grayscale array with BGR2GRAY raises, so pick the
    # conversion that matches the actual channel layout.
    if img.ndim == 2:
        gray = img
    elif img.shape[2] == 1:
        gray = img[:, :, 0]
    elif img.shape[2] == 4:
        gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # denoise
    gray = cv2.fastNlMeansDenoising(gray, h=7)
    # increase contrast via histogram equalization
    gray = cv2.equalizeHist(gray)
    # adaptive threshold to emphasize text
    th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 2)
    # combine threshold and gray to maintain readability
    combined = cv2.bitwise_or(gray, th)
    return combined
|
92 |
+
|
93 |
+
|
94 |
+
@st.cache_resource
def initialize_ocr(lang_list: Optional[List[str]] = None, use_gpu: bool = False):
    """
    Initialize an OCR backend and return it as a plain callable.

    PaddleOCR is preferred when importable; pytesseract is the fallback.

    Args:
        lang_list: Language codes to try with PaddleOCR, in order of
            preference. Defaults to ["en", "id"]. A bare string is accepted too.
        use_gpu: Forwarded to PaddleOCR.

    Returns:
        A callable ``ocr(image) -> List[str]`` of extracted text lines, or
        ``None`` when no backend is available.
    """
    if lang_list is None:
        lang_list = ["en", "id"]
    elif isinstance(lang_list, str):
        lang_list = [lang_list]

    if PADDLE_AVAILABLE:
        ocr = None
        last_error = None
        # PaddleOCR's `lang` argument is a single language code, so try the
        # requested codes one by one and keep the first engine that loads.
        for lang in lang_list:
            try:
                ocr = PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=use_gpu)
                break
            except Exception as e:
                last_error = e

        if ocr is not None:
            def paddle_runner(img: np.ndarray) -> List[str]:
                try:
                    res = ocr.ocr(img, det=True, rec=True, cls=True)
                except Exception:
                    # Retry with the other channel layout. The conversion must
                    # match the input: BGR2GRAY on a 2-D array would itself raise.
                    if img.ndim == 2:
                        alt = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    else:
                        alt = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    res = ocr.ocr(alt, det=True, rec=True, cls=True)

                texts = []
                # res holds one group per image; each group is a list of
                # [box, (text, confidence)] entries (or a non-list placeholder
                # when nothing was detected).
                for line in res or []:
                    if isinstance(line, list):
                        for item in line:
                            if len(item) < 2:
                                continue
                            if isinstance(item[1], (list, tuple)):
                                texts.append(str(item[1][0]))
                            elif isinstance(item[1], str):
                                texts.append(item[1])
                    elif isinstance(line, tuple) and len(line) >= 2:
                        texts.append(str(line[1][0] if isinstance(line[1], (list, tuple)) else line[1]))
                return texts

            return paddle_runner

        st.warning(f"PaddleOCR init failed: {last_error}")

    if PYTESSERACT_AVAILABLE:
        def pytesseract_runner(img: np.ndarray) -> List[str]:
            # pytesseract works on PIL images; OpenCV colour arrays are BGR,
            # so convert to RGB first. 2-D arrays are passed as grayscale.
            if img.ndim == 2:
                pil = Image.fromarray(img)
            else:
                pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            raw = pytesseract.image_to_string(pil)
            return [ln.strip() for ln in raw.splitlines() if ln.strip()]

        return pytesseract_runner

    return None
|
147 |
+
|
148 |
+
|
149 |
+
def ocr_extract_texts(ocr_fn, image: np.ndarray) -> List[str]:
    """Run an OCR callable on an image and return cleaned, non-empty text lines.

    The image is preprocessed first; if preprocessing or OCR on the
    preprocessed image fails, OCR is retried on the original image. Any
    failure of the retry yields an empty result rather than an exception.

    Args:
        ocr_fn: Callable ``ocr(image) -> iterable of lines``, or ``None`` when
            no backend is available.
        image: Image to read.

    Returns:
        Lines with whitespace runs collapsed to single spaces and blanks dropped.
    """
    # No backend: return before doing any (expensive) preprocessing.
    if ocr_fn is None:
        return []

    try:
        # Preprocessing sits inside the try so that an unsupported image
        # layout degrades to "OCR the original" instead of aborting.
        lines = ocr_fn(preprocess_for_ocr(image))
    except Exception:
        try:
            lines = ocr_fn(image)
        except Exception:
            lines = []

    normalized = (re.sub(r"\s+", " ", str(ln)).strip() for ln in lines or [])
    return [ln for ln in normalized if ln]
|
170 |
+
|
171 |
+
|
172 |
+
def find_value_near_label(lines: List[str], label_variants: List[str]) -> Optional[str]:
    """
    Find the numeric token that most likely belongs to a label.

    Strategy, in order:
    1. Look in the joined text for each label and take the first number within
       360 characters *after* the label (so digits that are part of the label
       itself are never returned as its value).
    2. For each line containing a label, take the first number on that line,
       else on one of the next two lines.
    3. As a last resort, return any number directly followed by a unit
       (g, mg, ml, %).

    Args:
        lines: OCR text lines.
        label_variants: Alternative spellings of the label; matched
            case-insensitively, in the given order.

    Returns:
        The raw numeric token as it appears in the text, or None.
    """
    number = re.compile(r"-?\d+[.,]?\d*(?:[eE][-+]?\d+)?")
    joined = " ||| ".join(lines).lower()
    labels = [label.lower() for label in label_variants]

    for label in labels:
        idx = joined.find(label)
        if idx == -1:
            continue
        start = idx + len(label)
        m = number.search(joined, start, start + 360)
        if m:
            return m.group(0)

    # Line-based scan: catches values printed before the label on the same line.
    for i, ln in enumerate(lines):
        lowln = ln.lower()
        if not any(label in lowln for label in labels):
            continue
        for candidate in (lowln, *lines[i + 1:i + 3]):
            m = number.search(candidate)
            if m:
                return m.group(0)

    # fallback: any number labeled with a unit
    anynum = re.search(r"(-?\d+[.,]?\d*(?:[eE][-+]?\d+)?)\s*(g|mg|ml|%)", joined)
    if anynum:
        return anynum.group(1)
    return None
|
216 |
+
|
217 |
+
|
218 |
+
def detect_serving_unit(lines: List[str]) -> Tuple[float, str]:
    """
    Try to find the serving size and its unit in OCR text.

    Only labels that actually occur in the text are looked up, so an unrelated
    "number + unit" elsewhere on the label is never mistaken for the serving
    size.

    Args:
        lines: OCR text lines.

    Returns:
        (serving_value, unit) with unit 'g' or 'ml'. Falls back to
        (100.0, 'g') when no serving label is found, and to a value of 100.0
        when the number next to the label is not positive.
    """
    labels = ["serving size", "takaran saji", "takaran sajian", "portion", "porsi"]
    joint = " ".join(lines).lower()

    for lab in labels:
        # find_value_near_label falls back to *any* number with a unit, so it
        # must only be consulted for labels that are really present.
        if lab not in joint:
            continue
        val = find_value_near_label(lines, [lab])
        if not val:
            continue
        v = safe_float_from_str(val)

        # Prefer a unit written right after the number that follows the label.
        m = re.search(
            rf"{re.escape(lab)}[^\n\r]{{0,80}}?([0-9.,]+)\s*(ml|grams?|gr|g)\b",
            joint,
        )
        if m:
            unit = "ml" if m.group(2) == "ml" else "g"
        elif re.search(r"\d\s*ml\b", joint):
            # No unit beside the label: guess from any "<number> ml" in the text.
            unit = "ml"
        else:
            unit = "g"
        return (v if v > 0 else 100.0, unit)

    # fallback default
    return 100.0, "g"
|
243 |
+
|
244 |
+
|
245 |
+
def normalize_to_per_100(value: float, per_serving: float, unit_serving: str) -> float:
    """
    Scale a per-serving amount to the amount per 100 units (g or ml).

    A serving of exactly 100 returns ``value`` untouched; a serving that is
    zero or negative returns 0.0. ``unit_serving`` is accepted for interface
    compatibility and does not affect the result.
    """
    if per_serving == 100:
        # Already expressed per 100.
        return value
    if per_serving <= 0:
        return 0.0
    return (value / per_serving) * 100.0
|
257 |
+
|
258 |
+
|
259 |
+
GRADE_ORDER = ["Grade A", "Grade B", "Grade C", "Grade D"]
|
260 |
|
261 |
|
262 |
def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
|
263 |
"""
|
264 |
+
thresholds: dict with keys "A","B","C" representing upper bounds for each grade
|
265 |
+
A if value <= thresholds['A'], B if value <= thresholds['B'], etc.
|
266 |
"""
|
267 |
try:
|
268 |
if value <= thresholds["A"]:
|
|
|
271 |
return "Grade B"
|
272 |
if value <= thresholds["C"]:
|
273 |
return "Grade C"
|
|
|
274 |
except Exception:
|
275 |
+
pass
|
276 |
+
return "Grade D"
|
|
|
|
|
|
|
|
|
277 |
|
278 |
|
279 |
def get_grade_color(grade: str) -> Tuple[str, str]:
|
|
|
286 |
return colors.get(grade, ("#bdc3c7", "black"))
|
287 |
|
288 |
|
289 |
+
def call_openrouter_advice(serving_val, sugar_per100, fat_per100, sugar_grade, fat_grade, final_grade) -> str:
    """
    Ask an OpenRouter-compatible chat endpoint for short nutrition advice.

    The API key is read from the module-level OPENROUTER_API_KEY. The request
    uses a 20 s timeout. This function never raises for network, HTTP or
    response-format problems: it returns a human-readable message instead.

    Args:
        serving_val: Serving size shown in the prompt.
        sugar_per100: Sugar per 100 g/ml.
        fat_per100: Saturated fat per 100 g/ml.
        sugar_grade: Grade label for sugar, e.g. "Grade B".
        fat_grade: Grade label for saturated fat.
        final_grade: Overall grade label.

    Returns:
        The advice text, or a fallback/error message.
    """
    if not OPENROUTER_API_KEY:
        return "OpenRouter API key not configured. Set OPENROUTER_API_KEY in secrets or environment variables."

    # Built line by line so the prompt text does not depend on source indentation.
    prompt = "\n".join([
        "",
        "Anda adalah ahli gizi dari Indonesia yang ramah.",
        f"- Takaran Saji: {serving_val}",
        f"- Gula (per 100): {sugar_per100:.2f} g (Grade {sugar_grade.replace('Grade ', '')})",
        f"- Lemak Jenuh (per 100): {fat_per100:.2f} g (Grade {fat_grade.replace('Grade ', '')})",
        f"- Grade Akhir: {final_grade.replace('Grade ', '')}",
        "Berikan saran nutrisi singkat 50-80 kata, fokus pada dampak kesehatan dan tips praktis.",
        "",
    ])
    payload = {
        "model": "mistralai/mistral-7b-instruct:free",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 250,
        "temperature": 0.7
    }
    headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
    try:
        r = requests.post(f"{OPENROUTER_BASE_URL}/chat/completions", json=payload, headers=headers, timeout=20)
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection/timeout/HTTP status; ValueError: body is not JSON.
        return f"Gagal memanggil AI: {e}"

    # Defensive access: a missing or *empty* "choices" list, or a payload of an
    # unexpected shape, counts as an empty answer rather than an error.
    choices = data.get("choices") if isinstance(data, dict) else None
    first = choices[0] if isinstance(choices, list) and choices else {}
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    if isinstance(content, str) and content.strip():
        return content.strip()
    return "Tidak ada saran (jawaban kosong)."
|
321 |
+
|
322 |
|
323 |
+
def reset_state():
    """Drop every per-image entry from the Streamlit session state.

    Registered as the uploader's ``on_change`` callback so that OCR output and
    computed grades of a previous image do not linger.
    """
    per_image_keys = ("ocr_done", "data", "calculated", "calc")
    for key in per_image_keys:
        if key not in st.session_state:
            continue
        del st.session_state[key]
|
327 |
|
|
|
|
|
|
|
328 |
|
329 |
+
# --------------- UI ---------------
|
330 |
+
st.title("🥗 Nutri-Grade Detection & Grade Calculator (Improved)")
|
331 |
+
st.caption("Analisis gizi produk berdasarkan standar Nutri-Grade (contoh: gula & lemak jenuh).")
|
332 |
|
333 |
+
with st.expander("📋 Petunjuk (singkat)"):
|
334 |
+
st.markdown("""
|
335 |
+
1. Upload gambar label nutrisi (JPG/PNG).
|
336 |
+
2. Klik **Analisis OCR** → koreksi hasil jika perlu.
|
337 |
+
3. Klik **Hitung Grade**.
|
338 |
+
\n**Catatan deploy:** taruh OPENROUTER_API_KEY di Secrets Hugging Face (atau `os.environ`). Sertakan dependency seperti `paddlepaddle` & `paddleocr` di requirements.\
|
339 |
+
""")
|
340 |
|
341 |
+
# Upload: decode the image, then run OCR on demand and stash the lines in
# session state so they survive Streamlit reruns.
st.header("1) Upload Gambar")
uploaded = st.file_uploader("Pilih gambar tabel nutrisi (jpg/jpeg/png)", type=["jpg", "jpeg", "png"], on_change=reset_state)

if uploaded:
    # getvalue() returns the full buffer regardless of the stream position;
    # read() can come back empty on a rerun once the file has been consumed.
    raw_bytes = uploaded.getvalue()
    file_bytes = np.frombuffer(raw_bytes, np.uint8)
    # An empty buffer must not reach imdecode; treat it like an unreadable file.
    img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) if file_bytes.size else None
    if img is None:
        st.error("Gagal membaca file gambar. Pastikan file valid.")
        st.stop()
    st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), width=380)

    ocr_fn = initialize_ocr()
    if ocr_fn is None:
        st.warning("OCR backend tidak tersedia (PaddleOCR/pytesseract). Install dependency yang diperlukan.")
    elif st.button("Analisis OCR"):
        with st.spinner("Menjalankan OCR — ini bisa memakan beberapa saat..."):
            lines = ocr_extract_texts(ocr_fn, img)
        if not lines:
            st.warning("Tidak ada teks terdeteksi. Coba gambar lebih jelas atau upload foto yang lebih tajam.")
            st.session_state.ocr_done = False
        else:
            # store extracted
            st.session_state.data = {"ocr_lines": lines}
            st.session_state.ocr_done = True
            st.success("OCR selesai — silakan koreksi nilai jika perlu.")
|
368 |
+
|
369 |
+
# Correction & Calculation input: let the user fix the OCR text and the
# detected values, then compute per-100 figures on demand.
if st.session_state.get("ocr_done"):
    st.header("2) Koreksi Hasil OCR & Hitung")
    lines = st.session_state.data.get("ocr_lines", [])
    # Show extracted lines and allow user to edit/clean
    st.subheader("Teks hasil OCR (edit jika perlu)")
    joined = "\n".join(lines)
    edited = st.text_area("Hasil OCR (baris per baris)", value=joined, height=180)
    edited_lines = [ln.strip() for ln in edited.splitlines() if ln.strip()]

    # Attempt auto-detection using edited lines
    serving_val, serving_unit = detect_serving_unit(edited_lines)
    sugar_label_variants = ["total sugar", "gula total", "gula", "sugars", "sugar"]
    fat_label_variants = ["saturated fat", "lemak jenuh", "saturated fats", "sat fat"]

    sugar_raw = find_value_near_label(edited_lines, sugar_label_variants) or ""
    fat_raw = find_value_near_label(edited_lines, fat_label_variants) or ""

    col1, col2, col3 = st.columns(3)
    with col1:
        serving_input = st.text_input("Takaran saji (detected)", value=f"{serving_val} {serving_unit}")
    with col2:
        sugar_input = st.text_input("Gula (detected)", value=str(sugar_raw))
    with col3:
        fat_input = st.text_input("Lemak jenuh (detected)", value=str(fat_raw))

    # Option: user can indicate values are already per 100
    per100_checkbox = st.checkbox("Nilai yang saya masukkan sudah satuan per 100g/ml (centang jika ya)", value=False)

    if st.button("Hitung Grade"):
        # The serving field may contain a unit next to the number.
        mserv = re.search(r"(-?\d+[.,]?\d*)", serving_input)
        if mserv:
            parsed_serv = safe_float_from_str(mserv.group(0))
        else:
            parsed_serv = serving_val or 100.0

        # Honour a unit the user typed in the field; otherwise keep the
        # auto-detected one.
        munit = re.search(r"[\d.,]\s*(ml|g)", serving_input.lower())
        if munit:
            serving_unit = munit.group(1)

        parsed_sugar = safe_float_from_str(sugar_input)
        parsed_fat = safe_float_from_str(fat_input)

        if not per100_checkbox and parsed_serv <= 0:
            # A non-positive serving cannot be normalised; computing anyway
            # would report 0 g per 100 and therefore a misleading best grade.
            st.error("Takaran saji harus lebih besar dari 0 untuk menghitung nilai per 100.")
        else:
            # convert to per 100 if needed
            if per100_checkbox:
                sugar_per100 = parsed_sugar
                fat_per100 = parsed_fat
            else:
                sugar_per100 = normalize_to_per_100(parsed_sugar, parsed_serv, serving_unit)
                fat_per100 = normalize_to_per_100(parsed_fat, parsed_serv, serving_unit)

            # Save computed values
            st.session_state.calc = {
                "serving": parsed_serv,
                "serving_unit": serving_unit,
                "sugar_per100": sugar_per100,
                "fat_per100": fat_per100,
                "raw_sugar": parsed_sugar,
                "raw_fat": parsed_fat
            }
            st.session_state.calculated = True
            st.success("Perhitungan selesai.")
|
429 |
+
|
430 |
+
# Display Results: grade the stored per-100 values, render one card per
# nutrient plus the final grade, then show AI advice.
if st.session_state.get("calculated"):
    st.header("3) Hasil Grading")
    c = st.session_state.calc

    # Thresholds (Nutri-Grade-like example — adjust to the applicable standard)
    sugar_thresholds = {"A": 1.0, "B": 5.0, "C": 10.0}  # g per 100
    fat_thresholds = {"A": 0.7, "B": 1.2, "C": 2.8}  # g per 100

    gs = get_grade_from_value(c["sugar_per100"], sugar_thresholds)
    gf = get_grade_from_value(c["fat_per100"], fat_thresholds)
    # final grade is the worst (max index)
    final_grade = max([gs, gf], key=lambda x: GRADE_ORDER.index(x))

    cols = st.columns(3)

    def show(col, title, value, unit, grade):
        """Render one coloured grade card; pass value=None to omit the number."""
        bg, textc = get_grade_color(grade)
        if value is None:
            value_html = ""
        else:
            value_html = f"<p style='font-size:20px;margin:4px 0;'>{value:.2f} {unit}</p>"
        col.markdown(
            f"<div style='background:{bg};padding:12px;border-radius:10px;text-align:center;color:{textc};'>"
            f"<strong>{title}</strong>{value_html}"
            f"<h3 style='margin:2px;'>{grade}</h3></div>",
            unsafe_allow_html=True
        )

    show(cols[0], "Gula (per 100)", c["sugar_per100"], "g", gs)
    show(cols[1], "Lemak Jenuh (per 100)", c["fat_per100"], "g", gf)
    # The final grade has no numeric value of its own, so no number is printed.
    show(cols[2], "Grade Akhir", None, "", final_grade)

    st.markdown("---")
    st.subheader("Rincian input & perhitungan")
    st.write(pd.DataFrame([{
        "serving_value": c["serving"],
        "serving_unit": c["serving_unit"],
        "raw_sugar_in_serving": c["raw_sugar"],
        "raw_fat_in_serving": c["raw_fat"],
        "sugar_per_100": c["sugar_per100"],
        "fat_per_100": c["fat_per100"]
    }]))

    st.markdown("---")
    st.header("4) Saran Nutrisi (AI)")
    # Streamlit reruns the script on every interaction. Keep the advice next
    # to the computed values so the endpoint is called once per calculation.
    advice = c.get("advice")
    if advice is None:
        with st.spinner("Meminta saran AI..."):
            advice = call_openrouter_advice(c["serving"], c["sugar_per100"], c["fat_per100"], gs, gf, final_grade)
        c["advice"] = advice
    st.info(advice)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
475 |
|
476 |
st.markdown("---")
|
477 |
+
st.markdown("<p style='text-align:center;'>Nutri-Grade App (improved) © 2025</p>", unsafe_allow_html=True)
|