Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -3,12 +3,16 @@ import streamlit as st
|
|
3 |
import cv2
|
4 |
import numpy as np
|
5 |
import re
|
6 |
-
import pandas as pd
|
7 |
from PIL import Image
|
8 |
-
from paddleocr import PaddleOCR
|
9 |
import io
|
10 |
import math
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
# -----------------------
|
13 |
# CONFIG
|
14 |
# -----------------------
|
@@ -32,90 +36,64 @@ KEYS = {
|
|
32 |
# UTILITIES
|
33 |
# -----------------------
|
34 |
def safe_parse_number(s: str) -> float:
|
35 |
-
"""
|
36 |
-
Robust parse of numbers that may use comma or dot for decimals, or thousand separators.
|
37 |
-
Examples:
|
38 |
-
"1,234.56" -> 1234.56
|
39 |
-
"1.234,56" -> 1234.56
|
40 |
-
"12,34" -> 12.34 (assume comma decimal if no dot)
|
41 |
-
"(12.3)" -> -12.3
|
42 |
-
"—" or "" -> 0.0
|
43 |
-
"""
|
44 |
if s is None:
|
45 |
return 0.0
|
46 |
s = str(s).strip()
|
47 |
if s == "":
|
48 |
return 0.0
|
49 |
-
# remove parentheses -> negative
|
50 |
is_negative = False
|
51 |
if s.startswith("(") and s.endswith(")"):
|
52 |
is_negative = True
|
53 |
s = s[1:-1].strip()
|
54 |
-
# strip non numeric except . and , and - and /
|
55 |
s = re.sub(r"[^\d\.,\-\/]", "", s)
|
56 |
if s == "" or s in ["-", ".", ","]:
|
57 |
return 0.0
|
58 |
-
# handle fractions like 1/2
|
59 |
if "/" in s and not any(c.isalpha() for c in s):
|
60 |
try:
|
61 |
-
|
62 |
-
val = float(
|
63 |
return -val if is_negative else val
|
64 |
except:
|
65 |
pass
|
66 |
-
# if both dot and comma present, infer thousand/decimal by which appears last
|
67 |
if "." in s and "," in s:
|
68 |
if s.rfind(".") > s.rfind(","):
|
69 |
-
# dot decimal, comma thousands -> remove commas
|
70 |
s = s.replace(",", "")
|
71 |
else:
|
72 |
-
# comma decimal, dot thousands -> remove dots, replace comma with dot
|
73 |
s = s.replace(".", "").replace(",", ".")
|
74 |
else:
|
75 |
-
# only comma present -> treat comma as decimal if format small (e.g. "12,34")
|
76 |
if "," in s and "." not in s:
|
77 |
-
# if multiple commas, remove thousands commas except last
|
78 |
parts = s.split(",")
|
79 |
-
if len(parts[-1]) in (1,2,3):
|
80 |
s = s.replace(",", ".")
|
81 |
else:
|
82 |
s = s.replace(",", "")
|
83 |
-
# finally convert
|
84 |
try:
|
85 |
val = float(s)
|
86 |
return -val if is_negative else val
|
87 |
except:
|
88 |
-
# fallback: extract digits and first decimal-like
|
89 |
m = re.search(r"-?\d+([.,]\d+)?", s)
|
90 |
if m:
|
91 |
return float(m.group(0).replace(",", "."))
|
92 |
return 0.0
|
93 |
|
94 |
def preprocess_for_ocr(img: np.ndarray, max_dim=1600):
|
95 |
-
"""Resize, denoise, and convert to RGB for OCR. Returns RGB image."""
|
96 |
-
# if grayscale convert to BGR
|
97 |
if len(img.shape) == 2:
|
98 |
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
99 |
-
# downscale if too large (keep aspect)
|
100 |
h, w = img.shape[:2]
|
101 |
-
scale = 1.0
|
102 |
if max(h, w) > max_dim:
|
103 |
scale = max_dim / max(h, w)
|
104 |
img = cv2.resize(img, (int(w*scale), int(h*scale)), interpolation=cv2.INTER_AREA)
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
108 |
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
109 |
return img_rgb
|
110 |
|
111 |
def try_ocr_variants(ocr, img_rgb):
|
112 |
-
"""
|
113 |
-
Try several rotations and small scalings, return best text and details.
|
114 |
-
Score: number of numeric tokens found + sum(confidences).
|
115 |
-
"""
|
116 |
variants = []
|
117 |
rotations = [0, 90, 180, 270]
|
118 |
-
scales = [1.0, 1.5, 0.8]
|
119 |
for rot in rotations:
|
120 |
if rot != 0:
|
121 |
M = cv2.getRotationMatrix2D((img_rgb.shape[1]/2, img_rgb.shape[0]/2), rot, 1.0)
|
@@ -128,7 +106,7 @@ def try_ocr_variants(ocr, img_rgb):
|
|
128 |
else:
|
129 |
imgrs = imgr
|
130 |
try:
|
131 |
-
ocr_res = ocr.ocr(imgrs, cls=True)
|
132 |
except Exception:
|
133 |
try:
|
134 |
ocr_res = ocr.ocr(imgrs)
|
@@ -138,19 +116,9 @@ def try_ocr_variants(ocr, img_rgb):
|
|
138 |
conf_sum = 0.0
|
139 |
num_tokens = 0
|
140 |
if ocr_res:
|
141 |
-
|
142 |
-
# Try to flatten
|
143 |
-
flat = []
|
144 |
-
if isinstance(ocr_res, list) and len(ocr_res) > 0 and isinstance(ocr_res[0], list):
|
145 |
-
# typical: list of lines
|
146 |
-
flat = ocr_res
|
147 |
-
elif isinstance(ocr_res, dict) and "text" in ocr_res:
|
148 |
-
flat = ocr_res["text"]
|
149 |
-
else:
|
150 |
-
flat = ocr_res
|
151 |
for item in flat:
|
152 |
try:
|
153 |
-
# item example: [box, (text, prob)]
|
154 |
entry = item[1] if isinstance(item, (list, tuple)) and len(item) > 1 else item
|
155 |
if isinstance(entry, (list, tuple)):
|
156 |
txt = entry[0]
|
@@ -165,44 +133,31 @@ def try_ocr_variants(ocr, img_rgb):
|
|
165 |
txt = str(item)
|
166 |
prob = 0.0
|
167 |
text_lines.append(txt)
|
168 |
-
|
169 |
-
|
|
|
|
|
170 |
num_tokens += len(re.findall(r"[\d]+", txt))
|
171 |
joined = " ".join(text_lines)
|
172 |
-
# scoring heuristics
|
173 |
score = num_tokens + conf_sum
|
174 |
variants.append({"rot": rot, "scale": s, "score": score, "text": joined, "raw": ocr_res})
|
175 |
-
# pick best
|
176 |
variants = sorted(variants, key=lambda x: x["score"], reverse=True)
|
177 |
-
|
178 |
-
return best
|
179 |
|
180 |
def extract_nutrition_info(full_text: str) -> dict:
|
181 |
-
""
|
182 |
-
Try to extract serving size, sugar, saturated fat, and units.
|
183 |
-
Returns dict with keys: serving_value, serving_unit, sugar_value, sugar_unit, fat_value, fat_unit, per100_flag.
|
184 |
-
"""
|
185 |
-
txt = full_text.lower()
|
186 |
-
# normalize common words
|
187 |
-
txt = txt.replace("sangat", " ")
|
188 |
-
# patterns for numbers with optional unit
|
189 |
-
num_pat = r"[-+]?\d{1,3}(?:[.,]\d{3})*(?:[.,]\d+)?(?:\s*(?:g|mg|ml|serving|sajian|tsp|tbsp))?"
|
190 |
-
# serving patterns
|
191 |
serving = None
|
192 |
serving_unit = None
|
193 |
m = re.search(r"(takaran saj(?:i|a)|serving size|serving)[^\d]{0,10}([-\d\.,\/ ]{1,20})", txt)
|
194 |
if m:
|
195 |
serving = m.group(2).strip()
|
196 |
else:
|
197 |
-
# try "per serving: 250 ml" or "1 bottle (330 ml)"
|
198 |
m2 = re.search(r"(\d{1,3}(?:[.,]\d+)?)(?:\s*)(ml|g)\b", txt)
|
199 |
if m2:
|
200 |
serving = m2.group(1)
|
201 |
serving_unit = m2.group(2)
|
202 |
-
# sugar patterns - try per 100 or per serving labels
|
203 |
sugar = None
|
204 |
sugar_unit = None
|
205 |
-
# common labels
|
206 |
sugar_patterns = [
|
207 |
r"(gula|sugar)[^\d\-\,\.\d]{0,6}([-\d\.,\/ ]{1,20})",
|
208 |
r"sugars?[^\d]{0,6}([-\d\.,\/ ]{1,20})",
|
@@ -212,7 +167,6 @@ def extract_nutrition_info(full_text: str) -> dict:
|
|
212 |
if m:
|
213 |
sugar = m.group(2).strip()
|
214 |
break
|
215 |
-
# fat (saturated)
|
216 |
fat = None
|
217 |
fat_unit = None
|
218 |
fat_patterns = [
|
@@ -224,22 +178,17 @@ def extract_nutrition_info(full_text: str) -> dict:
|
|
224 |
if m:
|
225 |
fat = m.group(2).strip()
|
226 |
break
|
227 |
-
# detect if values are per 100g/ml by presence of 'per 100' 'per 100g' 'per 100 ml' or '/100g'
|
228 |
per100 = False
|
229 |
if re.search(r"(per 100|per 100g|per 100 ml|/100g|/100 ml|/100ml|/100g)", txt):
|
230 |
per100 = True
|
231 |
-
|
232 |
-
# fallback: extract any standalone numbers that might be table columns (first numeric cluster)
|
233 |
if not sugar or sugar == "":
|
234 |
nums = re.findall(r"[-+]?\d{1,3}(?:[.,]\d{3})*(?:[.,]\d+)?", txt)
|
235 |
if nums:
|
236 |
-
# heuristics: pick numbers that are plausible (0-100)
|
237 |
for n in nums:
|
238 |
v = safe_parse_number(n)
|
239 |
if 0 <= v <= 100:
|
240 |
sugar = n
|
241 |
break
|
242 |
-
# return
|
243 |
return {
|
244 |
"serving_raw": serving or "",
|
245 |
"serving_unit": serving_unit or "",
|
@@ -251,27 +200,22 @@ def extract_nutrition_info(full_text: str) -> dict:
|
|
251 |
}
|
252 |
|
253 |
def normalize_to_per_100(value_raw: str, unit_raw: str, serving_size: float, values_are_per100: bool) -> float:
|
254 |
-
"""Return value normalized to per 100g/ml. If already per100 flag true, parse and return."""
|
255 |
if value_raw is None or str(value_raw).strip() == "":
|
256 |
return 0.0
|
257 |
-
|
258 |
-
s =
|
259 |
val = safe_parse_number(s)
|
260 |
-
# if values_are_per100, return val
|
261 |
if values_are_per100:
|
262 |
return val
|
263 |
-
# Otherwise assume value corresponds to per serving. Normalize by serving_size -> per 100.
|
264 |
if serving_size and serving_size > 0:
|
265 |
-
|
266 |
-
|
267 |
-
|
|
|
|
|
268 |
return val
|
269 |
|
270 |
def get_grade_from_value(value: float, thresholds: dict) -> str:
|
271 |
-
"""
|
272 |
-
thresholds expected in ascending order: {"A": xA, "B": xB, "C": xC}
|
273 |
-
smaller value => better (A)
|
274 |
-
"""
|
275 |
try:
|
276 |
v = float(value)
|
277 |
except:
|
@@ -294,7 +238,6 @@ def get_grade_color(grade: str):
|
|
294 |
return colors.get(grade, ("#bdc3c7", "black"))
|
295 |
|
296 |
def nutrition_advice_rule_based(final_grade: str, sugar_per100: float, fat_per100: float):
|
297 |
-
"""Simple offline rule-based suggestions in Indonesian."""
|
298 |
adv = []
|
299 |
if final_grade == "Grade A":
|
300 |
adv.append("Produk tergolong baik. Konsumsi sesuai porsi.")
|
@@ -317,33 +260,39 @@ def nutrition_advice_rule_based(final_grade: str, sugar_per100: float, fat_per10
|
|
317 |
# -----------------------
|
318 |
@st.cache_resource
|
319 |
def init_ocr_model():
|
|
|
|
|
320 |
try:
|
321 |
-
# use_gpu False for compatibility in many Spaces; cls True to detect orientation
|
322 |
return PaddleOCR(lang="en", use_angle_cls=True, use_gpu=False)
|
323 |
-
except Exception
|
324 |
-
st.error(f"Gagal inisialisasi PaddleOCR: {e}")
|
325 |
return None
|
326 |
|
327 |
ocr_model = init_ocr_model()
|
328 |
-
if ocr_model is None:
|
329 |
-
st.stop()
|
330 |
|
331 |
# -----------------------
|
332 |
# UI
|
333 |
# -----------------------
|
334 |
st.title("🥗 Nutri-Grade Detection & Grade Calculator (No AI Chatbot)")
|
335 |
-
st.caption("Analisis gizi berdasarkan data pada label.
|
336 |
|
337 |
with st.expander("📋 Petunjuk"):
|
338 |
st.markdown(
|
339 |
"""
|
340 |
1. Upload gambar tabel gizi (jpg/png).
|
341 |
-
2. Klik **Analisis OCR
|
342 |
3. Koreksi hasil jika perlu.
|
343 |
4. Klik **Hitung Grade**.
|
344 |
"""
|
345 |
)
|
346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
st.header("1. Upload Gambar")
|
348 |
uploaded = st.file_uploader("Pilih gambar (jpg, jpeg, png)", type=["jpg","jpeg","png"])
|
349 |
|
@@ -353,7 +302,6 @@ def reset_state():
|
|
353 |
del st.session_state[v]
|
354 |
|
355 |
if uploaded:
|
356 |
-
# read image
|
357 |
file_bytes = np.asarray(bytearray(uploaded.read()), dtype=np.uint8)
|
358 |
img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
|
359 |
st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), caption="Preview", use_column_width=False, width=350)
|
@@ -364,7 +312,6 @@ if uploaded:
|
|
364 |
img_rgb = preprocess_for_ocr(img)
|
365 |
best = try_ocr_variants(ocr_model, img_rgb)
|
366 |
extracted = extract_nutrition_info(best.get("text", ""))
|
367 |
-
# store
|
368 |
st.session_state[KEYS["ocr_done"]] = True
|
369 |
st.session_state[KEYS["ocr_text"]] = best.get("text", "")
|
370 |
st.session_state[KEYS["data"]] = extracted
|
@@ -372,7 +319,6 @@ if uploaded:
|
|
372 |
except Exception as e:
|
373 |
st.error(f"Gagal menjalankan OCR: {e}")
|
374 |
|
375 |
-
# manual correction & calculation
|
376 |
if st.session_state.get(KEYS["ocr_done"], False):
|
377 |
st.header("2. Koreksi & Hitung Grade")
|
378 |
d = st.session_state.get(KEYS["data"], {})
|
@@ -383,8 +329,6 @@ if st.session_state.get(KEYS["ocr_done"], False):
|
|
383 |
values_are_per100 = st.checkbox("Nilai yang terdeteksi sudah per 100g/ml", value=d.get("values_are_per100", False))
|
384 |
submitted = st.form_submit_button("Hitung Grade")
|
385 |
if submitted:
|
386 |
-
# parse serving
|
387 |
-
# try extract number and unit
|
388 |
m = re.search(r"(\d{1,4}(?:[.,]\d+)?)\s*(ml|g)?", serving_raw.lower())
|
389 |
serving_size = 0.0
|
390 |
serving_unit = ""
|
@@ -392,24 +336,19 @@ if st.session_state.get(KEYS["ocr_done"], False):
|
|
392 |
serving_size = safe_parse_number(m.group(1))
|
393 |
serving_unit = (m.group(2) or "").lower()
|
394 |
else:
|
395 |
-
# fallback: raw number parse
|
396 |
serving_size = safe_parse_number(serving_raw)
|
397 |
-
# normalize sugar/fat to per100
|
398 |
sugar_per100 = normalize_to_per_100(sugar_raw, "", serving_size, values_are_per100)
|
399 |
fat_per100 = normalize_to_per_100(fat_raw, "", serving_size, values_are_per100)
|
400 |
-
# store
|
401 |
st.session_state[KEYS["calc"]] = {"serving_size": serving_size, "serving_unit": serving_unit, "sugar_per100": sugar_per100, "fat_per100": fat_per100}
|
402 |
st.session_state[KEYS["calculated"]] = True
|
403 |
|
404 |
if st.session_state.get(KEYS["calculated"], False):
|
405 |
st.header("3. Hasil Grading & Saran")
|
406 |
c = st.session_state[KEYS["calc"]]
|
407 |
-
|
408 |
-
|
409 |
-
fat_thresholds = {"A": 1.0, "B": 3.0, "C": 5.0} # contoh
|
410 |
gs = get_grade_from_value(c["sugar_per100"], sugar_thresholds)
|
411 |
gf = get_grade_from_value(c["fat_per100"], fat_thresholds)
|
412 |
-
# final grade: worst of the two (higher letter = worse)
|
413 |
order = ["Grade A", "Grade B", "Grade C", "Grade D"]
|
414 |
final = max([gs, gf], key=lambda x: order.index(x))
|
415 |
|
@@ -437,7 +376,6 @@ if st.session_state.get(KEYS["calculated"], False):
|
|
437 |
|
438 |
st.markdown("**Catatan:** Hasil tergantung akurasi OCR dan input. Jika nilai terlihat aneh, koreksi secara manual pada langkah 2 lalu hitung ulang.")
|
439 |
|
440 |
-
# Footer and reset
|
441 |
st.markdown("---")
|
442 |
st.write("Nutri-Grade App (improved). Pastikan memeriksa manual hasil OCR sebelum mengambil keputusan.")
|
443 |
if st.button("Reset Aplikasi"):
|
|
|
3 |
import cv2
|
4 |
import numpy as np
|
5 |
import re
|
|
|
6 |
from PIL import Image
|
|
|
7 |
import io
|
8 |
import math
|
9 |
|
10 |
+
# Try import PaddleOCR with graceful failure message inside app
|
11 |
+
try:
|
12 |
+
from paddleocr import PaddleOCR
|
13 |
+
except Exception:
|
14 |
+
PaddleOCR = None
|
15 |
+
|
16 |
# -----------------------
|
17 |
# CONFIG
|
18 |
# -----------------------
|
|
|
36 |
# UTILITIES
|
37 |
# -----------------------
|
38 |
def safe_parse_number(s: str) -> float:
    """Parse a loosely formatted numeric string into a float.

    Accepts ``.`` or ``,`` as the decimal mark, optional thousands
    separators, simple fractions and accounting-style parentheses for
    negative values. Input that cannot be interpreted yields ``0.0``.

    Examples:
        "1,234.56"  -> 1234.56
        "1.234,56"  -> 1234.56
        "12,34"     -> 12.34   (a lone comma is read as the decimal mark)
        "1,234,567" -> 1234567.0
        "(12.3)"    -> -12.3
        "1/2"       -> 0.5
        "" or "—"   -> 0.0

    Args:
        s: Raw text; ``None`` and non-string values are tolerated.

    Returns:
        The parsed value, or ``0.0`` when nothing numeric is found.
    """
    if s is None:
        return 0.0
    s = str(s).strip()
    if s == "":
        return 0.0

    # Accounting notation: "(12.3)" means -12.3.
    is_negative = False
    if s.startswith("(") and s.endswith(")"):
        is_negative = True
        s = s[1:-1].strip()

    # Keep only digits and the separators interpreted below.
    s = re.sub(r"[^\d\.,\-\/]", "", s)
    if s in ("", "-", ".", ","):
        return 0.0

    def _signed(val: float) -> float:
        return -val if is_negative else val

    # Simple fractions such as "1/2"; malformed ones fall through.
    if "/" in s:
        numerator, _, denominator = s.partition("/")
        try:
            return _signed(float(numerator) / float(denominator))
        except (ValueError, ZeroDivisionError):
            pass

    if "." in s and "," in s:
        # Whichever separator appears last is the decimal mark.
        if s.rfind(".") > s.rfind(","):
            s = s.replace(",", "")
        else:
            s = s.replace(".", "").replace(",", ".")
    elif "," in s:
        head, _, tail = s.rpartition(",")
        if "," in head:
            # Several commas: the earlier ones can only be thousands
            # separators. The last one is too when it is followed by
            # exactly three digits; otherwise it is the decimal mark.
            if len(tail) == 3:
                s = s.replace(",", "")
            else:
                s = head.replace(",", "") + "." + tail
        elif len(tail) in (1, 2, 3):
            # Single comma followed by 1-3 digits: decimal comma.
            s = head + "." + tail
        else:
            s = s.replace(",", "")

    try:
        return _signed(float(s))
    except ValueError:
        # Last resort: take the first number-like run, keeping the sign
        # implied by surrounding parentheses.
        m = re.search(r"-?\d+([.,]\d+)?", s)
        if m:
            return _signed(float(m.group(0).replace(",", ".")))
        return 0.0
|
78 |
|
79 |
def preprocess_for_ocr(img: np.ndarray, max_dim=1600):
    """Resize, denoise and convert an image to RGB for OCR.

    Args:
        img: Decoded image array. A 2-D (grayscale) array is expanded to
            three channels first; otherwise the array is treated as BGR,
            which is what the caller's ``cv2.imdecode`` produces.
        max_dim: Upper bound in pixels for the longer image side. Larger
            images are downscaled with the aspect ratio preserved.

    Returns:
        The processed image as an RGB array.
    """
    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    h, w = img.shape[:2]
    longest = max(h, w)
    if longest > max_dim:
        scale = max_dim / longest
        # Clamp to 1 px: on a very elongated image the short side could
        # otherwise truncate to 0, which cv2.resize rejects.
        new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        img = cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)

    # Denoising is best-effort: if OpenCV rejects the image, OCR still
    # runs on the un-denoised pixels.
    try:
        img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
    except Exception:
        pass

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
92 |
|
93 |
def try_ocr_variants(ocr, img_rgb):
|
|
|
|
|
|
|
|
|
94 |
variants = []
|
95 |
rotations = [0, 90, 180, 270]
|
96 |
+
scales = [1.0, 1.5, 0.8]
|
97 |
for rot in rotations:
|
98 |
if rot != 0:
|
99 |
M = cv2.getRotationMatrix2D((img_rgb.shape[1]/2, img_rgb.shape[0]/2), rot, 1.0)
|
|
|
106 |
else:
|
107 |
imgrs = imgr
|
108 |
try:
|
109 |
+
ocr_res = ocr.ocr(imgrs, cls=True)
|
110 |
except Exception:
|
111 |
try:
|
112 |
ocr_res = ocr.ocr(imgrs)
|
|
|
116 |
conf_sum = 0.0
|
117 |
num_tokens = 0
|
118 |
if ocr_res:
|
119 |
+
flat = ocr_res
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
for item in flat:
|
121 |
try:
|
|
|
122 |
entry = item[1] if isinstance(item, (list, tuple)) and len(item) > 1 else item
|
123 |
if isinstance(entry, (list, tuple)):
|
124 |
txt = entry[0]
|
|
|
133 |
txt = str(item)
|
134 |
prob = 0.0
|
135 |
text_lines.append(txt)
|
136 |
+
try:
|
137 |
+
conf_sum += float(prob)
|
138 |
+
except:
|
139 |
+
pass
|
140 |
num_tokens += len(re.findall(r"[\d]+", txt))
|
141 |
joined = " ".join(text_lines)
|
|
|
142 |
score = num_tokens + conf_sum
|
143 |
variants.append({"rot": rot, "scale": s, "score": score, "text": joined, "raw": ocr_res})
|
|
|
144 |
variants = sorted(variants, key=lambda x: x["score"], reverse=True)
|
145 |
+
return variants[0] if variants else {"text": ""}
|
|
|
146 |
|
147 |
def extract_nutrition_info(full_text: str) -> dict:
|
148 |
+
txt = (full_text or "").lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
serving = None
|
150 |
serving_unit = None
|
151 |
m = re.search(r"(takaran saj(?:i|a)|serving size|serving)[^\d]{0,10}([-\d\.,\/ ]{1,20})", txt)
|
152 |
if m:
|
153 |
serving = m.group(2).strip()
|
154 |
else:
|
|
|
155 |
m2 = re.search(r"(\d{1,3}(?:[.,]\d+)?)(?:\s*)(ml|g)\b", txt)
|
156 |
if m2:
|
157 |
serving = m2.group(1)
|
158 |
serving_unit = m2.group(2)
|
|
|
159 |
sugar = None
|
160 |
sugar_unit = None
|
|
|
161 |
sugar_patterns = [
|
162 |
r"(gula|sugar)[^\d\-\,\.\d]{0,6}([-\d\.,\/ ]{1,20})",
|
163 |
r"sugars?[^\d]{0,6}([-\d\.,\/ ]{1,20})",
|
|
|
167 |
if m:
|
168 |
sugar = m.group(2).strip()
|
169 |
break
|
|
|
170 |
fat = None
|
171 |
fat_unit = None
|
172 |
fat_patterns = [
|
|
|
178 |
if m:
|
179 |
fat = m.group(2).strip()
|
180 |
break
|
|
|
181 |
per100 = False
|
182 |
if re.search(r"(per 100|per 100g|per 100 ml|/100g|/100 ml|/100ml|/100g)", txt):
|
183 |
per100 = True
|
|
|
|
|
184 |
if not sugar or sugar == "":
|
185 |
nums = re.findall(r"[-+]?\d{1,3}(?:[.,]\d{3})*(?:[.,]\d+)?", txt)
|
186 |
if nums:
|
|
|
187 |
for n in nums:
|
188 |
v = safe_parse_number(n)
|
189 |
if 0 <= v <= 100:
|
190 |
sugar = n
|
191 |
break
|
|
|
192 |
return {
|
193 |
"serving_raw": serving or "",
|
194 |
"serving_unit": serving_unit or "",
|
|
|
200 |
}
|
201 |
|
202 |
def normalize_to_per_100(value_raw: str, unit_raw: str, serving_size: float, values_are_per100: bool) -> float:
    """Return a nutrient amount in grams per 100 g/ml.

    Args:
        value_raw: Amount as text, optionally with a unit suffix
            (e.g. ``"12"``, ``"12 g"``, ``"500 mg"``).
        unit_raw: Unit to assume when ``value_raw`` carries none. Only
            ``"mg"`` changes the result; anything else is treated as grams.
        serving_size: Serving size in g/ml that ``value_raw`` refers to.
        values_are_per100: ``True`` when the amount is already expressed
            per 100 g/ml, in which case no rescaling is applied.

    Returns:
        The amount per 100 g/ml. Empty input gives ``0.0``. When the
        serving size is missing or not positive, the amount is returned
        unscaled.
    """
    if value_raw is None or str(value_raw).strip() == "":
        return 0.0

    text = str(value_raw).lower()

    # A unit written next to the number wins over the unit_raw hint.
    unit_match = re.search(r"\d\s*(mg|ml|g)\b", text)
    unit = unit_match.group(1) if unit_match else (unit_raw or "").strip().lower()

    val = safe_parse_number(re.sub(r"(g|mg|ml)\b", "", text))

    # Convert milligrams to grams so the result is in grams.
    if unit == "mg":
        val /= 1000.0

    if values_are_per100:
        return val

    # Otherwise the amount is per serving: rescale to a 100 g/ml basis.
    if serving_size and serving_size > 0:
        return (val / serving_size) * 100.0
    return val
|
217 |
|
218 |
def get_grade_from_value(value: float, thresholds: dict) -> str:
|
|
|
|
|
|
|
|
|
219 |
try:
|
220 |
v = float(value)
|
221 |
except:
|
|
|
238 |
return colors.get(grade, ("#bdc3c7", "black"))
|
239 |
|
240 |
def nutrition_advice_rule_based(final_grade: str, sugar_per100: float, fat_per100: float):
|
|
|
241 |
adv = []
|
242 |
if final_grade == "Grade A":
|
243 |
adv.append("Produk tergolong baik. Konsumsi sesuai porsi.")
|
|
|
260 |
# -----------------------
|
261 |
@st.cache_resource
def init_ocr_model():
    """Create the PaddleOCR engine, or return ``None`` when unavailable.

    Returns:
        A ``PaddleOCR`` instance configured for English with angle
        classification on CPU, or ``None`` if the package could not be
        imported or construction failed.
    """
    if PaddleOCR is None:
        return None
    try:
        return PaddleOCR(lang="en", use_angle_cls=True, use_gpu=False)
    except Exception:
        # The UI only shows a generic failure message when this returns
        # None, so record the cause in the logs.
        import logging

        logging.getLogger(__name__).exception("PaddleOCR initialization failed")
        # NOTE(review): st.cache_resource presumably caches this None as
        # well, so a transient failure would persist until the cache is
        # cleared — confirm.
        return None
|
269 |
|
270 |
ocr_model = init_ocr_model()
|
|
|
|
|
271 |
|
272 |
# -----------------------
|
273 |
# UI
|
274 |
# -----------------------
|
275 |
st.title("🥗 Nutri-Grade Detection & Grade Calculator (No AI Chatbot)")
|
276 |
+
st.caption("Analisis gizi berdasarkan data pada label. Koreksi manual selalu disarankan.")
|
277 |
|
278 |
with st.expander("📋 Petunjuk"):
|
279 |
st.markdown(
|
280 |
"""
|
281 |
1. Upload gambar tabel gizi (jpg/png).
|
282 |
+
2. Klik **Analisis OCR** (butuh PaddleOCR terinstall).
|
283 |
3. Koreksi hasil jika perlu.
|
284 |
4. Klik **Hitung Grade**.
|
285 |
"""
|
286 |
)
|
287 |
|
288 |
+
if PaddleOCR is None:
|
289 |
+
st.error("PaddleOCR tidak ditemukan. Pastikan package `paddleocr` (dan `paddlepaddle`) telah terinstall di environment Anda.")
|
290 |
+
st.stop()
|
291 |
+
|
292 |
+
if ocr_model is None:
|
293 |
+
st.error("Gagal inisialisasi model OCR. Jika di Spaces atau Docker, pastikan dependency paddle telah terpasang dengan benar.")
|
294 |
+
st.stop()
|
295 |
+
|
296 |
st.header("1. Upload Gambar")
|
297 |
uploaded = st.file_uploader("Pilih gambar (jpg, jpeg, png)", type=["jpg","jpeg","png"])
|
298 |
|
|
|
302 |
del st.session_state[v]
|
303 |
|
304 |
if uploaded:
|
|
|
305 |
file_bytes = np.asarray(bytearray(uploaded.read()), dtype=np.uint8)
|
306 |
img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
|
307 |
st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), caption="Preview", use_column_width=False, width=350)
|
|
|
312 |
img_rgb = preprocess_for_ocr(img)
|
313 |
best = try_ocr_variants(ocr_model, img_rgb)
|
314 |
extracted = extract_nutrition_info(best.get("text", ""))
|
|
|
315 |
st.session_state[KEYS["ocr_done"]] = True
|
316 |
st.session_state[KEYS["ocr_text"]] = best.get("text", "")
|
317 |
st.session_state[KEYS["data"]] = extracted
|
|
|
319 |
except Exception as e:
|
320 |
st.error(f"Gagal menjalankan OCR: {e}")
|
321 |
|
|
|
322 |
if st.session_state.get(KEYS["ocr_done"], False):
|
323 |
st.header("2. Koreksi & Hitung Grade")
|
324 |
d = st.session_state.get(KEYS["data"], {})
|
|
|
329 |
values_are_per100 = st.checkbox("Nilai yang terdeteksi sudah per 100g/ml", value=d.get("values_are_per100", False))
|
330 |
submitted = st.form_submit_button("Hitung Grade")
|
331 |
if submitted:
|
|
|
|
|
332 |
m = re.search(r"(\d{1,4}(?:[.,]\d+)?)\s*(ml|g)?", serving_raw.lower())
|
333 |
serving_size = 0.0
|
334 |
serving_unit = ""
|
|
|
336 |
serving_size = safe_parse_number(m.group(1))
|
337 |
serving_unit = (m.group(2) or "").lower()
|
338 |
else:
|
|
|
339 |
serving_size = safe_parse_number(serving_raw)
|
|
|
340 |
sugar_per100 = normalize_to_per_100(sugar_raw, "", serving_size, values_are_per100)
|
341 |
fat_per100 = normalize_to_per_100(fat_raw, "", serving_size, values_are_per100)
|
|
|
342 |
st.session_state[KEYS["calc"]] = {"serving_size": serving_size, "serving_unit": serving_unit, "sugar_per100": sugar_per100, "fat_per100": fat_per100}
|
343 |
st.session_state[KEYS["calculated"]] = True
|
344 |
|
345 |
if st.session_state.get(KEYS["calculated"], False):
|
346 |
st.header("3. Hasil Grading & Saran")
|
347 |
c = st.session_state[KEYS["calc"]]
|
348 |
+
sugar_thresholds = {"A": 5.0, "B": 12.5, "C": 22.5}
|
349 |
+
fat_thresholds = {"A": 1.0, "B": 3.0, "C": 5.0}
|
|
|
350 |
gs = get_grade_from_value(c["sugar_per100"], sugar_thresholds)
|
351 |
gf = get_grade_from_value(c["fat_per100"], fat_thresholds)
|
|
|
352 |
order = ["Grade A", "Grade B", "Grade C", "Grade D"]
|
353 |
final = max([gs, gf], key=lambda x: order.index(x))
|
354 |
|
|
|
376 |
|
377 |
st.markdown("**Catatan:** Hasil tergantung akurasi OCR dan input. Jika nilai terlihat aneh, koreksi secara manual pada langkah 2 lalu hitung ulang.")
|
378 |
|
|
|
379 |
st.markdown("---")
|
380 |
st.write("Nutri-Grade App (improved). Pastikan memeriksa manual hasil OCR sebelum mengambil keputusan.")
|
381 |
if st.button("Reset Aplikasi"):
|