YourAIEngineer committed on
Commit
fe32dc1
·
verified ·
1 Parent(s): 08ce19d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +404 -285
app.py CHANGED
@@ -1,187 +1,268 @@
 
1
  import os
2
  import re
3
  import time
4
- from typing import Dict, List, Tuple, Optional
5
 
6
  import cv2
7
  import numpy as np
8
  import pandas as pd
 
9
  import requests
10
  import streamlit as st
11
- from paddleocr import PaddleOCR
12
- from PIL import Image
13
 
14
- # ---------------------------
15
- # CONFIG / SECURITY NOTE
16
- # ---------------------------
17
st.set_page_config(page_title="Nutri-Grade Detector (Improved)", page_icon="🥗", layout="wide")

# Use an environment variable or Streamlit secrets. Do NOT hardcode keys in source.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    try:
        OPENROUTER_API_KEY = st.secrets.get("OPENROUTER_API_KEY", None)
    except Exception:
        # NOTE(review): reading st.secrets appears to raise when no secrets file
        # is configured; treat that the same as "no key" instead of crashing.
        OPENROUTER_API_KEY = None
# Normalise an empty string to None so later checks have one "missing" value.
OPENROUTER_API_KEY = OPENROUTER_API_KEY or None
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

if OPENROUTER_API_KEY is None:
    st.warning("OPENROUTER_API_KEY tidak ditemukan. Fitur saran AI akan nonaktif kecuali Anda menambahkan key melalui Streamlit secrets atau environment variable.")
25
-
26
- # ---------------------------
27
- # HELPERS
28
- # ---------------------------
29
-
30
@st.cache_resource
def init_ocr(lang: Optional[List[str]] = None, use_angle_cls: bool = True, det=False):
    """Create the PaddleOCR engine once and cache it for later reruns.

    Args:
        lang: Preferred language codes, most important first. Defaults to
            ``["en", "id"]``. A plain string is accepted as well.
        use_angle_cls: Forwarded to ``PaddleOCR``.
        det: Forwarded to ``PaddleOCR``.

    Returns:
        The ``PaddleOCR`` instance, or ``None`` when construction fails (an
        error is shown in the Streamlit page in that case).
    """
    # A ``None`` default avoids sharing one mutable list between calls.
    if not lang:
        lang = ["en", "id"]
    # NOTE(review): PaddleOCR documents ``lang`` as a single language code, so
    # only the first requested language is used - confirm against the pinned version.
    primary_lang = lang if isinstance(lang, str) else lang[0]
    try:
        return PaddleOCR(lang=primary_lang, use_angle_cls=use_angle_cls, det=det)
    except Exception as e:
        st.error(f"Gagal inisialisasi OCR: {e}")
        return None
39
-
40
-
41
def preprocess_variants(img: np.ndarray) -> List[np.ndarray]:
    """Return several pre-processed renditions of ``img`` to give OCR more chances.

    The list holds, in order: the untouched colour image, its grayscale
    conversion, a bilateral-filtered grayscale, an adaptive-threshold binary
    image and a CLAHE contrast-enhanced grayscale.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Bilateral filtering reduces noise while preserving edges.
    smoothed = cv2.bilateralFilter(gray, 9, 75, 75)
    binarized = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 7
    )
    # CLAHE boosts local contrast.
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    contrasted = equalizer.apply(gray)
    return [img, gray, smoothed, binarized, contrasted]
57
-
58
-
59
def ocr_on_rotations(ocr, img: np.ndarray, try_rotations: Optional[List[int]] = None) -> List[Tuple]:
    """Run OCR on every rotation of ``img`` and every pre-processed variant of it.

    Args:
        ocr: Engine exposing ``ocr(image, cls=True)``.
        img: Image to read.
        try_rotations: Counter-clockwise angles in degrees to try. Defaults to
            ``[0, 90, 180, 270]``.

    Returns:
        De-duplicated list of ``(box, text, confidence, rotation)`` tuples. For
        duplicates (same normalised text and roughly the same box centre) the
        entry with the highest confidence is kept. Boxes are expressed in the
        coordinates of the rotated image they were found in.
    """
    if try_rotations is None:
        try_rotations = [0, 90, 180, 270]
    # Quarter turns are done with cv2.rotate so non-square images are not cropped.
    quarter_turns = {
        90: cv2.ROTATE_90_COUNTERCLOCKWISE,
        180: cv2.ROTATE_180,
        270: cv2.ROTATE_90_CLOCKWISE,
    }
    results = []
    for rot in try_rotations:
        angle = rot % 360
        if angle == 0:
            rotated = img
        elif angle in quarter_turns:
            rotated = cv2.rotate(img, quarter_turns[angle])
        else:
            h, w = img.shape[:2]
            M = cv2.getRotationMatrix2D((w / 2, h / 2), rot, 1.0)
            rotated = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
        # Build the variants from the rotated image and actually feed each one
        # to the engine; otherwise every pass would read the same pixels.
        for variant in preprocess_variants(rotated):
            try:
                res = ocr.ocr(variant, cls=True)
            except Exception:
                # Best effort: one failing variant must not abort the whole scan.
                continue
            # The result (or a page inside it) may be None when nothing was found.
            for page in res or []:
                for ln in page or []:
                    try:
                        box = ln[0]
                        text, conf = ln[1][0], ln[1][1]
                    except (TypeError, IndexError):
                        continue  # skip entries that are not [box, (text, conf)]
                    conf = float(conf) if conf not in (None, "") else 0.0
                    results.append((box, str(text), conf, rot))
    # De-duplicate by normalised text plus box centre snapped to a 50 px grid.
    dedup = {}
    for box, text, conf, rot in results:
        t = text.strip()
        cx = int((box[0][0] + box[2][0]) / 2)
        cy = int((box[0][1] + box[2][1]) / 2)
        key = (re.sub(r"\s+", " ", t.lower()), round(cx / 50) * 50, round(cy / 50) * 50)
        if key not in dedup or conf > dedup[key][2]:
            dedup[key] = (box, t, conf, rot)
    return list(dedup.values())
99
-
100
-
101
def annotate_image(img: np.ndarray, ocr_items: List[Tuple]) -> np.ndarray:
    """Return a copy of ``img`` with each OCR box outlined and labelled.

    Every item is ``(box, text, confidence, rotation)``. The label shows the
    first 30 characters of the text and the confidence with two decimals.
    """
    canvas = img.copy()
    green = (0, 255, 0)
    for box, text, conf, _rot in ocr_items:
        outline = np.array(box, dtype=np.int32).reshape((-1, 1, 2))
        cv2.polylines(canvas, [outline], True, green, 1)
        # Put the caption just above the first corner, but never above y = 10.
        caption_x = int(box[0][0])
        caption_y = max(10, int(box[0][1]) - 10)
        caption = f"{text[:30]} ({conf:.2f})"
        cv2.putText(canvas, caption, (caption_x, caption_y), cv2.FONT_HERSHEY_SIMPLEX, 0.4, green, 1, cv2.LINE_AA)
    return canvas
111
-
112
-
113
def parse_numeric_value(text: str) -> float:
    """Parse the first number found in ``text``.

    Both "." and "," are understood as decimal or thousands separators:

    * when both occur, the one appearing last is the decimal separator;
    * a separator that occurs more than once is a thousands separator;
    * a single separator is treated as the decimal separator.

    Returns:
        The parsed value, or ``0.0`` when ``text`` is ``None`` or holds no number.
    """
    if text is None:
        return 0.0
    s = str(text).lower().strip()
    # Take the whole run of digits and separators; limiting the integer part to
    # three digits would cut "1234" down to "123".
    m = re.search(r"-?\d+(?:[.,]\d+)*", s)
    if not m:
        return 0.0
    num = m.group(0)
    if "." in num and "," in num:
        if num.rfind(",") > num.rfind("."):
            num = num.replace(".", "").replace(",", ".")  # comma decimal
        else:
            num = num.replace(",", "")  # dot decimal
    elif num.count(",") > 1 or num.count(".") > 1:
        # A repeated separator can only be thousands grouping.
        num = num.replace(",", "").replace(".", "")
    else:
        num = num.replace(",", ".")
    try:
        return float(num)
    except ValueError:
        return 0.0
140
 
141
 
142
def extract_nutrition_from_texts(texts: List[str]) -> Dict[str, Dict]:
    """Pull serving size, sugar and saturated fat out of OCR text snippets.

    The snippets are joined and lower-cased; for each nutrient the first label
    hit followed by a number within 30 non-digit characters is used.

    Returns:
        Mapping of ``"serving"``, ``"sugar"`` and ``"saturated_fat"`` (only those
        that were found, plus a default serving) to
        ``{"value": float, "unit": str, "per_100": bool}``. When no serving is
        found it defaults to 100 g flagged as per-100.
    """
    combined = " | ".join(texts).lower()
    number = r"(\d+(?:[.,]\d+)*)"  # must start with a digit, never bare punctuation
    label_patterns = {
        "serving": (r"takaran\s*saji|serving\s*size|portion", r"g|ml"),
        "sugar": (r"gula|sugar", r"g|gram"),
        "saturated_fat": (r"lemak\s*jenuh|saturated\s*fat|sat fat|sat\.?\s*fat", r"g|gram"),
    }
    out = {}
    for key, (labels, units) in label_patterns.items():
        m = re.search(rf"({labels})[^\d]{{0,30}}{number}\s*({units})?", combined)
        if not m:
            continue
        # Look for "per 100" around the label text that actually matched; the
        # dictionary key (e.g. "saturated_fat") never occurs in label text.
        label = re.escape(m.group(1))
        per_100 = re.search(rf"{label}.*per\s*100|per\s*100.*{label}|/\s*100", combined)
        out[key] = {
            "value": parse_numeric_value(m.group(2)),
            "unit": m.group(3) or "g",
            "per_100": bool(per_100),  # simple heuristic
        }
    # Fallback: if no serving was found, assume 100 g.
    if "serving" not in out:
        out["serving"] = {"value": 100.0, "unit": "g", "per_100": True}
    return out
169
-
170
-
171
def normalize_to_per100(value: float, serving: float, is_per_100: bool) -> float:
    """Express ``value`` per 100 g/ml.

    Values already flagged as per-100 are returned untouched. Otherwise the
    per-serving amount is scaled by ``100 / serving``; a non-positive serving
    size yields ``0.0``.
    """
    if not is_per_100:
        # Per-serving amount: rescale, guarding against a non-positive serving.
        return 0.0 if serving <= 0 else (value / serving) * 100.0
    return value
 
 
 
179
 
180
 
181
  def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
182
  """
183
- Ambil grade berdasarkan thresholds map, contoh thresholds {"A":1.0,"B":5.0,"C":10.0}
184
- Jika value <= A -> A, <=B -> B, <=C -> C else D.
185
  """
186
  try:
187
  if value <= thresholds["A"]:
@@ -190,14 +271,9 @@ def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
190
  return "Grade B"
191
  if value <= thresholds["C"]:
192
  return "Grade C"
193
- return "Grade D"
194
  except Exception:
195
- return "Grade D"
196
-
197
-
198
def grade_order(grade: str) -> int:
    """Rank a grade label from 0 ("Grade A") to 3 ("Grade D").

    Unknown labels rank like "Grade D".
    """
    names = ("Grade A", "Grade B", "Grade C", "Grade D")
    ranking = {name: position for position, name in enumerate(names)}
    return ranking.get(grade, 3)
201
 
202
 
203
  def get_grade_color(grade: str) -> Tuple[str, str]:
@@ -210,149 +286,192 @@ def get_grade_color(grade: str) -> Tuple[str, str]:
210
  return colors.get(grade, ("#bdc3c7", "black"))
211
 
212
 
213
def call_openrouter_advice(api_key: str, prompt: str, timeout=30) -> str:
    """Send ``prompt`` to the OpenRouter chat-completions endpoint.

    Returns the stripped text of the first choice. Any failure (network, HTTP
    status, unexpected payload) is reported as an "Error fetching advice: ..."
    string instead of being raised.
    """
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    body = {
        "model": "mistralai/mistral-7b-instruct:free",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 250,
        "temperature": 0.7
    }
    try:
        response = requests.post(
            f"{OPENROUTER_BASE_URL}/chat/completions",
            headers=request_headers,
            json=body,
            timeout=timeout,
        )
        response.raise_for_status()
        reply = response.json()["choices"][0]["message"]["content"]
        return reply.strip()
    except Exception as e:
        return f"Error fetching advice: {e}"
 
228
 
 
 
 
 
229
 
230
- # ---------------------------
231
- # UI
232
- # ---------------------------
233
 
234
st.title("🥗 Nutri-Grade Detection — Improved")
st.caption("Versi lebih robust: multiple preprocess, editable OCR table, configurable thresholds.")

ocr = init_ocr()

with st.sidebar:
    st.header("Pengaturan")
    use_ai_advice = st.checkbox("Aktifkan saran AI (OpenRouter)", value=bool(OPENROUTER_API_KEY))
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("**Threshold (g/100)**")
        sugar_thr_a = st.number_input("Sugar A ≤", value=1.0, step=0.1)
        sugar_thr_b = st.number_input("Sugar B ≤", value=5.0, step=0.1)
        sugar_thr_c = st.number_input("Sugar C ≤", value=10.0, step=0.1)
    with col2:
        fat_thr_a = st.number_input("SatFat A ≤", value=0.7, step=0.1)
        fat_thr_b = st.number_input("SatFat B ≤", value=1.2, step=0.1)
        fat_thr_c = st.number_input("SatFat C ", value=2.8, step=0.1)
    st.markdown("---")
    st.write("Tips: Ambil foto rata, cahaya cukup, hindari pantulan.")

uploaded = st.file_uploader("Upload gambar tabel gizi (jpg/png)", type=["jpg", "jpeg", "png"])
if uploaded is None:
    st.info("Upload gambar supaya bisa mulai OCR.")
    st.stop()

# Read the image. getvalue() returns the whole payload regardless of where the
# stream position was left by an earlier rerun.
file_bytes = np.frombuffer(uploaded.getvalue(), np.uint8)
img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) if file_bytes.size else None
if img is None:
    st.error("Gagal membaca file gambar. Pastikan file valid.")
    st.stop()
display_w = 600
st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), caption="Gambar yang diupload (preview)", width=min(display_w, img.shape[1]))

# Drop OCR results that belong to a previously uploaded file.
upload_key = (uploaded.name, uploaded.size)
if st.session_state.get("upload_key") != upload_key:
    st.session_state["upload_key"] = upload_key
    st.session_state.pop("ocr_items", None)

if st.button("Jalankan OCR & Ekstraksi"):
    if ocr is None:
        st.error("OCR model belum tersedia.")
        st.stop()
    with st.spinner("Menjalankan OCR pada beberapa varian gambar..."):
        # Keep the result in session state: a button is only True during the
        # rerun caused by its own click, so anything nested under it would
        # vanish as soon as the user presses the second button.
        st.session_state["ocr_items"] = ocr_on_rotations(ocr, img)

ocr_items = st.session_state.get("ocr_items")
if ocr_items is not None:
    st.success(f"OCR selesai — {len(ocr_items)} potongan teks terdeteksi (deduped).")
    # show annotated image
    annotated = annotate_image(img, ocr_items)
    st.image(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB), caption="Overlay hasil OCR", width=min(display_w, img.shape[1]))

    # collect texts sorted by confidence desc
    ocr_texts = [t for _, t, _, _ in sorted(ocr_items, key=lambda x: -x[2])]
    extracted = extract_nutrition_from_texts(ocr_texts)

    # build editable DataFrame for user correction
    rows = []
    for nutr in ["serving", "sugar", "saturated_fat"]:
        ent = extracted.get(nutr, {"value": 0.0, "unit": "g", "per_100": False})
        rows.append({
            "nutrient": nutr,
            "value": float(ent.get("value", 0.0)),
            "unit": ent.get("unit", "g"),
            "is_per_100": bool(ent.get("per_100", False))
        })
    df = pd.DataFrame(rows)
    st.markdown("### Koreksi hasil ekstraksi (ubah nilai jika OCR keliru)")
    edited = st.data_editor(df, num_rows="fixed", use_container_width=True)

    if st.button("Hitung Grade dari nilai di atas"):
        def _cell(nutrient, column):
            """Return one cell of the corrected table for the given nutrient row."""
            return edited.loc[edited["nutrient"] == nutrient, column].values[0]

        # read corrected
        serving_val = float(_cell("serving", "value"))
        serving_unit = _cell("serving", "unit")
        sugar_val = float(_cell("sugar", "value"))
        sugar_per100_flag = bool(_cell("sugar", "is_per_100"))
        satfat_val = float(_cell("saturated_fat", "value"))
        satfat_per100_flag = bool(_cell("saturated_fat", "is_per_100"))

        # Normalize serving unit: if ml vs g, we assume density ~1 g/ml (common for liquids). Warn if ml.
        if isinstance(serving_unit, str) and "ml" in serving_unit.lower():
            st.info("Takaran dalam ml terdeteksi — diasumsikan densitas 1 g/ml untuk perhitungan per 100. Koreksi manual jika perlu.")

        # Normalize to per 100
        sugar_per100 = normalize_to_per100(sugar_val, serving_val, sugar_per100_flag)
        satfat_per100 = normalize_to_per100(satfat_val, serving_val, satfat_per100_flag)

        # Grades
        sugar_thresholds = {"A": sugar_thr_a, "B": sugar_thr_b, "C": sugar_thr_c}
        fat_thresholds = {"A": fat_thr_a, "B": fat_thr_b, "C": fat_thr_c}
        gs = get_grade_from_value(sugar_per100, sugar_thresholds)
        gf = get_grade_from_value(satfat_per100, fat_thresholds)
        # final grade = worse (higher order)
        final = gs if grade_order(gs) >= grade_order(gf) else gf

        # Show results nicely
        st.header("Hasil Perhitungan")
        c1, c2, c3 = st.columns(3)
        bg_s, tc_s = get_grade_color(gs)
        c1.markdown(f"<div style='background:{bg_s};padding:12px;border-radius:8px;text-align:center;color:{tc_s};'>"
                    f"<strong>Gula</strong><p style='font-size:22px'>{sugar_per100:.2f} g/100g</p><h3>{gs}</h3></div>", unsafe_allow_html=True)
        bg_f, tc_f = get_grade_color(gf)
        c2.markdown(f"<div style='background:{bg_f};padding:12px;border-radius:8px;text-align:center;color:{tc_f};'>"
                    f"<strong>Lemak Jenuh</strong><p style='font-size:22px'>{satfat_per100:.2f} g/100g</p><h3>{gf}</h3></div>", unsafe_allow_html=True)
        bg_fin, tc_fin = get_grade_color(final)
        c3.markdown(f"<div style='background:{bg_fin};padding:12px;border-radius:8px;text-align:center;color:{tc_fin};'>"
                    f"<strong>Grade Akhir</strong><p style='font-size:22px'>{final}</p></div>", unsafe_allow_html=True)

        st.markdown("---")
        st.write("Detail nilai (per 100):")
        st.write(pd.DataFrame({
            "nutrient": ["sugar", "saturated_fat"],
            "per_100_g": [round(sugar_per100, 3), round(satfat_per100, 3)],
            "grade": [gs, gf]
        }))

        # AI advice (optional)
        if use_ai_advice and OPENROUTER_API_KEY:
            prompt = (
                f"Anda adalah ahli gizi Indonesia. "
                f"Takaran sajian diasumsikan {serving_val} g/ml. "
                f"Gula per 100g: {sugar_per100:.2f} g ({gs}). "
                f"Lemak jenuh per 100g: {satfat_per100:.2f} g ({gf}). "
                f"Grade akhir: {final}. Berikan saran singkat 40-80 kata, fokus pada kesehatan dan tips sederhana."
            )
            with st.spinner("Mengambil saran dari AI..."):
                advice = call_openrouter_advice(OPENROUTER_API_KEY, prompt)
            st.header("Saran Nutrisi (AI)")
            st.info(advice)
        elif use_ai_advice and not OPENROUTER_API_KEY:
            st.warning("Anda memilih aktifkan saran AI tetapi API key tidak tersedia. Tambahkan OPENROUTER_API_KEY di Streamlit secrets atau environment variable.")

st.markdown("---")
st.caption("Catatan: Aplikasi ini membantu estimasi — selalu verifikasi dengan label produk asli dan konsultasi ahli gizi untuk keputusan medis.")
 
1
+ # app.py - Nutri-Grade Detection (improved)
2
  import os
3
  import re
4
  import time
5
+ from typing import Optional, Tuple, List, Dict
6
 
7
  import cv2
8
  import numpy as np
9
  import pandas as pd
10
+ from PIL import Image
11
  import requests
12
  import streamlit as st
 
 
13
 
14
+ # Try imports that may be optional at runtime
15
+ try:
16
+ from paddleocr import PaddleOCR
17
+ PADDLE_AVAILABLE = True
18
+ except Exception:
19
+ PADDLE_AVAILABLE = False
20
+
21
+ try:
22
+ import pytesseract
23
+ PYTESSERACT_AVAILABLE = True
24
+ except Exception:
25
+ PYTESSERACT_AVAILABLE = False
26
+
27
+ # ---------------- CONFIG ----------------
28
st.set_page_config(
    page_title="Nutri-Grade Label Detection",
    page_icon="🥗",
    layout="wide",
    initial_sidebar_state="expanded"
)

OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
# Get the API key from Streamlit secrets or the environment (do NOT hardcode it).
try:
    OPENROUTER_API_KEY = st.secrets.get("OPENROUTER_API_KEY")
except Exception:
    # NOTE(review): reading st.secrets appears to raise when no secrets file is
    # configured; fall through to the environment instead of crashing at import.
    OPENROUTER_API_KEY = None
OPENROUTER_API_KEY = OPENROUTER_API_KEY or os.environ.get("OPENROUTER_API_KEY")
38
 
39
+ # ---------------- HELPERS ----------------
40
def safe_float_from_str(s: str) -> float:
    """Parse the first number found in a noisy string.

    * Accepts "1,234.56" and "1.234,56": when both separators occur, the one
      appearing last is the decimal separator.
    * A separator that occurs more than once ("1.234.567") is thousands grouping.
    * A single "," or "." is treated as the decimal separator.
    * "1-2" yields the first number; trailing noise such as "g", "mg", "%" is ignored.

    Returns:
        The parsed value, or ``0.0`` when ``s`` is ``None`` or holds no number.
    """
    if s is None:
        return 0.0
    # Allow any number of separator groups; permitting only one separator would
    # cut "1,234.56" down to "1,234" and misread it as 1.234.
    m = re.search(r"-?\d+(?:[.,]\d+)*(?:[eE][-+]?\d+)?", str(s))
    if not m:
        return 0.0
    token = m.group(0)
    last_dot = token.rfind(".")
    last_comma = token.rfind(",")
    if last_dot != -1 and last_comma != -1:
        if last_dot > last_comma:
            token = token.replace(",", "")
        else:
            token = token.replace(".", "").replace(",", ".")
    elif token.count(",") > 1 or token.count(".") > 1:
        token = token.replace(",", "").replace(".", "")
    else:
        # A lone comma is a decimal comma (common in many locales).
        token = token.replace(",", ".")
    try:
        return float(token)
    except ValueError:
        return 0.0
71
 
72
 
73
def preprocess_for_ocr(image: np.ndarray, max_dim=1600) -> np.ndarray:
    """Prepare an image for OCR: downscale, grayscale, denoise, equalize, threshold.

    Args:
        image: BGR, BGRA or already-grayscale image. It is not modified.
            (assumes 8-bit data, as produced by ``cv2.imdecode`` - TODO confirm)
        max_dim: Longest side allowed; larger images are shrunk to this size.

    Returns:
        Single-channel image: the bitwise OR of the equalized grayscale and its
        adaptive threshold.
    """
    img = image  # every step below returns a new array, so no defensive copy is needed
    h, w = img.shape[:2]
    longest = max(h, w)
    if longest > max_dim:
        scale = max_dim / longest
        # Clamp to 1 px so an extreme aspect ratio cannot produce a zero dimension.
        new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        img = cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)
    # Only convert when there are colour channels to convert.
    if img.ndim == 2:
        gray = img
    elif img.shape[2] == 1:
        gray = img[:, :, 0]
    elif img.shape[2] == 4:
        gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # denoise
    gray = cv2.fastNlMeansDenoising(gray, h=7)
    # increase contrast via histogram equalization
    gray = cv2.equalizeHist(gray)
    # adaptive threshold to emphasize text
    th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 2)
    # combine threshold and gray to maintain readability
    return cv2.bitwise_or(gray, th)
92
+
93
+
94
@st.cache_resource
def initialize_ocr(lang_list: Optional[List[str]] = None, use_gpu: bool = False):
    """Initialise an OCR backend and return it as a plain callable.

    PaddleOCR is preferred when importable; pytesseract is the fallback.

    Args:
        lang_list: Preferred language codes, most important first. Defaults to
            ``["en", "id"]``.
        use_gpu: Forwarded to ``PaddleOCR``.

    Returns:
        A function ``ocr(image) -> List[str]`` yielding the recognised text
        lines, or ``None`` when no backend is available.
    """
    if not lang_list:
        lang_list = ["en", "id"]
    # NOTE(review): PaddleOCR documents ``lang`` as a single language code, so
    # only the first requested language is used - confirm against the pinned version.
    primary_lang = lang_list if isinstance(lang_list, str) else lang_list[0]

    if PADDLE_AVAILABLE:
        try:
            ocr = PaddleOCR(lang=primary_lang, use_angle_cls=True, use_gpu=use_gpu)
        except Exception as e:
            st.warning(f"PaddleOCR init failed: {e}")
        else:
            def paddle_runner(img: np.ndarray) -> List[str]:
                """Run PaddleOCR on ``img`` and collect the recognised strings."""
                try:
                    res = ocr.ocr(img, det=True, rec=True, cls=True)
                except Exception:
                    # Retry in grayscale, which is only possible for colour input.
                    if img.ndim != 3:
                        raise
                    res = ocr.ocr(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), det=True, rec=True, cls=True)
                texts = []
                for page in res or []:
                    if isinstance(page, list):
                        # page: list of [box, (text, confidence)] entries
                        for item in page:
                            if not isinstance(item, (list, tuple)) or len(item) < 2:
                                continue
                            if isinstance(item[1], (list, tuple)):
                                texts.append(str(item[1][0]))
                            elif isinstance(item[1], str):
                                texts.append(item[1])
                    elif isinstance(page, tuple) and len(page) >= 2:
                        texts.append(str(page[1][0] if isinstance(page[1], (list, tuple)) else page[1]))
                return texts
            return paddle_runner

    if PYTESSERACT_AVAILABLE:
        def pytesseract_runner(img: np.ndarray) -> List[str]:
            """Run Tesseract on ``img`` and return its non-empty lines."""
            # pytesseract takes a PIL image; OpenCV colour images are BGR.
            if img.ndim == 2:
                pil = Image.fromarray(img)
            else:
                pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            raw = pytesseract.image_to_string(pil)
            return [ln.strip() for ln in raw.splitlines() if ln.strip()]
        return pytesseract_runner

    return None
147
+
148
+
149
def ocr_extract_texts(ocr_fn, image: np.ndarray) -> List[str]:
    """Run ``ocr_fn`` on ``image`` and return cleaned, non-empty text lines.

    The pre-processed image is tried first. If pre-processing fails, OCR fails,
    or nothing is recognised, the untouched image is tried as well.

    Args:
        ocr_fn: Callable ``image -> list of str`` or ``None``.
        image: Image to read.

    Returns:
        Lines with runs of whitespace collapsed; ``[]`` when ``ocr_fn`` is
        ``None`` or nothing could be read.
    """
    # Check the backend first so no image work is done when there is none.
    if ocr_fn is None:
        return []
    candidates = []
    try:
        candidates.append(preprocess_for_ocr(image))
    except Exception:
        # Best effort: an image the pre-processor rejects is still tried as is.
        pass
    candidates.append(image)

    lines = []
    for candidate in candidates:
        try:
            lines = ocr_fn(candidate) or []
        except Exception:
            lines = []
        if lines:
            break
    # normalize lines
    collapsed = (re.sub(r"\s+", " ", str(ln)).strip() for ln in lines)
    return [ln for ln in collapsed if ln]
170
+
171
+
172
def find_value_near_label(lines: List[str], label_variants: List[str]) -> Optional[str]:
    """Find the numeric token that belongs to a label.

    Labels are tried in the given order (case-insensitive substring match). For
    each line containing the label the search order is:

    1. the rest of that line after the label,
    2. the next two lines,
    3. the part of that line before the label.

    Args:
        lines: OCR text lines in reading order.
        label_variants: Spellings of the label, most specific first.

    Returns:
        The number exactly as written (e.g. ``"12,5"``), or ``None`` when no
        label is present or no number sits near it. No unrelated number is
        returned when the label is absent, so callers can tell "not found"
        apart from a real reading.
    """
    # Unsigned on purpose: a "-" before a label value is a separator, not a sign.
    number_re = re.compile(r"\d+(?:[.,]\d+)*(?:[eE][-+]?\d+)?")
    lowered = [str(ln).lower() for ln in lines]
    for label in label_variants:
        needle = label.lower()
        if not needle:
            continue
        for i, line in enumerate(lowered):
            pos = line.find(needle)
            if pos == -1:
                continue
            nearby = [line[pos + len(needle):]]
            nearby.extend(lowered[i + 1:i + 3])
            nearby.append(line[:pos])
            for text in nearby:
                m = number_re.search(text)
                if m:
                    return m.group(0)
    return None
216
+
217
+
218
def detect_serving_unit(lines: List[str]) -> Tuple[float, str]:
    """Find the serving size and its unit in OCR lines.

    For each known serving label a number directly followed by a mass/volume
    unit is preferred ("1 bottle (250 ml)" gives 250 ml). If no such number
    follows the label, the nearest plain number is used and the unit is guessed
    from whether "ml" occurs anywhere in the text.

    Returns:
        ``(serving_value, unit)`` with ``unit`` being ``"g"`` or ``"ml"``.
        Defaults to ``(100.0, "g")`` when nothing is found; a non-positive
        reading is replaced by ``100.0``.
    """
    labels = ["serving size", "takaran saji", "takaran sajian", "portion", "porsi"]
    joint = " ".join(lines).lower()
    for lab in labels:
        # Number + unit within 80 characters after the label. The trailing \b
        # keeps "30 gula" from being read as "30 g".
        with_unit = re.search(
            rf"{re.escape(lab)}.{{0,80}}?(\d+(?:[.,]\d+)*)\s*(ml|grams?|gr|g)\b",
            joint,
        )
        if with_unit:
            v = safe_float_from_str(with_unit.group(1))
            unit = "ml" if with_unit.group(2) == "ml" else "g"
            return (v if v > 0 else 100.0, unit)
        val = find_value_near_label(lines, [lab])
        if val:
            v = safe_float_from_str(val)
            # No unit next to the number: guess from the rest of the text.
            unit = "ml" if re.search(r"ml\b", joint) else "g"
            return (v if v > 0 else 100.0, unit)
    # fallback default
    return 100.0, "g"
243
+
244
+
245
def normalize_to_per_100(value: float, per_serving: float, unit_serving: str) -> float:
    """Convert a per-serving amount into an amount per 100 g/ml.

    Args:
        value: Amount of the nutrient in one serving.
        per_serving: Serving size.
        unit_serving: Serving unit. Not used in the computation; kept so
            callers can pass it along.

    Returns:
        ``value`` unchanged when the serving is exactly 100, ``0.0`` for a
        non-positive serving, otherwise ``value / per_serving * 100``.
    """
    if per_serving == 100:
        # Already expressed per 100; returning early avoids float round-off.
        return value
    if per_serving <= 0:
        return 0.0
    return (value / per_serving) * 100.0
257
+
258
+
259
+ GRADE_ORDER = ["Grade A", "Grade B", "Grade C", "Grade D"]
260
 
261
 
262
  def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
263
  """
264
+ thresholds: dict with keys "A","B","C" representing upper bounds for each grade
265
+ A if value <= thresholds['A'], B if value <= thresholds['B'], etc.
266
  """
267
  try:
268
  if value <= thresholds["A"]:
 
271
  return "Grade B"
272
  if value <= thresholds["C"]:
273
  return "Grade C"
 
274
  except Exception:
275
+ pass
276
+ return "Grade D"
 
 
 
 
277
 
278
 
279
  def get_grade_color(grade: str) -> Tuple[str, str]:
 
286
  return colors.get(grade, ("#bdc3c7", "black"))
287
 
288
 
289
def call_openrouter_advice(serving_val, sugar_per100, fat_per100, sugar_grade, fat_grade, final_grade) -> str:
    """Ask the OpenRouter chat-completions endpoint for short nutrition advice.

    The API key comes from the module-level ``OPENROUTER_API_KEY``. The request
    blocks for at most 20 seconds.

    Args:
        serving_val: Serving size shown in the prompt.
        sugar_per100: Sugar in g per 100.
        fat_per100: Saturated fat in g per 100.
        sugar_grade: Grade label for sugar, e.g. ``"Grade B"``.
        fat_grade: Grade label for saturated fat.
        final_grade: Overall grade label.

    Returns:
        The advice text, or a human-readable message when the key is missing,
        the request fails, or the answer is empty. Never raises for network or
        payload problems.
    """
    if not OPENROUTER_API_KEY:
        return "OpenRouter API key not configured. Set OPENROUTER_API_KEY in secrets or environment variables."

    prompt = f"""
    Anda adalah ahli gizi dari Indonesia yang ramah.
    - Takaran Saji: {serving_val}
    - Gula (per 100): {sugar_per100:.2f} g (Grade {sugar_grade.replace('Grade ', '')})
    - Lemak Jenuh (per 100): {fat_per100:.2f} g (Grade {fat_grade.replace('Grade ', '')})
    - Grade Akhir: {final_grade.replace('Grade ', '')}
    Berikan saran nutrisi singkat 50-80 kata, fokus pada dampak kesehatan dan tips praktis.
    """
    payload = {
        "model": "mistralai/mistral-7b-instruct:free",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 250,
        "temperature": 0.7
    }
    headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
    try:
        r = requests.post(f"{OPENROUTER_BASE_URL}/chat/completions", json=payload, headers=headers, timeout=20)
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        return f"Gagal memanggil AI: {e}"

    # Walk the payload step by step: "choices" may be missing, empty or null.
    choices = data.get("choices") if isinstance(data, dict) else None
    first = choices[0] if isinstance(choices, list) and choices else None
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    if isinstance(content, str) and content.strip():
        return content.strip()
    return "Tidak ada saran (jawaban kosong)."
321
+
322
 
323
def reset_state():
    """Remove the OCR and grading entries from ``st.session_state``, if present."""
    tracked = ("ocr_done", "data", "calculated", "calc")
    stale = [key for key in tracked if key in st.session_state]
    for key in stale:
        del st.session_state[key]
327
 
 
 
 
328
 
329
+ # --------------- UI ---------------
330
st.title("🥗 Nutri-Grade Detection & Grade Calculator (Improved)")
st.caption("Analisis gizi produk berdasarkan standar Nutri-Grade (contoh: gula & lemak jenuh).")

with st.expander("📋 Petunjuk (singkat)"):
    # Built from explicit pieces so the list does not depend on source indentation.
    st.markdown(
        "1. Upload gambar label nutrisi (JPG/PNG).\n"
        "2. Klik **Analisis OCR** → koreksi hasil jika perlu.\n"
        "3. Klik **Hitung Grade**.\n"
        "\n"
        "**Catatan deploy:** taruh OPENROUTER_API_KEY di Secrets Hugging Face (atau `os.environ`). "
        "Sertakan dependency seperti `paddlepaddle` & `paddleocr` di requirements."
    )

# Upload
st.header("1) Upload Gambar")
uploaded = st.file_uploader("Pilih gambar tabel nutrisi (jpg/jpeg/png)", type=["jpg", "jpeg", "png"], on_change=reset_state)

if uploaded:
    # getvalue() returns the whole payload regardless of where the stream
    # position was left by an earlier rerun.
    file_bytes = np.frombuffer(uploaded.getvalue(), np.uint8)
    img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) if file_bytes.size else None
    if img is None:
        st.error("Gagal membaca file gambar. Pastikan file valid.")
        st.stop()
    st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), width=380)

    ocr_fn = initialize_ocr()
    if ocr_fn is None:
        st.warning("OCR backend tidak tersedia (PaddleOCR/pytesseract). Install dependency yang diperlukan.")
    elif st.button("Analisis OCR"):
        with st.spinner("Menjalankan OCR — ini bisa memakan beberapa saat..."):
            lines = ocr_extract_texts(ocr_fn, img)
        if not lines:
            st.warning("Tidak ada teks terdeteksi. Coba gambar lebih jelas atau upload foto yang lebih tajam.")
            st.session_state.ocr_done = False
        else:
            # store extracted
            st.session_state.data = {"ocr_lines": lines}
            st.session_state.ocr_done = True
            st.success("OCR selesai — silakan koreksi nilai jika perlu.")

# Correction & Calculation input
if st.session_state.get("ocr_done"):
    st.header("2) Koreksi Hasil OCR & Hitung")
    lines = st.session_state.data.get("ocr_lines", [])
    # Show extracted lines and allow user to edit/clean
    st.subheader("Teks hasil OCR (edit jika perlu)")
    joined = "\n".join(lines)
    edited = st.text_area("Hasil OCR (baris per baris)", value=joined, height=180)
    edited_lines = [ln.strip() for ln in edited.splitlines() if ln.strip()]

    # Attempt auto-detection using edited lines
    serving_val, serving_unit = detect_serving_unit(edited_lines)
    sugar_label_variants = ["total sugar", "gula total", "gula", "sugars", "sugar"]
    fat_label_variants = ["saturated fat", "lemak jenuh", "saturated fats", "sat fat"]

    sugar_raw = find_value_near_label(edited_lines, sugar_label_variants) or ""
    fat_raw = find_value_near_label(edited_lines, fat_label_variants) or ""

    col1, col2, col3 = st.columns(3)
    with col1:
        serving_input = st.text_input("Takaran saji (detected)", value=f"{serving_val} {serving_unit}")
    with col2:
        sugar_input = st.text_input("Gula (detected)", value=str(sugar_raw))
    with col3:
        fat_input = st.text_input("Lemak jenuh (detected)", value=str(fat_raw))

    # Option: user can indicate values are already per 100
    per100_checkbox = st.checkbox("Nilai yang saya masukkan sudah satuan per 100g/ml (centang jika ya)", value=False)

    if st.button("Hitung Grade"):
        # The serving field may hold a unit; fall back to the detected value
        # when it contains no usable number.
        parsed_serv = safe_float_from_str(serving_input)
        if parsed_serv <= 0:
            parsed_serv = serving_val or 100.0
        # Honour a unit typed by the user instead of always using the detected one.
        unit_match = re.search(r"\d\s*(ml|g)\b", serving_input.lower())
        chosen_unit = unit_match.group(1) if unit_match else serving_unit

        parsed_sugar = safe_float_from_str(sugar_input)
        parsed_fat = safe_float_from_str(fat_input)
        # An unreadable field silently becomes 0 (which grades as A), so say so.
        unread = [name for name, raw in (("Gula", sugar_input), ("Lemak jenuh", fat_input)) if not re.search(r"\d", raw)]
        if unread:
            st.warning(f"Nilai {', '.join(unread)} tidak terbaca dan dianggap 0 — periksa kembali input.")

        # convert to per 100 if needed
        if per100_checkbox:
            sugar_per100 = parsed_sugar
            fat_per100 = parsed_fat
        else:
            sugar_per100 = normalize_to_per_100(parsed_sugar, parsed_serv, chosen_unit)
            fat_per100 = normalize_to_per_100(parsed_fat, parsed_serv, chosen_unit)

        # Save computed values. A fresh dict also discards advice cached for an
        # earlier calculation.
        st.session_state.calc = {
            "serving": parsed_serv,
            "serving_unit": chosen_unit,
            "sugar_per100": sugar_per100,
            "fat_per100": fat_per100,
            "raw_sugar": parsed_sugar,
            "raw_fat": parsed_fat
        }
        st.session_state.calculated = True
        st.success("Perhitungan selesai.")

# Display Results
if st.session_state.get("calculated"):
    st.header("3) Hasil Grading")
    c = st.session_state.calc

    # Thresholds (Nutri-Grade-like example; adjust to the applicable standard)
    sugar_thresholds = {"A": 1.0, "B": 5.0, "C": 10.0}  # g per 100
    fat_thresholds = {"A": 0.7, "B": 1.2, "C": 2.8}  # g per 100

    gs = get_grade_from_value(c["sugar_per100"], sugar_thresholds)
    gf = get_grade_from_value(c["fat_per100"], fat_thresholds)
    # final grade is the worst (max index)
    final_grade = max([gs, gf], key=lambda x: GRADE_ORDER.index(x))

    cols = st.columns(3)

    def show(col, title, value, unit, grade):
        """Render one coloured result card; ``value=None`` omits the amount line."""
        bg, textc = get_grade_color(grade)
        amount_html = "" if value is None else f"<p style='font-size:20px;margin:4px 0;'>{value:.2f} {unit}</p>"
        col.markdown(
            f"<div style='background:{bg};padding:12px;border-radius:10px;text-align:center;color:{textc};'>"
            f"<strong>{title}</strong>{amount_html}"
            f"<h3 style='margin:2px;'>{grade}</h3></div>",
            unsafe_allow_html=True
        )

    show(cols[0], "Gula (per 100)", c["sugar_per100"], "g", gs)
    show(cols[1], "Lemak Jenuh (per 100)", c["fat_per100"], "g", gf)
    # The overall grade has no amount of its own, so no number is printed.
    show(cols[2], "Grade Akhir", None, "", final_grade)

    st.markdown("---")
    st.subheader("Rincian input & perhitungan")
    st.write(pd.DataFrame([{
        "serving_value": c["serving"],
        "serving_unit": c["serving_unit"],
        "raw_sugar_in_serving": c["raw_sugar"],
        "raw_fat_in_serving": c["raw_fat"],
        "sugar_per_100": c["sugar_per100"],
        "fat_per_100": c["fat_per100"]
    }]))

    st.markdown("---")
    st.header("4) Saran Nutrisi (AI)")
    # Ask once per calculation: this block runs again on every rerun, so the
    # answer is cached next to the numbers it was generated for.
    if "advice" not in c:
        with st.spinner("Meminta saran AI..."):
            c["advice"] = call_openrouter_advice(c["serving"], c["sugar_per100"], c["fat_per100"], gs, gf, final_grade)
    st.info(c["advice"])

st.markdown("---")
st.markdown("<p style='text-align:center;'>Nutri-Grade App (improved) &copy; 2025</p>", unsafe_allow_html=True)