Spaces:

YourAIEngineer
/

Nutri-Label

Build error

App Files Files Community

YourAIEngineer committed 28 days ago

Commit

fe32dc1

verified ·

1 Parent(s): 08ce19d

Update app.py

Browse files

Files changed (1) hide show

app.py +404 -285

app.py CHANGED Viewed

@@ -1,187 +1,268 @@
 import os
 import re
 import time
-from typing import Dict, List, Tuple, Optional
 import cv2
 import numpy as np
 import pandas as pd
 import requests
 import streamlit as st
-from paddleocr import PaddleOCR
-from PIL import Image
-# ---------------------------
-# CONFIG / SECURITY NOTE
-# ---------------------------
-st.set_page_config(page_title="Nutri-Grade Detector (Improved)", page_icon="🥗", layout="wide")
-# Use environment variable or Streamlit secrets. Do NOT hardcode keys in source.
-OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") or st.secrets.get("OPENROUTER_API_KEY", None)
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
-if OPENROUTER_API_KEY is None:
-    st.warning("OPENROUTER_API_KEY tidak ditemukan. Fitur saran AI akan nonaktif kecuali Anda menambahkan key melalui Streamlit secrets atau environment variable.")
-# ---------------------------
-# HELPERS
-# ---------------------------
-@st.cache_resource
-def init_ocr(lang: List[str] = ["en", "id"], use_angle_cls: bool = True, det=False):
-    """Inisialisasi PaddleOCR. Cached untuk performa."""
-    try:
-        # det=False disables detection model dump to speed up sometimes; adjust as needed
-        return PaddleOCR(lang=lang, use_angle_cls=use_angle_cls, det=det)
-    except Exception as e:
-        st.error(f"Gagal inisialisasi OCR: {e}")
-        return None
-def preprocess_variants(img: np.ndarray) -> List[np.ndarray]:
-    """Buat beberapa varian pra-proses untuk meningkatkan peluang OCR."""
-    variants = []
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    variants.append(img)  # original color
-    variants.append(gray)
-    # bilateral to reduce noise but preserve edges
-    variants.append(cv2.bilateralFilter(gray, 9, 75, 75))
-    # adaptive threshold
-    th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                               cv2.THRESH_BINARY, 15, 7)
-    variants.append(th)
-    # increase contrast (CLAHE)
-    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)).apply(gray)
-    variants.append(clahe)
-    return variants
-def ocr_on_rotations(ocr, img: np.ndarray, try_rotations: List[int] = [0, 90, 180, 270]) -> List[Tuple]:
     """
-    Lakukan OCR pada beberapa rotasi + beberapa preprocessed variants.
-    Kembalikan list tuples (box, text, confidence, rotation)
     """
-    results = []
-    variants = preprocess_variants(img)
-    for rot in try_rotations:
-        # rotate image
-        if rot != 0:
-            h, w = img.shape[:2]
-            M = cv2.getRotationMatrix2D((w / 2, h / 2), rot, 1.0)
-            rotated = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
-        else:
-            rotated = img
-        for v in variants:
-            # PaddleOCR expects BGR or grayscale arrays; we pass rotated directly
-            try:
-                # returns list of lists: res[0] has lines; each line: [box, (text, conf)]
-                res = ocr.ocr(rotated, cls=True)
-            except Exception:
-                res = []
-            for page in res:
-                for ln in page:
-                    box = ln[0]
-                    text = ln[1][0]
-                    conf = float(ln[1][1]) if ln[1][1] not in (None, "") else 0.0
-                    results.append((box, text, conf, rot))
-    # deduplicate by text + approximate box center
-    dedup = {}
-    for box, text, conf, rot in results:
-        # normalize text
-        t = text.strip()
-        # compute center
-        cx = int((box[0][0] + box[2][0]) / 2)
-        cy = int((box[0][1] + box[2][1]) / 2)
-        key = (re.sub(r"\s+", " ", t.lower()), round(cx/50)*50, round(cy/50)*50)
-        if key not in dedup or conf > dedup[key][2]:
-            dedup[key] = (box, t, conf, rot)
-    return list(dedup.values())
-def annotate_image(img: np.ndarray, ocr_items: List[Tuple]) -> np.ndarray:
-    """Buat overlay dari hasil OCR untuk ditampilkan ke user."""
-    out = img.copy()
-    for box, text, conf, rot in ocr_items:
-        pts = np.array(box, dtype=np.int32).reshape((-1, 1, 2))
-        cv2.polylines(out, [pts], True, (0, 255, 0), 1)
-        # putText at top-left corner of box
-        x, y = int(box[0][0]), int(box[0][1]) - 10
-        cv2.putText(out, f"{text[:30]} ({conf:.2f})", (x, max(10, y)), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1, cv2.LINE_AA)
-    return out
-def parse_numeric_value(text: str) -> float:
-    """Parse float dari teks; mendukung koma desimal dan nilai negatif tak diharapkan."""
-    if text is None:
         return 0.0
-    s = str(text).lower().strip()
-    # ambil angka pertama yang muncul (dengan koma/ titik)
-    m = re.search(r"(-?\d{1,3}(?:[.,]\d{3})*(?:[.,]\d+)?)", s)
     if not m:
         return 0.0
-    num = m.group(1)
-    # remove thousand separators (either '.' or ',') but keep decimal separator
-    # heuristic: jika ada kedua '.' dan ',',anggap yang terakhir sebagai decimal separator
-    if "." in num and "," in num:
-        if num.rfind(",") > num.rfind("."):
-            num = num.replace(".", "").replace(",", ".")  # comma decimal
         else:
-            num = num.replace(",", "")  # dot decimal
     else:
-        # only commas => treat comma as decimal if single comma and not thousand grouping
-        if num.count(",") == 1 and num.count(".") == 0:
-            num = num.replace(",", ".")
-        else:
-            num = num.replace(",", "")
     try:
-        return float(num)
     except Exception:
         return 0.0
-def extract_nutrition_from_texts(texts: List[str]) -> Dict[str, Dict]:
     """
-    Dari daftar potongan teks (OCR), cari serving, sugar, saturated fat.
-    Kembalikan dict: {'serving': {'value':..., 'unit':...,'per_100': bool}, ...}
     """
-    combined = " | ".join(texts).lower()
-    out = {}
-    # patterns: cari label lalu angka yang dekat (d within ~30 chars)
-    label_patterns = {
-        "serving": r"(takaran\s*saj[i|a]|serving\s*size|portion)[^\d]{0,30}([0-9\.,]+)\s*(g|ml|mls)?",
-        "sugar": r"(gula|sugar)[^\d]{0,30}([0-9\.,]+)\s*(g|gram)?",
-        "saturated_fat": r"(lemak\s*jenuh|saturated\s*fat|sat fat|sat\.?\s*fat)[^\d]{0,30}([0-9\.,]+)\s*(g|gram)?"
-    }
-    for key, pat in label_patterns.items():
-        m = re.search(pat, combined)
-        if m:
-            val = parse_numeric_value(m.group(2))
-            unit = m.group(3) if len(m.groups()) >= 3 else ""
-            out[key] = {"value": val, "unit": unit or "g"}
-    # Additionally, detect per 100 values if explicitly shown like "per 100g" or "/100g"
-    # If the OCR contains 'per 100' near the word, we mark per_100 True
-    for k in out:
-        out[k]["per_100"] = bool(re.search(fr"{k}.*per\s*100|per\s*100.*{k}|/100", combined[:300]))  # simple heuristic
-    # Fallback: if no serving found, assume 100
-    if "serving" not in out:
-        out["serving"] = {"value": 100.0, "unit": "g", "per_100": True}
-    return out
-def normalize_to_per100(value: float, serving: float, is_per_100: bool) -> float:
-    """Konversi nilai (yang mungkin per serving) menjadi per 100g/ml."""
-    if is_per_100:
-        return value
-    if serving <= 0:
         return 0.0
-    # value per serving -> convert to per 100
-    return (value / serving) * 100.0
 def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
     """
-    Ambil grade berdasarkan thresholds map, contoh thresholds {"A":1.0,"B":5.0,"C":10.0}
-    Jika value <= A -> A, <=B -> B, <=C -> C else D.
     """
     try:
         if value <= thresholds["A"]:
@@ -190,14 +271,9 @@ def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
             return "Grade B"
         if value <= thresholds["C"]:
             return "Grade C"
-        return "Grade D"
     except Exception:
-        return "Grade D"
-def grade_order(grade: str) -> int:
-    order = {"Grade A": 0, "Grade B": 1, "Grade C": 2, "Grade D": 3}
-    return order.get(grade, 3)
 def get_grade_color(grade: str) -> Tuple[str, str]:
@@ -210,149 +286,192 @@ def get_grade_color(grade: str) -> Tuple[str, str]:
     return colors.get(grade, ("#bdc3c7", "black"))
-def call_openrouter_advice(api_key: str, prompt: str, timeout=30) -> str:
-    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
     payload = {
         "model": "mistralai/mistral-7b-instruct:free",
         "messages": [{"role": "user", "content": prompt}],
         "max_tokens": 250,
         "temperature": 0.7
     }
     try:
-        r = requests.post(f"{OPENROUTER_BASE_URL}/chat/completions", headers=headers, json=payload, timeout=timeout)
         r.raise_for_status()
         data = r.json()
-        return data["choices"][0]["message"]["content"].strip()
     except Exception as e:
-        return f"Error fetching advice: {e}"
-# ---------------------------
-# UI
-# ---------------------------
-st.title("🥗 Nutri-Grade Detection — Improved")
-st.caption("Versi lebih robust: multiple preprocess, editable OCR table, configurable thresholds.")
-ocr = init_ocr()
-with st.sidebar:
-    st.header("Pengaturan")
-    use_ai_advice = st.checkbox("Aktifkan saran AI (OpenRouter)", value=bool(OPENROUTER_API_KEY))
-    col1, col2 = st.columns(2)
     with col1:
-        st.markdown("**Threshold (g/100)**")
-        sugar_thr_a = st.number_input("Sugar A ≤", value=1.0, step=0.1)
-        sugar_thr_b = st.number_input("Sugar B ≤", value=5.0, step=0.1)
-        sugar_thr_c = st.number_input("Sugar C ≤", value=10.0, step=0.1)
     with col2:
-        fat_thr_a = st.number_input("SatFat A ≤", value=0.7, step=0.1)
-        fat_thr_b = st.number_input("SatFat B ≤", value=1.2, step=0.1)
-        fat_thr_c = st.number_input("SatFat C ≤", value=2.8, step=0.1)
     st.markdown("---")
-    st.write("Tips: Ambil foto rata, cahaya cukup, hindari pantulan.")
-uploaded = st.file_uploader("Upload gambar tabel gizi (jpg/png)", type=["jpg", "jpeg", "png"])
-if uploaded is None:
-    st.info("Upload gambar supaya bisa mulai OCR.")
-    st.stop()
-# read image
-file_bytes = np.frombuffer(uploaded.read(), np.uint8)
-img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
-display_w = 600
-st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), caption="Gambar yang diupload (preview)", width=min(display_w, img.shape[1]))
-if st.button("Jalankan OCR & Ekstraksi"):
-    if ocr is None:
-        st.error("OCR model belum tersedia.")
-        st.stop()
-    with st.spinner("Menjalankan OCR pada beberapa varian gambar..."):
-        ocr_items = ocr_on_rotations(ocr, img)
-    st.success(f"OCR selesai — {len(ocr_items)} potongan teks terdeteksi (deduped).")
-    # show annotated image
-    annotated = annotate_image(img, ocr_items)
-    st.image(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB), caption="Overlay hasil OCR", width=min(display_w, img.shape[1]))
-    # collect texts sorted by confidence desc
-    ocr_texts = [t for _, t, _, _ in sorted(ocr_items, key=lambda x: -x[2])]
-    extracted = extract_nutrition_from_texts(ocr_texts)
-    # build editable DataFrame for user correction
-    rows = []
-    for nutr in ["serving", "sugar", "saturated_fat"]:
-        ent = extracted.get(nutr, {"value": 0.0, "unit": ("g" if nutr != "serving" else "g"), "per_100": False})
-        rows.append({
-            "nutrient": nutr,
-            "value": float(ent.get("value", 0.0)),
-            "unit": ent.get("unit", "g"),
-            "is_per_100": bool(ent.get("per_100", False))
-        })
-    df = pd.DataFrame(rows)
-    st.markdown("### Koreksi hasil ekstraksi (ubah nilai jika OCR keliru)")
-    edited = st.data_editor(df, num_rows="fixed", use_container_width=True)
-    if st.button("Hitung Grade dari nilai di atas"):
-        # read corrected
-        serving_val = float(edited.loc[edited['nutrient'] == 'serving', 'value'].values[0])
-        serving_unit = edited.loc[edited['nutrient'] == 'serving', 'unit'].values[0]
-        sugar_val = float(edited.loc[edited['nutrient'] == 'sugar', 'value'].values[0])
-        sugar_per100_flag = bool(edited.loc[edited['nutrient'] == 'sugar', 'is_per_100'].values[0])
-        satfat_val = float(edited.loc[edited['nutrient'] == 'saturated_fat', 'value'].values[0])
-        satfat_per100_flag = bool(edited.loc[edited['nutrient'] == 'saturated_fat', 'is_per_100'].values[0])
-        # Normalize serving unit: if ml vs g, we assume density ~1 g/ml (common for liquids). Warn if ml.
-        if isinstance(serving_unit, str) and "ml" in serving_unit.lower():
-            st.info("Takaran dalam ml terdeteksi — diasumsikan densitas 1 g/ml untuk perhitungan per 100. Koreksi manual jika perlu.")
-        # Normalize to per 100
-        sugar_per100 = normalize_to_per100(sugar_val, serving_val, sugar_per100_flag)
-        satfat_per100 = normalize_to_per100(satfat_val, serving_val, satfat_per100_flag)
-        # Grades
-        sugar_thresholds = {"A": sugar_thr_a, "B": sugar_thr_b, "C": sugar_thr_c}
-        fat_thresholds = {"A": fat_thr_a, "B": fat_thr_b, "C": fat_thr_c}
-        gs = get_grade_from_value(sugar_per100, sugar_thresholds)
-        gf = get_grade_from_value(satfat_per100, fat_thresholds)
-        # final grade = worse (higher order)
-        final = gs if grade_order(gs) >= grade_order(gf) else gf
-        # Show results nicely
-        st.header("Hasil Perhitungan")
-        c1, c2, c3 = st.columns(3)
-        bg_s, tc_s = get_grade_color(gs)
-        c1.markdown(f"<div style='background:{bg_s};padding:12px;border-radius:8px;text-align:center;color:{tc_s};'>"
-                    f"<strong>Gula</strong><p style='font-size:22px'>{sugar_per100:.2f} g/100g</p><h3>{gs}</h3></div>", unsafe_allow_html=True)
-        bg_f, tc_f = get_grade_color(gf)
-        c2.markdown(f"<div style='background:{bg_f};padding:12px;border-radius:8px;text-align:center;color:{tc_f};'>"
-                    f"<strong>Lemak Jenuh</strong><p style='font-size:22px'>{satfat_per100:.2f} g/100g</p><h3>{gf}</h3></div>", unsafe_allow_html=True)
-        bg_fin, tc_fin = get_grade_color(final)
-        c3.markdown(f"<div style='background:{bg_fin};padding:12px;border-radius:8px;text-align:center;color:{tc_fin};'>"
-                    f"<strong>Grade Akhir</strong><p style='font-size:22px'>{final}</p></div>", unsafe_allow_html=True)
-        st.markdown("---")
-        st.write("Detail nilai (per 100):")
-        st.write(pd.DataFrame({
-            "nutrient": ["sugar", "saturated_fat"],
-            "per_100_g": [round(sugar_per100, 3), round(satfat_per100, 3)],
-            "grade": [gs, gf]
-        }))
-        # AI advice (optional)
-        if use_ai_advice and OPENROUTER_API_KEY:
-            prompt = (
-                f"Anda adalah ahli gizi Indonesia. "
-                f"Takaran sajian diasumsikan {serving_val} g/ml. "
-                f"Gula per 100g: {sugar_per100:.2f} g ({gs}). "
-                f"Lemak jenuh per 100g: {satfat_per100:.2f} g ({gf}). "
-                f"Grade akhir: {final}. Berikan saran singkat 40-80 kata, fokus pada kesehatan dan tips sederhana."
-            )
-            with st.spinner("Mengambil saran dari AI..."):
-                advice = call_openrouter_advice(OPENROUTER_API_KEY, prompt)
-            st.header("Saran Nutrisi (AI)")
-            st.info(advice)
-        elif use_ai_advice and not OPENROUTER_API_KEY:
-            st.warning("Anda memilih aktifkan saran AI tetapi API key tidak tersedia. Tambahkan OPENROUTER_API_KEY di Streamlit secrets atau environment variable.")
 st.markdown("---")
-st.caption("Catatan: Aplikasi ini membantu estimasi — selalu verifikasi dengan label produk asli dan konsultasi ahli gizi untuk keputusan medis.")

+# app.py - Nutri-Grade Detection (improved)
 import os
 import re
 import time
+from typing import Optional, Tuple, List, Dict
 import cv2
 import numpy as np
 import pandas as pd
+from PIL import Image
 import requests
 import streamlit as st
+# Try imports that may be optional at runtime
+try:
+    from paddleocr import PaddleOCR
+    PADDLE_AVAILABLE = True
+except Exception:
+    PADDLE_AVAILABLE = False
+try:
+    import pytesseract
+    PYTESSERACT_AVAILABLE = True
+except Exception:
+    PYTESSERACT_AVAILABLE = False
+# ---------------- CONFIG ----------------
+st.set_page_config(
+    page_title="Nutri-Grade Label Detection",
+    page_icon="🥗",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+# Get API key from secrets or env (do NOT hardcode)
+OPENROUTER_API_KEY = st.secrets.get("OPENROUTER_API_KEY") or os.environ.get("OPENROUTER_API_KEY")
+# ---------------- HELPERS ----------------
+def safe_float_from_str(s: str) -> float:
     """
+    Robust parse of numeric-like string:
+    - Accepts "1,234.56", "1.234,56" (tries common variants)
+    - Handles "1-2" by taking first number
+    - Removes non-numeric noise like "g", "mg", "%"
+    - Returns 0.0 if no parseable number found
     """
+    if s is None:
         return 0.0
+    text = str(s).strip()
+    # find first numeric-like token (allow comma and dot and minus)
+    m = re.search(r"-?\d+[.,]?\d*(?:[eE][-+]?\d+)?", text)
     if not m:
         return 0.0
+    token = m.group(0)
+    # if token contains both comma and dot, treat whichever separator comes last as the decimal point
+    if "," in token and "." in token:
+        # choose the last separator as decimal
+        if token.rfind(".") > token.rfind(","):
+            token = token.replace(",", "")
         else:
+            token = token.replace(".", "").replace(",", ".")
     else:
+        # if only comma, treat as decimal (common in many locales)
+        if "," in token and "." not in token:
+            token = token.replace(",", ".")
     try:
+        return float(token)
     except Exception:
         return 0.0
+def preprocess_for_ocr(image: np.ndarray, max_dim=1600) -> np.ndarray:
+    """Preprocess image to improve OCR: resize, denoise, grayscale, adaptive threshold."""
+    img = image.copy()
+    h, w = img.shape[:2]
+    scale = 1.0
+    if max(h, w) > max_dim:
+        scale = max_dim / max(h, w)
+        img = cv2.resize(img, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_AREA)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    # denoise
+    gray = cv2.fastNlMeansDenoising(gray, h=7)
+    # increase contrast via histogram equalization
+    gray = cv2.equalizeHist(gray)
+    # adaptive threshold to emphasize text
+    th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                               cv2.THRESH_BINARY, 11, 2)
+    # combine threshold and gray to maintain readability
+    combined = cv2.bitwise_or(gray, th)
+    return combined
+@st.cache_resource
+def initialize_ocr(lang_list: Optional[List[str]] = None, use_gpu: bool = False):
     """
+    Initialize OCR backend. Prefer PaddleOCR if available; else fallback to pytesseract if available.
+    Returns a callable ocr(image) -> List[str] of extracted text lines.
     """
+    if lang_list is None:
+        lang_list = ["en", "id"]
+    if PADDLE_AVAILABLE:
+        try:
+            ocr = PaddleOCR(lang=lang_list, use_angle_cls=True, use_gpu=use_gpu)
+            def paddle_runner(img: np.ndarray) -> List[str]:
+                # Paddle expects BGR or numpy image
+                try:
+                    res = ocr.ocr(img, det=True, rec=True, cls=True)
+                except Exception:
+                    # try passing grayscale
+                    res = ocr.ocr(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), det=True, rec=True, cls=True)
+                texts = []
+                # res is list of (line) groups
+                for line in res:
+                    # line: [ [box], (text, confidence) ]
+                    if isinstance(line, list) and len(line) > 0:
+                        for item in line:
+                            if len(item) >= 2 and isinstance(item[1], (list, tuple)):
+                                text = item[1][0]
+                                texts.append(str(text))
+                            elif len(item) >= 2 and isinstance(item[1], str):
+                                texts.append(item[1])
+                    elif isinstance(line, tuple) and len(line) >= 2:
+                        texts.append(str(line[1][0] if isinstance(line[1], (list, tuple)) else line[1]))
+                return texts
+            return paddle_runner
+        except Exception as e:
+            st.warning(f"PaddleOCR init failed: {e}")
+    if PYTESSERACT_AVAILABLE:
+        try:
+            def pytesseract_runner(img: np.ndarray) -> List[str]:
+                # pytesseract expects PIL Image or array
+                if img.ndim == 2:
+                    pil = Image.fromarray(img)
+                else:
+                    pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+                raw = pytesseract.image_to_string(pil)
+                lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
+                return lines
+            return pytesseract_runner
+        except Exception as e:
+            st.warning(f"pytesseract init failed: {e}")
+    return None
+def ocr_extract_texts(ocr_fn, image: np.ndarray) -> List[str]:
+    """Run OCR function and return cleaned list of lines."""
+    pre = preprocess_for_ocr(image)
+    lines = []
+    if ocr_fn is None:
+        return lines
+    try:
+        lines = ocr_fn(pre)
+    except Exception:
+        # fallback: try on original
+        try:
+            lines = ocr_fn(image)
+        except Exception:
+            lines = []
+    # normalize lines
+    cleaned = []
+    for ln in lines:
+        ln = re.sub(r"\s+", " ", str(ln)).strip()
+        if ln:
+            cleaned.append(ln)
+    return cleaned
+def find_value_near_label(lines: List[str], label_variants: List[str]) -> Optional[str]:
+    """
+    Find numeric value near a label.
+    Strategy:
+    - Join lines with separator and search for label positions then find nearest numeric token within window.
+    - Also check next 2 lines for numeric presence.
+    """
+    joined = " ||| ".join(lines).lower()
+    # tokenize and keep positions
+    tokens = re.split(r"(\s+|\|\|\|)", joined)  # keep splits so we can get token index
+    lowered = joined.lower()
+    for label in label_variants:
+        idx = lowered.find(label.lower())
+        if idx != -1:
+            # take the 120 chars starting at the label and look for a number
+            sub = lowered[idx: idx + 120]
+            m = re.search(r"-?\d+[.,]?\d*(?:[eE][-+]?\d+)?", sub)
+            if m:
+                return m.group(0)
+            # else scan ahead in joined string a bit more
+            sub2 = lowered[idx: idx + 360]
+            m2 = re.search(r"-?\d+[.,]?\d*(?:[eE][-+]?\d+)?", sub2)
+            if m2:
+                return m2.group(0)
+    # try pattern-based scanning line by line
+    for i, ln in enumerate(lines):
+        lowln = ln.lower()
+        for label in label_variants:
+            if label.lower() in lowln:
+                # first try number in same line
+                m = re.search(r"-?\d+[.,]?\d*(?:[eE][-+]?\d+)?", lowln)
+                if m:
+                    return m.group(0)
+                # else check next two lines
+                for j in range(1, 3):
+                    if i + j < len(lines):
+                        m2 = re.search(r"-?\d+[.,]?\d*(?:[eE][-+]?\d+)?", lines[i + j])
+                        if m2:
+                            return m2.group(0)
+    # fallback: try any number labeled with units like "g", "mg", "per 100"
+    anynum = re.search(r"(-?\d+[.,]?\d*(?:[eE][-+]?\d+)?)\s*(g|mg|ml|%)", joined)
+    if anynum:
+        return anynum.group(1)
+    return None
+def detect_serving_unit(lines: List[str]) -> Tuple[float, str]:
+    """
+    Try to find serving size and unit. If not found, default 100 g.
+    Returns (serving_value, unit) where unit is 'g' or 'ml' or 'serving'
+    """
+    labels = ["serving size", "takaran saji", "takaran sajian", "portion", "porsi"]
+    for lab in labels:
+        val = find_value_near_label(lines, [lab])
+        if val:
+            v = safe_float_from_str(val)
+            # try to detect unit in same context
+            joint = " ".join(lines).lower()
+            # look for g / ml nearby
+            m = re.search(rf"{re.escape(lab)}[^\n\r]{{0,80}}?([0-9.,]+)\s*(g|ml|grams|gram|mL)", joint)
+            if m:
+                unit = "ml" if m.group(2).lower().startswith("m") else "g"
+            else:
+                # guess unit by presence of 'ml' anywhere in the lowercased joined text
+                if re.search(r"ml\b", joint):
+                    unit = "ml"
+                else:
+                    unit = "g"
+            return (v if v > 0 else 100.0, unit)
+    # fallback default
+    return 100.0, "g"
+def normalize_to_per_100(value: float, per_serving: float, unit_serving: str) -> float:
+    """
+    Convert value (per serving) => value per 100 units (g or ml).
+    If value already per 100 (we try to detect externally), this function should not be called.
+    """
+    if per_serving <= 0 or per_serving == 100:
+        # if per_serving == 100 -> already per 100
+        if per_serving == 100:
+            return value
         return 0.0
+    # convert to per 100
+    return (value / per_serving) * 100.0
+GRADE_ORDER = ["Grade A", "Grade B", "Grade C", "Grade D"]
 def get_grade_from_value(value: float, thresholds: Dict[str, float]) -> str:
     """
+    thresholds: dict with keys "A","B","C" representing upper bounds for each grade
+    A if value <= thresholds['A'], B if value <= thresholds['B'], etc.
     """
     try:
         if value <= thresholds["A"]:
             return "Grade B"
         if value <= thresholds["C"]:
             return "Grade C"
     except Exception:
+        pass
+    return "Grade D"
 def get_grade_color(grade: str) -> Tuple[str, str]:
     return colors.get(grade, ("#bdc3c7", "black"))
+def call_openrouter_advice(serving_val, sugar_per100, fat_per100, sugar_grade, fat_grade, final_grade) -> str:
+    """
+    Call OpenRouter-compatible endpoint for short nutrition advice.
+    Uses the API key from secrets/env and a 20-second request timeout; returns a friendly fallback message on error.
+    """
+    if not OPENROUTER_API_KEY:
+        return "OpenRouter API key not configured. Set OPENROUTER_API_KEY in secrets or environment variables."
+    prompt = f"""
+Anda adalah ahli gizi dari Indonesia yang ramah.
+- Takaran Saji: {serving_val}
+- Gula (per 100): {sugar_per100:.2f} g (Grade {sugar_grade.replace('Grade ', '')})
+- Lemak Jenuh (per 100): {fat_per100:.2f} g (Grade {fat_grade.replace('Grade ', '')})
+- Grade Akhir: {final_grade.replace('Grade ', '')}
+Berikan saran nutrisi singkat 50-80 kata, fokus pada dampak kesehatan dan tips praktis.
+"""
     payload = {
         "model": "mistralai/mistral-7b-instruct:free",
         "messages": [{"role": "user", "content": prompt}],
         "max_tokens": 250,
         "temperature": 0.7
     }
+    headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
     try:
+        r = requests.post(f"{OPENROUTER_BASE_URL}/chat/completions", json=payload, headers=headers, timeout=20)
         r.raise_for_status()
         data = r.json()
+        # defensive access
+        content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
+        return content.strip() if content else "Tidak ada saran (jawaban kosong)."
     except Exception as e:
+        return f"Gagal memanggil AI: {e}"
+def reset_state():
+    for k in ["ocr_done", "data", "calculated", "calc"]:
+        if k in st.session_state:
+            del st.session_state[k]
+# --------------- UI ---------------
+st.title("🥗 Nutri-Grade Detection & Grade Calculator (Improved)")
+st.caption("Analisis gizi produk berdasarkan standar Nutri-Grade (contoh: gula & lemak jenuh).")
+with st.expander("📋 Petunjuk (singkat)"):
+    st.markdown("""
+    1. Upload gambar label nutrisi (JPG/PNG).
+    2. Klik **Analisis OCR** → koreksi hasil jika perlu.
+    3. Klik **Hitung Grade**.
+    \n**Catatan deploy:** taruh OPENROUTER_API_KEY di Secrets Hugging Face (atau `os.environ`). Sertakan dependency seperti `paddlepaddle` & `paddleocr` di requirements.\
+    """)
+# Upload
+st.header("1) Upload Gambar")
+uploaded = st.file_uploader("Pilih gambar tabel nutrisi (jpg/jpeg/png)", type=["jpg", "jpeg", "png"], on_change=reset_state)
+if uploaded:
+    file_bytes = np.frombuffer(uploaded.read(), np.uint8)
+    img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
+    if img is None:
+        st.error("Gagal membaca file gambar. Pastikan file valid.")
+        st.stop()
+    st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), width=380)
+    ocr_fn = initialize_ocr()
+    if ocr_fn is None:
+        st.warning("OCR backend tidak tersedia (PaddleOCR/pytesseract). Install dependency yang diperlukan.")
+    else:
+        if st.button("Analisis OCR"):
+            with st.spinner("Menjalankan OCR — ini bisa memakan beberapa saat..."):
+                lines = ocr_extract_texts(ocr_fn, img)
+            if not lines:
+                st.warning("Tidak ada teks terdeteksi. Coba gambar lebih jelas atau upload foto yang lebih tajam.")
+                st.session_state.ocr_done = False
+            else:
+                # store extracted
+                st.session_state.data = {"ocr_lines": lines}
+                st.session_state.ocr_done = True
+                st.success("OCR selesai — silakan koreksi nilai jika perlu.")
+# Correction & Calculation input
+if st.session_state.get("ocr_done"):
+    st.header("2) Koreksi Hasil OCR & Hitung")
+    lines = st.session_state.data.get("ocr_lines", [])
+    # Show extracted lines and allow user to edit/clean
+    st.subheader("Teks hasil OCR (edit jika perlu)")
+    joined = "\n".join(lines)
+    edited = st.text_area("Hasil OCR (baris per baris)", value=joined, height=180)
+    edited_lines = [ln.strip() for ln in edited.splitlines() if ln.strip()]
+    # Attempt auto-detection using edited lines
+    serving_val, serving_unit = detect_serving_unit(edited_lines)
+    sugar_label_variants = ["total sugar", "gula total", "gula", "sugars", "sugar"]
+    fat_label_variants = ["saturated fat", "lemak jenuh", "saturated fats", "sat fat"]
+    sugar_raw = find_value_near_label(edited_lines, sugar_label_variants) or ""
+    fat_raw = find_value_near_label(edited_lines, fat_label_variants) or ""
+    col1, col2, col3 = st.columns(3)
     with col1:
+        serving_input = st.text_input("Takaran saji (detected)", value=f"{serving_val} {serving_unit}")
     with col2:
+        sugar_input = st.text_input("Gula (detected)", value=str(sugar_raw))
+    with col3:
+        fat_input = st.text_input("Lemak jenuh (detected)", value=str(fat_raw))
+    # Option: user can indicate values are already per 100
+    per100_checkbox = st.checkbox("Nilai yang saya masukkan sudah satuan per 100g/ml (centang jika ya)", value=False)
+    if st.button("Hitung Grade"):
+        # parse numeric
+        # parse serving_input - may contain unit
+        mserv = re.search(r"(-?\d+[.,]?\d*)", serving_input)
+        if mserv:
+            parsed_serv = safe_float_from_str(mserv.group(0))
+        else:
+            parsed_serv = serving_val or 100.0
+        parsed_sugar = safe_float_from_str(sugar_input)
+        parsed_fat = safe_float_from_str(fat_input)
+        # convert to per 100 if needed
+        if per100_checkbox:
+            sugar_per100 = parsed_sugar
+            fat_per100 = parsed_fat
+        else:
+            sugar_per100 = normalize_to_per_100(parsed_sugar, parsed_serv, serving_unit)
+            fat_per100 = normalize_to_per_100(parsed_fat, parsed_serv, serving_unit)
+        # Save computed values
+        st.session_state.calc = {
+            "serving": parsed_serv,
+            "serving_unit": serving_unit,
+            "sugar_per100": sugar_per100,
+            "fat_per100": fat_per100,
+            "raw_sugar": parsed_sugar,
+            "raw_fat": parsed_fat
+        }
+        st.session_state.calculated = True
+        st.success("Perhitungan selesai.")
+# Display Results
+if st.session_state.get("calculated"):
+    st.header("3) Hasil Grading")
+    c = st.session_state.calc
+    # Thresholds (Nutri-Grade-like example — adjust sesuai standar)
+    sugar_thresholds = {"A": 1.0, "B": 5.0, "C": 10.0}       # g per 100
+    fat_thresholds = {"A": 0.7, "B": 1.2, "C": 2.8}         # g per 100
+    gs = get_grade_from_value(c["sugar_per100"], sugar_thresholds)
+    gf = get_grade_from_value(c["fat_per100"], fat_thresholds)
+    # final grade is the worst (max index)
+    final_grade = max([gs, gf], key=lambda x: GRADE_ORDER.index(x))
+    cols = st.columns(3)
+    def show(col, title, value, unit, grade):
+        bg, textc = get_grade_color(grade)
+        col.markdown(
+            f"<div style='background:{bg};padding:12px;border-radius:10px;text-align:center;color:{textc};'>"
+            f"<strong>{title}</strong><p style='font-size:20px;margin:4px 0;'>{value:.2f} {unit}</p>"
+            f"<h3 style='margin:2px;'>{grade}</h3></div>",
+            unsafe_allow_html=True
+        )
+    show(cols[0], "Gula (per 100)", c["sugar_per100"], "g", gs)
+    show(cols[1], "Lemak Jenuh (per 100)", c["fat_per100"], "g", gf)
+    show(cols[2], "Grade Akhir", 0.0, "", final_grade)
     st.markdown("---")
+    st.subheader("Rincian input & perhitungan")
+    st.write(pd.DataFrame([{
+        "serving_value": c["serving"],
+        "serving_unit": c["serving_unit"],
+        "raw_sugar_in_serving": c["raw_sugar"],
+        "raw_fat_in_serving": c["raw_fat"],
+        "sugar_per_100": c["sugar_per100"],
+        "fat_per_100": c["fat_per100"]
+    }]))
+    st.markdown("---")
+    st.header("4) Saran Nutrisi (AI)")
+    with st.spinner("Meminta saran AI..."):
+        advice = call_openrouter_advice(c["serving"], c["sugar_per100"], c["fat_per100"], gs, gf, final_grade)
+    st.info(advice)
 st.markdown("---")
+st.markdown("<p style='text-align:center;'>Nutri-Grade App (improved) &copy; 2025</p>", unsafe_allow_html=True)