Spaces:

Yeetek
/

insightflowv2

Runtime error

App Files Files Community

Yeetek committed on Jun 6

Commit

efa5b1a

verified ·

1 Parent(s): 1b63618

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -56

app.py CHANGED Viewed

@@ -1,28 +1,25 @@
-# ---------- BEGIN app.py ----------
-import os
-# 1️⃣  Hard-disable numba’s on-disk cache OR redirect it:
-os.environ["NUMBA_DISABLE_CACHE"] = "1"                 # always works
-os.environ.setdefault("NUMBA_CACHE_DIR", "/tmp/numba")  # keeps option to re-enable
-os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True)
-# (optional) quick diagnostics – delete once confirmed
-import sys, json, uuid, sys
-print("ENV snapshot (first 20):",
-      json.dumps(dict(list(os.environ.items())[:20]))); sys.stdout.flush()
-# 2️⃣  *Now* it’s safe to import big libs
 from typing import List
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from bertopic import BERTopic
 from sentence_transformers import SentenceTransformer
-# ---- configuration via env vars ----
 MODEL_NAME = os.getenv("EMBED_MODEL", "Seznam/simcse-small-e-czech")
 MIN_TOPIC  = int(os.getenv("MIN_TOPIC_SIZE", "10"))
 MAX_DOCS   = int(os.getenv("MAX_DOCS", "5000"))
-# ---- model initialisation (runs once at container start) ----
 embeddings  = SentenceTransformer(MODEL_NAME)
 topic_model = BERTopic(
     embedding_model=embeddings,
@@ -30,7 +27,7 @@ topic_model = BERTopic(
     calculate_probabilities=True,
 )
-# -------- FastAPI schema ----------
 class Sentence(BaseModel):
     text: str
     start: float
@@ -39,45 +36,4 @@ class Sentence(BaseModel):
 class Segment(BaseModel):
     topic_id: int
-    label: str | None
-    keywords: List[str]
-    start: float
-    end: float
-    probability: float | None
-    sentences: List[int]
-class SegmentationResponse(BaseModel):
-    run_id: str
-    segments: List[Segment]
-app = FastAPI(title="CZ Topic Segmenter", version="1.0")
-@app.post("/segment", response_model=SegmentationResponse)
-def segment(sentences: List[Sentence]):
-    if len(sentences) > MAX_DOCS:
-        raise HTTPException(413, f"Too many sentences ({len(sentences)} > {MAX_DOCS})")
-    docs = [s.text for s in sentences]
-    topics, probs = topic_model.fit_transform(docs)
-    segments, cur = [], None
-    for idx, (t_id, prob) in enumerate(zip(topics, probs)):
-        if cur is None or t_id != cur["topic_id"]:
-            if cur:
-                segments.append(cur)
-            words = [w for w, _ in topic_model.get_topic(t_id)[:5]]
-            cur = dict(topic_id=t_id,
-                       label=" ".join(words) if t_id != -1 else None,
-                       keywords=words,
-                       start=sentences[idx].start,
-                       end=sentences[idx].end,
-                       probability=float(prob or 0),
-                       sentences=[idx])
-        else:
-            cur["end"] = sentences[idx].end
-            cur["sentences"].append(idx)
-    if cur:
-        segments.append(cur)
-    return {"run_id": str(uuid.uuid4()), "segments": segments}
-# ---------- END app.py ----------

+# ---------- BEGIN app.py (diagnostic build) ----------
+import os, sys, json, uuid
+#  DEBUG – capture the first 20 env-vars Hugging Face passes in
+print("ENV-snapshot:", json.dumps(dict(list(os.environ.items())[:20])))
+sys.stdout.flush()          # make sure it appears in HF build logs
+#  Optional: leave the numba lines in place for the real fix
+os.environ["NUMBA_DISABLE_CACHE"] = "1"
+os.environ.setdefault("NUMBA_CACHE_DIR", "/tmp/numba")
+os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True)
 from typing import List
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from bertopic import BERTopic
 from sentence_transformers import SentenceTransformer
 MODEL_NAME = os.getenv("EMBED_MODEL", "Seznam/simcse-small-e-czech")
 MIN_TOPIC  = int(os.getenv("MIN_TOPIC_SIZE", "10"))
 MAX_DOCS   = int(os.getenv("MAX_DOCS", "5000"))
 embeddings  = SentenceTransformer(MODEL_NAME)
 topic_model = BERTopic(
     embedding_model=embeddings,
     calculate_probabilities=True,
 )
+# ----- FastAPI schema & endpoint (unchanged) -----
 class Sentence(BaseModel):
     text: str
     start: float
 class Segment(BaseModel):
     topic_id: int
+    label: