zamal committed on
Commit
0a3438b
·
verified ·
1 Parent(s): 3ad87bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -174,12 +174,14 @@ def extract_data_from_pdfs(
174
 
175
  # 2) Vision–language model
176
  proc = LlavaNextProcessor.from_pretrained(vlm_choice)
177
- vis = (LlavaNextForConditionalGeneration
178
- .from_pretrained(vlm_choice, torch_dtype=torch.float16, low_cpu_mem_usage=True)
179
- .to("cuda"))
 
 
180
 
181
- # 3) Monkey-patch caption fn
182
- def describe(img):
183
  torch.cuda.empty_cache(); gc.collect()
184
  prompt = "[INST] <image>\nDescribe the image in a sentence [/INST]"
185
  inp = proc(prompt, img, return_tensors="pt").to("cuda")
@@ -206,15 +208,14 @@ def extract_data_from_pdfs(
206
  images.extend(imgs)
207
  names.extend([os.path.basename(path)] * len(imgs))
208
 
209
- # 5) Build + persist the vectordb
210
  progress(0.6, "Indexing in vector DB…")
211
- client = get_vectordb(all_text, images, names)
212
-
213
  global CURRENT_VDB
214
  CURRENT_VDB = get_vectordb(all_text, images, names)
215
 
 
216
  session["processed"] = True
217
- sample = images[:4] if include_images=="Include Images" else []
218
  return (
219
  session,
220
  all_text[:2000] + "...",
 
174
 
175
  # 2) Vision–language model
176
  proc = LlavaNextProcessor.from_pretrained(vlm_choice)
177
+ vis = (
178
+ LlavaNextForConditionalGeneration
179
+ .from_pretrained(vlm_choice, torch_dtype=torch.float16, low_cpu_mem_usage=True)
180
+ .to("cuda")
181
+ )
182
 
183
+ # 3) Monkeypatch caption fn
184
+ def describe(img: Image.Image) -> str:
185
  torch.cuda.empty_cache(); gc.collect()
186
  prompt = "[INST] <image>\nDescribe the image in a sentence [/INST]"
187
  inp = proc(prompt, img, return_tensors="pt").to("cuda")
 
208
  images.extend(imgs)
209
  names.extend([os.path.basename(path)] * len(imgs))
210
 
211
+ # 5) Build the in‐memory vector DB once
212
  progress(0.6, "Indexing in vector DB…")
 
 
213
  global CURRENT_VDB
214
  CURRENT_VDB = get_vectordb(all_text, images, names)
215
 
216
+ # 6) Mark session and return UI outputs
217
  session["processed"] = True
218
+ sample = images[:4] if include_images == "Include Images" else []
219
  return (
220
  session,
221
  all_text[:2000] + "...",