Spaces:

baliddeki
/

phronesis-ml-endpoint

Sleeping

App Files Community

baliddeki committed on May 2

Commit

a54164e

1 Parent(s): e65d0e5

fix with endpoints 2

Browse files

Files changed (1) hide show

app.py +39 -30

app.py CHANGED Viewed

@@ -14,11 +14,10 @@ import gc
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Environment setup
 HF_TOKEN = os.getenv("HF_TOKEN")
 os.environ["HF_HOME"] = "/tmp/huggingface_cache"
-# Model initialization
 tokenizer = AutoTokenizer.from_pretrained("baliddeki/phronesis-ml", token=HF_TOKEN)
 video_model = models.video.r3d_18(weights="KINETICS400_V1")
 video_model.fc = torch.nn.Linear(video_model.fc.in_features, 512)
@@ -28,20 +27,29 @@ projector = ImageToTextProjector(512, report_generator.config.d_model)
 num_classes = 4
 class_names = ["acute", "normal", "chronic", "lacunar"]
-combined_model = CombinedModel(video_model, report_generator, num_classes, projector, tokenizer)
-model_file = hf_hub_download("baliddeki/phronesis-ml", "pytorch_model.bin", token=HF_TOKEN)
 state_dict = torch.load(model_file, map_location=device)
 combined_model.load_state_dict(state_dict)
 combined_model.to(device)
 combined_model.eval()
 # Image transforms
-image_transform = transforms.Compose([
-    transforms.Resize((112, 112)),
-    transforms.ToTensor(),
-    transforms.Normalize(mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989]),
-])
 def dicom_to_image(file_bytes):
     dicom_file = pydicom.dcmread(io.BytesIO(file_bytes))
@@ -50,19 +58,20 @@ def dicom_to_image(file_bytes):
     pixel_array = pixel_array.astype(np.uint8)
     return Image.fromarray(pixel_array).convert("RGB")
-def predict(images):
-    if not images:
         return "No image uploaded.", ""
     processed_imgs = []
-    for img in images:
-        filename = img.name.lower()
         if filename.endswith((".dcm", ".ima")):
-            file_bytes = img.read()
             dicom_img = dicom_to_image(file_bytes)
             processed_imgs.append(dicom_img)
         else:
-            pil_img = Image.open(img).convert("RGB")
             processed_imgs.append(pil_img)
     n_frames = 16
@@ -72,7 +81,9 @@ def predict(images):
             for i in np.linspace(0, len(processed_imgs) - 1, n_frames, dtype=int)
         ]
     else:
-        images_sampled = processed_imgs + [processed_imgs[-1]] * (n_frames - len(processed_imgs))
     tensor_imgs = [image_transform(i) for i in images_sampled]
     input_tensor = torch.stack(tensor_imgs).permute(1, 0, 2, 3).unsqueeze(0).to(device)
@@ -88,21 +99,19 @@ def predict(images):
     return class_name, report[0] if report else "No report generated."
-# Define Gradio Interface explicitly
-demo = gr.Interface(
-    fn=predict,
-    inputs=gr.File(
-        file_types=[".dcm", ".jpg", ".jpeg", ".png"],
         file_count="multiple",
         label="Upload CT Scan Images",
-    ),
-    outputs=[
-        gr.Textbox(label="Predicted Class"),
-        gr.Textbox(label="Generated Report")
-    ],
-    title="Phronesis Medical Report Generator",
-    description="Upload CT scan DICOM or image files. Returns diagnosis classification and generated report.",
-)
-# Launch with explicit api_name for REST API compatibility
 demo.launch()

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 HF_TOKEN = os.getenv("HF_TOKEN")
 os.environ["HF_HOME"] = "/tmp/huggingface_cache"
+# Load tokenizer and models
 tokenizer = AutoTokenizer.from_pretrained("baliddeki/phronesis-ml", token=HF_TOKEN)
 video_model = models.video.r3d_18(weights="KINETICS400_V1")
 video_model.fc = torch.nn.Linear(video_model.fc.in_features, 512)
 num_classes = 4
 class_names = ["acute", "normal", "chronic", "lacunar"]
+combined_model = CombinedModel(
+    video_model, report_generator, num_classes, projector, tokenizer
+)
+model_file = hf_hub_download(
+    "baliddeki/phronesis-ml", "pytorch_model.bin", token=HF_TOKEN
+)
 state_dict = torch.load(model_file, map_location=device)
 combined_model.load_state_dict(state_dict)
 combined_model.to(device)
 combined_model.eval()
 # Image transforms
+image_transform = transforms.Compose(
+    [
+        transforms.Resize((112, 112)),
+        transforms.ToTensor(),
+        transforms.Normalize(
+            mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989]
+        ),
+    ]
+)
 def dicom_to_image(file_bytes):
     dicom_file = pydicom.dcmread(io.BytesIO(file_bytes))
     pixel_array = pixel_array.astype(np.uint8)
     return Image.fromarray(pixel_array).convert("RGB")
+def predict(files):
+    if not files:
         return "No image uploaded.", ""
     processed_imgs = []
+    for file in files:
+        filename = file.name.lower()
         if filename.endswith((".dcm", ".ima")):
+            file_bytes = file.read()
             dicom_img = dicom_to_image(file_bytes)
             processed_imgs.append(dicom_img)
         else:
+            pil_img = Image.open(file).convert("RGB")
             processed_imgs.append(pil_img)
     n_frames = 16
             for i in np.linspace(0, len(processed_imgs) - 1, n_frames, dtype=int)
         ]
     else:
+        images_sampled = processed_imgs + [processed_imgs[-1]] * (
+            n_frames - len(processed_imgs)
+        )
     tensor_imgs = [image_transform(i) for i in images_sampled]
     input_tensor = torch.stack(tensor_imgs).permute(1, 0, 2, 3).unsqueeze(0).to(device)
     return class_name, report[0] if report else "No report generated."
+# Gradio Blocks setup (explicitly)
+with gr.Blocks() as demo:
+    gr.Markdown("## 🩺 Phronesis Medical Report Generator")
+    file_input = gr.File(
         file_count="multiple",
+        file_types=[".dcm", ".jpg", ".jpeg", ".png"],
         label="Upload CT Scan Images",
+    )
+    btn = gr.Button("Generate Report")
+    class_output = gr.Textbox(label="Predicted Class")
+    report_output = gr.Textbox(label="Generated Report")
+    btn.click(fn=predict, inputs=file_input, outputs=[class_output, report_output])
 demo.launch()