Spaces:

baliddeki
/

phronesis-ml-endpoint

Sleeping

App Files Community

baliddeki committed on May 2

Commit

8c6ba75

1 Parent(s): a54164e

fix with endpoints 2

Browse files

Files changed (1) hide show

app.py +30 -41

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 HF_TOKEN = os.getenv("HF_TOKEN")
 os.environ["HF_HOME"] = "/tmp/huggingface_cache"
-# Load tokenizer and models
 tokenizer = AutoTokenizer.from_pretrained("baliddeki/phronesis-ml", token=HF_TOKEN)
 video_model = models.video.r3d_18(weights="KINETICS400_V1")
 video_model.fc = torch.nn.Linear(video_model.fc.in_features, 512)
@@ -27,29 +27,19 @@ projector = ImageToTextProjector(512, report_generator.config.d_model)
 num_classes = 4
 class_names = ["acute", "normal", "chronic", "lacunar"]
-combined_model = CombinedModel(
-    video_model, report_generator, num_classes, projector, tokenizer
-)
-model_file = hf_hub_download(
-    "baliddeki/phronesis-ml", "pytorch_model.bin", token=HF_TOKEN
-)
 state_dict = torch.load(model_file, map_location=device)
 combined_model.load_state_dict(state_dict)
 combined_model.to(device)
 combined_model.eval()
-# Image transforms
-image_transform = transforms.Compose(
-    [
-        transforms.Resize((112, 112)),
-        transforms.ToTensor(),
-        transforms.Normalize(
-            mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989]
-        ),
-    ]
-)
 def dicom_to_image(file_bytes):
     dicom_file = pydicom.dcmread(io.BytesIO(file_bytes))
@@ -58,32 +48,28 @@ def dicom_to_image(file_bytes):
     pixel_array = pixel_array.astype(np.uint8)
     return Image.fromarray(pixel_array).convert("RGB")
 def predict(files):
     if not files:
-        return "No image uploaded.", ""
     processed_imgs = []
-    for file in files:
-        filename = file.name.lower()
         if filename.endswith((".dcm", ".ima")):
-            file_bytes = file.read()
-            dicom_img = dicom_to_image(file_bytes)
-            processed_imgs.append(dicom_img)
         else:
-            pil_img = Image.open(file).convert("RGB")
-            processed_imgs.append(pil_img)
     n_frames = 16
     if len(processed_imgs) >= n_frames:
         images_sampled = [
             processed_imgs[i]
-            for i in np.linspace(0, len(processed_imgs) - 1, n_frames, dtype=int)
         ]
     else:
-        images_sampled = processed_imgs + [processed_imgs[-1]] * (
-            n_frames - len(processed_imgs)
-        )
     tensor_imgs = [image_transform(i) for i in images_sampled]
     input_tensor = torch.stack(tensor_imgs).permute(1, 0, 2, 3).unsqueeze(0).to(device)
@@ -99,19 +85,22 @@ def predict(files):
     return class_name, report[0] if report else "No report generated."
-# Gradio Blocks setup (explicitly)
 with gr.Blocks() as demo:
-    gr.Markdown("## 🩺 Phronesis Medical Report Generator")
-    file_input = gr.File(
-        file_count="multiple",
-        file_types=[".dcm", ".jpg", ".jpeg", ".png"],
-        label="Upload CT Scan Images",
-    )
-    btn = gr.Button("Generate Report")
     class_output = gr.Textbox(label="Predicted Class")
     report_output = gr.Textbox(label="Generated Report")
-    btn.click(fn=predict, inputs=file_input, outputs=[class_output, report_output])
 demo.launch()

 HF_TOKEN = os.getenv("HF_TOKEN")
 os.environ["HF_HOME"] = "/tmp/huggingface_cache"
+# Model loading
 tokenizer = AutoTokenizer.from_pretrained("baliddeki/phronesis-ml", token=HF_TOKEN)
 video_model = models.video.r3d_18(weights="KINETICS400_V1")
 video_model.fc = torch.nn.Linear(video_model.fc.in_features, 512)
 num_classes = 4
 class_names = ["acute", "normal", "chronic", "lacunar"]
+combined_model = CombinedModel(video_model, report_generator, num_classes, projector, tokenizer)
+model_file = hf_hub_download("baliddeki/phronesis-ml", "pytorch_model.bin", token=HF_TOKEN)
 state_dict = torch.load(model_file, map_location=device)
 combined_model.load_state_dict(state_dict)
 combined_model.to(device)
 combined_model.eval()
+image_transform = transforms.Compose([
+    transforms.Resize((112, 112)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989]),
+])
 def dicom_to_image(file_bytes):
     dicom_file = pydicom.dcmread(io.BytesIO(file_bytes))
     pixel_array = pixel_array.astype(np.uint8)
     return Image.fromarray(pixel_array).convert("RGB")
 def predict(files):
     if not files:
+        return "No images uploaded.", ""
     processed_imgs = []
+    for file_obj in files:
+        filename = file_obj.name.lower()
         if filename.endswith((".dcm", ".ima")):
+            file_bytes = file_obj.read()
+            img = dicom_to_image(file_bytes)
         else:
+            img = Image.open(file_obj).convert("RGB")
+        processed_imgs.append(img)
     n_frames = 16
     if len(processed_imgs) >= n_frames:
         images_sampled = [
             processed_imgs[i]
+            for i in np.linspace(0, len(processed_imgs)-1, n_frames, dtype=int)
         ]
     else:
+        images_sampled = processed_imgs + [processed_imgs[-1]] * (n_frames - len(processed_imgs))
     tensor_imgs = [image_transform(i) for i in images_sampled]
     input_tensor = torch.stack(tensor_imgs).permute(1, 0, 2, 3).unsqueeze(0).to(device)
     return class_name, report[0] if report else "No report generated."
+# Gradio Blocks (100% reliable approach)
 with gr.Blocks() as demo:
+    gr.Markdown("# 🩺 Phronesis Medical Report Generator")
+    upload_button = gr.UploadButton("Upload CT Scan Images", file_types=[".dcm", ".jpg", ".jpeg", ".png"], file_count="multiple")
+    files_state = gr.State([])
+    def store_files(new_files):
+        return new_files
+    upload_button.upload(store_files, upload_button, files_state)
+    generate_btn = gr.Button("Generate Report")
     class_output = gr.Textbox(label="Predicted Class")
     report_output = gr.Textbox(label="Generated Report")
+    generate_btn.click(predict, files_state, [class_output, report_output])
 demo.launch()