Ravindu9904 committed
Commit 412e767 · verified · Parent: 29d60bc

Update app.py

Files changed (1)
  1. app.py +44 -51
app.py CHANGED
@@ -1,65 +1,58 @@
  import gradio as gr
  import pydicom
  import numpy as np
- import matplotlib.pyplot as plt
+ from PIL import Image
+ from transformers import AutoModelForVision2Seq, AutoProcessor
  import torch
- from monai.networks.nets import UNet
- from monai.transforms import Compose, ScaleIntensity, ToTensor

- # 1. Define a simple MONAI model (2D UNet)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model = UNet(
-     dimensions=2,
-     in_channels=1,
-     out_channels=1,
-     channels=(16, 32, 64, 128, 256),
-     strides=(2, 2, 2, 2),
-     num_res_units=2,
- ).to(device)
- model.eval()  # Set model to evaluation mode
-
- # 2. Dummy weights (for demo only)
- # In real use, load pre-trained weights:
- # model.load_state_dict(torch.load("your_model.pth", map_location=device))
-
- def interpret_dicom(files):
+ # Load the model and processor
+ model_id = "MONAI/Llama3-VILA-M3-3B"
+ model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ def dicom_to_image(files):
+     # Read all DICOM files and sort by InstanceNumber if available
      slices = []
      for file in files:
          ds = pydicom.dcmread(file.name)
-         slices.append(ds.pixel_array)
-     slices = np.array(slices)
-     mid_slice = slices[len(slices)//2]
-
-     # Preprocess for MONAI model
-     transform = Compose([ScaleIntensity(), ToTensor()])
-     input_tensor = transform(mid_slice.astype(np.float32))
-     input_tensor = input_tensor.unsqueeze(0).to(device)  # Add batch dimension
-
-     # 3. Run through MONAI model (dummy output for now)
-     with torch.no_grad():
-         output = model(input_tensor)
-     output_np = output.cpu().numpy()[0, 0]
-
-     # 4. Show original and model output side by side
-     fig, axs = plt.subplots(1, 2, figsize=(8, 4))
-     axs[0].imshow(mid_slice, cmap='gray')
-     axs[0].set_title('Original')
-     axs[0].axis('off')
-     axs[1].imshow(output_np, cmap='hot')
-     axs[1].set_title('Model Output')
-     axs[1].axis('off')
-     plt.tight_layout()
-     plt.savefig('output.png')
-     plt.close()
-
-     return 'output.png', "Interpretation: Model output shown (demo weights)."
+         slices.append((ds, ds.get('InstanceNumber', 0)))
+     slices.sort(key=lambda x: x[1])
+     images = [s[0].pixel_array for s in slices]
+     # If multiple slices, take the middle one
+     img = images[len(images)//2] if len(images) > 1 else images[0]
+     # Normalize and convert to 8-bit
+     img = img.astype(np.float32)
+     img = (img - img.min()) / (img.max() - img.min() + 1e-5) * 255
+     img = img.astype(np.uint8)
+     pil_img = Image.fromarray(img)
+     return pil_img
+
+ def interpret(files, prompt):
+     pil_img = dicom_to_image(files)
+     # Prepare input for the model
+     inputs = processor(images=pil_img, text=prompt, return_tensors="pt")
+     # Move to GPU if available
+     if torch.cuda.is_available():
+         model.to("cuda")
+         for k in inputs:
+             inputs[k] = inputs[k].to("cuda")
+     # Generate report
+     output = model.generate(**inputs, max_new_tokens=256)
+     report = processor.decode(output[0], skip_special_tokens=True)
+     return pil_img, report

  iface = gr.Interface(
-     fn=interpret_dicom,
-     inputs=gr.File(file_count="multiple", label="Upload DICOM files"),
-     outputs=[gr.Image(type="filepath", label="Result"), gr.Textbox(label="Interpretation")],
-     title="DICOM Radiology Interpreter with MONAI",
-     description="Upload your DICOM files (e.g., CT scan slices). The app will show the middle slice and a MONAI model output."
+     fn=interpret,
+     inputs=[
+         gr.File(file_count="multiple", label="Upload DICOM files"),
+         gr.Textbox(label="Prompt", value="Describe the findings in this image.")
+     ],
+     outputs=[
+         gr.Image(type="pil", label="Selected Image"),
+         gr.Textbox(label="AI-generated Report")
+     ],
+     title="Radiology Image Interpretation (VILA-M3-3B)",
+     description="Upload DICOM files (CT, MRI, or X-ray). The app will select the middle slice (for stacks), send it to MONAI/Llama3-VILA-M3-3B, and display the AI-generated report."
  )

  if __name__ == "__main__":
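
A quick way to sanity-check the new dicom_to_image helper without launching the Gradio UI (or downloading the 3B model weights beyond the module-level load) is to run it against one of pydicom's bundled sample files. A minimal sketch, not part of the commit, assuming dicom_to_image from above is in scope and that pydicom's test data is installed; types.SimpleNamespace stands in for Gradio's upload object, which only needs a .name attribute holding a file path:

# Local smoke test for dicom_to_image (not part of the commit).
# Assumes pydicom ships its sample data (pydicom.data.get_testdata_file).
from types import SimpleNamespace
from pydicom.data import get_testdata_file

path = get_testdata_file("CT_small.dcm")  # bundled 128x128 CT slice
upload = SimpleNamespace(name=path)       # Gradio uploads expose .name
img = dicom_to_image([upload])
print(img.size, img.mode)                 # expect (128, 128) and mode "L"

Because dicom_to_image only ever reads file.name, any object carrying that attribute works, which keeps the test independent of Gradio itself.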