s12144251 committed
Commit b9ba0f4 · verified · 1 Parent(s): 3e24f35

Update app.py

Files changed (1)
  1. app.py +60 -54
app.py CHANGED
@@ -1,54 +1,60 @@
- # Install required libraries
- !pip install unsloth peft bitsandbytes accelerate transformers
-
- # Import necessary modules
- from transformers import AutoTokenizer
- from unsloth import FastLanguageModel
-
- # Define the MedQA prompt
- medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.
-
- ### Question:
- {}
-
- ### Answer:
- """
-
- # Load the model and tokenizer using unsloth
- model_name = "Vijayendra/Phi4-MedQA"
- model, tokenizer = FastLanguageModel.from_pretrained(
-     model_name=model_name,
-     max_seq_length=2048,
-     dtype=None,  # Use default precision
-     load_in_4bit=True,  # Enable 4-bit quantization
-     device_map="auto"  # Automatically map model to available devices
- )
-
- # Enable faster inference
- FastLanguageModel.for_inference(model)
-
- # Prepare the medical question
- medical_question = "What are the common symptoms of diabetes?"  # Replace with your medical question
- inputs = tokenizer(
-     [medqa_prompt.format(medical_question)],
-     return_tensors="pt",
-     padding=True,
-     truncation=True,
-     max_length=1024
- ).to("cuda")  # Ensure inputs are on the GPU
-
- # Generate the output
- outputs = model.generate(
-     **inputs,
-     max_new_tokens=512,  # Allow for detailed responses
-     use_cache=True  # Speeds up generation
- )
-
- # Decode and clean the response
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- # Extract and print the generated answer
- answer_text = response.split("### Answer:")[1].strip() if "### Answer:" in response else response.strip()
-
- print(f"Question: {medical_question}")
- print(f"Answer: {answer_text}")
+ # Install required libraries
+ # pip install unsloth peft bitsandbytes accelerate transformers  # commented out: a bare shell command is not valid Python; the subprocess call below installs instead
+
+ import subprocess
+ import sys
+
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth", "peft", "bitsandbytes", "accelerate", "transformers"])
+
+
+ # Import necessary modules
+ from transformers import AutoTokenizer
+ from unsloth import FastLanguageModel
+
+ # Define the MedQA prompt
+ medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.
+
+ ### Question:
+ {}
+
+ ### Answer:
+ """
+
+ # Load the model and tokenizer using unsloth
+ model_name = "Vijayendra/Phi4-MedQA"
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=2048,
+     dtype=None,  # Use default precision
+     load_in_4bit=True,  # Enable 4-bit quantization
+     device_map="auto"  # Automatically map model to available devices
+ )
+
+ # Enable faster inference
+ FastLanguageModel.for_inference(model)
+
+ # Prepare the medical question
+ medical_question = "What are the common symptoms of diabetes?"  # Replace with your medical question
+ inputs = tokenizer(
+     [medqa_prompt.format(medical_question)],
+     return_tensors="pt",
+     padding=True,
+     truncation=True,
+     max_length=1024
+ ).to("cuda")  # Ensure inputs are on the GPU
+
+ # Generate the output
+ outputs = model.generate(
+     **inputs,
+     max_new_tokens=512,  # Allow for detailed responses
+     use_cache=True  # Speeds up generation
+ )
+
+ # Decode and clean the response
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # Extract and print the generated answer
+ answer_text = response.split("### Answer:")[1].strip() if "### Answer:" in response else response.strip()
+
+ print(f"Question: {medical_question}")
+ print(f"Answer: {answer_text}")