# Source: Hugging Face Spaces page (the Space itself showed a "Runtime error"
# status when captured). The script below is the MedQA inference demo code.
# --- Runtime dependency install --------------------------------------------
# Notebook-style bootstrap: install the required packages with the current
# interpreter. (The original also had an IPython `!pip install` magic, which
# is a SyntaxError in a plain .py file and duplicated this call — removed.)
# NOTE(review): prefer pinning these in requirements.txt for reproducibility.
import subprocess
import sys

subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    "unsloth", "peft", "bitsandbytes", "accelerate", "transformers",
])

# Imports must follow the install above, hence the E402 suppressions.
from transformers import AutoTokenizer  # noqa: E402  NOTE(review): unused — tokenizer below comes from unsloth
from unsloth import FastLanguageModel  # noqa: E402

# Prompt template for the MedQA model; the question replaces the {} slot.
medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.
### Question:
{}
### Answer:
"""

# Load the fine-tuned Phi-4 MedQA model and its tokenizer via unsloth.
model_name = "Vijayendra/Phi4-MedQA"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,   # context window covering prompt + generation
    dtype=None,            # let unsloth choose the default precision
    load_in_4bit=True,     # 4-bit quantization to reduce VRAM usage
    device_map="auto",     # map model layers onto available devices
)

# Switch unsloth into its optimized inference mode.
FastLanguageModel.for_inference(model)

# Build and tokenize the prompt.
medical_question = "What are the common symptoms of diabetes?"  # Replace with your medical question
inputs = tokenizer(
    [medqa_prompt.format(medical_question)],
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=1024,
).to(model.device)  # follow wherever device_map placed the model
                    # (original hard-coded "cuda", which fails on CPU-only hosts)

# Generate the answer.
outputs = model.generate(
    **inputs,
    max_new_tokens=512,  # allow room for a detailed response
    use_cache=True,      # KV cache speeds up autoregressive decoding
)

# Decode, then keep only the text after the "### Answer:" marker; fall back
# to the whole decoded response if the marker is absent.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
if "### Answer:" in response:
    answer_text = response.split("### Answer:")[1].strip()
else:
    answer_text = response.strip()

print(f"Question: {medical_question}")
print(f"Answer: {answer_text}")