s12144251 committed
Commit 9d823d2 · verified · 1 Parent(s): aa0ae5d
Files changed (1)
  1. app.py.txt +54 -0
app.py.txt ADDED
@@ -0,0 +1,54 @@
+ # Install required libraries
+ !pip install unsloth peft bitsandbytes accelerate transformers
+
+ # Import necessary modules
+ from transformers import AutoTokenizer
+ from unsloth import FastLanguageModel
+
+ # Define the MedQA prompt
+ medqa_prompt = """You are a medical QA system. Answer the following medical question clearly and in detail with complete sentences.
+
+ ### Question:
+ {}
+
+ ### Answer:
+ """
+
+ # Load the model and tokenizer using unsloth
+ model_name = "Vijayendra/Phi4-MedQA"
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=2048,
+     dtype=None,  # Use default precision
+     load_in_4bit=True,  # Enable 4-bit quantization
+     device_map="auto"  # Automatically map model to available devices
+ )
+
+ # Enable faster inference
+ FastLanguageModel.for_inference(model)
+
+ # Prepare the medical question
+ medical_question = "What are the common symptoms of diabetes?"  # Replace with your medical question
+ inputs = tokenizer(
+     [medqa_prompt.format(medical_question)],
+     return_tensors="pt",
+     padding=True,
+     truncation=True,
+     max_length=1024
+ ).to("cuda")  # Ensure inputs are on the GPU
+
+ # Generate the output
+ outputs = model.generate(
+     **inputs,
+     max_new_tokens=512,  # Allow for detailed responses
+     use_cache=True  # Speeds up generation
+ )
+
+ # Decode and clean the response
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # Extract and print the generated answer
+ answer_text = response.split("### Answer:")[1].strip() if "### Answer:" in response else response.strip()
+
+ print(f"Question: {medical_question}")
+ print(f"Answer: {answer_text}")