Gajendra5490 committed on
Commit
2998bd1
·
verified ·
1 Parent(s): d1b7db6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Load MONAI's Llama3-VILA-M3-8B and print a generated response to a fixed prompt.

Standalone script: downloads the model from the Hugging Face Hub, tokenizes a
text prompt, generates a continuation, and prints it.
"""

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face Hub model identifier.
model_name = "MONAI/Llama3-VILA-M3-8B"

# trust_remote_code=True executes custom model code shipped in the repo —
# required for this model, but keep it only for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half precision for faster, smaller inference
    device_map="auto",          # place weights on available GPU(s)/CPU
    trust_remote_code=True,
)

# Example input prompt.
# NOTE(review): this is a vision-language model and the prompt refers to an
# X-ray, yet no image is supplied here — the model answers from text alone.
prompt = "Explain the findings in this chest X-ray report:"

# Send inputs to wherever device_map placed the model, instead of hard-coding
# "cuda"/"cpu" — with device_map="auto" the embedding layer may not be on cuda:0.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# inference_mode() is no_grad() plus disabled autograd view tracking — the
# preferred context for pure inference. max_new_tokens bounds only the
# *generated* tokens; the original max_length=200 also counted the prompt
# tokens and could silently truncate the output.
with torch.inference_mode():
    output = model.generate(**inputs, max_new_tokens=200)

# Decode the full sequence (prompt + continuation) and print it.
response = tokenizer.decode(output[0], skip_special_tokens=True)
print("\nGenerated Response:\n", response)