import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Retrieve the access token from the environment; fail fast with a clear
# message if it is missing (os.getenv returns None in that case, and
# None.strip() would raise a confusing AttributeError)
api_token = os.getenv("HF_TOKEN")
if api_token is None:
    raise RuntimeError("HF_TOKEN environment variable is not set")
api_token = api_token.strip()

# Model name
model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"

# Load the Hugging Face model and tokenizer with required arguments
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=api_token,  # Authenticate with Hugging Face token
    trust_remote_code=True  # Allow custom code from the repository
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=api_token,
    trust_remote_code=True,
    device_map="auto",  # Efficient device allocation
    torch_dtype=torch.float16  # Half-precision weights for lower memory use and faster inference
)
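
# Note: torch.float16 assumes a CUDA-capable GPU. On a CPU-only runtime (an
# assumption about the deployment, not stated in this file),
# torch_dtype=torch.float32 is the safer choice.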

# Define the function to process user input
def generate_response(input_text):
    try:
        # Tokenize the input and move both input_ids and attention_mask
        # to the same device as the model
        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

        # Generate a response; do_sample=True is required for the
        # temperature/top_p/top_k settings below to have any effect
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=256,      # Limit the length of the generated text
            num_return_sequences=1,  # Generate a single response
            do_sample=True,          # Enable sampling
            temperature=0.7,         # Adjust for creativity vs. determinism
            top_p=0.9,               # Nucleus sampling
            top_k=50                 # Top-k sampling
        )

        # Decode only the newly generated tokens, skipping the echoed prompt
        generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
        return response

    except Exception as e:
        # Return error details in case of failure
        return f"Error: {str(e)}"

# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="ContactDoctor Medical Assistant",
    description="Enter symptoms or a medical query to get AI-powered medical advice."
)
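
# Queue requests so long generations don't hit request timeouts under
# concurrent use (an optional Gradio feature, not part of the original app)
iface.queue()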

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
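
# Programmatic access (a sketch: assumes the gradio_client package and the
# default local URL, neither of which appears in this file):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("What are common symptoms of anemia?", api_name="/predict"))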