import os
import subprocess
import sys


def install(package):
    """Install *package* with pip into the running interpreter's environment.

    Args:
        package: Requirement specifier handed to ``pip install`` unchanged.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Use sys.executable directly: `os.sys` only works because the os module
    # happens to import sys internally, which is not a documented API.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


# transformers is installed at start-up, so the third-party imports below
# must stay *after* this call.
install("transformers")

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces

# Cache of already loaded models: model name -> (tokenizer, model).
loaded_models = {}


def load_model(model_name, progress=gr.Progress()):
    """Return the ``(tokenizer, model)`` pair for *model_name*, loading it once.

    The first request for a model loads its tokenizer and weights (float16,
    ``device_map="auto"``) and stores them in ``loaded_models``; later
    requests are served from that cache. The access token is read from the
    ``HF_TOKEN`` environment variable and may be unset.

    Args:
        model_name: Hugging Face Hub model identifier.
        progress: Gradio progress tracker used to report loading stages.

    Returns:
        Tuple ``(tokenizer, model)``.
    """
    if model_name not in loaded_models:
        access_token = os.getenv("HF_TOKEN")
        progress(0, desc="Initializing model loading...")
        # `token` replaces the deprecated `use_auth_token` keyword.
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
        progress(0.5, desc="Tokenizer loaded. Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            token=access_token,
        )
        progress(1, desc="Model loaded successfully.")
        loaded_models[model_name] = (tokenizer, model)
    return loaded_models[model_name]


@spaces.GPU
def generate_text(model_name, prompt, progress=gr.Progress()):
    """Generate text for *prompt* using the selected model.

    Args:
        model_name: Hub identifier chosen in the dropdown; ``None`` when the
            user has not selected anything.
        prompt: Text entered by the user.
        progress: Gradio progress tracker forwarded to ``load_model``.

    Returns:
        The first generated sequence decoded with special tokens stripped.
        NOTE(review): for causal LMs ``generate`` normally returns the prompt
        tokens followed by the new ones, so the prompt is expected to be
        echoed at the start of the result -- confirm this is intended.

    Raises:
        gr.Error: If no model is selected or the prompt is empty.
    """
    # The dropdown has no default value, so fail with a readable message
    # instead of letting `from_pretrained(None)` raise an opaque error.
    if not model_name:
        raise gr.Error("Please select a model before running the analysis.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter some text to analyze.")

    tokenizer, model = load_model(model_name, progress)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# List of models to choose from
model_choices = [
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "meta-llama/Llama-3.2-3B-Instruct",
    "google/gemma-7b",
]

with gr.Blocks() as demo:
    gr.Markdown("## Clinical Text Analysis with Multiple Models")
    model_selector = gr.Dropdown(choices=model_choices, label="Select Model")
    input_text = gr.Textbox(label="Input Clinical Text")
    output_text = gr.Textbox(label="Generated Output")
    analyze_button = gr.Button("Analyze")
    analyze_button.click(
        fn=generate_text,
        inputs=[model_selector, input_text],
        outputs=output_text,
    )

demo.launch()