Spaces:

Ruurd
/

radiolm

Sleeping

App Files Files Community

radiolm / app.py

Ruurd

Make it into a chatbot

0115682 4 months ago

raw

history blame

3.32 kB

	import os
	import torch
	import gradio as gr
	import spaces
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Process-wide model and tokenizer shared by the loader and the chat
	# handler; both stay None until a model has been loaded.
	current_model = current_tokenizer = None

	def load_model_on_selection(model_name, progress=gr.Progress(track_tqdm=False)):
	    """Load ``model_name`` and make it the active chat model.

	    The tokenizer and model are loaded into locals first and only then
	    published to the module-level ``current_tokenizer`` / ``current_model``,
	    so a failure while loading the model cannot leave a new tokenizer
	    paired with the previously loaded model.

	    Args:
	        model_name: Model identifier passed to ``from_pretrained``.
	        progress: Gradio progress reporter used to surface loading stages.

	    Returns:
	        A status string for the "Model Status" textbox.
	    """
	    global current_model, current_tokenizer

	    # Guard against an empty selection instead of failing inside
	    # from_pretrained with an unhelpful error.
	    if not model_name:
	        return "No model selected."

	    # Read from the HF_TOKEN environment variable; None if it is unset.
	    token = os.getenv("HF_TOKEN")

	    progress(0, desc="Loading tokenizer...")
	    # `token=` is the current spelling of the deprecated `use_auth_token=`.
	    tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)

	    progress(0.5, desc="Loading model...")
	    # Loaded on CPU here; chat_with_model moves it to CUDA when it runs.
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        torch_dtype=torch.float16,
	        device_map="cpu",
	        token=token,
	    )

	    # Publish the pair together now that both loads have succeeded.
	    current_tokenizer, current_model = tokenizer, model

	    progress(1, desc="Model ready.")
	    return f"{model_name} loaded and ready!"

	# Inference - yields the reply one token at a time
	@spaces.GPU
	def chat_with_model(history):
	    """Generate the reply for the newest user turn and yield it progressively.

	    Args:
	        history: List of ``(user_message, bot_message)`` pairs. The last
	            pair is the pending turn whose reply is still empty.

	    Yields:
	        A copy of ``history`` whose last pair holds the reply decoded so
	        far. If no model is loaded, a single history is yielded with a
	        warning in the reply slot of the pending turn.
	    """
	    # Nothing to answer: avoid indexing history[-1] on an empty chat.
	    if not history:
	        yield []
	        return

	    pending_user_msg = history[-1][0]

	    if current_model is None or current_tokenizer is None:
	        # Show the warning as the bot's reply, then stop; without the
	        # return, execution would continue and call .to() on None.
	        yield list(history[:-1]) + [(pending_user_msg, "⚠️ No model loaded.")]
	        return

	    current_model.to("cuda")

	    # Completed turns go in with their replies; the pending turn is added
	    # exactly once, with its reply left open for the model to fill in.
	    # NOTE(review): the [INST] markers are hard-coded for every model in
	    # the dropdown - confirm they match each model's expected chat format.
	    prompt = "".join(
	        f"[INST] {(user_msg or '').strip()} [/INST] {(bot_msg or '').strip()} "
	        for user_msg, bot_msg in history[:-1]
	    )
	    prompt += f"[INST] {pending_user_msg} [/INST]"

	    inputs = current_tokenizer(prompt, return_tensors="pt").to(current_model.device)
	    prompt_length = inputs["input_ids"].shape[-1]

	    # generate() returns only once the whole reply has been produced, so
	    # the loop below replays the finished reply token by token.
	    sequences = current_model.generate(
	        **inputs,
	        max_new_tokens=256,
	        do_sample=False,
	        return_dict_in_generate=True,
	        output_scores=False,
	    ).sequences

	    # The returned sequence starts with the prompt tokens; drop them so
	    # the prompt is not echoed back as part of the reply.
	    reply_token_ids = sequences[0][prompt_length:].tolist()

	    # Work on a copy so the caller's history is not mutated between yields.
	    updated_history = list(history)
	    updated_history[-1] = (pending_user_msg, "")

	    output_ids = []
	    for token_id in reply_token_ids:
	        output_ids.append(token_id)
	        decoded = current_tokenizer.decode(output_ids, skip_special_tokens=True)
	        updated_history[-1] = (pending_user_msg, decoded)
	        yield updated_history

	def add_user_message(message, history):
	    """Append ``message`` to the chat as a new turn awaiting a reply.

	    Args:
	        message: Text the user submitted.
	        history: Existing list of ``(user_message, bot_message)`` pairs,
	            or ``None`` when the chat has no content yet.

	    Returns:
	        A tuple ``("", new_history)``: the empty string clears the input
	        textbox and ``new_history`` ends with ``(message, "")``.
	    """
	    # Build a fresh list so the caller's history is never mutated, and
	    # treat a missing history as an empty conversation.
	    return "", list(history or []) + [(message, "")]

	# Model identifiers offered in the "Select Model" dropdown; the chosen
	# entry is handed to load_model_on_selection.
	model_choices = [
	    "meta-llama/Llama-3.2-3B-Instruct",
	    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
	    "google/gemma-7b",
	]

	# Gradio UI
	with gr.Blocks() as demo:
	    gr.Markdown("## Clinical Chatbot — LLaMA, DeepSeek, Gemma")

	    # The model loaded at startup is the first dropdown entry; deriving it
	    # from model_choices keeps the dropdown and the startup load in sync.
	    default_model_name = model_choices[0]
	    default_model = gr.State(default_model_name)

	    with gr.Row():
	        # Pre-select the startup model so the dropdown reflects what is
	        # actually being loaded instead of starting blank.
	        model_selector = gr.Dropdown(
	            choices=model_choices,
	            value=default_model_name,
	            label="Select Model",
	        )
	        model_status = gr.Textbox(label="Model Status", interactive=False)

	    chatbot = gr.Chatbot(label="Chat")
	    msg = gr.Textbox(
	        label="Your Message",
	        placeholder="Enter your clinical query...",
	        show_label=False,
	    )
	    clear_btn = gr.Button("Clear Chat")

	    # Load the default model when the page loads
	    demo.load(fn=load_model_on_selection, inputs=default_model, outputs=model_status)

	    # Load a different model when the dropdown selection changes
	    model_selector.change(
	        fn=load_model_on_selection,
	        inputs=model_selector,
	        outputs=model_status,
	    )

	    # On submit: first record the user's turn and clear the textbox,
	    # then run the model and push its reply into the chatbot
	    msg.submit(add_user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
	        fn=chat_with_model, inputs=chatbot, outputs=chatbot
	    )

	    # Reset the chatbot to an empty conversation
	    clear_btn.click(lambda: [], None, chatbot, queue=False)

	demo.launch()