import os
import base64
import json
import time
from io import BytesIO

import gradio as gr
import requests
from PIL import Image

# Get API key from environment variable for security
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model information: (display name, model ID, prompt cost, completion cost, context length)
free_models = [
    ("Google: Gemini Pro 2.0 Experimental (free)", "google/gemini-2.0-pro-exp-02-05:free", 0, 0, 2000000),
    ("Google: Gemini 2.0 Flash Thinking Experimental 01-21 (free)", "google/gemini-2.0-flash-thinking-exp:free", 0, 0, 1048576),
    ("Google: Gemini Flash 2.0 Experimental (free)", "google/gemini-2.0-flash-exp:free", 0, 0, 1048576),
    ("Google: Gemini Pro 2.5 Experimental (free)", "google/gemini-2.5-pro-exp-03-25:free", 0, 0, 1000000),
    ("Google: Gemini Flash 1.5 8B Experimental", "google/gemini-flash-1.5-8b-exp", 0, 0, 1000000),
    ("DeepSeek: DeepSeek R1 Zero (free)", "deepseek/deepseek-r1-zero:free", 0, 0, 163840),
    ("DeepSeek: R1 (free)", "deepseek/deepseek-r1:free", 0, 0, 163840),
    ("DeepSeek: DeepSeek V3 Base (free)", "deepseek/deepseek-v3-base:free", 0, 0, 131072),
    ("DeepSeek: DeepSeek V3 0324 (free)", "deepseek/deepseek-chat-v3-0324:free", 0, 0, 131072),
    ("Google: Gemma 3 4B (free)", "google/gemma-3-4b-it:free", 0, 0, 131072),
    ("Google: Gemma 3 12B (free)", "google/gemma-3-12b-it:free", 0, 0, 131072),
    ("Nous: DeepHermes 3 Llama 3 8B Preview (free)", "nousresearch/deephermes-3-llama-3-8b-preview:free", 0, 0, 131072),
    ("Qwen: Qwen2.5 VL 72B Instruct (free)", "qwen/qwen2.5-vl-72b-instruct:free", 0, 0, 131072),
    ("DeepSeek: DeepSeek V3 (free)", "deepseek/deepseek-chat:free", 0, 0, 131072),
    ("NVIDIA: Llama 3.1 Nemotron 70B Instruct (free)", "nvidia/llama-3.1-nemotron-70b-instruct:free", 0, 0, 131072),
    ("Meta: Llama 3.2 1B Instruct (free)", "meta-llama/llama-3.2-1b-instruct:free", 0, 0, 131072),
    ("Meta: Llama 3.2 11B Vision Instruct (free)", "meta-llama/llama-3.2-11b-vision-instruct:free", 0, 0, 131072),
    ("Meta: Llama 3.1 8B Instruct (free)", "meta-llama/llama-3.1-8b-instruct:free", 0, 0, 131072),
    ("Mistral: Mistral Nemo (free)", "mistralai/mistral-nemo:free", 0, 0, 128000),
    ("Mistral: Mistral Small 3.1 24B (free)", "mistralai/mistral-small-3.1-24b-instruct:free", 0, 0, 96000),
    ("Google: Gemma 3 27B (free)", "google/gemma-3-27b-it:free", 0, 0, 96000),
    ("Qwen: Qwen2.5 VL 3B Instruct (free)", "qwen/qwen2.5-vl-3b-instruct:free", 0, 0, 64000),
    ("DeepSeek: R1 Distill Qwen 14B (free)", "deepseek/deepseek-r1-distill-qwen-14b:free", 0, 0, 64000),
    ("Qwen: Qwen2.5-VL 7B Instruct (free)", "qwen/qwen-2.5-vl-7b-instruct:free", 0, 0, 64000),
    ("Google: LearnLM 1.5 Pro Experimental (free)", "google/learnlm-1.5-pro-experimental:free", 0, 0, 40960),
    ("Qwen: QwQ 32B (free)", "qwen/qwq-32b:free", 0, 0, 40000),
    ("Google: Gemini 2.0 Flash Thinking Experimental (free)", "google/gemini-2.0-flash-thinking-exp-1219:free", 0, 0, 40000),
    ("Bytedance: UI-TARS 72B (free)", "bytedance-research/ui-tars-72b:free", 0, 0, 32768),
    ("Qwerky 72b (free)", "featherless/qwerky-72b:free", 0, 0, 32768),
    ("OlympicCoder 7B (free)", "open-r1/olympiccoder-7b:free", 0, 0, 32768),
    ("OlympicCoder 32B (free)", "open-r1/olympiccoder-32b:free", 0, 0, 32768),
    ("Google: Gemma 3 1B (free)", "google/gemma-3-1b-it:free", 0, 0, 32768),
    ("Reka: Flash 3 (free)", "rekaai/reka-flash-3:free", 0, 0, 32768),
    ("Dolphin3.0 R1 Mistral 24B (free)", "cognitivecomputations/dolphin3.0-r1-mistral-24b:free", 0, 0, 32768),
    ("Dolphin3.0 Mistral 24B (free)", "cognitivecomputations/dolphin3.0-mistral-24b:free", 0, 0, 32768),
    ("Mistral: Mistral Small 3 (free)", "mistralai/mistral-small-24b-instruct-2501:free", 0, 0, 32768),
    ("Qwen2.5 Coder 32B Instruct (free)", "qwen/qwen-2.5-coder-32b-instruct:free", 0, 0, 32768),
    ("Qwen2.5 72B Instruct (free)", "qwen/qwen-2.5-72b-instruct:free", 0, 0, 32768),
    ("Meta: Llama 3.2 3B Instruct (free)", "meta-llama/llama-3.2-3b-instruct:free", 0, 0, 20000),
    ("Qwen: QwQ 32B Preview (free)", "qwen/qwq-32b-preview:free", 0, 0, 16384),
    ("DeepSeek: R1 Distill Qwen 32B (free)", "deepseek/deepseek-r1-distill-qwen-32b:free", 0, 0, 16000),
    ("Qwen: Qwen2.5 VL 32B Instruct (free)", "qwen/qwen2.5-vl-32b-instruct:free", 0, 0, 8192),
    ("Moonshot AI: Moonlight 16B A3B Instruct (free)", "moonshotai/moonlight-16b-a3b-instruct:free", 0, 0, 8192),
    ("DeepSeek: R1 Distill Llama 70B (free)", "deepseek/deepseek-r1-distill-llama-70b:free", 0, 0, 8192),
    ("Qwen 2 7B Instruct (free)", "qwen/qwen-2-7b-instruct:free", 0, 0, 8192),
    ("Google: Gemma 2 9B (free)", "google/gemma-2-9b-it:free", 0, 0, 8192),
    ("Mistral: Mistral 7B Instruct (free)", "mistralai/mistral-7b-instruct:free", 0, 0, 8192),
    ("Microsoft: Phi-3 Mini 128K Instruct (free)", "microsoft/phi-3-mini-128k-instruct:free", 0, 0, 8192),
    ("Microsoft: Phi-3 Medium 128K Instruct (free)", "microsoft/phi-3-medium-128k-instruct:free", 0, 0, 8192),
    ("Meta: Llama 3 8B Instruct (free)", "meta-llama/llama-3-8b-instruct:free", 0, 0, 8192),
    ("OpenChat 3.5 7B (free)", "openchat/openchat-7b:free", 0, 0, 8192),
    ("Meta: Llama 3.3 70B Instruct (free)", "meta-llama/llama-3.3-70b-instruct:free", 0, 0, 8000),
    ("AllenAI: Molmo 7B D (free)", "allenai/molmo-7b-d:free", 0, 0, 4096),
    ("Rogue Rose 103B v0.2 (free)", "sophosympatheia/rogue-rose-103b-v0.2:free", 0, 0, 4096),
    ("Toppy M 7B (free)", "undi95/toppy-m-7b:free", 0, 0, 4096),
    ("Hugging Face: Zephyr 7B (free)", "huggingfaceh4/zephyr-7b-beta:free", 0, 0, 4096),
    ("MythoMax 13B (free)", "gryphe/mythomax-l2-13b:free", 0, 0, 4096),
]

# Model IDs that accept image input
vision_model_ids = [
    "meta-llama/llama-3.2-11b-vision-instruct:free",
    "qwen/qwen2.5-vl-72b-instruct:free",
    "qwen/qwen2.5-vl-3b-instruct:free",
    "qwen/qwen2.5-vl-32b-instruct:free",
    "qwen/qwen-2.5-vl-7b-instruct:free",
    "google/gemini-2.0-pro-exp-02-05:free",
    "google/gemini-2.5-pro-exp-03-25:free",
]

# Prefilter vision models; every model remains usable for plain text
vision_models = [(name, model_id) for name, model_id, _, _, _ in free_models if model_id in vision_model_ids]
text_models = [(name, model_id) for name, model_id, _, _, _ in free_models]


def encode_image(image):
    """Convert a PIL Image to a base64-encoded JPEG string."""
    buffered = BytesIO()
    # JPEG cannot store an alpha channel, so normalize to RGB first
    if image.mode != "RGB":
        image = image.convert("RGB")
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
def process_message_stream(message, chat_history, model_name, uploaded_image=None):
    """Process a message and stream the model response."""
    # Map the display name back to a model ID; fall back to the first text model
    model_id = next((model_id for name, model_id, _, _, _ in free_models if name == model_name), text_models[0][1])

    # Check that the API key is set
    if not OPENROUTER_API_KEY:
        yield "Please set your OpenRouter API key in the environment variables.", chat_history
        return

    # Set up headers and URL
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "https://huggingface.co/spaces/cstr/CrispChat",  # Replace with your actual space URL in production
    }
    url = "https://openrouter.ai/api/v1/chat/completions"

    # Build message content
    messages = []

    # Add chat history
    for human_msg, ai_msg in chat_history:
        messages.append({"role": "user", "content": human_msg})
        messages.append({"role": "assistant", "content": ai_msg})

    # Add the current message; images go to vision models as data URLs
    if uploaded_image:
        base64_image = encode_image(uploaded_image)
        content = [
            {"type": "text", "text": message},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
            },
        ]
        messages.append({"role": "user", "content": content})
    else:
        messages.append({"role": "user", "content": message})

    # Build request data
    data = {
        "model": model_id,
        "messages": messages,
        "stream": True,
        "temperature": 0.7,
    }

    try:
        # Create a new message pair in the chat history
        chat_history.append((message, ""))
        full_response = ""

        # Make the streaming API call and parse server-sent events line by line
        with requests.post(url, headers=headers, json=data, stream=True) as response:
            response.raise_for_status()
            buffer = ""
            for chunk in response.iter_content(chunk_size=1024, decode_unicode=False):
                if not chunk:
                    continue
                buffer += chunk.decode("utf-8")
                while True:
                    line_end = buffer.find("\n")
                    if line_end == -1:
                        break
                    line = buffer[:line_end].strip()
                    buffer = buffer[line_end + 1:]
                    if not line.startswith("data: "):
                        continue
                    payload = line[6:]  # renamed from `data` to avoid shadowing the request body
                    if payload == "[DONE]":
                        break
                    try:
                        data_obj = json.loads(payload)
                        delta_content = data_obj["choices"][0]["delta"].get("content", "")
                        if delta_content:
                            full_response += delta_content
                            # Update the last assistant message in place
                            chat_history[-1] = (message, full_response)
                            yield full_response, chat_history
                    except json.JSONDecodeError:
                        pass
    except Exception as e:
        error_msg = f"Error: {str(e)}"
        chat_history[-1] = (message, error_msg)
        yield error_msg, chat_history
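# Minimal non-streaming sanity check (a sketch, not wired into the UI; the helper
# name and default model ID are illustrative). It hits the same OpenAI-compatible
# endpoint but reads the complete reply from choices[0].message instead of deltas.
def _check_api(prompt="Say hello.", model_id="meta-llama/llama-3.2-3b-instruct:free"):
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {OPENROUTER_API_KEY}"},
        json={"model": model_id, "messages": [{"role": "user", "content": prompt}]},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]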
# Create a nice CSS theme
css = """
.gradio-container {
    font-family: 'Segoe UI', Arial, sans-serif;
}
#chat-message {
    min-height: 100px;
}
#model-selector {
    max-width: 100%;
}
.app-header {
    text-align: center;
    margin-bottom: 10px;
}
.app-header h1 {
    font-weight: 700;
    color: #2C3E50;
}
.app-header p {
    color: #7F8C8D;
}
"""

with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.HTML("""
        <div class="app-header">
            <h1>CrispChat</h1>
            <p>Chat with AI models - supports text and images</p>
        </div>
    """)