import random
import httpx
import asyncio
import json
import time


class OFFDeepInfraAPI:
    headers = {
        'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Mobile Safari/537.36',
        'X-Deepinfra-Source': 'web-embed',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"',
    }

    def __init__(self):
        self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"

    def get_model_list(self):
        return [
            'meta-llama/Llama-3.3-70B-Instruct-Turbo',
            'deepseek-ai/DeepSeek-R1-Turbo',
            'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
        ]

    async def generate(self, json_data: dict):
        """Stream a chat completion from DeepInfra and re-emit it as OpenAI-style SSE chunks."""
        json_data['stream'] = True  # Ensure streaming is enabled
        json_data['stream_options'] = {
            'include_usage': True,
            'continuous_usage_stats': True,
        }

        chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
        created = int(time.time())  # Unix timestamp for the chunk metadata
        total_prompt_tokens = 0
        total_completion_tokens = 0
        model_name = json_data.get("model", "unknown")

        try:
            async with httpx.AsyncClient(timeout=None) as client:
                async with client.stream(
                    "POST",
                    self.base_url,
                    headers=OFFDeepInfraAPI.headers,
                    json=json_data,
                ) as response:
                    if response.status_code != 200:
                        yield f"data: [Unexpected status code: {response.status_code}]\n\n"
                        return

                    async for line in response.aiter_lines():
                        if not line or not line.startswith("data:"):
                            continue

                        data_str = line.removeprefix("data:").strip()
                        if data_str == "[DONE]":
                            yield "data: [DONE]\n\n"
                            return

                        try:
                            data = json.loads(data_str)
                        except json.JSONDecodeError:
                            continue

                        # Usage-only chunks arrive with an empty "choices" list,
                        # so guard the index before reading the delta.
                        choices = data.get("choices") or []
                        delta = choices[0].get("delta", {}) if choices else {}
                        content = delta.get("content", "")
                        finish_reason = choices[0].get("finish_reason") if choices else None

                        if content or finish_reason:
                            transformed = {
                                "id": chunk_id,
                                "object": "chat.completion.chunk",
                                "created": created,
                                "choices": [{
                                    "index": 0,
                                    "text": content,
                                    "logprobs": None,
                                    "finish_reason": finish_reason,
                                    "delta": {
                                        "token_id": None,
                                        "role": delta.get("role", "assistant"),
                                        "content": content,
                                        "tool_calls": delta.get("tool_calls"),
                                    },
                                }],
                                "model": model_name,
                                "usage": None,
                            }
                            yield f"data: {json.dumps(transformed)}\n\n"

                        # Update usage stats from the continuous usage reports
                        usage = data.get("usage")
                        if usage:
                            total_prompt_tokens = usage.get("prompt_tokens", total_prompt_tokens)
                            total_completion_tokens = usage.get("completion_tokens", total_completion_tokens)

                    # Final usage chunk
                    final = {
                        "id": chunk_id,
                        "object": "chat.completion.chunk",
                        "created": created,
                        "choices": [],
                        "model": model_name,
                        "usage": {
                            "prompt_tokens": total_prompt_tokens,
                            "completion_tokens": total_completion_tokens,
                            "total_tokens": total_prompt_tokens + total_completion_tokens,
                        },
                    }
                    yield f"data: {json.dumps(final)}\n\n"
                    yield "data: [DONE]\n\n"

        except Exception as e:
            yield f"data: [Connection error: {str(e)}]\n\n"
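

# Example usage -- a minimal sketch, not part of the client itself. It assumes
# the module is run as a standalone script against the public DeepInfra endpoint
# configured above; the payload is just an OpenAI-style chat body, since
# generate() only injects the streaming options before forwarding it.
if __name__ == "__main__":
    async def _demo():
        api = OFFDeepInfraAPI()
        payload = {
            "model": api.get_model_list()[0],
            "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        }
        # generate() yields raw SSE lines ("data: {...}\n\n"), ready to be
        # forwarded to a downstream client or printed as-is.
        async for chunk in api.generate(payload):
            print(chunk, end="")

    asyncio.run(_demo())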