File size: 5,191 Bytes
b166a40 f6ab7af b166a40 f6ab7af 8f054a9 b166a40 f6ab7af b166a40 f6ab7af b166a40 f6ab7af b166a40 8f054a9 f6ab7af b166a40 f6ab7af b166a40 f6ab7af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import asyncio
import json
import random
import time

import httpx
class OFFDeepInfraAPI:
    """Streaming client for DeepInfra's OpenAI-compatible chat-completions API.

    ``generate`` POSTs a chat-completion request to DeepInfra and re-emits the
    upstream SSE stream as OpenAI-style ``chat.completion.chunk`` events,
    followed by a synthesized usage chunk and a ``[DONE]`` sentinel. Errors are
    reported in-band as ``data: [...]`` lines rather than raised, so consumers
    only ever see SSE text.
    """

    # Browser-like headers mimicking DeepInfra's own web embed; the public
    # endpoint is called with 'X-Deepinfra-Source: web-embed' identification.
    headers = {
        'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Mobile Safari/537.36',
        'X-Deepinfra-Source': 'web-embed',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"',
    }

    def __init__(self):
        # OpenAI-compatible chat-completions endpoint.
        self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"

    def get_model_list(self):
        """Return the list of model identifiers this client supports."""
        return [
            'meta-llama/Llama-3.3-70B-Instruct-Turbo',
            'deepseek-ai/DeepSeek-R1-Turbo',
            'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B'
        ]

    async def generate(self, json_data: dict):
        """Stream a chat completion as OpenAI-style SSE ``data:`` lines.

        Args:
            json_data: OpenAI-style request body (``model``, ``messages``, ...).
                ``stream`` is forced on; the caller's dict is NOT mutated.

        Yields:
            str: ``data: {...}\\n\\n`` chunk events; if upstream never sends
            ``[DONE]``, a final usage chunk is synthesized before the
            ``data: [DONE]\\n\\n`` sentinel. HTTP/transport failures yield a
            single in-band ``data: [...]`` diagnostic line instead of raising.
        """
        # Bug fix: work on a shallow copy — the original wrote 'stream' and
        # 'stream_options' into the caller's dict (mutation of an argument).
        payload = dict(json_data)
        payload['stream'] = True  # SSE parsing below requires a streamed response
        payload['stream_options'] = {
            'include_usage': True,
            'continuous_usage_stats': True,
        }

        chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
        # Bug fix: asyncio's loop clock is monotonic (arbitrary epoch); the
        # OpenAI 'created' field is a Unix timestamp, so use wall-clock time.
        created = int(time.time())
        total_completion_tokens = 0
        model_name = payload.get("model", "unknown")

        try:
            async with httpx.AsyncClient(timeout=None) as client:
                async with client.stream(
                    "POST",
                    self.base_url,
                    headers=OFFDeepInfraAPI.headers,
                    json=payload,
                ) as response:
                    if response.status_code != 200:
                        yield f"data: [Unexpected status code: {response.status_code}]\n\n"
                        return
                    async for line in response.aiter_lines():
                        # Skip keep-alives / non-data SSE fields.
                        if not line or not line.startswith("data:"):
                            continue
                        data_str = line.removeprefix("data:").strip()
                        if data_str == "[DONE]":
                            yield "data: [DONE]\n\n"
                            return
                        try:
                            data = json.loads(data_str)
                        except json.JSONDecodeError:
                            continue  # tolerate malformed upstream chunks

                        # Bug fix: with include_usage on, usage-only chunks
                        # arrive with an empty 'choices' list; indexing [0]
                        # unconditionally raised and killed the stream.
                        choices = data.get("choices") or []
                        if choices:
                            delta = choices[0].get("delta", {})
                            content = delta.get("content", "")
                            finish_reason = choices[0].get("finish_reason", None)
                            if content or finish_reason:
                                transformed = {
                                    "id": chunk_id,
                                    "object": "chat.completion.chunk",
                                    "created": created,
                                    "choices": [{
                                        "index": 0,
                                        "text": content,
                                        "logprobs": None,
                                        "finish_reason": finish_reason,
                                        "delta": {
                                            "token_id": None,
                                            "role": delta.get("role", "assistant"),
                                            "content": content,
                                            "tool_calls": delta.get("tool_calls"),
                                        }
                                    }],
                                    "model": model_name,
                                    "usage": None
                                }
                                yield f"data: {json.dumps(transformed)}\n\n"

                        # Track running completion-token count from usage stats.
                        usage = data.get("usage")
                        if usage:
                            total_completion_tokens = usage.get("completion_tokens", total_completion_tokens)

            # Reached only when upstream closed without sending [DONE]:
            # emit a final usage chunk, then terminate the stream ourselves.
            final = {
                "id": chunk_id,
                "object": "chat.completion.chunk",
                "created": created,
                "choices": [],
                "model": model_name,
                "usage": {
                    "prompt_tokens": 0,  # upstream does not report prompt tokens here
                    "completion_tokens": total_completion_tokens,
                    "total_tokens": total_completion_tokens
                }
            }
            yield f"data: {json.dumps(final)}\n\n"
            yield "data: [DONE]\n\n"
        except Exception as e:
            # Boundary handler: surface any transport/parse failure in-band
            # so SSE consumers get a readable diagnostic instead of a crash.
            yield f"data: [Connection error: {str(e)}]\n\n"