|
import random |
|
import httpx |
|
import asyncio |
|
import json |
|
|
|
class OFFDeepInfraAPI: |
|
|
|
headers = { |
|
'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8', |
|
'Connection': 'keep-alive', |
|
'Content-Type': 'application/json', |
|
'Origin': 'https://deepinfra.com', |
|
'Referer': 'https://deepinfra.com/', |
|
'Sec-Fetch-Dest': 'empty', |
|
'Sec-Fetch-Mode': 'cors', |
|
'Sec-Fetch-Site': 'same-site', |
|
'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Mobile Safari/537.36', |
|
'X-Deepinfra-Source': 'web-embed', |
|
'accept': 'text/event-stream', |
|
'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"', |
|
'sec-ch-ua-mobile': '?1', |
|
'sec-ch-ua-platform': '"Android"', |
|
} |
|
|
|
def __init__(self): |
|
self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions" |
|
|
|
def get_model_list(self): |
|
models = ['meta-llama/Llama-3.3-70B-Instruct-Turbo', 'deepseek-ai/DeepSeek-R1-Turbo', 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B'] |
|
return models |
|
|
|
|
|
async def generate(self, json_data: dict): |
|
json_data['stream_options'] = { |
|
'include_usage': True, |
|
'continuous_usage_stats': True, |
|
} |
|
chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32)) |
|
created = int(asyncio.get_event_loop().time()) |
|
total_tokens = 0 |
|
|
|
try: |
|
async with httpx.AsyncClient(timeout=None) as client: |
|
async with client.stream( |
|
"POST", |
|
"https://api.deepinfra.com/v1/openai/chat/completions", |
|
headers=OFFDeepInfraAPI.headers, |
|
json=json_data |
|
) as request_ctx: |
|
print(request_ctx.status_code) |
|
if request_ctx.status_code == 200: |
|
async for line in request_ctx.aiter_lines(): |
|
if line: |
|
if line.startswith('0:'): |
|
|
|
text = line[2:].strip() |
|
if text.startswith('"') and text.endswith('"'): |
|
text = text[1:-1] |
|
text = text.replace('\\n', '\n').replace('\\', '') |
|
|
|
response = { |
|
"id": chunk_id, |
|
"object": "chat.completion.chunk", |
|
"created": created, |
|
"model": json_data.get("model", "deepseek-r1-distill-llama-70b"), |
|
"choices": [{ |
|
"index": 0, |
|
"text": text, |
|
"logprobs": None, |
|
"finish_reason": None |
|
}], |
|
"usage": None |
|
} |
|
yield f"data: {json.dumps(response)}\n\n" |
|
total_tokens += 1 |
|
elif line.startswith('d:'): |
|
final = { |
|
"id": chunk_id, |
|
"object": "chat.completion.chunk", |
|
"created": created, |
|
"model": json_data.get("model", "deepseek-r1-distill-llama-70b"), |
|
"choices": [], |
|
"usage": { |
|
"prompt_tokens": len(messages), |
|
"completion_tokens": total_tokens, |
|
"total_tokens": len(messages) + total_tokens |
|
} |
|
} |
|
yield f"data: {json.dumps(final)}\n\n" |
|
yield "data: [DONE]\n\n" |
|
return |
|
else: |
|
yield f"data: [Unexpected status code: {request_ctx.status_code}]\n\n" |
|
except Exception as e: |
|
yield f"data: [Connection error: {str(e)}]\n\n" |
|
|