File size: 5,191 Bytes
b166a40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6ab7af
 
 
 
 
 
b166a40
 
f6ab7af
8f054a9
 
 
b166a40
f6ab7af
b166a40
 
f6ab7af
 
b166a40
 
 
 
 
f6ab7af
b166a40
8f054a9
f6ab7af
 
 
b166a40
f6ab7af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b166a40
f6ab7af
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import asyncio
import json
import random
import time

import httpx

class OFFDeepInfraAPI:
    """Stream chat completions from DeepInfra's unauthenticated web-embed
    endpoint, re-emitting them as OpenAI-style SSE chunks.

    The request headers mimic a mobile Chrome browser because the endpoint
    serves DeepInfra's own web embed rather than API-key clients.
    """

    # Browser-impersonation headers required by the web-embed endpoint.
    headers = {
        'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Mobile Safari/537.36',
        'X-Deepinfra-Source': 'web-embed',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"',
    }

    def __init__(self):
        # OpenAI-compatible chat-completions endpoint.
        self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"

    def get_model_list(self):
        """Return the model identifiers known to work with this endpoint."""
        return [
            'meta-llama/Llama-3.3-70B-Instruct-Turbo',
            'deepseek-ai/DeepSeek-R1-Turbo',
            'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B'
        ]

    async def generate(self, json_data: dict):
        """Proxy a streaming chat-completion request to DeepInfra.

        Args:
            json_data: OpenAI-style request payload. Mutated in place to
                force streaming and continuous usage statistics.

        Yields:
            SSE-formatted strings ("data: {...}\\n\\n"), ending with either
            "data: [DONE]\\n\\n" or an inline error message on failure.
        """
        json_data['stream'] = True  # Ensure stream is enabled
        json_data['stream_options'] = {
            'include_usage': True,
            'continuous_usage_stats': True,
        }

        chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
        # Wall-clock Unix timestamp, as the OpenAI "created" field expects.
        # NOTE: asyncio's loop.time() is a monotonic clock with an arbitrary
        # epoch and must NOT be used here.
        created = int(time.time())
        total_completion_tokens = 0
        model_name = json_data.get("model", "unknown")

        try:
            async with httpx.AsyncClient(timeout=None) as client:
                async with client.stream(
                    "POST",
                    self.base_url,
                    headers=OFFDeepInfraAPI.headers,
                    json=json_data
                ) as response:
                    if response.status_code != 200:
                        yield f"data: [Unexpected status code: {response.status_code}]\n\n"
                        return

                    async for line in response.aiter_lines():
                        if not line or not line.startswith("data:"):
                            continue

                        data_str = line.removeprefix("data:").strip()
                        if data_str == "[DONE]":
                            yield "data: [DONE]\n\n"
                            return

                        # Keep the try body minimal: only the parse can
                        # legitimately fail on malformed SSE payloads.
                        try:
                            data = json.loads(data_str)
                        except json.JSONDecodeError:
                            continue

                        # Usage-only chunks (requested via include_usage)
                        # arrive with an empty "choices" list; indexing [0]
                        # unconditionally would raise IndexError and kill
                        # the stream.
                        choices = data.get("choices") or []
                        if choices:
                            delta = choices[0].get("delta", {})
                            content = delta.get("content", "")
                            finish_reason = choices[0].get("finish_reason")

                            if content or finish_reason:
                                transformed = {
                                    "id": chunk_id,
                                    "object": "chat.completion.chunk",
                                    "created": created,
                                    "choices": [{
                                        "index": 0,
                                        "text": content,
                                        "logprobs": None,
                                        "finish_reason": finish_reason,
                                        "delta": {
                                            "token_id": None,
                                            "role": delta.get("role", "assistant"),
                                            "content": content,
                                            "tool_calls": delta.get("tool_calls"),
                                        }
                                    }],
                                    "model": model_name,
                                    "usage": None
                                }
                                yield f"data: {json.dumps(transformed)}\n\n"

                        # Update usage stats (present on usage-only chunks
                        # and, with continuous_usage_stats, on content
                        # chunks too).
                        usage = data.get("usage")
                        if usage:
                            total_completion_tokens = usage.get("completion_tokens", total_completion_tokens)

                    # Stream ended without a [DONE] marker: emit a final
                    # usage chunk so callers still get token counts.
                    final = {
                        "id": chunk_id,
                        "object": "chat.completion.chunk",
                        "created": created,
                        "choices": [],
                        "model": model_name,
                        "usage": {
                            "prompt_tokens": 0,
                            "completion_tokens": total_completion_tokens,
                            "total_tokens": total_completion_tokens
                        }
                    }
                    yield f"data: {json.dumps(final)}\n\n"
                    yield "data: [DONE]\n\n"

        except Exception as e:
            # Best-effort: surface transport failures in-band rather than
            # raising through the SSE generator.
            yield f"data: [Connection error: {str(e)}]\n\n"