File size: 4,692 Bytes
b166a40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f054a9
 
 
b166a40
 
 
 
 
 
 
 
 
 
 
8f054a9
b166a40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import asyncio
import json
import random
import time

import httpx

class OFFDeepInfraAPI:
    """Unauthenticated client for DeepInfra's OpenAI-compatible chat endpoint.

    Streams the upstream event-stream response and re-emits it as
    OpenAI-style server-sent-event (SSE) ``chat.completion.chunk`` lines.
    """

    # Browser-like headers so the request looks like the deepinfra.com
    # web embed (the endpoint is used without an API key).
    headers = {
        'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://deepinfra.com',
        'Referer': 'https://deepinfra.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Mobile Safari/537.36',
        'X-Deepinfra-Source': 'web-embed',
        'accept': 'text/event-stream',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"',
    }

    def __init__(self):
        # Single source of truth for the chat-completions endpoint URL.
        self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"

    def get_model_list(self):
        """Return the model identifiers known to work through this route."""
        models = ['meta-llama/Llama-3.3-70B-Instruct-Turbo', 'deepseek-ai/DeepSeek-R1-Turbo', 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B']
        return models

    async def generate(self, json_data: dict):
        """Stream a chat completion as OpenAI-style SSE chunk strings.

        Args:
            json_data: OpenAI-style request payload. Expected to contain a
                ``"messages"`` list and optionally a ``"model"`` key; it is
                mutated in place to request continuous usage stats.

        Yields:
            ``"data: {json}\\n\\n"`` strings — one per upstream text chunk,
            then a final usage chunk and ``"data: [DONE]\\n\\n"`` on success,
            or a single diagnostic ``data:`` line on HTTP/connection errors.
        """
        json_data['stream_options'] = {
            'include_usage': True,
            'continuous_usage_stats': True,
        }
        # BUGFIX: the original referenced an undefined name `messages` when
        # building the final usage block, raising NameError on every
        # completed stream.  Pull it from the request payload instead.
        messages = json_data.get('messages', [])
        chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
        # BUGFIX: loop.time() is a monotonic clock, not epoch seconds; the
        # OpenAI "created" field is a Unix timestamp.
        created = int(time.time())
        total_tokens = 0

        try:
            async with httpx.AsyncClient(timeout=None) as client:
                async with client.stream(
                    "POST",
                    self.base_url,
                    headers=OFFDeepInfraAPI.headers,
                    json=json_data
                ) as request_ctx:
                    if request_ctx.status_code == 200:
                        async for line in request_ctx.aiter_lines():
                            if line:
                                if line.startswith('0:'):
                                    # '0:' lines carry a (quoted) text delta.
                                    # Strip the quotes and undo the escaping.
                                    text = line[2:].strip()
                                    if text.startswith('"') and text.endswith('"'):
                                        text = text[1:-1]
                                    text = text.replace('\\n', '\n').replace('\\', '')

                                    response = {
                                        "id": chunk_id,
                                        "object": "chat.completion.chunk",
                                        "created": created,
                                        "model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
                                        "choices": [{
                                            "index": 0,
                                            "text": text,
                                            "logprobs": None,
                                            "finish_reason": None
                                        }],
                                        "usage": None
                                    }
                                    yield f"data: {json.dumps(response)}\n\n"
                                    total_tokens += 1
                                elif line.startswith('d:'):
                                    # 'd:' marks the end of the stream; emit a
                                    # terminal chunk with (approximate) usage.
                                    # NOTE(review): len(messages) counts
                                    # messages, not prompt tokens — rough
                                    # estimate only.
                                    final = {
                                        "id": chunk_id,
                                        "object": "chat.completion.chunk",
                                        "created": created,
                                        "model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
                                        "choices": [],
                                        "usage": {
                                            "prompt_tokens": len(messages),
                                            "completion_tokens": total_tokens,
                                            "total_tokens": len(messages) + total_tokens
                                        }
                                    }
                                    yield f"data: {json.dumps(final)}\n\n"
                                    yield "data: [DONE]\n\n"
                        return
                    else:
                        yield f"data: [Unexpected status code: {request_ctx.status_code}]\n\n"
        except Exception as e:
            yield f"data: [Connection error: {str(e)}]\n\n"