Orensomekh committed on
Commit
6cfe4e2
·
verified ·
1 Parent(s): 46ed82b

Upload Falcon_Ai71_Usage.ipynb

Browse files
Operational_Instructions/Falcon_Ai71_Usage.ipynb ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "!pip install ai71 python-dotenv"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": null,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import os\n",
19
+ "import time\n",
20
+ "from ai71 import AI71\n",
21
+ "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
22
+ "\n",
23
+ "# Optional, but a nice way to load environment variables from a .env file\n",
24
+ "from dotenv import load_dotenv\n",
25
+ "\n",
26
+ "load_dotenv()\n",
27
+ "AI71_API_KEY = os.getenv(\"AI71_API_KEY\")\n",
28
+ "AI71_BASE_URL = os.getenv(\"AI71_BASE_URL\")\n",
29
+ "\n",
30
+ "client = AI71(api_key=AI71_API_KEY, base_url=AI71_BASE_URL)\n",
31
+ "\n",
32
+ "def complete(client: AI71, messages: list[dict], model: str = \"tiiuae/falcon3-10b-instruct\", max_tokens: int = 100, n_retries: int = 5):\n",
33
+ " \"\"\"Runs a single completion request.\n",
34
+ " Args:\n",
35
+ " client (AI71): The AI71 client.\n",
36
+ " messages (list[dict]): List of messages for the request. (a conversation)\n",
37
+ " model (str): Model to use for completion.\n",
38
+ " max_tokens (int): Maximum number of tokens to generate.\n",
39
+ " n_retries (int): Number of retries on failure.\n",
40
+ " Returns:\n",
41
+ " dict: The result of the completion request.\n",
42
+ " \"\"\"\n",
43
+ " retries = 0\n",
44
+ " while True:\n",
45
+ " try:\n",
46
+ " return client.chat.completions.create(\n",
47
+ " model=model,\n",
48
+ " messages=messages,\n",
49
+ " max_tokens=max_tokens,\n",
50
+ " )\n",
51
+ " except Exception as e:\n",
52
+ " retries += 1\n",
53
+ " if n_retries < retries:\n",
54
+ " raise e\n",
55
+ " print(f\"Retrying for the {retries} time(s)... (error: {e})\")\n",
56
+ " time.sleep(retries)\n",
57
+ "\n",
58
+ "def batch_complete(\n",
59
+ " client: AI71,\n",
60
+ " list_of_messages: list[list[dict]],\n",
61
+ " model: str = \"tiiuae/falcon3-10b-instruct\",\n",
62
+ " max_tokens: int = 100,\n",
63
+ " n_retries: int = 5,\n",
64
+ " n_parallel: int = 10):\n",
65
+ " \"\"\"Runs a batch of completions in parallel.\n",
66
+ " Args:\n",
67
+ " client (AI71): The AI71 client.\n",
68
+ " list_of_messages (list[list[dict]]): List of messages for each request. (list of conversations)\n",
69
+ " model (str): Model to use for completion.\n",
70
+ " max_tokens (int): Maximum number of tokens to generate.\n",
71
+ " n_retries (int): Number of retries on failure.\n",
72
+ " n_parallel (int): Number of parallel requests.\n",
73
+ " Returns:\n",
74
+ " list: Results in completion order (not input order); a failed request contributes None.\n",
75
+ " \"\"\"\n",
76
+ "\n",
77
+ " results = []\n",
78
+ "\n",
79
+ " with ThreadPoolExecutor(max_workers=n_parallel) as executor:\n",
80
+ " # Submit requests\n",
81
+ " futures = [\n",
82
+ " executor.submit(complete, client, messages, model, max_tokens, n_retries)\n",
83
+ " for i, messages in enumerate(list_of_messages)\n",
84
+ " ]\n",
85
+ "\n",
86
+ " # Collect results as they complete\n",
87
+ " for future in as_completed(futures):\n",
88
+ " try:\n",
89
+ " result = future.result()\n",
90
+ " results.append(result)\n",
91
+ " except Exception as e:\n",
92
+ " print(f\"Request failed: {e}\")\n",
93
+ " results.append(None)\n",
94
+ "\n",
95
+ " return results\n",
96
+ "\n",
97
+ "# Simple single request:\n",
98
+ "result = complete(client, [\n",
99
+ " {\"role\":\"system\",\"content\": \"You are a helpful assistant\"},\n",
100
+ " {\"role\":\"user\",\"content\":\"What is artificial intelligence?\"}\n",
101
+ "])\n",
102
+ "print(result)\n",
103
+ "\n",
104
+ "# Run a batch of requests:\n",
105
+ "results = batch_complete(\n",
106
+ " client,\n",
107
+ " [\n",
108
+ " [\n",
109
+ " {\"role\":\"system\",\"content\": \"You are a helpful assistant\"},\n",
110
+ " {\"role\":\"user\",\"content\":\"What is artificial intelligence?\"}\n",
111
+ " ]\n",
112
+ " ] * 20,\n",
113
+ " n_parallel=10,\n",
114
+ ")\n",
115
+ "results"
116
+ ]
117
+ }
118
+ ],
119
+ "metadata": {
120
+ "kernelspec": {
121
+ "display_name": ".venv",
122
+ "language": "python",
123
+ "name": "python3"
124
+ },
125
+ "language_info": {
126
+ "codemirror_mode": {
127
+ "name": "ipython",
128
+ "version": 3
129
+ },
130
+ "file_extension": ".py",
131
+ "mimetype": "text/x-python",
132
+ "name": "python",
133
+ "nbconvert_exporter": "python",
134
+ "pygments_lexer": "ipython3",
135
+ "version": "3.12.8"
136
+ }
137
+ },
138
+ "nbformat": 4,
139
+ "nbformat_minor": 2
140
+ }