Spaces:
Sleeping
Sleeping
Jordi Catafal
committed on
Commit
·
023e423
1
Parent(s):
03eefac
cleaning + readme
Browse files- README.md +259 -334
- app_endpoints.py +0 -308
- app_hybrid_backup.py +0 -189
- app_old.py +0 -159
- app_old_minimal.py +0 -165
- test_api.py +0 -64
- test_hybrid.py +0 -98
README.md
CHANGED
@@ -7,54 +7,67 @@ sdk: docker
|
|
7 |
pinned: false
|
8 |
---
|
9 |
|
10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
11 |
-
|
12 |
-
--------------------------------
|
13 |
# Multilingual & Legal Embeddings API
|
14 |
|
15 |
-
A high-performance
|
|
|
|
|
|
|
16 |
|
17 |
## 🚀 Quick Start
|
18 |
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
|
33 |
## 🔗 API Endpoints
|
34 |
|
35 |
-
###
|
36 |
-
```
|
37 |
-
POST /embed
|
38 |
-
```
|
39 |
-
Generate embeddings for up to 50 texts in a single request.
|
40 |
|
41 |
-
|
42 |
-
```
|
43 |
-
GET /models
|
44 |
-
```
|
45 |
-
Get detailed information about available models.
|
46 |
|
47 |
-
### Health Check
|
48 |
```
|
49 |
-
|
|
|
|
|
|
|
|
|
50 |
```
|
51 |
-
Check API status and model availability.
|
52 |
|
53 |
-
###
|
|
|
54 |
```
|
55 |
-
GET /
|
|
|
|
|
56 |
```
|
57 |
-
Basic API information and status.
|
58 |
|
59 |
## 📖 Usage Examples
|
60 |
|
@@ -62,164 +75,100 @@ Basic API information and status.
|
|
62 |
|
63 |
```python
|
64 |
import requests
|
65 |
-
import numpy as np
|
66 |
|
67 |
API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"
|
68 |
|
69 |
-
# Example 1:
|
70 |
response = requests.post(
|
71 |
-
f"{API_URL}/embed",
|
72 |
json={
|
73 |
-
"texts": [
|
74 |
-
|
|
|
|
|
|
|
|
|
75 |
"normalize": True
|
76 |
}
|
77 |
)
|
78 |
-
|
79 |
result = response.json()
|
80 |
-
|
81 |
-
print(f"Generated {len(embeddings)} embeddings of {result['dimensions']} dimensions")
|
82 |
|
83 |
-
# Example 2:
|
84 |
-
|
85 |
-
f"{API_URL}/embed",
|
86 |
json={
|
87 |
"texts": [
|
88 |
-
"
|
89 |
-
"
|
90 |
-
"
|
91 |
-
"Hallo Welt" # German
|
92 |
],
|
93 |
-
"model": "jina-v3",
|
94 |
"normalize": True
|
95 |
}
|
96 |
)
|
97 |
-
|
|
|
98 |
|
99 |
-
# Example 3:
|
100 |
-
|
101 |
-
f"{API_URL}/embed",
|
102 |
json={
|
103 |
"texts": [
|
104 |
-
"
|
105 |
-
"
|
106 |
-
"
|
107 |
],
|
108 |
-
"model": "roberta-ca",
|
109 |
"normalize": True
|
110 |
}
|
111 |
)
|
112 |
-
|
|
|
113 |
|
114 |
-
# Example 4:
|
115 |
-
|
116 |
-
f"{API_URL}/embed",
|
117 |
json={
|
118 |
"texts": [
|
119 |
-
"
|
120 |
-
"
|
|
|
121 |
],
|
122 |
-
"model": "robertalex",
|
123 |
"normalize": True
|
124 |
}
|
125 |
)
|
|
|
|
|
126 |
|
127 |
-
# Example 5:
|
128 |
-
|
129 |
-
f"{API_URL}/embed",
|
130 |
json={
|
131 |
"texts": [
|
132 |
-
"
|
133 |
-
"
|
|
|
134 |
],
|
135 |
-
"model": "legal-bert",
|
136 |
"normalize": True
|
137 |
}
|
138 |
)
|
139 |
-
|
140 |
-
|
141 |
-
text_es = "inteligencia artificial"
|
142 |
-
text_ca = "intel·ligència artificial"
|
143 |
-
models_comparison = {}
|
144 |
-
|
145 |
-
for model, text in [("jina", text_es), ("roberta-ca", text_ca), ("jina-v3", text_es)]:
|
146 |
-
resp = requests.post(
|
147 |
-
f"{API_URL}/embed",
|
148 |
-
json={"texts": [text], "model": model, "normalize": True}
|
149 |
-
)
|
150 |
-
models_comparison[model] = resp.json()["dimensions"]
|
151 |
-
|
152 |
-
print("Embedding dimensions by model:", models_comparison)
|
153 |
-
```
|
154 |
-
|
155 |
-
### cURL
|
156 |
-
|
157 |
-
```bash
|
158 |
-
# Basic embedding generation with Jina v2 Spanish
|
159 |
-
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
|
160 |
-
-H "Content-Type: application/json" \
|
161 |
-
-d '{
|
162 |
-
"texts": ["Texto de ejemplo", "Otro texto en español"],
|
163 |
-
"model": "jina",
|
164 |
-
"normalize": true
|
165 |
-
}'
|
166 |
-
|
167 |
-
# Catalan text with RoBERTa-ca
|
168 |
-
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
|
169 |
-
-H "Content-Type: application/json" \
|
170 |
-
-d '{
|
171 |
-
"texts": ["Bon dia", "Com està vostè?", "Catalunya és meravellosa"],
|
172 |
-
"model": "roberta-ca",
|
173 |
-
"normalize": true
|
174 |
-
}'
|
175 |
-
|
176 |
-
# Using Jina v3 for multilingual embeddings
|
177 |
-
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
|
178 |
-
-H "Content-Type: application/json" \
|
179 |
-
-d '{
|
180 |
-
"texts": ["Hello world", "Hola mundo", "Bonjour le monde"],
|
181 |
-
"model": "jina-v3",
|
182 |
-
"normalize": true
|
183 |
-
}'
|
184 |
-
|
185 |
-
# English legal text with Legal-BERT
|
186 |
-
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
|
187 |
-
-H "Content-Type: application/json" \
|
188 |
-
-d '{
|
189 |
-
"texts": ["This agreement is legally binding"],
|
190 |
-
"model": "legal-bert",
|
191 |
-
"normalize": true
|
192 |
-
}'
|
193 |
-
|
194 |
-
# Spanish legal text with RoBERTalex
|
195 |
-
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
|
196 |
-
-H "Content-Type: application/json" \
|
197 |
-
-d '{
|
198 |
-
"texts": ["Artículo primero de la constitución"],
|
199 |
-
"model": "robertalex",
|
200 |
-
"normalize": true,
|
201 |
-
"max_length": 512
|
202 |
-
}'
|
203 |
-
|
204 |
-
# Get all model information
|
205 |
-
curl "https://aurasystems-spanish-embeddings-api.hf.space/models"
|
206 |
```
|
207 |
|
208 |
-
### JavaScript/
|
209 |
|
210 |
```javascript
|
211 |
const API_URL = 'https://aurasystems-spanish-embeddings-api.hf.space';
|
212 |
|
213 |
-
//
|
214 |
-
async function getEmbeddings(
|
215 |
-
const response = await fetch(`${API_URL}/embed`, {
|
216 |
method: 'POST',
|
217 |
headers: {
|
218 |
'Content-Type': 'application/json',
|
219 |
},
|
220 |
body: JSON.stringify({
|
221 |
texts: texts,
|
222 |
-
model: model,
|
223 |
normalize: true
|
224 |
})
|
225 |
});
|
@@ -231,104 +180,79 @@ async function getEmbeddings(texts, model = 'jina') {
|
|
231 |
return await response.json();
|
232 |
}
|
233 |
|
234 |
-
// Usage
|
235 |
try {
|
236 |
-
|
|
|
|
|
237 |
'Hola mundo',
|
238 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
]);
|
240 |
-
console.log('
|
241 |
-
|
242 |
} catch (error) {
|
243 |
-
console.error('Error
|
244 |
}
|
245 |
```
|
246 |
|
247 |
-
###
|
248 |
|
249 |
-
```
|
250 |
-
|
251 |
-
|
252 |
-
|
|
|
|
|
|
|
|
|
253 |
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
self.api_url,
|
270 |
-
json={
|
271 |
-
"texts": texts,
|
272 |
-
"model": self.model,
|
273 |
-
"normalize": True
|
274 |
-
}
|
275 |
-
)
|
276 |
-
response.raise_for_status()
|
277 |
-
return response.json()["embeddings"]
|
278 |
-
|
279 |
-
def embed_query(self, text: str) -> List[float]:
|
280 |
-
return self.embed_documents([text])[0]
|
281 |
|
282 |
-
#
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
#
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
# Multilingual embeddings with Jina v3
|
299 |
-
multilingual_embeddings = MultilingualEmbeddings(model="jina-v3")
|
300 |
-
mixed_docs = multilingual_embeddings.embed_documents([
|
301 |
-
"English document",
|
302 |
-
"Documento en español",
|
303 |
-
"Document en français",
|
304 |
-
"Document en català"
|
305 |
-
])
|
306 |
-
|
307 |
-
# Legal embeddings for English
|
308 |
-
legal_embeddings = MultilingualEmbeddings(model="legal-bert")
|
309 |
-
legal_docs = legal_embeddings.embed_documents([
|
310 |
-
"This contract is governed by English law",
|
311 |
-
"The party shall indemnify and hold harmless"
|
312 |
-
])
|
313 |
-
|
314 |
-
# Spanish legal embeddings
|
315 |
-
spanish_legal_embeddings = MultilingualEmbeddings(model="robertalex")
|
316 |
-
spanish_legal_docs = spanish_legal_embeddings.embed_documents([
|
317 |
-
"Artículo 1: De los derechos fundamentales",
|
318 |
-
"La presente ley entrará en vigor"
|
319 |
-
])
|
320 |
```
|
321 |
|
322 |
-
## 📋 Request/Response
|
323 |
|
324 |
-
### Request Body
|
325 |
|
326 |
```json
|
327 |
{
|
328 |
-
"texts": [
|
329 |
-
"string"
|
330 |
-
],
|
331 |
-
"model": "jina",
|
332 |
"normalize": true,
|
333 |
"max_length": null
|
334 |
}
|
@@ -336,18 +260,17 @@ spanish_legal_docs = spanish_legal_embeddings.embed_documents([
|
|
336 |
|
337 |
| Field | Type | Required | Default | Description |
|
338 |
|-------|------|----------|---------|-------------|
|
339 |
-
| texts | array[string] | Yes | - |
|
340 |
-
|
|
341 |
-
|
|
342 |
-
| max_length | integer/null | No | null | Maximum tokens per text (null = model default) |
|
343 |
|
344 |
-
### Response
|
345 |
|
346 |
```json
|
347 |
{
|
348 |
-
"embeddings": [[0.123, -0.456, ...]],
|
349 |
-
"model_used": "jina",
|
350 |
-
"dimensions":
|
351 |
"num_texts": 2
|
352 |
}
|
353 |
```
|
@@ -355,166 +278,168 @@ spanish_legal_docs = spanish_legal_embeddings.embed_documents([
|
|
355 |
## ⚡ Performance & Limits
|
356 |
|
357 |
- **Maximum texts per request**: 50
|
358 |
-
- **
|
359 |
-
- **
|
360 |
-
- **
|
361 |
-
- **
|
|
|
362 |
|
363 |
## 🔧 Advanced Usage
|
364 |
|
365 |
-
###
|
366 |
-
|
367 |
-
For processing large datasets, implement batching:
|
368 |
|
369 |
```python
|
370 |
-
|
371 |
-
|
372 |
-
|
|
|
|
|
|
|
373 |
|
374 |
-
|
375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
response = requests.post(
|
377 |
-
|
378 |
-
json={
|
379 |
-
"texts": batch,
|
380 |
-
"model": "jina",
|
381 |
-
"normalize": True
|
382 |
-
}
|
383 |
)
|
384 |
-
|
|
|
385 |
|
386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
387 |
```
|
388 |
|
389 |
-
### Semantic Search
|
390 |
|
391 |
```python
|
392 |
import numpy as np
|
393 |
from typing import List, Tuple
|
394 |
|
395 |
-
def semantic_search(
|
396 |
-
|
397 |
-
documents: List[str],
|
398 |
-
top_k: int = 5
|
399 |
-
) -> List[Tuple[int, float]]:
|
400 |
-
"""Find most similar documents to query"""
|
401 |
|
402 |
-
# Get embeddings for query and documents
|
403 |
response = requests.post(
|
404 |
-
"https://aurasystems-spanish-embeddings-api.hf.space/embed",
|
405 |
-
json={
|
406 |
-
"texts": [query] + documents,
|
407 |
-
"model": "jina",
|
408 |
-
"normalize": True
|
409 |
-
}
|
410 |
)
|
411 |
|
412 |
embeddings = np.array(response.json()["embeddings"])
|
413 |
query_embedding = embeddings[0]
|
414 |
doc_embeddings = embeddings[1:]
|
415 |
|
416 |
-
# Calculate similarities
|
417 |
similarities = np.dot(doc_embeddings, query_embedding)
|
418 |
-
|
419 |
-
# Get top-k results
|
420 |
top_indices = np.argsort(similarities)[::-1][:top_k]
|
421 |
|
422 |
return [(idx, similarities[idx]) for idx in top_indices]
|
423 |
|
424 |
-
# Example
|
425 |
documents = [
|
426 |
-
"Python
|
427 |
-
"
|
428 |
-
"
|
429 |
-
"
|
430 |
]
|
431 |
|
432 |
-
results = semantic_search(
|
433 |
-
"inteligencia artificial y programación",
|
434 |
-
documents,
|
435 |
-
top_k=2
|
436 |
-
)
|
437 |
-
|
438 |
for idx, score in results:
|
439 |
-
print(f"
|
440 |
-
print(f"Similarity: {score:.4f}\n")
|
441 |
```
|
442 |
|
443 |
## 🚨 Error Handling
|
444 |
|
445 |
-
|
446 |
|
447 |
-
|
|
448 |
-
|
449 |
| 200 | Success |
|
450 |
-
| 400 | Bad Request (
|
451 |
-
| 422 |
|
452 |
-
|
|
453 |
-
| 500 | Internal Server Error |
|
454 |
|
455 |
-
###
|
456 |
|
457 |
-
```
|
458 |
-
|
459 |
-
|
460 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
```
|
462 |
|
463 |
-
|
464 |
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
}
|
470 |
-
```
|
471 |
-
**Solution**: Use a positive integer or omit max_length
|
472 |
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
}
|
478 |
-
```
|
479 |
-
**Solution**: Batch your requests
|
480 |
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
}
|
486 |
-
```
|
487 |
-
**Solution**: Filter out empty strings before sending
|
488 |
|
489 |
-
## 🔒 Authentication
|
490 |
|
491 |
-
|
|
|
|
|
|
|
492 |
|
493 |
-
##
|
494 |
|
495 |
-
|
|
|
|
|
|
|
|
|
496 |
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
|
504 |
-
##
|
505 |
|
506 |
-
- **
|
507 |
-
- **
|
508 |
-
- **
|
509 |
-
- [Jina Embeddings v2 Spanish](https://huggingface.co/jinaai/jina-embeddings-v2-base-es)
|
510 |
-
- [RoBERTalex](https://huggingface.co/PlanTL-GOB-ES/RoBERTalex)
|
511 |
|
512 |
-
##
|
513 |
|
514 |
-
|
515 |
-
-
|
516 |
-
-
|
517 |
|
518 |
---
|
519 |
|
520 |
-
Built with ❤️ using FastAPI and Hugging Face Transformers
|
|
|
7 |
pinned: false
|
8 |
---
|
9 |
|
|
|
|
|
|
|
10 |
# Multilingual & Legal Embeddings API
|
11 |
|
12 |
+
A high-performance FastAPI application providing access to **5 specialized embedding models** for Spanish, Catalan, English, and multilingual text. Each model has its own dedicated endpoint for optimal performance and clarity.
|
13 |
+
|
14 |
+
🌐 **Live API**: [https://aurasystems-spanish-embeddings-api.hf.space](https://aurasystems-spanish-embeddings-api.hf.space)
|
15 |
+
📖 **Interactive Docs**: [https://aurasystems-spanish-embeddings-api.hf.space/docs](https://aurasystems-spanish-embeddings-api.hf.space/docs)
|
16 |
|
17 |
## 🚀 Quick Start
|
18 |
|
19 |
+
### Basic Usage
|
20 |
+
```bash
|
21 |
+
# Test jina-v3 endpoint (multilingual, loads at startup)
|
22 |
+
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3" \
|
23 |
+
-H "Content-Type: application/json" \
|
24 |
+
-d '{"texts": ["Hello world", "Hola mundo"], "normalize": true}'
|
25 |
|
26 |
+
# Test Catalan RoBERTa endpoint
|
27 |
+
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/roberta-ca" \
|
28 |
+
-H "Content-Type: application/json" \
|
29 |
+
-d '{"texts": ["Bon dia", "Com estàs?"], "normalize": true}'
|
30 |
+
```
|
31 |
+
|
32 |
+
## 📚 Available Models & Endpoints
|
33 |
|
34 |
+
| Endpoint | Model | Languages | Dimensions | Max Tokens | Loading Strategy |
|
35 |
+
|----------|--------|-----------|------------|------------|------------------|
|
36 |
+
| `/embed/jina-v3` | jinaai/jina-embeddings-v3 | Multilingual (30+) | 1024 | 8192 | **Startup** |
|
37 |
+
| `/embed/roberta-ca` | projecte-aina/roberta-large-ca-v2 | Catalan | 1024 | 512 | On-demand |
|
38 |
+
| `/embed/jina` | jinaai/jina-embeddings-v2-base-es | Spanish, English | 768 | 8192 | On-demand |
|
39 |
+
| `/embed/robertalex` | PlanTL-GOB-ES/RoBERTalex | Spanish Legal | 768 | 512 | On-demand |
|
40 |
+
| `/embed/legal-bert` | nlpaueb/legal-bert-base-uncased | English Legal | 768 | 512 | On-demand |
|
41 |
|
42 |
+
### Model Recommendations
|
43 |
+
|
44 |
+
- **🌍 General multilingual**: Use `/embed/jina-v3` - Best overall performance
|
45 |
+
- **🇪🇸 Spanish general**: Use `/embed/jina` - Excellent for Spanish/English
|
46 |
+
- **🇪🇸 Spanish legal**: Use `/embed/robertalex` - Specialized for legal texts
|
47 |
+
- **🏴 Catalan**: Use `/embed/roberta-ca` - Best for Catalan text
|
48 |
+
- **🇬🇧 English legal**: Use `/embed/legal-bert` - Specialized for legal documents
|
49 |
|
50 |
## 🔗 API Endpoints
|
51 |
|
52 |
+
### Model-Specific Embedding Endpoints
|
|
|
|
|
|
|
|
|
53 |
|
54 |
+
Each model has its own dedicated endpoint:
|
|
|
|
|
|
|
|
|
55 |
|
|
|
56 |
```
|
57 |
+
POST /embed/jina-v3 # Multilingual (startup model)
|
58 |
+
POST /embed/roberta-ca # Catalan
|
59 |
+
POST /embed/jina # Spanish/English
|
60 |
+
POST /embed/robertalex # Spanish Legal
|
61 |
+
POST /embed/legal-bert # English Legal
|
62 |
```
|
|
|
63 |
|
64 |
+
### Utility Endpoints
|
65 |
+
|
66 |
```
|
67 |
+
GET / # API information
|
68 |
+
GET /health # Health check and model status
|
69 |
+
GET /models # List all models with specifications
|
70 |
```
|
|
|
71 |
|
72 |
## 📖 Usage Examples
|
73 |
|
|
|
75 |
|
76 |
```python
|
77 |
import requests
|
|
|
78 |
|
79 |
API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"
|
80 |
|
81 |
+
# Example 1: Multilingual with Jina v3 (startup model - fastest)
|
82 |
response = requests.post(
|
83 |
+
f"{API_URL}/embed/jina-v3",
|
84 |
json={
|
85 |
+
"texts": [
|
86 |
+
"Hello world", # English
|
87 |
+
"Hola mundo", # Spanish
|
88 |
+
"Bonjour monde", # French
|
89 |
+
"こんにちは世界" # Japanese
|
90 |
+
],
|
91 |
"normalize": True
|
92 |
}
|
93 |
)
|
|
|
94 |
result = response.json()
|
95 |
+
print(f"Jina v3: {result['dimensions']} dimensions") # 1024
|
|
|
96 |
|
97 |
+
# Example 2: Catalan text with RoBERTa-ca
|
98 |
+
response = requests.post(
|
99 |
+
f"{API_URL}/embed/roberta-ca",
|
100 |
json={
|
101 |
"texts": [
|
102 |
+
"Bon dia, com estàs?",
|
103 |
+
"Barcelona és una ciutat meravellosa",
|
104 |
+
"M'agrada la cultura catalana"
|
|
|
105 |
],
|
|
|
106 |
"normalize": True
|
107 |
}
|
108 |
)
|
109 |
+
catalan_result = response.json()
|
110 |
+
print(f"Catalan: {catalan_result['dimensions']} dimensions") # 1024
|
111 |
|
112 |
+
# Example 3: Spanish legal text with RoBERTalex
|
113 |
+
response = requests.post(
|
114 |
+
f"{API_URL}/embed/robertalex",
|
115 |
json={
|
116 |
"texts": [
|
117 |
+
"Artículo primero de la constitución",
|
118 |
+
"El contrato será válido desde la fecha de firma",
|
119 |
+
"La jurisprudencia establece que..."
|
120 |
],
|
|
|
121 |
"normalize": True
|
122 |
}
|
123 |
)
|
124 |
+
legal_result = response.json()
|
125 |
+
print(f"Spanish Legal: {legal_result['dimensions']} dimensions") # 768
|
126 |
|
127 |
+
# Example 4: English legal text with Legal-BERT
|
128 |
+
response = requests.post(
|
129 |
+
f"{API_URL}/embed/legal-bert",
|
130 |
json={
|
131 |
"texts": [
|
132 |
+
"This agreement is legally binding",
|
133 |
+
"The contract shall be governed by English law",
|
134 |
+
"The party hereby agrees and covenants"
|
135 |
],
|
|
|
136 |
"normalize": True
|
137 |
}
|
138 |
)
|
139 |
+
english_legal_result = response.json()
|
140 |
+
print(f"English Legal: {english_legal_result['dimensions']} dimensions") # 768
|
141 |
|
142 |
+
# Example 5: Spanish/English bilingual with Jina v2
|
143 |
+
response = requests.post(
|
144 |
+
f"{API_URL}/embed/jina",
|
145 |
json={
|
146 |
"texts": [
|
147 |
+
"Inteligencia artificial y machine learning",
|
148 |
+
"Artificial intelligence and machine learning",
|
149 |
+
"Procesamiento de lenguaje natural"
|
150 |
],
|
|
|
151 |
"normalize": True
|
152 |
}
|
153 |
)
|
154 |
+
bilingual_result = response.json()
|
155 |
+
print(f"Bilingual: {bilingual_result['dimensions']} dimensions") # 768
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
```
|
157 |
|
158 |
+
### JavaScript/Node.js
|
159 |
|
160 |
```javascript
|
161 |
const API_URL = 'https://aurasystems-spanish-embeddings-api.hf.space';
|
162 |
|
163 |
+
// Function to get embeddings from specific endpoint
|
164 |
+
async function getEmbeddings(endpoint, texts) {
|
165 |
+
const response = await fetch(`${API_URL}/embed/${endpoint}`, {
|
166 |
method: 'POST',
|
167 |
headers: {
|
168 |
'Content-Type': 'application/json',
|
169 |
},
|
170 |
body: JSON.stringify({
|
171 |
texts: texts,
|
|
|
172 |
normalize: true
|
173 |
})
|
174 |
});
|
|
|
180 |
return await response.json();
|
181 |
}
|
182 |
|
183 |
+
// Usage examples
|
184 |
try {
|
185 |
+
// Multilingual embeddings
|
186 |
+
const multilingualResult = await getEmbeddings('jina-v3', [
|
187 |
+
'Hello world',
|
188 |
'Hola mundo',
|
189 |
+
'Ciao mondo'
|
190 |
+
]);
|
191 |
+
console.log('Multilingual dimensions:', multilingualResult.dimensions);
|
192 |
+
|
193 |
+
// Catalan embeddings
|
194 |
+
const catalanResult = await getEmbeddings('roberta-ca', [
|
195 |
+
'Bon dia',
|
196 |
+
'Com estàs?'
|
197 |
]);
|
198 |
+
console.log('Catalan dimensions:', catalanResult.dimensions);
|
199 |
+
|
200 |
} catch (error) {
|
201 |
+
console.error('Error:', error);
|
202 |
}
|
203 |
```
|
204 |
|
205 |
+
### cURL Examples
|
206 |
|
207 |
+
```bash
|
208 |
+
# Multilingual with Jina v3 (startup model)
|
209 |
+
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3" \
|
210 |
+
-H "Content-Type: application/json" \
|
211 |
+
-d '{
|
212 |
+
"texts": ["Hello", "Hola", "Bonjour"],
|
213 |
+
"normalize": true
|
214 |
+
}'
|
215 |
|
216 |
+
# Catalan with RoBERTa-ca
|
217 |
+
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/roberta-ca" \
|
218 |
+
-H "Content-Type: application/json" \
|
219 |
+
-d '{
|
220 |
+
"texts": ["Bon dia", "Com estàs?"],
|
221 |
+
"normalize": true
|
222 |
+
}'
|
223 |
+
|
224 |
+
# Spanish legal with RoBERTalex
|
225 |
+
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/robertalex" \
|
226 |
+
-H "Content-Type: application/json" \
|
227 |
+
-d '{
|
228 |
+
"texts": ["Artículo primero"],
|
229 |
+
"normalize": true
|
230 |
+
}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
232 |
+
# English legal with Legal-BERT
|
233 |
+
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/legal-bert" \
|
234 |
+
-H "Content-Type: application/json" \
|
235 |
+
-d '{
|
236 |
+
"texts": ["This agreement is binding"],
|
237 |
+
"normalize": true
|
238 |
+
}'
|
239 |
+
|
240 |
+
# Spanish/English bilingual with Jina v2
|
241 |
+
curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina" \
|
242 |
+
-H "Content-Type: application/json" \
|
243 |
+
-d '{
|
244 |
+
"texts": ["Texto en español", "Text in English"],
|
245 |
+
"normalize": true
|
246 |
+
}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
247 |
```
|
248 |
|
249 |
+
## 📋 Request/Response Schema
|
250 |
|
251 |
+
### Request Body
|
252 |
|
253 |
```json
|
254 |
{
|
255 |
+
"texts": ["text1", "text2", "..."],
|
|
|
|
|
|
|
256 |
"normalize": true,
|
257 |
"max_length": null
|
258 |
}
|
|
|
260 |
|
261 |
| Field | Type | Required | Default | Description |
|
262 |
|-------|------|----------|---------|-------------|
|
263 |
+
| `texts` | array[string] | ✅ Yes | - | 1-50 texts to embed |
|
264 |
+
| `normalize` | boolean | No | `true` | L2-normalize embeddings |
|
265 |
+
| `max_length` | integer/null | No | `null` | Max tokens (model-specific limits) |
|
|
|
266 |
|
267 |
+
### Response Body
|
268 |
|
269 |
```json
|
270 |
{
|
271 |
+
"embeddings": [[0.123, -0.456, ...], [0.789, -0.012, ...]],
|
272 |
+
"model_used": "jina-v3",
|
273 |
+
"dimensions": 1024,
|
274 |
"num_texts": 2
|
275 |
}
|
276 |
```
|
|
|
278 |
## ⚡ Performance & Limits
|
279 |
|
280 |
- **Maximum texts per request**: 50
|
281 |
+
- **Startup model**: `jina-v3` loads at startup (fastest response)
|
282 |
+
- **On-demand models**: Load on first request (~30-60s first time)
|
283 |
+
- **Typical response time**: 100-300ms after models are loaded
|
284 |
+
- **Memory optimization**: Automatic cleanup for large batches
|
285 |
+
- **CORS enabled**: Works from any domain
|
286 |
|
287 |
## 🔧 Advanced Usage
|
288 |
|
289 |
+
### LangChain Integration
|
|
|
|
|
290 |
|
291 |
```python
|
292 |
+
from langchain.embeddings.base import Embeddings
|
293 |
+
from typing import List
|
294 |
+
import requests
|
295 |
+
|
296 |
+
class MultilingualEmbeddings(Embeddings):
|
297 |
+
"""LangChain integration for multilingual embeddings"""
|
298 |
|
299 |
+
def __init__(self, endpoint: str = "jina-v3"):
|
300 |
+
"""
|
301 |
+
Initialize with specific endpoint
|
302 |
+
|
303 |
+
Args:
|
304 |
+
endpoint: One of "jina-v3", "roberta-ca", "jina", "robertalex", "legal-bert"
|
305 |
+
"""
|
306 |
+
self.api_url = f"https://aurasystems-spanish-embeddings-api.hf.space/embed/{endpoint}"
|
307 |
+
self.endpoint = endpoint
|
308 |
+
|
309 |
+
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
310 |
response = requests.post(
|
311 |
+
self.api_url,
|
312 |
+
json={"texts": texts, "normalize": True}
|
|
|
|
|
|
|
|
|
313 |
)
|
314 |
+
response.raise_for_status()
|
315 |
+
return response.json()["embeddings"]
|
316 |
|
317 |
+
def embed_query(self, text: str) -> List[float]:
|
318 |
+
return self.embed_documents([text])[0]
|
319 |
+
|
320 |
+
# Usage examples
|
321 |
+
multilingual_embeddings = MultilingualEmbeddings("jina-v3")
|
322 |
+
catalan_embeddings = MultilingualEmbeddings("roberta-ca")
|
323 |
+
spanish_legal_embeddings = MultilingualEmbeddings("robertalex")
|
324 |
```
|
325 |
|
326 |
+
### Semantic Search
|
327 |
|
328 |
```python
|
329 |
import numpy as np
|
330 |
from typing import List, Tuple
|
331 |
|
332 |
+
def semantic_search(query: str, documents: List[str], endpoint: str = "jina-v3", top_k: int = 5):
|
333 |
+
"""Semantic search using specific model endpoint"""
|
|
|
|
|
|
|
|
|
334 |
|
|
|
335 |
response = requests.post(
|
336 |
+
f"https://aurasystems-spanish-embeddings-api.hf.space/embed/{endpoint}",
|
337 |
+
json={"texts": [query] + documents, "normalize": True}
|
|
|
|
|
|
|
|
|
338 |
)
|
339 |
|
340 |
embeddings = np.array(response.json()["embeddings"])
|
341 |
query_embedding = embeddings[0]
|
342 |
doc_embeddings = embeddings[1:]
|
343 |
|
344 |
+
# Calculate cosine similarities (already normalized)
|
345 |
similarities = np.dot(doc_embeddings, query_embedding)
|
|
|
|
|
346 |
top_indices = np.argsort(similarities)[::-1][:top_k]
|
347 |
|
348 |
return [(idx, similarities[idx]) for idx in top_indices]
|
349 |
|
350 |
+
# Example: Multilingual search
|
351 |
documents = [
|
352 |
+
"Python programming language",
|
353 |
+
"Lenguaje de programación Python",
|
354 |
+
"Llenguatge de programació Python",
|
355 |
+
"Language de programmation Python"
|
356 |
]
|
357 |
|
358 |
+
results = semantic_search("código en Python", documents, "jina-v3")
|
|
|
|
|
|
|
|
|
|
|
359 |
for idx, score in results:
|
360 |
+
print(f"{score:.4f}: {documents[idx]}")
|
|
|
361 |
```
|
362 |
|
363 |
## 🚨 Error Handling
|
364 |
|
365 |
+
### HTTP Status Codes
|
366 |
|
367 |
+
| Code | Description |
|
368 |
+
|------|-------------|
|
369 |
| 200 | Success |
|
370 |
+
| 400 | Bad Request (validation error) |
|
371 |
+
| 422 | Unprocessable Entity (schema error) |
|
372 |
+
| 500 | Internal Server Error (model loading failed) |
|
|
|
373 |
|
374 |
+
### Common Errors
|
375 |
|
376 |
+
```python
|
377 |
+
# Handle errors properly
|
378 |
+
try:
|
379 |
+
response = requests.post(
|
380 |
+
"https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3",
|
381 |
+
json={"texts": ["text"], "normalize": True}
|
382 |
+
)
|
383 |
+
response.raise_for_status()
|
384 |
+
result = response.json()
|
385 |
+
except requests.exceptions.HTTPError as e:
|
386 |
+
print(f"HTTP error: {e}")
|
387 |
+
print(f"Response: {response.text}")
|
388 |
+
except requests.exceptions.RequestException as e:
|
389 |
+
print(f"Request error: {e}")
|
390 |
```
|
391 |
|
392 |
+
## 📊 Model Status Check
|
393 |
|
394 |
+
```python
|
395 |
+
# Check which models are loaded
|
396 |
+
health = requests.get("https://aurasystems-spanish-embeddings-api.hf.space/health")
|
397 |
+
status = health.json()
|
|
|
|
|
|
|
398 |
|
399 |
+
print(f"API Status: {status['status']}")
|
400 |
+
print(f"Startup model loaded: {status['startup_model_loaded']}")
|
401 |
+
print(f"Available models: {status['available_models']}")
|
402 |
+
print(f"Models loaded: {status['models_count']}/5")
|
|
|
|
|
|
|
403 |
|
404 |
+
# Check endpoint status
|
405 |
+
for model, endpoint_status in status['endpoints'].items():
|
406 |
+
print(f"{model}: {endpoint_status}")
|
407 |
+
```
|
|
|
|
|
|
|
408 |
|
409 |
+
## 🔒 Authentication & Rate Limits
|
410 |
|
411 |
+
- **Authentication**: None required (open API)
|
412 |
+
- **Rate limits**: Generous limits on Hugging Face Spaces
|
413 |
+
- **CORS**: Enabled for all origins
|
414 |
+
- **Usage**: Free for research and commercial use
|
415 |
|
416 |
+
## 🏗️ Architecture
|
417 |
|
418 |
+
### Endpoint-Per-Model Design
|
419 |
+
- **Startup model**: `jina-v3` loads at application startup for fastest response
|
420 |
+
- **On-demand loading**: Other models load when first requested
|
421 |
+
- **Memory optimization**: Progressive loading reduces startup time
|
422 |
+
- **Model caching**: Once loaded, models remain in memory for fast inference
|
423 |
|
424 |
+
### Technical Stack
|
425 |
+
- **FastAPI**: Modern async web framework
|
426 |
+
- **Transformers**: Hugging Face model library
|
427 |
+
- **PyTorch**: Deep learning backend
|
428 |
+
- **Docker**: Containerized deployment
|
429 |
+
- **Hugging Face Spaces**: Cloud hosting platform
|
430 |
|
431 |
+
## 📄 Model Licenses
|
432 |
|
433 |
+
- **Jina models**: Apache 2.0
|
434 |
+
- **RoBERTa models**: MIT/Apache 2.0
|
435 |
+
- **Legal-BERT**: Apache 2.0
|
|
|
|
|
436 |
|
437 |
+
## 🤝 Support & Contributing
|
438 |
|
439 |
+
- **Issues**: [Hugging Face Discussions](https://huggingface.co/spaces/AuraSystems/spanish-embeddings-api/discussions)
|
440 |
+
- **Interactive Docs**: [FastAPI Swagger UI](https://aurasystems-spanish-embeddings-api.hf.space/docs)
|
441 |
+
- **Model Papers**: Check individual model pages on Hugging Face
|
442 |
|
443 |
---
|
444 |
|
445 |
+
Built with ❤️ using **FastAPI** and **Hugging Face Transformers**
|
app_endpoints.py
DELETED
@@ -1,308 +0,0 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
-
from fastapi.middleware.cors import CORSMiddleware
|
3 |
-
from contextlib import asynccontextmanager
|
4 |
-
from typing import List
|
5 |
-
import torch
|
6 |
-
import uvicorn
|
7 |
-
|
8 |
-
from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
|
9 |
-
from utils.helpers import load_models, get_embeddings, cleanup_memory
|
10 |
-
|
11 |
-
# Global model cache
|
12 |
-
models_cache = {}
|
13 |
-
|
14 |
-
# Load jina-v3 at startup (most important model)
|
15 |
-
STARTUP_MODEL = "jina-v3"
|
16 |
-
|
17 |
-
@asynccontextmanager
|
18 |
-
async def lifespan(app: FastAPI):
|
19 |
-
"""Application lifespan handler for startup and shutdown"""
|
20 |
-
# Startup - load jina-v3 model
|
21 |
-
try:
|
22 |
-
global models_cache
|
23 |
-
print(f"Loading startup model: {STARTUP_MODEL}...")
|
24 |
-
models_cache = load_models([STARTUP_MODEL])
|
25 |
-
print(f"Startup model loaded successfully: {list(models_cache.keys())}")
|
26 |
-
yield
|
27 |
-
except Exception as e:
|
28 |
-
print(f"Failed to load startup model: {str(e)}")
|
29 |
-
# Continue anyway - jina-v3 can be loaded on demand if startup fails
|
30 |
-
yield
|
31 |
-
finally:
|
32 |
-
# Shutdown - cleanup resources
|
33 |
-
cleanup_memory()
|
34 |
-
|
35 |
-
def ensure_model_loaded(model_name: str, max_length_limit: int):
|
36 |
-
"""Load a specific model on demand if not already loaded"""
|
37 |
-
global models_cache
|
38 |
-
if model_name not in models_cache:
|
39 |
-
try:
|
40 |
-
print(f"Loading model on demand: {model_name}...")
|
41 |
-
new_models = load_models([model_name])
|
42 |
-
models_cache.update(new_models)
|
43 |
-
print(f"Model {model_name} loaded successfully!")
|
44 |
-
except Exception as e:
|
45 |
-
print(f"Failed to load model {model_name}: {str(e)}")
|
46 |
-
raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
|
47 |
-
|
48 |
-
def validate_request_for_model(request: EmbeddingRequest, model_name: str, max_length_limit: int):
|
49 |
-
"""Validate request parameters for specific model"""
|
50 |
-
if not request.texts:
|
51 |
-
raise HTTPException(status_code=400, detail="No texts provided")
|
52 |
-
|
53 |
-
if len(request.texts) > 50:
|
54 |
-
raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
|
55 |
-
|
56 |
-
if request.max_length is not None and request.max_length > max_length_limit:
|
57 |
-
raise HTTPException(status_code=400, detail=f"Max length for {model_name} is {max_length_limit}")
|
58 |
-
|
59 |
-
app = FastAPI(
|
60 |
-
title="Multilingual & Legal Embedding API",
|
61 |
-
description="Multi-model embedding API with dedicated endpoints per model",
|
62 |
-
version="4.0.0",
|
63 |
-
lifespan=lifespan
|
64 |
-
)
|
65 |
-
|
66 |
-
# Add CORS middleware to allow cross-origin requests
|
67 |
-
app.add_middleware(
|
68 |
-
CORSMiddleware,
|
69 |
-
allow_origins=["*"], # In production, specify actual domains
|
70 |
-
allow_credentials=True,
|
71 |
-
allow_methods=["*"],
|
72 |
-
allow_headers=["*"],
|
73 |
-
)
|
74 |
-
|
75 |
-
@app.get("/")
async def root():
    """Return basic service metadata and the per-model endpoint map."""
    model_ids = ("jina-v3", "roberta-ca", "jina", "robertalex", "legal-bert")
    # Every model is served under /embed/<model id>.
    endpoint_map = {model_id: f"/embed/{model_id}" for model_id in model_ids}
    return dict(
        message="Multilingual & Legal Embedding API - Endpoint Per Model",
        version="4.0.0",
        status="running",
        docs="/docs",
        startup_model=STARTUP_MODEL,
        available_endpoints=endpoint_map,
    )
|
91 |
-
|
92 |
-
# Jina v3 - Multilingual (loads at startup)
|
93 |
-
@app.post("/embed/jina-v3", response_model=EmbeddingResponse)
async def embed_jina_v3(request: EmbeddingRequest):
    """Generate embeddings using Jina v3 model (multilingual).

    Args:
        request: Texts plus the ``normalize`` and ``max_length`` options.

    Returns:
        EmbeddingResponse with the embeddings, the model key, the length of
        the first embedding (0 when none were produced) and the text count.

    Raises:
        HTTPException: 400 for invalid requests or a ``ValueError`` from the
            embedding call; 500 for model-loading or other unexpected errors.
    """
    try:
        # Validate first so that a rejected request never triggers a model load.
        validate_request_for_model(request, "jina-v3", 8192)
        ensure_model_loaded("jina-v3", 8192)

        embeddings = get_embeddings(
            request.texts,
            "jina-v3",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="jina-v3",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # generic handler below would turn deliberate 4xx responses into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
|
119 |
-
|
120 |
-
# Catalan RoBERTa
|
121 |
-
@app.post("/embed/roberta-ca", response_model=EmbeddingResponse)
async def embed_roberta_ca(request: EmbeddingRequest):
    """Generate embeddings using Catalan RoBERTa model.

    Args:
        request: Texts plus the ``normalize`` and ``max_length`` options.

    Returns:
        EmbeddingResponse with the embeddings, the model key, the length of
        the first embedding (0 when none were produced) and the text count.

    Raises:
        HTTPException: 400 for invalid requests or a ``ValueError`` from the
            embedding call; 500 for model-loading or other unexpected errors.
    """
    try:
        # Validate first so that a rejected request never triggers a model load.
        validate_request_for_model(request, "roberta-ca", 512)
        ensure_model_loaded("roberta-ca", 512)

        embeddings = get_embeddings(
            request.texts,
            "roberta-ca",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="roberta-ca",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # generic handler below would turn deliberate 4xx responses into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
|
147 |
-
|
148 |
-
# Jina v2 - Spanish/English
|
149 |
-
@app.post("/embed/jina", response_model=EmbeddingResponse)
async def embed_jina(request: EmbeddingRequest):
    """Generate embeddings using Jina v2 Spanish/English model.

    Args:
        request: Texts plus the ``normalize`` and ``max_length`` options.

    Returns:
        EmbeddingResponse with the embeddings, the model key, the length of
        the first embedding (0 when none were produced) and the text count.

    Raises:
        HTTPException: 400 for invalid requests or a ``ValueError`` from the
            embedding call; 500 for model-loading or other unexpected errors.
    """
    try:
        # Validate first so that a rejected request never triggers a model load.
        validate_request_for_model(request, "jina", 8192)
        ensure_model_loaded("jina", 8192)

        embeddings = get_embeddings(
            request.texts,
            "jina",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="jina",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # generic handler below would turn deliberate 4xx responses into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
|
175 |
-
|
176 |
-
# RoBERTalex - Spanish Legal
|
177 |
-
@app.post("/embed/robertalex", response_model=EmbeddingResponse)
async def embed_robertalex(request: EmbeddingRequest):
    """Generate embeddings using RoBERTalex Spanish legal model.

    Args:
        request: Texts plus the ``normalize`` and ``max_length`` options.

    Returns:
        EmbeddingResponse with the embeddings, the model key, the length of
        the first embedding (0 when none were produced) and the text count.

    Raises:
        HTTPException: 400 for invalid requests or a ``ValueError`` from the
            embedding call; 500 for model-loading or other unexpected errors.
    """
    try:
        # Validate first so that a rejected request never triggers a model load.
        validate_request_for_model(request, "robertalex", 512)
        ensure_model_loaded("robertalex", 512)

        embeddings = get_embeddings(
            request.texts,
            "robertalex",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="robertalex",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # generic handler below would turn deliberate 4xx responses into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
|
203 |
-
|
204 |
-
# Legal BERT - English Legal
|
205 |
-
@app.post("/embed/legal-bert", response_model=EmbeddingResponse)
async def embed_legal_bert(request: EmbeddingRequest):
    """Generate embeddings using Legal BERT English model.

    Args:
        request: Texts plus the ``normalize`` and ``max_length`` options.

    Returns:
        EmbeddingResponse with the embeddings, the model key, the length of
        the first embedding (0 when none were produced) and the text count.

    Raises:
        HTTPException: 400 for invalid requests or a ``ValueError`` from the
            embedding call; 500 for model-loading or other unexpected errors.
    """
    try:
        # Validate first so that a rejected request never triggers a model load.
        validate_request_for_model(request, "legal-bert", 512)
        ensure_model_loaded("legal-bert", 512)

        embeddings = get_embeddings(
            request.texts,
            "legal-bert",
            models_cache,
            request.normalize,
            request.max_length
        )

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used="legal-bert",
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # generic handler below would turn deliberate 4xx responses into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
|
231 |
-
|
232 |
-
@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """Return the static catalogue of models served by this API."""
    field_names = (
        "model_id", "name", "dimensions", "max_sequence_length",
        "languages", "model_type", "description",
    )
    # One row per model, in the order the entries are returned.
    catalogue = (
        ("jina-v3", "jinaai/jina-embeddings-v3", 1024, 8192, ["Multilingual"], "multilingual",
         "Latest Jina v3 with superior multilingual performance - loaded at startup"),
        ("roberta-ca", "projecte-aina/roberta-large-ca-v2", 1024, 512, ["Catalan"], "general",
         "Catalan RoBERTa-large model trained on large corpus"),
        ("jina", "jinaai/jina-embeddings-v2-base-es", 768, 8192, ["Spanish", "English"], "bilingual",
         "Bilingual Spanish-English embeddings with long context support"),
        ("robertalex", "PlanTL-GOB-ES/RoBERTalex", 768, 512, ["Spanish"], "legal domain",
         "Spanish legal domain specialized embeddings"),
        ("legal-bert", "nlpaueb/legal-bert-base-uncased", 768, 512, ["English"], "legal domain",
         "English legal domain BERT model"),
    )
    return [ModelInfo(**dict(zip(field_names, row))) for row in catalogue]
|
282 |
-
|
283 |
-
@app.get("/health")
async def health_check():
    """Report whether the startup model is cached and each endpoint's readiness."""
    is_startup_ready = STARTUP_MODEL in models_cache

    endpoint_status = {}
    for model_id in ("jina-v3", "roberta-ca", "jina", "robertalex", "legal-bert"):
        # A model missing from the cache is loaded by its endpoint on first use.
        readiness = "(ready)" if model_id in models_cache else "(loads on demand)"
        endpoint_status[model_id] = f"/embed/{model_id} {readiness}"

    return dict(
        status="healthy" if is_startup_ready else "partial",
        startup_model=STARTUP_MODEL,
        startup_model_loaded=is_startup_ready,
        available_models=list(models_cache.keys()),
        models_count=len(models_cache),
        endpoints=endpoint_status,
    )
|
302 |
-
|
303 |
-
if __name__ == "__main__":
    # CPU threading configuration for torch, applied before the server starts.
    intra_op_threads, inter_op_threads = 8, 1
    torch.set_num_threads(intra_op_threads)
    torch.set_num_interop_threads(inter_op_threads)

    # Serve on all interfaces, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_hybrid_backup.py
DELETED
@@ -1,189 +0,0 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
-
from fastapi.middleware.cors import CORSMiddleware
|
3 |
-
from contextlib import asynccontextmanager
|
4 |
-
from typing import List
|
5 |
-
import torch
|
6 |
-
import uvicorn
|
7 |
-
|
8 |
-
from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
|
9 |
-
from utils.helpers import load_models, get_embeddings, cleanup_memory
|
10 |
-
|
11 |
-
# Process-wide cache of loaded models, keyed by model id.
models_cache = dict()

# Loaded during application startup (most frequently used).
STARTUP_MODELS = [
    "jina-v3",
    "roberta-ca",
]
# Loaded lazily, the first time a request asks for them.
ON_DEMAND_MODELS = [
    "jina",
    "robertalex",
    "legal-bert",
]
|
18 |
-
|
19 |
-
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler for startup and shutdown.

    Startup tries to load ``STARTUP_MODELS`` into ``models_cache``; a failure
    is logged and the application starts anyway. Shutdown always calls
    ``cleanup_memory``.
    """
    global models_cache

    # Startup - load priority models. Only the loading is guarded: the
    # generator must yield exactly once, so ``yield`` stays outside this
    # try/except. Otherwise an exception thrown in at the yield point would be
    # caught here and followed by a second yield, which is a RuntimeError.
    try:
        print(f"Loading startup models: {STARTUP_MODELS}...")
        models_cache = load_models(STARTUP_MODELS)
        print(f"Startup models loaded successfully: {list(models_cache.keys())}")
    except Exception as e:
        print(f"Failed to load startup models: {str(e)}")
        # Continue anyway - models can be loaded on demand

    try:
        yield
    finally:
        # Shutdown - cleanup resources
        cleanup_memory()
|
36 |
-
|
37 |
-
def ensure_model_loaded(model_name: str):
    """Load a specific model on demand if not already loaded.

    Startup models are accepted as well: when loading them at startup failed,
    they are missing from the cache and have to be loadable here instead of
    being rejected as unknown.

    Args:
        model_name: Key of the model in ``models_cache``.

    Raises:
        HTTPException: 400 when ``model_name`` is in neither ``STARTUP_MODELS``
            nor ``ON_DEMAND_MODELS``; 500 when loading the model fails.
    """
    if model_name in models_cache:
        return

    if model_name not in STARTUP_MODELS and model_name not in ON_DEMAND_MODELS:
        raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")

    try:
        print(f"Loading model on demand: {model_name}...")
        new_models = load_models([model_name])
        # In-place update of the shared cache; no rebinding needed.
        models_cache.update(new_models)
        print(f"Model {model_name} loaded successfully!")
    except Exception as e:
        print(f"Failed to load model {model_name}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}") from e
|
52 |
-
|
53 |
-
# Application object; startup/shutdown work is delegated to ``lifespan``.
app = FastAPI(
    title="Multilingual & Legal Embedding API",
    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
    version="3.0.0",
    lifespan=lifespan
)

# Add CORS middleware to allow cross-origin requests.
# A wildcard origin must not be combined with credentialed requests: the CORS
# specification forbids "*" together with credentials, and reflecting arbitrary
# origins with credentials would let any site act on behalf of a user.
# Credentials are therefore disabled while origins stay open.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual domains
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
68 |
-
|
69 |
-
@app.get("/")
async def root():
    """Return basic service metadata and the list of model ids."""
    model_ids = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
    return dict(
        message="Multilingual & Legal Embedding API",
        models=model_ids,
        status="running",
        docs="/docs",
        total_models=5,
    )
|
78 |
-
|
79 |
-
@app.post("/embed", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """Generate embeddings for input texts.

    Args:
        request: Texts, the model id and the ``normalize`` / ``max_length`` options.

    Returns:
        EmbeddingResponse with the embeddings, the model id, the length of the
        first embedding (0 when none were produced) and the text count.

    Raises:
        HTTPException: 400 for an empty or oversized batch, an unknown model or
            a ``ValueError`` from the embedding call; 500 for model-loading or
            other unexpected errors.
    """
    try:
        # Cheap request checks come first so that a rejected request never
        # triggers a model load.
        if not request.texts:
            raise HTTPException(status_code=400, detail="No texts provided")

        if len(request.texts) > 50:  # Batch size cap per request
            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")

        # Load specific model on demand if needed
        ensure_model_loaded(request.model)

        embeddings = get_embeddings(
            request.texts,
            request.model,
            models_cache,
            request.normalize,
            request.max_length
        )

        # Cleanup memory after large batches
        if len(request.texts) > 20:
            cleanup_memory()

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used=request.model,
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # generic handler below would turn deliberate 4xx responses into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
|
115 |
-
|
116 |
-
@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """Return the static catalogue of models served by this API."""
    field_names = (
        "model_id", "name", "dimensions", "max_sequence_length",
        "languages", "model_type", "description",
    )
    # One row per model, in the order the entries are returned.
    catalogue = (
        ("jina", "jinaai/jina-embeddings-v2-base-es", 768, 8192, ["Spanish", "English"], "bilingual",
         "Bilingual Spanish-English embeddings with long context support"),
        ("robertalex", "PlanTL-GOB-ES/RoBERTalex", 768, 512, ["Spanish"], "legal domain",
         "Spanish legal domain specialized embeddings"),
        ("jina-v3", "jinaai/jina-embeddings-v3", 1024, 8192, ["Multilingual"], "multilingual",
         "Latest Jina v3 with superior multilingual performance"),
        ("legal-bert", "nlpaueb/legal-bert-base-uncased", 768, 512, ["English"], "legal domain",
         "English legal domain BERT model"),
        ("roberta-ca", "projecte-aina/roberta-large-ca-v2", 1024, 512, ["Catalan"], "general",
         "Catalan RoBERTa-large model trained on large corpus"),
    )
    return [ModelInfo(**dict(zip(field_names, row))) for row in catalogue]
|
166 |
-
|
167 |
-
@app.get("/health")
async def health_check():
    """Report which models are cached and whether the startup set is complete."""
    cached_count = len(models_cache)
    # "healthy" only requires the startup models; the others load lazily.
    startup_ready = all(name in models_cache for name in STARTUP_MODELS)

    return dict(
        status="healthy" if startup_ready else "partial",
        startup_models_loaded=startup_ready,
        all_models_loaded=cached_count == 5,
        available_models=list(models_cache.keys()),
        startup_models=STARTUP_MODELS,
        on_demand_models=ON_DEMAND_MODELS,
        models_count=cached_count,
        note=f"Startup models: {STARTUP_MODELS} | On-demand: {ON_DEMAND_MODELS}",
    )
|
183 |
-
|
184 |
-
if __name__ == "__main__":
    # CPU threading configuration for torch, applied before the server starts.
    intra_op_threads, inter_op_threads = 8, 1
    torch.set_num_threads(intra_op_threads)
    torch.set_num_interop_threads(inter_op_threads)

    # Serve on all interfaces, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_old.py
DELETED
@@ -1,159 +0,0 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
-
from fastapi.middleware.cors import CORSMiddleware
|
3 |
-
from contextlib import asynccontextmanager
|
4 |
-
from typing import List
|
5 |
-
import torch
|
6 |
-
import uvicorn
|
7 |
-
|
8 |
-
from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
|
9 |
-
from utils.helpers import load_models, get_embeddings, cleanup_memory
|
10 |
-
|
11 |
-
# Process-wide cache of loaded models; rebound by ``lifespan`` at startup.
models_cache = dict()
|
13 |
-
|
14 |
-
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler for startup and shutdown.

    Startup loads the models into ``models_cache``; a load failure is logged
    and re-raised, which aborts startup. ``cleanup_memory`` runs on the way
    out in every case.
    """
    global models_cache
    try:
        # Startup. Only the loading is wrapped by the logging handler, so an
        # exception thrown in at the yield point while the application runs is
        # not misreported as a model-loading failure.
        try:
            print("Loading models...")
            models_cache = load_models()
            print("All models loaded successfully!")
        except Exception as e:
            print(f"Failed to load models: {str(e)}")
            raise
        yield
    finally:
        # Shutdown - cleanup resources
        cleanup_memory()
|
30 |
-
|
31 |
-
# Application object; startup/shutdown work is delegated to ``lifespan``.
app = FastAPI(
    title="Multilingual & Legal Embedding API",
    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
    version="3.0.0",
    lifespan=lifespan
)

# Add CORS middleware to allow cross-origin requests.
# A wildcard origin must not be combined with credentialed requests: the CORS
# specification forbids "*" together with credentials, and reflecting arbitrary
# origins with credentials would let any site act on behalf of a user.
# Credentials are therefore disabled while origins stay open.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual domains
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
46 |
-
|
47 |
-
@app.get("/")
async def root():
    """Return basic service metadata and the list of model ids."""
    model_ids = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
    return dict(
        message="Multilingual & Legal Embedding API",
        models=model_ids,
        status="running",
        docs="/docs",
        total_models=5,
    )
|
56 |
-
|
57 |
-
@app.post("/embed", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """Generate embeddings for input texts.

    Args:
        request: Texts, the model id and the ``normalize`` / ``max_length`` options.

    Returns:
        EmbeddingResponse with the embeddings, the model id, the length of the
        first embedding (0 when none were produced) and the text count.

    Raises:
        HTTPException: 400 for an empty or oversized batch or a ``ValueError``
            from the embedding call; 500 for other unexpected errors.
    """
    try:
        if not request.texts:
            raise HTTPException(status_code=400, detail="No texts provided")

        if len(request.texts) > 50:  # Batch size cap per request
            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")

        embeddings = get_embeddings(
            request.texts,
            request.model,
            models_cache,
            request.normalize,
            request.max_length
        )

        # Cleanup memory after large batches
        if len(request.texts) > 20:
            cleanup_memory()

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used=request.model,
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # generic handler below would turn the 400s raised above into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
|
90 |
-
|
91 |
-
@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """Return the static catalogue of models served by this API."""
    field_names = (
        "model_id", "name", "dimensions", "max_sequence_length",
        "languages", "model_type", "description",
    )
    # One row per model, in the order the entries are returned.
    catalogue = (
        ("jina", "jinaai/jina-embeddings-v2-base-es", 768, 8192, ["Spanish", "English"], "bilingual",
         "Bilingual Spanish-English embeddings with long context support"),
        ("robertalex", "PlanTL-GOB-ES/RoBERTalex", 768, 512, ["Spanish"], "legal domain",
         "Spanish legal domain specialized embeddings"),
        ("jina-v3", "jinaai/jina-embeddings-v3", 1024, 8192, ["Multilingual"], "multilingual",
         "Latest Jina v3 with superior multilingual performance"),
        ("legal-bert", "nlpaueb/legal-bert-base-uncased", 768, 512, ["English"], "legal domain",
         "English legal domain BERT model"),
        ("roberta-ca", "projecte-aina/roberta-large-ca-v2", 1024, 512, ["Catalan"], "general",
         "Catalan RoBERTa-large model trained on large corpus"),
    )
    return [ModelInfo(**dict(zip(field_names, row))) for row in catalogue]
|
141 |
-
|
142 |
-
@app.get("/health")
async def health_check():
    """Report whether all five expected models are present in the cache."""
    cached_count = len(models_cache)
    everything_loaded = cached_count == 5
    expected = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
    return dict(
        status="healthy" if everything_loaded else "degraded",
        models_loaded=everything_loaded,
        available_models=list(models_cache.keys()),
        expected_models=expected,
        models_count=cached_count,
    )
|
153 |
-
|
154 |
-
if __name__ == "__main__":
    # CPU threading configuration for torch, applied before the server starts.
    intra_op_threads, inter_op_threads = 8, 1
    torch.set_num_threads(intra_op_threads)
    torch.set_num_interop_threads(inter_op_threads)

    # Serve on all interfaces, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_old_minimal.py
DELETED
@@ -1,165 +0,0 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
-
from fastapi.middleware.cors import CORSMiddleware
|
3 |
-
from typing import List
|
4 |
-
import torch
|
5 |
-
import uvicorn
|
6 |
-
|
7 |
-
from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
|
8 |
-
from utils.helpers import load_models, get_embeddings, cleanup_memory
|
9 |
-
|
10 |
-
# Process-wide model cache; starts empty and is filled lazily on request.
models_cache = dict()

# Every model is loaded on demand in this deployment-test variant.
ON_DEMAND_MODELS = [
    "jina",
    "robertalex",
    "jina-v3",
    "legal-bert",
    "roberta-ca",
]
|
15 |
-
|
16 |
-
def ensure_model_loaded(model_name: str):
    """Make sure ``model_name`` is present in ``models_cache``, loading it if absent.

    Args:
        model_name: Key of the model in ``models_cache``.

    Raises:
        HTTPException: 400 when ``model_name`` is not in ``ON_DEMAND_MODELS``;
            500 when loading the model fails.
    """
    # Guard clauses replace the nested conditionals.
    if model_name in models_cache:
        return
    if model_name not in ON_DEMAND_MODELS:
        raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")

    try:
        print(f"Loading model on demand: {model_name}...")
        freshly_loaded = load_models([model_name])
        # The cache is mutated in place, so no ``global`` rebinding is required.
        models_cache.update(freshly_loaded)
        print(f"Model {model_name} loaded successfully!")
    except Exception as load_error:
        print(f"Failed to load model {model_name}: {str(load_error)}")
        raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(load_error)}")
|
31 |
-
|
32 |
-
# Application object; this variant has no lifespan handler.
app = FastAPI(
    title="Multilingual & Legal Embedding API",
    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
    version="3.0.0"
)

# Add CORS middleware to allow cross-origin requests.
# A wildcard origin must not be combined with credentialed requests: the CORS
# specification forbids "*" together with credentials, and reflecting arbitrary
# origins with credentials would let any site act on behalf of a user.
# Credentials are therefore disabled while origins stay open.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual domains
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
46 |
-
|
47 |
-
@app.get("/")
async def root():
    """Return basic service metadata and the list of model ids."""
    model_ids = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
    return dict(
        message="Multilingual & Legal Embedding API - Minimal Version",
        models=model_ids,
        status="running",
        docs="/docs",
        total_models=5,
        note="All models load on first request",
    )
|
57 |
-
|
58 |
-
@app.post("/embed", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """Generate embeddings for input texts.

    Args:
        request: Texts, the model id and the ``normalize`` / ``max_length`` options.

    Returns:
        EmbeddingResponse with the embeddings, the model id, the length of the
        first embedding (0 when none were produced) and the text count.

    Raises:
        HTTPException: 400 for an empty or oversized batch, an unknown model or
            a ``ValueError`` from the embedding call; 500 for model-loading or
            other unexpected errors.
    """
    try:
        # Cheap request checks come first so that a rejected request never
        # triggers a model load.
        if not request.texts:
            raise HTTPException(status_code=400, detail="No texts provided")

        if len(request.texts) > 50:  # Batch size cap per request
            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")

        # Load specific model on demand
        ensure_model_loaded(request.model)

        embeddings = get_embeddings(
            request.texts,
            request.model,
            models_cache,
            request.normalize,
            request.max_length
        )

        # Cleanup memory after large batches
        if len(request.texts) > 20:
            cleanup_memory()

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used=request.model,
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )

    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # generic handler below would turn deliberate 4xx responses into 500s.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
|
94 |
-
|
95 |
-
@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """Return the static catalogue of models served by this API."""
    field_names = (
        "model_id", "name", "dimensions", "max_sequence_length",
        "languages", "model_type", "description",
    )
    # One row per model, in the order the entries are returned.
    catalogue = (
        ("jina", "jinaai/jina-embeddings-v2-base-es", 768, 8192, ["Spanish", "English"], "bilingual",
         "Bilingual Spanish-English embeddings with long context support"),
        ("robertalex", "PlanTL-GOB-ES/RoBERTalex", 768, 512, ["Spanish"], "legal domain",
         "Spanish legal domain specialized embeddings"),
        ("jina-v3", "jinaai/jina-embeddings-v3", 1024, 8192, ["Multilingual"], "multilingual",
         "Latest Jina v3 with superior multilingual performance"),
        ("legal-bert", "nlpaueb/legal-bert-base-uncased", 768, 512, ["English"], "legal domain",
         "English legal domain BERT model"),
        ("roberta-ca", "projecte-aina/roberta-large-ca-v2", 1024, 512, ["Catalan"], "general",
         "Catalan RoBERTa-large model trained on large corpus"),
    )
    return [ModelInfo(**dict(zip(field_names, row))) for row in catalogue]
|
145 |
-
|
146 |
-
@app.get("/health")
async def health_check():
    """Report which models are cached; the status is always "healthy" here."""
    cached_count = len(models_cache)
    return dict(
        status="healthy",
        all_models_loaded=cached_count == 5,
        available_models=list(models_cache.keys()),
        on_demand_models=ON_DEMAND_MODELS,
        models_count=cached_count,
        note="All models load on first embedding request - minimal deployment version",
    )
|
159 |
-
|
160 |
-
if __name__ == "__main__":
    # CPU threading configuration for torch, applied before the server starts.
    intra_op_threads, inter_op_threads = 8, 1
    torch.set_num_threads(intra_op_threads)
    torch.set_num_interop_threads(inter_op_threads)

    # Serve on all interfaces, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_api.py
DELETED
@@ -1,64 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Simple test script for the embedding API
|
4 |
-
"""
|
5 |
-
|
6 |
-
import requests
|
7 |
-
import json
|
8 |
-
import time
|
9 |
-
|
10 |
-
def test_api(base_url="https://aurasystems-spanish-embeddings-api.hf.space"):
|
11 |
-
"""Test the API endpoints"""
|
12 |
-
|
13 |
-
print(f"Testing API at {base_url}")
|
14 |
-
|
15 |
-
# Test root endpoint
|
16 |
-
try:
|
17 |
-
response = requests.get(f"{base_url}/")
|
18 |
-
print(f"✓ Root endpoint: {response.status_code}")
|
19 |
-
print(f" Response: {response.json()}")
|
20 |
-
except Exception as e:
|
21 |
-
print(f"✗ Root endpoint failed: {e}")
|
22 |
-
return False
|
23 |
-
|
24 |
-
# Test health endpoint
|
25 |
-
try:
|
26 |
-
response = requests.get(f"{base_url}/health")
|
27 |
-
print(f"✓ Health endpoint: {response.status_code}")
|
28 |
-
health_data = response.json()
|
29 |
-
print(f" Models loaded: {health_data.get('models_loaded', False)}")
|
30 |
-
print(f" Available models: {health_data.get('available_models', [])}")
|
31 |
-
except Exception as e:
|
32 |
-
print(f"✗ Health endpoint failed: {e}")
|
33 |
-
|
34 |
-
# Test models endpoint
|
35 |
-
try:
|
36 |
-
response = requests.get(f"{base_url}/models")
|
37 |
-
print(f"✓ Models endpoint: {response.status_code}")
|
38 |
-
models = response.json()
|
39 |
-
print(f" Found {len(models)} model definitions")
|
40 |
-
except Exception as e:
|
41 |
-
print(f"✗ Models endpoint failed: {e}")
|
42 |
-
|
43 |
-
# Test embedding endpoint
|
44 |
-
try:
|
45 |
-
payload = {
|
46 |
-
"texts": ["Hello world", "Test text"],
|
47 |
-
"model": "jina",
|
48 |
-
"normalize": True
|
49 |
-
}
|
50 |
-
response = requests.post(f"{base_url}/embed", json=payload)
|
51 |
-
print(f"✓ Embed endpoint: {response.status_code}")
|
52 |
-
if response.status_code == 200:
|
53 |
-
data = response.json()
|
54 |
-
print(f" Generated {data.get('num_texts', 0)} embeddings")
|
55 |
-
print(f" Dimensions: {data.get('dimensions', 0)}")
|
56 |
-
else:
|
57 |
-
print(f" Error: {response.text}")
|
58 |
-
except Exception as e:
|
59 |
-
print(f"✗ Embed endpoint failed: {e}")
|
60 |
-
|
61 |
-
return True
|
62 |
-
|
63 |
-
if __name__ == "__main__":
|
64 |
-
test_api()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_hybrid.py
DELETED
@@ -1,98 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Test script for hybrid model loading
|
4 |
-
"""
|
5 |
-
|
6 |
-
import requests
|
7 |
-
import json
|
8 |
-
import time
|
9 |
-
|
10 |
-
def test_hybrid_api(base_url="https://aurasystems-spanish-embeddings-api.hf.space"):
|
11 |
-
"""Test the hybrid API"""
|
12 |
-
|
13 |
-
print(f"Testing hybrid API at {base_url}")
|
14 |
-
|
15 |
-
# Test health endpoint first
|
16 |
-
try:
|
17 |
-
response = requests.get(f"{base_url}/health")
|
18 |
-
print(f"✓ Health endpoint: {response.status_code}")
|
19 |
-
if response.status_code == 200:
|
20 |
-
health_data = response.json()
|
21 |
-
print(f" Startup models loaded: {health_data.get('startup_models_loaded', False)}")
|
22 |
-
print(f" Available models: {health_data.get('available_models', [])}")
|
23 |
-
print(f" Note: {health_data.get('note', 'N/A')}")
|
24 |
-
else:
|
25 |
-
print(f" Error: {response.text}")
|
26 |
-
except Exception as e:
|
27 |
-
print(f"✗ Health endpoint failed: {e}")
|
28 |
-
return False
|
29 |
-
|
30 |
-
# Test startup model (jina-v3)
|
31 |
-
try:
|
32 |
-
payload = {
|
33 |
-
"texts": ["Hola mundo", "Bonjour le monde"],
|
34 |
-
"model": "jina-v3",
|
35 |
-
"normalize": True
|
36 |
-
}
|
37 |
-
response = requests.post(f"{base_url}/embed", json=payload)
|
38 |
-
print(f"✓ Startup model (jina-v3): {response.status_code}")
|
39 |
-
if response.status_code == 200:
|
40 |
-
data = response.json()
|
41 |
-
print(f" Generated {data.get('num_texts', 0)} embeddings")
|
42 |
-
print(f" Dimensions: {data.get('dimensions', 0)}")
|
43 |
-
else:
|
44 |
-
print(f" Error: {response.text}")
|
45 |
-
except Exception as e:
|
46 |
-
print(f"✗ Startup model test failed: {e}")
|
47 |
-
|
48 |
-
# Test startup model (roberta-ca)
|
49 |
-
try:
|
50 |
-
payload = {
|
51 |
-
"texts": ["Bon dia", "Com estàs?"],
|
52 |
-
"model": "roberta-ca",
|
53 |
-
"normalize": True
|
54 |
-
}
|
55 |
-
response = requests.post(f"{base_url}/embed", json=payload)
|
56 |
-
print(f"✓ Startup model (roberta-ca): {response.status_code}")
|
57 |
-
if response.status_code == 200:
|
58 |
-
data = response.json()
|
59 |
-
print(f" Generated {data.get('num_texts', 0)} embeddings")
|
60 |
-
print(f" Dimensions: {data.get('dimensions', 0)}")
|
61 |
-
else:
|
62 |
-
print(f" Error: {response.text}")
|
63 |
-
except Exception as e:
|
64 |
-
print(f"✗ Startup model test failed: {e}")
|
65 |
-
|
66 |
-
# Test on-demand model (jina)
|
67 |
-
try:
|
68 |
-
payload = {
|
69 |
-
"texts": ["Texto en español"],
|
70 |
-
"model": "jina",
|
71 |
-
"normalize": True
|
72 |
-
}
|
73 |
-
response = requests.post(f"{base_url}/embed", json=payload)
|
74 |
-
print(f"✓ On-demand model (jina): {response.status_code}")
|
75 |
-
if response.status_code == 200:
|
76 |
-
data = response.json()
|
77 |
-
print(f" Generated {data.get('num_texts', 0)} embeddings")
|
78 |
-
print(f" Dimensions: {data.get('dimensions', 0)}")
|
79 |
-
else:
|
80 |
-
print(f" Error: {response.text}")
|
81 |
-
except Exception as e:
|
82 |
-
print(f"✗ On-demand model test failed: {e}")
|
83 |
-
|
84 |
-
# Check health again to see all models
|
85 |
-
try:
|
86 |
-
response = requests.get(f"{base_url}/health")
|
87 |
-
if response.status_code == 200:
|
88 |
-
health_data = response.json()
|
89 |
-
print(f"✓ Final health check:")
|
90 |
-
print(f" All models loaded: {health_data.get('all_models_loaded', False)}")
|
91 |
-
print(f" Available models: {health_data.get('available_models', [])}")
|
92 |
-
except Exception as e:
|
93 |
-
print(f"✗ Final health check failed: {e}")
|
94 |
-
|
95 |
-
return True
|
96 |
-
|
97 |
-
if __name__ == "__main__":
|
98 |
-
test_hybrid_api()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|