Jordi Catafal committed
Commit 023e423 · 1 Parent(s): 03eefac

cleaning + readme

Files changed (7):
  1. README.md +259 -334
  2. app_endpoints.py +0 -308
  3. app_hybrid_backup.py +0 -189
  4. app_old.py +0 -159
  5. app_old_minimal.py +0 -165
  6. test_api.py +0 -64
  7. test_hybrid.py +0 -98
README.md CHANGED
@@ -7,54 +7,67 @@ sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
11
-
12
- --------------------------------
13
  # Multilingual & Legal Embeddings API
14
 
15
- A high-performance API for generating embeddings from Spanish, Catalan, English, and multilingual text using state-of-the-art models. This API provides access to five specialized models optimized for different use cases and languages.
 
 
 
16
 
17
  ## 🚀 Quick Start
18
 
19
- **Base URL**: `https://aurasystems-spanish-embeddings-api.hf.space`
 
 
 
 
 
20
 
21
- **Interactive Documentation**: [https://aurasystems-spanish-embeddings-api.hf.space/docs](https://aurasystems-spanish-embeddings-api.hf.space/docs)
22
 
23
- ## 📚 Available Models
24
 
25
- | Model | Max Tokens | Languages | Dimensions | Best Use Case |
26
- |-------|------------|-----------|------------|---------------|
27
- | **jina** | 8,192 | Spanish, English | 768 | General purpose, long documents, cross-lingual tasks |
28
- | **robertalex** | 512 | Spanish | 768 | Spanish legal documents, formal Spanish |
29
- | **jina-v3** | 8,192 | Multilingual (30+ languages) | 1,024 | Superior multilingual embeddings, long context |
30
- | **legal-bert** | 512 | English | 768 | English legal documents, contracts, law texts |
31
- | **roberta-ca** | 512 | Catalan | 1,024 | Catalan text, general purpose, RoBERTa-large architecture |
32
 
33
  ## 🔗 API Endpoints
34
 
35
- ### Generate Embeddings
36
- ```
37
- POST /embed
38
- ```
39
- Generate embeddings for up to 50 texts in a single request.
40
 
41
- ### List Models
42
- ```
43
- GET /models
44
- ```
45
- Get detailed information about available models.
46
 
47
- ### Health Check
48
  ```
49
- GET /health
 
 
 
 
50
  ```
51
- Check API status and model availability.
52
 
53
- ### API Info
 
54
  ```
55
- GET /
 
 
56
  ```
57
- Basic API information and status.
58
 
59
  ## 📖 Usage Examples
60
 
@@ -62,164 +75,100 @@ Basic API information and status.
62
 
63
  ```python
64
  import requests
65
- import numpy as np
66
 
67
  API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"
68
 
69
- # Example 1: Basic usage with Jina v2 Spanish
70
  response = requests.post(
71
- f"{API_URL}/embed",
72
  json={
73
- "texts": ["Hola, ¿cómo estás?", "Me gusta programar en Python"],
74
- "model": "jina",
 
 
 
 
75
  "normalize": True
76
  }
77
  )
78
-
79
  result = response.json()
80
- embeddings = result["embeddings"]
81
- print(f"Generated {len(embeddings)} embeddings of {result['dimensions']} dimensions")
82
 
83
- # Example 2: Using Jina v3 for multilingual texts
84
- multilingual_response = requests.post(
85
- f"{API_URL}/embed",
86
  json={
87
  "texts": [
88
- "Hello world", # English
89
- "Hola mundo", # Spanish
90
- "Bonjour le monde", # French
91
- "Hallo Welt" # German
92
  ],
93
- "model": "jina-v3",
94
  "normalize": True
95
  }
96
  )
97
- print(f"Jina v3 dimensions: {multilingual_response.json()['dimensions']}") # 1024 dims
 
98
 
99
- # Example 3: Catalan text with RoBERTa-ca
100
- catalan_response = requests.post(
101
- f"{API_URL}/embed",
102
  json={
103
  "texts": [
104
- "Bon dia, com estàs?",
105
- "M'agrada programar en Python",
106
- "Barcelona és una ciutat meravellosa"
107
  ],
108
- "model": "roberta-ca",
109
  "normalize": True
110
  }
111
  )
112
- print(f"Catalan RoBERTa dimensions: {catalan_response.json()['dimensions']}") # 1024 dims
 
113
 
114
- # Example 4: Legal text with RoBERTalex (Spanish)
115
- spanish_legal_response = requests.post(
116
- f"{API_URL}/embed",
117
  json={
118
  "texts": [
119
- "El contrato será válido desde la fecha de firma",
120
- "La validez contractual inicia en el momento de suscripción"
 
121
  ],
122
- "model": "robertalex",
123
  "normalize": True
124
  }
125
  )
 
 
126
 
127
- # Example 5: Legal text with Legal-BERT (English)
128
- english_legal_response = requests.post(
129
- f"{API_URL}/embed",
130
  json={
131
  "texts": [
132
- "The contract shall be valid from the date of signature",
133
- "This agreement is governed by the laws of the state"
 
134
  ],
135
- "model": "legal-bert",
136
  "normalize": True
137
  }
138
  )
139
-
140
- # Example 6: Compare similarity across models
141
- text_es = "inteligencia artificial"
142
- text_ca = "intel·ligència artificial"
143
- models_comparison = {}
144
-
145
- for model, text in [("jina", text_es), ("roberta-ca", text_ca), ("jina-v3", text_es)]:
146
- resp = requests.post(
147
- f"{API_URL}/embed",
148
- json={"texts": [text], "model": model, "normalize": True}
149
- )
150
- models_comparison[model] = resp.json()["dimensions"]
151
-
152
- print("Embedding dimensions by model:", models_comparison)
153
- ```
154
-
155
- ### cURL
156
-
157
- ```bash
158
- # Basic embedding generation with Jina v2 Spanish
159
- curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
160
- -H "Content-Type: application/json" \
161
- -d '{
162
- "texts": ["Texto de ejemplo", "Otro texto en español"],
163
- "model": "jina",
164
- "normalize": true
165
- }'
166
-
167
- # Catalan text with RoBERTa-ca
168
- curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
169
- -H "Content-Type: application/json" \
170
- -d '{
171
- "texts": ["Bon dia", "Com està vostè?", "Catalunya és meravellosa"],
172
- "model": "roberta-ca",
173
- "normalize": true
174
- }'
175
-
176
- # Using Jina v3 for multilingual embeddings
177
- curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
178
- -H "Content-Type: application/json" \
179
- -d '{
180
- "texts": ["Hello world", "Hola mundo", "Bonjour le monde"],
181
- "model": "jina-v3",
182
- "normalize": true
183
- }'
184
-
185
- # English legal text with Legal-BERT
186
- curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
187
- -H "Content-Type: application/json" \
188
- -d '{
189
- "texts": ["This agreement is legally binding"],
190
- "model": "legal-bert",
191
- "normalize": true
192
- }'
193
-
194
- # Spanish legal text with RoBERTalex
195
- curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed" \
196
- -H "Content-Type: application/json" \
197
- -d '{
198
- "texts": ["Artículo primero de la constitución"],
199
- "model": "robertalex",
200
- "normalize": true,
201
- "max_length": 512
202
- }'
203
-
204
- # Get all model information
205
- curl "https://aurasystems-spanish-embeddings-api.hf.space/models"
206
  ```
207
 
208
- ### JavaScript/TypeScript
209
 
210
  ```javascript
211
  const API_URL = 'https://aurasystems-spanish-embeddings-api.hf.space';
212
 
213
- // Basic function to get embeddings
214
- async function getEmbeddings(texts, model = 'jina') {
215
- const response = await fetch(`${API_URL}/embed`, {
216
  method: 'POST',
217
  headers: {
218
  'Content-Type': 'application/json',
219
  },
220
  body: JSON.stringify({
221
  texts: texts,
222
- model: model,
223
  normalize: true
224
  })
225
  });
@@ -231,104 +180,79 @@ async function getEmbeddings(texts, model = 'jina') {
231
  return await response.json();
232
  }
233
 
234
- // Usage example
235
  try {
236
- const result = await getEmbeddings([
 
 
237
  'Hola mundo',
238
- 'Programación en JavaScript'
239
  ]);
240
- console.log('Embeddings:', result.embeddings);
241
- console.log('Dimensions:', result.dimensions);
242
  } catch (error) {
243
- console.error('Error generating embeddings:', error);
244
  }
245
  ```
246
 
247
- ### Using with LangChain
248
 
249
- ```python
250
- from langchain.embeddings.base import Embeddings
251
- from typing import List
252
- import requests
 
 
 
 
253
 
254
- class MultilingualEmbeddings(Embeddings):
255
- """Custom LangChain embeddings class for multilingual text"""
256
-
257
- def __init__(self, model: str = "jina-v3"):
258
- """
259
- Initialize embeddings
260
-
261
- Args:
262
- model: One of "jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"
263
- """
264
- self.api_url = "https://aurasystems-spanish-embeddings-api.hf.space/embed"
265
- self.model = model
266
-
267
- def embed_documents(self, texts: List[str]) -> List[List[float]]:
268
- response = requests.post(
269
- self.api_url,
270
- json={
271
- "texts": texts,
272
- "model": self.model,
273
- "normalize": True
274
- }
275
- )
276
- response.raise_for_status()
277
- return response.json()["embeddings"]
278
-
279
- def embed_query(self, text: str) -> List[float]:
280
- return self.embed_documents([text])[0]
281
 
282
- # Usage examples with different models
283
- # Spanish embeddings
284
- spanish_embeddings = MultilingualEmbeddings(model="jina")
285
- spanish_docs = spanish_embeddings.embed_documents([
286
- "Primer documento en español",
287
- "Segundo documento en español"
288
- ])
289
-
290
- # Catalan embeddings
291
- catalan_embeddings = MultilingualEmbeddings(model="roberta-ca")
292
- catalan_docs = catalan_embeddings.embed_documents([
293
- "Primer document en català",
294
- "Segon document en català",
295
- "La cultura catalana és rica i diversa"
296
- ])
297
-
298
- # Multilingual embeddings with Jina v3
299
- multilingual_embeddings = MultilingualEmbeddings(model="jina-v3")
300
- mixed_docs = multilingual_embeddings.embed_documents([
301
- "English document",
302
- "Documento en español",
303
- "Document en français",
304
- "Document en català"
305
- ])
306
-
307
- # Legal embeddings for English
308
- legal_embeddings = MultilingualEmbeddings(model="legal-bert")
309
- legal_docs = legal_embeddings.embed_documents([
310
- "This contract is governed by English law",
311
- "The party shall indemnify and hold harmless"
312
- ])
313
-
314
- # Spanish legal embeddings
315
- spanish_legal_embeddings = MultilingualEmbeddings(model="robertalex")
316
- spanish_legal_docs = spanish_legal_embeddings.embed_documents([
317
- "Artículo 1: De los derechos fundamentales",
318
- "La presente ley entrará en vigor"
319
- ])
320
  ```
321
 
322
- ## 📋 Request/Response Formats
323
 
324
- ### Request Body Schema
325
 
326
  ```json
327
  {
328
- "texts": [
329
- "string"
330
- ],
331
- "model": "jina",
332
  "normalize": true,
333
  "max_length": null
334
  }
@@ -336,18 +260,17 @@ spanish_legal_docs = spanish_legal_embeddings.embed_documents([
336
 
337
  | Field | Type | Required | Default | Description |
338
  |-------|------|----------|---------|-------------|
339
- | texts | array[string] | Yes | - | List of texts to embed (1-50 texts) |
340
- | model | string | No | "jina" | Model to use: "jina" or "robertalex" |
341
- | normalize | boolean | No | true | Whether to L2-normalize embeddings |
342
- | max_length | integer/null | No | null | Maximum tokens per text (null = model default) |
343
 
344
- ### Response Schema
345
 
346
  ```json
347
  {
348
- "embeddings": [[0.123, -0.456, ...]],
349
- "model_used": "jina",
350
- "dimensions": 768,
351
  "num_texts": 2
352
  }
353
  ```
@@ -355,166 +278,168 @@ spanish_legal_docs = spanish_legal_embeddings.embed_documents([
355
  ## ⚡ Performance & Limits
356
 
357
  - **Maximum texts per request**: 50
358
- - **Maximum concurrent requests**: 4 (on free tier)
359
- - **Typical response time**: 100-200ms for 10 texts
360
- - **Embedding dimensions**: 768 (both models)
361
- - **API availability**: 24/7 on Hugging Face Spaces
 
362
 
363
  ## 🔧 Advanced Usage
364
 
365
- ### Batch Processing
366
-
367
- For processing large datasets, implement batching:
368
 
369
  ```python
370
- def process_large_dataset(texts, batch_size=50):
371
- """Process large text dataset in batches"""
372
- embeddings = []
 
 
 
373
 
374
- for i in range(0, len(texts), batch_size):
375
- batch = texts[i:i + batch_size]
376
  response = requests.post(
377
- "https://aurasystems-spanish-embeddings-api.hf.space/embed",
378
- json={
379
- "texts": batch,
380
- "model": "jina",
381
- "normalize": True
382
- }
383
  )
384
- embeddings.extend(response.json()["embeddings"])
 
385
 
386
- return embeddings
  ```
388
 
389
- ### Semantic Search Example
390
 
391
  ```python
392
  import numpy as np
393
  from typing import List, Tuple
394
 
395
- def semantic_search(
396
- query: str,
397
- documents: List[str],
398
- top_k: int = 5
399
- ) -> List[Tuple[int, float]]:
400
- """Find most similar documents to query"""
401
 
402
- # Get embeddings for query and documents
403
  response = requests.post(
404
- "https://aurasystems-spanish-embeddings-api.hf.space/embed",
405
- json={
406
- "texts": [query] + documents,
407
- "model": "jina",
408
- "normalize": True
409
- }
410
  )
411
 
412
  embeddings = np.array(response.json()["embeddings"])
413
  query_embedding = embeddings[0]
414
  doc_embeddings = embeddings[1:]
415
 
416
- # Calculate similarities
417
  similarities = np.dot(doc_embeddings, query_embedding)
418
-
419
- # Get top-k results
420
  top_indices = np.argsort(similarities)[::-1][:top_k]
421
 
422
  return [(idx, similarities[idx]) for idx in top_indices]
423
 
424
- # Example usage
425
  documents = [
426
- "Python es un lenguaje de programación",
427
- "Madrid es la capital de España",
428
- "El machine learning está revolucionando la tecnología",
429
- "La paella es un plato típico español"
430
  ]
431
 
432
- results = semantic_search(
433
- "inteligencia artificial y programación",
434
- documents,
435
- top_k=2
436
- )
437
-
438
  for idx, score in results:
439
- print(f"Document: {documents[idx]}")
440
- print(f"Similarity: {score:.4f}\n")
441
  ```
442
 
443
  ## 🚨 Error Handling
444
 
445
- The API returns standard HTTP status codes:
446
 
447
- | Status Code | Description |
448
- |-------------|-------------|
449
  | 200 | Success |
450
- | 400 | Bad Request (invalid parameters) |
451
- | 422 | Validation Error (check request format) |
452
- | 429 | Too Many Requests (rate limit exceeded) |
453
- | 500 | Internal Server Error |
454
 
455
- ### Error Response Format
456
 
457
- ```json
458
- {
459
- "detail": "Error message description"
460
- }
 
 
 
 
 
 
 
 
 
 
461
  ```
462
 
463
- ### Common Errors and Solutions
464
 
465
- 1. **Invalid max_length**
466
- ```json
467
- {
468
- "detail": "Value error, Max length must be positive"
469
- }
470
- ```
471
- **Solution**: Use a positive integer or omit max_length
472
 
473
- 2. **Too many texts**
474
- ```json
475
- {
476
- "detail": "Maximum 50 texts per request"
477
- }
478
- ```
479
- **Solution**: Batch your requests
480
 
481
- 3. **Empty texts**
482
- ```json
483
- {
484
- "detail": "Empty texts are not allowed"
485
- }
486
- ```
487
- **Solution**: Filter out empty strings before sending
488
 
489
- ## 🔒 Authentication
490
 
491
- This API is currently **open and does not require authentication**. It's hosted on Hugging Face Spaces and is free to use within the rate limits.
 
 
 
492
 
493
- ## 📊 Monitoring
494
 
495
- Check API status and health:
 
 
 
 
496
 
497
- ```python
498
- # Health check
499
- health = requests.get("https://aurasystems-spanish-embeddings-api.hf.space/health")
500
- print(health.json())
501
- # Output: {'status': 'healthy', 'models_loaded': True, 'available_models': ['jina', 'robertalex']}
502
- ```
503
 
504
- ## 🤝 Support
505
 
506
- - **Issues**: Create an issue in the [Hugging Face Space discussions](https://huggingface.co/spaces/AuraSystems/spanish-embeddings-api/discussions)
507
- - **Documentation**: Visit the [interactive API docs](https://aurasystems-spanish-embeddings-api.hf.space/docs)
508
- - **Model Information**:
509
- - [Jina Embeddings v2 Spanish](https://huggingface.co/jinaai/jina-embeddings-v2-base-es)
510
- - [RoBERTalex](https://huggingface.co/PlanTL-GOB-ES/RoBERTalex)
511
 
512
- ## 📄 License
513
 
514
- This API is provided as-is for research and commercial use. The underlying models have their own licenses:
515
- - Jina models: Apache 2.0
516
- - RoBERTalex: Apache 2.0
517
 
518
  ---
519
 
520
- Built with ❤️ using FastAPI and Hugging Face Transformers
 
7
  pinned: false
8
  ---
9
 
 
 
 
10
  # Multilingual & Legal Embeddings API
11
 
12
+ A high-performance FastAPI application providing access to **5 specialized embedding models** for Spanish, Catalan, English, and multilingual text. Each model has its own dedicated endpoint for optimal performance and clarity.
13
+
14
+ 🌐 **Live API**: [https://aurasystems-spanish-embeddings-api.hf.space](https://aurasystems-spanish-embeddings-api.hf.space)
15
+ 📖 **Interactive Docs**: [https://aurasystems-spanish-embeddings-api.hf.space/docs](https://aurasystems-spanish-embeddings-api.hf.space/docs)
16
 
17
  ## 🚀 Quick Start
18
 
19
+ ### Basic Usage
20
+ ```bash
21
+ # Test jina-v3 endpoint (multilingual, loads at startup)
22
+ curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3" \
23
+ -H "Content-Type: application/json" \
24
+ -d '{"texts": ["Hello world", "Hola mundo"], "normalize": true}'
25
 
26
+ # Test Catalan RoBERTa endpoint
27
+ curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/roberta-ca" \
28
+ -H "Content-Type: application/json" \
29
+ -d '{"texts": ["Bon dia", "Com estàs?"], "normalize": true}'
30
+ ```
31
+
32
+ ## 📚 Available Models & Endpoints
33
 
34
+ | Endpoint | Model | Languages | Dimensions | Max Tokens | Loading Strategy |
35
+ |----------|--------|-----------|------------|------------|------------------|
36
+ | `/embed/jina-v3` | jinaai/jina-embeddings-v3 | Multilingual (30+) | 1024 | 8192 | **Startup** |
37
+ | `/embed/roberta-ca` | projecte-aina/roberta-large-ca-v2 | Catalan | 1024 | 512 | On-demand |
38
+ | `/embed/jina` | jinaai/jina-embeddings-v2-base-es | Spanish, English | 768 | 8192 | On-demand |
39
+ | `/embed/robertalex` | PlanTL-GOB-ES/RoBERTalex | Spanish Legal | 768 | 512 | On-demand |
40
+ | `/embed/legal-bert` | nlpaueb/legal-bert-base-uncased | English Legal | 768 | 512 | On-demand |
41
 
42
+ ### Model Recommendations
43
+
44
+ - **🌍 General multilingual**: Use `/embed/jina-v3` - Best overall performance
45
+ - **🇪🇸 Spanish general**: Use `/embed/jina` - Excellent for Spanish/English
46
+ - **🇪🇸 Spanish legal**: Use `/embed/robertalex` - Specialized for legal texts
47
+ - **🏴󠁧󠁢󠁣󠁡󠁴󠁿 Catalan**: Use `/embed/roberta-ca` - Best for Catalan text
48
+ - **🇬🇧 English legal**: Use `/embed/legal-bert` - Specialized for legal documents
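For programmatic routing, these recommendations can be encoded as a small lookup table. The sketch below is illustrative only; the dictionary keys and helper name are editorial choices, not part of the API:

```python
# Illustrative mapping from use case to endpoint, following the recommendations above.
RECOMMENDED_ENDPOINT = {
    "multilingual": "jina-v3",
    "spanish": "jina",
    "spanish-legal": "robertalex",
    "catalan": "roberta-ca",
    "english-legal": "legal-bert",
}

def endpoint_for(use_case: str) -> str:
    """Return the embedding path for a use case; falls back to the multilingual model."""
    return f"/embed/{RECOMMENDED_ENDPOINT.get(use_case, 'jina-v3')}"
```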
49
 
50
  ## 🔗 API Endpoints
51
 
52
+ ### Model-Specific Embedding Endpoints
 
 
 
 
53
 
54
+ Each model has its dedicated endpoint:
 
 
 
 
55
 
 
56
  ```
57
+ POST /embed/jina-v3 # Multilingual (startup model)
58
+ POST /embed/roberta-ca # Catalan
59
+ POST /embed/jina # Spanish/English
60
+ POST /embed/robertalex # Spanish Legal
61
+ POST /embed/legal-bert # English Legal
62
  ```
 
63
 
64
+ ### Utility Endpoints
65
+
66
  ```
67
+ GET / # API information
68
+ GET /health # Health check and model status
69
+ GET /models # List all models with specifications
70
  ```
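A quick way to exercise these utility endpoints from Python; the fields read in the loop (`model_id`, `dimensions`, `max_sequence_length`) follow the `ModelInfo` schema used elsewhere in this repository and should be treated as indicative:

```python
import requests

API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"

print(requests.get(f"{API_URL}/").json())        # basic API info
print(requests.get(f"{API_URL}/health").json())  # health and model status

# /models returns a list of model specifications
for model in requests.get(f"{API_URL}/models").json():
    print(model["model_id"], model["dimensions"], model["max_sequence_length"])
```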
 
71
 
72
  ## 📖 Usage Examples
73
 
 
75
 
76
  ```python
77
  import requests
 
78
 
79
  API_URL = "https://aurasystems-spanish-embeddings-api.hf.space"
80
 
81
+ # Example 1: Multilingual with Jina v3 (startup model - fastest)
82
  response = requests.post(
83
+ f"{API_URL}/embed/jina-v3",
84
  json={
85
+ "texts": [
86
+ "Hello world", # English
87
+ "Hola mundo", # Spanish
88
+ "Bonjour monde", # French
89
+ "こんにちは世界" # Japanese
90
+ ],
91
  "normalize": True
92
  }
93
  )
 
94
  result = response.json()
95
+ print(f"Jina v3: {result['dimensions']} dimensions") # 1024
 
96
 
97
+ # Example 2: Catalan text with RoBERTa-ca
98
+ response = requests.post(
99
+ f"{API_URL}/embed/roberta-ca",
100
  json={
101
  "texts": [
102
+ "Bon dia, com estàs?",
103
+ "Barcelona és una ciutat meravellosa",
104
+ "M'agrada la cultura catalana"
 
105
  ],
 
106
  "normalize": True
107
  }
108
  )
109
+ catalan_result = response.json()
110
+ print(f"Catalan: {catalan_result['dimensions']} dimensions") # 1024
111
 
112
+ # Example 3: Spanish legal text with RoBERTalex
113
+ response = requests.post(
114
+ f"{API_URL}/embed/robertalex",
115
  json={
116
  "texts": [
117
+ "Artículo primero de la constitución",
118
+ "El contrato será válido desde la fecha de firma",
119
+ "La jurisprudencia establece que..."
120
  ],
 
121
  "normalize": True
122
  }
123
  )
124
+ legal_result = response.json()
125
+ print(f"Spanish Legal: {legal_result['dimensions']} dimensions") # 768
126
 
127
+ # Example 4: English legal text with Legal-BERT
128
+ response = requests.post(
129
+ f"{API_URL}/embed/legal-bert",
130
  json={
131
  "texts": [
132
+ "This agreement is legally binding",
133
+ "The contract shall be governed by English law",
134
+ "The party hereby agrees and covenants"
135
  ],
 
136
  "normalize": True
137
  }
138
  )
139
+ english_legal_result = response.json()
140
+ print(f"English Legal: {english_legal_result['dimensions']} dimensions") # 768
141
 
142
+ # Example 5: Spanish/English bilingual with Jina v2
143
+ response = requests.post(
144
+ f"{API_URL}/embed/jina",
145
  json={
146
  "texts": [
147
+ "Inteligencia artificial y machine learning",
148
+ "Artificial intelligence and machine learning",
149
+ "Procesamiento de lenguaje natural"
150
  ],
 
151
  "normalize": True
152
  }
153
  )
154
+ bilingual_result = response.json()
155
+ print(f"Bilingual: {bilingual_result['dimensions']} dimensions") # 768
156
  ```
157
 
158
+ ### JavaScript/Node.js
159
 
160
  ```javascript
161
  const API_URL = 'https://aurasystems-spanish-embeddings-api.hf.space';
162
 
163
+ // Function to get embeddings from specific endpoint
164
+ async function getEmbeddings(endpoint, texts) {
165
+ const response = await fetch(`${API_URL}/embed/${endpoint}`, {
166
  method: 'POST',
167
  headers: {
168
  'Content-Type': 'application/json',
169
  },
170
  body: JSON.stringify({
171
  texts: texts,
 
172
  normalize: true
173
  })
174
  });
 
180
  return await response.json();
181
  }
182
 
183
+ // Usage examples
184
  try {
185
+ // Multilingual embeddings
186
+ const multilingualResult = await getEmbeddings('jina-v3', [
187
+ 'Hello world',
188
  'Hola mundo',
189
+ 'Ciao mondo'
190
+ ]);
191
+ console.log('Multilingual dimensions:', multilingualResult.dimensions);
192
+
193
+ // Catalan embeddings
194
+ const catalanResult = await getEmbeddings('roberta-ca', [
195
+ 'Bon dia',
196
+ 'Com estàs?'
197
  ]);
198
+ console.log('Catalan dimensions:', catalanResult.dimensions);
199
+
200
  } catch (error) {
201
+ console.error('Error:', error);
202
  }
203
  ```
204
 
205
+ ### cURL Examples
206
 
207
+ ```bash
208
+ # Multilingual with Jina v3 (startup model)
209
+ curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3" \
210
+ -H "Content-Type: application/json" \
211
+ -d '{
212
+ "texts": ["Hello", "Hola", "Bonjour"],
213
+ "normalize": true
214
+ }'
215
 
216
+ # Catalan with RoBERTa-ca
217
+ curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/roberta-ca" \
218
+ -H "Content-Type: application/json" \
219
+ -d '{
220
+ "texts": ["Bon dia", "Com estàs?"],
221
+ "normalize": true
222
+ }'
223
+
224
+ # Spanish legal with RoBERTalex
225
+ curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/robertalex" \
226
+ -H "Content-Type: application/json" \
227
+ -d '{
228
+ "texts": ["Artículo primero"],
229
+ "normalize": true
230
+ }'
231
 
232
+ # English legal with Legal-BERT
233
+ curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/legal-bert" \
234
+ -H "Content-Type: application/json" \
235
+ -d '{
236
+ "texts": ["This agreement is binding"],
237
+ "normalize": true
238
+ }'
239
+
240
+ # Spanish/English bilingual with Jina v2
241
+ curl -X POST "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina" \
242
+ -H "Content-Type: application/json" \
243
+ -d '{
244
+ "texts": ["Texto en español", "Text in English"],
245
+ "normalize": true
246
+ }'
247
  ```
248
 
249
+ ## 📋 Request/Response Schema
250
 
251
+ ### Request Body
252
 
253
  ```json
254
  {
255
+ "texts": ["text1", "text2", "..."],
 
 
 
256
  "normalize": true,
257
  "max_length": null
258
  }
 
260
 
261
  | Field | Type | Required | Default | Description |
262
  |-------|------|----------|---------|-------------|
263
+ | `texts` | array[string] | Yes | - | 1-50 texts to embed |
264
+ | `normalize` | boolean | No | `true` | L2-normalize embeddings |
265
+ | `max_length` | integer/null | No | `null` | Max tokens (model-specific limits) |
 
266
 
267
+ ### Response Body
268
 
269
  ```json
270
  {
271
+ "embeddings": [[0.123, -0.456, ...], [0.789, -0.012, ...]],
272
+ "model_used": "jina-v3",
273
+ "dimensions": 1024,
274
  "num_texts": 2
275
  }
276
  ```
 
278
  ## ⚡ Performance & Limits
279
 
280
  - **Maximum texts per request**: 50
281
+ - **Startup model**: `jina-v3` loads at startup (fastest response)
282
+ - **On-demand models**: Load on first request (~30-60s first time)
283
+ - **Typical response time**: 100-300ms after models are loaded
284
+ - **Memory optimization**: Automatic cleanup for large batches
285
+ - **CORS enabled**: Works from any domain
286
 
287
  ## 🔧 Advanced Usage
288
 
289
+ ### LangChain Integration
 
 
290
 
291
  ```python
292
+ from langchain.embeddings.base import Embeddings
293
+ from typing import List
294
+ import requests
295
+
296
+ class MultilingualEmbeddings(Embeddings):
297
+ """LangChain integration for multilingual embeddings"""
298
 
299
+ def __init__(self, endpoint: str = "jina-v3"):
300
+ """
301
+ Initialize with specific endpoint
302
+
303
+ Args:
304
+ endpoint: One of "jina-v3", "roberta-ca", "jina", "robertalex", "legal-bert"
305
+ """
306
+ self.api_url = f"https://aurasystems-spanish-embeddings-api.hf.space/embed/{endpoint}"
307
+ self.endpoint = endpoint
308
+
309
+ def embed_documents(self, texts: List[str]) -> List[List[float]]:
310
  response = requests.post(
311
+ self.api_url,
312
+ json={"texts": texts, "normalize": True}
 
 
 
 
313
  )
314
+ response.raise_for_status()
315
+ return response.json()["embeddings"]
316
 
317
+ def embed_query(self, text: str) -> List[float]:
318
+ return self.embed_documents([text])[0]
319
+
320
+ # Usage examples
321
+ multilingual_embeddings = MultilingualEmbeddings("jina-v3")
322
+ catalan_embeddings = MultilingualEmbeddings("roberta-ca")
323
+ spanish_legal_embeddings = MultilingualEmbeddings("robertalex")
324
  ```
325
 
326
+ ### Semantic Search
327
 
328
  ```python
329
  import numpy as np
330
  from typing import List, Tuple
331
 
332
+ def semantic_search(query: str, documents: List[str], endpoint: str = "jina-v3", top_k: int = 5):
333
+ """Semantic search using specific model endpoint"""
 
 
 
 
334
 
 
335
  response = requests.post(
336
+ f"https://aurasystems-spanish-embeddings-api.hf.space/embed/{endpoint}",
337
+ json={"texts": [query] + documents, "normalize": True}
 
 
 
 
338
  )
339
 
340
  embeddings = np.array(response.json()["embeddings"])
341
  query_embedding = embeddings[0]
342
  doc_embeddings = embeddings[1:]
343
 
344
+ # Calculate cosine similarities (already normalized)
345
  similarities = np.dot(doc_embeddings, query_embedding)
 
 
346
  top_indices = np.argsort(similarities)[::-1][:top_k]
347
 
348
  return [(idx, similarities[idx]) for idx in top_indices]
349
 
350
+ # Example: Multilingual search
351
  documents = [
352
+ "Python programming language",
353
+ "Lenguaje de programación Python",
354
+ "Llenguatge de programació Python",
355
+ "Language de programmation Python"
356
  ]
357
 
358
+ results = semantic_search("código en Python", documents, "jina-v3")
 
 
 
 
 
359
  for idx, score in results:
360
+ print(f"{score:.4f}: {documents[idx]}")
 
361
  ```
362
 
363
  ## 🚨 Error Handling
364
 
365
+ ### HTTP Status Codes
366
 
367
+ | Code | Description |
368
+ |------|-------------|
369
  | 200 | Success |
370
+ | 400 | Bad Request (validation error) |
371
+ | 422 | Unprocessable Entity (schema error) |
372
+ | 500 | Internal Server Error (model loading failed) |
 
373
 
374
+ ### Common Errors
375
 
376
+ ```python
377
+ # Handle errors properly
378
+ try:
379
+ response = requests.post(
380
+ "https://aurasystems-spanish-embeddings-api.hf.space/embed/jina-v3",
381
+ json={"texts": ["text"], "normalize": True}
382
+ )
383
+ response.raise_for_status()
384
+ result = response.json()
385
+ except requests.exceptions.HTTPError as e:
386
+ print(f"HTTP error: {e}")
387
+ print(f"Response: {response.text}")
388
+ except requests.exceptions.RequestException as e:
389
+ print(f"Request error: {e}")
390
  ```
391
 
392
+ ## 📊 Model Status Check
393
 
394
+ ```python
395
+ # Check which models are loaded
396
+ health = requests.get("https://aurasystems-spanish-embeddings-api.hf.space/health")
397
+ status = health.json()
 
 
 
398
 
399
+ print(f"API Status: {status['status']}")
400
+ print(f"Startup model loaded: {status['startup_model_loaded']}")
401
+ print(f"Available models: {status['available_models']}")
402
+ print(f"Models loaded: {status['models_count']}/5")
 
 
 
403
 
404
+ # Check endpoint status
405
+ for model, endpoint_status in status['endpoints'].items():
406
+ print(f"{model}: {endpoint_status}")
407
+ ```
 
 
 
408
 
409
+ ## 🔒 Authentication & Rate Limits
410
 
411
+ - **Authentication**: None required (open API)
412
+ - **Rate limits**: Generous limits on Hugging Face Spaces
413
+ - **CORS**: Enabled for all origins
414
+ - **Usage**: Free for research and commercial use
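Exact rate limits are not documented, so a conservative client-side retry for transient 429/5xx responses is a reasonable precaution; the thresholds below are illustrative guesses, not API guarantees:

```python
import time
import requests

def post_with_retry(url, payload, retries=3, backoff_seconds=2.0):
    """POST with simple linear backoff on transient errors; tune to taste."""
    for attempt in range(retries):
        response = requests.post(url, json=payload, timeout=120)
        if response.status_code in (429, 500, 502, 503) and attempt < retries - 1:
            time.sleep(backoff_seconds * (attempt + 1))
            continue
        response.raise_for_status()
        return response.json()
```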
415
 
416
+ ## 🏗️ Architecture
417
 
418
+ ### Endpoint-Per-Model Design
419
+ - **Startup model**: `jina-v3` loads at application startup for fastest response
420
+ - **On-demand loading**: Other models load when first requested
421
+ - **Memory optimization**: Progressive loading reduces startup time
422
+ - **Model caching**: Once loaded, models remain in memory for fast inference
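Condensed from the `ensure_model_loaded` helper in the removed `app_endpoints.py` shown further down this diff, the on-demand pattern amounts to a guarded cache lookup (`load_models` comes from this repo's `utils.helpers`):

```python
from utils.helpers import load_models  # project helper, see the deleted app files below

models_cache = {}  # shared across requests; models stay resident once loaded

def ensure_model_loaded(model_name: str):
    """Load a model into the cache on first use; later requests reuse the cached instance."""
    if model_name not in models_cache:
        models_cache.update(load_models([model_name]))
    return models_cache[model_name]
```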
423
 
424
+ ### Technical Stack
425
+ - **FastAPI**: Modern async web framework
426
+ - **Transformers**: Hugging Face model library
427
+ - **PyTorch**: Deep learning backend
428
+ - **Docker**: Containerized deployment
429
+ - **Hugging Face Spaces**: Cloud hosting platform
430
 
431
+ ## 📄 Model Licenses
432
 
433
+ - **Jina models**: Apache 2.0
434
+ - **RoBERTa models**: MIT/Apache 2.0
435
+ - **Legal-BERT**: Apache 2.0
 
 
436
 
437
+ ## 🤝 Support & Contributing
438
 
439
+ - **Issues**: [Hugging Face Space Discussions](https://huggingface.co/spaces/AuraSystems/spanish-embeddings-api/discussions)
440
+ - **Interactive Docs**: [FastAPI Swagger UI](https://aurasystems-spanish-embeddings-api.hf.space/docs)
441
+ - **Model Papers**: Check individual model pages on Hugging Face
442
 
443
  ---
444
 
445
+ Built with ❤️ using **FastAPI** and **Hugging Face Transformers**
app_endpoints.py DELETED
@@ -1,308 +0,0 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from contextlib import asynccontextmanager
4
- from typing import List
5
- import torch
6
- import uvicorn
7
-
8
- from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
9
- from utils.helpers import load_models, get_embeddings, cleanup_memory
10
-
11
- # Global model cache
12
- models_cache = {}
13
-
14
- # Load jina-v3 at startup (most important model)
15
- STARTUP_MODEL = "jina-v3"
16
-
17
- @asynccontextmanager
18
- async def lifespan(app: FastAPI):
19
- """Application lifespan handler for startup and shutdown"""
20
- # Startup - load jina-v3 model
21
- try:
22
- global models_cache
23
- print(f"Loading startup model: {STARTUP_MODEL}...")
24
- models_cache = load_models([STARTUP_MODEL])
25
- print(f"Startup model loaded successfully: {list(models_cache.keys())}")
26
- yield
27
- except Exception as e:
28
- print(f"Failed to load startup model: {str(e)}")
29
- # Continue anyway - jina-v3 can be loaded on demand if startup fails
30
- yield
31
- finally:
32
- # Shutdown - cleanup resources
33
- cleanup_memory()
34
-
35
- def ensure_model_loaded(model_name: str, max_length_limit: int):
36
- """Load a specific model on demand if not already loaded"""
37
- global models_cache
38
- if model_name not in models_cache:
39
- try:
40
- print(f"Loading model on demand: {model_name}...")
41
- new_models = load_models([model_name])
42
- models_cache.update(new_models)
43
- print(f"Model {model_name} loaded successfully!")
44
- except Exception as e:
45
- print(f"Failed to load model {model_name}: {str(e)}")
46
- raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
47
-
48
- def validate_request_for_model(request: EmbeddingRequest, model_name: str, max_length_limit: int):
49
- """Validate request parameters for specific model"""
50
- if not request.texts:
51
- raise HTTPException(status_code=400, detail="No texts provided")
52
-
53
- if len(request.texts) > 50:
54
- raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
55
-
56
- if request.max_length is not None and request.max_length > max_length_limit:
57
- raise HTTPException(status_code=400, detail=f"Max length for {model_name} is {max_length_limit}")
58
-
59
- app = FastAPI(
60
- title="Multilingual & Legal Embedding API",
61
- description="Multi-model embedding API with dedicated endpoints per model",
62
- version="4.0.0",
63
- lifespan=lifespan
64
- )
65
-
66
- # Add CORS middleware to allow cross-origin requests
67
- app.add_middleware(
68
- CORSMiddleware,
69
- allow_origins=["*"], # In production, specify actual domains
70
- allow_credentials=True,
71
- allow_methods=["*"],
72
- allow_headers=["*"],
73
- )
74
-
75
- @app.get("/")
76
- async def root():
77
- return {
78
- "message": "Multilingual & Legal Embedding API - Endpoint Per Model",
79
- "version": "4.0.0",
80
- "status": "running",
81
- "docs": "/docs",
82
- "startup_model": STARTUP_MODEL,
83
- "available_endpoints": {
84
- "jina-v3": "/embed/jina-v3",
85
- "roberta-ca": "/embed/roberta-ca",
86
- "jina": "/embed/jina",
87
- "robertalex": "/embed/robertalex",
88
- "legal-bert": "/embed/legal-bert"
89
- }
90
- }
91
-
92
- # Jina v3 - Multilingual (loads at startup)
93
- @app.post("/embed/jina-v3", response_model=EmbeddingResponse)
94
- async def embed_jina_v3(request: EmbeddingRequest):
95
- """Generate embeddings using Jina v3 model (multilingual)"""
96
- try:
97
- ensure_model_loaded("jina-v3", 8192)
98
- validate_request_for_model(request, "jina-v3", 8192)
99
-
100
- embeddings = get_embeddings(
101
- request.texts,
102
- "jina-v3",
103
- models_cache,
104
- request.normalize,
105
- request.max_length
106
- )
107
-
108
- return EmbeddingResponse(
109
- embeddings=embeddings,
110
- model_used="jina-v3",
111
- dimensions=len(embeddings[0]) if embeddings else 0,
112
- num_texts=len(request.texts)
113
- )
114
-
115
- except ValueError as e:
116
- raise HTTPException(status_code=400, detail=str(e))
117
- except Exception as e:
118
- raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
119
-
120
- # Catalan RoBERTa
121
- @app.post("/embed/roberta-ca", response_model=EmbeddingResponse)
122
- async def embed_roberta_ca(request: EmbeddingRequest):
123
- """Generate embeddings using Catalan RoBERTa model"""
124
- try:
125
- ensure_model_loaded("roberta-ca", 512)
126
- validate_request_for_model(request, "roberta-ca", 512)
127
-
128
- embeddings = get_embeddings(
129
- request.texts,
130
- "roberta-ca",
131
- models_cache,
132
- request.normalize,
133
- request.max_length
134
- )
135
-
136
- return EmbeddingResponse(
137
- embeddings=embeddings,
138
- model_used="roberta-ca",
139
- dimensions=len(embeddings[0]) if embeddings else 0,
140
- num_texts=len(request.texts)
141
- )
142
-
143
- except ValueError as e:
144
- raise HTTPException(status_code=400, detail=str(e))
145
- except Exception as e:
146
- raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
147
-
148
- # Jina v2 - Spanish/English
149
- @app.post("/embed/jina", response_model=EmbeddingResponse)
150
- async def embed_jina(request: EmbeddingRequest):
151
- """Generate embeddings using Jina v2 Spanish/English model"""
152
- try:
153
- ensure_model_loaded("jina", 8192)
154
- validate_request_for_model(request, "jina", 8192)
155
-
156
- embeddings = get_embeddings(
157
- request.texts,
158
- "jina",
159
- models_cache,
160
- request.normalize,
161
- request.max_length
162
- )
163
-
164
- return EmbeddingResponse(
165
- embeddings=embeddings,
166
- model_used="jina",
167
- dimensions=len(embeddings[0]) if embeddings else 0,
168
- num_texts=len(request.texts)
169
- )
170
-
171
- except ValueError as e:
172
- raise HTTPException(status_code=400, detail=str(e))
173
- except Exception as e:
174
- raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
175
-
176
- # RoBERTalex - Spanish Legal
177
- @app.post("/embed/robertalex", response_model=EmbeddingResponse)
178
- async def embed_robertalex(request: EmbeddingRequest):
179
- """Generate embeddings using RoBERTalex Spanish legal model"""
180
- try:
181
- ensure_model_loaded("robertalex", 512)
182
- validate_request_for_model(request, "robertalex", 512)
183
-
184
- embeddings = get_embeddings(
185
- request.texts,
186
- "robertalex",
187
- models_cache,
188
- request.normalize,
189
- request.max_length
190
- )
191
-
192
- return EmbeddingResponse(
193
- embeddings=embeddings,
194
- model_used="robertalex",
195
- dimensions=len(embeddings[0]) if embeddings else 0,
196
- num_texts=len(request.texts)
197
- )
198
-
199
- except ValueError as e:
200
- raise HTTPException(status_code=400, detail=str(e))
201
- except Exception as e:
202
- raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
203
-
204
- # Legal BERT - English Legal
205
- @app.post("/embed/legal-bert", response_model=EmbeddingResponse)
206
- async def embed_legal_bert(request: EmbeddingRequest):
207
- """Generate embeddings using Legal BERT English model"""
208
- try:
209
- ensure_model_loaded("legal-bert", 512)
210
- validate_request_for_model(request, "legal-bert", 512)
211
-
212
- embeddings = get_embeddings(
213
- request.texts,
214
- "legal-bert",
215
- models_cache,
216
- request.normalize,
217
- request.max_length
218
- )
219
-
220
- return EmbeddingResponse(
221
- embeddings=embeddings,
222
- model_used="legal-bert",
223
- dimensions=len(embeddings[0]) if embeddings else 0,
224
- num_texts=len(request.texts)
225
- )
226
-
227
- except ValueError as e:
228
- raise HTTPException(status_code=400, detail=str(e))
229
- except Exception as e:
230
- raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
231
-
232
- @app.get("/models", response_model=List[ModelInfo])
233
- async def list_models():
234
- """List available models and their specifications"""
235
- return [
236
- ModelInfo(
237
- model_id="jina-v3",
238
- name="jinaai/jina-embeddings-v3",
239
- dimensions=1024,
240
- max_sequence_length=8192,
241
- languages=["Multilingual"],
242
- model_type="multilingual",
243
- description="Latest Jina v3 with superior multilingual performance - loaded at startup"
244
- ),
245
- ModelInfo(
246
- model_id="roberta-ca",
247
- name="projecte-aina/roberta-large-ca-v2",
248
- dimensions=1024,
249
- max_sequence_length=512,
250
- languages=["Catalan"],
251
- model_type="general",
252
- description="Catalan RoBERTa-large model trained on large corpus"
253
- ),
254
- ModelInfo(
255
- model_id="jina",
256
- name="jinaai/jina-embeddings-v2-base-es",
257
- dimensions=768,
258
- max_sequence_length=8192,
259
- languages=["Spanish", "English"],
260
- model_type="bilingual",
261
- description="Bilingual Spanish-English embeddings with long context support"
262
- ),
263
- ModelInfo(
264
- model_id="robertalex",
265
- name="PlanTL-GOB-ES/RoBERTalex",
266
- dimensions=768,
267
- max_sequence_length=512,
268
- languages=["Spanish"],
269
- model_type="legal domain",
270
- description="Spanish legal domain specialized embeddings"
271
- ),
272
- ModelInfo(
273
- model_id="legal-bert",
274
- name="nlpaueb/legal-bert-base-uncased",
275
- dimensions=768,
276
- max_sequence_length=512,
277
- languages=["English"],
278
- model_type="legal domain",
279
- description="English legal domain BERT model"
280
- )
281
- ]
282
-
283
- @app.get("/health")
284
- async def health_check():
285
- """Health check endpoint"""
286
- startup_loaded = STARTUP_MODEL in models_cache
287
-
288
- return {
289
- "status": "healthy" if startup_loaded else "partial",
290
- "startup_model": STARTUP_MODEL,
291
- "startup_model_loaded": startup_loaded,
292
- "available_models": list(models_cache.keys()),
293
- "models_count": len(models_cache),
294
- "endpoints": {
295
- "jina-v3": f"/embed/jina-v3 {'(ready)' if 'jina-v3' in models_cache else '(loads on demand)'}",
296
- "roberta-ca": f"/embed/roberta-ca {'(ready)' if 'roberta-ca' in models_cache else '(loads on demand)'}",
297
- "jina": f"/embed/jina {'(ready)' if 'jina' in models_cache else '(loads on demand)'}",
298
- "robertalex": f"/embed/robertalex {'(ready)' if 'robertalex' in models_cache else '(loads on demand)'}",
299
- "legal-bert": f"/embed/legal-bert {'(ready)' if 'legal-bert' in models_cache else '(loads on demand)'}"
300
- }
301
- }
302
-
303
- if __name__ == "__main__":
304
- # Set multi-threading for CPU
305
- torch.set_num_threads(8)
306
- torch.set_num_interop_threads(1)
307
-
308
- uvicorn.run(app, host="0.0.0.0", port=7860)
app_hybrid_backup.py DELETED
@@ -1,189 +0,0 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from contextlib import asynccontextmanager
4
- from typing import List
5
- import torch
6
- import uvicorn
7
-
8
- from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
9
- from utils.helpers import load_models, get_embeddings, cleanup_memory
10
-
11
- # Global model cache
12
- models_cache = {}
13
-
14
- # Models to load at startup (most frequently used)
15
- STARTUP_MODELS = ["jina-v3", "roberta-ca"]
16
- # Models to load on demand
17
- ON_DEMAND_MODELS = ["jina", "robertalex", "legal-bert"]
18
-
19
- @asynccontextmanager
20
- async def lifespan(app: FastAPI):
21
- """Application lifespan handler for startup and shutdown"""
22
- # Startup - load priority models
23
- try:
24
- global models_cache
25
- print(f"Loading startup models: {STARTUP_MODELS}...")
26
- models_cache = load_models(STARTUP_MODELS)
27
- print(f"Startup models loaded successfully: {list(models_cache.keys())}")
28
- yield
29
- except Exception as e:
30
- print(f"Failed to load startup models: {str(e)}")
31
- # Continue anyway - models can be loaded on demand
32
- yield
33
- finally:
34
- # Shutdown - cleanup resources
35
- cleanup_memory()
36
-
37
- def ensure_model_loaded(model_name: str):
38
- """Load a specific model on demand if not already loaded"""
39
- global models_cache
40
- if model_name not in models_cache:
41
- if model_name in ON_DEMAND_MODELS:
42
- try:
43
- print(f"Loading model on demand: {model_name}...")
44
- new_models = load_models([model_name])
45
- models_cache.update(new_models)
46
- print(f"Model {model_name} loaded successfully!")
47
- except Exception as e:
48
- print(f"Failed to load model {model_name}: {str(e)}")
49
- raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
50
- else:
51
- raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")
52
-
53
- app = FastAPI(
54
- title="Multilingual & Legal Embedding API",
55
- description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
56
- version="3.0.0",
57
- lifespan=lifespan
58
- )
59
-
60
- # Add CORS middleware to allow cross-origin requests
61
- app.add_middleware(
62
- CORSMiddleware,
63
- allow_origins=["*"], # In production, specify actual domains
64
- allow_credentials=True,
65
- allow_methods=["*"],
66
- allow_headers=["*"],
67
- )
68
-
69
- @app.get("/")
70
- async def root():
71
- return {
72
- "message": "Multilingual & Legal Embedding API",
73
- "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
74
- "status": "running",
75
- "docs": "/docs",
76
- "total_models": 5
77
- }
78
-
79
- @app.post("/embed", response_model=EmbeddingResponse)
80
- async def create_embeddings(request: EmbeddingRequest):
81
- """Generate embeddings for input texts"""
82
- try:
83
- # Load specific model on demand if needed
84
- ensure_model_loaded(request.model)
85
-
86
- if not request.texts:
87
- raise HTTPException(status_code=400, detail="No texts provided")
88
-
89
- if len(request.texts) > 50: # Rate limiting
90
- raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
91
-
92
- embeddings = get_embeddings(
93
- request.texts,
94
- request.model,
95
- models_cache,
96
- request.normalize,
97
- request.max_length
98
- )
99
-
100
- # Cleanup memory after large batches
101
- if len(request.texts) > 20:
102
- cleanup_memory()
103
-
104
- return EmbeddingResponse(
105
- embeddings=embeddings,
106
- model_used=request.model,
107
- dimensions=len(embeddings[0]) if embeddings else 0,
108
- num_texts=len(request.texts)
109
- )
110
-
111
- except ValueError as e:
112
- raise HTTPException(status_code=400, detail=str(e))
113
- except Exception as e:
114
- raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
115
-
116
- @app.get("/models", response_model=List[ModelInfo])
117
- async def list_models():
118
- """List available models and their specifications"""
119
- return [
120
- ModelInfo(
121
- model_id="jina",
122
- name="jinaai/jina-embeddings-v2-base-es",
123
- dimensions=768,
124
- max_sequence_length=8192,
125
- languages=["Spanish", "English"],
126
- model_type="bilingual",
127
- description="Bilingual Spanish-English embeddings with long context support"
128
- ),
129
- ModelInfo(
130
- model_id="robertalex",
131
- name="PlanTL-GOB-ES/RoBERTalex",
132
- dimensions=768,
133
- max_sequence_length=512,
134
- languages=["Spanish"],
135
- model_type="legal domain",
136
- description="Spanish legal domain specialized embeddings"
137
- ),
138
- ModelInfo(
139
- model_id="jina-v3",
140
- name="jinaai/jina-embeddings-v3",
141
- dimensions=1024,
142
- max_sequence_length=8192,
143
- languages=["Multilingual"],
144
- model_type="multilingual",
145
- description="Latest Jina v3 with superior multilingual performance"
146
- ),
147
- ModelInfo(
148
- model_id="legal-bert",
149
- name="nlpaueb/legal-bert-base-uncased",
150
- dimensions=768,
151
- max_sequence_length=512,
152
- languages=["English"],
153
- model_type="legal domain",
154
- description="English legal domain BERT model"
155
- ),
156
- ModelInfo(
157
- model_id="roberta-ca",
158
- name="projecte-aina/roberta-large-ca-v2",
159
- dimensions=1024,
160
- max_sequence_length=512,
161
- languages=["Catalan"],
162
- model_type="general",
163
- description="Catalan RoBERTa-large model trained on large corpus"
164
- )
165
- ]
166
-
167
- @app.get("/health")
168
- async def health_check():
169
- """Health check endpoint"""
170
- startup_models_loaded = all(model in models_cache for model in STARTUP_MODELS)
171
- all_models_loaded = len(models_cache) == 5
172
-
173
- return {
174
- "status": "healthy" if startup_models_loaded else "partial",
175
- "startup_models_loaded": startup_models_loaded,
176
- "all_models_loaded": all_models_loaded,
177
- "available_models": list(models_cache.keys()),
178
- "startup_models": STARTUP_MODELS,
179
- "on_demand_models": ON_DEMAND_MODELS,
180
- "models_count": len(models_cache),
181
- "note": f"Startup models: {STARTUP_MODELS} | On-demand: {ON_DEMAND_MODELS}"
182
- }
183
-
184
- if __name__ == "__main__":
185
- # Set multi-threading for CPU
186
- torch.set_num_threads(8)
187
- torch.set_num_interop_threads(1)
188
-
189
- uvicorn.run(app, host="0.0.0.0", port=7860)
app_old.py DELETED
@@ -1,159 +0,0 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from contextlib import asynccontextmanager
4
- from typing import List
5
- import torch
6
- import uvicorn
7
-
8
- from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
9
- from utils.helpers import load_models, get_embeddings, cleanup_memory
10
-
11
- # Global model cache
12
- models_cache = {}
13
-
14
- @asynccontextmanager
15
- async def lifespan(app: FastAPI):
16
- """Application lifespan handler for startup and shutdown"""
17
- # Startup
18
- try:
19
- global models_cache
20
- print("Loading models...")
21
- models_cache = load_models()
22
- print("All models loaded successfully!")
23
- yield
24
- except Exception as e:
25
- print(f"Failed to load models: {str(e)}")
26
- raise
27
- finally:
28
- # Shutdown - cleanup resources
29
- cleanup_memory()
30
-
31
- app = FastAPI(
32
- title="Multilingual & Legal Embedding API",
33
- description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
34
- version="3.0.0",
35
- lifespan=lifespan
36
- )
37
-
38
- # Add CORS middleware to allow cross-origin requests
39
- app.add_middleware(
40
- CORSMiddleware,
41
- allow_origins=["*"], # In production, specify actual domains
42
- allow_credentials=True,
43
- allow_methods=["*"],
44
- allow_headers=["*"],
45
- )
46
-
47
- @app.get("/")
48
- async def root():
49
- return {
50
- "message": "Multilingual & Legal Embedding API",
51
- "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
52
- "status": "running",
53
- "docs": "/docs",
54
- "total_models": 5
55
- }
56
-
57
- @app.post("/embed", response_model=EmbeddingResponse)
58
- async def create_embeddings(request: EmbeddingRequest):
59
- """Generate embeddings for input texts"""
60
- try:
61
- if not request.texts:
62
- raise HTTPException(status_code=400, detail="No texts provided")
63
-
64
- if len(request.texts) > 50: # Rate limiting
65
- raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
66
-
67
- embeddings = get_embeddings(
68
- request.texts,
69
- request.model,
70
- models_cache,
71
- request.normalize,
72
- request.max_length
73
- )
74
-
75
- # Cleanup memory after large batches
76
- if len(request.texts) > 20:
77
- cleanup_memory()
78
-
79
- return EmbeddingResponse(
80
- embeddings=embeddings,
81
- model_used=request.model,
82
- dimensions=len(embeddings[0]) if embeddings else 0,
83
- num_texts=len(request.texts)
84
- )
85
-
86
- except ValueError as e:
87
- raise HTTPException(status_code=400, detail=str(e))
88
- except Exception as e:
89
- raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
90
-
91
- @app.get("/models", response_model=List[ModelInfo])
92
- async def list_models():
93
- """List available models and their specifications"""
94
- return [
95
- ModelInfo(
96
- model_id="jina",
97
- name="jinaai/jina-embeddings-v2-base-es",
98
- dimensions=768,
99
- max_sequence_length=8192,
100
- languages=["Spanish", "English"],
101
- model_type="bilingual",
102
- description="Bilingual Spanish-English embeddings with long context support"
103
- ),
104
- ModelInfo(
105
- model_id="robertalex",
106
- name="PlanTL-GOB-ES/RoBERTalex",
107
- dimensions=768,
108
- max_sequence_length=512,
109
- languages=["Spanish"],
110
- model_type="legal domain",
111
- description="Spanish legal domain specialized embeddings"
112
- ),
113
- ModelInfo(
114
- model_id="jina-v3",
115
- name="jinaai/jina-embeddings-v3",
116
- dimensions=1024,
117
- max_sequence_length=8192,
118
- languages=["Multilingual"],
119
- model_type="multilingual",
120
- description="Latest Jina v3 with superior multilingual performance"
121
- ),
122
- ModelInfo(
123
- model_id="legal-bert",
124
- name="nlpaueb/legal-bert-base-uncased",
125
- dimensions=768,
126
- max_sequence_length=512,
127
- languages=["English"],
128
- model_type="legal domain",
129
- description="English legal domain BERT model"
130
- ),
131
- ModelInfo(
132
- model_id="roberta-ca",
133
- name="projecte-aina/roberta-large-ca-v2",
134
- dimensions=1024,
135
- max_sequence_length=512,
136
- languages=["Catalan"],
137
- model_type="general",
138
- description="Catalan RoBERTa-large model trained on large corpus"
139
- )
140
- ]
141
-
142
- @app.get("/health")
143
- async def health_check():
144
- """Health check endpoint"""
145
- models_loaded = len(models_cache) == 5
146
- return {
147
- "status": "healthy" if models_loaded else "degraded",
148
- "models_loaded": models_loaded,
149
- "available_models": list(models_cache.keys()),
150
- "expected_models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
151
- "models_count": len(models_cache)
152
- }
153
-
154
- if __name__ == "__main__":
155
- # Set multi-threading for CPU
156
- torch.set_num_threads(8)
157
- torch.set_num_interop_threads(1)
158
-
159
- uvicorn.run(app, host="0.0.0.0", port=7860)
app_old_minimal.py DELETED
@@ -1,165 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from typing import List
-import torch
-import uvicorn
-
-from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
-from utils.helpers import load_models, get_embeddings, cleanup_memory
-
-# Global model cache - completely on-demand loading
-models_cache = {}
-
-# All models load on demand to test deployment
-ON_DEMAND_MODELS = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
-
-def ensure_model_loaded(model_name: str):
-    """Load a specific model on demand if not already loaded"""
-    global models_cache
-    if model_name not in models_cache:
-        if model_name in ON_DEMAND_MODELS:
-            try:
-                print(f"Loading model on demand: {model_name}...")
-                new_models = load_models([model_name])
-                models_cache.update(new_models)
-                print(f"Model {model_name} loaded successfully!")
-            except Exception as e:
-                print(f"Failed to load model {model_name}: {str(e)}")
-                raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
-        else:
-            raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")
-
-app = FastAPI(
-    title="Multilingual & Legal Embedding API",
-    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
-    version="3.0.0"
-)
-
-# Add CORS middleware to allow cross-origin requests
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"], # In production, specify actual domains
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-@app.get("/")
-async def root():
-    return {
-        "message": "Multilingual & Legal Embedding API - Minimal Version",
-        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
-        "status": "running",
-        "docs": "/docs",
-        "total_models": 5,
-        "note": "All models load on first request"
-    }
-
-@app.post("/embed", response_model=EmbeddingResponse)
-async def create_embeddings(request: EmbeddingRequest):
-    """Generate embeddings for input texts"""
-    try:
-        # Load specific model on demand
-        ensure_model_loaded(request.model)
-
-        if not request.texts:
-            raise HTTPException(status_code=400, detail="No texts provided")
-
-        if len(request.texts) > 50: # Rate limiting
-            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
-
-        embeddings = get_embeddings(
-            request.texts,
-            request.model,
-            models_cache,
-            request.normalize,
-            request.max_length
-        )
-
-        # Cleanup memory after large batches
-        if len(request.texts) > 20:
-            cleanup_memory()
-
-        return EmbeddingResponse(
-            embeddings=embeddings,
-            model_used=request.model,
-            dimensions=len(embeddings[0]) if embeddings else 0,
-            num_texts=len(request.texts)
-        )
-
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
-
-@app.get("/models", response_model=List[ModelInfo])
-async def list_models():
-    """List available models and their specifications"""
-    return [
-        ModelInfo(
-            model_id="jina",
-            name="jinaai/jina-embeddings-v2-base-es",
-            dimensions=768,
-            max_sequence_length=8192,
-            languages=["Spanish", "English"],
-            model_type="bilingual",
-            description="Bilingual Spanish-English embeddings with long context support"
-        ),
-        ModelInfo(
-            model_id="robertalex",
-            name="PlanTL-GOB-ES/RoBERTalex",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["Spanish"],
-            model_type="legal domain",
-            description="Spanish legal domain specialized embeddings"
-        ),
-        ModelInfo(
-            model_id="jina-v3",
-            name="jinaai/jina-embeddings-v3",
-            dimensions=1024,
-            max_sequence_length=8192,
-            languages=["Multilingual"],
-            model_type="multilingual",
-            description="Latest Jina v3 with superior multilingual performance"
-        ),
-        ModelInfo(
-            model_id="legal-bert",
-            name="nlpaueb/legal-bert-base-uncased",
-            dimensions=768,
-            max_sequence_length=512,
-            languages=["English"],
-            model_type="legal domain",
-            description="English legal domain BERT model"
-        ),
-        ModelInfo(
-            model_id="roberta-ca",
-            name="projecte-aina/roberta-large-ca-v2",
-            dimensions=1024,
-            max_sequence_length=512,
-            languages=["Catalan"],
-            model_type="general",
-            description="Catalan RoBERTa-large model trained on large corpus"
-        )
-    ]
-
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    all_models_loaded = len(models_cache) == 5
-
-    return {
-        "status": "healthy",
-        "all_models_loaded": all_models_loaded,
-        "available_models": list(models_cache.keys()),
-        "on_demand_models": ON_DEMAND_MODELS,
-        "models_count": len(models_cache),
-        "note": "All models load on first embedding request - minimal deployment version"
-    }
-
-if __name__ == "__main__":
-    # Set multi-threading for CPU
-    torch.set_num_threads(8)
-    torch.set_num_interop_threads(1)
-
-    uvicorn.run(app, host="0.0.0.0", port=7860)
test_api.py DELETED
@@ -1,64 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple test script for the embedding API
-"""
-
-import requests
-import json
-import time
-
-def test_api(base_url="https://aurasystems-spanish-embeddings-api.hf.space"):
-    """Test the API endpoints"""
-
-    print(f"Testing API at {base_url}")
-
-    # Test root endpoint
-    try:
-        response = requests.get(f"{base_url}/")
-        print(f"✓ Root endpoint: {response.status_code}")
-        print(f" Response: {response.json()}")
-    except Exception as e:
-        print(f"✗ Root endpoint failed: {e}")
-        return False
-
-    # Test health endpoint
-    try:
-        response = requests.get(f"{base_url}/health")
-        print(f"✓ Health endpoint: {response.status_code}")
-        health_data = response.json()
-        print(f" Models loaded: {health_data.get('models_loaded', False)}")
-        print(f" Available models: {health_data.get('available_models', [])}")
-    except Exception as e:
-        print(f"✗ Health endpoint failed: {e}")
-
-    # Test models endpoint
-    try:
-        response = requests.get(f"{base_url}/models")
-        print(f"✓ Models endpoint: {response.status_code}")
-        models = response.json()
-        print(f" Found {len(models)} model definitions")
-    except Exception as e:
-        print(f"✗ Models endpoint failed: {e}")
-
-    # Test embedding endpoint
-    try:
-        payload = {
-            "texts": ["Hello world", "Test text"],
-            "model": "jina",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Embed endpoint: {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Embed endpoint failed: {e}")
-
-    return True
-
-if __name__ == "__main__":
-    test_api()
test_hybrid.py DELETED
@@ -1,98 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for hybrid model loading
-"""
-
-import requests
-import json
-import time
-
-def test_hybrid_api(base_url="https://aurasystems-spanish-embeddings-api.hf.space"):
-    """Test the hybrid API"""
-
-    print(f"Testing hybrid API at {base_url}")
-
-    # Test health endpoint first
-    try:
-        response = requests.get(f"{base_url}/health")
-        print(f"✓ Health endpoint: {response.status_code}")
-        if response.status_code == 200:
-            health_data = response.json()
-            print(f" Startup models loaded: {health_data.get('startup_models_loaded', False)}")
-            print(f" Available models: {health_data.get('available_models', [])}")
-            print(f" Note: {health_data.get('note', 'N/A')}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Health endpoint failed: {e}")
-        return False
-
-    # Test startup model (jina-v3)
-    try:
-        payload = {
-            "texts": ["Hola mundo", "Bonjour le monde"],
-            "model": "jina-v3",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Startup model (jina-v3): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Startup model test failed: {e}")
-
-    # Test startup model (roberta-ca)
-    try:
-        payload = {
-            "texts": ["Bon dia", "Com estàs?"],
-            "model": "roberta-ca",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ Startup model (roberta-ca): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ Startup model test failed: {e}")
-
-    # Test on-demand model (jina)
-    try:
-        payload = {
-            "texts": ["Texto en español"],
-            "model": "jina",
-            "normalize": True
-        }
-        response = requests.post(f"{base_url}/embed", json=payload)
-        print(f"✓ On-demand model (jina): {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f" Generated {data.get('num_texts', 0)} embeddings")
-            print(f" Dimensions: {data.get('dimensions', 0)}")
-        else:
-            print(f" Error: {response.text}")
-    except Exception as e:
-        print(f"✗ On-demand model test failed: {e}")
-
-    # Check health again to see all models
-    try:
-        response = requests.get(f"{base_url}/health")
-        if response.status_code == 200:
-            health_data = response.json()
-            print(f"✓ Final health check:")
-            print(f" All models loaded: {health_data.get('all_models_loaded', False)}")
-            print(f" Available models: {health_data.get('available_models', [])}")
-    except Exception as e:
-        print(f"✗ Final health check failed: {e}")
-
-    return True
-
-if __name__ == "__main__":
-    test_hybrid_api()