Jordi Catafal committed
Commit ebb30ca · Parent: 5861022

another try

__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
__pycache__/app_minimal.cpython-311.pyc ADDED
Binary file (6.78 kB).
 
app.py CHANGED
@@ -1,6 +1,5 @@
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from contextlib import asynccontextmanager
 from typing import List
 import torch
 import uvicorn
@@ -8,31 +7,11 @@ import uvicorn
 from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
 from utils.helpers import load_models, get_embeddings, cleanup_memory
 
-# Global model cache
+# Global model cache - completely on-demand loading
 models_cache = {}
 
-# Models to load at startup (most frequently used)
-STARTUP_MODELS = ["jina-v3", "roberta-ca"]
-# Models to load on demand
-ON_DEMAND_MODELS = ["jina", "robertalex", "legal-bert"]
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application lifespan handler for startup and shutdown"""
-    # Startup - load priority models
-    try:
-        global models_cache
-        print(f"Loading startup models: {STARTUP_MODELS}...")
-        models_cache = load_models(STARTUP_MODELS)
-        print(f"Startup models loaded successfully: {list(models_cache.keys())}")
-        yield
-    except Exception as e:
-        print(f"Failed to load startup models: {str(e)}")
-        # Continue anyway - models can be loaded on demand
-        yield
-    finally:
-        # Shutdown - cleanup resources
-        cleanup_memory()
+# All models load on demand to test deployment
+ON_DEMAND_MODELS = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
 
 def ensure_model_loaded(model_name: str):
     """Load a specific model on demand if not already loaded"""
@@ -53,8 +32,7 @@ def ensure_model_loaded(model_name: str):
 app = FastAPI(
     title="Multilingual & Legal Embedding API",
     description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
-    version="3.0.0",
-    lifespan=lifespan
+    version="3.0.0"
 )
 
 # Add CORS middleware to allow cross-origin requests
@@ -69,18 +47,19 @@ app.add_middleware(
 @app.get("/")
 async def root():
     return {
-        "message": "Multilingual & Legal Embedding API",
+        "message": "Multilingual & Legal Embedding API - Minimal Version",
         "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
         "status": "running",
         "docs": "/docs",
-        "total_models": 5
+        "total_models": 5,
+        "note": "All models load on first request"
     }
 
 @app.post("/embed", response_model=EmbeddingResponse)
 async def create_embeddings(request: EmbeddingRequest):
     """Generate embeddings for input texts"""
     try:
-        # Load specific model on demand if needed
+        # Load specific model on demand
         ensure_model_loaded(request.model)
 
         if not request.texts:
@@ -167,18 +146,15 @@ async def list_models():
 @app.get("/health")
 async def health_check():
     """Health check endpoint"""
-    startup_models_loaded = all(model in models_cache for model in STARTUP_MODELS)
     all_models_loaded = len(models_cache) == 5
 
     return {
-        "status": "healthy" if startup_models_loaded else "partial",
-        "startup_models_loaded": startup_models_loaded,
+        "status": "healthy",
         "all_models_loaded": all_models_loaded,
         "available_models": list(models_cache.keys()),
-        "startup_models": STARTUP_MODELS,
         "on_demand_models": ON_DEMAND_MODELS,
         "models_count": len(models_cache),
-        "note": f"Startup models: {STARTUP_MODELS} | On-demand: {ON_DEMAND_MODELS}"
+        "note": "All models load on first embedding request - minimal deployment version"
     }
 
 if __name__ == "__main__":
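
With the lifespan handler removed, every model now loads lazily on first use, so the first /embed request naming a given model pays the full load cost. A minimal client sketch, assuming the EmbeddingRequest fields implied by the handler (texts, model, normalize, max_length) and the host/port from the uvicorn.run call:

import requests

# Hypothetical client call; field names follow the handler's use of
# request.texts / request.model / request.normalize / request.max_length.
resp = requests.post(
    "http://localhost:7860/embed",
    json={
        "texts": ["Hola món", "Hello world"],
        "model": "roberta-ca",  # first request for a model triggers its load
        "normalize": True,
        "max_length": 512,
    },
    timeout=300,  # generous: the on-demand model load can take a while
)
resp.raise_for_status()
data = resp.json()
print(data["model_used"], data["dimensions"], data["num_texts"])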
 
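For illustration, the minimal version's /health response immediately after a cold start, before any embedding request has arrived, would look roughly like this (a sketch derived from the handler above; models_cache starts empty):

# Expected /health payload on a cold start, shown as a Python literal
{
    "status": "healthy",
    "all_models_loaded": False,
    "available_models": [],
    "on_demand_models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
    "models_count": 0,
    "note": "All models load on first embedding request - minimal deployment version",
}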
app_hybrid_backup.py ADDED
@@ -0,0 +1,189 @@
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from contextlib import asynccontextmanager
+from typing import List
+import torch
+import uvicorn
+
+from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
+from utils.helpers import load_models, get_embeddings, cleanup_memory
+
+# Global model cache
+models_cache = {}
+
+# Models to load at startup (most frequently used)
+STARTUP_MODELS = ["jina-v3", "roberta-ca"]
+# Models to load on demand
+ON_DEMAND_MODELS = ["jina", "robertalex", "legal-bert"]
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan handler for startup and shutdown"""
+    # Startup - load priority models
+    try:
+        global models_cache
+        print(f"Loading startup models: {STARTUP_MODELS}...")
+        models_cache = load_models(STARTUP_MODELS)
+        print(f"Startup models loaded successfully: {list(models_cache.keys())}")
+        yield
+    except Exception as e:
+        print(f"Failed to load startup models: {str(e)}")
+        # Continue anyway - models can be loaded on demand
+        yield
+    finally:
+        # Shutdown - cleanup resources
+        cleanup_memory()
+
+def ensure_model_loaded(model_name: str):
+    """Load a specific model on demand if not already loaded"""
+    global models_cache
+    if model_name not in models_cache:
+        if model_name in ON_DEMAND_MODELS:
+            try:
+                print(f"Loading model on demand: {model_name}...")
+                new_models = load_models([model_name])
+                models_cache.update(new_models)
+                print(f"Model {model_name} loaded successfully!")
+            except Exception as e:
+                print(f"Failed to load model {model_name}: {str(e)}")
+                raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
+        else:
+            raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")
+
+app = FastAPI(
+    title="Multilingual & Legal Embedding API",
+    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
+    version="3.0.0",
+    lifespan=lifespan
+)
+
+# Add CORS middleware to allow cross-origin requests
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, specify actual domains
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/")
+async def root():
+    return {
+        "message": "Multilingual & Legal Embedding API",
+        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
+        "status": "running",
+        "docs": "/docs",
+        "total_models": 5
+    }
+
+@app.post("/embed", response_model=EmbeddingResponse)
+async def create_embeddings(request: EmbeddingRequest):
+    """Generate embeddings for input texts"""
+    try:
+        # Load specific model on demand if needed
+        ensure_model_loaded(request.model)
+
+        if not request.texts:
+            raise HTTPException(status_code=400, detail="No texts provided")
+
+        if len(request.texts) > 50:  # Rate limiting
+            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
+
+        embeddings = get_embeddings(
+            request.texts,
+            request.model,
+            models_cache,
+            request.normalize,
+            request.max_length
+        )
+
+        # Cleanup memory after large batches
+        if len(request.texts) > 20:
+            cleanup_memory()
+
+        return EmbeddingResponse(
+            embeddings=embeddings,
+            model_used=request.model,
+            dimensions=len(embeddings[0]) if embeddings else 0,
+            num_texts=len(request.texts)
+        )
+
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
+
+@app.get("/models", response_model=List[ModelInfo])
+async def list_models():
+    """List available models and their specifications"""
+    return [
+        ModelInfo(
+            model_id="jina",
+            name="jinaai/jina-embeddings-v2-base-es",
+            dimensions=768,
+            max_sequence_length=8192,
+            languages=["Spanish", "English"],
+            model_type="bilingual",
+            description="Bilingual Spanish-English embeddings with long context support"
+        ),
+        ModelInfo(
+            model_id="robertalex",
+            name="PlanTL-GOB-ES/RoBERTalex",
+            dimensions=768,
+            max_sequence_length=512,
+            languages=["Spanish"],
+            model_type="legal domain",
+            description="Spanish legal domain specialized embeddings"
+        ),
+        ModelInfo(
+            model_id="jina-v3",
+            name="jinaai/jina-embeddings-v3",
+            dimensions=1024,
+            max_sequence_length=8192,
+            languages=["Multilingual"],
+            model_type="multilingual",
+            description="Latest Jina v3 with superior multilingual performance"
+        ),
+        ModelInfo(
+            model_id="legal-bert",
+            name="nlpaueb/legal-bert-base-uncased",
+            dimensions=768,
+            max_sequence_length=512,
+            languages=["English"],
+            model_type="legal domain",
+            description="English legal domain BERT model"
+        ),
+        ModelInfo(
+            model_id="roberta-ca",
+            name="projecte-aina/roberta-large-ca-v2",
+            dimensions=1024,
+            max_sequence_length=512,
+            languages=["Catalan"],
+            model_type="general",
+            description="Catalan RoBERTa-large model trained on large corpus"
+        )
+    ]
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    startup_models_loaded = all(model in models_cache for model in STARTUP_MODELS)
+    all_models_loaded = len(models_cache) == 5
+
+    return {
+        "status": "healthy" if startup_models_loaded else "partial",
+        "startup_models_loaded": startup_models_loaded,
+        "all_models_loaded": all_models_loaded,
+        "available_models": list(models_cache.keys()),
+        "startup_models": STARTUP_MODELS,
+        "on_demand_models": ON_DEMAND_MODELS,
+        "models_count": len(models_cache),
+        "note": f"Startup models: {STARTUP_MODELS} | On-demand: {ON_DEMAND_MODELS}"
+    }
+
+if __name__ == "__main__":
+    # Set multi-threading for CPU
+    torch.set_num_threads(8)
+    torch.set_num_interop_threads(1)
+
+    uvicorn.run(app, host="0.0.0.0", port=7860)
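
The backup preserves the hybrid strategy: STARTUP_MODELS ("jina-v3", "roberta-ca") load inside the lifespan handler before traffic is served, the other three load on demand, and /health reports "partial" when the startup load failed. A quick smoke test against the hybrid variant, assuming it is launched in place of the main app (e.g. uvicorn app_hybrid_backup:app --port 7860):

import requests

# Check the hybrid variant's health endpoint; "healthy" means both startup
# models are cached, "partial" means the lifespan load failed and every
# model will fall back to on-demand loading.
health = requests.get("http://localhost:7860/health", timeout=10).json()
print(health["status"], health["startup_models_loaded"])
print("cached:", health["available_models"])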