Spaces:

AuraSystems
/

spanish-embeddings-api

Running

App Files Files Community

Jordi Catafal committed on Jun 3

Commit

ebb30ca

1 Parent(s): 5861022

another try

Browse files

Files changed (4) hide show

__pycache__/app.cpython-311.pyc +0 -0
__pycache__/app_minimal.cpython-311.pyc +0 -0
app.py +10 -34
app_hybrid_backup.py +189 -0

__pycache__/app.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ

__pycache__/app_minimal.cpython-311.pyc ADDED Viewed

Binary file (6.78 kB). View file

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from contextlib import asynccontextmanager
 from typing import List
 import torch
 import uvicorn
@@ -8,31 +7,11 @@ import uvicorn
 from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
 from utils.helpers import load_models, get_embeddings, cleanup_memory
-# Global model cache
 models_cache = {}
-# Models to load at startup (most frequently used)
-STARTUP_MODELS = ["jina-v3", "roberta-ca"]
-# Models to load on demand
-ON_DEMAND_MODELS = ["jina", "robertalex", "legal-bert"]
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application lifespan handler for startup and shutdown"""
-    # Startup - load priority models
-    try:
-        global models_cache
-        print(f"Loading startup models: {STARTUP_MODELS}...")
-        models_cache = load_models(STARTUP_MODELS)
-        print(f"Startup models loaded successfully: {list(models_cache.keys())}")
-        yield
-    except Exception as e:
-        print(f"Failed to load startup models: {str(e)}")
-        # Continue anyway - models can be loaded on demand
-        yield
-    finally:
-        # Shutdown - cleanup resources
-        cleanup_memory()
 def ensure_model_loaded(model_name: str):
     """Load a specific model on demand if not already loaded"""
@@ -53,8 +32,7 @@ def ensure_model_loaded(model_name: str):
 app = FastAPI(
     title="Multilingual & Legal Embedding API",
     description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
-    version="3.0.0",
-    lifespan=lifespan
 )
 # Add CORS middleware to allow cross-origin requests
@@ -69,18 +47,19 @@ app.add_middleware(
 @app.get("/")
 async def root():
     return {
-        "message": "Multilingual & Legal Embedding API",
         "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
         "status": "running",
         "docs": "/docs",
-        "total_models": 5
     }
 @app.post("/embed", response_model=EmbeddingResponse)
 async def create_embeddings(request: EmbeddingRequest):
     """Generate embeddings for input texts"""
     try:
-        # Load specific model on demand if needed
         ensure_model_loaded(request.model)
         if not request.texts:
@@ -167,18 +146,15 @@ async def list_models():
 @app.get("/health")
 async def health_check():
     """Health check endpoint"""
-    startup_models_loaded = all(model in models_cache for model in STARTUP_MODELS)
     all_models_loaded = len(models_cache) == 5
     return {
-        "status": "healthy" if startup_models_loaded else "partial",
-        "startup_models_loaded": startup_models_loaded,
         "all_models_loaded": all_models_loaded,
         "available_models": list(models_cache.keys()),
-        "startup_models": STARTUP_MODELS,
         "on_demand_models": ON_DEMAND_MODELS,
         "models_count": len(models_cache),
-        "note": f"Startup models: {STARTUP_MODELS} | On-demand: {ON_DEMAND_MODELS}"
     }
 if __name__ == "__main__":

 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from typing import List
 import torch
 import uvicorn
 from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
 from utils.helpers import load_models, get_embeddings, cleanup_memory
+# Global model cache - completely on-demand loading
 models_cache = {}
+# All models load on demand to test deployment
+ON_DEMAND_MODELS = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
 def ensure_model_loaded(model_name: str):
     """Load a specific model on demand if not already loaded"""
 app = FastAPI(
     title="Multilingual & Legal Embedding API",
     description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
+    version="3.0.0"
 )
 # Add CORS middleware to allow cross-origin requests
 @app.get("/")
 async def root():
     return {
+        "message": "Multilingual & Legal Embedding API - Minimal Version",
         "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
         "status": "running",
         "docs": "/docs",
+        "total_models": 5,
+        "note": "All models load on first request"
     }
 @app.post("/embed", response_model=EmbeddingResponse)
 async def create_embeddings(request: EmbeddingRequest):
     """Generate embeddings for input texts"""
     try:
+        # Load specific model on demand
         ensure_model_loaded(request.model)
         if not request.texts:
 @app.get("/health")
 async def health_check():
     """Health check endpoint"""
     all_models_loaded = len(models_cache) == 5
     return {
+        "status": "healthy",
         "all_models_loaded": all_models_loaded,
         "available_models": list(models_cache.keys()),
         "on_demand_models": ON_DEMAND_MODELS,
         "models_count": len(models_cache),
+        "note": "All models load on first embedding request - minimal deployment version"
     }
 if __name__ == "__main__":

app_hybrid_backup.py ADDED Viewed

	@@ -0,0 +1,189 @@

+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from contextlib import asynccontextmanager
+from typing import List
+import torch
+import uvicorn
+from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
+from utils.helpers import load_models, get_embeddings, cleanup_memory
+# Global model cache, keyed by model name: assigned by lifespan() at startup,
+# extended by ensure_model_loaded(), and passed to get_embeddings() by /embed.
+models_cache = {}
+# Models that lifespan() tries to load at startup (most frequently used)
+STARTUP_MODELS = ["jina-v3", "roberta-ca"]
+# Models that ensure_model_loaded() loads on demand
+ON_DEMAND_MODELS = ["jina", "robertalex", "legal-bert"]
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan handler for startup and shutdown.
+
+    Startup: tries to load STARTUP_MODELS into the global models_cache. A
+    loading failure is printed and ignored so the server still starts.
+    Shutdown: always calls cleanup_memory().
+    """
+    global models_cache
+    # Guard only the loading step. ``yield`` must stay outside this
+    # try/except: a generator-based context manager has to yield exactly
+    # once, and an exception thrown in at the yield point must not be caught
+    # here and lead to a second ``yield`` (contextlib raises RuntimeError).
+    try:
+        print(f"Loading startup models: {STARTUP_MODELS}...")
+        models_cache = load_models(STARTUP_MODELS)
+        print(f"Startup models loaded successfully: {list(models_cache.keys())}")
+    except Exception as e:
+        # Continue anyway - the app starts with models_cache left as it was
+        print(f"Failed to load startup models: {str(e)}")
+    try:
+        yield
+    finally:
+        # Shutdown - cleanup resources
+        cleanup_memory()
+def ensure_model_loaded(model_name: str):
+    """Load a specific model on demand if not already loaded.
+
+    Any name listed in STARTUP_MODELS or ON_DEMAND_MODELS may be loaded here,
+    so a startup model whose initial load failed is retried on first use
+    instead of being rejected as unknown.
+
+    Raises:
+        HTTPException: 400 if the name is in neither list, 500 if
+            load_models() raises.
+    """
+    if model_name in models_cache:
+        return
+    if model_name not in STARTUP_MODELS and model_name not in ON_DEMAND_MODELS:
+        raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")
+    try:
+        print(f"Loading model on demand: {model_name}...")
+        # The cache dict is mutated in place, so no ``global`` statement is needed
+        models_cache.update(load_models([model_name]))
+        print(f"Model {model_name} loaded successfully!")
+    except Exception as e:
+        print(f"Failed to load model {model_name}: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}") from e
+# FastAPI application object; lifespan() runs the startup model loading and
+# the shutdown cleanup.
+app = FastAPI(
+    title="Multilingual & Legal Embedding API",
+    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
+    version="3.0.0",
+    lifespan=lifespan
+)
+# Add CORS middleware to allow cross-origin requests
+# NOTE(review): wildcard origins are combined with allow_credentials=True;
+# confirm this is intended before browsers send cookies/auth to this API.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, specify actual domains
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.get("/")
+async def root():
+    # Static service description for the landing route. A comment is used
+    # instead of a docstring so the published endpoint description is unchanged.
+    supported_models = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]
+    info = {"message": "Multilingual & Legal Embedding API"}
+    info["models"] = supported_models
+    info["status"] = "running"
+    info["docs"] = "/docs"
+    info["total_models"] = 5
+    return info
+@app.post("/embed", response_model=EmbeddingResponse)
+async def create_embeddings(request: EmbeddingRequest):
+    """Generate embeddings for input texts
+
+    Loads the requested model if necessary, validates the batch (non-empty,
+    at most 50 texts) and returns the embeddings with their dimensionality.
+
+    Raises:
+        HTTPException: 400 for an unknown model, an empty or oversized batch,
+            or a ValueError raised while embedding; 500 for a model loading
+            failure or any other error.
+    """
+    try:
+        # Load specific model on demand if needed
+        ensure_model_loaded(request.model)
+        if not request.texts:
+            raise HTTPException(status_code=400, detail="No texts provided")
+        if len(request.texts) > 50:  # Rate limiting
+            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
+        embeddings = get_embeddings(
+            request.texts,
+            request.model,
+            models_cache,
+            request.normalize,
+            request.max_length
+        )
+        # Cleanup memory after large batches
+        if len(request.texts) > 20:
+            cleanup_memory()
+        return EmbeddingResponse(
+            embeddings=embeddings,
+            model_used=request.model,
+            dimensions=len(embeddings[0]) if embeddings else 0,
+            num_texts=len(request.texts)
+        )
+    except HTTPException:
+        # HTTPException is an Exception subclass: re-raise it untouched so the
+        # deliberate 400/500 responses above are not rewrapped as a generic
+        # 500 "Internal error" by the catch-all handler below.
+        raise
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e)) from e
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
+@app.get("/models", response_model=List[ModelInfo])
+async def list_models():
+    """List available models and their specifications"""
+    # Column order of every catalog row below.
+    field_names = (
+        "model_id",
+        "name",
+        "dimensions",
+        "max_sequence_length",
+        "languages",
+        "model_type",
+        "description",
+    )
+    # One row per model, in the order the endpoint reports them.
+    catalog = (
+        (
+            "jina",
+            "jinaai/jina-embeddings-v2-base-es",
+            768,
+            8192,
+            ["Spanish", "English"],
+            "bilingual",
+            "Bilingual Spanish-English embeddings with long context support",
+        ),
+        (
+            "robertalex",
+            "PlanTL-GOB-ES/RoBERTalex",
+            768,
+            512,
+            ["Spanish"],
+            "legal domain",
+            "Spanish legal domain specialized embeddings",
+        ),
+        (
+            "jina-v3",
+            "jinaai/jina-embeddings-v3",
+            1024,
+            8192,
+            ["Multilingual"],
+            "multilingual",
+            "Latest Jina v3 with superior multilingual performance",
+        ),
+        (
+            "legal-bert",
+            "nlpaueb/legal-bert-base-uncased",
+            768,
+            512,
+            ["English"],
+            "legal domain",
+            "English legal domain BERT model",
+        ),
+        (
+            "roberta-ca",
+            "projecte-aina/roberta-large-ca-v2",
+            1024,
+            512,
+            ["Catalan"],
+            "general",
+            "Catalan RoBERTa-large model trained on large corpus",
+        ),
+    )
+    # Pair each row with the field names and build the response models.
+    return [ModelInfo(**dict(zip(field_names, row))) for row in catalog]
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    # Derive the full model set from the configuration lists instead of a
+    # hard-coded count, so the report stays correct when a model is added
+    # to or removed from either list.
+    known_models = STARTUP_MODELS + ON_DEMAND_MODELS
+    startup_models_loaded = all(model in models_cache for model in STARTUP_MODELS)
+    all_models_loaded = all(model in models_cache for model in known_models)
+    return {
+        # "partial" means at least one startup model is missing from the cache
+        "status": "healthy" if startup_models_loaded else "partial",
+        "startup_models_loaded": startup_models_loaded,
+        "all_models_loaded": all_models_loaded,
+        "available_models": list(models_cache.keys()),
+        "startup_models": STARTUP_MODELS,
+        "on_demand_models": ON_DEMAND_MODELS,
+        "models_count": len(models_cache),
+        "note": f"Startup models: {STARTUP_MODELS} | On-demand: {ON_DEMAND_MODELS}"
+    }
+if __name__ == "__main__":
+    # Script entry point: configure torch's thread counts, then serve the app.
+    # Set multi-threading for CPU
+    torch.set_num_threads(8)
+    torch.set_num_interop_threads(1)
+    # Listen on all interfaces, port 7860
+    uvicorn.run(app, host="0.0.0.0", port=7860)