Spaces:

AuraSystems
/

spanish-embeddings-api

Sleeping

App Files Files Community

Jordi Catafal committed on Jun 1

Commit

c3aef13

1 Parent(s): f26f739

initial deployment

Browse files

Files changed (6) hide show

embeddings_api/Dockerfile +33 -0
embeddings_api/app.py +108 -0
embeddings_api/models/__init__.py +6 -0
embeddings_api/models/schemas.py +109 -0
embeddings_api/requirements.txt +10 -0
embeddings_api/utils/helpers.py +207 -0

embeddings_api/Dockerfile ADDED Viewed

	@@ -0,0 +1,33 @@

+FROM python:3.9-slim
+# Unbuffered stdout/stderr so log lines show up immediately
+ENV PYTHONUNBUFFERED=1
+# Keep all Hugging Face downloads under /app/cache
+ENV TRANSFORMERS_CACHE=/app/cache
+ENV HF_HOME=/app/cache
+ENV PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128
+# Create non-root user
+RUN useradd -m -u 1000 user
+# Create the working and cache directories while still root and hand them to
+# the non-root user. Creating them after "USER user" leaves the ownership of
+# /app up to the builder, and a root-owned /app makes "mkdir /app/cache" and
+# every later model download fail with "permission denied".
+RUN mkdir -p /app/cache && chown -R user /app
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Set working directory
+WORKDIR /app
+# Copy requirements first so the dependency layer is reused when only code changes
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY --chown=user . .
+# Expose port
+EXPOSE 7860
+# Run the application
+CMD ["python", "app.py"]

embeddings_api/app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+from fastapi import FastAPI, HTTPException
+from typing import List
+import torch
+import uvicorn
+import gc
+import os
+from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
+from utils.helpers import load_models, get_embeddings, cleanup_memory
+app = FastAPI(
+    title="Spanish Embedding API",
+    description="Dual Spanish embedding models API",
+    version="1.0.0"
+)
+# Global model cache
+models_cache = {}
+@app.on_event("startup")
+async def startup_event():
+    """Load models on startup"""
+    global models_cache
+    models_cache = load_models()
+    print("Models loaded successfully!")
+@app.get("/")
+async def root():
+    return {
+        "message": "Spanish Embedding API",
+        "models": ["jina", "robertalex"],
+        "status": "running",
+        "docs": "/docs"
+    }
+@app.post("/embed", response_model=EmbeddingResponse)
+async def create_embeddings(request: EmbeddingRequest):
+    """Generate embeddings for input texts"""
+    try:
+        if not request.texts:
+            raise HTTPException(status_code=400, detail="No texts provided")
+        if len(request.texts) > 50:  # Rate limiting
+            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
+        embeddings = get_embeddings(
+            request.texts,
+            request.model,
+            models_cache,
+            request.normalize,
+            request.max_length
+        )
+        # Cleanup memory after large batches
+        if len(request.texts) > 20:
+            cleanup_memory()
+        return EmbeddingResponse(
+            embeddings=embeddings,
+            model_used=request.model,
+            dimensions=len(embeddings[0]) if embeddings else 0,
+            num_texts=len(request.texts)
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
+@app.get("/models", response_model=List[ModelInfo])
+async def list_models():
+    """List available models and their specifications"""
+    return [
+        ModelInfo(
+            model_id="jina",
+            name="jinaai/jina-embeddings-v2-base-es",
+            dimensions=768,
+            max_sequence_length=8192,
+            languages=["Spanish", "English"],
+            model_type="bilingual",
+            description="Bilingual Spanish-English embeddings with long context support"
+        ),
+        ModelInfo(
+            model_id="robertalex",
+            name="PlanTL-GOB-ES/RoBERTalex",
+            dimensions=768,
+            max_sequence_length=512,
+            languages=["Spanish"],
+            model_type="legal domain",
+            description="Spanish legal domain specialized embeddings"
+        )
+    ]
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    return {
+        "status": "healthy",
+        "models_loaded": len(models_cache) == 2,
+        "available_models": list(models_cache.keys())
+    }
+if __name__ == "__main__":
+    # Set multi-threading for CPU
+    torch.set_num_threads(8)
+    torch.set_num_interop_threads(1)
+    uvicorn.run(app, host="0.0.0.0", port=7860)

embeddings_api/models/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+# models/__init__.py
+"""Models package for embedding API schemas and configurations"""
+from .schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
+__all__ = ['EmbeddingRequest', 'EmbeddingResponse', 'ModelInfo']

embeddings_api/models/schemas.py ADDED Viewed

	@@ -0,0 +1,109 @@

+# models/schemas.py
+"""Pydantic models for request/response validation"""
+from pydantic import BaseModel, Field, validator
+from typing import List, Optional, Literal
+class EmbeddingRequest(BaseModel):
+    """Request model for embedding generation"""
+    texts: List[str] = Field(
+        ...,
+        description="List of texts to embed",
+        example=["Hola mundo", "¿Cómo estás?"]
+    )
+    model: Literal["jina", "robertalex"] = Field(
+        default="jina",
+        description="Model to use for embeddings"
+    )
+    normalize: bool = Field(
+        default=True,
+        description="Whether to normalize embeddings to unit length"
+    )
+    max_length: Optional[int] = Field(
+        default=None,
+        description="Maximum sequence length (uses model default if not specified)"
+    )
+    @validator('texts')
+    def validate_texts(cls, v):
+        if not v:
+            raise ValueError("At least one text must be provided")
+        if len(v) > 50:
+            raise ValueError("Maximum 50 texts per request")
+        # Check for empty strings
+        if any(not text.strip() for text in v):
+            raise ValueError("Empty texts are not allowed")
+        return v
+    @validator('max_length')
+    def validate_max_length(cls, v, values):
+        if v is not None:
+            model = values.get('model', 'jina')
+            if model == 'jina' and v > 8192:
+                raise ValueError("Max length for Jina model is 8192")
+            elif model == 'robertalex' and v > 512:
+                raise ValueError("Max length for RoBERTalex model is 512")
+            if v < 1:
+                raise ValueError("Max length must be positive")
+        return v
+class EmbeddingResponse(BaseModel):
+    """Response model for embedding generation"""
+    embeddings: List[List[float]] = Field(
+        ...,
+        description="List of embedding vectors"
+    )
+    model_used: str = Field(
+        ...,
+        description="Model that was used"
+    )
+    dimensions: int = Field(
+        ...,
+        description="Dimension of embedding vectors"
+    )
+    num_texts: int = Field(
+        ...,
+        description="Number of texts processed"
+    )
+class ModelInfo(BaseModel):
+    """Information about available models"""
+    model_id: str = Field(
+        ...,
+        description="Model identifier for API calls"
+    )
+    name: str = Field(
+        ...,
+        description="Full Hugging Face model name"
+    )
+    dimensions: int = Field(
+        ...,
+        description="Output embedding dimensions"
+    )
+    max_sequence_length: int = Field(
+        ...,
+        description="Maximum input sequence length"
+    )
+    languages: List[str] = Field(
+        ...,
+        description="Supported languages"
+    )
+    model_type: str = Field(
+        ...,
+        description="Type/domain of model"
+    )
+    description: str = Field(
+        ...,
+        description="Model description"
+    )
+class ErrorResponse(BaseModel):
+    """Error response model"""
+    detail: str = Field(
+        ...,
+        description="Error message"
+    )
+    error_type: Optional[str] = Field(
+        default=None,
+        description="Type of error"
+    )

embeddings_api/requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+# The CPU-only torch build (+cpu local version) is published on the PyTorch
+# wheel index, not on PyPI. Index options are file-level options in a
+# requirements file and must sit on their own line; pip does not accept
+# --index-url appended to a requirement. --extra-index-url keeps PyPI
+# available for every other package.
+--extra-index-url https://download.pytorch.org/whl/cpu
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+transformers==4.36.0
+torch==2.1.0+cpu
+sentence-transformers==2.2.2
+numpy<2.0.0
+scikit-learn==1.3.2
+pydantic==2.5.0
+huggingface-hub==0.19.4
+python-multipart==0.0.6

embeddings_api/utils/helpers.py ADDED Viewed

	@@ -0,0 +1,207 @@

+# utils/helpers.py
+"""Helper functions for model loading and embedding generation"""
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModel, RobertaTokenizer, RobertaModel
+from typing import List, Dict, Optional
+import gc
+import os
+def load_models() -> Dict:
+    """
+    Load both embedding models with memory optimization
+    Returns:
+        Dict containing loaded models and tokenizers
+    """
+    models_cache = {}
+    # Set device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    try:
+        # Load Jina model
+        print("Loading Jina embeddings model...")
+        jina_tokenizer = AutoTokenizer.from_pretrained(
+            'jinaai/jina-embeddings-v2-base-es',
+            trust_remote_code=True
+        )
+        jina_model = AutoModel.from_pretrained(
+            'jinaai/jina-embeddings-v2-base-es',
+            trust_remote_code=True,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+        ).to(device)
+        jina_model.eval()
+        # Load RoBERTalex model
+        print("Loading RoBERTalex model...")
+        robertalex_tokenizer = RobertaTokenizer.from_pretrained('PlanTL-GOB-ES/RoBERTalex')
+        robertalex_model = RobertaModel.from_pretrained(
+            'PlanTL-GOB-ES/RoBERTalex',
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+        ).to(device)
+        robertalex_model.eval()
+        models_cache = {
+            'jina': {
+                'tokenizer': jina_tokenizer,
+                'model': jina_model,
+                'device': device
+            },
+            'robertalex': {
+                'tokenizer': robertalex_tokenizer,
+                'model': robertalex_model,
+                'device': device
+            }
+        }
+        # Force garbage collection after loading
+        gc.collect()
+        return models_cache
+    except Exception as e:
+        print(f"Error loading models: {str(e)}")
+        raise
+def mean_pooling(model_output, attention_mask):
+    """
+    Apply mean pooling to get sentence embeddings
+    Args:
+        model_output: Model output containing token embeddings
+        attention_mask: Attention mask for valid tokens
+    Returns:
+        Pooled embeddings
+    """
+    token_embeddings = model_output[0]
+    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+def get_embeddings(
+    texts: List[str],
+    model_name: str,
+    models_cache: Dict,
+    normalize: bool = True,
+    max_length: Optional[int] = None
+) -> List[List[float]]:
+    """
+    Generate embeddings for texts using specified model
+    Args:
+        texts: List of texts to embed
+        model_name: Name of model to use ('jina' or 'robertalex')
+        models_cache: Dictionary containing loaded models
+        normalize: Whether to normalize embeddings
+        max_length: Maximum sequence length
+    Returns:
+        List of embedding vectors
+    """
+    if model_name not in models_cache:
+        raise ValueError(f"Model {model_name} not available. Choose 'jina' or 'robertalex'")
+    tokenizer = models_cache[model_name]['tokenizer']
+    model = models_cache[model_name]['model']
+    device = models_cache[model_name]['device']
+    # Set max length based on model capabilities
+    if max_length is None:
+        max_length = 8192 if model_name == 'jina' else 512
+    # Process in batches for memory efficiency
+    batch_size = 8 if len(texts) > 8 else len(texts)
+    all_embeddings = []
+    for i in range(0, len(texts), batch_size):
+        batch_texts = texts[i:i + batch_size]
+        # Tokenize inputs
+        encoded_input = tokenizer(
+            batch_texts,
+            padding=True,
+            truncation=True,
+            max_length=max_length,
+            return_tensors='pt'
+        ).to(device)
+        # Generate embeddings
+        with torch.no_grad():
+            model_output = model(**encoded_input)
+            if model_name == 'jina':
+                # Jina models require mean pooling
+                embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+            else:
+                # RoBERTalex: use [CLS] token embedding
+                embeddings = model_output.last_hidden_state[:, 0, :]
+        # Normalize if requested
+        if normalize:
+            embeddings = F.normalize(embeddings, p=2, dim=1)
+        # Convert to CPU and list
+        batch_embeddings = embeddings.cpu().numpy().tolist()
+        all_embeddings.extend(batch_embeddings)
+    return all_embeddings
+def cleanup_memory():
+    """Force garbage collection and clear cache"""
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+def validate_input_texts(texts: List[str]) -> List[str]:
+    """
+    Validate and clean input texts
+    Args:
+        texts: List of input texts
+    Returns:
+        Cleaned texts
+    """
+    cleaned_texts = []
+    for text in texts:
+        # Remove excess whitespace
+        text = ' '.join(text.split())
+        # Skip empty texts
+        if text:
+            cleaned_texts.append(text)
+    if not cleaned_texts:
+        raise ValueError("No valid texts provided after cleaning")
+    return cleaned_texts
+def get_model_info(model_name: str) -> Dict:
+    """
+    Get detailed information about a model
+    Args:
+        model_name: Model identifier
+    Returns:
+        Dictionary with model information
+    """
+    model_info = {
+        'jina': {
+            'full_name': 'jinaai/jina-embeddings-v2-base-es',
+            'dimensions': 768,
+            'max_length': 8192,
+            'pooling': 'mean',
+            'languages': ['Spanish', 'English']
+        },
+        'robertalex': {
+            'full_name': 'PlanTL-GOB-ES/RoBERTalex',
+            'dimensions': 768,
+            'max_length': 512,
+            'pooling': 'cls',
+            'languages': ['Spanish']
+        }
+    }
+    return model_info.get(model_name, {})