# chunkr-api / app.py
# Last change: "Update app.py" by KJ24 (commit bec7021, verified) - 1.65 kB
from fastapi import FastAPI
from pydantic import BaseModel
from typing import Optional
from llama_index.core import Document, ServiceContext
from llama_index.llms.openai import OpenAI
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import os
# FastAPI application instance; the /chunk route below is registered on it.
app = FastAPI()
# Input schema
class ChunkRequest(BaseModel):
    """Request body accepted by the ``/chunk`` endpoint.

    Only ``text`` is required. The remaining fields are optional and are
    echoed back unchanged in the endpoint's response.
    """

    # Raw text to be split into chunks.
    text: str

    # Optional caller-supplied identifier of the originating document.
    source_id: Optional[str] = None

    # Optional title ("titre" is French for "title").
    titre: Optional[str] = None

    # Optional description of where the text comes from.
    source: Optional[str] = None

    # Optional content category. The name shadows the builtin ``type`` but is
    # part of the JSON contract, so it is kept as-is.
    type: Optional[str] = None
# Semantic splitter shared by all requests, built lazily on first use.
# Loading the embedding model is expensive, so it must not happen per request.
_semantic_parser = None


def _get_semantic_parser():
    """Return the shared ``SemanticSplitterNodeParser``, creating it if needed."""
    global _semantic_parser
    if _semantic_parser is None:
        # Free, open-source embedding model.
        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
        # The splitter takes the embedding model directly: ``from_defaults``
        # has no ``service_context`` parameter, and semantic splitting relies
        # on embeddings only, so no LLM / ServiceContext is constructed.
        _semantic_parser = SemanticSplitterNodeParser.from_defaults(
            embed_model=embed_model
        )
    return _semantic_parser


# Main endpoint
@app.post("/chunk")
async def chunk_text(data: ChunkRequest):
    """Split ``data.text`` into semantic chunks.

    Args:
        data: Request body; ``text`` is chunked, the other fields are passed
            through to the response untouched.

    Returns:
        On success, a dict with:
          * ``chunks``: list of chunk texts, one per produced node,
          * ``metadatas``: list of per-node metadata dicts (same order),
          * ``source_id`` / ``titre`` / ``source`` / ``type``: values echoed
            from the request.
        On failure, ``{"error": <message>}``. This error shape is kept for
        backward compatibility with existing callers.
    """
    try:
        # Parser creation is inside the ``try`` so that model-loading failures
        # are reported through the same ``{"error": ...}`` payload as
        # splitting failures, instead of surfacing as an unhandled 500.
        parser = _get_semantic_parser()
        nodes = parser.get_nodes_from_documents([Document(text=data.text)])
        return {
            "chunks": [node.text for node in nodes],
            "metadatas": [node.metadata for node in nodes],
            "source_id": data.source_id,
            "titre": data.titre,
            "source": data.source,
            "type": data.type,
        }
    except Exception as e:
        # Deliberate best-effort: callers receive the error message in the
        # response body rather than an HTTP error status.
        return {"error": str(e)}