KJ24 committed on
Commit
bec7021
·
verified ·
1 Parent(s): dbd9820

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -7
app.py CHANGED
@@ -1,16 +1,16 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- from typing import Optional # ✅ à AJOUTER ici
4
 
5
  from llama_index.core import Document, ServiceContext
6
  from llama_index.llms.openai import OpenAI
7
  from llama_index.core.node_parser import SemanticSplitterNodeParser
 
8
  import os
9
 
10
-
11
  app = FastAPI()
12
 
13
- # Requête d'entrée
14
  class ChunkRequest(BaseModel):
15
  text: str
16
  source_id: Optional[str] = None
@@ -18,21 +18,36 @@ class ChunkRequest(BaseModel):
18
  source: Optional[str] = None
19
  type: Optional[str] = None
20
 
21
-
22
- # Réponse
23
  @app.post("/chunk")
24
  async def chunk_text(data: ChunkRequest):
 
25
  llm = OpenAI(
26
  model="meta-llama/llama-4-maverick:free",
27
  api_base="https://openrouter.ai/api/v1",
28
  api_key=os.getenv("OPENROUTER_API_KEY")
29
  )
30
 
31
- service_context = ServiceContext.from_defaults(llm=llm)
 
 
 
 
 
 
 
32
 
33
  try:
34
  parser = SemanticSplitterNodeParser.from_defaults(service_context=service_context)
35
  nodes = parser.get_nodes_from_documents([Document(text=data.text)])
36
- return {"chunks": [node.text for node in nodes]}
 
 
 
 
 
 
 
 
37
  except Exception as e:
38
  return {"error": str(e)}
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
+ from typing import Optional
4
 
5
  from llama_index.core import Document, ServiceContext
6
  from llama_index.llms.openai import OpenAI
7
  from llama_index.core.node_parser import SemanticSplitterNodeParser
8
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
9
  import os
10
 
 
11
  app = FastAPI()
12
 
13
+ # 🔹 Schéma d'entrée
14
  class ChunkRequest(BaseModel):
15
  text: str
16
  source_id: Optional[str] = None
 
18
  source: Optional[str] = None
19
  type: Optional[str] = None
20
 
# Main endpoint: semantic chunking of the submitted text.

# Lazily-built splitter shared by all requests. Loading the embedding model is
# expensive, so it is done at most once per process instead of on every call.
_semantic_splitter = None


def _get_semantic_splitter():
    """Return the shared semantic splitter, building it on first use."""
    global _semantic_splitter
    if _semantic_splitter is None:
        # Free, open-source embedding model from Hugging Face.
        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
        # The splitter receives its embedding model directly:
        # `from_defaults` has no `service_context` parameter, and the
        # deprecated ServiceContext (plus the LLM that was only ever passed
        # to it) is not needed to build the parser.
        _semantic_splitter = SemanticSplitterNodeParser.from_defaults(
            embed_model=embed_model
        )
    return _semantic_splitter


@app.post("/chunk")
async def chunk_text(data: ChunkRequest):
    """Split ``data.text`` into semantic chunks.

    Args:
        data: Request payload. ``text`` is the content to split; ``source_id``,
            ``titre``, ``source`` and ``type`` are echoed back unchanged.

    Returns:
        On success, a dict with ``chunks`` (the text of each node),
        ``metadatas`` (the metadata of each node) and the echoed request
        fields. On any failure, ``{"error": <message>}``; the exception is
        reported in the response body rather than raised.
    """
    try:
        parser = _get_semantic_splitter()
        nodes = parser.get_nodes_from_documents([Document(text=data.text)])

        return {
            "chunks": [node.text for node in nodes],
            "metadatas": [node.metadata for node in nodes],
            "source_id": data.source_id,
            "titre": data.titre,
            "source": data.source,
            "type": data.type,
        }
    except Exception as e:
        # Deliberately broad: this is the endpoint boundary, and callers
        # receive every failure as an "error" payload.
        return {"error": str(e)}