"""FastAPI demo: semantic search over a list of model names using SimCSE embeddings."""
from fastapi import FastAPI, Query
from pydantic import BaseModel
from typing import List
from simcse import SimCSE
import os

app = FastAPI()

# Load the embedding models once at startup
sentence_path = os.path.join("./static/", "model_names.txt")
embedder0 = SimCSE("princeton-nlp/sup-simcse-bert-base-uncased", device="cpu")
embedder1 = SimCSE("princeton-nlp/sup-simcse-bert-base-uncased", device="cpu")

# Build both indexes from the same sentence file; the second positional
# argument maps to SimCSE's use_faiss flag (0 = exact search, 1 = FAISS-backed)
embedder0.build_index(sentence_path, 0)
embedder1.build_index(sentence_path, 1)

# Response schema
class SearchResult(BaseModel):
    sentence: str
    score: float

@app.get("/search", response_model=List[SearchResult])
def search(prompt: str = Query(..., description="Input text prompt")):
    # Query both indexes with the same prompt
    results0 = embedder0.search(prompt, top_k=5, threshold=0.6)
    results1 = embedder1.search(prompt, top_k=5, threshold=0.6)

    # Combine and sort results by score
    combined = results0 + results1
    sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)

    # Optional: deduplicate by sentence text
    seen = set()
    unique_sorted = []
    for sentence, score in sorted_combined:
        if sentence not in seen:
            seen.add(sentence)
            unique_sorted.append({"sentence": sentence, "score": score})

    return unique_sorted

if __name__ == "__main__":
    import uvicorn
    # "demo:app" assumes this file is saved as demo.py
    uvicorn.run("demo:app", host="0.0.0.0", port=10001)
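
# Example request (a sketch; assumes the service is running locally on port
# 10001 and that ./static/model_names.txt exists and is non-empty):
#   curl "http://localhost:10001/search?prompt=sentence%20embedding%20model"
# The response is a JSON list of {"sentence": ..., "score": ...} objects,
# deduplicated by sentence text and sorted by similarity score, highest first.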