Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -6,11 +6,29 @@ from datasets import load_dataset
|
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
9 |
-
# 1.
|
10 |
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
|
11 |
|
12 |
-
# 2.
|
13 |
dataset = load_dataset("MedRAG/textbooks", split="train", streaming=True)
|
14 |
|
15 |
# 3. Data conversion
|
16 |
texts = [entry["content"] for entry in dataset] # use the "content" field
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
9 |
+
# 1. Load the MiniLM model (data vectorization)
|
10 |
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
|
11 |
|
12 |
+
# 2. Load the MedRAG dataset
|
13 |
dataset = load_dataset("MedRAG/textbooks", split="train", streaming=True)
|
14 |
|
15 |
# 3. Data conversion
|
16 |
texts = [entry["content"] for entry in dataset] # use the "content" field
|
17 |
+
|
18 |
+
# 4. Create vector embeddings and store them in FAISS
|
19 |
+
vectors = embed_model.encode(texts)
|
20 |
+
dimension = vectors.shape[1] # embedding dimension
|
21 |
+
index = faiss.IndexFlatL2(dimension) # create an L2-distance-based FAISS index
|
22 |
+
index.add(np.array(vectors)) # add the vectors to FAISS
|
23 |
+
|
24 |
+
# 5. Search API (callable from GPTs)
|
25 |
+
@app.get("/search")
def search(query: str):
    """Return the stored texts nearest to *query* in embedding space.

    The query is encoded with ``embed_model``, the FAISS ``index`` is asked
    for the three nearest vectors, and the corresponding entries of ``texts``
    are returned.

    Args:
        query: Free-text search string.

    Returns:
        A dict with the single key ``"retrieved_docs"`` holding a list of at
        most three texts.
    """
    # FAISS requires float32 input.
    query_vector = np.asarray(embed_model.encode([query]), dtype=np.float32)

    _, neighbor_ids = index.search(query_vector, k=3)  # top-3 lookup

    # FAISS pads missing neighbours with -1 when the index holds fewer than
    # k vectors; drop them so texts[-1] is not returned by accident.
    results = [texts[i] for i in neighbor_ids[0] if i >= 0]

    return {"retrieved_docs": results}
|