Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import sys
|
|
3 |
import faiss
|
4 |
import numpy as np
|
5 |
import streamlit as st
|
|
|
6 |
from text2vec import SentenceModel
|
7 |
from src.jsonl_Indexer import JSONLIndexer
|
8 |
|
@@ -32,8 +33,6 @@ DEFAULT_CONFIG = {
|
|
32 |
# 合并默认配置和命令行参数
|
33 |
config = DEFAULT_CONFIG.copy()
|
34 |
config.update(cli_args)
|
35 |
-
|
36 |
-
# 将 vector_size 转换为整数
|
37 |
config['vector_size'] = int(config['vector_size'])
|
38 |
|
39 |
@st.cache_resource
|
@@ -68,24 +67,55 @@ st.title("JSONL Data Retrieval Visualization")
|
|
68 |
st.write("该应用基于预计算的 JSONL 文件 embedding,输入查询后将检索相似记录。")
|
69 |
|
70 |
# 查询输入
|
71 |
-
query = st.text_input("
|
72 |
-
top_k = st.slider("
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
# 检索并展示结果
|
75 |
-
if st.button("
|
76 |
-
#
|
77 |
rec_ids, scores = retriever.search_return_id(query, top_k)
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
)
|
|
|
|
|
|
|
|
|
|
3 |
import faiss
|
4 |
import numpy as np
|
5 |
import streamlit as st
|
6 |
+
import pandas as pd
|
7 |
from text2vec import SentenceModel
|
8 |
from src.jsonl_Indexer import JSONLIndexer
|
9 |
|
|
|
33 |
# 合并默认配置和命令行参数
|
34 |
config = DEFAULT_CONFIG.copy()
|
35 |
config.update(cli_args)
|
|
|
|
|
36 |
config['vector_size'] = int(config['vector_size'])
|
37 |
|
38 |
@st.cache_resource
|
|
|
67 |
st.write("该应用基于预计算的 JSONL 文件 embedding,输入查询后将检索相似记录。")
|
68 |
|
69 |
# Query input.
# Two-column layout: a wide search box on the left and the Top-K slider on
# the right. These are the only query widgets on the page; a second pair of
# widgets rendered above this layout was removed because its `query` value
# was overwritten here and it duplicated the controls on screen.
col1, col2 = st.columns([2.5, 1])
with col1:
    # Search box.
    query = st.text_input(
        "请输入搜索查询:", placeholder="your query", help=""
    )
with col2:
    # Top-K slider. It must be bound to `top_k`, the name the search call
    # below reads; binding it to any other name leaves the slider without
    # effect.
    top_k = st.slider(
        "Top K", 1, 100, 50, help="choose the number of results to display"
    )

# Retrieve and display results. Nothing runs until the button is pressed
# and the query is non-empty.
if st.button("query") and query:
    # Call the retriever; per the original note it returns the `id` field of
    # the matching JSON records together with their similarity scores.
    rec_ids, scores = retriever.search_return_id(query, top_k)

    # Assemble the hits into a DataFrame.
    results_df = pd.DataFrame({
        "tool": rec_ids,
        "relevance": scores,
    })

    st.subheader("🗂️ 检索结果详情")

    # Style the DataFrame with alternating row background colours. With
    # axis=0 the function is applied per column and must return one CSS
    # string per row.
    styled_results = results_df.style.apply(
        lambda column: [
            "background-color: #F7F7F7" if row % 2 == 0 else "background-color: #FFFFFF"
            for row in range(len(column))
        ],
        axis=0,
    ).format({"relevance": "{:.4f}"})

    # Upper bound of the relevance progress bar: the best score of this
    # result set, or 1 when nothing was returned.
    relevance_max = float(max(scores)) if len(scores) > 0 else 1

    # Show the results in an interactive table with per-column configuration.
    st.dataframe(
        styled_results,
        column_config={
            "tool": st.column_config.TextColumn("tool", help="检索到的tool", width="medium"),
            "relevance": st.column_config.ProgressColumn(
                "relevance",
                help="记录与查询的匹配程度",
                format="%.4f",
                min_value=0,
                max_value=relevance_max,
            ),
        },
        hide_index=True,
        use_container_width=True,
    )