Yyy0530 committed on
Commit
8911626
·
verified ·
1 Parent(s): 97b6d6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -20
app.py CHANGED
@@ -3,6 +3,7 @@ import sys
3
  import faiss
4
  import numpy as np
5
  import streamlit as st
 
6
  from text2vec import SentenceModel
7
  from src.jsonl_Indexer import JSONLIndexer
8
 
@@ -32,8 +33,6 @@ DEFAULT_CONFIG = {
32
  # 合并默认配置和命令行参数
33
  config = DEFAULT_CONFIG.copy()
34
  config.update(cli_args)
35
-
36
- # 将 vector_size 转换为整数
37
  config['vector_size'] = int(config['vector_size'])
38
 
39
  @st.cache_resource
@@ -68,24 +67,55 @@ st.title("JSONL Data Retrieval Visualization")
68
  st.write("该应用基于预计算的 JSONL 文件 embedding,输入查询后将检索相似记录。")
69
 
70
  # 查询输入
71
- query = st.text_input("Enter a search query:")
72
- top_k = st.slider("Select number of results to display", min_value=1, max_value=100, value=5)
73
-
 
 
 
 
 
 
 
 
 
 
 
74
  # 检索并展示结果
75
- if st.button("Search") and query:
76
- # 注意:JSONLIndexer 提供的是 search_return_id 方法,返回的是 JSON id 字段
77
  rec_ids, scores = retriever.search_return_id(query, top_k)
78
-
79
- st.write("### Results:")
80
-
81
- with st.expander("Retrieval Results (click to expand)"):
82
- for j, rec_id in enumerate(rec_ids):
83
- st.markdown(
84
- f"""
85
- <div style="border:1px solid #ccc; padding:10px; border-radius:5px; margin-bottom:10px; background-color:#f9f9f9;">
86
- <p><b>Record {j+1} ID:</b> {rec_id}</p>
87
- <p><b>Score:</b> {scores[j]:.4f}</p>
88
- </div>
89
- """,
90
- unsafe_allow_html=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  )
 
 
 
 
 
3
  import faiss
4
  import numpy as np
5
  import streamlit as st
6
+ import pandas as pd
7
  from text2vec import SentenceModel
8
  from src.jsonl_Indexer import JSONLIndexer
9
 
 
33
  # 合并默认配置和命令行参数
34
  config = DEFAULT_CONFIG.copy()
35
  config.update(cli_args)
 
 
36
  config['vector_size'] = int(config['vector_size'])
37
 
38
  @st.cache_resource
 
67
  st.write("该应用基于预计算的 JSONL 文件 embedding,输入查询后将检索相似记录。")
68
 
# Query input: one search box and one Top-K slider, laid out side by side.
# The earlier standalone text_input/slider pair is dropped: it rendered
# duplicate controls, and the search call read the old slider's value, so the
# "Top K" slider shown in the column layout had no effect.
col1, col2 = st.columns([2.5, 1])
with col1:
    # Search query text box.
    query = st.text_input(
        "请输入搜索查询:", placeholder="your query", help=""
    )
with col2:
    # Number of results to retrieve; bound to top_k, which the search call uses.
    top_k = st.slider(
        "Top K", 1, 100, 50, help="choose the number of results to display"
    )

# Run the search only when the button is pressed and the query is non-empty.
if st.button("query") and query:
    # search_return_id yields the record ids and their similarity scores.
    rec_ids, scores = retriever.search_return_id(query, top_k)

    # Collect the hits in a DataFrame so they can be shown as a table.
    results_df = pd.DataFrame({
        "tool": rec_ids,
        "relevance": scores,
    })

    st.subheader("🗂️ 检索结果详情")

    # Zebra-stripe the rows: the function is applied per column (axis=0) and
    # returns one CSS string per row position.
    styled_results = results_df.style.apply(
        lambda column: [
            "background-color: #F7F7F7" if row % 2 == 0 else "background-color: #FFFFFF"
            for row in range(len(column))
        ],
        axis=0,
    ).format({"relevance": "{:.4f}"})

    # Interactive table; relevance is drawn as a progress bar scaled to the
    # best score in this result set (falls back to 1 when there are no hits).
    st.dataframe(
        styled_results,
        column_config={
            "tool": st.column_config.TextColumn("tool", help="检索到的tool", width="medium"),
            "relevance": st.column_config.ProgressColumn(
                "relevance",
                help="记录与查询的匹配程度",
                format="%.4f",
                min_value=0,
                max_value=float(max(scores)) if len(scores) > 0 else 1,
            ),
        },
        hide_index=True,
        use_container_width=True,
    )