yixuantt committed on
Commit
87f5105
·
1 Parent(s): aad2007
Files changed (1) hide show
  1. app.py +20 -17
app.py CHANGED
@@ -1,54 +1,57 @@
1
  import streamlit as st
2
  import pandas as pd
3
 
def load_data():
    """Load the benchmark results table.

    The data is stored in a CSV file; this reads ``benchmark_data.csv``
    (a path relative to the current working directory) with pandas.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv("benchmark_data.csv")
7
 
def case_insensitive_search(data, query, column):
    """Filter ``data`` to rows whose ``column`` contains ``query``, ignoring case.

    Args:
        data: DataFrame to filter.
        query: Text typed by the user. A falsy value (empty string, None)
            means "no filter".
        column: Name of the column to search.

    Returns:
        ``data`` itself when ``query`` is falsy; otherwise a new DataFrame
        holding only the matching rows.
    """
    # Nothing to search for: hand the frame back untouched.
    if not query:
        return data

    values = data[column]
    # Cast to str so the .str accessor also works on numeric columns.
    lowered = values.astype(str).str.lower()
    # regex=False: the query is plain user text, so characters such as "("
    # or "+" must match literally instead of raising re.error.
    # na=False plus notna(): missing cells never match (not even against
    # their "nan" string form) and the mask stays strictly boolean.
    mask = lowered.str.contains(query.lower(), regex=False, na=False) & values.notna()
    return data[mask]
13
 
def display_table(data, rows_per_page=20):
    """Show ``data`` as a fixed-height table (scrolling is left to the widget).

    Args:
        data: Table to display; passed straight to ``st.dataframe``.
        rows_per_page: Number of rows the table height is sized for.
    """
    # A Streamlit container hosts the table.
    container = st.container()
    with container:
        # Height is estimated from an approximate per-row height:
        # 40 + 38 per row (presumably header + row heights in pixels),
        # capped at a maximum of 800 pixels.
        height = min(40 + rows_per_page * 38, 800)
        st.dataframe(data, height=height)
22
 
def main():
    """Page layout and behaviour for the benchmark app.

    Loads the benchmark table, applies the sidebar text filters, shows the
    filtered table, and optionally draws a bar chart of the metric columns.
    """
    st.title("Multihop-RAG Benchmark Space")

    data = load_data()

    # Search boxes (widgets are created in this fixed order).
    st.sidebar.header("Search Options")
    chat_model_query = st.sidebar.text_input("Search by Chat Model")
    embedding_model_query = st.sidebar.text_input("Search by Embedding Model")
    chunk_query = st.sidebar.text_input("Search by Chunk")

    # Apply each non-empty search box to its column, narrowing the table
    # step by step.
    filters = (
        (chat_model_query, 'chat_model'),
        (embedding_model_query, 'embedding_model'),
        (chunk_query, 'chunk'),
    )
    for query, column in filters:
        if query:
            data = case_insensitive_search(data, query, column)

    # Show the (possibly filtered) data with the custom table helper.
    st.header("Benchmark Results")
    st.write("Displaying results for MRR@10, Hit@10, and Accuracy across different frameworks, embedding models, chat models, and chunks.")
    display_table(data)

    # Statistics and charts: opt-in bar chart of the three metric columns.
    if st.sidebar.checkbox("Show Metrics Distribution"):
        st.subheader("Metrics Distribution")
        st.bar_chart(data[['MRR@10', 'Hit@10', 'Accuracy']])
 
 
 
 
 
 
 
 
 
52
 
# Build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
 
1
  import streamlit as st
2
  import pandas as pd
3
 
4
+
def load_data():
    """Load the benchmark results table.

    Reads ``benchmark_data.csv`` (a path relative to the current working
    directory) with pandas.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv("benchmark_data.csv")
7
 
 
def case_insensitive_search(data, query, column):
    """Filter ``data`` to rows whose ``column`` contains ``query``, ignoring case.

    Args:
        data: DataFrame to filter.
        query: Text typed by the user. A falsy value (empty string, None)
            means "no filter".
        column: Name of the column to search.

    Returns:
        ``data`` itself when ``query`` is falsy; otherwise a new DataFrame
        holding only the matching rows.
    """
    if not query:
        return data

    values = data[column]
    # Cast to str so the .str accessor also works on numeric columns.
    lowered = values.astype(str).str.lower()
    # regex=False: the query is plain user text, so characters such as "("
    # or "+" must match literally instead of raising re.error.
    # na=False plus notna(): missing cells never match (not even against
    # their "nan" string form) and the mask stays strictly boolean.
    mask = lowered.str.contains(query.lower(), regex=False, na=False) & values.notna()
    return data[mask]
12
 
def display_table(data, rows_per_page=10):
    """Show ``data`` as a fixed-height table inside a Streamlit container.

    Args:
        data: Table to display; passed straight to ``st.dataframe``.
        rows_per_page: Number of rows the table height is sized for.
    """
    container = st.container()
    with container:
        # 40 + 38 per row (presumably header + row heights in pixels),
        # never taller than 800.
        height = min(40 + rows_per_page * 38, 800)
        st.dataframe(data, height=height)
18
 
19
+
def main():
    """Build the benchmark page.

    Loads the benchmark table, applies the sidebar text filters, shows the
    filtered table, optionally draws a bar chart of the metric columns, and
    finishes with the citation notice in the sidebar and page footer.
    """
    st.title("Multihop-RAG Benchmark Space")

    data = load_data()

    # Sidebar search boxes (widgets are created in this fixed order).
    st.sidebar.header("Search Options")
    chat_model_query = st.sidebar.text_input("Search by Chat Model")
    embedding_model_query = st.sidebar.text_input("Search by Embedding Model")
    chunk_query = st.sidebar.text_input("Search by Chunk")

    # Apply each non-empty search box to its column, narrowing the table
    # step by step.
    filters = (
        (chat_model_query, 'chat_model'),
        (embedding_model_query, 'embedding_model'),
        (chunk_query, 'chunk'),
    )
    for query, column in filters:
        if query:
            data = case_insensitive_search(data, query, column)

    st.header("Benchmark Results")
    st.write("Displaying results for MRR@10, Hit@10, and Accuracy across different frameworks, embedding models, chat models, and chunks.")
    display_table(data)

    # Opt-in bar chart of the three metric columns.
    if st.sidebar.checkbox("Show Metrics Distribution"):
        st.subheader("Metrics Distribution")
        st.bar_chart(data[['MRR@10', 'Hit@10', 'Accuracy']])

    # NOTE(review): the citation widgets are placed at function level so
    # they always render; confirm against the real file that they were not
    # nested under the checkbox above.
    st.sidebar.header("Citation")
    st.sidebar.info(
        "Please cite this dataset as:\n"
        "Author et al. (2024). Multihop-RAG Benchmark Dataset. Retrieved from [Source URL]."
    )
    st.markdown("---")
    st.caption("For citation, please use: 'Author et al. (2024), Multihop-RAG Benchmark Dataset, Retrieved from [Source URL].'")
55
 
# Build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()