File size: 3,517 Bytes
934f74d
 
9f92793
934f74d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a48ed2
934f74d
7b626c7
 
 
 
934f74d
 
 
 
 
 
 
 
 
b8c5764
5fcbca3
 
 
 
 
 
 
 
 
 
 
a1f2581
5fcbca3
 
 
 
 
b8c5764
 
 
608503a
b8c5764
5fcbca3
5deec6f
 
608503a
5deec6f
 
 
608503a
5fcbca3
 
6c34254
 
b8c5764
934f74d
6c34254
934f74d
 
6c34254
 
934f74d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import time
import json
import numpy as np

import streamlit as st
from pathlib import Path

import sys
path_root = Path("./")
sys.path.append(str(path_root))


# Page-level Streamlit settings: title, icon and layout mode.
st.set_page_config(
    page_title="PSC Runtime",
    page_icon='🌸',
    layout="centered",
)

# cola, colb, colc = st.columns([5, 4, 5])

# colaa, colbb, colcc = st.columns([1, 8, 1])
# with colbb:
#     runtime = st.select_slider(
#         'Select a runtime type',
#         options=['PyTorch', 'ONNX Runtime'])
#     st.write('Now using: ', runtime)


# colaa, colbb, colcc = st.columns([1, 8, 1])
# with colbb:
#     encoder = st.select_slider(
#         'Select a query encoder',
#         options=['uniCOIL', 'SPLADE++ Ensemble Distil', 'SPLADE++ Self Distil'])
#     st.write('Now Running Encoder: ', encoder)

# if runtime == 'PyTorch':
#     runtime = 'pytorch'
#     runtime_index = 1
# else:
#     runtime = 'onnx'
#     runtime_index = 0


# Search bar: a wide column for the query text box and a narrow one for the
# search button (9:1 width ratio).
col1, col2 = st.columns([9, 1])

search_query = col1.text_input(label="search query", placeholder="Search")

# The lone '#' is written above the button; presumably it serves as a vertical
# spacer so the button lines up with the text box -- TODO confirm.
col2.write('#')
button_clicked = col2.button("🔎")


import torch

# Run file loaded from the current working directory.
fn = "dl19-gpt-3.5.pt"
# NOTE(review): torch.load unpickles the file, so it must only ever be pointed
# at a trusted file.
loaded_run = torch.load(fn)

# Element 2 of the loaded object holds the per-query outputs. Each entry is an
# iterable of records that carry a 'query' and a 'hits' key.
outputs = loaded_run[2]

# Maps each query to the list of 'hits' values found in its records.
query2outputs = {}
for output in outputs:
    all_queries = {x['query'] for x in output}
    # Every record in one entry must refer to the same query. This is raised
    # explicitly (rather than asserted) so the check survives `python -O`.
    if len(all_queries) != 1:
        raise ValueError(
            f"expected exactly one query per output group, got {len(all_queries)}")
    (query,) = all_queries
    query2outputs[query] = [x['hits'] for x in output]

# NOTE(review): this replaces whatever was assigned to `search_query` earlier
# in the script with the alphabetically first query of the run file -- confirm
# this is intended.
search_query = sorted(query2outputs)[0]


def preferences_from_hits(list_of_hits):
    """Convert rankings of document dicts into integer preference lists.

    Every distinct ``doc["docid"]`` is given a dense integer index in order of
    first appearance (0, 1, 2, ...). Each ranking is then rewritten as the
    list of those indices.

    Args:
        list_of_hits: Iterable of rankings; each ranking is a sequence of
            dicts that carry a ``"docid"`` key.

    Returns:
        A ``(preferences, id2doc)`` tuple. ``preferences`` is
        ``np.array`` applied to the list of index lists (one row per ranking).
        ``id2doc`` maps each integer index to the first document dict seen
        with that docid.
    """
    docid2id = {}
    id2doc = {}
    preferences = []

    for result in list_of_hits:
        row = []
        for doc in result:
            docid = doc["docid"]
            if docid not in docid2id:
                # The next free index is the number of docids seen so far.
                # (Using the length of the docid string instead would make
                # all docids of equal length collide on the same index.)
                doc_index = len(docid2id)
                docid2id[docid] = doc_index
                id2doc[doc_index] = doc
            row.append(docid2id[docid])
        preferences.append(row)

    return np.array(preferences), id2doc


def aggregate(list_of_hits):
    """Merge several rankings of documents into a single ranked list.

    The rankings are converted to integer preference lists with
    ``preferences_from_hits``, passed to
    ``KemenyOptimalAggregator().aggregate``, and the returned indices are
    mapped back to their document dicts.

    Args:
        list_of_hits: Iterable of rankings; each ranking is a sequence of
            document dicts that carry a ``"docid"`` key.

    Returns:
        List of document dicts, in the order given by the aggregator output.
    """
    # Imported inside the function, so permsc is only loaded when this runs.
    from permsc import KemenyOptimalAggregator

    preferences, id2doc = preferences_from_hits(list_of_hits)
    # Alternative previously tried here: permsc's BordaRankAggregator, with
    # the same `.aggregate(preferences)` call.
    y_optimal = KemenyOptimalAggregator().aggregate(preferences)

    # Diagnostics written to stdout.
    print("preference:")
    print(preferences)
    print("preferences shape: ", preferences.shape)
    print("y_optimal: ", y_optimal)

    return [id2doc[doc_index] for doc_index in y_optimal]

# Aggregate the rankings stored for the selected query into one ranking.
aggregated_ranking = aggregate(query2outputs[search_query])

if search_query or button_clicked:

    # search_results = query2outputs[search_query][0] # first from the 20
    search_results = aggregated_ranking

    # NOTE(review): the label says "Before aggregation" and ends in "ms", but
    # the list rendered below is the aggregated ranking and no timing value is
    # interpolated -- confirm the intended wording.
    st.write(
        f'<p align=\"right\" style=\"color:grey;\"> Before aggregation for query [{search_query}] ms</p>', unsafe_allow_html=True)

    for rank, result in enumerate(search_results, start=1):
        result_id = result["docid"]
        contents = result["content"]

        # Header row for this hit; the <div> is closed so the markup is
        # well-formed.
        output = f'<div class="row"> <b>Rank</b>: {rank} | <b>Document ID</b>: {result_id}</div>'

        # NOTE(review): `contents` is interpolated into HTML without escaping;
        # confirm the document text can never contain markup.
        try:
            st.write(output, unsafe_allow_html=True)
            st.write(
                f'<div class="row">{contents}</div>', unsafe_allow_html=True)
        except Exception as exc:
            # Rendering stays best-effort (one bad hit must not abort the
            # page), but the failure is reported instead of silently dropped.
            # Catching Exception rather than using a bare `except` lets
            # KeyboardInterrupt/SystemExit propagate.
            print(f"Failed to render result {result_id}: {exc}")
        st.write('---')