import gradio as gr
import numpy as np
from usearch.index import Index
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from sentencex import segment

# Load the fine-tuned sentence encoder and the training split of the triplet dataset
model = SentenceTransformer("Corran/SciGenAllMiniLM")
train = load_dataset("Corran/SciGenColbertTriplets")["train"]

# Collect the unique rhetorical-function (RF) labels and embed them once
rf = sorted(set(train["query"]))
rf_emb = model.encode(rf)

# Build a USearch index over the RF label embeddings
index = Index(ndim=rf_emb[0].size)
index.add(np.arange(len(rf)), rf_emb)


def get_matches(input):
    global index, model, rf
    emb = model.encode(input, batch_size=128)
    matches = index.search(emb, 4)
    if isinstance(input, list) and len(input) > 1:
        # Batched search: keep only the top match for each sentence
        matches = [m[0] for m in matches]
    else:
        matches = [m for m in matches]
    return [(rf[m.key], m.distance) for m in matches]


def return_rf_scores(paragraph):
    # Split the paragraph into sentences and label each with its closest RF
    sentences = list(segment("en", paragraph))
    matches = get_matches(sentences)
    output = {}
    for s, m in zip(sentences, matches):
        output[s] = m
    return output


demo = gr.Interface(fn=return_rf_scores, inputs="text", outputs="json")
demo.launch()