# app.py — Gradio demo: semantic search over an uploaded .txt document
# using the nasa-impact/nasa-smd-ibm-st-v2 sentence-similarity model
# served through the Hugging Face Inference API.
import gradio as gr
import requests
import os
import re
# Hugging Face Inference API token, read from the environment.
# NOTE(review): if API_TOKEN is unset this is None and the header below
# becomes "Bearer None" — requests will still be sent, but rejected.
API_TOKEN = os.getenv('API_TOKEN')
# Hosted sentence-similarity endpoint for the NASA SMD/IBM model.
API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"
# Authorization header attached to every inference request.
headers = {"Authorization": f"Bearer {API_TOKEN}"}
def query_similarity(source_sentence, sentences):
    """Score *sentences* against *source_sentence* via the HF Inference API.

    Parameters
    ----------
    source_sentence : str
        The query text.
    sentences : list[str]
        Candidate texts to score for similarity against the query.

    Returns
    -------
    tuple
        (result, sentences) where result is the parsed JSON body — for this
        model a list of float scores parallel to *sentences* — or a dict
        with an "error" key when the body is not valid JSON.
    """
    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": sentences
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    # response.json() raises a ValueError subclass (json.JSONDecodeError /
    # requests.exceptions.JSONDecodeError) on a malformed body. The original
    # caught json.JSONDecodeError without importing json, so a decode failure
    # raised NameError instead; catching ValueError covers both and needs no
    # extra import.
    try:
        return response.json(), sentences
    except ValueError:
        return {"error": "Failed to decode JSON response"}, sentences
def format_output(response, sentences):
    """Render similarity scores as a best-first, human-readable listing.

    *response* is expected to be a list of float scores parallel to
    *sentences*; any other payload is echoed back as unexpected.
    """
    if not isinstance(response, list):
        return f"Unexpected response format: {response}"
    # Pair each score with its sentence and rank best matches first.
    ranked = sorted(zip(response, sentences), key=lambda pair: pair[0], reverse=True)
    rendered = [
        f"Sentence: {text.strip()}, Score: {value:.4f}\n"
        for value, text in ranked
    ]
    return "\n".join(rendered)
def split_into_chunks(text, chunk_size=100):
    """Split *text* into chunks of at most roughly *chunk_size* words.

    The text is first split into sentences at '.', '!' or '?' followed by
    spaces, then sentences are greedily packed into chunks so no chunk
    exceeds *chunk_size* words — except a single sentence longer than
    *chunk_size*, which becomes a chunk on its own.

    Parameters
    ----------
    text : str
        Document to split.
    chunk_size : int, optional
        Approximate maximum number of words per chunk (default 100).

    Returns
    -------
    list[str]
        Chunks in original order.
    """
    sentences = re.split(r'(?<=[.!?]) +', text)  # Split text into sentences
    chunks = []
    current_chunk = []
    current_length = 0
    for sentence in sentences:
        sentence_length = len(sentence.split())
        if current_length + sentence_length > chunk_size:
            # Flush the chunk built so far — but only if it has content.
            # The original unconditionally appended here, emitting a spurious
            # empty-string chunk whenever the very first sentence already
            # exceeded chunk_size.
            if current_chunk:
                chunks.append(" ".join(current_chunk))
            current_chunk = [sentence]
            current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
def semantic_search(query, file_path):
    """Rank chunks of an uploaded .txt document by similarity to *query*.

    Returns the formatted ranking as text, or a prompt to upload a file
    when no path was provided.
    """
    # Guard clause: Gradio passes None when no file was uploaded.
    if file_path is None:
        return "Please upload a .txt file."
    # Read the whole document, chunk it, and score every chunk remotely.
    with open(file_path, 'r', encoding='utf-8') as handle:
        document = handle.read()
    scores, chunk_list = query_similarity(query, split_into_chunks(document))
    return format_output(scores, chunk_list)
# Define Gradio interface
# Two inputs — a free-text query and a .txt upload — mapped through
# semantic_search to a plain-text ranking of the most similar chunks.
iface = gr.Interface(
    fn=semantic_search,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your query here..."),
        gr.File(file_types=['txt'], label="Upload a .txt file")
    ],
    outputs="text",
    title="Document Semantic Search",
    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences."
)
# Start the web app (blocks until the server is stopped).
iface.launch()