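# Gradio RAG demo over IIT student handbooks. The flow implemented below is:
#   1. extract keywords from the user's question with a vlT5 keyword model,
#   2. retrieve the most relevant handbook chunks from a persisted Chroma index,
#   3. build a prompt from those chunks and answer with Llama-3.2-1B-Instruct,
#   4. expose the whole pipeline through a simple Gradio interface.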
import gradio as gr
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import subprocess
# from sklearn.decomposition import PCA
from langchain_community.llms import Ollama
from langchain_chroma import Chroma
import langchain
from langchain_community.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from typing import List, Dict
from langchain.docstore.document import Document
import os
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForCausalLM, pipeline
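# Two separate models are loaded below: a small T5 fine-tuned for keyword
# extraction, and a Llama instruct model used as the answer generator. They get
# their own variable names so one does not overwrite the other.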
kw_tokenizer = T5Tokenizer.from_pretrained("Voicelab/vlt5-base-keywords")
kw_model = T5ForConditionalGeneration.from_pretrained("Voicelab/vlt5-base-keywords")

use_auth_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")  # access token for the gated Llama checkpoint
model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=use_auth_token)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", token=use_auth_token)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,  # adjust as needed
    temperature=0.5,     # adjust as needed
)
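# The Chroma index under ./chroma_db is assumed to have been built offline from
# the handbook PDFs; that ingestion step is not part of this script. A minimal
# sketch of it (hypothetical paths and chunk sizes), using the loaders and
# splitters imported above, might look like:
#
#   docs = DirectoryLoader("handbooks/", glob="**/*.pdf", loader_cls=PyPDFLoader).load()
#   splits = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)
#   Chroma.from_documents(
#       splits,
#       embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v1"),
#       persist_directory="chroma_db",
#   )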
vectorstore = Chroma(
    embedding_function=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v1"),
    persist_directory="chroma_db",
)

# Quick sanity check that the persisted index can be queried
print(vectorstore.similarity_search_with_score("Course Leader"))

# llm = Ollama(
#     model="llama3.2:3b"
# )
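# Retrieval is done in two stages: first over-fetch candidate chunks, then group
# them by source document, so the final context mixes chunks from the top-scoring
# documents instead of coming from a single document.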
def retrieve_relevant_chunks(
    vector_store: Chroma,
    query: str,
    n_docs: int = 2,
    chunks_per_doc: int = 5
) -> Dict[str, List[Document]]:
    """Return the best chunks_per_doc chunks from each of the n_docs most relevant documents."""
    # Get more results initially to ensure we have enough unique documents
    results = vector_store.similarity_search_with_score(
        query,
        k=50  # fetch generously so several distinct source documents are represented
    )
    # Group results by source document
    doc_chunks: Dict[str, List[tuple]] = {}
    for doc, score in results:
        doc_id = doc.metadata.get('source', '')  # or use another metadata field as the document ID
        if doc_id:
            if doc_id not in doc_chunks:
                doc_chunks[doc_id] = []
            doc_chunks[doc_id].append((doc, score))
    # Sort documents by their best-matching chunk's score (lower distance = more similar)
    sorted_docs = sorted(
        doc_chunks.items(),
        key=lambda x: min(chunk[1] for chunk in x[1])
    )
    # Keep only the top n_docs documents
    top_docs = sorted_docs[:n_docs]
    # For each top document, keep the best chunks_per_doc chunks
    final_results: Dict[str, List[Document]] = {}
    for doc_id, chunks in top_docs:
        # Sort chunks by score (most relevant first)
        sorted_chunks = sorted(chunks, key=lambda x: x[1])
        # Keep only the requested number of chunks and store just the Document objects
        final_results[doc_id] = [chunk[0] for chunk in sorted_chunks[:chunks_per_doc]]
    return final_results
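# Example of the shape this returns (hypothetical query and file names):
#   retrieve_relevant_chunks(vectorstore, "course leader contact", n_docs=2, chunks_per_doc=3)
#   -> {"handbooks/cs_handbook.pdf": [Document(...), ...],
#       "handbooks/se_handbook.pdf": [Document(...), ...]}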
def display_results(results: Dict[str, List[Document]]) -> str:
    """
    Format the retrieved chunks into a single context string for the prompt.

    Args:
        results: Dictionary mapping document IDs to lists of retrieved chunks

    Returns:
        The chunk texts concatenated, with a separator line between documents
    """
    prompt = ""
    for doc_id, chunks in results.items():
        # prompt += f"\nDocument ID: {doc_id}\n"
        prompt += "-" * 50 + "\n"
        for i, chunk in enumerate(chunks, 1):
            prompt += chunk.page_content + "\n"
    return prompt
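# main() glues the pipeline together: keyword extraction -> retrieval -> prompt
# assembly -> generation. Retrieval runs on the extracted keywords rather than
# on the raw question.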
def main(query):
    # Extract keywords from the question with the vlT5 keyword model
    upd_query = "Keyword: " + query
    input_ids = kw_tokenizer.encode(upd_query, return_tensors="pt")
    outputs = kw_model.generate(input_ids)
    output_sequence = kw_tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Deduplicate the comma-separated keywords
    result_list = list(set(item.strip() for item in output_sequence.split(',')))
    output_string = ", ".join(result_list)
    print(output_string)

    try:
        results = retrieve_relevant_chunks(
            vector_store=vectorstore,
            query=output_string,
            n_docs=2,
            chunks_per_doc=5
        )
        prompt = display_results(results)
    except Exception as e:
        print(f"Error: {str(e)}")
        return f"Sorry, something went wrong while retrieving the context: {str(e)}"

    formatted_prompt = f"""
You are an AI assistant. Your goal is to answer questions about degree information using only the context provided below. Respond as a representative of IIT and do not mention anything that reveals you are reading from a provided document:
{prompt}
Based on the above, answer the following question:
{query}
Give the answer in a clear and concise manner.
"""
    response = generator(formatted_prompt, return_full_text=False)
    return response[0]['generated_text']
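# Minimal Gradio UI: a question box, an answer box, and a Generate button that
# calls main() with the question and displays the generated answer.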
with gr.Blocks() as demo:
    # gr.Image("../Documentation/Context Diagram.png", scale=2)
    gr.Markdown("""
    <center>
    <span style='font-size: 50px; font-weight: bold; font-family: "Graduate", serif'>
    IIT RAG Student Handbooks
    </span>
    </center>
    """)
    with gr.Group():
        query = gr.Textbox(label="Question")
        answer = gr.Textbox(label="Answer")
    with gr.Row():
        generate_btn = gr.Button(value="Generate")
    generate_btn.click(main, inputs=[query], outputs=answer)

# demo.launch(share=True, auth=authenticate)
demo.launch()