ask_my_thesis / app.py
Rahul Bhoyar
Updated files
0cec20e
raw
history blame
4.72 kB
# import streamlit as st
# from PyPDF2 import PdfReader
# from llama_index.llms import HuggingFaceInferenceAPI
# from llama_index import VectorStoreIndex
# from llama_index.embeddings import HuggingFaceEmbedding
# from llama_index import ServiceContext
# from llama_index.schema import Document
# def read_pdf(uploaded_file):
# pdf_reader = PdfReader(uploaded_file)
# text = ""
# for page_num in range(len(pdf_reader.pages)):
# text += pdf_reader.pages[page_num].extract_text()
# return text
# def querying(query_engine):
# query = st.text_input("Enter the Query for PDF:")
# submit = st.button("Generate The response for the query")
# if submit:
# with st.spinner("Fetching the response..."):
# response = query_engine.query(query)
# st.write(f"**Response:** {response}")
# def main():
# st.title("PdfQuerier using LLAMA by Rahul Bhoyar")
# hf_token = st.text_input("Enter your Hugging Face token:")
# llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
# uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
# if uploaded_file is not None:
# file_contents = read_pdf(uploaded_file)
# documents = Document(text=file_contents)
# documents = [documents]
# st.success("Documents loaded successfully!")
# with st.spinner("Created Embedding model..."):
# embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
# service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae)
# st.success("Embedding model created successfully!")
# # Download embeddings from OpenAI
# with st.spinner("Created VectorStoreIndex..."):
# index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
# index.storage_context.persist()
# query_engine = index.as_query_engine()
# st.success("VectorStoreIndex created successfully!")
# querying(query_engine)
# if __name__ == "__main__":
# main()
import streamlit as st
from llama_index import SimpleDirectoryReader, VectorStoreIndex
from llama_index import ServiceContext
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.schema import Document
from PyPDF2 import PdfReader
# os.environ["GOOGLE_API_KEY"] = "<REDACTED>"  # SECURITY: a real API key was committed here — rotate that key and load secrets from the environment instead
# Streamlit title and description
# App header. The model actually used is a Hugging Face-hosted LLM
# (HuggingFaceH4/zephyr-7b-alpha, see below) — the previous text that
# claimed the app was "Powered By Gemini" was misleading.
st.title("PDF Querier with Llama-Index by Rahul Bhoyar")
st.write("This app allows you to upload your own PDF and query your document, powered by a Hugging Face LLM (zephyr-7b-alpha).")
# Mask the token field: it is a credential, not ordinary text.
hf_token = st.text_input("Enter your Hugging Face token:", type="password")
#function to save a file
# def save_uploadedfile(uploadedfile):
# with open(os.path.join("data",uploadedfile.name),"wb") as f:
# f.write(uploadedfile.getbuffer())
# return st.success("Saved File:{} to directory".format(uploadedfile.name))
def read_pdf(uploaded_file):
    """Extract and concatenate the text of every page in an uploaded PDF.

    Args:
        uploaded_file: A binary file-like object containing PDF data
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: Text of all pages joined together; ``""`` if no page has
        extractable text.
    """
    pdf_reader = PdfReader(uploaded_file)
    # extract_text() can return None for image-only pages — coerce to "".
    # "".join avoids quadratic string concatenation over many pages.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Streamlit input for user file upload
uploaded_pdf = st.file_uploader("Upload your PDF", type=['pdf'])

# Load data and configure the index
if uploaded_pdf is not None:
    if not hf_token:
        # HuggingFaceInferenceAPI fails later with an opaque auth error
        # when the token is empty; stop early with a clear message.
        st.warning("Please enter your Hugging Face token first.")
        st.stop()

    file_contents = read_pdf(uploaded_pdf)
    # The index API expects a list of Documents; wrap the raw text once.
    documents = [Document(text=file_contents)]
    st.success("Documents loaded successfully!")

    llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
    embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")

    # Configure Service Context (chunking + models used for indexing/querying)
    service_context = ServiceContext.from_defaults(
        llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae
    )

    # NOTE(review): this rebuilds the embeddings and index on every
    # Streamlit rerun (every widget interaction) — consider caching with
    # st.cache_resource keyed on the uploaded file.
    with st.spinner("Building the vector index..."):
        index = VectorStoreIndex.from_documents(
            documents, service_context=service_context, show_progress=True
        )
        index.storage_context.persist()
    query_engine = index.as_query_engine()

    # Streamlit input for user query
    user_query = st.text_input("Enter your query:")

    # Query engine with user input
    if user_query:
        response = query_engine.query(user_query)
        st.markdown(f"**Response:** {response}")
else:
    st.write("Please upload a file first.")