import streamlit as st
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.schema import Document
from PyPDF2 import PdfReader
# os.environ["GOOGLE_API_KEY"]="AIzaSyBYrZpUdTc4rumhdHajlKfwY4Kq0u6vFDs"
# Streamlit title and description
st.title("Gemini-File with Llama-Index by Rahul Bhoyar")
st.write("This app lets you upload a PDF and query its contents, powered by Zephyr-7B via the Hugging Face Inference API.")
hf_token = st.text_input("Enter your Hugging Face token:")
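# An access token from https://huggingface.co/settings/tokens is needed for the
# Inference API calls below; a read-scoped token should be sufficient.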
def read_pdf(uploaded_file):
    """Extract and concatenate the text of every page of the uploaded PDF."""
    pdf_reader = PdfReader(uploaded_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages with no text layer
        text += page.extract_text() or ""
    return text
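# PdfReader accepts any binary file-like object, so the helper also works
# outside Streamlit. A quick local check (hypothetical file name, for illustration):
#
#   with open("sample.pdf", "rb") as f:
#       print(read_pdf(f)[:200])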
# Streamlit input for user file upload
uploaded_pdf = st.file_uploader("Upload your PDF", type=['pdf'])
# Load data and configure the index
if uploaded_pdf is not None:
    file_contents = read_pdf(uploaded_pdf)
    documents = [Document(text=file_contents)]
    st.success("Documents loaded successfully!")

    # Zephyr-7B (via the HF Inference API) for generation, UAE-Large-V1 for embeddings
    llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
    embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")

    # Configure the service context (models plus chunking) and build the index
    service_context = ServiceContext.from_defaults(
        llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae
    )
    index = VectorStoreIndex.from_documents(
        documents, service_context=service_context, show_progress=True
    )
    index.storage_context.persist()
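    # Note: persist() writes the index to ./storage by default. On a later run it
    # could be reloaded instead of rebuilt -- a minimal sketch, assuming the
    # default persist directory (not part of the original app flow):
    #
    #   from llama_index import StorageContext, load_index_from_storage
    #   storage_context = StorageContext.from_defaults(persist_dir="./storage")
    #   index = load_index_from_storage(storage_context, service_context=service_context)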
    query_engine = index.as_query_engine()

    # Streamlit input for user query
    user_query = st.text_input("Enter your query:")

    # Query engine with user input
    if user_query:
        response = query_engine.query(user_query)
        st.markdown(f"**Response:** {response}")
else:
    st.write("Please upload a file first.")