import streamlit as st
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.schema import Document
from PyPDF2 import PdfReader


# Streamlit title and description
st.title("PDF Querier with LlamaIndex by Rahul Bhoyar")
st.write("This app lets you upload a PDF and ask questions about it, powered by the HuggingFaceH4/zephyr-7b-alpha model via the Hugging Face Inference API.")

hf_token = st.text_input("Enter your Hugging Face token:", type="password")

def read_pdf(uploaded_file):
    """Extract text from every page of the uploaded PDF."""
    pdf_reader = PdfReader(uploaded_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages without a text layer
        # (e.g. scanned images), so fall back to an empty string.
        text += page.extract_text() or ""
    return text

# Streamlit input for user file upload
uploaded_pdf = st.file_uploader("Upload your PDF", type=['pdf'])

# Load the PDF and configure the index
if uploaded_pdf is not None:
    file_contents = read_pdf(uploaded_pdf)
    documents = [Document(text=file_contents)]
    st.success("Documents loaded successfully!")
    
    # Fail fast if no token was provided; the Inference API call needs one.
    if not hf_token:
        st.warning("Please enter your Hugging Face token above.")
        st.stop()

    llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
    embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")

    # Configure the service context: chunking parameters plus the LLM and
    # embedding model used to build and query the index.
    service_context = ServiceContext.from_defaults(
        llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae
    )
    index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
    index.storage_context.persist()
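    # persist() writes the index to ./storage by default. A minimal sketch of
    # reloading it on a later run instead of re-embedding, assuming the legacy
    # pre-0.10 llama_index API used by the imports above:
    #
    #   from llama_index import StorageContext, load_index_from_storage
    #   storage_context = StorageContext.from_defaults(persist_dir="./storage")
    #   index = load_index_from_storage(storage_context, service_context=service_context)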
    query_engine = index.as_query_engine()
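    # NOTE: Streamlit reruns this whole script on every widget interaction,
    # so the PDF is re-embedded and the index rebuilt for each query. One way
    # to avoid that, sketched under the assumption of Streamlit 1.18+
    # (st.cache_resource); build_index is a hypothetical helper, not part of
    # the original app:
    #
    #   @st.cache_resource
    #   def build_index(pdf_text, token):
    #       ctx = ServiceContext.from_defaults(
    #           llm=HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=token),
    #           chunk_size=800, chunk_overlap=20,
    #           embed_model=HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1"),
    #       )
    #       return VectorStoreIndex.from_documents([Document(text=pdf_text)], service_context=ctx)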

    # Streamlit input for the user query
    user_query = st.text_input("Enter your query:")

    # Run the query against the index
    if user_query:
        with st.spinner("Fetching the response..."):
            response = query_engine.query(user_query)
        st.markdown(f"**Response:** {response}")
else:
    st.write("Please upload a PDF file first.")