Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
from io import BytesIO
|
@@ -28,175 +29,118 @@ if "authenticated" not in st.session_state:
|
|
28 |
|
29 |
# Sidebar
|
30 |
with st.sidebar:
|
31 |
-
# BSNL Logo (local file with error handling)
|
32 |
try:
|
33 |
-
st.image(
|
34 |
-
"bsnl_logo.png",
|
35 |
-
width=200
|
36 |
-
)
|
37 |
except FileNotFoundError:
|
38 |
-
st.warning("
|
|
|
39 |
st.header("RAG Control Panel")
|
40 |
api_key_input = st.text_input("Enter RAG Access Key", type="password")
|
41 |
-
|
42 |
-
# Authentication
|
43 |
if st.button("Authenticate"):
|
44 |
if api_key_input == RAG_ACCESS_KEY:
|
45 |
st.session_state.authenticated = True
|
46 |
-
st.success("
|
47 |
else:
|
48 |
-
st.error("Invalid
|
49 |
-
|
50 |
-
# File uploader
|
51 |
if st.session_state.authenticated:
|
52 |
-
input_data = st.file_uploader("Upload
|
53 |
-
|
54 |
-
if
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
def main():
|
73 |
-
# Inject CSS for simple color scheme and clean styling
|
74 |
-
st.markdown("""
|
75 |
-
<style>
|
76 |
-
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
|
77 |
-
.stApp {
|
78 |
-
background-color: #FFFFFF; /* White background */
|
79 |
-
font-family: 'Roboto', sans-serif;
|
80 |
-
color: #333333;
|
81 |
-
}
|
82 |
-
.stTextInput > div > div > input {
|
83 |
-
background-color: #FFFFFF;
|
84 |
-
color: #333333;
|
85 |
-
border-radius: 8px;
|
86 |
-
border: 1px solid #007BFF;
|
87 |
-
padding: 10px;
|
88 |
-
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
89 |
-
}
|
90 |
-
.stButton > button {
|
91 |
-
background-color: #007BFF;
|
92 |
-
color: white;
|
93 |
-
border-radius: 8px;
|
94 |
-
padding: 10px 20px;
|
95 |
-
border: none;
|
96 |
-
transition: all 0.3s ease;
|
97 |
-
box-shadow: 0 2px 4px rgba(0,0,0,0.2);
|
98 |
-
}
|
99 |
-
.stButton > button:hover {
|
100 |
-
background-color: #0056b3;
|
101 |
-
transform: scale(1.05);
|
102 |
-
}
|
103 |
-
.stSidebar {
|
104 |
-
background-color: #F5F5F5; /* Light gray */
|
105 |
-
padding: 20px;
|
106 |
-
border-right: 2px solid #007BFF;
|
107 |
-
}
|
108 |
-
h1, h2, h3 {
|
109 |
-
color: #333333;
|
110 |
-
}
|
111 |
-
.stSpinner > div > div {
|
112 |
-
border-color: #007BFF transparent transparent transparent;
|
113 |
-
}
|
114 |
-
</style>
|
115 |
-
""", unsafe_allow_html=True)
|
116 |
-
|
117 |
st.title("RAG Q&A App with Mistral AI")
|
118 |
-
st.markdown("Welcome to the BSNL RAG App
|
119 |
-
|
120 |
if not st.session_state.authenticated:
|
121 |
-
st.warning("Please authenticate
|
122 |
return
|
123 |
-
|
124 |
if st.session_state.vectorstore is None:
|
125 |
st.info("Please upload and process a PDF file in the sidebar.")
|
126 |
return
|
127 |
-
|
128 |
query = st.text_input("Enter your question:")
|
129 |
if st.button("Submit") and query:
|
130 |
with st.spinner("Generating answer..."):
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
|
|
|
|
134 |
|
135 |
def process_input(input_data):
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
os.chmod("vectorstore", 0o777) # Ensure write permissions
|
140 |
-
except PermissionError as e:
|
141 |
-
st.error(f"Failed to create vectorstore directory: {str(e)}")
|
142 |
-
raise
|
143 |
-
|
144 |
-
# Initialize progress bar and status
|
145 |
progress_bar = st.progress(0)
|
146 |
-
status = st.status("Processing PDF
|
147 |
-
|
148 |
-
# Step 1: Read PDF file in memory
|
149 |
status.update(label="Reading PDF file...")
|
150 |
-
progress_bar.progress(0.
|
151 |
-
|
152 |
pdf_reader = PdfReader(BytesIO(input_data.read()))
|
153 |
-
documents = ""
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
162 |
-
texts = text_splitter.split_text(documents)
|
163 |
-
|
164 |
-
# Step 3: Create embeddings
|
165 |
status.update(label="Creating embeddings...")
|
166 |
-
progress_bar.progress(0.
|
167 |
-
|
168 |
-
|
169 |
model_name="sentence-transformers/all-mpnet-base-v2",
|
170 |
-
model_kwargs={
|
171 |
)
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
dimension = len(hf_embeddings.embed_query("sample text"))
|
178 |
index = faiss.IndexFlatL2(dimension)
|
179 |
vector_store = FAISS(
|
180 |
-
embedding_function=
|
181 |
index=index,
|
182 |
docstore=InMemoryDocstore({}),
|
183 |
index_to_docstore_id={}
|
184 |
)
|
185 |
-
|
186 |
-
# Add texts to vector store
|
187 |
-
uuids = [str(uuid.uuid4()) for _ in range(len(texts))]
|
188 |
vector_store.add_texts(texts, ids=uuids)
|
189 |
-
|
190 |
-
# Save vector store locally
|
191 |
status.update(label="Saving vector store...")
|
192 |
-
progress_bar.progress(0.
|
193 |
-
|
194 |
vector_store.save_local("vectorstore/faiss_index")
|
195 |
-
|
196 |
-
|
197 |
-
status.update(label="Processing complete!", state="complete")
|
198 |
progress_bar.progress(1.0)
|
199 |
-
|
200 |
return vector_store
|
201 |
|
202 |
def answer_question(vectorstore, query):
|
@@ -205,14 +149,14 @@ def answer_question(vectorstore, query):
|
|
205 |
model_kwargs={"temperature": 0.7, "max_length": 512},
|
206 |
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
|
207 |
)
|
208 |
-
|
209 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
|
210 |
-
|
211 |
prompt_template = PromptTemplate(
|
212 |
template="Use the provided context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
|
213 |
input_variables=["context", "question"]
|
214 |
)
|
215 |
-
|
216 |
qa_chain = RetrievalQA.from_chain_type(
|
217 |
llm=llm,
|
218 |
chain_type="stuff",
|
@@ -220,7 +164,7 @@ def answer_question(vectorstore, query):
|
|
220 |
return_source_documents=False,
|
221 |
chain_type_kwargs={"prompt": prompt_template}
|
222 |
)
|
223 |
-
|
224 |
result = qa_chain({"query": query})
|
225 |
return result["result"].split("Answer:")[-1].strip()
|
226 |
|
|
|
1 |
+
# app.py
|
2 |
import streamlit as st
|
3 |
import os
|
4 |
from io import BytesIO
|
|
|
# Sidebar
with st.sidebar:
    # BSNL logo is optional branding: a missing file must not break the app.
    try:
        st.image("bsnl_logo.png", width=200)
    except FileNotFoundError:
        st.warning("Logo missing: 'bsnl_logo.png' not found.")

    st.header("RAG Control Panel")
    api_key_input = st.text_input("Enter RAG Access Key", type="password")

    # Authentication: compare the entered key against the configured access key.
    if st.button("Authenticate"):
        if api_key_input == RAG_ACCESS_KEY:
            st.session_state.authenticated = True
            st.success("Authenticated successfully!")
        else:
            st.error("Invalid RAG Access Key.")

    # File upload and processing are only offered once authenticated.
    if st.session_state.authenticated:
        input_data = st.file_uploader("Upload PDF file", type=["pdf"])

        if input_data:
            # Defense in depth: the uploader already filters on type=["pdf"],
            # but re-check the MIME type before processing.
            if input_data.type != "application/pdf":
                st.error("Only PDF files are allowed.")
            elif st.button("Process File"):
                try:
                    vector_store = process_input(input_data)
                    st.session_state.vectorstore = vector_store
                    st.success("File processed successfully. You can now ask questions.")
                except (PermissionError, OSError) as e:
                    st.error(f"File error: {str(e)}")
                except Exception as e:
                    # Hugging Face endpoints surface auth failures as errors
                    # containing "403"; give those a targeted message.
                    if "403" in str(e):
                        st.error("Access Denied (403): Check Hugging Face API token.")
                    else:
                        st.error(f"Unexpected error: {str(e)}")

        # Chat History
        st.subheader("Chat History")
        # FIX: use .get() with a default — "history" is not initialized in the
        # session-state setup visible here, so iterating st.session_state.history
        # directly can raise before the first question is answered.
        for i, (q, a) in enumerate(st.session_state.get("history", [])):
            st.write(f"**Q{i+1}:** {q}")
            st.write(f"**A{i+1}:** {a}")
            st.markdown("---")
# Main area
def main():
    """Render the main Q&A pane: title, auth/vectorstore guards, query box.

    Requires st.session_state.authenticated and st.session_state.vectorstore
    to be set (by the sidebar) before accepting questions.
    """
    st.title("RAG Q&A App with Mistral AI")
    st.markdown("Welcome to the BSNL RAG App. Upload a PDF and ask questions!")

    # Guard clauses: bail out early until the app is ready to answer.
    if not st.session_state.authenticated:
        st.warning("Please authenticate using your RAG Access Key in the sidebar.")
        return
    if st.session_state.vectorstore is None:
        st.info("Please upload and process a PDF file in the sidebar.")
        return

    query = st.text_input("Enter your question:")
    submitted = st.button("Submit")
    if not (submitted and query):
        return

    with st.spinner("Generating answer..."):
        try:
            reply = answer_question(st.session_state.vectorstore, query)
            st.session_state.history.append((query, reply))
            st.write("**Answer:**", reply)
        except Exception as exc:
            st.error(f"Failed to generate answer: {str(exc)}")
def process_input(input_data):
    """Build and persist a FAISS vector store from an uploaded PDF.

    Reads the uploaded PDF fully in memory, extracts its text, splits it into
    overlapping chunks, embeds them with a sentence-transformers model, and
    indexes them in FAISS. Progress is reported via st.progress / st.status.

    Args:
        input_data: uploaded file object whose .read() returns the PDF bytes.

    Returns:
        The populated FAISS vector store (also saved to vectorstore/faiss_index).

    Raises:
        ValueError: if the PDF contains no extractable text (e.g. scanned images).
    """
    os.makedirs("vectorstore", exist_ok=True)
    try:
        # FIX: best-effort only — chmod can raise OSError on some platforms
        # and filesystems, and a failed permission tweak should not abort
        # processing when the directory is already writable.
        os.chmod("vectorstore", 0o777)
    except OSError:
        pass

    progress_bar = st.progress(0)
    status = st.status("Processing PDF...", expanded=True)

    # Step 1: read the PDF entirely in memory.
    status.update(label="Reading PDF file...")
    progress_bar.progress(0.2)

    pdf_reader = PdfReader(BytesIO(input_data.read()))
    # extract_text() may return None for a page; coalesce to "".
    documents = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Step 2: split into overlapping chunks for retrieval.
    status.update(label="Splitting text...")
    progress_bar.progress(0.4)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = splitter.split_text(documents)
    if not texts:
        # FIX: scanned/image-only PDFs yield no text; fail clearly instead of
        # silently building and saving an empty index.
        raise ValueError("No extractable text found in the PDF.")

    # Step 3: create embeddings.
    status.update(label="Creating embeddings...")
    progress_bar.progress(0.6)

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"device": "cpu"}
    )

    # Step 4: build the FAISS index sized to the embedding dimension.
    status.update(label="Building FAISS index...")
    progress_bar.progress(0.8)

    dimension = len(embeddings.embed_query("sample text"))
    index = faiss.IndexFlatL2(dimension)
    vector_store = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=InMemoryDocstore({}),
        index_to_docstore_id={}
    )
    uuids = [str(uuid.uuid4()) for _ in texts]
    vector_store.add_texts(texts, ids=uuids)

    # Step 5: persist the index to disk.
    status.update(label="Saving vector store...")
    progress_bar.progress(0.9)
    vector_store.save_local("vectorstore/faiss_index")

    status.update(label="Done!", state="complete")
    progress_bar.progress(1.0)

    return vector_store
146 |
def answer_question(vectorstore, query):
|
|
|
149 |
model_kwargs={"temperature": 0.7, "max_length": 512},
|
150 |
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
|
151 |
)
|
152 |
+
|
153 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
|
154 |
+
|
155 |
prompt_template = PromptTemplate(
|
156 |
template="Use the provided context to answer the question concisely:\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:",
|
157 |
input_variables=["context", "question"]
|
158 |
)
|
159 |
+
|
160 |
qa_chain = RetrievalQA.from_chain_type(
|
161 |
llm=llm,
|
162 |
chain_type="stuff",
|
|
|
164 |
return_source_documents=False,
|
165 |
chain_type_kwargs={"prompt": prompt_template}
|
166 |
)
|
167 |
+
|
168 |
result = qa_chain({"query": query})
|
169 |
return result["result"].split("Answer:")[-1].strip()
|
170 |
|