samiee2213 committed on
Commit
d8796fc
·
verified ·
1 Parent(s): 96c99bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -10
app.py CHANGED
@@ -5,10 +5,17 @@ import os
5
  from google.oauth2 import service_account
6
  from googleapiclient.discovery import build
7
  from streamlit_chat import message as st_message
8
- from langchain.schema import SystemMessage
 
 
 
 
 
 
9
  from langchain_groq import ChatGroq
 
10
  from dotenv import load_dotenv
11
- import warnings
12
 
13
  warnings.filterwarnings("ignore", category=DeprecationWarning)
14
 
@@ -31,18 +38,27 @@ Instructions:
31
  3. Provide information in a single sentence or a short, structured response.
32
  4. If the requested information isn’t available or verifiable, respond with "Information not available."
33
 
 
 
 
34
  Begin extraction.
35
  """
36
 
 
37
  def get_llm_response(entity, query):
38
- formatted_prompt = PROMPT_TEMPLATE.format(entity=entity, query=query)
39
- response = llm([SystemMessage(content=formatted_prompt)])
 
 
 
 
 
40
  return response[0].content if response else "Information not available"
41
 
42
- # Streamlit Setup
43
  st.set_page_config(page_title="DataScribe", page_icon=":notebook_with_decorative_cover:", layout="wide")
44
 
45
- # Sidebar Navigation
46
  with st.sidebar:
47
  selected = option_menu(
48
  "DataScribe Menu",
@@ -63,9 +79,47 @@ if "results" not in st.session_state:
63
  if "column_selection" not in st.session_state:
64
  st.session_state["column_selection"] = None
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  # Upload Data Section
67
- if selected == "Upload Data":
68
  st.header("Upload or Connect Your Data")
 
 
69
  data_source = st.radio("Choose data source:", ["CSV File", "Google Sheets"])
70
 
71
  if data_source == "CSV File":
@@ -89,14 +143,16 @@ if selected == "Upload Data":
89
  # Define Query Section
90
  elif selected == "Define Query":
91
  st.header("Define Your Custom Query")
 
92
  if st.session_state["data"] is not None:
93
  column_selection = st.selectbox("Select the primary column for entities", options=st.session_state["data"].columns)
94
  query_template = st.text_input("Define your query template", "Get me the email for {company}")
95
  st.session_state["query_template"] = query_template
96
- st.session_state["column_selection"] = column_selection
97
 
98
  st.write("### Example query preview")
99
  if column_selection:
 
100
  sample_entity = str(st.session_state["data"][column_selection].iloc[0])
101
  example_query = query_template.replace("{company}", sample_entity)
102
  st.code(example_query)
@@ -106,31 +162,52 @@ elif selected == "Define Query":
106
  # Extract Information Section with Progress Bar
107
  elif selected == "Extract Information":
108
  st.header("Extract Information")
 
109
  if st.session_state.get("query_template") and st.session_state["data"] is not None and st.session_state["column_selection"] is not None:
110
  st.write("Data extraction is in progress. This may take a few moments.")
 
 
111
  progress_bar = st.progress(0)
112
  column_selection = st.session_state["column_selection"]
113
  progress_step = 1.0 / len(st.session_state["data"][column_selection])
114
 
115
  results = []
116
  for i, entity in enumerate(st.session_state["data"][column_selection]):
 
117
  user_message = st.session_state["query_template"].replace("{company}", str(entity))
 
 
118
  result_text = get_llm_response(entity, user_message)
119
- results.append({"Entity": entity, "Extracted Information": result_text})
 
 
120
  progress_bar.progress((i + 1) * progress_step)
121
 
 
122
  st.session_state["results"] = pd.DataFrame(results)
123
  st.write("### Extracted Information")
124
  st.dataframe(st.session_state["results"])
125
 
126
- # View & Download Section
127
  elif selected == "View & Download":
128
  st.header("View and Download Results")
 
129
  if st.session_state["results"] is not None:
130
  st.write("### Extracted Data Table")
131
  st.dataframe(st.session_state["results"])
132
 
 
133
  csv_data = st.session_state["results"].to_csv(index=False)
134
  st.download_button("Download as CSV", csv_data, "datascribe_results.csv", "text/csv")
 
 
 
 
 
 
 
 
 
 
135
  else:
136
  st.warning("No data available to view or download.")
 
5
  from google.oauth2 import service_account
6
  from googleapiclient.discovery import build
7
  from streamlit_chat import message as st_message
8
+ import plotly.express as px
9
+ from langchain.schema import HumanMessage, SystemMessage, AIMessage
10
+ from langchain.chat_models import ChatOpenAI
11
+ from langchain.memory import ConversationBufferWindowMemory
12
+ from langchain.prompts import PromptTemplate
13
+ import warnings
14
+ import time
15
  from langchain_groq import ChatGroq
16
+ import numpy as np
17
  from dotenv import load_dotenv
18
+ import re
19
 
20
  warnings.filterwarnings("ignore", category=DeprecationWarning)
21
 
 
38
  3. Provide information in a single sentence or a short, structured response.
39
  4. If the requested information isn’t available or verifiable, respond with "Information not available."
40
 
41
+ #### Example Output Format:
42
+ "Company: {entity} | Requested Information: {extracted_information}"
43
+
44
  Begin extraction.
45
  """
46
 
47
+ # Function to get response from LLM
48
  def get_llm_response(entity, query):
49
+ # Format the prompt with the entity and query
50
+ prompt = PROMPT_TEMPLATE.format(entity=entity, query=query)
51
+
52
+ # Request response from the LLM
53
+ response = llm([SystemMessage(content=prompt)])
54
+
55
+ # Return content or default message
56
  return response[0].content if response else "Information not available"
57
 
58
+ # Streamlit app setup
59
  st.set_page_config(page_title="DataScribe", page_icon=":notebook_with_decorative_cover:", layout="wide")
60
 
61
+ # Sidebar navigation
62
  with st.sidebar:
63
  selected = option_menu(
64
  "DataScribe Menu",
 
79
  if "column_selection" not in st.session_state:
80
  st.session_state["column_selection"] = None
81
 
82
+ # Helper function for Google Sheets API setup
83
+ def get_google_sheet_data(sheet_id, range_name):
84
+ credentials = service_account.Credentials.from_service_account_info(st.secrets["gcp_service_account"])
85
+ service = build('sheets', 'v4', credentials=credentials)
86
+ sheet = service.spreadsheets()
87
+ result = sheet.values().get(spreadsheetId=sheet_id, range=range_name).execute()
88
+ values = result.get('values', [])
89
+ return pd.DataFrame(values[1:], columns=values[0])
90
+
91
+ # Function to write results back to Google Sheets
92
+ def update_google_sheet(sheet_id, range_name, data):
93
+ credentials = service_account.Credentials.from_service_account_info(st.secrets["gcp_service_account"])
94
+ service = build('sheets', 'v4', credentials=credentials)
95
+ sheet = service.spreadsheets()
96
+ body = {
97
+ 'values': [data.columns.tolist()] + data.values.tolist()
98
+ }
99
+ sheet.values().update(
100
+ spreadsheetId=sheet_id,
101
+ range=range_name,
102
+ valueInputOption="RAW",
103
+ body=body
104
+ ).execute()
105
+
106
+ # Home Page
107
+ if selected == "Home":
108
+ st.markdown(
109
+ """
110
+ ### Welcome to DataScribe
111
+ **DataScribe** is an AI-powered tool designed to extract structured information from the web
112
+ based on entities in your data file. Start by uploading a CSV or Google Sheet and defining a
113
+ custom search query.
114
+ """
115
+ )
116
+ st.image("https://via.placeholder.com/1200x400.png?text=DataScribe+AI+Agent+Dashboard") # Placeholder banner image
117
+
118
  # Upload Data Section
119
+ elif selected == "Upload Data":
120
  st.header("Upload or Connect Your Data")
121
+
122
+ # CSV Upload
123
  data_source = st.radio("Choose data source:", ["CSV File", "Google Sheets"])
124
 
125
  if data_source == "CSV File":
 
143
  # Define Query Section
144
  elif selected == "Define Query":
145
  st.header("Define Your Custom Query")
146
+
147
  if st.session_state["data"] is not None:
148
  column_selection = st.selectbox("Select the primary column for entities", options=st.session_state["data"].columns)
149
  query_template = st.text_input("Define your query template", "Get me the email for {company}")
150
  st.session_state["query_template"] = query_template
151
+ st.session_state["column_selection"] = column_selection # Store column selection in session state
152
 
153
  st.write("### Example query preview")
154
  if column_selection:
155
+ # Convert sample_entity to string to avoid replace errors
156
  sample_entity = str(st.session_state["data"][column_selection].iloc[0])
157
  example_query = query_template.replace("{company}", sample_entity)
158
  st.code(example_query)
 
162
  # Extract Information Section with Progress Bar
163
  elif selected == "Extract Information":
164
  st.header("Extract Information")
165
+
166
  if st.session_state.get("query_template") and st.session_state["data"] is not None and st.session_state["column_selection"] is not None:
167
  st.write("Data extraction is in progress. This may take a few moments.")
168
+
169
+ # Progress bar initialization
170
  progress_bar = st.progress(0)
171
  column_selection = st.session_state["column_selection"]
172
  progress_step = 1.0 / len(st.session_state["data"][column_selection])
173
 
174
  results = []
175
  for i, entity in enumerate(st.session_state["data"][column_selection]):
176
+ # Prepare the prompt for the model
177
  user_message = st.session_state["query_template"].replace("{company}", str(entity))
178
+
179
+ # Get response and append to results
180
  result_text = get_llm_response(entity, user_message)
181
+ results.append({"Entity": entity, "Extracted Information": result_text}) # Consistent key
182
+
183
+ # Update the progress bar
184
  progress_bar.progress((i + 1) * progress_step)
185
 
186
+ # Save and display results
187
  st.session_state["results"] = pd.DataFrame(results)
188
  st.write("### Extracted Information")
189
  st.dataframe(st.session_state["results"])
190
 
191
+ # View & Download Section with Google Sheets Update
192
  elif selected == "View & Download":
193
  st.header("View and Download Results")
194
+
195
  if st.session_state["results"] is not None:
196
  st.write("### Extracted Data Table")
197
  st.dataframe(st.session_state["results"])
198
 
199
+ # Download as CSV
200
  csv_data = st.session_state["results"].to_csv(index=False)
201
  st.download_button("Download as CSV", csv_data, "datascribe_results.csv", "text/csv")
202
+
203
+ # Option to update Google Sheet
204
+ sheet_id = st.text_input("Enter Google Sheet ID to update with results")
205
+ range_name = st.text_input("Enter range (e.g., Sheet1!A1)")
206
+ if st.button("Update Google Sheet"):
207
+ try:
208
+ update_google_sheet(sheet_id, range_name, st.session_state["results"])
209
+ st.success("Google Sheet updated successfully!")
210
+ except Exception as e:
211
+ st.error(f"Failed to update Google Sheet: {e}")
212
  else:
213
  st.warning("No data available to view or download.")