samiee2213 committed on
Commit
96c99bd
·
verified ·
1 Parent(s): f3a8277

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -99
app.py CHANGED
@@ -5,23 +5,16 @@ import os
5
  from google.oauth2 import service_account
6
  from googleapiclient.discovery import build
7
  from streamlit_chat import message as st_message
8
- import plotly.express as px
9
- from langchain.schema import HumanMessage, SystemMessage, AIMessage
10
- from langchain.chat_models import ChatOpenAI
11
- from langchain.memory import ConversationBufferWindowMemory
12
- from langchain.prompts import PromptTemplate
13
- import warnings
14
- import time
15
  from langchain_groq import ChatGroq
16
- import numpy as np
17
  from dotenv import load_dotenv
18
- import re
19
 
20
  warnings.filterwarnings("ignore", category=DeprecationWarning)
21
 
22
  # Load environment variables
23
  load_dotenv()
24
- GROQ_API_KEY=os.getenv("GROQ_API_KEY")
25
  llm = ChatGroq(model="llama-3.1-70b-versatile")
26
 
27
  PROMPT_TEMPLATE = """
@@ -38,27 +31,18 @@ Instructions:
38
  3. Provide information in a single sentence or a short, structured response.
39
  4. If the requested information isn’t available or verifiable, respond with "Information not available."
40
 
41
- #### Example Output Format:
42
- "Company: {entity} | Requested Information: {extracted_information}"
43
-
44
  Begin extraction.
45
  """
 
46
  def get_llm_response(entity, query):
47
- # Ensure entity and query are not None
48
- entity = entity or "Unknown Entity"
49
- query = query or "Information not provided"
50
-
51
- # Format the prompt with the entity and query
52
  formatted_prompt = PROMPT_TEMPLATE.format(entity=entity, query=query)
53
-
54
- # Pass the formatted prompt as a SystemMessage and get the response
55
  response = llm([SystemMessage(content=formatted_prompt)])
56
- return response[0].content
57
 
58
- # Set up the page
59
  st.set_page_config(page_title="DataScribe", page_icon=":notebook_with_decorative_cover:", layout="wide")
60
 
61
- # Sidebar navigation
62
  with st.sidebar:
63
  selected = option_menu(
64
  "DataScribe Menu",
@@ -79,47 +63,9 @@ if "results" not in st.session_state:
79
  if "column_selection" not in st.session_state:
80
  st.session_state["column_selection"] = None
81
 
82
- # Helper function for Google Sheets API setup
83
- def get_google_sheet_data(sheet_id, range_name):
84
- credentials = service_account.Credentials.from_service_account_info(st.secrets["gcp_service_account"])
85
- service = build('sheets', 'v4', credentials=credentials)
86
- sheet = service.spreadsheets()
87
- result = sheet.values().get(spreadsheetId=sheet_id, range=range_name).execute()
88
- values = result.get('values', [])
89
- return pd.DataFrame(values[1:], columns=values[0])
90
-
91
- # Function to write results back to Google Sheets
92
- def update_google_sheet(sheet_id, range_name, data):
93
- credentials = service_account.Credentials.from_service_account_info(st.secrets["gcp_service_account"])
94
- service = build('sheets', 'v4', credentials=credentials)
95
- sheet = service.spreadsheets()
96
- body = {
97
- 'values': [data.columns.tolist()] + data.values.tolist()
98
- }
99
- sheet.values().update(
100
- spreadsheetId=sheet_id,
101
- range=range_name,
102
- valueInputOption="RAW",
103
- body=body
104
- ).execute()
105
-
106
- # Home Page
107
- if selected == "Home":
108
- st.markdown(
109
- """
110
- ### Welcome to DataScribe
111
- **DataScribe** is an AI-powered tool designed to extract structured information from the web
112
- based on entities in your data file. Start by uploading a CSV or Google Sheet and defining a
113
- custom search query.
114
- """
115
- )
116
- st.image("https://via.placeholder.com/1200x400.png?text=DataScribe+AI+Agent+Dashboard") # Placeholder banner image
117
-
118
  # Upload Data Section
119
- elif selected == "Upload Data":
120
  st.header("Upload or Connect Your Data")
121
-
122
- # CSV Upload
123
  data_source = st.radio("Choose data source:", ["CSV File", "Google Sheets"])
124
 
125
  if data_source == "CSV File":
@@ -143,82 +89,48 @@ elif selected == "Upload Data":
143
  # Define Query Section
144
  elif selected == "Define Query":
145
  st.header("Define Your Custom Query")
146
-
147
- #entity = st.text_input("Enter the entity name")
148
- #query = st.text_input("Enter the query (e.g., 'contact information for {entity}')")
149
-
150
  if st.session_state["data"] is not None:
151
  column_selection = st.selectbox("Select the primary column for entities", options=st.session_state["data"].columns)
152
  query_template = st.text_input("Define your query template", "Get me the email for {company}")
153
  st.session_state["query_template"] = query_template
154
- st.session_state["column_selection"] = column_selection # Store column selection in session state
155
 
156
  st.write("### Example query preview")
157
  if column_selection:
158
- # Convert sample_entity to string to avoid replace errors
159
  sample_entity = str(st.session_state["data"][column_selection].iloc[0])
160
  example_query = query_template.replace("{company}", sample_entity)
161
  st.code(example_query)
162
  else:
163
  st.warning("Please upload data first.")
164
- # if st.button("Extract Information"):
165
- # if entity and query:
166
- # response_text = get_llm_response(entity, query)
167
- # st.write(response_text)
168
 
169
  # Extract Information Section with Progress Bar
170
  elif selected == "Extract Information":
171
  st.header("Extract Information")
172
-
173
  if st.session_state.get("query_template") and st.session_state["data"] is not None and st.session_state["column_selection"] is not None:
174
  st.write("Data extraction is in progress. This may take a few moments.")
175
-
176
- # Progress bar initialization
177
  progress_bar = st.progress(0)
178
  column_selection = st.session_state["column_selection"]
179
  progress_step = 1.0 / len(st.session_state["data"][column_selection])
180
 
181
  results = []
182
  for i, entity in enumerate(st.session_state["data"][column_selection]):
183
- # Prepare the prompt for the model
184
  user_message = st.session_state["query_template"].replace("{company}", str(entity))
185
- formatted_prompt = PROMPT_TEMPLATE.format(entity=entity, query=user_message)
186
-
187
- # Generate response from the model
188
- response = llm([SystemMessage(content=formatted_prompt)])
189
-
190
- # Collect the model's response
191
- result_text = response[0].content if response else "Information not available"
192
  results.append({"Entity": entity, "Extracted Information": result_text})
193
-
194
- # Update the progress bar
195
  progress_bar.progress((i + 1) * progress_step)
196
 
197
- # Save and display results
198
  st.session_state["results"] = pd.DataFrame(results)
199
  st.write("### Extracted Information")
200
  st.dataframe(st.session_state["results"])
201
 
202
- # View & Download Section with Google Sheets Update
203
  elif selected == "View & Download":
204
  st.header("View and Download Results")
205
-
206
  if st.session_state["results"] is not None:
207
  st.write("### Extracted Data Table")
208
  st.dataframe(st.session_state["results"])
209
 
210
- # Download as CSV
211
  csv_data = st.session_state["results"].to_csv(index=False)
212
  st.download_button("Download as CSV", csv_data, "datascribe_results.csv", "text/csv")
213
-
214
- # Option to update Google Sheet
215
- sheet_id = st.text_input("Enter Google Sheet ID to update with results")
216
- range_name = st.text_input("Enter range (e.g., Sheet1!A1)")
217
- if st.button("Update Google Sheet"):
218
- try:
219
- update_google_sheet(sheet_id, range_name, st.session_state["results"])
220
- st.success("Google Sheet updated successfully!")
221
- except Exception as e:
222
- st.error(f"Failed to update Google Sheet: {e}")
223
  else:
224
  st.warning("No data available to view or download.")
 
5
  from google.oauth2 import service_account
6
  from googleapiclient.discovery import build
7
  from streamlit_chat import message as st_message
8
+ from langchain.schema import SystemMessage
 
 
 
 
 
 
9
  from langchain_groq import ChatGroq
 
10
  from dotenv import load_dotenv
11
+ import warnings
12
 
13
  warnings.filterwarnings("ignore", category=DeprecationWarning)
14
 
15
  # Load environment variables
16
  load_dotenv()
17
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
18
  llm = ChatGroq(model="llama-3.1-70b-versatile")
19
 
20
  PROMPT_TEMPLATE = """
 
31
  3. Provide information in a single sentence or a short, structured response.
32
  4. If the requested information isn’t available or verifiable, respond with "Information not available."
33
 
 
 
 
34
  Begin extraction.
35
  """
36
+
37
  def get_llm_response(entity, query):
 
 
 
 
 
38
  formatted_prompt = PROMPT_TEMPLATE.format(entity=entity, query=query)
 
 
39
  response = llm([SystemMessage(content=formatted_prompt)])
40
+ return response[0].content if response else "Information not available"
41
 
42
+ # Streamlit Setup
43
  st.set_page_config(page_title="DataScribe", page_icon=":notebook_with_decorative_cover:", layout="wide")
44
 
45
+ # Sidebar Navigation
46
  with st.sidebar:
47
  selected = option_menu(
48
  "DataScribe Menu",
 
63
  if "column_selection" not in st.session_state:
64
  st.session_state["column_selection"] = None
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  # Upload Data Section
67
+ if selected == "Upload Data":
68
  st.header("Upload or Connect Your Data")
 
 
69
  data_source = st.radio("Choose data source:", ["CSV File", "Google Sheets"])
70
 
71
  if data_source == "CSV File":
 
89
  # Define Query Section
90
  elif selected == "Define Query":
91
  st.header("Define Your Custom Query")
 
 
 
 
92
  if st.session_state["data"] is not None:
93
  column_selection = st.selectbox("Select the primary column for entities", options=st.session_state["data"].columns)
94
  query_template = st.text_input("Define your query template", "Get me the email for {company}")
95
  st.session_state["query_template"] = query_template
96
+ st.session_state["column_selection"] = column_selection
97
 
98
  st.write("### Example query preview")
99
  if column_selection:
 
100
  sample_entity = str(st.session_state["data"][column_selection].iloc[0])
101
  example_query = query_template.replace("{company}", sample_entity)
102
  st.code(example_query)
103
  else:
104
  st.warning("Please upload data first.")
 
 
 
 
105
 
106
  # Extract Information Section with Progress Bar
107
  elif selected == "Extract Information":
108
  st.header("Extract Information")
 
109
  if st.session_state.get("query_template") and st.session_state["data"] is not None and st.session_state["column_selection"] is not None:
110
  st.write("Data extraction is in progress. This may take a few moments.")
 
 
111
  progress_bar = st.progress(0)
112
  column_selection = st.session_state["column_selection"]
113
  progress_step = 1.0 / len(st.session_state["data"][column_selection])
114
 
115
  results = []
116
  for i, entity in enumerate(st.session_state["data"][column_selection]):
 
117
  user_message = st.session_state["query_template"].replace("{company}", str(entity))
118
+ result_text = get_llm_response(entity, user_message)
 
 
 
 
 
 
119
  results.append({"Entity": entity, "Extracted Information": result_text})
 
 
120
  progress_bar.progress((i + 1) * progress_step)
121
 
 
122
  st.session_state["results"] = pd.DataFrame(results)
123
  st.write("### Extracted Information")
124
  st.dataframe(st.session_state["results"])
125
 
126
+ # View & Download Section
127
  elif selected == "View & Download":
128
  st.header("View and Download Results")
 
129
  if st.session_state["results"] is not None:
130
  st.write("### Extracted Data Table")
131
  st.dataframe(st.session_state["results"])
132
 
 
133
  csv_data = st.session_state["results"].to_csv(index=False)
134
  st.download_button("Download as CSV", csv_data, "datascribe_results.csv", "text/csv")
 
 
 
 
 
 
 
 
 
 
135
  else:
136
  st.warning("No data available to view or download.")