Futuresony committed on
Commit
05ea874
·
verified ·
1 Parent(s): 5977d71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -66,32 +66,49 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
66
  return [], [], torch.tensor([])
67
 
68
  try:
69
- # Decode the base64 key
70
  key_bytes = base64.b64decode(service_account_key_base64)
71
  key_dict = json.loads(key_bytes)
 
72
 
73
- # Authenticate using the service account key
74
  from google.oauth2 import service_account
75
  creds = service_account.Credentials.from_service_account_info(key_dict)
76
  client = gspread.authorize(creds)
 
77
 
 
 
 
 
78
  sheet = client.open_by_key(sheet_id).sheet1
79
  print(f"Successfully opened Google Sheet with ID: {sheet_id}")
 
 
80
  sheet_data = sheet.get_all_records()
 
81
 
82
  if not sheet_data:
83
  print(f"Warning: No data records found in Google Sheet with ID: {sheet_id}")
84
  return [], [], torch.tensor([])
85
 
 
86
  filtered_data = [row for row in sheet_data if row.get('Service') and row.get('Description')]
 
 
87
  if not filtered_data:
88
  print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
 
 
 
89
  return [], [], torch.tensor([])
90
 
91
- if not filtered_data or 'Service' not in filtered_data[0] or 'Description' not in filtered_data[0]:
92
- print("Error: Filtered Google Sheet data must contain 'Service' and 'Description' columns.")
 
93
  return [], [], torch.tensor([])
94
 
 
95
  services = [row["Service"] for row in filtered_data]
96
  descriptions = [row["Description"] for row in filtered_data]
97
  print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
@@ -106,7 +123,6 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
106
  print(f"An error occurred while accessing the Google Sheet: {e}")
107
  return [], [], torch.tensor([])
108
 
109
-
110
  def load_llm_model(model_id, hf_token):
111
  """Loads the LLM in full precision (for CPU)."""
112
  print(f"Loading model {model_id} in full precision...")
 
66
  return [], [], torch.tensor([])
67
 
68
  try:
69
+ print("Decoding base64 key...")
70
  key_bytes = base64.b64decode(service_account_key_base64)
71
  key_dict = json.loads(key_bytes)
72
+ print("Base64 key decoded and parsed.")
73
 
74
+ print("Authenticating with service account...")
75
  from google.oauth2 import service_account
76
  creds = service_account.Credentials.from_service_account_info(key_dict)
77
  client = gspread.authorize(creds)
78
+ print("Authentication successful.")
79
 
80
+ print(f"Opening sheet with key '{sheet_id}'...")
81
+ # *** IMPORTANT: If your data is NOT on the first worksheet, replace 'sheet1' below with a lookup by worksheet name.
82
+ # *** For example, if your sheet is named 'Data', use:
83
+ # sheet = client.open_by_key(sheet_id).worksheet("Data")
84
  sheet = client.open_by_key(sheet_id).sheet1
85
  print(f"Successfully opened Google Sheet with ID: {sheet_id}")
86
+
87
+ print("Getting all records from the sheet...")
88
  sheet_data = sheet.get_all_records()
89
+ print(f"Retrieved {len(sheet_data)} raw records from sheet.")
90
 
91
  if not sheet_data:
92
  print(f"Warning: No data records found in Google Sheet with ID: {sheet_id}")
93
  return [], [], torch.tensor([])
94
 
95
+ print("Filtering data for 'Service' and 'Description' columns...")
96
  filtered_data = [row for row in sheet_data if row.get('Service') and row.get('Description')]
97
+ print(f"Filtered down to {len(filtered_data)} records.")
98
+
99
  if not filtered_data:
100
  print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
101
+ # Check if headers exist at all if filtered_data is empty but sheet_data isn't
102
+ if sheet_data and ('Service' not in sheet_data[0] or 'Description' not in sheet_data[0]):
103
+ print("Error: 'Service' or 'Description' headers are missing or misspelled in the sheet.")
104
  return [], [], torch.tensor([])
105
 
106
+ # Re-checking column existence on filtered_data (redundant after filter but safe)
107
+ if 'Service' not in filtered_data[0] or 'Description' not in filtered_data[0]:
108
+ print("Error: Filtered Google Sheet data must contain 'Service' and 'Description' columns. This should not happen if filtering worked.")
109
  return [], [], torch.tensor([])
110
 
111
+
112
  services = [row["Service"] for row in filtered_data]
113
  descriptions = [row["Description"] for row in filtered_data]
114
  print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
 
123
  print(f"An error occurred while accessing the Google Sheet: {e}")
124
  return [], [], torch.tensor([])
125
 
 
126
  def load_llm_model(model_id, hf_token):
127
  """Loads the LLM in full precision (for CPU)."""
128
  print(f"Loading model {model_id} in full precision...")