Futuresony committed on
Commit
e0288cd
·
verified ·
1 Parent(s): 36f8be6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -8
app.py CHANGED
@@ -58,6 +58,8 @@ def load_sentence_transformer():
58
  print(f"Error loading Sentence Transformer: {e}")
59
  return None
60
 
 
 
61
  def load_google_sheet_data(sheet_id, service_account_key_base64):
62
  """Authenticates and loads data from Google Sheet."""
63
  print(f"Attempting to load Google Sheet data from ID: {sheet_id}")
@@ -67,8 +69,22 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
67
 
68
  try:
69
  print("Decoding base64 key...")
 
 
 
 
 
70
  key_bytes = base64.b64decode(service_account_key_base64)
71
- key_dict = json.loads(key_bytes)
 
 
 
 
 
 
 
 
 
72
  print("Base64 key decoded and parsed.")
73
 
74
  print("Authenticating with service account...")
@@ -78,9 +94,6 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
78
  print("Authentication successful.")
79
 
80
  print(f"Opening sheet with key '{sheet_id}'...")
81
- # *** IMPORTANT: If your sheet is NOT the first sheet, change 'sheet1'
82
- # *** For example, if your sheet is named 'Data', use:
83
- # sheet = client.open_by_key(sheet_id).worksheet("Data")
84
  sheet = client.open_by_key(sheet_id).sheet1
85
  print(f"Successfully opened Google Sheet with ID: {sheet_id}")
86
 
@@ -98,22 +111,19 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
98
 
99
  if not filtered_data:
100
  print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
101
- # Check if headers exist at all if filtered_data is empty but sheet_data isn't
102
  if sheet_data and ('Service' not in sheet_data[0] or 'Description' not in sheet_data[0]):
103
  print("Error: 'Service' or 'Description' headers are missing or misspelled in the sheet.")
104
  return [], [], torch.tensor([])
105
 
106
- # Re-checking column existence on filtered_data (redundant after filter but safe)
107
  if 'Service' not in filtered_data[0] or 'Description' not in filtered_data[0]:
108
  print("Error: Filtered Google Sheet data must contain 'Service' and 'Description' columns. This should not happen if filtering worked.")
109
  return [], [], torch.tensor([])
110
 
111
-
112
  services = [row["Service"] for row in filtered_data]
113
  descriptions = [row["Description"] for row in filtered_data]
114
  print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
115
 
116
- return filtered_data, descriptions, None # Return descriptions, embeddings encoded later
117
 
118
  except gspread.exceptions.SpreadsheetNotFound:
119
  print(f"Error: Google Sheet with ID '{sheet_id}' not found.")
@@ -121,8 +131,10 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
121
  return [], [], torch.tensor([])
122
  except Exception as e:
123
  print(f"An error occurred while accessing the Google Sheet: {e}")
 
124
  return [], [], torch.tensor([])
125
 
 
126
  def load_llm_model(model_id, hf_token):
127
  """Loads the LLM in full precision (for CPU)."""
128
  print(f"Loading model {model_id} in full precision...")
 
58
  print(f"Error loading Sentence Transformer: {e}")
59
  return None
60
 
61
+ # Inside app.py, locate this function
62
+
63
  def load_google_sheet_data(sheet_id, service_account_key_base64):
64
  """Authenticates and loads data from Google Sheet."""
65
  print(f"Attempting to load Google Sheet data from ID: {sheet_id}")
 
69
 
70
  try:
71
  print("Decoding base64 key...")
72
+ # --- Add Debugging Prints Here ---
73
+ print(f"Raw Base64 string (first 50 chars): {service_account_key_base64[:50]}...")
74
+ print(f"Raw Base64 string length: {len(service_account_key_base64)}")
75
+ # --- End Debugging Prints ---
76
+
77
  key_bytes = base64.b64decode(service_account_key_base64)
78
+
79
+ # --- Add More Debugging Prints Here ---
80
+ print(f"Decoded bytes (first 50): {key_bytes[:50]}")
81
+ print(f"Decoded bytes length: {len(key_bytes)}")
82
+ # --- End Debugging Prints ---
83
+
84
+ # This is the line that is likely failing internally after base64.b64decode
85
+ # because key_bytes cannot be decoded as UTF-8
86
+ key_dict = json.loads(key_bytes.decode('utf-8')) # Explicitly decode to see if it throws error here
87
+
88
  print("Base64 key decoded and parsed.")
89
 
90
  print("Authenticating with service account...")
 
94
  print("Authentication successful.")
95
 
96
  print(f"Opening sheet with key '{sheet_id}'...")
 
 
 
97
  sheet = client.open_by_key(sheet_id).sheet1
98
  print(f"Successfully opened Google Sheet with ID: {sheet_id}")
99
 
 
111
 
112
  if not filtered_data:
113
  print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
 
114
  if sheet_data and ('Service' not in sheet_data[0] or 'Description' not in sheet_data[0]):
115
  print("Error: 'Service' or 'Description' headers are missing or misspelled in the sheet.")
116
  return [], [], torch.tensor([])
117
 
 
118
  if 'Service' not in filtered_data[0] or 'Description' not in filtered_data[0]:
119
  print("Error: Filtered Google Sheet data must contain 'Service' and 'Description' columns. This should not happen if filtering worked.")
120
  return [], [], torch.tensor([])
121
 
 
122
  services = [row["Service"] for row in filtered_data]
123
  descriptions = [row["Description"] for row in filtered_data]
124
  print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
125
 
126
+ return filtered_data, descriptions, None
127
 
128
  except gspread.exceptions.SpreadsheetNotFound:
129
  print(f"Error: Google Sheet with ID '{sheet_id}' not found.")
 
131
  return [], [], torch.tensor([])
132
  except Exception as e:
133
  print(f"An error occurred while accessing the Google Sheet: {e}")
134
+ # Consider adding print(f"Type of error: {type(e)}") to see if it's specifically UnicodeDecodeError
135
  return [], [], torch.tensor([])
136
 
137
+
138
  def load_llm_model(model_id, hf_token):
139
  """Loads the LLM in full precision (for CPU)."""
140
  print(f"Loading model {model_id} in full precision...")