Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -58,6 +58,8 @@ def load_sentence_transformer():
|
|
58 |
print(f"Error loading Sentence Transformer: {e}")
|
59 |
return None
|
60 |
|
|
|
|
|
61 |
def load_google_sheet_data(sheet_id, service_account_key_base64):
|
62 |
"""Authenticates and loads data from Google Sheet."""
|
63 |
print(f"Attempting to load Google Sheet data from ID: {sheet_id}")
|
@@ -67,8 +69,22 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
|
|
67 |
|
68 |
try:
|
69 |
print("Decoding base64 key...")
|
|
|
|
|
|
|
|
|
|
|
70 |
key_bytes = base64.b64decode(service_account_key_base64)
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
print("Base64 key decoded and parsed.")
|
73 |
|
74 |
print("Authenticating with service account...")
|
@@ -78,9 +94,6 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
|
|
78 |
print("Authentication successful.")
|
79 |
|
80 |
print(f"Opening sheet with key '{sheet_id}'...")
|
81 |
-
# *** IMPORTANT: If your sheet is NOT the first sheet, change 'sheet1'
|
82 |
-
# *** For example, if your sheet is named 'Data', use:
|
83 |
-
# sheet = client.open_by_key(sheet_id).worksheet("Data")
|
84 |
sheet = client.open_by_key(sheet_id).sheet1
|
85 |
print(f"Successfully opened Google Sheet with ID: {sheet_id}")
|
86 |
|
@@ -98,22 +111,19 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
|
|
98 |
|
99 |
if not filtered_data:
|
100 |
print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
|
101 |
-
# Check if headers exist at all if filtered_data is empty but sheet_data isn't
|
102 |
if sheet_data and ('Service' not in sheet_data[0] or 'Description' not in sheet_data[0]):
|
103 |
print("Error: 'Service' or 'Description' headers are missing or misspelled in the sheet.")
|
104 |
return [], [], torch.tensor([])
|
105 |
|
106 |
-
# Re-checking column existence on filtered_data (redundant after filter but safe)
|
107 |
if 'Service' not in filtered_data[0] or 'Description' not in filtered_data[0]:
|
108 |
print("Error: Filtered Google Sheet data must contain 'Service' and 'Description' columns. This should not happen if filtering worked.")
|
109 |
return [], [], torch.tensor([])
|
110 |
|
111 |
-
|
112 |
services = [row["Service"] for row in filtered_data]
|
113 |
descriptions = [row["Description"] for row in filtered_data]
|
114 |
print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
|
115 |
|
116 |
-
return filtered_data, descriptions, None
|
117 |
|
118 |
except gspread.exceptions.SpreadsheetNotFound:
|
119 |
print(f"Error: Google Sheet with ID '{sheet_id}' not found.")
|
@@ -121,8 +131,10 @@ def load_google_sheet_data(sheet_id, service_account_key_base64):
|
|
121 |
return [], [], torch.tensor([])
|
122 |
except Exception as e:
|
123 |
print(f"An error occurred while accessing the Google Sheet: {e}")
|
|
|
124 |
return [], [], torch.tensor([])
|
125 |
|
|
|
126 |
def load_llm_model(model_id, hf_token):
|
127 |
"""Loads the LLM in full precision (for CPU)."""
|
128 |
print(f"Loading model {model_id} in full precision...")
|
|
|
58 |
print(f"Error loading Sentence Transformer: {e}")
|
59 |
return None
|
60 |
|
61 |
+
# Inside app.py, locate this function
|
62 |
+
|
63 |
def load_google_sheet_data(sheet_id, service_account_key_base64):
|
64 |
"""Authenticates and loads data from Google Sheet."""
|
65 |
print(f"Attempting to load Google Sheet data from ID: {sheet_id}")
|
|
|
69 |
|
70 |
try:
|
71 |
print("Decoding base64 key...")
|
72 |
+
# --- Add Debugging Prints Here ---
|
73 |
+
print(f"Raw Base64 string (first 50 chars): {service_account_key_base64[:50]}...")
|
74 |
+
print(f"Raw Base64 string length: {len(service_account_key_base64)}")
|
75 |
+
# --- End Debugging Prints ---
|
76 |
+
|
77 |
key_bytes = base64.b64decode(service_account_key_base64)
|
78 |
+
|
79 |
+
# --- Add More Debugging Prints Here ---
|
80 |
+
print(f"Decoded bytes (first 50): {key_bytes[:50]}")
|
81 |
+
print(f"Decoded bytes length: {len(key_bytes)}")
|
82 |
+
# --- End Debugging Prints ---
|
83 |
+
|
84 |
+
# This is the line that is likely failing internally after base64.b64decode
|
85 |
+
# because key_bytes cannot be decoded as UTF-8
|
86 |
+
key_dict = json.loads(key_bytes.decode('utf-8')) # Explicitly decode to see if it throws error here
|
87 |
+
|
88 |
print("Base64 key decoded and parsed.")
|
89 |
|
90 |
print("Authenticating with service account...")
|
|
|
94 |
print("Authentication successful.")
|
95 |
|
96 |
print(f"Opening sheet with key '{sheet_id}'...")
|
|
|
|
|
|
|
97 |
sheet = client.open_by_key(sheet_id).sheet1
|
98 |
print(f"Successfully opened Google Sheet with ID: {sheet_id}")
|
99 |
|
|
|
111 |
|
112 |
if not filtered_data:
|
113 |
print("Warning: Filtered data is empty after checking for 'Service' and 'Description'.")
|
|
|
114 |
if sheet_data and ('Service' not in sheet_data[0] or 'Description' not in sheet_data[0]):
|
115 |
print("Error: 'Service' or 'Description' headers are missing or misspelled in the sheet.")
|
116 |
return [], [], torch.tensor([])
|
117 |
|
|
|
118 |
if 'Service' not in filtered_data[0] or 'Description' not in filtered_data[0]:
|
119 |
print("Error: Filtered Google Sheet data must contain 'Service' and 'Description' columns. This should not happen if filtering worked.")
|
120 |
return [], [], torch.tensor([])
|
121 |
|
|
|
122 |
services = [row["Service"] for row in filtered_data]
|
123 |
descriptions = [row["Description"] for row in filtered_data]
|
124 |
print(f"Loaded {len(descriptions)} entries from Google Sheet for embedding.")
|
125 |
|
126 |
+
return filtered_data, descriptions, None
|
127 |
|
128 |
except gspread.exceptions.SpreadsheetNotFound:
|
129 |
print(f"Error: Google Sheet with ID '{sheet_id}' not found.")
|
|
|
131 |
return [], [], torch.tensor([])
|
132 |
except Exception as e:
|
133 |
print(f"An error occurred while accessing the Google Sheet: {e}")
|
134 |
+
# Consider adding print(f"Type of error: {type(e)}") to see if it's specifically UnicodeDecodeError
|
135 |
return [], [], torch.tensor([])
|
136 |
|
137 |
+
|
138 |
def load_llm_model(model_id, hf_token):
|
139 |
"""Loads the LLM in full precision (for CPU)."""
|
140 |
print(f"Loading model {model_id} in full precision...")
|