FelixPhilip committed on
Commit
16532e0
·
1 Parent(s): 5b351fa

updated assign base weights

Browse files
Files changed (1) hide show
  1. Oracle/deepfundingoracle.py +11 -5
Oracle/deepfundingoracle.py CHANGED
@@ -23,6 +23,7 @@ import concurrent.futures
23
  import signal
24
  from tqdm import tqdm
25
  import sys
 
26
 
27
  from sklearn.model_selection import train_test_split, GridSearchCV
28
  from sklearn.ensemble import RandomForestRegressor
@@ -188,15 +189,20 @@ def assign_base_weight(df):
188
  print(f"[INFO] Sending prompt to LLama model for repo: {repo}", flush=True)
189
  start_llama_time = time.time()
190
  response = llama.predict(prompt)
191
- weight = float(''.join([c for c in response if c.isdigit() or c == '.']))
192
- weight = min(max(weight, 0), 1)
 
 
 
 
 
193
  end_llama_time = time.time()
194
  print(f"[INFO] Received weight {weight} for {repo} in {end_llama_time - start_llama_time:.2f} seconds.", flush=True)
195
  logging.info(f"[INFO] Processed repository {repo} in {end_llama_time - start_llama_time:.2f} seconds. Weight: {weight}")
196
  except Exception as e:
197
  print(f"[ERROR] Failed to process repository {repo}: {e}", flush=True)
198
  logging.error(f"[ERROR] Failed to process repository {repo}: {e}")
199
- weight = 0.5 # Default weight in case of failure
200
  base_weights.append(weight)
201
  print(f"[PROGRESS] Finished {idx + 1}/{len(df)} repositories.", flush=True)
202
 
@@ -238,10 +244,10 @@ def train_predict_weight(df):
238
  y = df[target]
239
  print("[INFO] Splitting data into training and testing sets...", flush=True)
240
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
241
- rf_model = RandomForestRegressor(random_state=42)
242
  param_grid = {
243
  "n_estimators": [100, 200, 300],
244
- "max_depth": [None, 10, 20, 30],
245
  "min_samples_split": [2, 5, 10],
246
  "min_samples_leaf": [1, 2, 4]
247
  }
 
23
  import signal
24
  from tqdm import tqdm
25
  import sys
26
+ import re
27
 
28
  from sklearn.model_selection import train_test_split, GridSearchCV
29
  from sklearn.ensemble import RandomForestRegressor
 
189
  print(f"[INFO] Sending prompt to LLama model for repo: {repo}", flush=True)
190
  start_llama_time = time.time()
191
  response = llama.predict(prompt)
192
+ # Use regex to extract the first valid float from the response
193
+ match = re.search(r"[-+]?\d*\.\d+|\d+", response)
194
+ if match:
195
+ weight = float(match.group())
196
+ weight = min(max(weight, 0), 1)
197
+ else:
198
+ raise ValueError(f"No valid float found in response: {response}")
199
  end_llama_time = time.time()
200
  print(f"[INFO] Received weight {weight} for {repo} in {end_llama_time - start_llama_time:.2f} seconds.", flush=True)
201
  logging.info(f"[INFO] Processed repository {repo} in {end_llama_time - start_llama_time:.2f} seconds. Weight: {weight}")
202
  except Exception as e:
203
  print(f"[ERROR] Failed to process repository {repo}: {e}", flush=True)
204
  logging.error(f"[ERROR] Failed to process repository {repo}: {e}")
205
+ weight = 0.0 # Default weight in case of failure (set to 0 for no work)
206
  base_weights.append(weight)
207
  print(f"[PROGRESS] Finished {idx + 1}/{len(df)} repositories.", flush=True)
208
 
 
244
  y = df[target]
245
  print("[INFO] Splitting data into training and testing sets...", flush=True)
246
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
247
+ rf_model = RandomForestRegressor(random_state=42, max_depth=None)
248
  param_grid = {
249
  "n_estimators": [100, 200, 300],
250
+ "max_depth": [None], # Only allow unlimited depth
251
  "min_samples_split": [2, 5, 10],
252
  "min_samples_leaf": [1, 2, 4]
253
  }