FelixPhilip committed on
Commit
274692a
·
1 Parent(s): 93b3dfc

Oracle weight assigning update

Browse files
Files changed (1) hide show
  1. Oracle/deepfundingoracle.py +10 -4
Oracle/deepfundingoracle.py CHANGED
@@ -25,6 +25,7 @@ import signal
25
  from tqdm import tqdm
26
  import sys
27
  import re
 
28
 
29
  from sklearn.model_selection import train_test_split, RandomizedSearchCV
30
  from sklearn.ensemble import RandomForestRegressor
@@ -231,7 +232,7 @@ def timeout_handler(signum, frame):
231
  # logging.info(f"[INFO] Base weights assigned successfully in {end_time - start_time:.2f} seconds.")
232
  # return df
233
 
234
- import json
235
 
236
  def assign_base_weight(df, max_workers=32):
237
  """
@@ -276,17 +277,22 @@ def assign_base_weight(df, max_workers=32):
276
  }
277
  print(f"[INFO] Using default feature weights: {feature_weights}", flush=True)
278
 
279
- # Step 2: Programmatically calculate weights for each repository
 
 
 
 
 
280
  def calculate_weight(row):
281
  weight = 0
282
  for feature, feature_weight in feature_weights.items():
283
- if feature in row and pd.notna(row[feature]):
284
  weight += row[feature] * feature_weight
285
  return weight
286
 
287
  df["base_weight_raw"] = df.apply(calculate_weight, axis=1)
288
 
289
- # Step 3: Normalize weights per parent
290
  df["base_weight"] = df.groupby("parent")["base_weight_raw"].transform(
291
  lambda s: (s - s.min()) / (s.max() - s.min() if s.max() != s.min() else 1)
292
  )
 
25
  from tqdm import tqdm
26
  import sys
27
  import re
28
+ import json
29
 
30
  from sklearn.model_selection import train_test_split, RandomizedSearchCV
31
  from sklearn.ensemble import RandomForestRegressor
 
232
  # logging.info(f"[INFO] Base weights assigned successfully in {end_time - start_time:.2f} seconds.")
233
  # return df
234
 
235
+
236
 
237
  def assign_base_weight(df, max_workers=32):
238
  """
 
277
  }
278
  print(f"[INFO] Using default feature weights: {feature_weights}", flush=True)
279
 
280
+ # Step 2: Ensure all feature columns are numeric
281
+ for feature in feature_weights.keys():
282
+ if feature in df.columns:
283
+ df[feature] = pd.to_numeric(df[feature], errors='coerce').fillna(0)
284
+
285
+ # Step 3: Programmatically calculate weights for each repository
286
  def calculate_weight(row):
287
  weight = 0
288
  for feature, feature_weight in feature_weights.items():
289
+ if feature in row:
290
  weight += row[feature] * feature_weight
291
  return weight
292
 
293
  df["base_weight_raw"] = df.apply(calculate_weight, axis=1)
294
 
295
+ # Step 4: Normalize weights per parent
296
  df["base_weight"] = df.groupby("parent")["base_weight_raw"].transform(
297
  lambda s: (s - s.min()) / (s.max() - s.min() if s.max() != s.min() else 1)
298
  )