FelixPhilip committed on
Commit
c926705
·
1 Parent(s): a636254
Files changed (1) hide show
  1. Oracle/deepfundingoracle.py +22 -4
Oracle/deepfundingoracle.py CHANGED
@@ -353,14 +353,31 @@ def visualize_feature_distributions(df):
353
  plt.title("Feature Correlation Matrix", fontsize=16)
354
  plt.show()
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  def normalize_funding(df):
357
  """
358
  Normalize funding weights for child repositories grouped by parent.
359
  """
360
  print("[INFO] Normalizing funding weights...", flush=True)
361
- df["final_weight"] = df.groupby("parent")["final_weight"].transform(
362
- lambda x: x / x.sum() if x.sum() > 0 else 1 / len(x)
363
- )
364
  print("[INFO] Funding weights normalized successfully.", flush=True)
365
  return df
366
 
@@ -487,6 +504,7 @@ def train_predict_weight(df):
487
  if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <=1:
488
  print("[WARN] Not enough data or variance for model training. Using base weights directly.")
489
  df["final_weight"] = df["base_weight"]
 
490
  return df
491
 
492
  # Split data into train/test sets
@@ -529,7 +547,7 @@ def train_predict_weight(df):
529
  except Exception as e:
530
  print(f"[ERROR] Model training failed: {e}")
531
  df["final_weight"] = df["base_weight"]
532
-
533
  end_time = time.time()
534
  print(f"[INFO] Weight prediction completed in {end_time - start_time:.2f} seconds.", flush=True)
535
  return df
 
353
  plt.title("Feature Correlation Matrix", fontsize=16)
354
  plt.show()
355
 
356
+
357
def normalize_and_clip_weights(df, group_col="parent", weight_col="final_weight"):
    """
    Make weights non-negative and sum to 1 within each group.

    Missing (NaN) and negative weights are treated as zero. When no weight
    in a group is positive, the group's rows share the total equally.

    Args:
        df: DataFrame holding ``group_col`` and ``weight_col``. It is
            modified in place: ``weight_col`` is overwritten.
        group_col: Name of the column whose values define the groups.
        weight_col: Name of the column containing the raw weights.

    Returns:
        The same DataFrame object, with ``weight_col`` normalized.
    """
    # NaN survives clip() and is skipped by sum(), so a missing weight would
    # otherwise stay NaN in the output and the group would not sum to 1.
    df[weight_col] = df[weight_col].fillna(0).clip(lower=0)

    def normalize_group(x):
        total = x.sum()
        if total > 0:
            return x / total
        # Nothing positive to apportion: fall back to an equal split.
        return np.full(len(x), 1.0 / len(x))

    df[weight_col] = df.groupby(group_col)[weight_col].transform(normalize_group)
    return df
373
+
374
+
375
  def normalize_funding(df):
376
  """
377
  Normalize funding weights for child repositories grouped by parent.
378
  """
379
  print("[INFO] Normalizing funding weights...", flush=True)
380
+ df = normalize_and_clip_weights(df)
 
 
381
  print("[INFO] Funding weights normalized successfully.", flush=True)
382
  return df
383
 
 
504
  if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <=1:
505
  print("[WARN] Not enough data or variance for model training. Using base weights directly.")
506
  df["final_weight"] = df["base_weight"]
507
+ df = normalize_and_clip_weights(df)
508
  return df
509
 
510
  # Split data into train/test sets
 
547
  except Exception as e:
548
  print(f"[ERROR] Model training failed: {e}")
549
  df["final_weight"] = df["base_weight"]
550
+ df = normalize_and_clip_weights(df)
551
  end_time = time.time()
552
  print(f"[INFO] Weight prediction completed in {end_time - start_time:.2f} seconds.", flush=True)
553
  return df