Spaces:

FelixPhilip
/

DeepFundingOracle

Sleeping

App Files Files Community

FelixPhilip committed on May 9

Commit

c926705

1 Parent(s): a636254

Oracle

Browse files

Files changed (1) hide show

Oracle/deepfundingoracle.py +22 -4

Oracle/deepfundingoracle.py CHANGED Viewed

@@ -353,14 +353,31 @@ def visualize_feature_distributions(df):
     plt.title("Feature Correlation Matrix", fontsize=16)
     plt.show()
 def normalize_funding(df):
     """
     Normalize funding weights for child repositories grouped by parent.
     """
     print("[INFO] Normalizing funding weights...", flush=True)
-    df["final_weight"] = df.groupby("parent")["final_weight"].transform(
-        lambda x: x / x.sum() if x.sum() > 0 else 1 / len(x)
-    )
     print("[INFO] Funding weights normalized successfully.", flush=True)
     return df
@@ -487,6 +504,7 @@ def train_predict_weight(df):
     if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <=1:
         print("[WARN] Not enough data or variance for model training. Using base weights directly.")
         df["final_weight"] = df["base_weight"]
         return df
     # Split data into train/test sets
@@ -529,7 +547,7 @@ def train_predict_weight(df):
     except Exception as e:
         print(f"[ERROR] Model training failed: {e}")
         df["final_weight"] = df["base_weight"]
     end_time = time.time()
     print(f"[INFO] Weight prediction completed in {end_time - start_time:.2f} seconds.", flush=True)
     return df

     plt.title("Feature Correlation Matrix", fontsize=16)
     plt.show()
def normalize_and_clip_weights(df, group_col="parent", weight_col="final_weight"):
    """
    Ensure all weights are non-negative and sum to 1 within each group.

    Negative values in ``weight_col`` are clipped to zero, then the weights
    are rescaled so that they sum to 1 inside every ``group_col`` group. A
    group whose clipped weights sum to zero gets a uniform split
    (``1 / group size`` for each row).

    Args:
        df: DataFrame containing ``group_col`` and ``weight_col``.
        group_col: Name of the column used to group rows.
        weight_col: Name of the column holding the weights to normalize.

    Returns:
        The same DataFrame object, with ``weight_col`` overwritten in place.
    """
    # Clip negative weights to zero
    df[weight_col] = df[weight_col].clip(lower=0)

    # Normalize weights within each group
    def normalize_group(x):
        total = x.sum()
        if total > 0:
            return x / total
        # Every weight in the group is zero: fall back to an even split.
        return np.ones_like(x, dtype=float) / len(x)

    # These two statements must live in the outer function body; nested
    # inside normalize_group they are unreachable and the function
    # would return None to its callers.
    df[weight_col] = df.groupby(group_col)[weight_col].transform(normalize_group)
    return df
 def normalize_funding(df):
     """
     Normalize funding weights for child repositories grouped by parent.

     Logs progress before and after, delegates to
     normalize_and_clip_weights(df) using that helper's default group and
     weight columns, and returns whatever the helper returns.
     """
     print("[INFO] Normalizing funding weights...", flush=True)
+    df = normalize_and_clip_weights(df)
     print("[INFO] Funding weights normalized successfully.", flush=True)
     return df
     if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <=1:
         print("[WARN] Not enough data or variance for model training. Using base weights directly.")
         df["final_weight"] = df["base_weight"]
+        df = normalize_and_clip_weights(df)
         return df
     # Split data into train/test sets
     except Exception as e:
         print(f"[ERROR] Model training failed: {e}")
         df["final_weight"] = df["base_weight"]
+    df = normalize_and_clip_weights(df)
     end_time = time.time()
     print(f"[INFO] Weight prediction completed in {end_time - start_time:.2f} seconds.", flush=True)
     return df