Spaces:
Running
Running
Commit
·
911b780
1
Parent(s):
b180b70
Oracle
Browse files- Oracle/deepfundingoracle.py +16 -11
Oracle/deepfundingoracle.py
CHANGED
@@ -357,28 +357,33 @@ def visualize_feature_distributions(df):
|
|
357 |
plt.show()
|
358 |
|
359 |
|
360 |
-
def normalize_and_clip_weights(df,group_col="parent",weight_col
|
361 |
"""
|
362 |
-
Ensures
|
363 |
"""
|
364 |
-
|
|
|
|
|
|
|
|
|
365 |
df[weight_col] = df[weight_col].clip(lower=0)
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
return x/
|
372 |
-
return np.ones_like(x)/ len(x)
|
373 |
df[weight_col] = df.groupby(group_col)[weight_col].transform(normalize_group)
|
374 |
return df
|
375 |
|
376 |
-
|
377 |
def normalize_funding(df):
|
378 |
"""
|
379 |
Normalize funding weights for child repositories grouped by parent.
|
380 |
"""
|
381 |
print("[INFO] Normalizing funding weights...", flush=True)
|
|
|
|
|
|
|
382 |
df = normalize_and_clip_weights(df)
|
383 |
print("[INFO] Funding weights normalized successfully.", flush=True)
|
384 |
return df
|
|
|
357 |
plt.show()
|
358 |
|
359 |
|
360 |
+
def normalize_and_clip_weights(df, group_col="parent", weight_col="final_weight"):
    """
    Ensures weights are non-negative and sum to 1 per group.

    Negative and missing (NaN) weights are treated as zero. Within each
    group the remaining weights are divided by the group total; a group
    whose total is zero receives a uniform ``1 / group_size`` weight for
    every row. The ``weight_col`` column of ``df`` is overwritten in place
    and ``df`` itself is returned.

    Args:
        df: DataFrame holding at least ``weight_col`` and the grouping key.
        group_col: Grouping key passed straight to ``DataFrame.groupby``.
        weight_col: Name of the numeric column to normalize.

    Returns:
        The same DataFrame object, with ``weight_col`` normalized.

    Raises:
        ValueError: If ``df`` is None.
        KeyError: If ``weight_col`` is not a column of ``df``.
    """
    if df is None:
        raise ValueError("DataFrame is None, cannot normalize weights.")
    if weight_col not in df.columns:
        raise KeyError(f"`{weight_col}` column not found in DataFrame.")

    # A NaN weight would pass through both the clip and the division
    # untouched (sum() skips NaN), leaving NaN in an otherwise normalized
    # group. Treat "no weight" as zero so every group really sums to 1.
    df[weight_col] = df[weight_col].fillna(0).clip(lower=0)

    # Built-in aggregations broadcast back to the original row order, so no
    # per-group Python callback is needed.
    grouped = df.groupby(group_col)[weight_col]
    totals = grouped.transform("sum")
    counts = grouped.transform("count")  # no NaN left, so count == group size

    has_mass = totals > 0
    # Mask zero totals before dividing to avoid 0/0; those rows are filled
    # with the uniform fallback on the next line.
    shares = df[weight_col] / totals.where(has_mass)
    # NOTE(review): rows whose group key is NaN are excluded by groupby's
    # default (dropna=True) and therefore stay NaN here, as they did before
    # this change -- confirm whether such rows can occur upstream.
    df[weight_col] = shares.where(has_mass, 1.0 / counts)
    return df
|
378 |
|
|
|
379 |
def normalize_funding(df):
    """
    Normalize funding weights for child repositories grouped by parent.

    Logs progress to stdout. When ``df`` is None or has no rows, a warning
    is printed and ``df`` is returned untouched; otherwise the result of
    ``normalize_and_clip_weights(df)`` is returned.
    """
    print("[INFO] Normalizing funding weights...", flush=True)

    has_rows = df is not None and not df.empty
    if has_rows:
        normalized = normalize_and_clip_weights(df)
        print("[INFO] Funding weights normalized successfully.", flush=True)
        return normalized

    # Nothing to normalize: hand the input back as-is.
    print("[WARN] Skipping normalization: DataFrame is None or empty.", flush=True)
    return df
|