Spaces:
Running
Running
Commit
·
c926705
1
Parent(s):
a636254
Oracle
Browse files
- Oracle/deepfundingoracle.py +22 -4
Oracle/deepfundingoracle.py
CHANGED
@@ -353,14 +353,31 @@ def visualize_feature_distributions(df):
|
|
353 |
plt.title("Feature Correlation Matrix", fontsize=16)
|
354 |
plt.show()
|
355 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
def normalize_funding(df):
|
357 |
"""
|
358 |
Normalize funding weights for child repositories grouped by parent.
|
359 |
"""
|
360 |
print("[INFO] Normalizing funding weights...", flush=True)
|
361 |
-
df
|
362 |
-
lambda x: x / x.sum() if x.sum() > 0 else 1 / len(x)
|
363 |
-
)
|
364 |
print("[INFO] Funding weights normalized successfully.", flush=True)
|
365 |
return df
|
366 |
|
@@ -487,6 +504,7 @@ def train_predict_weight(df):
|
|
487 |
if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <=1:
|
488 |
print("[WARN] Not enough data or variance for model training. Using base weights directly.")
|
489 |
df["final_weight"] = df["base_weight"]
|
|
|
490 |
return df
|
491 |
|
492 |
# Split data into train/test sets
|
@@ -529,7 +547,7 @@ def train_predict_weight(df):
|
|
529 |
except Exception as e:
|
530 |
print(f"[ERROR] Model training failed: {e}")
|
531 |
df["final_weight"] = df["base_weight"]
|
532 |
-
|
533 |
end_time = time.time()
|
534 |
print(f"[INFO] Weight prediction completed in {end_time - start_time:.2f} seconds.", flush=True)
|
535 |
return df
|
|
|
353 |
plt.title("Feature Correlation Matrix", fontsize=16)
|
354 |
plt.show()
|
355 |
|
356 |
+
|
357 |
+
def normalize_and_clip_weights(df, group_col="parent", weight_col="final_weight"):
    """
    Make the weights in ``weight_col`` non-negative and sum to 1 within each group.

    The weight column of ``df`` is overwritten in place and the same DataFrame
    object is returned, so callers may either use the return value or keep
    using their original reference.

    Args:
        df: DataFrame containing at least ``group_col`` and ``weight_col``.
        group_col: Name of the column whose values define the groups.
        weight_col: Name of the column holding the weights to clip and normalize.

    Returns:
        ``df`` with ``weight_col`` replaced by the normalized weights. Within a
        group, negative and missing weights count as zero; a group with no
        positive weight is split uniformly among its rows.

    Raises:
        KeyError: If ``group_col`` or ``weight_col`` is not a column of ``df``.
    """
    # Clip negative weights to zero. Missing weights are also treated as zero:
    # Series.sum() skips NaN, so without the fill a NaN row would stay NaN after
    # the division below and the group would no longer sum to 1.
    df[weight_col] = df[weight_col].clip(lower=0).fillna(0)

    # Nothing to normalize; avoids running a groupby over zero groups.
    if df.empty:
        return df

    # Normalize weights within each group
    def normalize_group(x):
        total = x.sum()
        if total > 0:
            return x / total
        # No positive weight in this group: share the weight uniformly.
        return np.full(len(x), 1.0 / len(x))

    df[weight_col] = df.groupby(group_col)[weight_col].transform(normalize_group)
    return df
|
373 |
+
|
374 |
+
|
375 |
def normalize_funding(df):
    """
    Normalize the funding weights of child repositories within each parent group.

    Delegates the work to ``normalize_and_clip_weights`` using its default
    column names, printing an ``[INFO]`` line before and after the call.
    Returns the DataFrame produced by that helper.
    """
    print("[INFO] Normalizing funding weights...", flush=True)
    normalized = normalize_and_clip_weights(df)
    print("[INFO] Funding weights normalized successfully.", flush=True)
    return normalized
|
383 |
|
|
|
504 |
if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <=1:
|
505 |
print("[WARN] Not enough data or variance for model training. Using base weights directly.")
|
506 |
df["final_weight"] = df["base_weight"]
|
507 |
+
df = normalize_and_clip_weights(df)
|
508 |
return df
|
509 |
|
510 |
# Split data into train/test sets
|
|
|
547 |
except Exception as e:
|
548 |
print(f"[ERROR] Model training failed: {e}")
|
549 |
df["final_weight"] = df["base_weight"]
|
550 |
+
df = normalize_and_clip_weights(df)
|
551 |
end_time = time.time()
|
552 |
print(f"[INFO] Weight prediction completed in {end_time - start_time:.2f} seconds.", flush=True)
|
553 |
return df
|