FelixPhilip committed on
Commit 19b8fa5 · 1 Parent(s): cb06856
Files changed (1)
  1. Oracle/deepfundingoracle.py +14 -6
Oracle/deepfundingoracle.py CHANGED
@@ -463,7 +463,7 @@ def train_predict_weight(df):
     print("[INFO] Starting weight prediction with hyperparameter tuning...", flush=True)
     start_time = time.time()
     target = "base_weight"
-    feature_cols = [col for col in df.columns if col not in ["repo", "parent", "base_weight", "final_weight"]]
+    feature_cols = [col for col in df.select_dtypes(include=[np.number]).columns if col not in ["base_weight", "final_weight", "base_weight_raw"]]
 
     # Validate and scale features
     df = validate_features(df)
@@ -474,8 +474,17 @@ def train_predict_weight(df):
     X = df[feature_cols]
     y = df[target]
 
+    # Remove columns with all NaN values
+    X = X.loc[:, X.notna().any()]
+    X = X.loc[:, X.nunique() > 1]
+
+    # Remove rows with NaN values
+    mask = X.notna().all(axis=1) & y.notna()
+    X = X[mask]
+    y = y[mask]
+
     # Check for sufficient data and variance
-    if X.shape[0] < 5 or X.nunique().sum() <= 1 or y.nunique() <= 1:
+    if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <= 1:
         print("[WARN] Not enough data or variance for model training. Using base weights directly.")
         df["final_weight"] = df["base_weight"]
         return df
@@ -484,7 +493,7 @@ def train_predict_weight(df):
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
     # Check again after split
-    if X_train.shape[0] < 2 or y_train.nunique() <= 1 or X_train.nunique().sum() <= 1:
+    if X_train.shape[0] < 2 or X_train.shape[1] == 0 or y_train.nunique() <= 1:
         print("[WARN] Not enough data or variance for model training. Using base weights directly.")
         df["final_weight"] = df["base_weight"]
         return df
@@ -498,7 +507,6 @@ def train_predict_weight(df):
     }
     rf = RandomForestRegressor(random_state=42)
     grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring="neg_mean_squared_error", verbose=2)
-    grid_search.fit(X_train, y_train)
 
     try:
         grid_search.fit(X_train, y_train)
@@ -512,12 +520,12 @@ def train_predict_weight(df):
 
         # Feature importance analysis
         feature_importances = best_rf.feature_importances_
-        importance_df = pd.DataFrame({"Feature": feature_cols, "Importance": feature_importances}).sort_values(by="Importance", ascending=False)
+        importance_df = pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances}).sort_values(by="Importance", ascending=False)
         print("[INFO] Feature importances:")
         print(importance_df)
 
         # Assign predictions to DataFrame
-        df["final_weight"] = best_rf.predict(X)
+        df["final_weight"] = best_rf.predict(df[X_train.columns].fillna(0))
     except Exception as e:
         print(f"[ERROR] Model training failed: {e}")
         df["final_weight"] = df["base_weight"]