FelixPhilip committed on
Commit 19b8fa5 · 1 Parent(s): cb06856
Files changed (1)
  1. Oracle/deepfundingoracle.py +14 -6
Oracle/deepfundingoracle.py CHANGED
@@ -463,7 +463,7 @@ def train_predict_weight(df):
     print("[INFO] Starting weight prediction with hyperparameter tuning...", flush=True)
     start_time = time.time()
     target = "base_weight"
-    feature_cols = [col for col in df.columns if col not in ["repo", "parent", "base_weight", "final_weight"]]
+    feature_cols = [col for col in df.select_dtypes(include=[np.number]).columns if col not in ["base_weight", "final_weight", "base_weight_raw"]]
 
     # Validate and scale features
     df = validate_features(df)
@@ -474,8 +474,17 @@ def train_predict_weight(df):
     X = df[feature_cols]
     y = df[target]
 
+    # Remove columns with all NaN values
+    X = X.loc[:, X.notna().any()]
+    X = X.loc[:, X.nunique() > 1]
+
+    # Remove rows with NaN values
+    mask = X.notna().all(axis=1) & y.notna()
+    X = X[mask]
+    y = y[mask]
+
     # Check for sufficient data and variance
-    if X.shape[0] < 5 or X.nunique().sum() <= 1 or y.nunique() <= 1:
+    if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <= 1:
         print("[WARN] Not enough data or variance for model training. Using base weights directly.")
         df["final_weight"] = df["base_weight"]
         return df
@@ -484,7 +493,7 @@ def train_predict_weight(df):
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
     # Check again after split
-    if X_train.shape[0] < 2 or y_train.nunique() <= 1 or X_train.nunique().sum() <= 1:
+    if X_train.shape[0] < 2 or X_train.shape[1] == 0 or y_train.nunique() <= 1:
         print("[WARN] Not enough data or variance for model training. Using base weights directly.")
         df["final_weight"] = df["base_weight"]
         return df
@@ -498,7 +507,6 @@ def train_predict_weight(df):
     }
     rf = RandomForestRegressor(random_state=42)
     grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring="neg_mean_squared_error", verbose=2)
-    grid_search.fit(X_train, y_train)
 
     try:
         grid_search.fit(X_train, y_train)
@@ -512,12 +520,12 @@ def train_predict_weight(df):
 
         # Feature importance analysis
         feature_importances = best_rf.feature_importances_
-        importance_df = pd.DataFrame({"Feature": feature_cols, "Importance": feature_importances}).sort_values(by="Importance", ascending=False)
+        importance_df = pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances}).sort_values(by="Importance", ascending=False)
         print("[INFO] Feature importances:")
         print(importance_df)
 
         # Assign predictions to DataFrame
-        df["final_weight"] = best_rf.predict(X)
+        df["final_weight"] = best_rf.predict(df[X_train.columns].fillna(0))
     except Exception as e:
         print(f"[ERROR] Model training failed: {e}")
         df["final_weight"] = df["base_weight"]