Spaces:
Running
Running
Commit
·
19b8fa5
1
Parent(s):
cb06856
Oracle
Browse files - Oracle/deepfundingoracle.py (+14 -6)
Oracle/deepfundingoracle.py
CHANGED
@@ -463,7 +463,7 @@ def train_predict_weight(df):
|
|
463 |
print("[INFO] Starting weight prediction with hyperparameter tuning...", flush=True)
|
464 |
start_time = time.time()
|
465 |
target = "base_weight"
|
466 |
-
feature_cols = [col for col in df.columns if col not in ["
|
467 |
|
468 |
# Validate and scale features
|
469 |
df = validate_features(df)
|
@@ -474,8 +474,17 @@ def train_predict_weight(df):
|
|
474 |
X = df[feature_cols]
|
475 |
y = df[target]
|
476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
477 |
# Check for sufficient data and variance
|
478 |
-
if X.shape[0] < 5 or X.
|
479 |
print("[WARN] Not enough data or variance for model training. Using base weights directly.")
|
480 |
df["final_weight"] = df["base_weight"]
|
481 |
return df
|
@@ -484,7 +493,7 @@ def train_predict_weight(df):
|
|
484 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
485 |
|
486 |
# Check again after split
|
487 |
-
if X_train.shape[0] < 2 or
|
488 |
print("[WARN] Not enough data or variance for model training. Using base weights directly.")
|
489 |
df["final_weight"] = df["base_weight"]
|
490 |
return df
|
@@ -498,7 +507,6 @@ def train_predict_weight(df):
|
|
498 |
}
|
499 |
rf = RandomForestRegressor(random_state=42)
|
500 |
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring="neg_mean_squared_error", verbose=2)
|
501 |
-
grid_search.fit(X_train, y_train)
|
502 |
|
503 |
try:
|
504 |
grid_search.fit(X_train, y_train)
|
@@ -512,12 +520,12 @@ def train_predict_weight(df):
|
|
512 |
|
513 |
# Feature importance analysis
|
514 |
feature_importances = best_rf.feature_importances_
|
515 |
-
importance_df = pd.DataFrame({"Feature":
|
516 |
print("[INFO] Feature importances:")
|
517 |
print(importance_df)
|
518 |
|
519 |
# Assign predictions to DataFrame
|
520 |
-
df["final_weight"] = best_rf.predict(
|
521 |
except Exception as e:
|
522 |
print(f"[ERROR] Model training failed: {e}")
|
523 |
df["final_weight"] = df["base_weight"]
|
|
|
463 |
print("[INFO] Starting weight prediction with hyperparameter tuning...", flush=True)
|
464 |
start_time = time.time()
|
465 |
target = "base_weight"
|
466 |
+
feature_cols = [col for col in df.select_dtypes(include=[np.number]).columns if col not in ["base_weight", "final_weight","base_weight_raw"]]
|
467 |
|
468 |
# Validate and scale features
|
469 |
df = validate_features(df)
|
|
|
474 |
X = df[feature_cols]
|
475 |
y = df[target]
|
476 |
|
477 |
+
# Remove columns with all NaN values
|
478 |
+
X = X.loc[:, X.notna().any()]
|
479 |
+
X = X.loc[:, X.nunique() > 1]
|
480 |
+
|
481 |
+
# Remove rows with NaN values
|
482 |
+
mask = X.notna().all(axis=1) & y.notna()
|
483 |
+
X= X[mask]
|
484 |
+
y = y[mask]
|
485 |
+
|
486 |
# Check for sufficient data and variance
|
487 |
+
if X.shape[0] < 5 or X.shape[1] == 0 or y.nunique() <=1:
|
488 |
print("[WARN] Not enough data or variance for model training. Using base weights directly.")
|
489 |
df["final_weight"] = df["base_weight"]
|
490 |
return df
|
|
|
493 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
494 |
|
495 |
# Check again after split
|
496 |
+
if X_train.shape[0] < 2 or X_train.shape[1] == 0 or y_train.nunique() <= 1:
|
497 |
print("[WARN] Not enough data or variance for model training. Using base weights directly.")
|
498 |
df["final_weight"] = df["base_weight"]
|
499 |
return df
|
|
|
507 |
}
|
508 |
rf = RandomForestRegressor(random_state=42)
|
509 |
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring="neg_mean_squared_error", verbose=2)
|
|
|
510 |
|
511 |
try:
|
512 |
grid_search.fit(X_train, y_train)
|
|
|
520 |
|
521 |
# Feature importance analysis
|
522 |
feature_importances = best_rf.feature_importances_
|
523 |
+
importance_df = pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances}).sort_values(by="Importance", ascending=False)
|
524 |
print("[INFO] Feature importances:")
|
525 |
print(importance_df)
|
526 |
|
527 |
# Assign predictions to DataFrame
|
528 |
+
df["final_weight"] = best_rf.predict(df[X_train.columns].fillna(0))
|
529 |
except Exception as e:
|
530 |
print(f"[ERROR] Model training failed: {e}")
|
531 |
df["final_weight"] = df["base_weight"]
|