Spaces:
Running
Running
Commit
·
cb06856
1
Parent(s):
ea68d4a
Oracle
Browse files
- Oracle/deepfundingoracle.py +37 -33
Oracle/deepfundingoracle.py
CHANGED
@@ -474,49 +474,53 @@ def train_predict_weight(df):
|
|
474 |
X = df[feature_cols]
|
475 |
y = df[target]
|
476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
477 |
# Split data into train/test sets
|
478 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
479 |
|
|
|
|
|
|
|
|
|
|
|
|
|
480 |
# Hyperparameter tuning using GridSearchCV
|
481 |
param_grid = {
|
482 |
-
"n_estimators": [100, 200
|
483 |
-
"max_depth": [10, 15
|
484 |
-
"min_samples_split": [2, 5
|
485 |
-
"min_samples_leaf": [1, 2
|
486 |
}
|
487 |
rf = RandomForestRegressor(random_state=42)
|
488 |
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring="neg_mean_squared_error", verbose=2)
|
489 |
grid_search.fit(X_train, y_train)
|
490 |
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
plt.scatter(y_test, y_pred, alpha=0.5)
|
513 |
-
plt.xlabel("Actual Base Weight")
|
514 |
-
plt.ylabel("Predicted Base Weight")
|
515 |
-
plt.title("Predictions vs. Actual")
|
516 |
-
plt.show()
|
517 |
-
|
518 |
-
# Assign predictions to DataFrame
|
519 |
-
df["final_weight"] = best_rf.predict(X)
|
520 |
|
521 |
end_time = time.time()
|
522 |
print(f"[INFO] Weight prediction completed in {end_time - start_time:.2f} seconds.", flush=True)
|
|
|
474 |
X = df[feature_cols]
|
475 |
y = df[target]
|
476 |
|
477 |
+
# Check for sufficient data and variance
|
478 |
+
if X.shape[0] < 5 or X.nunique().sum() <=1 or y.nunique() <=1:
|
479 |
+
print("[WARN] Not enough data or variance for model training. Using base weights directly.")
|
480 |
+
df["final_weight"] = df["base_weight"]
|
481 |
+
return df
|
482 |
+
|
483 |
# Split data into train/test sets
|
484 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
485 |
|
486 |
+
# Check again after split
|
487 |
+
if X_train.shape[0] < 2 or y_train.nunique()<=1 or X_train.nunique().sum() <=1:
|
488 |
+
print("[WARN] Not enough data or variance for model training. Using base weights directly.")
|
489 |
+
df["final_weight"] = df["base_weight"]
|
490 |
+
return df
|
491 |
+
|
492 |
# Hyperparameter tuning using GridSearchCV
|
493 |
param_grid = {
|
494 |
+
"n_estimators": [100, 200],
|
495 |
+
"max_depth": [10, 15],
|
496 |
+
"min_samples_split": [2, 5],
|
497 |
+
"min_samples_leaf": [1, 2]
|
498 |
}
|
499 |
rf = RandomForestRegressor(random_state=42)
|
500 |
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring="neg_mean_squared_error", verbose=2)
|
501 |
grid_search.fit(X_train, y_train)
|
502 |
|
503 |
+
try:
|
504 |
+
grid_search.fit(X_train, y_train)
|
505 |
+
best_rf = grid_search.best_estimator_
|
506 |
+
print(f"[INFO] Best parameters: {grid_search.best_params_}")
|
507 |
+
|
508 |
+
# Evaluate on test set
|
509 |
+
y_pred = best_rf.predict(X_test)
|
510 |
+
mse = mean_squared_error(y_test, y_pred)
|
511 |
+
print(f"[INFO] Test MSE: {mse}")
|
512 |
+
|
513 |
+
# Feature importance analysis
|
514 |
+
feature_importances = best_rf.feature_importances_
|
515 |
+
importance_df = pd.DataFrame({"Feature": feature_cols, "Importance": feature_importances}).sort_values(by="Importance", ascending=False)
|
516 |
+
print("[INFO] Feature importances:")
|
517 |
+
print(importance_df)
|
518 |
+
|
519 |
+
# Assign predictions to DataFrame
|
520 |
+
df["final_weight"] = best_rf.predict(X)
|
521 |
+
except Exception as e:
|
522 |
+
print(f"[ERROR] Model training failed: {e}")
|
523 |
+
df["final_weight"] = df["base_weight"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
524 |
|
525 |
end_time = time.time()
|
526 |
print(f"[INFO] Weight prediction completed in {end_time - start_time:.2f} seconds.", flush=True)
|