Spaces:
Sleeping
Sleeping
Commit
·
274692a
1
Parent(s):
93b3dfc
Oracle weight assigning update
Browse files
- Oracle/deepfundingoracle.py +10 -4
Oracle/deepfundingoracle.py
CHANGED
@@ -25,6 +25,7 @@ import signal
|
|
25 |
from tqdm import tqdm
|
26 |
import sys
|
27 |
import re
|
|
|
28 |
|
29 |
from sklearn.model_selection import train_test_split, RandomizedSearchCV
|
30 |
from sklearn.ensemble import RandomForestRegressor
|
@@ -231,7 +232,7 @@ def timeout_handler(signum, frame):
|
|
231 |
# logging.info(f"[INFO] Base weights assigned successfully in {end_time - start_time:.2f} seconds.")
|
232 |
# return df
|
233 |
|
234 |
-
|
235 |
|
236 |
def assign_base_weight(df, max_workers=32):
|
237 |
"""
|
@@ -276,17 +277,22 @@ def assign_base_weight(df, max_workers=32):
|
|
276 |
}
|
277 |
print(f"[INFO] Using default feature weights: {feature_weights}", flush=True)
|
278 |
|
279 |
-
# Step 2:
|
|
|
|
|
|
|
|
|
|
|
280 |
def calculate_weight(row):
|
281 |
weight = 0
|
282 |
for feature, feature_weight in feature_weights.items():
|
283 |
-
if feature in row
|
284 |
weight += row[feature] * feature_weight
|
285 |
return weight
|
286 |
|
287 |
df["base_weight_raw"] = df.apply(calculate_weight, axis=1)
|
288 |
|
289 |
-
# Step
|
290 |
df["base_weight"] = df.groupby("parent")["base_weight_raw"].transform(
|
291 |
lambda s: (s - s.min()) / (s.max() - s.min() if s.max() != s.min() else 1)
|
292 |
)
|
|
|
25 |
from tqdm import tqdm
|
26 |
import sys
|
27 |
import re
|
28 |
+
import json
|
29 |
|
30 |
from sklearn.model_selection import train_test_split, RandomizedSearchCV
|
31 |
from sklearn.ensemble import RandomForestRegressor
|
|
|
232 |
# logging.info(f"[INFO] Base weights assigned successfully in {end_time - start_time:.2f} seconds.")
|
233 |
# return df
|
234 |
|
235 |
+
|
236 |
|
237 |
def assign_base_weight(df, max_workers=32):
|
238 |
"""
|
|
|
277 |
}
|
278 |
print(f"[INFO] Using default feature weights: {feature_weights}", flush=True)
|
279 |
|
280 |
+
# Step 2: Ensure all feature columns are numeric
|
281 |
+
for feature in feature_weights.keys():
|
282 |
+
if feature in df.columns:
|
283 |
+
df[feature] = pd.to_numeric(df[feature], errors='coerce').fillna(0)
|
284 |
+
|
285 |
+
# Step 3: Programmatically calculate weights for each repository
|
286 |
def calculate_weight(row):
|
287 |
weight = 0
|
288 |
for feature, feature_weight in feature_weights.items():
|
289 |
+
if feature in row:
|
290 |
weight += row[feature] * feature_weight
|
291 |
return weight
|
292 |
|
293 |
df["base_weight_raw"] = df.apply(calculate_weight, axis=1)
|
294 |
|
295 |
+
# Step 4: Normalize weights per parent
|
296 |
df["base_weight"] = df.groupby("parent")["base_weight_raw"].transform(
|
297 |
lambda s: (s - s.min()) / (s.max() - s.min() if s.max() != s.min() else 1)
|
298 |
)
|