Commit 16532e0
Parent(s): 5b351fa

updated assign base weights

Oracle/deepfundingoracle.py CHANGED (+11 -5)
@@ -23,6 +23,7 @@ import concurrent.futures
 import signal
 from tqdm import tqdm
 import sys
+import re

 from sklearn.model_selection import train_test_split, GridSearchCV
 from sklearn.ensemble import RandomForestRegressor
@@ -188,15 +189,20 @@ def assign_base_weight(df):
             print(f"[INFO] Sending prompt to LLama model for repo: {repo}", flush=True)
             start_llama_time = time.time()
             response = llama.predict(prompt)
-
-
+            # Use regex to extract the first valid float from the response
+            match = re.search(r"[-+]?\d*\.\d+|\d+", response)
+            if match:
+                weight = float(match.group())
+                weight = min(max(weight, 0), 1)
+            else:
+                raise ValueError(f"No valid float found in response: {response}")
             end_llama_time = time.time()
             print(f"[INFO] Received weight {weight} for {repo} in {end_llama_time - start_llama_time:.2f} seconds.", flush=True)
             logging.info(f"[INFO] Processed repository {repo} in {end_llama_time - start_llama_time:.2f} seconds. Weight: {weight}")
         except Exception as e:
             print(f"[ERROR] Failed to process repository {repo}: {e}", flush=True)
             logging.error(f"[ERROR] Failed to process repository {repo}: {e}")
-            weight = 0.
+            weight = 0.0  # Default weight in case of failure (set to 0 for no work)
         base_weights.append(weight)
         print(f"[PROGRESS] Finished {idx + 1}/{len(df)} repositories.", flush=True)
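
For context, the parsing logic this hunk adds can be exercised on its own: take the model's free-text reply, extract the first float, and clamp it into [0, 1]. This is a minimal sketch; the extract_weight helper name and the sample responses are illustrative only, not part of the repository.

import re

def extract_weight(response: str) -> float:
    # Same pattern as the commit: first signed decimal (or bare integer) in the text
    match = re.search(r"[-+]?\d*\.\d+|\d+", response)
    if match:
        weight = float(match.group())
        return min(max(weight, 0), 1)  # clamp into [0, 1]
    raise ValueError(f"No valid float found in response: {response}")

print(extract_weight("I would assign this repository a base weight of 0.35."))  # 0.35
print(extract_weight("Weight: 2"))  # 1 (clamped)

Raising on an unparseable reply, instead of silently guessing, lets the surrounding except block log the failure and fall back to a weight of 0.0, which is what the hunk does.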
@@ -238,10 +244,10 @@ def train_predict_weight(df):
     y = df[target]
     print("[INFO] Splitting data into training and testing sets...", flush=True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-    rf_model = RandomForestRegressor(random_state=42)
+    rf_model = RandomForestRegressor(random_state=42, max_depth=None)
     param_grid = {
         "n_estimators": [100, 200, 300],
-        "max_depth": [None,
+        "max_depth": [None],  # Only allow unlimited depth
         "min_samples_split": [2, 5, 10],
         "min_samples_leaf": [1, 2, 4]
     }
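
The second change pins the random forest to unlimited depth, both on the estimator and in the search grid. Below is a rough sketch of how such a grid feeds into GridSearchCV; the toy data, cv, scoring, and n_jobs settings are assumptions, since the diff does not show how the search is actually configured.

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, train_test_split

# Toy regression data standing in for the real repository feature matrix
X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_model = RandomForestRegressor(random_state=42, max_depth=None)
param_grid = {
    "n_estimators": [100, 200, 300],
    "max_depth": [None],  # only unlimited depth, as in this commit
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
}

search = GridSearchCV(rf_model, param_grid, cv=3, scoring="neg_mean_squared_error", n_jobs=-1)
search.fit(X_train, y_train)
print(search.best_params_)
print(search.best_estimator_.score(X_test, y_test))  # R^2 on the held-out split

Fixing max_depth at None shrinks the search space and leaves min_samples_split and min_samples_leaf to control tree complexity instead.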