FelixPhilip committed on
Commit
16532e0
·
1 Parent(s): 5b351fa

updated assign base weights

Browse files
Files changed (1) hide show
  1. Oracle/deepfundingoracle.py +11 -5
Oracle/deepfundingoracle.py CHANGED
@@ -23,6 +23,7 @@ import concurrent.futures
23
  import signal
24
  from tqdm import tqdm
25
  import sys
 
26
 
27
  from sklearn.model_selection import train_test_split, GridSearchCV
28
  from sklearn.ensemble import RandomForestRegressor
@@ -188,15 +189,20 @@ def assign_base_weight(df):
188
  print(f"[INFO] Sending prompt to LLama model for repo: {repo}", flush=True)
189
  start_llama_time = time.time()
190
  response = llama.predict(prompt)
191
- weight = float(''.join([c for c in response if c.isdigit() or c == '.']))
192
- weight = min(max(weight, 0), 1)
 
 
 
 
 
193
  end_llama_time = time.time()
194
  print(f"[INFO] Received weight {weight} for {repo} in {end_llama_time - start_llama_time:.2f} seconds.", flush=True)
195
  logging.info(f"[INFO] Processed repository {repo} in {end_llama_time - start_llama_time:.2f} seconds. Weight: {weight}")
196
  except Exception as e:
197
  print(f"[ERROR] Failed to process repository {repo}: {e}", flush=True)
198
  logging.error(f"[ERROR] Failed to process repository {repo}: {e}")
199
- weight = 0.5 # Default weight in case of failure
200
  base_weights.append(weight)
201
  print(f"[PROGRESS] Finished {idx + 1}/{len(df)} repositories.", flush=True)
202
 
@@ -238,10 +244,10 @@ def train_predict_weight(df):
238
  y = df[target]
239
  print("[INFO] Splitting data into training and testing sets...", flush=True)
240
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
241
- rf_model = RandomForestRegressor(random_state=42)
242
  param_grid = {
243
  "n_estimators": [100, 200, 300],
244
- "max_depth": [None, 10, 20, 30],
245
  "min_samples_split": [2, 5, 10],
246
  "min_samples_leaf": [1, 2, 4]
247
  }
 
23
  import signal
24
  from tqdm import tqdm
25
  import sys
26
+ import re
27
 
28
  from sklearn.model_selection import train_test_split, GridSearchCV
29
  from sklearn.ensemble import RandomForestRegressor
 
189
  print(f"[INFO] Sending prompt to LLama model for repo: {repo}", flush=True)
190
  start_llama_time = time.time()
191
  response = llama.predict(prompt)
192
+ # Use regex to extract the first valid float from the response
193
+ match = re.search(r"[-+]?\d*\.\d+|\d+", response)
194
+ if match:
195
+ weight = float(match.group())
196
+ weight = min(max(weight, 0), 1)
197
+ else:
198
+ raise ValueError(f"No valid float found in response: {response}")
199
  end_llama_time = time.time()
200
  print(f"[INFO] Received weight {weight} for {repo} in {end_llama_time - start_llama_time:.2f} seconds.", flush=True)
201
  logging.info(f"[INFO] Processed repository {repo} in {end_llama_time - start_llama_time:.2f} seconds. Weight: {weight}")
202
  except Exception as e:
203
  print(f"[ERROR] Failed to process repository {repo}: {e}", flush=True)
204
  logging.error(f"[ERROR] Failed to process repository {repo}: {e}")
205
+ weight = 0.0 # Default weight in case of failure (set to 0 for no work)
206
  base_weights.append(weight)
207
  print(f"[PROGRESS] Finished {idx + 1}/{len(df)} repositories.", flush=True)
208
 
 
244
  y = df[target]
245
  print("[INFO] Splitting data into training and testing sets...", flush=True)
246
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
247
+ rf_model = RandomForestRegressor(random_state=42, max_depth=None)
248
  param_grid = {
249
  "n_estimators": [100, 200, 300],
250
+ "max_depth": [None], # Only allow unlimited depth
251
  "min_samples_split": [2, 5, 10],
252
  "min_samples_leaf": [1, 2, 4]
253
  }