Spaces:
Running
Running
Commit
·
ed0fab0
1
Parent(s):
2424d59
Oracle
Browse files- Oracle/deepfundingoracle.py +45 -3
- app.py +2 -1
Oracle/deepfundingoracle.py
CHANGED
@@ -47,7 +47,7 @@ logging.basicConfig(
|
|
47 |
)
|
48 |
|
49 |
##############################
|
50 |
-
#
|
51 |
##############################
|
52 |
def fetch_repo_metrics(repo_url):
|
53 |
"""
|
@@ -89,7 +89,7 @@ def fetch_repo_metrics(repo_url):
|
|
89 |
|
90 |
|
91 |
##############################
|
92 |
-
#
|
93 |
##############################
|
94 |
def load_data(file):
|
95 |
"""
|
@@ -300,6 +300,48 @@ def assign_base_weight(df, max_workers=32, llm_retries=2, llm_delay=0):
|
|
300 |
return df
|
301 |
|
302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
def prepare_dataset(file):
|
304 |
print("[INFO] Starting dataset preparation...")
|
305 |
start_time = time.time()
|
@@ -319,7 +361,7 @@ def prepare_dataset(file):
|
|
319 |
|
320 |
|
321 |
##############################
|
322 |
-
#
|
323 |
##############################
|
324 |
def train_predict_weight(df):
|
325 |
print("[INFO] Starting weight prediction...", flush=True)
|
|
|
47 |
)
|
48 |
|
49 |
##############################
|
50 |
+
# GitHub API helper: Fetch repository metrics
|
51 |
##############################
|
52 |
def fetch_repo_metrics(repo_url):
|
53 |
"""
|
|
|
89 |
|
90 |
|
91 |
##############################
|
92 |
+
# Feature Extraction
|
93 |
##############################
|
94 |
def load_data(file):
|
95 |
"""
|
|
|
300 |
return df
|
301 |
|
302 |
|
303 |
+
############################################
|
304 |
+
# Enforce parent/child weight constraints (a child never outweighs its parent)
|
305 |
+
############################################
|
306 |
+
def enforce_parent_child_constraints(df, weight_col="final_weight"):
    """
    Clamp weights so that no repository outweighs its parent.

    The dependency graph is read from the ``repo`` (child) and ``parent``
    columns of ``df``. Starting from every root -- a parent that never
    appears in the ``repo`` column -- the graph is walked top-down and each
    child's weight is lowered to its parent's weight whenever it exceeds it.
    Roots have no weight of their own in ``df``, so their direct children are
    capped at 1.0.

    Weights are tracked per repo name: if a repo appears in several rows
    (several parents), the last row's weight is used as its starting value,
    it is capped by every parent reachable from a root, and the resulting
    value is written back to all of its rows.

    Args:
        df: DataFrame with ``repo``, ``parent`` and ``weight_col`` columns.
        weight_col: Name of the column holding the weights to constrain.

    Returns:
        The same DataFrame object, with ``weight_col`` updated in place.
    """
    repos = df["repo"].tolist()
    parents = df["parent"].tolist()

    # Current weight of every repo (last row wins for duplicated repos).
    weight_map = dict(zip(repos, df[weight_col]))

    # parent -> list of direct children, in row order.
    children_map = {}
    for repo, parent in zip(repos, parents):
        children_map.setdefault(parent, []).append(repo)

    # Roots are parents that are never listed as a child.  The reverse
    # difference (repos minus parents) would select the leaves, which have
    # no children, and the traversal would never touch any weight.
    known_repos = set(repos)
    roots = [parent for parent in children_map if parent not in known_repos]

    # A root never has a row of its own, hence no weight: cap its children
    # at the maximum possible weight.
    root_cap = 1.0
    stack = [(child, root_cap) for root in roots for child in children_map[root]]

    # Iterative traversal (no recursion-depth limit).  A repo's children are
    # (re)visited only on the first visit or when its weight was lowered, so
    # the walk also terminates on cyclic data: weights can only decrease.
    visited = set()
    while stack:
        repo, cap = stack.pop()
        lowered = weight_map[repo] > cap
        if lowered:
            weight_map[repo] = cap
        if lowered or repo not in visited:
            visited.add(repo)
            limit = weight_map[repo]
            stack.extend((child, limit) for child in children_map.get(repo, ()))

    # Write the constrained weights back to the DataFrame.
    df[weight_col] = df["repo"].map(weight_map)
    return df
|
342 |
+
|
343 |
+
|
344 |
+
|
345 |
def prepare_dataset(file):
|
346 |
print("[INFO] Starting dataset preparation...")
|
347 |
start_time = time.time()
|
|
|
361 |
|
362 |
|
363 |
##############################
|
364 |
+
# RandomForest Regression
|
365 |
##############################
|
366 |
def train_predict_weight(df):
|
367 |
print("[INFO] Starting weight prediction...", flush=True)
|
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv
|
4 |
import pandas as pd
|
5 |
import matplotlib.pyplot as plt
|
6 |
import time
|
@@ -13,6 +13,7 @@ def analyze_file(file, progress=gr.Progress(track_tqdm=True)):
|
|
13 |
df = prepare_dataset(file.name)
|
14 |
progress(0.3, desc="Predicting weights...")
|
15 |
df = train_predict_weight(df)
|
|
|
16 |
progress(0.6, desc="Saving results to CSV...")
|
17 |
csv_path = create_submission_csv(df, "submission.csv")
|
18 |
progress(0.8, desc="Generating graph...")
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
+
from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv,enforce_parent_child_constraints
|
4 |
import pandas as pd
|
5 |
import matplotlib.pyplot as plt
|
6 |
import time
|
|
|
13 |
df = prepare_dataset(file.name)
|
14 |
progress(0.3, desc="Predicting weights...")
|
15 |
df = train_predict_weight(df)
|
16 |
+
df = enforce_parent_child_constraints(df, weight_col="final_weight")
|
17 |
progress(0.6, desc="Saving results to CSV...")
|
18 |
csv_path = create_submission_csv(df, "submission.csv")
|
19 |
progress(0.8, desc="Generating graph...")
|