FelixPhilip committed on
Commit
ed0fab0
·
1 Parent(s): 2424d59
Files changed (2) hide show
  1. Oracle/deepfundingoracle.py +45 -3
  2. app.py +2 -1
Oracle/deepfundingoracle.py CHANGED
@@ -47,7 +47,7 @@ logging.basicConfig(
47
  )
48
 
49
  ##############################
50
- # Enhanced GitHub API helper: Fetch repository metrics
51
  ##############################
52
  def fetch_repo_metrics(repo_url):
53
  """
@@ -89,7 +89,7 @@ def fetch_repo_metrics(repo_url):
89
 
90
 
91
  ##############################
92
- # Enhanced Feature Extraction
93
  ##############################
94
  def load_data(file):
95
  """
@@ -300,6 +300,48 @@ def assign_base_weight(df, max_workers=32, llm_retries=2, llm_delay=0):
300
  return df
301
 
302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  def prepare_dataset(file):
304
  print("[INFO] Starting dataset preparation...")
305
  start_time = time.time()
@@ -319,7 +361,7 @@ def prepare_dataset(file):
319
 
320
 
321
  ##############################
322
- # Enhanced RandomForest Regression
323
  ##############################
324
  def train_predict_weight(df):
325
  print("[INFO] Starting weight prediction...", flush=True)
 
47
  )
48
 
49
  ##############################
50
+ # GitHub API helper: Fetch repository metrics
51
  ##############################
52
  def fetch_repo_metrics(repo_url):
53
  """
 
89
 
90
 
91
  ##############################
92
+ # Feature Extraction
93
  ##############################
94
  def load_data(file):
95
  """
 
300
  return df
301
 
302
 
303
+ ############################################
304
+ # Enforce parent/child weight constraints between repositories
305
+ ############################################
306
def enforce_parent_child_constraints(df, weight_col="final_weight"):
    """Cap every repository's weight at the weight of its parent.

    The ``repo`` / ``parent`` columns of *df* describe a dependency graph.
    Starting from the top-level parents (values of ``parent`` that have no
    row of their own in ``repo``), the graph is walked downwards and any
    repository whose weight exceeds its parent's (already capped) weight is
    lowered to that weight.

    Args:
        df: DataFrame with ``repo``, ``parent`` and *weight_col* columns.
        weight_col: Name of the column holding the weights to constrain.

    Returns:
        The same DataFrame object; *weight_col* is overwritten in place.

    Raises:
        KeyError: If ``repo``, ``parent`` or *weight_col* is missing.

    Notes:
        * Weights are keyed by ``repo``. If a repo appears in several rows,
          the last row's weight is used and every row of that repo receives
          the same final value.
        * Top-level parents have no weight of their own, so their direct
          children are capped at 1.0.
        * Components with no top-level parent (e.g. a pure cycle) are left
          untouched.
    """
    repos = df["repo"].tolist()
    parents = df["parent"].tolist()
    weight_map = dict(zip(repos, df[weight_col].tolist()))

    # parent -> list of its direct children
    children_map = {}
    for repo, parent in zip(repos, parents):
        children_map.setdefault(parent, []).append(repo)

    # Roots are parents that never appear as a repo themselves.  (Taking
    # "repos that are never a parent" instead would select the leaves, which
    # have no children, and the traversal would never visit anything.)
    roots = [parent for parent in children_map if parent not in weight_map]

    # Explicit worklist instead of recursion: safe for deep hierarchies and
    # guaranteed to terminate on cyclic data.
    top_level_cap = 1.0
    pending = [
        (child, top_level_cap)
        for root in roots
        for child in children_map[root]
    ]
    visited = set()
    while pending:
        repo, cap = pending.pop()
        current = weight_map[repo]
        lowered = current > cap
        if lowered:
            # Constraint: a child may not weigh more than its parent.
            current = cap
            weight_map[repo] = current
        # Descend on the first visit, or again whenever the weight dropped so
        # that the tighter cap reaches the descendants.  Weights only ever
        # decrease, so revisits are bounded even when the graph has cycles.
        if lowered or repo not in visited:
            visited.add(repo)
            pending.extend(
                (child, current) for child in children_map.get(repo, ())
            )

    # Write the constrained weights back onto the DataFrame.
    df[weight_col] = df["repo"].map(weight_map)
    return df
342
+
343
+
344
+
345
  def prepare_dataset(file):
346
  print("[INFO] Starting dataset preparation...")
347
  start_time = time.time()
 
361
 
362
 
363
  ##############################
364
+ # RandomForest Regression
365
  ##############################
366
  def train_predict_weight(df):
367
  print("[INFO] Starting weight prediction...", flush=True)
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import gradio as gr
3
- from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv
4
  import pandas as pd
5
  import matplotlib.pyplot as plt
6
  import time
@@ -13,6 +13,7 @@ def analyze_file(file, progress=gr.Progress(track_tqdm=True)):
13
  df = prepare_dataset(file.name)
14
  progress(0.3, desc="Predicting weights...")
15
  df = train_predict_weight(df)
 
16
  progress(0.6, desc="Saving results to CSV...")
17
  csv_path = create_submission_csv(df, "submission.csv")
18
  progress(0.8, desc="Generating graph...")
 
1
  import os
2
  import gradio as gr
3
+ from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv,enforce_parent_child_constraints
4
  import pandas as pd
5
  import matplotlib.pyplot as plt
6
  import time
 
13
  df = prepare_dataset(file.name)
14
  progress(0.3, desc="Predicting weights...")
15
  df = train_predict_weight(df)
16
+ df = enforce_parent_child_constraints(df, weight_col="final_weight")
17
  progress(0.6, desc="Saving results to CSV...")
18
  csv_path = create_submission_csv(df, "submission.csv")
19
  progress(0.8, desc="Generating graph...")