Spaces:

FelixPhilip
/

DeepFundingOracle

Running

App Files Files Community

FelixPhilip committed on Apr 28

Commit

ed0fab0

1 Parent(s): 2424d59

Oracle

Browse files

Files changed (2) hide show

Oracle/deepfundingoracle.py +45 -3
app.py +2 -1

Oracle/deepfundingoracle.py CHANGED Viewed

@@ -47,7 +47,7 @@ logging.basicConfig(
 )
 ##############################
-# Enhanced GitHub API helper: Fetch repository metrics
 ##############################
 def fetch_repo_metrics(repo_url):
     """
@@ -89,7 +89,7 @@ def fetch_repo_metrics(repo_url):
 ##############################
-# Enhanced Feature Extraction
 ##############################
 def load_data(file):
     """
@@ -300,6 +300,48 @@ def assign_base_weight(df, max_workers=32, llm_retries=2, llm_delay=0):
     return df
 def prepare_dataset(file):
     print("[INFO] Starting dataset preparation...")
     start_time = time.time()
@@ -319,7 +361,7 @@ def prepare_dataset(file):
 ##############################
-# Enhanced RandomForest Regression
 ##############################
 def train_predict_weight(df):
     print("[INFO] Starting weight prediction...", flush=True)

 )
 ##############################
+#  GitHub API helper: Fetch repository metrics
 ##############################
 def fetch_repo_metrics(repo_url):
     """
 ##############################
+#  Feature Extraction
 ##############################
 def load_data(file):
     """
     return df
+############################################
+# Enforce parent/child weight constraints between repositories
+############################################
def enforce_parent_child_constraints(df, weight_col="final_weight"):
    """
    Cap each repository's weight so it never exceeds its parent's weight.

    The parent/child graph is read from the "repo" and "parent" columns of
    `df`. Starting from every root (a parent that never appears in the
    "repo" column, and therefore has no weight of its own), the graph is
    walked top-down and any child whose weight is larger than its parent's
    (already capped) weight is lowered to the parent's weight. Children of a
    root are capped at 1.0 because the root itself has no known weight.

    Args:
        df: DataFrame with "repo", "parent" and `weight_col` columns.
        weight_col: Name of the column holding the weights to constrain.

    Returns:
        The same DataFrame object, with `weight_col` overwritten in place.
        If a repo appears in several rows, the weight from its last row is
        used for that repo and written back to all of its rows.
    """
    # Weight currently assigned to each repo (last occurrence wins).
    weight_map = dict(zip(df["repo"], df[weight_col]))

    # parent -> list of direct children, used for the top-down walk.
    children_map = {}
    for repo, parent in zip(df["repo"], df["parent"]):
        children_map.setdefault(parent, []).append(repo)

    # Roots are parents that are never listed as a repo themselves. (The
    # reverse difference would yield leaves, which have no children, and the
    # traversal would silently do nothing.)
    roots = [parent for parent in children_map if parent not in weight_map]

    # Weight assumed for a root, since it has no row (and no weight) itself.
    root_weight = 1.0

    # Iterative DFS: avoids RecursionError on deep chains. A node is expanded
    # again only when its weight was just lowered, so cycles terminate.
    stack = [(child, root_weight) for root in roots for child in children_map[root]]
    visited = set()
    while stack:
        repo, parent_weight = stack.pop()
        if repo not in weight_map:
            continue
        if weight_map[repo] > parent_weight:
            # Enforce constraint: child cannot have more than parent.
            weight_map[repo] = parent_weight
        elif repo in visited:
            # Already propagated with this (unchanged) weight.
            continue
        visited.add(repo)
        capped = weight_map[repo]
        stack.extend((child, capped) for child in children_map.get(repo, ()))

    # Write the constrained weights back onto the DataFrame.
    df[weight_col] = df["repo"].map(weight_map)
    return df
 def prepare_dataset(file):
     print("[INFO] Starting dataset preparation...")
     start_time = time.time()
 ##############################
+#  RandomForest Regression
 ##############################
 def train_predict_weight(df):
     print("[INFO] Starting weight prediction...", flush=True)

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import gradio as gr
-from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv
 import pandas as pd
 import matplotlib.pyplot as plt
 import time
@@ -13,6 +13,7 @@ def analyze_file(file, progress=gr.Progress(track_tqdm=True)):
     df = prepare_dataset(file.name)
     progress(0.3, desc="Predicting weights...")
     df = train_predict_weight(df)
     progress(0.6, desc="Saving results to CSV...")
     csv_path = create_submission_csv(df, "submission.csv")
     progress(0.8, desc="Generating graph...")

 import os
 import gradio as gr
+from Oracle.deepfundingoracle import prepare_dataset, train_predict_weight, create_submission_csv,enforce_parent_child_constraints
 import pandas as pd
 import matplotlib.pyplot as plt
 import time
     df = prepare_dataset(file.name)
     progress(0.3, desc="Predicting weights...")
     df = train_predict_weight(df)
+    df = enforce_parent_child_constraints(df, weight_col="final_weight")
     progress(0.6, desc="Saving results to CSV...")
     csv_path = create_submission_csv(df, "submission.csv")
     progress(0.8, desc="Generating graph...")