Spaces:

FelixPhilip
/

DeepFundingOracle

Running

App Files Files Community

FelixPhilip committed on Apr 27

Commit

93f0901

1 Parent(s): 526b6b7

Oracle weight assigning update

Browse files

Files changed (2) hide show

Oracle/SmolLM.py +8 -8
Oracle/deepfundingoracle.py +11 -11

Oracle/SmolLM.py CHANGED Viewed

@@ -4,26 +4,26 @@ class SmolLM:
     def __init__(self, model_path="HuggingFaceTB/SmolLM2-1.7B-Instruct"):
         self.available = True
         try:
-            print(f"[INFO] Loading model tokenizer from {model_path}")
             self.tokenizer = AutoTokenizer.from_pretrained(model_path)
-            print(f"[INFO] Loading model from {model_path}")
             self.model = AutoModelForCausalLM.from_pretrained(model_path)
-            print("[INFO] Model loaded successfully")
         except Exception as e:
             print(f"[ERROR] Failed to load model '{model_path}': {e}")
             self.available = False
-    def predict(self, prompt):
         if not self.available:
-            print("[WARN] LLama model unavailable, returning default weight 0.5")
             return "0.5"
         try:
             print(f"[INFO] Generating response for prompt: {prompt[:100]}...", flush=True)
-            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-            outputs = self.model.generate(**inputs, max_length=150, num_return_sequences=1)
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             print(f"[INFO] Generated response: {response[:100]}...", flush=True)
             return response
         except Exception as e:
-            print(f"[ERROR] LLama model inference failed: {e}")
             return "0.5"

     def __init__(self, model_path="HuggingFaceTB/SmolLM2-1.7B-Instruct"):
         self.available = True
         try:
+            print(f"[INFO] Loading Oracle tokenizer from {model_path}")
             self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+            print(f"[INFO] Loading Oracle from {model_path}")
             self.model = AutoModelForCausalLM.from_pretrained(model_path)
+            print("[INFO] Oracle loaded successfully")
         except Exception as e:
             print(f"[ERROR] Failed to load model '{model_path}': {e}")
             self.available = False
+    def predict(self, prompt,max_length=512,max_new_tokens=150):
         if not self.available:
+            print("[WARN] Oracle unavailable, returning default weight 0.5")
             return "0.5"
         try:
             print(f"[INFO] Generating response for prompt: {prompt[:100]}...", flush=True)
+            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)
+            outputs = self.model.generate(**inputs, max_length=inputs["input_ids"].shape[1]+max_new_tokens,num_return_sequences=1)
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             print(f"[INFO] Generated response: {response[:100]}...", flush=True)
             return response
         except Exception as e:
+            print(f"[ERROR] Oracle has failed: {e}")
             return "0.5"

Oracle/deepfundingoracle.py CHANGED Viewed

@@ -243,7 +243,7 @@ def assign_base_weight(df, max_workers=32, llm_retries=3,llm_delay=2):
     print("[INFO] Starting optimized base weight assignment...", flush=True)
     logging.info("[INFO] Assigning base weights using optimized approach...")
     start_time = time.time()
-    llama = SmolLM()
     # Step 1: Call LLM once to determine weights for each feature
     prompt = (
@@ -255,9 +255,9 @@ def assign_base_weight(df, max_workers=32, llm_retries=3,llm_delay=2):
         "- Pull Requests\n"
         "- Activity (days since last update)\n"
         "- Contributors\n\n"
-        "Assign a weight (0-1) to each feature based on its importance in determining "
-        "the influence of a repository. Output ONLY a valid JSON object with keys as feature names and values as their weights. "
-        "Do not include any explanation or extra text. Example:\n"
         '{\n'
         '  "stars": 0.3,\n'
         '  "forks": 0.2,\n'
@@ -268,21 +268,21 @@ def assign_base_weight(df, max_workers=32, llm_retries=3,llm_delay=2):
         '  "contributors": 0.05\n'
         '}\n'
     )
-    fearure_weights= None
     for attempt in range(llm_retries):
         try:
-            response = llama.predict(prompt)
-            if not response or response.strip():
-                raise ValueError("Empty response from LLM.")
             feature_weights = json.loads(response)  # Safely parse JSON
             print(f"[INFO] Feature weights from LLM: {feature_weights}", flush=True)
             break
         except Exception as e:
-            print(f"[ERROR] LLM attempt {attempt+1} failed: {e}", flush=True)
-            logging.error(f"[ERROR] LLM attempt {attempt+1} failed: {e}")
             time.sleep(llm_delay)
             # Fallback to default weights
-        if fearure_weights is None:
             feature_weights = {
                 "stars": 0.3,
                 "forks": 0.2,

     print("[INFO] Starting optimized base weight assignment...", flush=True)
     logging.info("[INFO] Assigning base weights using optimized approach...")
     start_time = time.time()
+    oracle = SmolLM()
     # Step 1: Call LLM once to determine weights for each feature
     prompt = (
         "- Pull Requests\n"
         "- Activity (days since last update)\n"
         "- Contributors\n\n"
+        "Can you Predict a weight in the range (0-1) to each feature listed above based on its importance in determining "
+        "the influence of a repository. Output ONLY a valid JSON object with keys as feature names and values as the predicted weights. "
+        "Do not include any explanation or extra text. here is an output example: \n"
         '{\n'
         '  "stars": 0.3,\n'
         '  "forks": 0.2,\n'
         '  "contributors": 0.05\n'
         '}\n'
     )
+    feature_weights= None
     for attempt in range(llm_retries):
         try:
+            response = oracle.predict(prompt,max_length=512, max_new_tokens=150)
+            if not response or not response.strip():
+                raise ValueError("Empty response from Oracle.")
             feature_weights = json.loads(response)  # Safely parse JSON
             print(f"[INFO] Feature weights from LLM: {feature_weights}", flush=True)
             break
         except Exception as e:
+            print(f"[ERROR] Oracle attempt {attempt+1} failed: {e}", flush=True)
+            logging.error(f"[ERROR] Oracle attempt {attempt+1} failed: {e}")
             time.sleep(llm_delay)
             # Fallback to default weights
+    if feature_weights is None:
             feature_weights = {
                 "stars": 0.3,
                 "forks": 0.2,