FelixPhilip committed
Commit 93f0901 · 1 parent: 526b6b7

Oracle weight assigning update

Files changed (2):
  1. Oracle/SmolLM.py +8 -8
  2. Oracle/deepfundingoracle.py +11 -11
Oracle/SmolLM.py CHANGED
@@ -4,26 +4,26 @@ class SmolLM:
     def __init__(self, model_path="HuggingFaceTB/SmolLM2-1.7B-Instruct"):
         self.available = True
         try:
-            print(f"[INFO] Loading model tokenizer from {model_path}")
+            print(f"[INFO] Loading Oracle tokenizer from {model_path}")
             self.tokenizer = AutoTokenizer.from_pretrained(model_path)
-            print(f"[INFO] Loading model from {model_path}")
+            print(f"[INFO] Loading Oracle from {model_path}")
             self.model = AutoModelForCausalLM.from_pretrained(model_path)
-            print("[INFO] Model loaded successfully")
+            print("[INFO] Oracle loaded successfully")
         except Exception as e:
             print(f"[ERROR] Failed to load model '{model_path}': {e}")
             self.available = False
 
-    def predict(self, prompt):
+    def predict(self, prompt, max_length=512, max_new_tokens=150):
         if not self.available:
-            print("[WARN] LLama model unavailable, returning default weight 0.5")
+            print("[WARN] Oracle unavailable, returning default weight 0.5")
             return "0.5"
         try:
             print(f"[INFO] Generating response for prompt: {prompt[:100]}...", flush=True)
-            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-            outputs = self.model.generate(**inputs, max_length=150, num_return_sequences=1)
+            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)
+            outputs = self.model.generate(**inputs, max_length=inputs["input_ids"].shape[1] + max_new_tokens, num_return_sequences=1)
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             print(f"[INFO] Generated response: {response[:100]}...", flush=True)
             return response
         except Exception as e:
-            print(f"[ERROR] LLama model inference failed: {e}")
+            print(f"[ERROR] Oracle has failed: {e}")
             return "0.5"
Oracle/deepfundingoracle.py CHANGED
@@ -243,7 +243,7 @@ def assign_base_weight(df, max_workers=32, llm_retries=3, llm_delay=2):
     print("[INFO] Starting optimized base weight assignment...", flush=True)
     logging.info("[INFO] Assigning base weights using optimized approach...")
     start_time = time.time()
-    llama = SmolLM()
+    oracle = SmolLM()
 
     # Step 1: Call LLM once to determine weights for each feature
     prompt = (
@@ -255,9 +255,9 @@ def assign_base_weight(df, max_workers=32, llm_retries=3, llm_delay=2):
         "- Pull Requests\n"
         "- Activity (days since last update)\n"
         "- Contributors\n\n"
-        "Assign a weight (0-1) to each feature based on its importance in determining "
-        "the influence of a repository. Output ONLY a valid JSON object with keys as feature names and values as their weights. "
-        "Do not include any explanation or extra text. Example:\n"
+        "Predict a weight in the range (0-1) for each feature listed above based on its importance in determining "
+        "the influence of a repository. Output ONLY a valid JSON object with keys as feature names and values as the predicted weights. "
+        "Do not include any explanation or extra text. Here is an output example:\n"
         '{\n'
         ' "stars": 0.3,\n'
         ' "forks": 0.2,\n'
@@ -268,21 +268,21 @@ def assign_base_weight(df, max_workers=32, llm_retries=3, llm_delay=2):
         ' "contributors": 0.05\n'
         '}\n'
     )
-    fearure_weights= None
+    feature_weights = None
     for attempt in range(llm_retries):
         try:
-            response = llama.predict(prompt)
-            if not response or response.strip():
-                raise ValueError("Empty response from LLM.")
+            response = oracle.predict(prompt, max_length=512, max_new_tokens=150)
+            if not response or not response.strip():
+                raise ValueError("Empty response from Oracle.")
             feature_weights = json.loads(response)  # Safely parse JSON
             print(f"[INFO] Feature weights from LLM: {feature_weights}", flush=True)
             break
         except Exception as e:
-            print(f"[ERROR] LLM attempt {attempt+1} failed: {e}", flush=True)
-            logging.error(f"[ERROR] LLM attempt {attempt+1} failed: {e}")
+            print(f"[ERROR] Oracle attempt {attempt+1} failed: {e}", flush=True)
+            logging.error(f"[ERROR] Oracle attempt {attempt+1} failed: {e}")
             time.sleep(llm_delay)
     # Fallback to default weights
-    if fearure_weights is None:
+    if feature_weights is None:
         feature_weights = {
             "stars": 0.3,
             "forks": 0.2,