Hieucyber2208 committed on
Commit
d155102
·
verified ·
1 Parent(s): aedb14e

Update src/generation/llm.py

Browse files
Files changed (1) hide show
  1. src/generation/llm.py +7 -3
src/generation/llm.py CHANGED
@@ -99,10 +99,14 @@ class LLM:
99
  )
100
  # Decode the generated tokens
101
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
102
- # Strip any system/user metadata
103
- response = response.replace("system", "").replace("user", "").replace("assistant", "")
104
- # Remove any extra whitespace or unwanted tokens
 
105
  response = " ".join(response.split()).strip()
 
 
 
106
  print("Response generated successfully!")
107
  return response
108
  except Exception as e:
 
99
  )
100
  # Decode the generated tokens
101
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
102
+ # Remove system/user/assistant metadata and unwanted tokens
103
+ for token in ["system", "user", "assistant", "You are", "Bạn là", "Truy vấn", "Danh sách nhà hàng", "Phản hồi"]:
104
+ response = response.replace(token, "")
105
+ # Remove extra whitespace and clean up
106
  response = " ".join(response.split()).strip()
107
+ # Ensure the response starts cleanly (remove any leading colons or brackets)
108
+ if response.startswith(":") or response.startswith("["):
109
+ response = response[1:].strip()
110
  print("Response generated successfully!")
111
  return response
112
  except Exception as e: