dharmendra committed
Commit c1073c4 · Parent: b0353ee

Fix: Implement ConversationBufferWindowMemory and pipeline generation parameters

Files changed (1): app.py (+19 -4)
app.py CHANGED
@@ -17,7 +17,12 @@ if HUGGINGFACEHUB_API_TOKEN is None:
 
 
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct", device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True, token=HUGGINGFACEHUB_API_TOKEN)
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen2.5-1.5B-Instruct",
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
+    token=HUGGINGFACEHUB_API_TOKEN)
 #print(f"Tokenizer attributes: {dir(tokenizer)}")
 
 if torch.backends.mps.is_available():
@@ -28,13 +33,23 @@ else :
     device = "cpu"
 
 model.to(device)
-memory = ConversationBufferMemory() # for memory management
+# k=5 keeps the last 5 human-AI interaction pairs (10 messages total)
+memory = ConversationBufferWindowMemory(k=5)
 
 # Initialize Langchain HuggingFacePipeline
-llm = HuggingFacePipeline(pipeline=pipeline("text-generation", model=model, tokenizer=tokenizer))
+llm = HuggingFacePipeline(pipeline=pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=512,      # Adjust as needed for the desired response length
+    return_full_text=False,  # Crucial for getting only the AI's response, especially when the answer is short
+    temperature=0.7,         # Controls randomness (0.0 deterministic, 1.0 very creative)
+    do_sample=True           # Enable sampling for more varied outputs
+))
 
 # Initialize Langchain ConversationChain
-conversation = ConversationChain(llm=llm, memory=memory)
+# verbose=True for debugging LangChain's prompt construction
+conversation = ConversationChain(llm=llm, memory=memory, verbose=True)
 
 class QuestionRequest(BaseModel):
     question: str
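
Note: the switch from ConversationBufferMemory to ConversationBufferWindowMemory caps prompt growth, since only the last k exchanges are replayed into the prompt rather than the entire conversation. A minimal standalone sketch of that trimming behavior, not part of this commit (classic langchain API assumed; k=2 used for brevity):

from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(k=2)  # keep only the last 2 human-AI pairs
for i in range(4):
    memory.save_context({"input": f"question {i}"}, {"output": f"answer {i}"})

# Only exchanges 2 and 3 remain; 0 and 1 have been dropped from the window.
print(memory.load_memory_variables({})["history"])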
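Among the new pipeline parameters, return_full_text=False is what stops the model from echoing the prompt back: the text-generation pipeline then returns only the newly generated tokens. A quick standalone check, a sketch rather than part of the commit, assuming the same model and tokenizer objects from app.py:

from transformers import pipeline

gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=16,
    return_full_text=False,
)
out = gen("The capital of France is")
print(out[0]["generated_text"])  # continuation only; the prompt is not echoed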
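For context, QuestionRequest and the chain above would typically meet in a FastAPI route. A hedged sketch, assuming the conversation and QuestionRequest objects defined in app.py; the /ask path and handler name are assumptions, not shown in this diff:

from fastapi import FastAPI

app = FastAPI()

@app.post("/ask")
def ask(request: QuestionRequest):
    # predict() formats the prompt from the k=5 window memory, runs the
    # pipeline with the generation parameters above, and stores the new
    # exchange back into memory.
    answer = conversation.predict(input=request.question)
    return {"answer": answer}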