dharmendra committed
Commit c1073c4 · Parent: b0353ee

Fix: Implement ConversationBufferWindowMemory and pipeline generation parameters

Files changed (1): app.py (+19 -4)
app.py CHANGED
@@ -17,7 +17,12 @@ if HUGGINGFACEHUB_API_TOKEN is None:
 
 
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct", device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True, token=HUGGINGFACEHUB_API_TOKEN)
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen2.5-1.5B-Instruct",
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
+    token=HUGGINGFACEHUB_API_TOKEN)
 #print(f"Tokenizer attributes: {dir(tokenizer)}")
 
 if torch.backends.mps.is_available():
@@ -28,13 +33,23 @@ else :
     device = "cpu"
 
 model.to(device)
-memory = ConversationBufferMemory() # for memory management
+# k=5 keeps the last 5 human-AI interaction pairs (10 messages total)
+memory = ConversationBufferWindowMemory(k=5)
 
 # Initialize Langchain HuggingFacePipeline
-llm = HuggingFacePipeline(pipeline=pipeline("text-generation", model=model, tokenizer=tokenizer))
+llm = HuggingFacePipeline(pipeline=pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=512,      # Adjust as needed for the desired response length
+    return_full_text=False,  # Crucial for getting only the AI's response, especially when the answer is short
+    temperature=0.7,         # Controls randomness (0.0 deterministic, 1.0 very creative)
+    do_sample=True           # Enable sampling for more varied outputs
+))
 
 # Initialize Langchain ConversationChain
-conversation = ConversationChain(llm=llm, memory=memory)
+# verbose=True for debugging LangChain's prompt construction
+conversation = ConversationChain(llm=llm, memory=memory, verbose=True)
 
 class QuestionRequest(BaseModel):
     question: str
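
Note: the switch from ConversationBufferMemory to ConversationBufferWindowMemory caps prompt growth, since only the last k exchanges are replayed into the prompt rather than the entire conversation. A minimal standalone sketch of that trimming behavior, not part of this commit (classic langchain API assumed; k=2 used for brevity):

from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(k=2)  # keep only the last 2 human-AI pairs
for i in range(4):
    memory.save_context({"input": f"question {i}"}, {"output": f"answer {i}"})

# Only exchanges 2 and 3 remain; 0 and 1 have been dropped from the window.
print(memory.load_memory_variables({})["history"])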
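Among the new pipeline parameters, return_full_text=False is what stops the model from echoing the prompt back: the text-generation pipeline then returns only the newly generated tokens. A quick standalone check, a sketch rather than part of the commit, assuming the same model and tokenizer objects from app.py:

from transformers import pipeline

gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=16,
    return_full_text=False,
)
out = gen("The capital of France is")
print(out[0]["generated_text"])  # continuation only; the prompt is not echoed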
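For context, QuestionRequest and the chain above would typically meet in a FastAPI route. A hedged sketch, assuming the conversation and QuestionRequest objects defined in app.py; the /ask path and handler name are assumptions, not shown in this diff:

from fastapi import FastAPI

app = FastAPI()

@app.post("/ask")
def ask(request: QuestionRequest):
    # predict() formats the prompt from the k=5 window memory, runs the
    # pipeline with the generation parameters above, and stores the new
    # exchange back into memory.
    answer = conversation.predict(input=request.question)
    return {"answer": answer}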