Mikhil-jivus committed
Commit 724715f · verified · 1 Parent(s): 551ac7d

Update app.py

Files changed (1)
  1. app.py +11 -5
app.py CHANGED
@@ -36,12 +36,18 @@ def respond(
 
     messages.append({"role": "user", "content": message})
 
-    # Tokenize the input messages
+    # Tokenize the input messages with dynamic padding and truncation
     input_text = system_message + " ".join([f"{msg['role']}: {msg['content']}" for msg in messages])
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")
+    inputs = tokenizer(
+        input_text,
+        return_tensors="pt",
+        padding=True,  # Dynamically pad to the longest sequence in the batch
+        truncation=True,  # Truncate if exceeds max length
+        max_length=max_tokens  # Ensure max length is respected
+    )
 
-    # Create attention mask
-    attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
+    input_ids = inputs["input_ids"]
+    attention_mask = inputs["attention_mask"]
 
     # Generate a response
     chat_history_ids = model.generate(
@@ -51,7 +57,7 @@ def respond(
         top_p=top_p,
         pad_token_id=tokenizer.eos_token_id,
         do_sample=True,
-        attention_mask=attention_mask,
+        attention_mask=attention_mask,  # Use the dynamically generated attention mask
     )
 
     # Decode the response
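For context, this commit swaps tokenizer.encode plus a hand-built mask (input_ids.ne(tokenizer.pad_token_id)) for a single tokenizer(...) call, which returns input_ids and a matching attention_mask together and also truncates the prompt to max_tokens. One practical upside: the old manual mask depends on tokenizer.pad_token_id being set, which GPT-style tokenizers often lack, while the tokenizer-provided mask does not. Below is a minimal, self-contained sketch of the flow the diff lands on; the model name, prompt, and sampling values are placeholders, not taken from app.py.

# A minimal sketch of the tokenize-and-generate flow after this commit.
# The model name, prompt, and sampling values are stand-ins.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "microsoft/DialoGPT-small"  # hypothetical; app.py's actual model is not shown
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# GPT-style tokenizers often ship without a pad token, and padding=True
# raises without one, so reuse EOS as PAD (a common workaround).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

max_tokens = 128
input_text = "system: You are a helpful assistant. user: Hello!"

# New-style call: input_ids and a matching attention_mask come back together.
inputs = tokenizer(
    input_text,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=max_tokens,
)

with torch.no_grad():
    output_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_tokens,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

# Decode only the newly generated tokens, skipping the echoed prompt.
response = tokenizer.decode(
    output_ids[0, inputs["input_ids"].shape[-1]:], skip_special_tokens=True
)
print(response)

Note that with a single prompt, padding=True pads to the longest sequence in a batch of one, so it is effectively a no-op here; it matters once respond batches multiple inputs, which is presumably why the diff enables it.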