Spaces:
Running on Zero

Ruurd committed on
Commit
acc4845
·
verified ·
1 Parent(s): 2736195

Remove random noising

Browse files
Files changed (1) hide show
  1. app.py +10 -45
app.py CHANGED
@@ -20,11 +20,6 @@ pad_token = tokenizer.pad_token_id or tokenizer.eos_token_id
20
  eot_token_id = tokenizer.eos_token_id
21
  assistant_marker_ids = tokenizer.encode("Assistant:", add_special_tokens=False)
22
 
23
- # --- Load token probabilities ---
24
- with open("token_probabilities.json") as f:
25
- token_probs_dict = json.load(f)
26
- token_probabilities = np.array([token_probs_dict[str(i)] for i in range(len(token_probs_dict))], dtype=np.float32)
27
-
28
  # def load_model():
29
  # ckpt_path = hf_hub_download(
30
  # repo_id="ruurd/tini_bi_m",
@@ -87,7 +82,7 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
87
  x = i / max_it
88
  return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
89
 
90
- def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, mask_weight=0.0, clustering=0.5, noise_start = 1.0):
91
  noised = input_ids.copy()
92
  answer_len = len(noised) - answer_start
93
  num_to_noise = int(threshold * answer_len * noise_start)
@@ -96,19 +91,6 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, mask_
96
  if num_to_noise == 0:
97
  return noised, []
98
 
99
- mixed_probs = token_probabilities.copy()
100
-
101
- # Apply EOT weighting
102
- mixed_probs[eot_token_id] *= eot_weight
103
-
104
- # Scale all other probabilities so they sum to 1 - mask_weight
105
- total_other = mixed_probs.sum() - mixed_probs[mask_token_id]
106
- scale = (1.0 - mask_weight) / total_other
107
- mixed_probs *= scale
108
-
109
- # Set mask_token_id to mask_weight explicitly
110
- mixed_probs[mask_token_id] = mask_weight
111
-
112
  num_clusters = max(1, int((1 - clustering) * num_to_noise))
113
  cluster_size = max(1, int(num_to_noise / num_clusters))
114
 
@@ -121,15 +103,14 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, mask_
121
 
122
  noised_indices = sorted(list(noised_indices))[:num_to_noise]
123
 
124
- noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
125
- for idx, val in zip(noised_indices, noise):
126
- noised[idx] = val
127
 
128
  return noised, noised_indices
129
 
130
 
131
  # Add new noising function
132
- def confidence_guided_noising(input_ids, answer_start, confidences, noise_clipping, threshold=1.0, eot_weight = 1.0, mask_weight = 0.0, noise_start = 1.0):
133
  noised = input_ids.copy()
134
  answer_len = len(input_ids) - answer_start
135
  num_to_noise = int(threshold * answer_len * noise_start)
@@ -158,22 +139,8 @@ def confidence_guided_noising(input_ids, answer_start, confidences, noise_clippi
158
  p=weights
159
  )
160
 
161
- mixed_probs = token_probabilities.copy()
162
-
163
- # Apply EOT weighting
164
- mixed_probs[eot_token_id] *= eot_weight
165
-
166
- # Scale all other probabilities so they sum to 1 - mask_weight
167
- total_other = mixed_probs.sum() - mixed_probs[mask_token_id]
168
- scale = (1.0 - mask_weight) / total_other
169
- mixed_probs *= scale
170
-
171
- # Set mask_token_id to mask_weight explicitly
172
- mixed_probs[mask_token_id] = mask_weight
173
-
174
- noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
175
- for idx, val in zip(indices, noise):
176
- noised[idx] = val
177
 
178
  return noised
179
 
@@ -194,7 +161,7 @@ def generate_diffusion_text(input_ids):
194
  return sampled, conf
195
 
196
  # --- Inference Wrapper ---
197
- def diffusion_chat(question, eot_weight, mask_weight, max_it, pause_length, sharpness, clustering, noise_start, use_confidence_noising, noise_clipping):
198
  placeholder = "What do you know about the city of New York?"
199
  placeholder = ""
200
  if question.strip() == "":
@@ -215,7 +182,7 @@ def diffusion_chat(question, eot_weight, mask_weight, max_it, pause_length, shar
215
 
216
  ori_input_tokens = input_ids
217
  current_tokens, just_noised_indices = noisify_answer(
218
- input_ids, answer_start, threshold=1.0, eot_weight=eot_weight, mask_weight=mask_weight, clustering=clustering, noise_start = 1.0,
219
  )
220
  yield f"<b>Iteration 0 (initial noise):</b><br>" + tokenizer.decode(current_tokens[answer_start:], skip_special_tokens=True).replace('\n', '<br>')
221
  time.sleep(pause_length)
@@ -262,12 +229,12 @@ def diffusion_chat(question, eot_weight, mask_weight, max_it, pause_length, shar
262
  threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
263
  if use_confidence_noising:
264
  noised_answer = confidence_guided_noising(
265
- current_tokens, answer_start, confidences, noise_clipping, threshold=threshold, eot_weight=eot_weight, mask_weight=mask_weight, noise_start=noise_start
266
  )
267
  just_noised_indices = []
268
  else:
269
  noised_answer, just_noised_indices = noisify_answer(
270
- current_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, mask_weight=mask_weight, clustering=clustering, noise_start = noise_start,
271
  )
272
 
273
  # Compose full input again: prompt + noised answer
@@ -306,8 +273,6 @@ demo = gr.Interface(
306
  fn=diffusion_chat,
307
  inputs=[
308
  gr.Textbox(label="User Question", lines=2, placeholder="What do you know about the city of New York?"),
309
- gr.Slider(0, 1, value=0.5, step=0.05, label="↓ = longer answers (EOT weight)"),
310
- gr.Slider(0, 1, value=0.5, step=0.05, label="↓ = more random answers (MASK weight)"),
311
  gr.Slider(1, 512, value=32, step=1, label="↑ = more iterations"),
312
  gr.Slider(0.01, 5, value=0.01, step=0.01, label="↑ = longer pause (for visualization)"),
313
  gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
 
20
  eot_token_id = tokenizer.eos_token_id
21
  assistant_marker_ids = tokenizer.encode("Assistant:", add_special_tokens=False)
22
 
 
 
 
 
 
23
  # def load_model():
24
  # ckpt_path = hf_hub_download(
25
  # repo_id="ruurd/tini_bi_m",
 
82
  x = i / max_it
83
  return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
84
 
85
+ def noisify_answer(input_ids, answer_start, threshold=1.0, clustering=0.5, noise_start = 1.0):
86
  noised = input_ids.copy()
87
  answer_len = len(noised) - answer_start
88
  num_to_noise = int(threshold * answer_len * noise_start)
 
91
  if num_to_noise == 0:
92
  return noised, []
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  num_clusters = max(1, int((1 - clustering) * num_to_noise))
95
  cluster_size = max(1, int(num_to_noise / num_clusters))
96
 
 
103
 
104
  noised_indices = sorted(list(noised_indices))[:num_to_noise]
105
 
106
+ for idx in noised_indices:
107
+ noised[idx] = mask_token_id
 
108
 
109
  return noised, noised_indices
110
 
111
 
112
  # Add new noising function
113
+ def confidence_guided_noising(input_ids, answer_start, confidences, noise_clipping, threshold=1.0, noise_start = 1.0):
114
  noised = input_ids.copy()
115
  answer_len = len(input_ids) - answer_start
116
  num_to_noise = int(threshold * answer_len * noise_start)
 
139
  p=weights
140
  )
141
 
142
+ for idx in indices:
143
+ noised[idx] = mask_token_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  return noised
146
 
 
161
  return sampled, conf
162
 
163
  # --- Inference Wrapper ---
164
+ def diffusion_chat(question, max_it, pause_length, sharpness, clustering, noise_start, use_confidence_noising, noise_clipping):
165
  placeholder = "What do you know about the city of New York?"
166
  placeholder = ""
167
  if question.strip() == "":
 
182
 
183
  ori_input_tokens = input_ids
184
  current_tokens, just_noised_indices = noisify_answer(
185
+ input_ids, answer_start, threshold=1.0, clustering=clustering, noise_start = 1.0,
186
  )
187
  yield f"<b>Iteration 0 (initial noise):</b><br>" + tokenizer.decode(current_tokens[answer_start:], skip_special_tokens=True).replace('\n', '<br>')
188
  time.sleep(pause_length)
 
229
  threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
230
  if use_confidence_noising:
231
  noised_answer = confidence_guided_noising(
232
+ current_tokens, answer_start, confidences, noise_clipping, threshold=threshold, noise_start=noise_start
233
  )
234
  just_noised_indices = []
235
  else:
236
  noised_answer, just_noised_indices = noisify_answer(
237
+ current_tokens, answer_start, threshold=threshold, clustering=clustering, noise_start = noise_start,
238
  )
239
 
240
  # Compose full input again: prompt + noised answer
 
273
  fn=diffusion_chat,
274
  inputs=[
275
  gr.Textbox(label="User Question", lines=2, placeholder="What do you know about the city of New York?"),
 
 
276
  gr.Slider(1, 512, value=32, step=1, label="↑ = more iterations"),
277
  gr.Slider(0.01, 5, value=0.01, step=0.01, label="↑ = longer pause (for visualization)"),
278
  gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),