Add semi-autoregressive generation
app.py
CHANGED
@@ -16,7 +16,6 @@ from infer import (
     find_answer_start,
     get_noising_schedule,
     noisify_answer,
-    generate_diffusion_text,
     filter_logits,
     confidence_guided_noising,
     noisify_answer_without_remasking
@@ -84,10 +83,10 @@ def highlight_tokens(token_ids, answer_start, changed_indices, color):
         highlighted.append(tok_str)
     return "".join(highlighted)
 
 def diffusion_chat(question, max_it, pause_length, eos_bias, sharpness,
                    noise_start, use_confidence_noising,
                    use_permanent_unmasking, noise_clipping, top_p,
-                   top_k):
+                   top_k, added_tokens):
 
     eos_bias = -eos_bias
     if question.strip() == "":
@@ -105,7 +104,7 @@ def diffusion_chat(question, max_it, pause_length, eos_bias, sharpness,
 
     # Initial noising
     current_tokens, just_noised_indices = noisify_answer(
-        input_ids, answer_start, tokenizer, threshold=1.0,
+        input_ids, answer_start, tokenizer, threshold=1.0, noise_start=1.0
     )
     yield render_html("Iteration 0 (initial noise)",
                       highlight_tokens(current_tokens[answer_start:], answer_start, just_noised_indices, color="red"))
@@ -115,8 +114,10 @@ def diffusion_chat(question, max_it, pause_length, eos_bias, sharpness,
     prev_decoded = []
 
     unmasked_mask = [False] * len(current_tokens)
-
+    current_tokens = current_tokens[:answer_start]
     for i in range(max_it):
+        current_tokens = current_tokens + [mask_token_id] * added_tokens
+        current_tokens = current_tokens[:256]  # Ensure we don't exceed the max length
         generated_tokens, confidences = generate_diffusion_text(current_tokens, top_p, top_k, eos_bias = eos_bias)
         current_tokens = ori_input_tokens[:answer_start] + generated_tokens[answer_start:]
 
@@ -156,7 +157,7 @@ def diffusion_chat(question, max_it, pause_length, eos_bias, sharpness,
         else:
             noised_answer, just_noised_indices = noisify_answer(
                 current_tokens, answer_start, tokenizer,
-                threshold=threshold,
+                threshold=threshold, noise_start=noise_start
             )
 
         for idx in range(answer_start, len(current_tokens)):
@@ -178,7 +179,7 @@ def diffusion_chat(question, max_it, pause_length, eos_bias, sharpness,
     final_ids = answer_ids
 
     final_output = tokenizer.decode(final_ids, skip_special_tokens=True)
-    yield render_html(f"Final Output ({len(final_ids)} tokens after {i+1} iterations)", final_output)
+    yield render_html(f"Final Output ({len(final_ids)} tokens after {i+1} iterations)", final_output)  # type: ignore
 
 
 def is_running_on_spaces():
@@ -195,7 +196,7 @@ if is_running_on_spaces():
     )
 else:
     # Load from local path
-    ckpt_path = "diffusion-model-
+    ckpt_path = "diffusion-model-3B.pth"  # change to your actual local path
 
 model, tokenizer = load_trained_model(checkpoint_path=ckpt_path)
 print("✅ Model loaded.")
@@ -213,13 +214,13 @@ demo = gr.Interface(
         gr.Slider(0.01, 5, value=0.01, step=0.01, label="Pause between iteration ↑ = longer pause"),
         gr.Slider(-5.0, 5.0, value=0.0, step=0.1, label="Generation length: ↑ = more output tokens by decreasing eos token probability"),
         gr.Slider(1.0, 20.0, value=1.0, step=0.5, label="Noise decay sharpness: ↓ = more noise in later iterations"),
-        gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Clustering: ↑ = more clustered noising"),
         gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="Noise start fraction: ↑ = more noise"),
         gr.Checkbox(value=False, label="Use confidence-guided noising"),
         gr.Checkbox(value=False, label="Use permanent unmasking"),
         gr.Slider(0.01, 1.0, value=0.01, step=0.01, label="Noise clipping: ↓ = more confidence guidance"),
         gr.Slider(1, 1000, value = 3, step = 1, label = "Top-p: ↑ = more random answers"),
-        gr.Slider(0.0, 1.0, value = 1.0, step = 0.01, label = "Top-k: ↑ = more random answers")
+        gr.Slider(0.0, 1.0, value = 1.0, step = 0.01, label = "Top-k: ↑ = more random answers"),
+        gr.Slider(1, 256, value=256, step=1, label="Semi-autoregressive generation: number of added tokens per iteration"),
     ],
     outputs=[gr.HTML(label="Diffusion Output")],
     title="Diffusion Language Model Chat",
infer.py
CHANGED
@@ -97,7 +97,7 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
     x = i / max_it
     return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
 
-def noisify_answer(input_ids, answer_start, tokenizer, threshold=1.0, clustering=0):
+def noisify_answer(input_ids, answer_start, tokenizer, threshold=1.0, clustering=0, noise_start = 1.0):
     noised = input_ids.copy()
     answer_len = len(noised) - answer_start
     num_to_noise = int(threshold * answer_len * noise_start)
@@ -316,7 +316,7 @@ def generate_answer(question: str, model, tokenizer, max_it=16, noise_start=0.5,
     input_ids += [mask_token] * (max_length - len(input_ids))
 
     ori_tokens = input_ids
-    current_tokens = noisify_answer(ori_tokens, answer_start, threshold=1.0)
+    current_tokens = noisify_answer(ori_tokens, answer_start, tokenizer, threshold=1.0)
 
     last_tokens = []
     for step in range(max_it):
@@ -344,6 +344,6 @@ def generate_answer(question: str, model, tokenizer, max_it=16, noise_start=0.5,
         # Re-apply noise for next iteration
         if step < max_it - 1:
            threshold = noise_start * get_noising_schedule(step, max_it, sharpness=noising_sharpness)
-           current_tokens = noisify_answer(current_tokens, answer_start, threshold=threshold)
+           current_tokens = noisify_answer(current_tokens, answer_start, tokenizer, threshold=threshold)
 
     return tokenizer.decode(current_tokens[answer_start:], skip_special_tokens=True).strip()
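
Both infer.py call sites had been calling `noisify_answer` without its `tokenizer` argument; the commit repairs the calls and threads the new `noise_start` factor into the count of re-masked tokens. Since the decay schedule and the `num_to_noise` formula are fully visible in the hunks, their interaction can be reproduced in a minimal sketch; the uniform index sampling here is an assumption (the real function also supports clustered noising), and `MASK_TOKEN_ID` is again a hypothetical stand-in.

```python
import random
import numpy as np

MASK_TOKEN_ID = 0  # hypothetical stand-in for the tokenizer's mask id

def get_noising_schedule(i, max_it, sharpness=5.0):
    # Exponential decay from 1 toward 0, copied from the hunk above.
    x = i / max_it
    return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))

def noisify_answer_uniform(input_ids, answer_start, threshold=1.0, noise_start=1.0):
    """Uniform re-masking; clustering and confidence guidance are omitted."""
    noised = list(input_ids)
    answer_len = len(noised) - answer_start
    num_to_noise = int(threshold * answer_len * noise_start)  # same formula as the diff
    indices = random.sample(range(answer_start, len(noised)), num_to_noise)
    for idx in indices:
        noised[idx] = MASK_TOKEN_ID
    return noised, set(indices)

# The schedule decays, so fewer answer tokens are re-masked each iteration:
tokens = list(range(100, 120))  # 8-token prompt followed by a 12-token answer
for step in range(4):
    threshold = get_noising_schedule(step, max_it=4, sharpness=5.0)
    _, remasked = noisify_answer_uniform(tokens, answer_start=8,
                                         threshold=threshold, noise_start=0.5)
    print(f"step {step}: threshold={threshold:.2f}, re-masked {len(remasked)}/12 tokens")
```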