LPX55 commited on
Commit
20d1485
·
verified ·
1 Parent(s): a530f5c

Update raw.py

Browse files
Files changed (1) hide show
  1. raw.py +21 -28
raw.py CHANGED
@@ -105,34 +105,27 @@ def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: fl
105
  # WARNING: HF's handling of chats on Llava models is very fragile. This specific combination of processor.apply_chat_template() and processor() works
106
  # but if using other combinations, always inspect the final input_ids to ensure they are correct. Often you will end up with multiple <bos> tokens
107
  # if not careful, which can make the model perform poorly.
108
- convo_string = processor.apply_chat_template(convo, tokenize = False, add_generation_prompt = True)
109
- assert isinstance(convo_string, str)
110
-
111
- # Process the inputs
112
- inputs = processor(text=[convo_string], images=[input_image], return_tensors="pt").to('cuda')
113
- inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
114
-
115
- streamer = TextIteratorStreamer(processor.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
116
-
117
- generate_kwargs = dict(
118
- **inputs,
119
- max_new_tokens=max_new_tokens,
120
- do_sample=True if temperature > 0 else False,
121
- suppress_tokens=None,
122
- use_cache=True,
123
- temperature=temperature if temperature > 0 else None,
124
- top_k=None,
125
- top_p=top_p if temperature > 0 else None,
126
- streamer=streamer,
127
- )
128
-
129
- t = Thread(target=model.generate, kwargs=generate_kwargs)
130
- t.start()
131
-
132
- outputs = []
133
- for text in streamer:
134
- outputs.append(text)
135
- yield "".join(outputs)
136
 
137
  @spaces.GPU()
138
  @torch.no_grad()
 
105
  # WARNING: HF's handling of chats on Llava models is very fragile. This specific combination of processor.apply_chat_template() and processor() works
106
  # but if using other combinations, always inspect the final input_ids to ensure they are correct. Often you will end up with multiple <bos> tokens
107
  # if not careful, which can make the model perform poorly.
108
+ convo_string = cap_processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
109
+ assert isinstance(convo_string, str)
110
+ inputs = cap_processor(text=[convo_string], images=[input_image], return_tensors="pt").to('cuda')
111
+ inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
112
+ streamer = TextIteratorStreamer(cap_processor.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
113
+ generate_kwargs = dict(
114
+ **inputs,
115
+ max_new_tokens=max_new_tokens,
116
+ do_sample=True if temperature > 0 else False,
117
+ suppress_tokens=None,
118
+ use_cache=True,
119
+ temperature=temperature if temperature > 0 else None,
120
+ top_k=None,
121
+ top_p=top_p if temperature > 0 else None,
122
+ streamer=streamer,
123
+ )
124
+ _ = cap_model.generate(**generate_kwargs)
125
+ outputs = []
126
+ for text in streamer:
127
+ outputs.append(text)
128
+ yield "".join(outputs)
 
 
 
 
 
 
 
129
 
130
  @spaces.GPU()
131
  @torch.no_grad()