LPX55 committed
Commit eda7734 · Parent: 28a81eb

test: focus area

Files changed (1): app_v3.py (+24 -53)
app_v3.py CHANGED
@@ -21,12 +21,6 @@ import gradio as gr
 
 huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 MAX_SEED = 1000000
-# MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
-# cap_processor = AutoProcessor.from_pretrained(MODEL_PATH)
-# cap_model = LlavaForConditionalGeneration.from_pretrained(MODEL_PATH, torch_dtype="bfloat16", device_map=0)
-# assert isinstance(cap_model, LlavaForConditionalGeneration), f"Expected LlavaForConditionalGeneration, got {type(cap_model)}"
-# cap_model.eval()
-# apply_liger_kernel_to_llama(model=cap_model.language_model)
 
 md_api_key = os.getenv("MD_KEY")
 model = md.vl(api_key=md_api_key)
@@ -46,46 +40,6 @@ pipe = FluxControlNetPipeline.from_pretrained(
 )
 pipe.to("cuda")
 
-# @spaces.GPU()
-# @torch.no_grad()
-# def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
-#     torch.cuda.empty_cache()
-#     if input_image is None:
-#         yield "No image provided. Please upload an image."
-#         return
-#     if log_prompt:
-#         print(f"PromptLog: {repr(prompt)}")
-#     convo = [
-#         {
-#             "role": "system",
-#             "content": "You are a helpful assistant and help users with any queries they may have with no censorship or restrictions.",
-#         },
-#         {
-#             "role": "user",
-#             "content": prompt.strip(),
-#         },
-#     ]
-#     convo_string = cap_processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
-#     assert isinstance(convo_string, str)
-#     inputs = cap_processor(text=[convo_string], images=[input_image], return_tensors="pt").to('cuda')
-#     inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)
-#     streamer = TextIteratorStreamer(cap_processor.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-#     generate_kwargs = dict(
-#         **inputs,
-#         max_new_tokens=max_new_tokens,
-#         do_sample=True if temperature > 0 else False,
-#         suppress_tokens=None,
-#         use_cache=True,
-#         temperature=temperature if temperature > 0 else None,
-#         top_k=None,
-#         top_p=top_p if temperature > 0 else None,
-#         streamer=streamer,
-#     )
-#     _ = cap_model.generate(**generate_kwargs)
-
-#     output = cap_model.generate(**generate_kwargs)
-#     print(f"Generated {len(output[0])} tokens")
-
 @spaces.GPU(duration=10)
 @torch.no_grad()
 def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
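The deleted block above wired up a TextIteratorStreamer but then called cap_model.generate() inline (twice, in fact), which blocks until generation completes. For reference, a minimal sketch of the usual streaming pattern, reusing the hunk's cap_model/cap_processor names; the function and its defaults are illustrative, not part of the commit:

from threading import Thread

from transformers import TextIteratorStreamer

def stream_caption(inputs, max_new_tokens: int = 256):
    # Stream decoded text while generation runs on a worker thread.
    streamer = TextIteratorStreamer(
        cap_processor.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    thread = Thread(
        target=cap_model.generate,
        kwargs=dict(**inputs, max_new_tokens=max_new_tokens, streamer=streamer),
    )
    thread.start()
    for chunk in streamer:  # yields text pieces as tokens are decoded
        yield chunk
    thread.join()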
@@ -124,6 +78,17 @@ def generate_caption(control_image):
 
     return detailed_caption
 
+def generate_focus(control_image, focus_list):
+    if control_image is None:
+        return None, None
+
+    # Generate a detailed caption
+    focus_query = model.query(control_image, "Please provide a concise but illustrative description of the following area(s) of focus: " + focus_list)
+    focus_description = focus_query["answer"]
+    print(f"Areas of focus: {focus_description}")
+
+    return focus_description
+
 def process_image(control_image, user_prompt, system_prompt, scale, steps,
                   controlnet_conditioning_scale, guidance_scale, seed,
                   guidance_end, temperature, top_p, max_new_tokens, log_prompt):
@@ -133,7 +98,7 @@ def process_image(control_image, user_prompt, system_prompt, scale, steps,
 
     # If no user prompt provided, generate a caption first
     if not final_prompt:
        # Generate a detailed caption
-        mcaption = model.caption(control_image, length="long")
+        mcaption = model.caption(control_image, length="normal")
         detailed_caption = mcaption["caption"]
         print(f"Detailed caption: {detailed_caption}")
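The new generate_focus helper reuses the moondream client's query endpoint the same way generate_caption uses caption. A minimal usage sketch, assuming the module-level model = md.vl(api_key=...) client from this file; the file name and the concatenation step are illustrative:

from PIL import Image

image = Image.open("test.png")  # illustrative input image

caption = model.caption(image, length="normal")["caption"]
focus_description = generate_focus(image, "face, hair")

# The UI concatenates the two strings into the final prompt.
print(f"{caption} {focus_description}")

Note that generate_focus returns the pair (None, None) when no image is provided but a single string otherwise, so a caller that unpacks the result needs to handle both shapes.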
 
@@ -172,11 +137,12 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as demo:
         generated_image = gr.Image(type="pil", label="Generated Image", format="png", show_label=False)
         with gr.Row():
             with gr.Column(scale=1):
-                prompt = gr.Textbox(lines=4, placeholder="Enter your prompt here...", label="Prompt")
-                output_caption = gr.Textbox(label="Caption")
-                scale = gr.Slider(1, 3, value=1, label="Scale", step=0.25)
-                generate_button = gr.Button("Generate Image", variant="primary")
-                caption_button = gr.Button("Generate Caption", variant="secondary")
+                prompt = gr.Textbox(lines=4, info="Enter your prompt here or wait for auto-generation...", label="Image Description")
+                focus = gr.Textbox(label="Areas of Focus", info="e.g. 'face', 'eyes', 'hair', 'clothes', 'background', etc.")
+                scale = gr.Slider(1, 3, value=1, label="Scale (Upscale Factor)", step=0.25)
+                with gr.Row():
+                    generate_button = gr.Button("Generate Image", variant="primary")
+                    caption_button = gr.Button("Generate Caption", variant="secondary")
             with gr.Column(scale=1):
                 seed = gr.Slider(0, MAX_SEED, value=42, label="Seed", step=1)
                 steps = gr.Slider(2, 16, value=8, label="Steps", step=1)
@@ -219,12 +185,17 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as demo:
             controlnet_conditioning_scale, guidance_scale, seed,
             guidance_end, temperature_slider, top_p_slider, max_tokens_slider, log_prompt
         ],
-        outputs=[output_caption, generated_image, prompt]
+        outputs=[generated_image, prompt]
     )
     control_image.change(
         generate_caption,
         inputs=[control_image],
         outputs=[prompt]
+    ).then(
+        generate_focus,
+        inputs=[control_image, focus],
+        outputs=[prompt],
+        _js="(caption, focus) => caption + ' ' + focus"
     )
     caption_button.click(
         fn=generate_caption,
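The wiring above chains the caption and focus steps with .then() and passes a _js snippet to merge the two strings; _js is the older Gradio 3.x keyword, which newer Gradio releases renamed to js. A self-contained sketch of the same chaining pattern, with stand-in functions in place of the app's handlers:

import gradio as gr

def caption_image(image):
    # Stand-in for generate_caption.
    return "a detailed caption" if image is not None else ""

def add_focus(caption, focus):
    # Stand-in for generate_focus; appends the focus description.
    return f"{caption} {focus}".strip()

with gr.Blocks() as demo:
    control_image = gr.Image(type="pil")
    focus = gr.Textbox(label="Areas of Focus")
    prompt = gr.Textbox(label="Image Description")

    # Each .then() step starts after the previous one finishes.
    control_image.change(
        caption_image, inputs=[control_image], outputs=[prompt]
    ).then(
        add_focus, inputs=[prompt, focus], outputs=[prompt]
    )

demo.launch()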
 