lambertxiao commited on
Commit
c50c7e8
Β·
verified Β·
1 Parent(s): 816d008

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -19
app.py CHANGED
@@ -1,42 +1,44 @@
1
- import spaces # NEW ─ brings the decorator into scope
 
 
2
  import gradio as gr
3
- from transformers import AutoModel, AutoProcessor
4
  from PIL import Image
5
  import torch, numpy as np
6
 
7
  model_name_or_path = "lyttt/VLV_captioner"
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
- model = AutoModel.from_pretrained(
11
  model_name_or_path,
12
  revision="master",
13
  trust_remote_code=True,
14
  low_cpu_mem_usage=False
15
  ).to(device)
16
 
17
- processor = AutoProcessor.from_pretrained(
18
- model_name_or_path,
19
- revision="master",
20
- trust_remote_code=True,
21
- )
 
22
 
23
- @spaces.GPU(duration=120) # now the decorator resolves
 
24
  def greet(image):
25
- if image.dtype != np.uint8:
26
  image = (np.clip(image, 0, 1) * 255).astype(np.uint8)
 
27
  image_pil = Image.fromarray(image, mode="RGB")
28
 
29
- # preprocess + generate
30
- inputs = processor(images=[image_pil], return_tensors="pt").to(device)
31
  with torch.no_grad():
32
- ids = model.generate(**inputs, max_new_tokens=64)
33
- text = processor.decode(ids[0], skip_special_tokens=True)
 
34
 
35
- # drop unfinished last sentence
36
- sentences = [s.strip() for s in text.split('.') if s.strip()]
37
- if not text.strip().endswith('.'):
38
- sentences = sentences[:-1]
39
- return '. '.join(sentences) + ('.' if sentences else '')
40
 
41
  demo = gr.Interface(fn=greet, inputs="image", outputs="text")
42
  demo.launch()
 
1
+ # If this really is a HF Space, keep the next import;
2
+ # otherwise comment it out and delete the decorator line below.
3
+ import spaces # <─ ONLY needed in a Space
4
  import gradio as gr
5
+ from transformers import AutoModel
6
  from PIL import Image
7
  import torch, numpy as np
8
 
9
# Model checkpoint on the Hugging Face Hub, and the device to run it on
# (GPU when available, CPU otherwise).
model_name_or_path = "lyttt/VLV_captioner"
device = "cuda" if torch.cuda.is_available() else "cpu"

# trust_remote_code=True is required: the checkpoint ships its own modelling
# code. low_cpu_mem_usage=False loads the weights eagerly rather than via the
# meta-device path.
# NOTE(review): revision="master" tracks a moving branch — consider pinning a
# commit hash for reproducibility.
model = AutoModel.from_pretrained(
    model_name_or_path,
    revision="master",
    trust_remote_code=True,
    low_cpu_mem_usage=False
).to(device)
18
 
19
def drop_incomplete_tail(text: str) -> str:
    """Strip a trailing, unterminated sentence fragment from *text*.

    The text is split on ``'.'`` and whitespace-only pieces are discarded.
    When the input does not itself end with a period, the final piece is
    treated as an unfinished sentence and dropped. Surviving sentences are
    re-joined with ``'. '`` and given a closing period; an empty string is
    returned when nothing survives.
    """
    sentences = []
    for chunk in text.split('.'):
        chunk = chunk.strip()
        if chunk:
            sentences.append(chunk)

    # No terminal period => the last fragment is an unfinished sentence.
    if sentences and not text.strip().endswith('.'):
        sentences.pop()

    if not sentences:
        return ''
    return '. '.join(sentences) + '.'
25
 
26
# ───────────────────────────────────────────────────────────────
@spaces.GPU(duration=120)  # ← delete this line if **not** in a Space
def greet(image):
    """Caption an image.

    Converts the array Gradio hands over into a PIL image, runs the
    VLV captioner, and returns the caption with any unfinished trailing
    sentence removed.
    """
    # Gradio may deliver a float array in [0, 1]; normalise to uint8 first.
    if image.dtype != np.uint8:
        image = (np.clip(image, 0, 1) * 255).astype(np.uint8)

    pil_img = Image.fromarray(image, mode="RGB")

    # The VLV-captioner accepts a list of PIL images directly; the second
    # argument is the max-new-tokens budget.
    with torch.no_grad():
        output = model([pil_img], 300)

    # Some revisions return an object with .generated_text, others a list.
    caption = output.generated_text[0] if hasattr(output, "generated_text") else output[0]
    return drop_incomplete_tail(caption)
# ───────────────────────────────────────────────────────────────
 
 
 
42
 
43
# Minimal Gradio UI: one image input, one text output, backed by greet().
demo = gr.Interface(fn=greet, inputs="image", outputs="text")
demo.launch()