File size: 2,598 Bytes
a439fdd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
import yake
import requests
from io import BytesIO

# Load the Janus-Pro-7B tokenizer and causal-LM weights once, at import time.
# Both objects live at module level and are shared by every request.
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/Janus-Pro-7B")
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/Janus-Pro-7B")

# Caption step of the pipeline. Currently a stub: no image-captioning model
# (such as BLIP) is wired in yet, so every image yields the same text.
def process_image_for_caption(image):
    """Return a text caption for ``image``.

    Placeholder implementation: ``image`` is ignored and a fixed dummy
    caption is returned. Replace the body with a call to a real
    image-captioning model when one is available.
    """
    placeholder_caption = "A person holding a book in a library."
    return placeholder_caption

# Function to enhance caption using Janus-Pro-7B
def enhance_caption_with_janus(caption):
    """Expand ``caption`` by letting the module-level model continue it.

    Args:
        caption: Text used as the generation prompt.

    Returns:
        The first generated sequence, decoded to a string with special
        tokens stripped.
    """
    inputs = tokenizer(caption, return_tensors="pt")
    # Keep the prompt tensors on the same device as the model weights so
    # generate() does not fail with a device mismatch if the model has been
    # moved off the CPU. This is a no-op when both already share a device.
    inputs = inputs.to(model.device)
    # NOTE: max_length bounds the whole sequence (prompt tokens plus newly
    # generated tokens), not just the continuation.
    outputs = model.generate(**inputs, max_length=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Turns a caption into a short task title using YAKE keyword extraction
class YakeTaskGenerator:
    """Build a task name from the top keywords of a caption.

    Args:
        n: Forwarded to ``yake.KeywordExtractor`` as ``n``.
        top_k: Forwarded to ``yake.KeywordExtractor`` as ``top``.
    """

    def __init__(self, n=2, top_k=3):
        self.kw_extractor = yake.KeywordExtractor(n=n, top=top_k)

    def extract_keywords(self, caption):
        """Return the first element of every entry the extractor yields.

        Presumably each entry is a ``(keyword, score)`` pair, so this is the
        list of keyword strings -- confirm against the installed yake version.
        """
        phrases = []
        for entry in self.kw_extractor.extract_keywords(caption):
            phrases.append(entry[0])
        return phrases

    def generate_task_name(self, caption):
        """Return ``"<keywords> Analysis"`` or a generic fallback title.

        The first two extracted keywords are joined with a space and passed
        through ``str.capitalize`` (first character upper-cased, the rest
        lower-cased) before ``" Analysis"`` is appended. When no keywords are
        extracted, ``"General Image Processing"`` is returned instead.
        """
        keywords = self.extract_keywords(caption)
        if keywords:
            # Only the two leading keywords contribute to the title.
            joined = " ".join(keywords[:2])
            return joined.capitalize() + " Analysis"
        return "General Image Processing"

# End-to-end pipeline: image -> caption -> enhanced caption -> task name
def process_image_and_generate_task(image):
    """Run the full pipeline on ``image`` and return the generated task name.

    The image is captioned, the caption is passed through the Janus-Pro-7B
    enhancement step, and a fresh ``YakeTaskGenerator`` (default settings)
    derives the task name from the enhanced text.
    """
    base_caption = process_image_for_caption(image)
    richer_caption = enhance_caption_with_janus(base_caption)
    return YakeTaskGenerator().generate_task_name(richer_caption)

# Callback handed to the Gradio UI
def gradio_interface(image):
    """Return the task name produced by the pipeline for ``image``."""
    return process_image_and_generate_task(image)

# Build the Gradio UI: one image upload in, one text box out
image_input = gr.Image(type="pil", label="Upload Image")
output = gr.Textbox(label="Generated Task Name")

demo = gr.Interface(
    title="Image Captioning and Task Name Generation with Janus-Pro-7B",
    fn=gradio_interface,
    inputs=image_input,
    outputs=output,
    live=True,
)

# Start serving the interface.
demo.launch(share=True, debug=True)