File size: 4,196 Bytes
5734d35
15c06cc
5734d35
0c5a382
 
15c06cc
0c5a382
15c06cc
0c5a382
 
 
15c06cc
 
0c5a382
 
5734d35
0c5a382
 
 
5734d35
0c5a382
 
5734d35
0c5a382
 
 
5734d35
0c5a382
 
 
5734d35
0c5a382
 
5734d35
0c5a382
5734d35
0c5a382
 
 
 
 
 
 
 
 
 
 
5734d35
0c5a382
5734d35
0c5a382
 
 
5734d35
0c5a382
 
 
 
5734d35
0c5a382
 
 
 
 
 
 
5734d35
0c5a382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5734d35
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# This is a Gradio app that provides a text-based chat interface with optional image and video generation.
import gradio as gr
import numpy as np
import tempfile
import imageio

# ---------- Setup ----------
AVAILABLE_MODELS = {
    "GPT-2 (small, fast)": "gpt2",
    "Falcon (TII UAE)": "tiiuae/falcon-7b-instruct",
    "Mistral (OpenAccess)": "mistralai/Mistral-7B-v0.1"
}

# Load text-to-image model using diffusers (correct API)
try:
    from diffusers import DiffusionPipeline
    image_generator = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    image_generator.to("cpu")
    image_enabled = True
except ImportError:
    image_generator = None
    image_enabled = False
    print("[Image model error]: diffusers library not found")

# Load text-to-video model
try:
    from diffusers import DiffusionPipeline
    video_pipeline = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b")
    video_pipeline.to("cpu")
    video_enabled = True
except ImportError:
    video_pipeline = None
    video_enabled = False
    print("[Video model error]: diffusers library not found")

text_model_cache = {}
chat_memory = {}

# ---------- Core Function ----------
def codette_terminal(prompt, model_name, generate_image, generate_video, session_id):
    if session_id not in chat_memory:
        chat_memory[session_id] = []

    if prompt.lower() in ["exit", "quit"]:
        chat_memory[session_id] = []
        return "🧠 Codette signing off... Session reset.", None, None

    # Load the text generation model if it's not already in the cache
    if model_name not in text_model_cache:
        text_model_cache[model_name] = gr.pipelines.TextGeneration(model=AVAILABLE_MODELS[model_name])
    generator = text_model_cache[model_name]
    response = generator(prompt, max_length=100, num_return_sequences=1, do_sample=True)[0]['generated_text'].strip()

    # Update the chat log
    chat_memory[session_id].append(f"πŸ–‹οΈ You > {prompt}")
    chat_memory[session_id].append(f"🧠 Codette > {response}")
    chat_log = "\n".join(chat_memory[session_id][-10:])

    # Generate image if requested and image generation is enabled
    img = None
    if generate_image and image_enabled:
        try:
            img = image_generator(prompt).images[0]
        except Exception as e:
            chat_log += f"\n[Image error]: {e}"

    # Generate video if requested and video generation is enabled
    vid = None
    if generate_video and video_enabled:
        try:
            video_frames = video_pipeline(prompt, num_inference_steps=50).frames
            temp_video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
            imageio.mimsave(temp_video_path, video_frames, fps=8)
            vid = temp_video_path
        except Exception as e:
            chat_log += f"\n[Video error]: {e}"

    return chat_log, img, vid

# ---------- Gradio UI ----------
with gr.Blocks(title="Codette Terminal – Text + Image + Video") as demo:
    gr.Markdown("## 🧬 Codette Terminal (Text + Image + Video, CPU-Friendly)")
    gr.Markdown("Type a prompt and select your model. Optionally generate images or videos. Type `'exit'` to reset session.")

    session_id = gr.Textbox(value="session_default", visible=False)
    model_dropdown = gr.Dropdown(choices=list(AVAILABLE_MODELS.keys()), value="GPT-2 (small, fast)", label="Choose Language Model")
    generate_image_toggle = gr.Checkbox(label="Also generate image?", value=False, interactive=image_enabled)
    generate_video_toggle = gr.Checkbox(label="Also generate video?", value=False, interactive=video_enabled)
    user_input = gr.Textbox(label="Your Prompt", placeholder="e.g. A robot dreaming on Mars", lines=1)
    output_text = gr.Textbox(label="Codette Output", lines=15, interactive=False)
    output_image = gr.Image(label="Generated Image")
    output_video = gr.Video(label="Generated Video")

    user_input.submit(
        fn=codette_terminal,
        inputs=[user_input, model_dropdown, generate_image_toggle, generate_video_toggle, session_id],
        outputs=[output_text, output_image, output_video]
    )

if __name__ == "__main__":
    demo.launch(show_error=True)