File size: 3,115 Bytes
72cd7d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67e79b5
72cd7d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67e79b5
72cd7d2
 
67e79b5
72cd7d2
 
 
67e79b5
72cd7d2
 
 
 
 
 
 
67e79b5
72cd7d2
 
67e79b5
72cd7d2
67e79b5
 
 
 
 
 
 
 
 
 
 
 
72cd7d2
67e79b5
72cd7d2
 
67e79b5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import torch
import numpy as np
from PIL import Image
import os
import json
import base64
from io import BytesIO
import requests
from typing import Dict, List, Any, Optional
from transformers.pipelines import pipeline

# Initialize the embedding pipeline once at import time (downloads weights on first run).
# trust_remote_code=True is required because this checkpoint ships custom modeling code;
# NOTE(review): that executes code from the model repo — acceptable only because the
# source is pinned to the official nomic-ai checkpoint.
model = pipeline("image-feature-extraction", model="nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)

# Function to generate embeddings from an image
def generate_embedding(image):
    """Generate an embedding vector for *image* using the global ``model`` pipeline.

    Args:
        image: A ``PIL.Image.Image``, a numpy array (as supplied by Gradio), or None.

    Returns:
        A ``(payload, message)`` tuple. On success ``payload`` is a dict with keys
        ``"embedding"`` (flat list of floats) and ``"dimension"`` (its length), and
        ``message`` reports the dimension. On failure ``payload`` is None and
        ``message`` describes the error.
    """
    if image is None:
        return None, "No image provided"

    # Gradio may hand us a numpy array; the pipeline expects a PIL image.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    try:
        # Generate embedding using the transformers pipeline
        result = model(image)

        embedding_list = None

        # Normalize the pipeline output — it may be a tensor, an ndarray,
        # or a (possibly nested) list depending on the transformers version.
        if isinstance(result, torch.Tensor):
            embedding_list = result.detach().cpu().numpy().flatten().tolist()
        elif isinstance(result, np.ndarray):
            embedding_list = result.flatten().tolist()
        elif isinstance(result, list):
            if result and isinstance(result[0], (torch.Tensor, np.ndarray)):
                first = result[0]
                embedding_list = first.flatten().tolist() if hasattr(first, 'flatten') else first
            else:
                # Feature-extraction pipelines commonly return nested lists
                # (e.g. [[...768 floats...]]). Flatten so "dimension" counts
                # scalars instead of the outer nesting level.
                embedding_list = np.asarray(result).flatten().tolist()
        else:
            # Last resort: anything iterable becomes a list.
            try:
                if result is not None:
                    embedding_list = list(result)
                else:
                    print("Result is None")
                    return None, "Failed to generate embedding"
            except (TypeError, ValueError):
                # Narrow catch: list() fails with TypeError on non-iterables.
                print(f"Couldn't convert result of type {type(result)} to list")
                return None, "Failed to process embedding"

        # Ensure we have a valid embedding list
        if embedding_list is None:
            return None, "Failed to generate embedding"

        embedding_dim = len(embedding_list)

        return {
            "embedding": embedding_list,
            "dimension": embedding_dim
        }, f"Dimension: {embedding_dim}"
    except Exception as e:
        # Top-level boundary for the Gradio callback: report rather than crash the UI.
        print(f"Error generating embedding: {str(e)}")
        return None, f"Error: {str(e)}"

# Create a Gradio app: one image in, embedding JSON + dimension text out.
_image_input = gr.Image(type="pil", label="Input Image")
_result_outputs = [
    gr.JSON(label="Embedding Output"),
    gr.Textbox(label="Embedding Dimension"),
]
app = gr.Interface(
    fn=generate_embedding,
    inputs=_image_input,
    outputs=_result_outputs,
    title="Nomic Vision Embedding Model (nomic-ai/nomic-embed-vision-v1.5)",
    description="Upload an image to generate embeddings using the Nomic Vision model.",
    allow_flagging="never",
)

# Launch the app
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the address/port Hugging Face
    # Spaces expects a hosted Gradio app to serve on.
    app.launch(server_name="0.0.0.0", server_port=7860)