import gradio as gr
import torch
import numpy as np
from PIL import Image
from transformers import pipeline
# Initialize the model once at startup
try:
    model = pipeline(
        "image-feature-extraction",
        model="nomic-ai/nomic-embed-vision-v1.5",
        trust_remote_code=True,
    )
    model_loaded = True
except Exception as e:
    print(f"Error loading model: {str(e)}")
    model = None
    model_loaded = False
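# Hedged sketch (not in the original app): a quick local check of the raw
# pipeline output. For this model the pipeline typically returns nested
# per-patch features; exact shapes depend on the transformers version, which
# is why generate_embedding() below flattens whatever shape it receives.
#
# if model_loaded:
#     probe = model(Image.new("RGB", (224, 224)))
#     print(type(probe), np.asarray(probe).shape)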
# Function to generate embeddings from an image
def generate_embedding(image):
    if image is None:
        return {"error": "No image provided"}, "No image provided"
    if not model_loaded or model is None:
        return {"error": "Model not loaded properly"}, "Error: Model not loaded properly"

    # Convert to a PIL Image if Gradio hands us a numpy array
    if not isinstance(image, Image.Image):
        try:
            image = Image.fromarray(image)
        except Exception as e:
            print(f"Error converting image: {str(e)}")
            return {"error": f"Invalid image format: {str(e)}"}, "Error: Invalid image format"

    try:
        # Generate the embedding using the transformers pipeline
        result = model(image)

        # Normalize the pipeline output to a flat list of floats; depending
        # on the transformers version the result may be a tensor, an
        # ndarray, or a (possibly nested) list
        embedding_list = None
        if isinstance(result, torch.Tensor):
            embedding_list = result.detach().cpu().numpy().flatten().tolist()
        elif isinstance(result, np.ndarray):
            embedding_list = result.flatten().tolist()
        elif isinstance(result, list):
            # Nested lists or a list of tensors/arrays: coerce to an
            # ndarray so it can be flattened consistently
            try:
                embedding_list = np.asarray(result, dtype=np.float32).flatten().tolist()
            except Exception:
                embedding_list = result
        else:
            # Try to convert to a list as a last resort
            try:
                if result is not None:
                    embedding_list = list(result)
                else:
                    print("Result is None")
                    return {"error": "Failed to generate embedding"}, "Failed to generate embedding"
            except Exception:
                print(f"Couldn't convert result of type {type(result)} to list")
                return {"error": "Failed to process embedding"}, "Failed to process embedding"

        # Ensure we ended up with a valid embedding list
        if embedding_list is None:
            return {"error": "Failed to generate embedding"}, "Failed to generate embedding"

        embedding_dim = len(embedding_list)
        return {
            "embedding": embedding_list,
            "dimension": embedding_dim,
        }, f"Dimension: {embedding_dim}"
    except Exception as e:
        print(f"Error generating embedding: {str(e)}")
        return {"error": f"Error generating embedding: {str(e)}"}, f"Error: {str(e)}"
# Create the Gradio app
app = gr.Interface(
    fn=generate_embedding,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=[
        gr.JSON(label="Embedding Output"),
        gr.Textbox(label="Embedding Dimension"),
    ],
    title="Nomic Vision Embedding Model (nomic-ai/nomic-embed-vision-v1.5)",
    description="Upload an image to generate embeddings using the Nomic Vision model.",
    flagging_mode="never",  # allow_flagging was deprecated in favor of flagging_mode in Gradio 5
)
# Launch the app
if __name__ == "__main__":
    # On Hugging Face Spaces the server must bind to 0.0.0.0:7860;
    # mcp_server=True also exposes the function as an MCP tool (Gradio 5+)
    app.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)
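# Hedged usage sketch: calling the running Space from another process with
# gradio_client (the Space URL below is a placeholder). gr.Interface exposes
# its function on the default "/predict" endpoint.
#
# from gradio_client import Client, handle_file
# client = Client("https://<user>-<space>.hf.space")  # hypothetical URL
# embedding_json, dim_text = client.predict(
#     handle_file("path/to/image.jpg"),
#     api_name="/predict",
# )
# print(dim_text)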