import base64
import json
import os
from io import BytesIO
from typing import Dict, List, Any, Optional

import gradio as gr
import numpy as np
import requests
import torch
from PIL import Image
from transformers.pipelines import pipeline

# Upper bound (seconds) for downloading a remote image, so that a stalled
# server cannot block a request handler forever.
_REQUEST_TIMEOUT_SECONDS = 30

# Initialize the model once at import time. A failure is recorded instead of
# raised so the UI can still start and report the problem per request.
try:
    model = pipeline(
        "image-feature-extraction",
        model="nomic-ai/nomic-embed-vision-v1.5",
        trust_remote_code=True,
    )
    model_loaded = True
except Exception as e:
    print(f"Error loading model: {str(e)}")
    model = None
    model_loaded = False


def generate_embedding(image):
    """
    Generate an embedding for the uploaded image.

    Args:
        image (PIL.Image.Image or np.ndarray): Input image uploaded by the user.

    Returns:
        tuple[dict, str]: A ``(payload, status)`` pair. On success the payload
        is ``{"embedding": ..., "dimension": ...}`` and the status is
        ``"Dimension: <n>"``. On failure the payload is ``{"error": ...}`` and
        the status is a short human-readable message. This function does not
        raise for ordinary failures.
    """
    if image is None:
        return {"error": "No image provided"}, "No image provided"

    # `model` is None whenever loading failed, so this single guard covers
    # both module-level flags.
    if not model_loaded or model is None:
        return {"error": "Model not loaded properly"}, "Error: Model not loaded properly"

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        try:
            image = Image.fromarray(image)
        except Exception as e:
            print(f"Error converting image: {str(e)}")
            return {"error": f"Invalid image format: {str(e)}"}, f"Error: Invalid image format"

    try:
        # Generate embedding using the transformers pipeline
        result = model(image)

        if result is None:
            print("Result is None")
            return {"error": "Failed to generate embedding"}, "Failed to generate embedding"

        # Handle the different output types the pipeline may produce.
        if isinstance(result, torch.Tensor):
            embedding_list = result.detach().cpu().numpy().flatten().tolist()
        elif isinstance(result, np.ndarray):
            embedding_list = result.flatten().tolist()
        elif isinstance(result, list):
            if result and isinstance(result[0], (torch.Tensor, np.ndarray)):
                # A list of tensors/arrays: use the first entry.
                embedding_list = result[0].flatten().tolist()
            else:
                # NOTE(review): a plain (possibly nested) list is passed
                # through unchanged, so "dimension" below is the length of
                # its outermost level -- confirm this is the intended shape.
                embedding_list = result
        else:
            # Try to convert to a list as a last resort
            try:
                embedding_list = list(result)
            except Exception:
                print(f"Couldn't convert result of type {type(result)} to list")
                return {"error": "Failed to process embedding"}, "Failed to process embedding"

        # Ensure we have a valid embedding list
        if embedding_list is None:
            return {"error": "Failed to generate embedding"}, "Failed to generate embedding"

        # Calculate embedding dimension
        embedding_dim = len(embedding_list)

        return {
            "embedding": embedding_list,
            "dimension": embedding_dim
        }, f"Dimension: {embedding_dim}"
    except Exception as e:
        print(f"Error generating embedding: {str(e)}")
        return {"error": f"Error generating embedding: {str(e)}"}, f"Error: {str(e)}"


def embed_image_from_url(image_url):
    """
    Download an image from a URL and generate its embedding.

    Args:
        image_url (str): Image URL provided by the user.

    Returns:
        tuple[dict, str]: The same ``(payload, status)`` pair that
        ``generate_embedding`` returns. Download or decoding failures are
        reported as ``({"error": ...}, "Error: ...")``.
    """
    try:
        # NOTE(review): the URL is user-supplied; if this is exposed publicly,
        # consider restricting which hosts may be fetched.
        response = requests.get(image_url, timeout=_REQUEST_TIMEOUT_SECONDS)
        # Fail on HTTP errors instead of trying to parse an error page as an image.
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))

        # Generate embedding
        return generate_embedding(image)
    except Exception as e:
        return {"error": str(e)}, f"Error: {str(e)}"


def embed_image_from_base64(image_data):
    """
    Decode a base64-encoded image and generate its embedding.

    Args:
        image_data (str or bytes): Base64-encoded image file contents.

    Returns:
        tuple[dict, str]: The same ``(payload, status)`` pair that
        ``generate_embedding`` returns. Decoding failures are reported as
        ``({"error": ...}, "Error: ...")``.
    """
    try:
        # Decode the base64 image
        decoded_data = base64.b64decode(image_data)
        image = Image.open(BytesIO(decoded_data))

        # Generate embedding
        return generate_embedding(image)
    except Exception as e:
        return {"error": str(e)}, f"Error: {str(e)}"


# Create a Gradio app
app = gr.Interface(
    fn=generate_embedding,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=[
        gr.JSON(label="Embedding Output"),
        gr.Textbox(label="Embedding Dimension")
    ],
    title="Nomic Vision Embedding Model (nomic-ai/nomic-embed-vision-v1.5)",
    description="Upload an image to generate embeddings using the Nomic Vision model.",
    allow_flagging="never"
)

# Launch the app
if __name__ == "__main__":
    app.launch(mcp_server=True)