Spaces:

Chintan-Shah
/

CLIPInference

Sleeping

App Files Files Community

CLIPInference / app.py

Chintan-Shah

Update app.py

18dbf41 verified 11 months ago

raw

history blame contribute delete

1.9 kB

	import os
	import clip
	import torch
	from torchvision.datasets import CIFAR100
	from PIL import Image
	import gradio as gr

	# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
	device = "cpu"
	if torch.cuda.is_available():
	    device = "cuda"

	# Load the ViT-B/32 CLIP checkpoint together with its image transform.
	model, preprocess = clip.load('ViT-B/32', device)

	# Download the CIFAR-100 test split (if missing) under ~/.cache; its
	# `classes` list supplies the label names used for the text prompts.
	cifar100 = CIFAR100(
	    root=os.path.expanduser("~/.cache"),
	    download=True,
	    train=False,
	)

	# One "a photo of a <class>" prompt per label, tokenised in a single call
	# and moved to the same device as the model.
	text_inputs = clip.tokenize(
	    [f"a photo of a {c}" for c in cifar100.classes]
	).to(device)

	def _class_text_features():
	    """Return unit-normalised CLIP text embeddings for the class prompts.

	    ``text_inputs`` is built once at import time and never changes, so the
	    embeddings are computed on the first call and memoised on the function
	    object instead of being re-encoded for every request.
	    """
	    features = getattr(_class_text_features, "_cached", None)
	    if features is None:
	        with torch.no_grad():
	            features = model.encode_text(text_inputs)
	        features = features / features.norm(dim=-1, keepdim=True)
	        _class_text_features._cached = features
	    return features


	def generateOutput(source):
	    """Classify an image against the CIFAR-100 labels using CLIP.

	    Args:
	        source: Image pixels as an array-like object exposing ``astype``
	            (presumably the NumPy array Gradio's ``gr.Image`` passes in --
	            confirm against the interface definition), or ``None`` when no
	            image was supplied.

	    Returns:
	        A multi-line string listing the five most similar class names with
	        their softmax scores expressed as percentages.

	    Raises:
	        gr.Error: If ``source`` is ``None``.
	    """
	    if source is None:
	        # Surface a readable message in the UI rather than an AttributeError.
	        raise gr.Error("Please provide an image to classify.")

	    # Let Pillow infer the mode from the array shape, then force RGB so
	    # grayscale or RGBA arrays are accepted as well as plain RGB ones.
	    image = Image.fromarray(source.astype('uint8')).convert('RGB')
	    image_input = preprocess(image).unsqueeze(0).to(device)

	    with torch.no_grad():
	        image_features = model.encode_image(image_input)
	    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
	    text_features = _class_text_features()

	    # Scaled cosine similarity -> softmax over classes -> five best matches.
	    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
	    values, indices = similarity[0].topk(5)

	    # Result in text: one right-aligned "label: score%" line per prediction.
	    lines = [
	        f"{cifar100.classes[index]:>16s}: {100 * value:.2f}% \n"
	        for value, index in zip(values.tolist(), indices.tolist())
	    ]
	    return "\nTop predictions:\n" + "".join(lines)

	# Text shown at the top of the Gradio page.
	title = "CLIP Classification Inference Trials"
	description = "Shows the CLIP Classification based on CIFAR100 data with your own image"

	# One single-element row per sample image, matching the single image input.
	examples = [
	    [image_name]
	    for image_name in (
	        "Elephants.jpg",
	        "bloom-blooming-blossom-462118.jpg",
	        "Puppies.jpg",
	        "photo2.JPG",
	        "MultipleItems.jpg",
	    )
	]

	# Wire generateOutput to one image input and one text output. Example
	# caching is switched off via cache_examples=False.
	demo = gr.Interface(
	    fn=generateOutput,
	    inputs=[gr.Image(width=256, height=256, label="Input Image")],
	    outputs=[gr.Text()],
	    title=title,
	    description=description,
	    examples=examples,
	    cache_examples=False,
	)
	demo.launch()