"""Highlight one semantic-segmentation class of an image as a red mask."""

import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForSemanticSegmentation

# Single source of truth so the processor and the model always come from the
# same checkpoint.
MODEL_CHECKPOINT = "nvidia/segformer-b0-finetuned-ade-512-512"

# Load model
processor = AutoImageProcessor.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForSemanticSegmentation.from_pretrained(MODEL_CHECKPOINT)


def predict_defect(image: Image.Image, *, defect_label: int = 12) -> Image.Image:
    """Return an RGB mask marking the pixels predicted as ``defect_label``.

    The image is run through the module-level ``processor`` and ``model``;
    the class logits are resized to the input image's dimensions and the
    per-pixel argmax is compared against ``defect_label``.

    Args:
        image: Input picture. Non-RGB modes are converted to RGB first.
        defect_label: Class index to highlight. The default of 12 is only an
            example index and should be adjusted to the class of interest.

    Returns:
        An RGB ``PIL.Image.Image`` with the same width and height as
        ``image``: pure red (255, 0, 0) where the predicted class equals
        ``defect_label`` and black everywhere else.
    """
    # NOTE(review): the checkpoint presumably expects three-channel input;
    # normalizing the mode up front keeps grayscale/RGBA/palette images from
    # reaching the processor in an unexpected layout.
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt")

    with torch.no_grad():
        logits = model(**inputs).logits
        # SegFormer emits logits at a reduced resolution (1/4 of the
        # processed input per the model docs), so they are resized to the
        # original image before the argmax; otherwise the mask would not
        # line up with the input. PIL's size is (width, height) while
        # interpolate expects (height, width), hence the reversal.
        resized_logits = torch.nn.functional.interpolate(
            logits,
            size=image.size[::-1],
            mode="bilinear",
            align_corners=False,
        )

    # Argmax over the class axis, then take the single batch entry explicitly
    # instead of relying on squeeze(), which drops every size-1 axis.
    segmentation = resized_logits.argmax(dim=1)[0].cpu().numpy()

    # Convert to RGB overlay
    overlay = np.zeros((*segmentation.shape, 3), dtype=np.uint8)
    overlay[segmentation == defect_label] = (255, 0, 0)
    return Image.fromarray(overlay)