metadata
license: mit
Prompt
# Instruction text sent alongside the cropped image. It states the validity
# criteria and asks the model to finish with a "Conclusion" section whose
# verdict is Yes or No.
_validity_prompt = """Analyze the provided cropped image from a screenshot to determine whether it contains a single, valid, and visually complete UI element.
Criteria for validity:
- The image must contain exactly one UI element.
- The element must be entirely visible within the cropped area, with no significant cut-off parts.
- The image should not consist solely of background, empty space, or meaningless fragments.
Response format:
Conclude with your final determination in a dedicated section:
Conclusion
Yes (if the image contains a single, valid, and complete UI element)
No (if it does not meet the criteria)"""

# Image part of the message.
# NOTE(review): convert_to_base64 is defined elsewhere; presumably it returns
# an encoded form of element_image that the target chat API accepts — confirm.
_image_part = {
    "type": "image_url",
    "image_url": convert_to_base64(element_image),
}

# Text part carrying the instructions above.
_text_part = {
    "type": "text",
    "text": _validity_prompt,
}

# A single user turn: the image first, then the instructions.
messages = [
    {
        "role": "user",
        "content": [_image_part, _text_part],
    }
]
Result Parsing
import re

# Only the tail of the reply is inspected: the prompt asks the model to end
# with a "Conclusion" section whose verdict is Yes or No.
last_lines = response.strip().split('\n')[-2:]
conclusion = " ".join(last_lines).lower()
# Match "yes" as a whole word. A plain substring test would also fire on words
# such as "eyes" or "yesterday" appearing in a "No" verdict line.
is_valid_box = re.search(r"\byes\b", conclusion) is not None