tdziwok's picture
first commit
31af2b2
import requests
from smolagents import tool
import openai
import base64
@tool
def analyse_image(image_url: str) -> str:
    """
    analyse the provided image, and return a description or transcription of the contents.

    Args:
        image_url (str): The URL of the image to be analysed. Usually with an image extension like png, jpg, etc.

    Returns:
        str: description or transcription of the contents of the provided image, or a short error message if the URL is refused or the image cannot be fetched
    """
    # Local import keeps the block self-contained; the file's import header
    # does not need to change.
    from urllib.parse import urlparse

    allowed_host = "agents-course-unit4-scoring.hf.space"
    refusal = "the requested URL is not whitelisted, refusing to fetch data"

    # some security: compare the parsed scheme and hostname exactly.
    # A plain substring test would accept the whitelisted origin appearing
    # anywhere in the URL (query string, path, userinfo, longer hostname).
    try:
        parsed = urlparse(image_url)
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. a broken IPv6 literal).
        return refusal
    if parsed.scheme != "https" or parsed.hostname != allowed_host:
        return refusal

    # A timeout prevents the tool from hanging on an unresponsive server;
    # network failures are reported the same way as HTTP errors below.
    try:
        resp = requests.get(image_url, timeout=30)
    except requests.RequestException as exc:
        return f"failed to fetch the requested image: {exc}"

    if resp.status_code != 200:
        return f"failed to fetch the requested image: (status={resp.status_code})\n{resp.text}"

    # Keep only the media type ("image/png; charset=x" -> "image/png") and
    # refuse anything that is missing or not an image, so the data URL sent
    # to the model always carries a valid image MIME type.
    content_type = resp.headers.get("content-type") or ""
    mime = content_type.split(";", 1)[0].strip().lower()
    if not mime.startswith("image/"):
        return f"the requested URL did not return an image (content-type={content_type!r}), refusing to analyse it"

    image_bytes = base64.b64encode(resp.content).decode("utf-8")

    # Create the message to GPT-4o (vision); the image is inlined as a
    # base64 data URL.
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Please analyze the contents of this image. Provide a short (two sentence) description of the contents, and then output your analysis. The analysis should be in the most appropriate format. e.g. if the image is a document, maybe transcription is best. if it's a picture, describe the contents in detail. if it's a chessboard, outline the situation in chess notation. etc. "},
                    {"type": "image_url", "image_url": {"url": f"data:{mime};base64," + image_bytes}},
                ],
            }
        ],
        max_tokens=500,
    )
    return response.choices[0].message.content