File size: 1,705 Bytes
877a062
 
 
 
fc66fa8
877a062
 
 
 
 
 
 
fc66fa8
877a062
 
 
 
 
 
 
 
fc66fa8
877a062
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# from PIL import Image
# from io import BytesIO
# import requests
# from transformers import pipeline

# class ImageOCRService:
#     def __init__(self):
#         self.pipe = pipeline("image-text-to-text", model="ds4sd/SmolDocling-256M-preview")

#     def extract_text(self, image_url: str) -> str:
#         response = requests.get(image_url)
#         image = Image.open(BytesIO(response.content)).convert("RGB")

#         result = self.pipe([{
#             "role": "user",
#             "content": [
#                 {"type": "image", "image": image},
#                 {"type": "text", "text": "extract text from image"}
#             ]
#         }])
#         return result[0]['generated_text'][1]['content'] if result else ""

from PIL import Image
import google.generativeai as genai
import json
from dotenv import load_dotenv
import os
# Load variables from a .env file (if any) before reading the key below.
load_dotenv()

# Google API key used by ImageClassifier.classify; None when the
# GOOGLE_API_KEY environment variable is not set.
api_key = os.environ.get("GOOGLE_API_KEY")
class ImageClassifier:
    """Classify an image as toxic / non-toxic using a Gemini model.

    The model is prompted to answer with a JSON object holding the boolean
    fields ``toxic`` and ``non_toxic`` and, when the image contains text, a
    ``text`` field with the extracted string.
    """

    def __init__(self, model_name: str = "gemini-2.0-flash") -> None:
        """Store the classification prompt and the model identifier.

        Args:
            model_name: Name passed to ``genai.GenerativeModel`` on every
                ``classify`` call.
        """
        self.model_name = model_name
        self.prompt = """
Classify the image into toxic and non toxic or safe or not safe.
Output format:
toxic:boolean
non_toxic:boolean
if text is present in image extract text from image :
Output format:
toxic:boolean
non_toxic:boolean
text:string
Give output in json format and dont include anything in output
"""

    def classify(self, image: "Image.Image") -> dict:
        """Send the prompt together with *image* to the model and parse the reply.

        Args:
            image: The picture to classify.

        Returns:
            The JSON object decoded from the model reply, or
            ``{"raw": <reply text>}`` when the reply is not a JSON object.

        NOTE(review): errors raised by the SDK call or by ``response.text``
        (presumably possible when the reply carries no text, e.g. a blocked
        response -- confirm against the SDK docs) propagate to the caller.
        """
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(self.model_name)
        response = model.generate_content([self.prompt, image])
        return self._parse_response(response.text)

    @staticmethod
    def _parse_response(reply: str) -> dict:
        """Decode a model reply into a dict, tolerating a Markdown code fence.

        Args:
            reply: Raw text returned by the model.

        Returns:
            The decoded JSON object, or ``{"raw": <unfenced text>}`` when the
            text is not valid JSON or decodes to something other than an
            object.
        """
        payload = reply.strip()
        # Remove only the fence that wraps the whole reply. Backticks inside
        # the JSON itself (e.g. in extracted image text) must stay untouched.
        if payload.startswith("```"):
            payload = payload[3:]
            if payload[:4].lower() == "json":
                payload = payload[4:]
            if payload.endswith("```"):
                payload = payload[:-3]
            payload = payload.strip()
        try:
            decoded = json.loads(payload)
        except json.JSONDecodeError:
            return {"raw": payload}
        # Lists, strings, numbers, etc. are valid JSON but not the object the
        # prompt asks for; hand them back through the same fallback shape.
        if not isinstance(decoded, dict):
            return {"raw": payload}
        return decoded