Spaces:
Running
Running
File size: 1,705 Bytes
877a062 fc66fa8 877a062 fc66fa8 877a062 fc66fa8 877a062 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# from PIL import Image
# from io import BytesIO
# import requests
# from transformers import pipeline
# class ImageOCRService:
# def __init__(self):
# self.pipe = pipeline("image-text-to-text", model="ds4sd/SmolDocling-256M-preview")
# def extract_text(self, image_url: str) -> str:
# response = requests.get(image_url)
# image = Image.open(BytesIO(response.content)).convert("RGB")
# result = self.pipe([{
# "role": "user",
# "content": [
# {"type": "image", "image": image},
# {"type": "text", "text": "extract text from image"}
# ]
# }])
# return result[0]['generated_text'][1]['content'] if result else ""
from PIL import Image
import google.generativeai as genai
import json
from dotenv import load_dotenv
import os
# Read a local .env file (when present) into the process environment so the
# key lookup below also works outside a pre-configured shell.
load_dotenv()
# API key handed to google.generativeai; os.getenv yields None when the
# GOOGLE_API_KEY variable is not set.
api_key = os.getenv("GOOGLE_API_KEY")
class ImageClassifier:
    """Classify an image as toxic / non-toxic with a Gemini model.

    The model is prompted to answer in JSON with ``toxic`` and ``non_toxic``
    booleans plus a ``text`` field when the image contains text.
    """

    def __init__(self, model_name: str = "gemini-2.0-flash") -> None:
        """Store the prompt and model name; no network or SDK call happens here.

        Args:
            model_name: Name passed to ``genai.GenerativeModel`` when the
                model is first needed.
        """
        self.model_name = model_name
        # Created lazily on the first classify() call and then reused.
        self._model = None
        # Prompt text is sent to the model verbatim.
        self.prompt = """
Classify the image into toxic and non toxic or safe or not safe.
Output format:
toxic:boolean
non_toxic:boolean
if text is present in image extract text from image :
Output format:
toxic:boolean
non_toxic:boolean
text:string
Give output in json format and dont include anything in output
"""

    def _get_model(self):
        """Return the cached generative model, configuring the SDK on first use."""
        # getattr keeps this safe for instances created without __init__.
        if getattr(self, "_model", None) is None:
            genai.configure(api_key=api_key)
            model_name = getattr(self, "model_name", "gemini-2.0-flash")
            self._model = genai.GenerativeModel(model_name)
        return self._model

    @staticmethod
    def _parse_response(raw_text: str) -> dict:
        """Turn the model's text reply into a dict.

        Markdown code-fence markers are removed before parsing. When the
        remaining text is not a JSON object, the cleaned text is returned
        under the ``"raw"`` key so callers always receive a dict.
        """
        cleaned = raw_text.strip().replace("```json", "").replace("```", "").strip()
        try:
            parsed = json.loads(cleaned)
        except ValueError:
            # json.JSONDecodeError is a ValueError: reply was not valid JSON.
            return {"raw": cleaned}
        if not isinstance(parsed, dict):
            # Valid JSON but not an object (e.g. a bare list or string).
            return {"raw": cleaned}
        return parsed

    def classify(self, image: "Image.Image") -> dict:
        """Send the prompt and ``image`` to the model and parse its reply.

        Args:
            image: The PIL image to classify.

        Returns:
            The decoded JSON object from the model, or ``{"raw": <text>}``
            when the reply could not be decoded into a JSON object.
        """
        response = self._get_model().generate_content([self.prompt, image])
        # NOTE(review): accessing `response.text` may raise when the reply
        # carries no text part (for example a blocked response) — confirm
        # whether callers want that surfaced or mapped to a fallback dict.
        return self._parse_response(response.text)