mileski-dev committed on
Commit 7b743f7 · 1 Parent(s): 7dfd23e

adjust for pt-BR with Helsinki

Files changed (1): app.py (+84 -33)
app.py CHANGED
@@ -1,37 +1,88 @@
- from io import BytesIO
  from PIL import Image
- import gradio as gr
- from transformers import BlipProcessor, BlipForConditionalGeneration
-
- # 1) Load the BLIP processor and model
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-
- from io import BytesIO
- from PIL import Image
-
- def infer_caption(image):
-     # 1) Resize to at most 1024×1024, preserving the aspect ratio
-     max_size = 1024
-     w, h = image.size
-     if max(w, h) > max_size:
-         ratio = max_size / max(w, h)
-         new_size = (int(w * ratio), int(h * ratio))
-         image = image.resize(new_size, Image.LANCZOS)
-
-     # 2) Run the standard BLIP captioning flow
-     inputs = processor(image, return_tensors="pt").to(model.device)
-     outputs = model.generate(**inputs)
-     return processor.decode(outputs[0], skip_special_tokens=True)
-
- # 2) Build the Gradio interface
- interface = gr.Interface(
-     fn=infer_caption,
-     inputs=gr.Image(type="pil"),
-     outputs="text",
-     title="Ouvir Para Ver",
-     description="Envie uma imagem e receba a descrição."
- )
-
- if __name__ == "__main__":
-     interface.launch(server_name="0.0.0.0", share=False)
+ import os
+ import requests
+ from flask import Flask, request, jsonify
+ from flask_cors import CORS
+ from dotenv import load_dotenv
  from PIL import Image
+ from io import BytesIO
+
+ # Load environment variables from .env (if it exists)
+ load_dotenv()
+ HF_TOKEN = os.getenv('HF_API_TOKEN')
+ if not HF_TOKEN:
+     raise RuntimeError("Defina HF_API_TOKEN no .env")
+
+ # Hugging Face Inference API endpoints
+ BLIP_API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
+ TRANS_API_URL = "https://api-inference.huggingface.co/models/Helsinki-NLP/opus-mt-en-pt-br"
+ HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
+
+ # Flask and CORS initialization
+ app = Flask(__name__)
+ CORS(app)
+
+
+ def infer_caption(image_bytes):
+     # Generate the English caption with BLIP
+     response_blip = requests.post(
+         BLIP_API_URL,
+         headers=HEADERS,
+         data=image_bytes,
+         timeout=60
+     )
+     if response_blip.status_code != 200:
+         raise RuntimeError(f"Erro BLIP: {response_blip.status_code} - {response_blip.text}")
+     blip_output = response_blip.json()
+     if isinstance(blip_output, list) and blip_output and 'generated_text' in blip_output[0]:
+         english_caption = blip_output[0]['generated_text']
+     else:
+         raise RuntimeError(f"Resposta inesperada BLIP: {blip_output}")
+
+     # Translate the caption to pt-BR
+     response_trans = requests.post(
+         TRANS_API_URL,
+         headers=HEADERS,
+         json={"inputs": english_caption},
+         timeout=60
+     )
+     if response_trans.status_code != 200:
+         raise RuntimeError(f"Erro Tradução: {response_trans.status_code} - {response_trans.text}")
+     trans_output = response_trans.json()
+     if isinstance(trans_output, list) and trans_output and 'translation_text' in trans_output[0]:
+         portuguese_caption = trans_output[0]['translation_text']
+     else:
+         raise RuntimeError(f"Resposta inesperada Tradução: {trans_output}")
+
+     return portuguese_caption
+
+
+ @app.route('/', methods=['GET'])
+ def index():
+     return jsonify({'message': 'API rodando!'})
+
+
+ @app.route('/upload', methods=['POST'])
+ def upload():
+     # Validate the uploaded file
+     if 'image' not in request.files:
+         return jsonify({'error': 'Nenhuma imagem enviada'}), 400
+     img_file = request.files['image']
+     if img_file.filename == '':
+         return jsonify({'error': 'Nome de arquivo inválido'}), 400
+
+     # Read the raw image bytes
+     img_bytes = img_file.read()
+
+     # Get the caption in Portuguese
+     try:
+         caption = infer_caption(img_bytes)
+     except Exception as e:
+         return jsonify({'error': str(e)}), 500
+
+     # Return the caption as JSON
+     return jsonify({'caption': caption})
+
+
+ if __name__ == '__main__':
+     port = int(os.getenv('PORT', 5000))
+     app.run(host='0.0.0.0', port=port, debug=True)
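
For reference, a minimal client sketch for exercising the new /upload route, assuming the Flask app is running locally on port 5000; "photo.jpg" is a hypothetical placeholder for any local image file.

# Minimal client sketch for the /upload endpoint added in this commit.
# Assumes a local server on port 5000 and a local file "photo.jpg";
# both are illustrative placeholders, not part of the commit.
import requests

API_URL = "http://localhost:5000/upload"  # assumed local deployment

with open("photo.jpg", "rb") as f:
    # The route reads a multipart form field named "image"
    response = requests.post(API_URL, files={"image": f}, timeout=120)

response.raise_for_status()
print(response.json()["caption"])  # pt-BR caption returned by the server

On failure the handlers above return a JSON body with an "error" field and a 400 or 500 status instead of "caption".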