"""Minimal Flask service that exposes PhoBERT embeddings over HTTP.

Routes:
    POST /embed  -- JSON body ``{"text": "<string>"}``; responds with
                    ``{"embedding": [...]}`` or ``{"error": "..."}`` and 400.
    GET  /       -- plain-text liveness message.
"""

from flask import Flask, request, jsonify
from transformers import AutoModel, AutoTokenizer
import torch

app = Flask(__name__)

MODEL_NAME = "vinai/phobert-base"

# Upper bound on tokens sent to the model. Without truncation an over-long
# input runs past the model's position-embedding table and the forward pass
# fails. NOTE(review): 256 is the limit stated on the PhoBERT model card --
# confirm if MODEL_NAME is ever changed.
MAX_TOKENS = 256

# Load PhoBERT once at import time so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)


@app.route('/embed', methods=['POST'])
def embed():
    """Return the embedding of the first token for the posted text.

    Expects a JSON object with a non-empty string under the ``text`` key.

    Returns:
        200 with ``{"embedding": [...]}`` (a flat list of floats) on success.
        400 with ``{"error": "..."}`` when the body is not a JSON object, or
        when ``text`` is missing, empty, or not a string.
    """
    # silent=True yields None instead of raising on a missing/incorrect
    # content type or malformed JSON, so all bad input gets the same JSON
    # error shape rather than a framework-generated error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text = data.get('text', '')
    if not text:
        return jsonify({"error": "No text provided"}), 400
    if not isinstance(text, str):
        # Numbers, dicts, etc. would otherwise fail inside the tokenizer
        # and surface as a 500.
        return jsonify({"error": "'text' must be a string"}), 400

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_TOKENS,
    )
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Take the last-layer hidden state of the first token of the single
    # input sequence. Explicit indexing (rather than squeeze()) keeps the
    # result one-dimensional regardless of sequence length.
    embedding = outputs.last_hidden_state[0, 0, :].tolist()
    return jsonify({"embedding": embedding})


@app.route('/', methods=['GET'])
def index():
    """Return a plain-text message showing that the service is up."""
    return "PhoBERT Space is running!"


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)