VietCat committed on
Commit
adb76ba
·
1 Parent(s): cfc05e8

Update app logic: switch PhoBERT inference from PyTorch (AutoModel + torch) to TensorFlow (TFAutoModel)

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -1,12 +1,11 @@
1
  from flask import Flask, request, jsonify
2
- from transformers import AutoModel, AutoTokenizer
3
- import torch
4
 
5
  app = Flask(__name__)
6
 
7
- # Load PhoBERT
8
  tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
9
- model = AutoModel.from_pretrained("vinai/phobert-base", from_tf=True)
10
 
11
  @app.route('/embed', methods=['POST'])
12
  def embed():
@@ -15,12 +14,11 @@ def embed():
15
  if not text:
16
  return jsonify({"error": "No text provided"}), 400
17
 
18
- inputs = tokenizer(text, return_tensors="pt")
19
- with torch.no_grad():
20
- outputs = model(**inputs)
21
 
22
  # Lấy embedding từ hidden state đầu tiên
23
- embedding = outputs.last_hidden_state[:, 0, :].squeeze().tolist()
24
 
25
  return jsonify({"embedding": embedding})
26
 
 
1
  from flask import Flask, request, jsonify
2
+ from transformers import TFAutoModel, AutoTokenizer
 
3
 
4
  app = Flask(__name__)
5
 
6
+ # Load PhoBERT (TensorFlow version)
7
  tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
8
+ model = TFAutoModel.from_pretrained("vinai/phobert-base", from_tf=True)
9
 
10
  @app.route('/embed', methods=['POST'])
11
  def embed():
 
14
  if not text:
15
  return jsonify({"error": "No text provided"}), 400
16
 
17
+ inputs = tokenizer(text, return_tensors="tf") # Chuyển sang TensorFlow tensor
18
+ outputs = model(**inputs)
 
19
 
20
  # Lấy embedding từ hidden state đầu tiên
21
+ embedding = outputs.last_hidden_state[:, 0, :].numpy().tolist() # Dùng .numpy() để chuyển từ TensorFlow tensor sang list
22
 
23
  return jsonify({"embedding": embedding})
24