"""
Copyright 2024 X_G85
Model Integration Utils
-------------------------
"""
# Author: Adam-Al-Rahman <adam.al.rahman.dev@gmail.com>
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
def tokenizer(arch: str, tokenizer_json: str, text: str, max_length: int = 300):
    """Tokenize and pad `text` for the given model architecture.

    ::param:: arch: type of model `Lstm` or `Bert`; only `Lstm` is handled
        here — any other value returns None.
    ::param:: tokenizer_json: path to a Keras tokenizer JSON file
        (as produced by `Tokenizer.to_json()`).
    ::param:: text: input text. NOTE(review): `texts_to_sequences` expects an
        iterable of texts; a bare `str` is tokenized character by character —
        confirm callers pass a list/Series when whole-text tokenization is
        intended.
    ::param:: max_length: length sequences are padded/truncated to.

    Returns a float32 numpy array of padded sequences for `Lstm`,
    otherwise None.
    """
    tokenized_data = None
    if arch == "Lstm":
        # Load the tokenizer from the JSON file; tokenizer JSON is UTF-8,
        # so read it explicitly as such rather than the locale default.
        with open(tokenizer_json, encoding="utf-8") as file:
            data = file.read()
        # Renamed from `tokenizer` to avoid shadowing this function's name.
        loaded_tokenizer = tokenizer_from_json(data)

        # Transform text into integer sequences, then pad to a fixed length.
        tokenized_text = loaded_tokenizer.texts_to_sequences(text)
        tokenized_data = pad_sequences(tokenized_text, maxlen=max_length)
        tokenized_data = tokenized_data.astype(np.float32)

    return tokenized_data
def modelx(
    arch: str,
    model_path: str,
    text: str,
    tokenizer_json: str = "",
    batch_size=32,
    max_length=300,
):
    """Load a saved Keras model and classify `text` as real or fake news.

    ::param:: arch: type of model `Lstm` or `Bert`; only `Lstm` is handled
        here — any other value returns None.
    ::param:: model_path: path accepted by `tf.keras.models.load_model`.
    ::param:: text: input text to classify.
    ::param:: tokenizer_json: optional path to a Keras tokenizer JSON file;
        when given, `text` is tokenized/padded before prediction.
    ::param:: batch_size: batch size forwarded to `model.predict`.
    ::param:: max_length: padding length forwarded to the tokenizer.

    Returns "REAL NEWS" or "FAKE NEWS" for an Lstm prediction of 1/0,
    the raw rounded tensor if it is neither, or None for other `arch` values.
    """
    result = None

    # Preprocess: tokenize when a tokenizer file is supplied; otherwise hand
    # the raw text to the model wrapped in a pandas Series.
    if tokenizer_json:
        text = tokenizer(arch, tokenizer_json, text, max_length)
    else:
        text = pd.Series(text)

    if arch == "Lstm":
        model = tf.keras.models.load_model(model_path)
        result = model.predict(text, batch_size=batch_size)
        # Round the sigmoid output to 0/1 and drop size-1 dimensions.
        result = tf.squeeze(tf.round(result))
        # NOTE(review): `==` on a tensor relies on eager-mode scalar
        # truthiness; presumably a single prediction is expected — confirm.
        if result == 1.0:
            result = "REAL NEWS"
        elif result == 0.0:
            result = "FAKE NEWS"

    return result
|