# fake-news / utils.py
# Last commit by Adam-Al-Rahman: "add: lstm model" (fe86b7f)
"""
Copyright 2024 X_G85
Model Integration Utils
-------------------------
"""
# Author: Adam-Al-Rahman <adam.al.rahman.dev@gmail.com>
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
def tokenizer(arch: str, tokenizer_json: str, text: str, max_length=300):
    """
    Turn raw text into a padded float32 sequence array for the given model.

    ::param:: arch: type of model, `Lstm` or `Bert`; only `Lstm` is handled here
    ::param:: tokenizer_json: path to a JSON file holding a serialized Keras tokenizer
    ::param:: text: a single document (str) or a sequence of documents
    ::param:: max_length: forwarded to `pad_sequences` as `maxlen`
    ::return:: float32 array with one row per document, or None when `arch`
               is anything other than `Lstm`
    """
    if arch != "Lstm":
        return None

    # `texts_to_sequences` iterates over its argument, so a bare string would
    # be tokenized one character at a time (one row per character). Wrap a
    # lone string so it is treated as exactly one document.
    if isinstance(text, str):
        text = [text]

    # Load the tokenizer from the JSON file
    with open(tokenizer_json, encoding="utf-8") as file:
        keras_tokenizer = tokenizer_from_json(file.read())

    # Use the tokenizer to transform the input, then pad to a fixed width
    sequences = keras_tokenizer.texts_to_sequences(text)
    padded = pad_sequences(sequences, maxlen=max_length)
    return padded.astype(np.float32)
def modelx(
    arch: str,
    model_path: str,
    text: str,
    tokenizer_json: str = "",
    batch_size=32,
    max_length=300,
):
    """
    Run a saved model on `text` and translate its prediction into a label.

    ::param:: arch: type of model; only `Lstm` triggers a prediction
    ::param:: model_path: path handed to `tf.keras.models.load_model`
    ::param:: text: input to classify
    ::param:: tokenizer_json: path to a serialized tokenizer; when empty the
              text is wrapped in a `pd.Series` instead of being tokenized
    ::param:: batch_size: forwarded to `model.predict`
    ::param:: max_length: forwarded to `tokenizer`
    ::return:: "REAL NEWS" if the rounded prediction equals 1.0, "FAKE NEWS"
               if it equals 0.0, the rounded tensor itself if it is neither,
               and None when `arch` is not `Lstm`
    """
    # Prepare the model input: tokenized sequences if a tokenizer was
    # supplied, otherwise the raw text as a Series.
    features = (
        tokenizer(arch, tokenizer_json, text, max_length)
        if tokenizer_json
        else pd.Series(text)
    )

    if arch != "Lstm":
        return None

    classifier = tf.keras.models.load_model(model_path)
    raw_scores = classifier.predict(features, batch_size=batch_size)
    # NOTE(review): the comparisons below presumably expect a single-sample
    # prediction that squeezes down to a scalar; confirm against callers.
    rounded = tf.squeeze(tf.round(raw_scores))

    if rounded == 1.0:
        return "REAL NEWS"
    if rounded == 0.0:
        return "FAKE NEWS"
    return rounded