|
""" |
|
Copyright 2024 X_G85 |
|
Model Integration Utils |
|
------------------------- |
|
""" |
|
|
|
|
|
|
|
import numpy as np |
|
import pandas as pd |
|
import tensorflow as tf |
|
from tensorflow.keras.preprocessing.text import tokenizer_from_json |
|
from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
|
|
|
|
def tokenizer(arch: str, tokenizer_json: str, text: str, max_length=300):
    """
    Turn raw text into padded token-id sequences for the given architecture.

    ::param:: arch: type of model `Lstm` or `Bert`. Only `Lstm` is handled
        by this function; any other value yields ``None``.
    ::param:: tokenizer_json: path to a file containing a Keras tokenizer
        serialized as JSON (read with ``tokenizer_from_json``).
    ::param:: text: a single document, or an iterable of documents.
    ::param:: max_length: ``maxlen`` passed to ``pad_sequences``.
    ::return:: the ``pad_sequences`` output cast to ``float32`` (one row per
        document), or ``None`` when `arch` is not `Lstm`.
    """
    if arch != "Lstm":
        return None

    with open(tokenizer_json, encoding="utf-8") as file:
        keras_tokenizer = tokenizer_from_json(file.read())

    # texts_to_sequences iterates over its argument, so a bare string would
    # be split into one "document" per character. Wrap it so a single text
    # produces a single sequence.
    texts = [text] if isinstance(text, str) else text

    sequences = keras_tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=max_length)
    return padded.astype(np.float32)
|
|
|
|
|
def modelx(
    arch: str,
    model_path: str,
    text: str,
    tokenizer_json: str = "",
    batch_size=32,
    max_length=300,
):
    """
    Run the saved model on `text` and map the rounded score to a label.

    ::param:: arch: type of model; only `Lstm` triggers a prediction.
    ::param:: model_path: path handed to ``tf.keras.models.load_model``
        (the model is loaded on every call).
    ::param:: text: a single document, or an iterable of documents.
    ::param:: tokenizer_json: path to a JSON-serialized tokenizer. When
        empty, `text` is wrapped in a ``pd.Series`` and fed to the model
        untokenized.
    ::param:: batch_size: ``batch_size`` passed to ``model.predict``.
    ::param:: max_length: padding length forwarded to ``tokenizer``.
    ::return:: ``"REAL NEWS"`` when the rounded score is 1.0,
        ``"FAKE NEWS"`` when it is 0.0, the rounded tensor itself for any
        other result (including multi-document input), or ``None`` when
        `arch` is not `Lstm`.
    """
    if tokenizer_json:
        # The tokenizer iterates over its input; a bare string would be
        # treated as one document per character, so wrap it first.
        documents = [text] if isinstance(text, str) else text
        features = tokenizer(arch, tokenizer_json, documents, max_length)
    else:
        features = pd.Series(text)

    if arch != "Lstm":
        # No other architecture is implemented; nothing to predict.
        return None

    model = tf.keras.models.load_model(model_path)
    scores = model.predict(features, batch_size=batch_size)
    rounded = tf.squeeze(tf.round(scores))

    # Only a scalar can be mapped to a single label; truth-testing a
    # multi-element tensor raises, so batches are returned as-is.
    if rounded.shape.rank == 0:
        if rounded == 1.0:
            return "REAL NEWS"
        if rounded == 0.0:
            return "FAKE NEWS"

    return rounded
|
|