Spaces:
Sleeping
Sleeping
from sentence_transformers import SentenceTransformer | |
from sklearn.linear_model import LogisticRegression | |
import pickle | |
from sklearn.model_selection import train_test_split | |
import joblib | |
import pandas as pd | |
# Encoders already constructed in this process, keyed by model name, so the
# (slow) model load happens once per model instead of once per call.
_ENCODER_CACHE = {}


def get_embedding(text, model_name='Alibaba-NLP/gte-base-en-v1.5'):
    """Encode *text* with a sentence-transformers model.

    Args:
        text: Whatever ``SentenceTransformer.encode`` accepts; callers in
            this file pass a list of message strings.
        model_name: Name of the model to load. Defaults to the model this
            function has always used.

    Returns:
        The value returned by ``SentenceTransformer.encode`` for *text*.
    """
    model_encode = _ENCODER_CACHE.get(model_name)
    if model_encode is None:
        # NOTE(review): trust_remote_code=True lets the model repository
        # supply its own Python code - only use model names you trust.
        model_encode = SentenceTransformer(model_name, trust_remote_code=True)
        _ENCODER_CACHE[model_name] = model_encode
    return model_encode.encode(text)
def train_model(data_path="sms_process_data_main.xlsx",
                model_path='log_reg_model.pkl'):
    """Train a logistic-regression classifier on message embeddings.

    Reads the spreadsheet at *data_path*, drops rows lacking a
    ``MessageText`` or ``label`` value, embeds the training portion of an
    80/20 split (``random_state=42``) with ``get_embedding``, fits a
    ``LogisticRegression`` on those embeddings, and pickles the fitted
    model to *model_path* via ``save_model``.

    Args:
        data_path: Excel file containing ``MessageText`` and ``label``
            columns.
        model_path: Destination file for the pickled model.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    sample_data_df = pd.read_excel(data_path)
    # Non-mutating dropna: rows without text or label cannot be used.
    sample_data_df = sample_data_df.dropna(subset=['MessageText', 'label'])

    # Numeric-only cells come back from Excel as int/float; the encoder is
    # fed text, so normalise every message to str (a no-op for strings).
    messages = sample_data_df['MessageText'].astype(str)
    labels = sample_data_df['label']

    # The held-out 20% is split off but not used by this function; only the
    # training portion is embedded and fitted.
    X_train, _X_test, y_train, _y_test = train_test_split(
        messages, labels, test_size=0.2, random_state=42
    )

    X_train_embeddings = get_embedding(X_train.tolist())
    log_reg_model = LogisticRegression(max_iter=1000)
    log_reg_model.fit(X_train_embeddings, y_train)

    save_model(log_reg_model, model_path)
    return log_reg_model
def save_model(model, filename):
    """Pickle *model* into the file at *filename* and report the path.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten. A confirmation line is printed afterwards.
    """
    with open(filename, 'wb') as out_stream:
        pickle.Pickler(out_stream).dump(model)
    print(f"Model saved to {filename}")
def load_model(filename):
    """Unpickle and return the object stored in the file at *filename*.

    A confirmation line is printed once the object has been read.

    NOTE(review): unpickling can execute arbitrary code, so only load
    files from a trusted source.
    """
    with open(filename, 'rb') as in_stream:
        restored = pickle.Unpickler(in_stream).load()
    print(f"Model loaded from {filename}")
    return restored