from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pickle
import pandas as pd


def get_embedding(text):
    """Encode a string or list of strings into dense sentence embeddings."""
    model_encode = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
    embedding = model_encode.encode(text)
    return embedding


def train_model():
    """Train a logistic regression classifier on SMS message embeddings."""
    sample_data_df = pd.read_excel("sms_process_data_main.xlsx")
    sample_data_df.dropna(subset=['MessageText', 'label'], inplace=True)

    messages = sample_data_df['MessageText']
    labels = sample_data_df['label']

    X_train, X_test, y_train, y_test = train_test_split(
        messages, labels, test_size=0.2, random_state=42
    )

    # Embed the training messages and fit the classifier on the embeddings.
    X_train_embeddings = get_embedding(X_train.tolist())
    log_reg_model = LogisticRegression(max_iter=1000)
    log_reg_model.fit(X_train_embeddings, y_train)

    save_model(log_reg_model, 'log_reg_model.pkl')
    return log_reg_model


def save_model(model, filename):
    """Serialize the trained model to disk with pickle."""
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)
    print(f"Model saved to {filename}")


def load_model(filename):
    """Load a previously pickled model from disk."""
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    print(f"Model loaded from {filename}")
    return loaded_model
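

# A minimal, hypothetical usage sketch (not part of the original script):
# it assumes sms_process_data_main.xlsx exists with the 'MessageText' and
# 'label' columns referenced above, and the sample SMS text is made up.
if __name__ == "__main__":
    train_model()                                # writes log_reg_model.pkl
    classifier = load_model('log_reg_model.pkl')
    sample = ["Your package is waiting, click the link to confirm delivery."]
    sample_embedding = get_embedding(sample)     # 2D array, one row per message
    print(classifier.predict(sample_embedding))  # predicted label(s)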