Spaces:
Sleeping
Sleeping
Vela
committed on
Commit
·
911d300
1
Parent(s):
9dbe43c
Add endpoints for prediction and cosine similarity
Browse files- .gitignore +1 -0
- __pycache__/embedding_model.cpython-312.pyc +0 -0
- __pycache__/main.cpython-312.pyc +0 -0
- __pycache__/models.cpython-312.pyc +0 -0
- __pycache__/schema.cpython-312.pyc +0 -0
- log_reg_model.pkl +3 -0
- main.py +29 -0
- models.py +37 -0
- requirements.txt +5 -0
- schema.py +5 -0
- sms_process_data_main.xlsx +0 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.venv
|
__pycache__/embedding_model.cpython-312.pyc
ADDED
Binary file (571 Bytes). View file
|
|
__pycache__/main.cpython-312.pyc
ADDED
Binary file (2.09 kB). View file
|
|
__pycache__/models.cpython-312.pyc
ADDED
Binary file (2.36 kB). View file
|
|
__pycache__/schema.cpython-312.pyc
ADDED
Binary file (542 Bytes). View file
|
|
log_reg_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d21e0e1132a61d2fed963c2786120590917124684a4ed569075ba813165a8368
|
3 |
+
size 6874
|
main.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
import models
|
3 |
+
import numpy as np
|
4 |
+
from schema import Prediction
|
5 |
+
from sentence_transformers import util
|
6 |
+
|
7 |
+
# FastAPI application object; the route decorators below register handlers on it.
app = FastAPI()
|
8 |
+
|
9 |
+
@app.get("/embeddings")
def display_embedding(text: str):
    """Return the embedding of ``text`` together with its length.

    Args:
        text: The text to embed; it is passed unchanged to
            ``models.get_embedding``.

    Returns:
        A dict of the form ``{"Dimension": {<length>: <values>}}`` where
        ``<length>`` is ``len()`` of the embedding and ``<values>`` is the
        embedding converted with ``.tolist()``.
    """
    vector = models.get_embedding(text)
    values_by_size = {len(vector): vector.tolist()}
    return {"Dimension": values_by_size}
|
14 |
+
|
15 |
+
@app.post("/prediction")
def display_prediction(prediction: Prediction):
    """Classify ``prediction.message`` with the pre-loaded model.

    Args:
        prediction: Request body; only its ``message`` field is used here.

    Returns:
        ``{"Prediction": "<message> is a <label>"}`` where ``<label>`` is the
        single class label predicted for the message.
    """
    message = prediction.message
    # The message is wrapped in a list so it is encoded as a batch of one,
    # which is the row-per-sample layout passed to ``predict``.
    embedding = models.get_embedding([message])
    # ``predict`` yields one label per input row. Exactly one row was sent,
    # so take that element instead of interpolating the whole list (which
    # previously rendered as e.g. "... is a ['label']").
    label = models.loaded_model.predict(embedding).tolist()[0]
    return {"Prediction": f"{message} is a {label}"}
|
22 |
+
|
23 |
+
@app.post("/cosine_similarity")
def display_cosine_similarity(prediction: Prediction):
    """Report the cosine similarity between the two messages in the request.

    Args:
        prediction: Request body supplying ``message`` and ``message_1``.

    Returns:
        A single-entry dict whose key is a sentence naming both messages and
        whose value is the similarity rounded to four decimal places.
    """
    first, second = prediction.message, prediction.message_1
    # Both texts are embedded in one call; index 0 / 1 follow the input order.
    vectors = models.get_embedding([first, second])
    score = util.cos_sim(vectors[0], vectors[1]).item()
    label = f"Cosine Similarity between {first} and {second} is"
    return {label: round(score, 4)}
|
models.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer
|
2 |
+
from sklearn.linear_model import LogisticRegression
|
3 |
+
import pickle
|
4 |
+
from sklearn.model_selection import train_test_split
|
5 |
+
|
6 |
+
import pandas as pd
|
7 |
+
|
8 |
+
# Encoder instance shared by every call; created on first use so the model
# is not re-instantiated for each request.
_ENCODER = None


def get_embedding(text):
    """Encode ``text`` with the 'Alibaba-NLP/gte-base-en-v1.5' sentence encoder.

    The encoder is constructed once, on the first call, and reused afterwards.
    Previously a new ``SentenceTransformer`` was built on every call.

    Args:
        text: Passed straight to ``SentenceTransformer.encode``; callers in
            this project pass either a single string or a list of strings.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``text``.
    """
    global _ENCODER
    if _ENCODER is None:
        _ENCODER = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
    return _ENCODER.encode(text)
|
12 |
+
|
13 |
+
def train_model():
    """Fit a logistic-regression classifier on embedded SMS texts and save it.

    Reads ``sms_process_data_main.xlsx``, keeps rows that have both a
    ``MessageText`` and a ``label``, splits them 80/20 with a fixed seed,
    embeds the training texts with ``get_embedding`` and fits the model on
    them. The fitted model is written to ``log_reg_model.pkl``.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    frame = pd.read_excel("sms_process_data_main.xlsx")
    # Drop rows with missing labels or texts
    frame.dropna(subset=['MessageText', 'label'], inplace=True)

    texts, labels = frame['MessageText'], frame['label']
    # The held-out part of the split is not used in this function.
    train_texts, _test_texts, train_labels, _test_labels = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    train_vectors = get_embedding(train_texts.tolist())
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(train_vectors, train_labels)

    save_model(classifier, 'log_reg_model.pkl')
    return classifier
|
24 |
+
|
25 |
+
def save_model(model, filename):
    """Pickle ``model`` into ``filename`` and print a confirmation line.

    Args:
        model: Any picklable object.
        filename: Path of the file to write; it is opened in binary write mode.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(model, sink)
    print(f"Model saved to {filename}")
|
29 |
+
|
30 |
+
|
31 |
+
def load_model(filename):
    """Unpickle and return the object stored in ``filename``.

    Prints a confirmation line after the file has been read.

    Args:
        filename: Path of a pickle file; it is opened in binary read mode.

    Returns:
        The object reconstructed by ``pickle.load``.
    """
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # come from a trusted source.
    with open(filename, 'rb') as source:
        restored = pickle.load(source)
    print(f"Model loaded from {filename}")
    return restored
|
36 |
+
|
37 |
+
# Load the pickled classifier once, when this module is imported; main.py
# reads it as ``models.loaded_model``.
loaded_model = load_model('log_reg_model.pkl')
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi[standard]
|
2 |
+
pandas
|
3 |
+
scikit-learn
|
4 |
+
sentence_transformers
|
5 |
+
openpyxl
|
schema.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
|
3 |
+
# Request body shared by the POST endpoints in main.py.
# (Documented with comments rather than a class docstring so the generated
# schema description is left untouched.)
class Prediction(BaseModel):
    # Text to classify; also the first text for the similarity endpoint.
    message: str = "Enter a text message"
    # Second text; read only by the cosine-similarity endpoint.
    message_1: str = "Enter a text message"
|
sms_process_data_main.xlsx
ADDED
Binary file (42.2 kB). View file
|
|