Vela committed on
Commit
911d300
·
1 Parent(s): 9dbe43c

Add endpoints for prediction and cosine similarity

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
__pycache__/embedding_model.cpython-312.pyc ADDED
Binary file (571 Bytes). View file
 
__pycache__/main.cpython-312.pyc ADDED
Binary file (2.09 kB). View file
 
__pycache__/models.cpython-312.pyc ADDED
Binary file (2.36 kB). View file
 
__pycache__/schema.cpython-312.pyc ADDED
Binary file (542 Bytes). View file
 
log_reg_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d21e0e1132a61d2fed963c2786120590917124684a4ed569075ba813165a8368
3
+ size 6874
main.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ import models
3
+ import numpy as np
4
+ from schema import Prediction
5
+ from sentence_transformers import util
6
+
7
# Application instance; the route decorators below register their handlers on it.
app = FastAPI()
8
+
9
# GET /embeddings: embed the `text` query parameter and return the vector
# nested under its own length, i.e. {"Dimension": {<length>: [<values>]}}.
# (Kept as comments rather than a docstring so the generated API docs stay unchanged.)
@app.get("/embeddings")
def display_embedding(text: str):
    vector = models.get_embedding(text)
    values = vector.tolist()
    return {"Dimension": {len(vector): values}}
14
+
15
# POST /prediction: classify `prediction.message` with the pickled model that
# the `models` module exposes as `models.loaded_model`. `message_1` is ignored here.
# (Kept as comments rather than a docstring so the generated API docs stay unchanged.)
@app.post("/prediction")
def display_prediction(prediction: Prediction):
    message = prediction.message
    # The text is wrapped in a one-element list before encoding, so the
    # classifier receives a batch of one.
    vectors = models.get_embedding([message])
    # NOTE(review): `result` is a list, so the sentence below renders it with
    # brackets; left as-is because clients may already depend on the format.
    result = models.loaded_model.predict(vectors).tolist()
    return {"Prediction": f"{message} is a {result}"}
22
+
23
# POST /cosine_similarity: embed `message` and `message_1` in one batch and
# report their cosine similarity rounded to 4 decimal places. The response
# has a single key, a sentence naming both inputs, whose value is the score.
# (Kept as comments rather than a docstring so the generated API docs stay unchanged.)
@app.post("/cosine_similarity")
def display_cosine_similarity(prediction: Prediction):
    message, message_1 = prediction.message, prediction.message_1
    first_vec, second_vec = models.get_embedding([message, message_1])
    score = util.cos_sim(first_vec, second_vec).item()
    return {f"Cosine Similarity between {message} and {message_1} is": round(score, 4)}
models.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from sklearn.linear_model import LogisticRegression
3
+ import pickle
4
+ from sklearn.model_selection import train_test_split
5
+
6
+ import pandas as pd
7
+
8
# Process-wide encoder instance, created on the first get_embedding() call.
_encoder = None


def get_embedding(text):
    """Encode text with the 'Alibaba-NLP/gte-base-en-v1.5' sentence encoder.

    The SentenceTransformer instance is built once and reused on later
    calls; constructing it on every call reloads the whole model each time
    (including on every API request that needs an embedding).

    Args:
        text: A single string or a list of strings; it is passed unchanged
            to ``SentenceTransformer.encode``.

    Returns:
        The value returned by ``encode`` for ``text``.
    """
    global _encoder
    if _encoder is None:
        # NOTE(review): trust_remote_code=True allows code shipped in the
        # model repository to run locally; kept because the original
        # loads the model this way.
        _encoder = SentenceTransformer(
            'Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True
        )
    return _encoder.encode(text)
12
+
13
def train_model():
    """Train a logistic-regression classifier on the SMS spreadsheet and save it.

    Reads "sms_process_data_main.xlsx", discards rows lacking a message or a
    label, fits the classifier on embeddings of the 80% training split
    (fixed seed 42) and pickles it to 'log_reg_model.pkl'.

    Returns:
        The fitted LogisticRegression instance.
    """
    frame = pd.read_excel("sms_process_data_main.xlsx")
    # Drop rows with missing labels or texts
    frame.dropna(subset=['MessageText', 'label'], inplace=True)

    texts, labels = frame['MessageText'], frame['label']
    # The held-out 20% is produced by the split but not used in this function.
    train_texts, _held_out_texts, train_labels, _held_out_labels = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    train_vectors = get_embedding(train_texts.tolist())
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(train_vectors, train_labels)

    save_model(classifier, 'log_reg_model.pkl')
    return classifier
24
+
25
def save_model(model, filename):
    """Pickle ``model`` to ``filename`` and print a confirmation line.

    Args:
        model: Any picklable object.
        filename: Destination path; the file is opened in binary write
            mode, so existing content is overwritten.
    """
    with open(filename, 'wb') as sink:
        pickle.Pickler(sink).dump(model)
    print(f"Model saved to {filename}")
29
+
30
+
31
def load_model(filename):
    """Unpickle and return the object stored in ``filename``.

    Prints a confirmation line after the file has been read.

    Args:
        filename: Path to a pickle file. Unpickling can execute arbitrary
            code, so this should only be pointed at trusted files.

    Returns:
        The object reconstructed from the pickle.
    """
    with open(filename, 'rb') as source:
        restored = pickle.Unpickler(source).load()
    print(f"Model loaded from {filename}")
    return restored
36
+
37
# Loaded once when this module is imported; the /prediction handler in
# main.py reads it as `models.loaded_model`. Importing this module therefore
# requires 'log_reg_model.pkl' to be present in the working directory.
loaded_model = load_model('log_reg_model.pkl')
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi[standard]
2
+ pandas
3
+ scikit-learn
4
+ sentence_transformers
5
+ openpyxl
schema.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class Prediction(BaseModel):
    # Request body used by the POST endpoints in main.py. Both fields default
    # to the same placeholder text, so either may be omitted by the client.
    # (Documented with comments rather than a docstring so the generated
    # schema description stays unchanged.)

    # Text to classify, or the first text of a similarity pair.
    message: str = "Enter a text message"
    # Second text of a similarity pair; the prediction endpoint does not read it.
    message_1: str = "Enter a text message"
sms_process_data_main.xlsx ADDED
Binary file (42.2 kB). View file