# Anime_RecSys / app.py
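"""Gradio demo that recommends anime titles similar to three user favourites.

A trained Keras model supplies an `anime_embedding` layer; recommendations are
the nearest neighbours (by cosine similarity) of the mean embedding of the
three input titles, with metadata and synopses pulled from the MyAnimeList
CSV files under `data/`.
"""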
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import tensorflow as tf
import gradio as gr
class AnimeRecommender:
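    """Embedding-based anime recommender backed by a trained Keras model
    and the MyAnimeList metadata/synopsis tables."""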
def __init__(self, rating_path, anime_path, synopsis_path, model_path):
self.rating_df = pd.read_csv(rating_path)
self.df_anime = pd.read_csv(anime_path, low_memory=True)
self.sypnopsis_df = pd.read_csv(synopsis_path, usecols=["MAL_ID", "Name", "Genres", "sypnopsis"])
self.model = tf.keras.models.load_model(model_path)
self._preprocess_data()
def _preprocess_data(self):
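        """Encode user/anime ids, extract normalized embeddings, and tidy the metadata table."""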
# User and anime ID encoding
user_ids = self.rating_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
anime_ids = self.rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
self.rating_df["user"] = self.rating_df["user_id"].map(user2user_encoded)
self.rating_df["anime"] = self.rating_df["anime_id"].map(anime2anime_encoded)
self.n_users = len(user2user_encoded)
self.n_animes = len(anime2anime_encoded)
self.anime2anime_encoded = anime2anime_encoded
self.anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
        # Extract and L2-normalize the anime embedding matrix
self.anime_weights = self._extract_weights('anime_embedding')
        # Resolve display names: prefer the English title, fall back to the original name
self.df_anime['anime_id'] = self.df_anime['MAL_ID']
self.df_anime["eng_version"] = self.df_anime['English name']
self.df_anime['eng_version'] = self.df_anime.anime_id.apply(self._get_anime_name)
self.df_anime.sort_values(by=['Score'], inplace=True, ascending=False, kind='quicksort', na_position='last')
self.df_anime = self.df_anime[["anime_id", "eng_version", "Score", "Genres", "Episodes", "Type", "Premiered", "Members"]]
def _extract_weights(self, name):
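        """Return the layer's embedding matrix with L2-normalized rows.

        With unit-norm rows, the dot product of two embeddings equals their
        cosine similarity.
        """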
weight_layer = self.model.get_layer(name)
weights = weight_layer.get_weights()[0]
weights = weights / np.linalg.norm(weights, axis=1).reshape((-1, 1))
return weights
def _get_anime_name(self, anime_id):
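        """Return the English title for `anime_id`, falling back to the original name."""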
        try:
            name = self.df_anime[self.df_anime.anime_id == anime_id].eng_version.values[0]
            if pd.isna(name):
                name = self.df_anime[self.df_anime.anime_id == anime_id].Name.values[0]
        except Exception:
            name = 'Unknown'
        return name
def get_anime_frame(self, anime):
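        """Return the metadata row(s) for an anime, looked up by MAL id (int) or English title (str)."""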
if isinstance(anime, int):
return self.df_anime[self.df_anime.anime_id == anime]
if isinstance(anime, str):
return self.df_anime[self.df_anime.eng_version == anime]
def get_sypnopsis(self, anime):
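        """Return the synopsis for an anime by MAL id or name, or None if it is missing."""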
        if isinstance(anime, int):
            rows = self.sypnopsis_df[self.sypnopsis_df.MAL_ID == anime]
        elif isinstance(anime, str):
            rows = self.sypnopsis_df[self.sypnopsis_df.Name == anime]
        else:
            return None
        return rows.sypnopsis.values[0] if len(rows) else None
def find_similar_animes_combined(self, anime_names, n=3, return_dist=False, neg=False):
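        """Recommend `n` titles closest to the mean embedding of `anime_names`.

        With `neg=True` the least similar titles are returned instead;
        `return_dist=True` returns the raw similarity scores and indices.
        """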
try:
            encoded_indices = []
            input_anime_ids = []
            for name in anime_names:
                anime_id = self.get_anime_frame(name).anime_id.values[0]
                input_anime_ids.append(anime_id)
                encoded_index = self.anime2anime_encoded.get(anime_id)
                if encoded_index is not None:
                    encoded_indices.append(encoded_index)
            if not encoded_indices:
                raise ValueError("none of the input titles are in the ratings data")
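            # Average the unit-norm input embeddings and re-normalize; dotting the
            # result with every anime embedding then yields cosine-similarity scores.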
combined_weights = np.mean(self.anime_weights[encoded_indices], axis=0)
combined_weights = combined_weights / np.linalg.norm(combined_weights)
dists = np.dot(self.anime_weights, combined_weights)
sorted_dists = np.argsort(dists)
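            # Fetch extra neighbours so the input titles themselves can be filtered out below.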
n = n + len(input_anime_ids)
if neg:
closest = sorted_dists[:n]
else:
closest = sorted_dists[-n:]
if return_dist:
return dists, closest
            SimilarityArr = []
for close in closest:
decoded_id = self.anime_encoded2anime.get(close)
if decoded_id in input_anime_ids:
continue
sypnopsis = self.get_sypnopsis(decoded_id)
anime_frame = self.get_anime_frame(decoded_id)
anime_name = anime_frame.eng_version.values[0]
genre = anime_frame.Genres.values[0]
similarity = dists[close]
SimilarityArr.append({"anime_id": decoded_id, "name": anime_name, "similarity": similarity, "genre": genre, 'sypnopsis': sypnopsis})
            Frame = (pd.DataFrame(SimilarityArr)
                     .sort_values(by="similarity", ascending=False)
                     .reset_index(drop=True))
            # The input titles were skipped above, so just return the top `n` requested rows.
            return Frame.head(n - len(input_anime_ids))
        except Exception as e:
            print(f"Could not build recommendations for {anime_names}: {e}")
            return pd.DataFrame()
def get_anime_url(self, name):
anime = self.df_anime[self.df_anime['eng_version'] == name]
if not anime.empty:
mal_id = anime['anime_id'].values[0]
anime_name = anime['eng_version'].values[0].replace(' ', '_').replace(':', '_').replace('!', '_')
return f"https://myanimelist.net/anime/{mal_id}/{anime_name}"
        else:
            print(f"Could not find an anime matching '{name}'.")
            return None
def extract_image_url(self, url):
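        """Scrape the first lazy-loaded image (typically the cover art) from a MyAnimeList page."""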
        if not url:
            return None
        try:
            # A timeout keeps the Gradio request from hanging on a slow or unreachable page.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Could not fetch the page: {e}")
            return None
soup = BeautifulSoup(response.text, 'html.parser')
image_tag = soup.find('img', {'data-src': True})
if image_tag:
return image_tag['data-src']
        else:
            print("Could not find an image on the page.")
            return None
def NCF_Recommendation(self, a, b, c):
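        """Gradio callback: map three favourite titles to three (name, synopsis, image URL) triples."""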
        anime_list = [a, b, c]
        anime_result = self.find_similar_animes_combined(anime_list, n=3)
        if len(anime_result) < 3:
            msg = "Not enough recommendations found. Please check the titles and try again."
            return msg, "", None, msg, "", None, msg, "", None
        outputs = []
        for i in range(3):
            name = anime_result.iloc[i]["name"]
            synopsis = anime_result.iloc[i]["sypnopsis"]
            url = self.get_anime_url(name)
            image = self.extract_image_url(url)
            outputs.extend([name, synopsis, image])
        return tuple(outputs)
# ํŒŒ์ผ ๊ฒฝ๋กœ ์„ค์ •
rating_path = 'data/rating_complete.csv'
anime_path = 'data/anime.csv'
synopsis_path = 'data/anime_with_synopsis.csv'
model_path = 'data/anime_model.h5'
# ๊ฐ์ฒด ์ƒ์„ฑ
recommender = AnimeRecommender(rating_path, anime_path, synopsis_path, model_path)
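# Example (assuming the titles below exist in the dataset):
#   recommender.find_similar_animes_combined(["Steins;Gate", "Death Note", "Monster"], n=3)
# returns a DataFrame with columns: anime_id, name, similarity, genre, sypnopsis.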
with gr.Blocks() as app:
with gr.Row():
        a = gr.Textbox(label="Tell me your #1 favourite anime!")
        b = gr.Textbox(label="Tell me your #2 favourite anime!")
        c = gr.Textbox(label="Tell me your #3 favourite anime!")
with gr.Row():
with gr.Column():
            img1 = gr.Image(label="Recommendation #1")
            output1 = gr.Textbox(label="1️⃣ First recommendation!")
            output2 = gr.Textbox(label="Synopsis of the first recommendation", interactive=False)
with gr.Column():
            img2 = gr.Image(label="Recommendation #2")
            output3 = gr.Textbox(label="2️⃣ Second recommendation!")
            output4 = gr.Textbox(label="Synopsis of the second recommendation", interactive=False)
with gr.Column():
            img3 = gr.Image(label="Recommendation #3")
            output5 = gr.Textbox(label="3️⃣ Third recommendation!")
            output6 = gr.Textbox(label="Synopsis of the third recommendation", interactive=False)
    btn = gr.Button("Get recommendations!")
btn.click(
fn=recommender.NCF_Recommendation,
inputs=[a, b, c],
outputs=[output1, output2, img1, output3, output4, img2, output5, output6, img3]
)
app.launch(share=True)