import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import tensorflow as tf
import gradio as gr


class AnimeRecommender:
    """Recommend anime from the item embeddings of a trained Keras model.

    The constructor loads a ratings table, an anime metadata table, a
    synopsis table and a saved Keras model, then row-normalizes the weights
    of the model layer named ``anime_embedding`` so that a dot product
    between two rows is their cosine similarity.
    """

    def __init__(self, rating_path, anime_path, synopsis_path, model_path):
        """Load all data files and the model, then build lookup tables.

        Args:
            rating_path: CSV with at least ``user_id`` and ``anime_id``.
            anime_path: CSV with at least ``MAL_ID``, ``Name``,
                ``English name``, ``Score``, ``Genres``, ``Episodes``,
                ``Type``, ``Premiered`` and ``Members``.
            synopsis_path: CSV with ``MAL_ID``, ``Name``, ``Genres`` and
                ``sypnopsis`` columns (spelling as in the data file).
            model_path: Saved Keras model containing a layer named
                ``anime_embedding``.
        """
        self.rating_df = pd.read_csv(rating_path)
        self.df_anime = pd.read_csv(anime_path, low_memory=True)
        self.sypnopsis_df = pd.read_csv(
            synopsis_path, usecols=["MAL_ID", "Name", "Genres", "sypnopsis"]
        )
        self.model = tf.keras.models.load_model(model_path)
        self._preprocess_data()

    def _preprocess_data(self):
        """Build id encodings, normalized embeddings and the display table."""
        # User and anime ID encoding: raw ids -> contiguous indices in order
        # of first appearance in the ratings table.
        user_ids = self.rating_df["user_id"].unique().tolist()
        user2user_encoded = {x: i for i, x in enumerate(user_ids)}
        anime_ids = self.rating_df["anime_id"].unique().tolist()
        anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}

        self.rating_df["user"] = self.rating_df["user_id"].map(user2user_encoded)
        self.rating_df["anime"] = self.rating_df["anime_id"].map(anime2anime_encoded)

        self.n_users = len(user2user_encoded)
        self.n_animes = len(anime2anime_encoded)
        self.anime2anime_encoded = anime2anime_encoded
        self.anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}

        # Normalize anime weights
        self.anime_weights = self._extract_weights('anime_embedding')

        # Fix anime names: prefer the English title, fall back to ``Name``.
        self.df_anime['anime_id'] = self.df_anime['MAL_ID']
        self.df_anime["eng_version"] = self.df_anime['English name']
        self.df_anime['eng_version'] = self.df_anime.anime_id.apply(self._get_anime_name)

        self.df_anime.sort_values(
            by=['Score'],
            inplace=True,
            ascending=False,
            kind='quicksort',
            na_position='last',
        )
        self.df_anime = self.df_anime[
            ["anime_id", "eng_version", "Score", "Genres", "Episodes", "Type", "Premiered", "Members"]
        ]

    def _extract_weights(self, name):
        """Return the first weight matrix of layer ``name``, L2-normalized per row."""
        weight_layer = self.model.get_layer(name)
        weights = weight_layer.get_weights()[0]
        weights = weights / np.linalg.norm(weights, axis=1).reshape((-1, 1))
        return weights

    def _get_anime_name(self, anime_id):
        """Return the display name for ``anime_id``.

        Uses ``eng_version`` when it is present, otherwise the ``Name``
        column. Returns ``'Unknown'`` when the id or a column cannot be
        found.
        """
        try:
            rows = self.df_anime[self.df_anime.anime_id == anime_id]
            name = rows.eng_version.values[0]
            # pd.isna catches every missing-value marker; an identity test
            # against np.nan only matches that one singleton object.
            if pd.isna(name):
                name = rows.Name.values[0]
        except (IndexError, KeyError, AttributeError):
            # No matching row, or the column is absent from the table.
            name = 'Unknown'
        return name

    def get_anime_frame(self, anime):
        """Return the rows of the anime table matching an id or a title.

        Args:
            anime: Integer id (Python or NumPy integer) matched against
                ``anime_id``, or a string matched against ``eng_version``.

        Returns:
            The matching rows as a DataFrame (possibly empty), or ``None``
            when ``anime`` is neither an integer nor a string.
        """
        if isinstance(anime, (int, np.integer)):
            return self.df_anime[self.df_anime.anime_id == anime]
        if isinstance(anime, str):
            return self.df_anime[self.df_anime.eng_version == anime]

    def get_sypnopsis(self, anime):
        """Return the synopsis text for an id or a title.

        Args:
            anime: Integer id (Python or NumPy integer) matched against
                ``MAL_ID``, or a string matched against ``Name``.

        Returns:
            The first matching synopsis, or ``None`` when ``anime`` is
            neither an integer nor a string.

        Raises:
            IndexError: If no row matches.
        """
        if isinstance(anime, (int, np.integer)):
            return self.sypnopsis_df[self.sypnopsis_df.MAL_ID == anime].sypnopsis.values[0]
        if isinstance(anime, str):
            return self.sypnopsis_df[self.sypnopsis_df.Name == anime].sypnopsis.values[0]

    def find_similar_animes_combined(self, anime_names, n=3, return_dist=False, neg=False):
        """Find the anime closest to the average embedding of several titles.

        Args:
            anime_names: Titles (or ids) accepted by ``get_anime_frame``.
            n: Number of recommendations to return.
            return_dist: When true, return ``(dists, closest)`` instead of a
                DataFrame, where ``dists`` holds the similarity of every
                encoded anime and ``closest`` the ``n + len(anime_names)``
                encoded indices at the selected end of the ranking.
            neg: When true, select the least similar anime instead of the
                most similar.

        Returns:
            A DataFrame with columns ``anime_id``, ``name``, ``similarity``,
            ``genre`` and ``sypnopsis``, sorted by similarity (highest
            first) with a fresh ``0..k-1`` index. The input titles are never
            included. An empty DataFrame is returned when a title cannot be
            resolved or any other error occurs; the reason is printed.
        """
        try:
            input_anime_ids = []
            encoded_indices = []
            for name in anime_names:
                frame = self.get_anime_frame(name)
                if frame is None or frame.empty:
                    raise LookupError(f"{name!r} is not in the anime table")
                anime_id = frame.anime_id.values[0]
                encoded_index = self.anime2anime_encoded.get(anime_id)
                if encoded_index is None:
                    # Present in the anime table but never rated, so the
                    # model has no embedding row for it.
                    raise LookupError(f"{name!r} has no embedding")
                input_anime_ids.append(anime_id)
                encoded_indices.append(encoded_index)
            if not encoded_indices:
                raise LookupError("no input titles were given")

            # Average the input embeddings and re-normalize so the dot
            # product below stays a cosine similarity.
            combined_weights = np.mean(self.anime_weights[encoded_indices], axis=0)
            combined_weights = combined_weights / np.linalg.norm(combined_weights)

            dists = np.dot(self.anime_weights, combined_weights)
            sorted_dists = np.argsort(dists)

            if return_dist:
                window = n + len(input_anime_ids)
                closest = sorted_dists[:window] if neg else sorted_dists[-window:]
                return dists, closest

            # Walk the ranking from the requested end and stop once n usable
            # candidates are collected, so the result size does not depend
            # on how many input titles happen to rank near the top.
            ranking = sorted_dists if neg else sorted_dists[::-1]
            excluded = set(input_anime_ids)
            columns = ["anime_id", "name", "similarity", "genre", "sypnopsis"]
            rows = []
            for close in ranking:
                if len(rows) >= n:
                    break
                decoded_id = self.anime_encoded2anime.get(close)
                if decoded_id is None or decoded_id in excluded:
                    continue
                anime_frame = self.get_anime_frame(decoded_id)
                if anime_frame is None or anime_frame.empty:
                    # Rated anime with no row in the anime table: skip it.
                    continue
                try:
                    sypnopsis = self.get_sypnopsis(decoded_id)
                except IndexError:
                    # A missing synopsis should not discard the candidate.
                    sypnopsis = ""
                rows.append({
                    "anime_id": decoded_id,
                    "name": anime_frame.eng_version.values[0],
                    "similarity": dists[close],
                    "genre": anime_frame.Genres.values[0],
                    'sypnopsis': sypnopsis,
                })

            Frame = pd.DataFrame(rows, columns=columns)
            Frame = Frame.sort_values(by="similarity", ascending=False)
            return Frame.reset_index(drop=True)
        except Exception as e:
            # Best-effort boundary for the UI: report and return no results.
            print('{}!, Not Found in Anime list'.format(anime_names))
            print(str(e))
            return pd.DataFrame()

    def get_anime_url(self, name):
        """Return the MyAnimeList page URL for the title ``name``.

        Returns ``None`` (after printing a message) when the title is not in
        the anime table.
        """
        anime = self.df_anime[self.df_anime['eng_version'] == name]
        if not anime.empty:
            mal_id = anime['anime_id'].values[0]
            anime_name = (
                anime['eng_version'].values[0]
                .replace(' ', '_')
                .replace(':', '_')
                .replace('!', '_')
            )
            return f"https://myanimelist.net/anime/{mal_id}/{anime_name}"
        else:
            print(f"{name}에 해당하는 애니메이션을 찾을 수 없습니다.")
            return None

    def extract_image_url(self, url, timeout=10):
        """Fetch ``url`` and return the ``data-src`` of its first such ``<img>``.

        Args:
            url: Page to download. ``None`` or an empty string yields
                ``None`` without a request being made.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the caller forever.

        Returns:
            The image URL, or ``None`` when the page cannot be fetched or
            contains no ``<img>`` tag with a ``data-src`` attribute.
        """
        if not url:
            return None

        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"페이지를 가져올 수 없습니다: {e}")
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        image_tag = soup.find('img', {'data-src': True})

        if image_tag:
            return image_tag['data-src']
        else:
            print("이미지를 찾을 수 없습니다.")
            return None

    def NCF_Recommendation(self, a, b, c):
        """Recommend three anime from three favourite titles.

        Args:
            a, b, c: Titles as they appear in ``eng_version``.

        Returns:
            A 9-tuple ``(name1, synopsis1, image1, name2, synopsis2, image2,
            name3, synopsis3, image3)`` ordered from most to least similar.
            Slots for which no recommendation is available are ``None``.
        """
        anime_list = [a, b, c]
        anime_result = self.find_similar_animes_combined(anime_list, n=3)

        outputs = []
        for position in range(3):
            if position < len(anime_result):
                # Positional access: row 0 is the most similar title.
                row = anime_result.iloc[position]
                title = row['name']
                image = self.extract_image_url(self.get_anime_url(title))
                outputs.extend([title, row['sypnopsis'], image])
            else:
                # Fewer than three results (e.g. an unknown input title).
                outputs.extend([None, None, None])
        return tuple(outputs)


# File locations
rating_path = 'data/rating_complete.csv'
anime_path = 'data/anime.csv'
synopsis_path = 'data/anime_with_synopsis.csv'
model_path = 'data/anime_model.h5'

# Create the recommender (reads the CSV files and loads the model)
recommender = AnimeRecommender(rating_path, anime_path, synopsis_path, model_path)

with gr.Blocks() as app:
    # Three free-text inputs for the user's favourite titles.
    with gr.Row():
        a = gr.Textbox(label="너의 최애 애니 첫 번째를 작성해봐!")
        b = gr.Textbox(label="너의 최애 애니 두 번째를 작성해봐!")
        c = gr.Textbox(label="너의 최애 애니 세 번째를 작성해봐!")

    # One column per recommendation: image, title, synopsis.
    with gr.Row():
        with gr.Column():
            img1 = gr.Image(label="1번째 애니 추천")
            output1 = gr.Textbox(label="1️⃣ 첫번째 애니 추천!")
            output2 = gr.Textbox(label="첫 번째 애니 설명", interactive=False)
        with gr.Column():
            img2 = gr.Image(label="2번째 애니 추천")
            output3 = gr.Textbox(label="2️⃣ 두번째 애니 추천!")
            output4 = gr.Textbox(label="두 번째 애니 설명", interactive=False)
        with gr.Column():
            img3 = gr.Image(label="3번째 애니 추천")
            output5 = gr.Textbox(label="3️⃣ 세번째 애니 추천!")
            output6 = gr.Textbox(label="세 번째 애니 설명", interactive=False)

    btn = gr.Button("추천을 받아봅시다!")
    btn.click(
        fn=recommender.NCF_Recommendation,
        inputs=[a, b, c],
        outputs=[output1, output2, img1, output3, output4, img2, output5, output6, img3]
    )

app.launch(share = True)