File size: 8,334 Bytes
449c21f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2833827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87a9292
449c21f
43402b6
 
 
 
449c21f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c9b4c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import tensorflow as tf
import gradio as gr
class AnimeRecommender:
    """Recommend anime from the item embeddings of a trained Keras model.

    The constructor loads three CSV files and a saved model, then derives:

    * ``anime2anime_encoded`` / ``anime_encoded2anime`` -- mappings between
      the ``anime_id`` values found in the ratings file and the row index of
      the embedding matrix,
    * ``anime_weights`` -- the L2-normalised weights of the model layer named
      ``anime_embedding``,
    * ``df_anime`` -- the anime table reduced to the columns used for
      display, with an ``eng_version`` display name.
    """

    # Characters replaced by "_" when building a MyAnimeList URL slug.
    _SLUG_TABLE = str.maketrans(" :!", "___")
    # Seconds to wait for a MyAnimeList page before giving up.
    _REQUEST_TIMEOUT = 10
    # Column order of the frame returned by find_similar_animes_combined.
    _RESULT_COLUMNS = ["anime_id", "name", "similarity", "genre", "sypnopsis"]

    def __init__(self, rating_path, anime_path, synopsis_path, model_path):
        """Load the data files and the model, then build the lookup tables.

        Args:
            rating_path: CSV with at least ``user_id`` and ``anime_id`` columns.
            anime_path: CSV with ``MAL_ID``, ``Name``, ``English name``,
                ``Score``, ``Genres``, ``Episodes``, ``Type``, ``Premiered``
                and ``Members`` columns.
            synopsis_path: CSV with ``MAL_ID``, ``Name``, ``Genres`` and
                ``sypnopsis`` columns (the misspelling is the column name).
            model_path: Saved Keras model containing an ``anime_embedding``
                layer.
        """
        self.rating_df = pd.read_csv(rating_path)
        self.df_anime = pd.read_csv(anime_path, low_memory=True)
        self.sypnopsis_df = pd.read_csv(synopsis_path, usecols=["MAL_ID", "Name", "Genres", "sypnopsis"])
        self.model = tf.keras.models.load_model(model_path)
        self._preprocess_data()

    def _preprocess_data(self):
        """Encode user/anime IDs, normalise embeddings and tidy the anime table."""
        # Encoded index = order of first appearance in the ratings file.
        user_ids = self.rating_df["user_id"].unique().tolist()
        user2user_encoded = {x: i for i, x in enumerate(user_ids)}
        anime_ids = self.rating_df["anime_id"].unique().tolist()
        anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}

        self.rating_df["user"] = self.rating_df["user_id"].map(user2user_encoded)
        self.rating_df["anime"] = self.rating_df["anime_id"].map(anime2anime_encoded)

        self.n_users = len(user2user_encoded)
        self.n_animes = len(anime2anime_encoded)

        self.anime2anime_encoded = anime2anime_encoded
        self.anime_encoded2anime = dict(enumerate(anime_ids))

        self.anime_weights = self._extract_weights('anime_embedding')

        # Display name: the English title, falling back to the original
        # title where the English one is missing.  Done column-wise instead
        # of one filtered lookup per row, which was quadratic in table size.
        self.df_anime['anime_id'] = self.df_anime['MAL_ID']
        self.df_anime['eng_version'] = self.df_anime['English name'].fillna(self.df_anime['Name'])

        ordered = self.df_anime.sort_values(
            by=['Score'], ascending=False, kind='quicksort', na_position='last'
        )
        self.df_anime = ordered[["anime_id", "eng_version", "Score", "Genres", "Episodes", "Type", "Premiered", "Members"]]

    def _extract_weights(self, name):
        """Return the first weight matrix of layer ``name`` with unit-length rows."""
        weights = self.model.get_layer(name).get_weights()[0]
        norms = np.linalg.norm(weights, axis=1, keepdims=True)
        # An all-zero row would become NaN, and NaN sorts last in argsort,
        # i.e. it would be reported as the most similar item.
        norms[norms == 0] = 1
        return weights / norms

    def _get_anime_name(self, anime_id):
        """Return the display name for ``anime_id`` or ``'Unknown'``.

        Uses ``eng_version`` and falls back to the ``Name`` column when the
        former is missing.  ``'Unknown'`` is returned when the ID is absent
        or the fallback column is not available.
        """
        try:
            matches = self.df_anime[self.df_anime.anime_id == anime_id]
            name = matches.eng_version.values[0]
            # pd.isna, not ``is np.nan``: NaN values are not guaranteed to
            # be the np.nan singleton.
            if pd.isna(name):
                name = matches.Name.values[0]
        except (IndexError, KeyError, AttributeError):
            name = 'Unknown'
        return name

    def get_anime_frame(self, anime):
        """Return the ``df_anime`` rows matching an ID or a display name.

        Args:
            anime: Integer ID (Python or NumPy integer) matched against
                ``anime_id``, or a string matched against ``eng_version``.

        Returns:
            The (possibly empty) filtered DataFrame, or ``None`` when
            ``anime`` is neither an integer nor a string.
        """
        if isinstance(anime, (int, np.integer)):
            return self.df_anime[self.df_anime.anime_id == anime]
        if isinstance(anime, str):
            return self.df_anime[self.df_anime.eng_version == anime]
        return None

    def get_sypnopsis(self, anime):
        """Return the synopsis for an ID or a name from ``sypnopsis_df``.

        Args:
            anime: Integer ID (Python or NumPy integer) matched against
                ``MAL_ID``, or a string matched against ``Name``.

        Returns:
            The first matching synopsis, or ``None`` when ``anime`` is
            neither an integer nor a string.

        Raises:
            IndexError: If no row matches.
        """
        if isinstance(anime, (int, np.integer)):
            return self.sypnopsis_df[self.sypnopsis_df.MAL_ID == anime].sypnopsis.values[0]
        if isinstance(anime, str):
            return self.sypnopsis_df[self.sypnopsis_df.Name == anime].sypnopsis.values[0]
        return None

    def _encode_inputs(self, anime_names):
        """Resolve input names/IDs to ``(anime_ids, embedding_rows)``.

        Raises:
            LookupError: If the list is empty, a name is not in ``df_anime``
                or the anime has no entry in ``anime2anime_encoded``.
        """
        if not anime_names:
            raise LookupError("no anime names were given")

        anime_ids = []
        embedding_rows = []
        for name in anime_names:
            frame = self.get_anime_frame(name)
            if frame is None or frame.empty:
                raise LookupError("{!r} is not in the anime table".format(name))
            anime_id = int(frame.anime_id.values[0])
            row = self.anime2anime_encoded.get(anime_id)
            if row is None:
                raise LookupError("{!r} has no embedding".format(name))
            anime_ids.append(anime_id)
            embedding_rows.append(row)
        return anime_ids, embedding_rows

    def find_similar_animes_combined(self, anime_names, n=3, return_dist=False, neg=False):
        """Find the anime closest to the mean embedding of ``anime_names``.

        Args:
            anime_names: Display names (or integer IDs) of the input anime.
            n: Number of recommendations to return.
            return_dist: If true, return ``(dists, closest)`` instead of a
                frame, where ``dists`` is the similarity of every embedding
                row to the combined vector and ``closest`` holds the
                ``n + len(anime_names)`` selected row indices in ascending
                similarity order.
            neg: Select the least similar anime instead of the most similar.

        Returns:
            A DataFrame with columns ``anime_id``, ``name``, ``similarity``,
            ``genre`` and ``sypnopsis``, sorted by descending similarity and
            indexed ``0..k-1``.  It holds at most ``n`` rows, never contains
            the input anime, and skips candidates that have no row in
            ``df_anime``.  A candidate without a synopsis gets ``""``.
            An empty DataFrame is returned (and a message printed) when an
            input cannot be resolved.
        """
        try:
            input_anime_ids, encoded_indices = self._encode_inputs(anime_names)
            input_vectors = self.anime_weights[encoded_indices]
        except LookupError as e:
            print('{}!, Not Found in Anime list'.format(anime_names))
            print(str(e))
            return pd.DataFrame()

        combined_weights = np.mean(input_vectors, axis=0)
        norm = np.linalg.norm(combined_weights)
        if norm > 0:  # opposite inputs can cancel out; avoid dividing by 0
            combined_weights = combined_weights / norm

        dists = np.dot(self.anime_weights, combined_weights)
        sorted_dists = np.argsort(dists)

        if return_dist:
            window = n + len(input_anime_ids)
            closest = sorted_dists[:window] if neg else sorted_dists[-window:]
            return dists, closest

        # Walk the candidates best-first (worst-first for ``neg``) until ``n``
        # usable ones are collected, so skipped inputs and candidates without
        # metadata never shrink the result.
        ranked = sorted_dists if neg else sorted_dists[::-1]
        excluded = set(input_anime_ids)
        rows = []
        for encoded in ranked:
            if len(rows) >= n:
                break
            decoded_id = self.anime_encoded2anime.get(int(encoded))
            if decoded_id is None or decoded_id in excluded:
                continue
            anime_frame = self.get_anime_frame(decoded_id)
            if anime_frame is None or anime_frame.empty:
                continue
            try:
                sypnopsis = self.get_sypnopsis(decoded_id)
            except IndexError:
                sypnopsis = None
            if sypnopsis is None or pd.isna(sypnopsis):
                sypnopsis = ""
            rows.append({
                "anime_id": decoded_id,
                "name": anime_frame.eng_version.values[0],
                "similarity": dists[encoded],
                "genre": anime_frame.Genres.values[0],
                "sypnopsis": sypnopsis,
            })

        frame = pd.DataFrame(rows, columns=self._RESULT_COLUMNS)
        frame = frame.sort_values(by="similarity", ascending=False, kind="stable")
        return frame.reset_index(drop=True)

    def get_anime_url(self, name):
        """Return the MyAnimeList URL for display name ``name``, or ``None``.

        The slug is the display name with spaces, ``:`` and ``!`` replaced by
        underscores.  A message is printed when the name is unknown.
        """
        anime = self.df_anime[self.df_anime['eng_version'] == name]
        if anime.empty:
            print(f"{name}์— ํ•ด๋‹นํ•˜๋Š” ์• ๋‹ˆ๋ฉ”์ด์…˜์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            return None

        mal_id = anime['anime_id'].values[0]
        anime_name = anime['eng_version'].values[0].translate(self._SLUG_TABLE)
        return f"https://myanimelist.net/anime/{mal_id}/{anime_name}"

    def extract_image_url(self, url):
        """Fetch ``url`` and return the ``data-src`` of its first lazy image.

        Returns:
            The ``data-src`` attribute of the first ``<img>`` that has one,
            or ``None`` when ``url`` is empty, the request fails or times
            out, or no such tag exists.
        """
        if not url:
            return None

        try:
            # A timeout keeps a stalled server from blocking the caller forever.
            response = requests.get(url, timeout=self._REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"ํŽ˜์ด์ง€๋ฅผ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {e}")
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        image_tag = soup.find('img', {'data-src': True})
        if image_tag:
            return image_tag['data-src']

        print("์ด๋ฏธ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        return None

    def NCF_Recommendation(self, a, b, c):
        """Recommend three anime for up to three favourites.

        Args:
            a, b, c: Display names of the favourites.  Surrounding whitespace
                is stripped and blank or ``None`` entries are ignored.

        Returns:
            A 9-tuple ``(name1, synopsis1, image1, name2, synopsis2, image2,
            name3, synopsis3, image3)`` ordered from most to least similar.
            Slots without a recommendation hold ``("", "", None)``.
        """
        cleaned = [v.strip() if isinstance(v, str) else v for v in (a, b, c)]
        anime_list = [v for v in cleaned if v is not None and v != ""]
        anime_result = self.find_similar_animes_combined(anime_list, n=3)

        outputs = []
        for position in range(3):
            if position < len(anime_result):
                # Positional access: the frame is already sorted best-first.
                row = anime_result.iloc[position]
                result = row["name"]
                explain = row["sypnopsis"]
                image = self.extract_image_url(self.get_anime_url(result))
            else:
                result, explain, image = "", "", None
            outputs.extend((result, explain, image))
        return tuple(outputs)


# ํŒŒ์ผ ๊ฒฝ๋กœ ์„ค์ •
rating_path = 'data/rating_complete.csv'
anime_path = 'data/anime.csv'
synopsis_path = 'data/anime_with_synopsis.csv'
model_path = 'data/anime_model.h5'


# ๊ฐ์ฒด ์ƒ์„ฑ
recommender = AnimeRecommender(rating_path, anime_path, synopsis_path, model_path)

with gr.Blocks() as app:
    with gr.Row():
        a = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ์ฒซ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")
        b = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ๋‘ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")
        c = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ์„ธ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")

    with gr.Row():
        with gr.Column():
            img1 = gr.Image(label="1๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
            output1 = gr.Textbox(label="1๏ธโƒฃ ์ฒซ๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
            output2 = gr.Textbox(label="์ฒซ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)
        with gr.Column():
            img2 = gr.Image(label="2๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
            output3 = gr.Textbox(label="2๏ธโƒฃ ๋‘๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
            output4 = gr.Textbox(label="๋‘ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)
        with gr.Column():
            img3 = gr.Image(label="3๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
            output5 = gr.Textbox(label="3๏ธโƒฃ ์„ธ๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
            output6 = gr.Textbox(label="์„ธ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)

    btn = gr.Button("์ถ”์ฒœ์„ ๋ฐ›์•„๋ด…์‹œ๋‹ค!")

    btn.click(
        fn=recommender.NCF_Recommendation,
        inputs=[a, b, c],
        outputs=[output1, output2, img1, output3, output4, img2, output5, output6, img3]
    )

app.launch(share = True)