Spaces:

amaisto
/

CapaMarianna

Sleeping

App Files Files Community

amaisto committed on May 14

Commit

cc0b516

verified ·

1 Parent(s): 76f6531

Upload 5 files

Browse files

Files changed (6) hide show

.gitattributes +1 -0
Marianna.py +391 -0
dati_per_database_riassunti.pkl +3 -0
legends.pkl +3 -0
marianna-102.jpeg +3 -0
requirements.txt +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+marianna-102.jpeg filter=lfs diff=lfs merge=lfs -text

Marianna.py ADDED Viewed

	@@ -0,0 +1,391 @@

+import gradio as gr
+import random
+import pickle
+from sentence_transformers import SentenceTransformer, CrossEncoder, util
+import os  # Importa il modulo os
+from google.colab import drive
+class MariannaBot:
+    def __init__(self):
+        self.data_path_main = "dati_per_database_riassunti.pkl"
+        self.data_path_legends = "legends.pkl"
+        print("Inizializzazione di MariannaBot (senza DB)...")  # Debug
+        self.database = self.load_data_from_pickle(self.data_path_main)
+        self.database_legends = self.load_data_from_pickle(self.data_path_legends)
+        self.database = self.database + self.database_legends
+        self.db_keys = [el[0] for el in self.database] if isinstance(self.database, list) else []
+        self.db_keys_legends = [el[0] for el in self.database] if isinstance(self.database, list) else []
+        # print("Chiavi principali caricate:", len(self.db_keys))  # Debug
+        # print("Chiavi leggende caricate:", len(self.db_keys_legends))  # Debug
+        self.query_dic = self.load_queries_dataset()
+        self.query_dic_keys = list(self.query_dic.keys())
+        # print("dataset query inziali caricato: ", len(self.query_dic))
+        self.reset_state()
+    def load_queries_dataset(self):
+        """Loads queries dataset"""
+        return {"si, certo, certamente, ok, assolutamente si, sicuro, sisi":"si","no, non ho domande, non mi interessa, niente, nulla":"no","non so, scegli tu, fai tu, casuale, lascio a te, decidi tu, pensaci tu, sorprendimi":"non so","stronzo, vaffanculo, ti odio, pezzo di merda, cazzo":"parolacce"}
+    def load_data_from_pickle(self, file_path):
+        """Loads data from a pickle file."""
+        try:
+            with open(file_path, 'rb') as f:
+                data = pickle.load(f)
+            print(f"Dati caricati da: {file_path}")  # Debug
+            return data
+        except FileNotFoundError:
+            print(f"Errore: File non trovato: {file_path}")
+            return []
+        except Exception as e:
+            print(f"Errore durante il caricamento da pickle {file_path}: {e}")
+            return []
+    def initialize_encoder(self):
+        """
+        Initialize encoder and cross-encoder model.
+        """
+        try:
+            # Initialize the encoder model
+            encoder_model = "nickprock/sentence-bert-base-italian-xxl-uncased"
+            cross_encoder_model = "nickprock/cross-encoder-italian-bert-stsb"
+            self.encoder = SentenceTransformer(encoder_model)
+            self.cross_encoder = CrossEncoder(cross_encoder_model)
+            # Pre-encode all database keys
+            self.db_keys_embeddings = self.encoder.encode(self.db_keys, convert_to_tensor=True)
+            self.db_keys_legends_embeddings = self.encoder.encode(self.db_keys_legends, convert_to_tensor=True)
+            self.first_query_emb = self.encoder.encode(self.query_dic_keys, convert_to_tensor=True)
+            print(f"Encoder initialized with {len(self.db_keys)} keys.")
+            return True
+        except Exception as e:
+            print(f"Error initializing encoder: {str(e)}")
+        return False
+    def reset_state(self):
+        self.state = "initial"
+        self.welcome_sent = False
+        self.current_further_info_values = []
+        self.current_index = 0
+        self.main_k = []
+        self.is_telling_stories = False
+    def get_welcome_message(self):
+        return random.choice(["""Ciao, benvenuto!\n\nSono Marianna, la testa di Napoli, in napoletano 'a capa 'e Napule, una statua ritrovata per caso nel 1594. \nAll'epoca del mio ritrovamento, si pensò che fossi una rappresentazione della sirena Partenope, dalle cui spoglie, leggenda narra, nacque la città di Napoli. In seguito, diversi studiosi riconobbero in me una statua della dea Venere, probabilmente collocata in uno dei tanti templi che si trovavano nella città in epoca tardo-romana, quando ancora si chiamava Neapolis.
+        \nPosso raccontarti molte storie sulla città di Napoli e mostrarti le sue bellezze. \nC'è qualcosa in particolare che ti interessa?""","""Benvenuto!\nIo mi chiamo Marianna, 'a capa 'e Napule. Sono stata ritrovata casualmente nel 1594 a Napoli. All'epoca si pensava che fossi una rappresentazione della Sirena Partenope, dalle cui spoglie, secondo la leggenda, nacque la città di Napoli. In seguito, gli archeologi mi riconobbero come una statua della dea Venere, collocata, probabilmente, in uno dei templi della città di Neapolis in epoca tardo-romana.\nConosco molte storie e leggende di Napoli e posso illustrarti le sue bellezze. \nCosa ti interessa in particolare?"""])
+    def get_safe_example_keys(self, num_examples=3):
+        """Safely get example keys from the loaded data."""
+        if not self.db_keys:
+            return []
+        return random.sample(self.db_keys, min(len(self.db_keys), num_examples))
+    def story_flow(self):
+        """Gestisce la selezione casuale di una storia dai dati delle leggende."""
+        if not self.database_legends:
+            return random.choice(["Mi dispiace, al momento non ho leggende da raccontare.","Ti ho già raccontato tutte le leggende di cui sono a conoscenza!","Emmmm... Non mi viene in mente altro al momento, posso parlarti di altro?"])
+        available_topics = [item[0] for item in self.database_legends if item[0] not in self.main_k]
+        if not available_topics:
+            self.main_k = []  # Reset della lista delle storie raccontate
+            available_topics = [item[0] for item in self.database_legends]
+        if not available_topics:
+            return random.choice(["Sembra che tu abbia già ascoltato tutte le storie!","Ti ho già raccontato tutte le leggende di cui sono a conoscenza!","Emmmm... Non mi viene in mente altro al momento, posso parlarti di altro?"])
+        random_story_tuple = random.choice([item for item in self.database_legends if item[0] in available_topics])
+        topic = random_story_tuple[0]
+        content = random_story_tuple[1]['intro'] if 'intro' in random_story_tuple[1] else None
+        if not content:
+            return f"Mi dispiace, non ho trovato un'introduzione per la leggenda '{topic}'."
+        self.main_k.append(topic)
+        self.state = "follow_up"
+        self.is_telling_stories = True
+        return random.choice([f"Ok, lascia che ti racconti de '{topic}'.\n\n{content}\n\nVuoi che ti racconti un'altra storia?",f"Ora ti parlerò di {topic}!\n\n{content}\n\nPosso raccontarti un'altra storia?"])
+    def get_legend_content(self, key):
+        """Helper function to get the content for a legend key."""
+        # Se self.database_legends è una lista di stringhe (le chiavi),
+        # potresti aver bisogno di caricare il contenuto da un altro file
+        # o averlo pre-caricato in un'altra struttura dati.
+        # Al momento, restituisco None, dovresti implementare la logica
+        # per recuperare il contenuto effettivo.
+        return self.database_legends.get(key) if isinstance(self.database_legends, dict) else None
+    def get_value(self, key):
+        """Retrieve a value from the loaded main data by key."""
+        for k, v in self.database:
+            if k == key:
+                return v
+        return None
+    def handle_query(self, message):
+        """Handle user queries by searching the database"""
+        try:
+            # Encode the user query
+            query_embedding = self.encoder.encode(message, convert_to_tensor=True)
+            # Perform semantic search on the keys
+            semantic_hits = util.semantic_search(query_embedding, self.db_keys_embeddings, top_k=3)
+            semantic_hits = semantic_hits[0]
+            cross_inp = [(message, self.db_keys[hit['corpus_id']]) for hit in semantic_hits]
+            cross_scores = self.cross_encoder.predict(cross_inp)
+            reranked_hits = sorted(
+                [{'corpus_id': hit['corpus_id'], 'cross-score': score}
+                 for hit, score in zip(semantic_hits, cross_scores)],
+                key=lambda x: x['cross-score'], reverse=True
+            )
+            best_hit = reranked_hits[0]
+            best_title = self.db_keys[best_hit['corpus_id']]
+            best_score = best_hit['cross-score']
+            # print(best_title, best_score)
+            # Main treshold = 0.75
+            similarity_threshold = 0.75
+            # treshold granularity
+            if best_score < similarity_threshold:
+                # low confidence (< 0.35)
+                if best_score < 0.55:
+                    return random.choice(["Mi dispiace, non ho informazioni su questo argomento. Puoi chiedermi di altro sulla città di Napoli.",
+                                          "Purtroppo non riesco a rammentare altro su questo argomento, la mia memoria non è più quella di un tempo. Chiedimi qualcos'altro su Napoli e le sue bellezze!",
+                                          "Mi dispiace tantissimo, ma non riesco a ricordare altro. Vuoi chiedermi altro sulla città di Napoli?"])
+                # medium confidence(0.55 - 0.75)
+                else:
+                    alternative_hits = [self.db_keys[hit['corpus_id']] for hit in reranked_hits[:2]]
+                    suggestions = ", ".join(alternative_hits)
+                    value = self.get_value(best_title)
+                    if value:
+                        partial_info = value.get('short_intro', value.get('intro', '').split('.')[0] + '.')
+                        self.state = "query"
+                        self.is_telling_stories = False
+                        return random.choice([f"Potrei avere alcune informazioni su {best_title}, ma non sono completamente sicura sia ciò che stai cercando. I miei suggerimenti sono {suggestions}. \n\nCosa ti interessa?",
+                                              f"Credo che tu stia parlando de {best_title}, ma per essere sicura di ciò che vuoi sapere, potresti specificare se parli di {suggestions}?",
+                                              f"Per assicurarmi di aver capito bene, vuoi che ti parli di {suggestions}?"])
+                    else:
+                        return f"Ho trovato qualcosa su {best_title}, ma non sono completamente sicura. Vuoi saperne di più?"
+            # high confidence (above the threshold)
+            if best_title is not None:
+                value = self.get_value(best_title)
+                if value:
+                    key = best_title
+                    self.main_k.append(key)
+                    self.state = "follow_up"
+                    self.is_telling_stories = False
+                    response = value.get('intro', '')
+                    if isinstance(value, dict):
+                        self.current_further_info_values = list(value.get('further_info', {}).values())
+                    else:
+                        self.current_further_info_values = []  # Se il valore non è un dizionario
+                    self.current_index = 0
+                    return f"{response}\n\nVuoi sapere altro su {self.main_k[-1]}?"
+                else:
+                    return random.choice(["Mi dispiace, non ho informazioni su questo argomento. Puoi chiedermi di altro sulla città di Napoli.",
+                                          "Purtroppo non riesco a rammentare altro su questo argomento, la mia memoria non è più quella di un tempo. Chiedimi qualcos'altro su Napoli e le sue bellezze!",
+                                          "Mi dispiace tantissimo, ma non riesco a ricordare altro. Vuoi chiedermi altro sulla città di Napoli?"])
+        except Exception as e:
+            print(e)
+            self.state = "initial"
+            return random.choice(["Mi dispiace, c'è stato un errore. Puoi riprovare con un'altra domanda? ",
+                                  "Scusami, sto facendo confusione. Puoi farmi un'altra domanda?",
+                                  "Mi dispiace, non ho capito. Puoi essere più preciso?"])
+    def first_query(self, message):
+        try:
+         # Encode the user query
+            query_embedding = self.encoder.encode(message, convert_to_tensor=True)
+            # Perform semantic search on the keys
+            semantic_hits = util.semantic_search(query_embedding, self.first_query_emb, top_k=4)
+            semantic_hits = semantic_hits[0]
+            print(semantic_hits)
+            cross_inp = [(message, self.query_dic_keys[hit['corpus_id']]) for hit in semantic_hits]
+            cross_scores = self.cross_encoder.predict(cross_inp)
+            reranked_hits = sorted(
+                [{'corpus_id': hit['corpus_id'], 'cross-score': score}
+                 for hit, score in zip(semantic_hits, cross_scores)],
+                key=lambda x: x['cross-score'], reverse=True
+            )
+            best_hit = reranked_hits[0]
+            best_title = self.query_dic[self.query_dic_keys[best_hit['corpus_id']]]
+            best_score = best_hit['cross-score']
+            print(best_title, best_score)
+            # Main treshold = 0.75
+            similarity_threshold = 0.35
+            # treshold granularity
+            if best_score < similarity_threshold:
+              if message == "no":
+                value = "no"
+                return value
+              elif message == "si":
+                value = "si"
+                return value
+              else:
+                value='query'
+                return value
+            else:
+                value = best_title
+                return value
+        except Exception as e:
+            print(e)
+            self.state = "initial"
+            return random.choice(["Mi dispiace, c'è stato un errore. Puoi riprovare con un'altra domanda? ",
+                                  "Scusami, sto facendo confusione. Puoi farmi un'altra domanda?",
+                                  "Mi dispiace, non ho capito. Puoi essere più preciso?"])
+    def respond(self, message, history):
+        if not message:
+            return random.choice(["Mi dispiace, c'è stato un errore. Puoi riprovare con un'altra domanda? ",
+                                  "Scusami, sto facendo confusione. Puoi farmi un'altra domanda?",
+                                  "Mi dispiace, non ho capito. Puoi essere più preciso?"])
+        message = message.lower().strip()
+        if self.state == "initial":
+            value=self.first_query(message)
+            print("analizzando il messaggio....")
+            if value == "si":
+                self.state = "query"
+                self.is_telling_stories = False
+                return random.choice(["Cosa vorresti sapere?","Di cosa posso parlarti?","Cosa ti interessa?","Chiedi pure quello che vuoi"])
+            elif value == "no":
+                self.state = "end"
+                return random.choice(["Va bene, grazie per aver parlato con me.","Ti ringrazio per aver parlato con me. A presto!","Spero di rivederti presto! Ciao!","È stato un piacere conversare con te, alla prossima!","Spero di esserti stata di aiuto. A presto!"])
+            elif value == "non so":
+                return self.story_flow()
+            elif value == "parolacce":
+                return random.choice(["Mi dispiace sentirtelo dire. Per favore, chiedimi qualcosa","Sono veramente mortificata. Vuoi chiedermi altro?","Sono molto triste. Forse un'altra domanda miglirerà le cose!"])
+            elif value == "query":
+                return self.handle_query(message)
+            else:
+                return "Scusa, non ho capito."
+        elif self.state == "query":
+            return self.handle_query(message)
+        elif self.state == "follow_up":
+            value=self.first_query(message)
+            if value == "si":
+                if self.is_telling_stories:
+                    return self.story_flow()
+                elif self.current_further_info_values and self.current_index < len(self.current_further_info_values):
+                    value = self.current_further_info_values[self.current_index]
+                    self.current_index += 1
+                    if self.current_index < len(self.current_further_info_values):
+                        return f"{value}\n\nVuoi sapere altro su {self.main_k[-1]}?"
+                    else:
+                        self.state = "initial"
+                        return f"{value}\n\nNon ho altre informazioni su {self.main_k[-1]}. Ti interessa qualcos'altro?"
+                else:
+                    self.state = "initial"
+                    return f"Non ho altre informazioni su {self.main_k[-1]}. Ti interessa qualcos'altro?"
+            elif value == "no":
+                self.state = "initial"
+                self.is_telling_stories = False
+                return random.choice(["C'è qualcos'altro che ti interessa?","Hai qualche altra domanda?","Vuoi sapere qualcos'altro?"])
+            elif value == "non so":
+                return self.story_flow()
+            elif value == "parolacce":
+                return random.choice(["Mi dispiace sentirtelo dire. Per favore, chiedimi qualcosa","Sono veramente mortificata. Vuoi chiedermi altro?","Sono molto triste. Forse un'altra domanda miglirerà le cose!"])
+            elif value == "query":
+                return self.handle_query(message)
+            else:
+                return "Scusa, non ho capito."
+        return random.choice(["Mi dispiace, c'è stato un errore. Puoi riprovare con un'altra domanda? ",
+                                  "Scusami, sto facendo confusione. Puoi farmi un'altra domanda?",
+                                  "Mi dispiace, non ho capito. Puoi essere più preciso?"])
+def main():
+    bot = MariannaBot()
+    if not bot.initialize_encoder():
+        print("Failed to initialize encoder. Exiting.")
+        return  # Exit if encoder initialization fails
+    def update_chatbot(message, history):
+        if not message.strip():
+            return history, ""
+        response = bot.respond(message, history)
+        return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}], ""
+    def reset_chat():
+        bot.reset_state()
+        return [{"role": "assistant", "content": bot.get_welcome_message()}], ""
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
+        with gr.Row():
+            gr.Markdown("## Chatta con Marianna - 'La Testa di Napoli'")
+        with gr.Row():
+            gr.Image("marianna-102.jpeg",
+                     elem_id="marianna-image",
+                     width=250)
+            chatbot = gr.Chatbot(
+                value=[{"role": "assistant", "content": bot.get_welcome_message()}],
+                height=500,
+                type="messages"
+            )
+        msg = gr.Textbox(
+            placeholder="Scrivi il tuo messaggio qui...",
+            container=False
+        )
+        with gr.Row():
+            clear = gr.Button("Clicca qui per ricominciare")
+        msg.submit(
+            update_chatbot,
+            [msg, chatbot],
+            [chatbot, msg]
+        )
+        clear.click(
+            reset_chat,
+            [],
+            [chatbot, msg]
+        )
+        # Get example keys safely
+        example_keys = bot.get_safe_example_keys()
+        if example_keys:
+            examples = [key for key in example_keys]
+            gr.Examples(
+                examples=examples,
+                inputs=msg
+            )
+    demo.launch(share=True, debug=True)
+# Start the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

dati_per_database_riassunti.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ff2b91a3f03b2b6ad66519b19237714e98ecf0c4363f889d38af7c1738089b6
+size 639226

legends.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:373dc3bf62ef74b755bd2da0e6edcabf30de9a45d71adab5a5820119d6011f3a
+size 13565

marianna-102.jpeg ADDED Viewed

Git LFS Details

SHA256: 0afd47725e39ab358933da2aa8f6c4e0190e2f186e05feb5cb1fa441f2a8967a
Pointer size: 132 Bytes
Size of remote file: 1.25 MB

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+# pickle is part of the Python standard library and needs no pip install
+gradio
+sentence-transformers