# Spaces: cdxxi/find_my_book_pr (Running)
# File size: 12,100 Bytes



import streamlit as st
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss

# Page header: logo in the first column, app title in the second,
# followed by a horizontal rule.
col1, col2, col3 = st.columns(spec=[0.4, 0.5, 0.2])
col1.image('images/book03.PNG', width=160)
col2.subheader("Умный поиск книг")

st.divider()


@st.cache_resource
def load_resources():
    """Load the sentence encoders, the FAISS indexes and the book table.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        A 7-tuple ``(model, model1, model2, index, index1, index2, df)``:
        three ``SentenceTransformer`` encoders, three FAISS indexes read
        from ``indexes/``, and the DataFrame read from ``dff.csv``.
    """
    encoder_names = (
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
        'intfloat/multilingual-e5-large',
    )
    index_paths = (
        "indexes/index1.faiss",
        'indexes/index2.faiss',
        'indexes/index3.faiss',
    )

    # Encoders first, then the indexes, then the book metadata.
    encoders = [SentenceTransformer(name) for name in encoder_names]
    indexes = [faiss.read_index(path) for path in index_paths]
    books = pd.read_csv('dff.csv')

    return (*encoders, *indexes, books)

# Encoders, indexes and the book table come from the cached loader.
(
    model, model1, model2,
    index, index1, index2,
    df,
) = load_resources()


# Free-text description of the book the user is looking for.
user_query = st.text_input(
    label="📖 Введите описание книги, которую хотите найти:",
    placeholder="Например: детектив о загадочном убийстве в маленьком городке",
)

# Number of nearest neighbours requested from each index.
num_results = st.slider(
    label="Количество рекомендаций:",
    min_value=1,
    max_value=20,
    value=5,
    help="Выберите сколько книг показать в результатах",
)

search_button = st.button(label="Искать")

# --- Search and result rendering ---

# Local placeholder image shown when a book has no usable cover.
FALLBACK_COVER = "images/reklama.jpg"
# Annotations longer than this many characters are collapsed into an expander.
ANNOTATION_PREVIEW_LEN = 150

# One entry per search backend:
# (label used in the results heading, comparison column title, encoder, index).
# NOTE(review): model2 is loaded as intfloat/multilingual-e5-large; the e5 model
# card recommends prefixing queries with "query: " -- confirm how index3 was
# built before changing the query text here.
SEARCH_BACKENDS = (
    ("Модель 1", "Model-1", model, index),
    ("Модель 2", "Model-2", model1, index1),
    ("Модель 3", "Model-3", model2, index2),
)


def _find_books(encoder, faiss_index, query, top_k):
    """Return the nearest books to *query* as ``[(row_position, score), ...]``.

    Args:
        encoder: Object with an ``encode(list_of_str)`` method.
        faiss_index: FAISS index searched with the encoded query.
        query: Free-text description entered by the user.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``(row_position, score)`` pairs in the order FAISS
        returned them; it may be shorter than ``top_k``.
    """
    query_vec = encoder.encode([query])
    faiss.normalize_L2(query_vec)  # normalises the array in place
    scores, positions = faiss_index.search(query_vec, top_k)
    # FAISS pads the result with -1 when it finds fewer than top_k neighbours;
    # passing -1 to DataFrame.iloc would silently select the last book.
    return [
        (int(pos), float(score))
        for pos, score in zip(positions[0], scores[0])
        if pos >= 0
    ]


def _render_cover(image_url, width, caption=None, warn_if_missing=True):
    """Show a book cover, falling back to the placeholder image.

    Args:
        image_url: Cover URL from the book table; may be NaN/None.
        width: Image width in pixels.
        caption: Optional caption shown under a real cover.
        warn_if_missing: Whether to show a warning when there is no URL.
    """
    if pd.isna(image_url):
        if warn_if_missing:
            st.warning("Обложка недоступна")
        st.image(FALLBACK_COVER, width=width)
        return
    try:
        # Only the fixed pixel width is passed: st.image's use_container_width
        # is a boolean flag, so a string such as "auto" is not a valid value.
        st.image(image_url, width=width, caption=caption)
    except Exception as e:
        # Best effort: a broken cover must not prevent the book from showing.
        st.error(f"Ошибка загрузки обложки: {e}")
        st.image(FALLBACK_COVER, width=width)


def _render_annotation(annotation):
    """Show the annotation, collapsing long texts behind an expander."""
    if pd.isna(annotation):
        # Empty CSV cells arrive as NaN (a float), which has no len().
        return
    annotation = str(annotation)
    if len(annotation) > ANNOTATION_PREVIEW_LEN:
        preview = annotation[:ANNOTATION_PREVIEW_LEN] + "..."
        with st.expander(f"Аннотация: {preview}"):
            st.write(annotation)
    else:
        st.write(f"**Аннотация:** {annotation}")


def _render_book_card(book, score):
    """Render one result card: cover on the left, details on the right.

    Args:
        book: Row of the book table with ``image_url``, ``title``,
            ``author`` and ``annotation`` fields.
        score: Score returned by the FAISS search for this book.
    """
    with st.container():
        cover_col, info_col = st.columns([0.2, 0.8])

        with cover_col:
            _render_cover(book['image_url'], width=120, caption="Обложка книги")

        with info_col:
            st.markdown(f"**{book['title']}**")
            st.caption(f"Автор: {book['author']}")
            st.write(f"**Сходство:** {score:.3f}")
            _render_annotation(book['annotation'])

        st.divider()


if search_button and user_query:
    # Hits of every backend, kept for the side-by-side comparison below.
    # A backend whose search failed contributes an empty list.
    hits_per_backend = []
    for heading_label, _, encoder, faiss_index in SEARCH_BACKENDS:
        hits = []
        with st.spinner("Ищем лучшие варианты..."):
            try:
                hits = _find_books(encoder, faiss_index, user_query, num_results)
                st.subheader(
                    f"Найденные варианты для запроса {heading_label}: '{user_query}'"
                )
                for position, score in hits:
                    _render_book_card(df.iloc[position], score)
            except Exception as e:
                st.error(f"Ошибка при поиске: {e}")
        hits_per_backend.append(hits)

    # Side-by-side comparison of the covers found by each backend.
    row_count = max(len(hits) for hits in hits_per_backend)
    if row_count:
        with st.spinner("Сравниваем полученные варианты..."):
            try:
                column_spec = [0.1, 0.3, 0.3, 0.3]

                header_cells = st.columns(spec=column_spec)
                # A bare "#" is parsed by Markdown as an empty heading,
                # so it is escaped to show a literal hash sign.
                header_cells[0].write("\\#")
                for cell, (_, column_title, _, _) in zip(header_cells[1:], SEARCH_BACKENDS):
                    cell.write(column_title)

                for rank in range(row_count):
                    row_cells = st.columns(spec=column_spec)
                    row_cells[0].write(str(rank))
                    for cell, hits in zip(row_cells[1:], hits_per_backend):
                        if rank >= len(hits):
                            # This backend returned fewer results; leave the cell empty.
                            continue
                        position, _ = hits[rank]
                        with cell:
                            _render_cover(
                                df.iloc[position]['image_url'],
                                width=100,
                                warn_if_missing=False,
                            )

                st.divider()
            except Exception as e:
                st.error(f"Ошибка при поиске: {e}")

elif search_button:
    # The button was pressed with an empty query.
    st.warning("Пожалуйста, введите описание для поиска!")