"""Streamlit front end for FindMyBook: free-text book search via Faiss and K-Means."""
import streamlit as st

# Streamlit requires set_page_config to be the first Streamlit command of the
# script. It is kept ahead of the remaining imports on purpose -- presumably
# because importing `faiss_file` may itself run Streamlit commands (TODO confirm).
st.set_page_config(page_title="FindMyBook", page_icon="πŸ“š", menu_items=None, initial_sidebar_state="auto")

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
from joblib import load
from transformers import AutoTokenizer, AutoModel
from faiss_file import model, tokenizer, embeddings_dataset, embed_bert_cls, recommend

# Model, tokenizer, dataset, kmeans, recommendation function
device = 'cpu'
tokenizer_k = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny")
model_k = AutoModel.from_pretrained("cointegrated/rubert-tiny")
kmeans = load('kmeans.joblib')
emb = load('final_emb.joblib')


def recomendation(input):
    """Rank the books of the query's k-means cluster by cosine similarity.

    The query is embedded with ``embed_bert_cls``, assigned to a cluster by the
    fitted ``kmeans`` model, and every row of ``emb`` carrying that cluster
    label is scored against the query embedding.

    Args:
        input: The user's query text. The name shadows the builtin but is kept
            so that existing keyword callers continue to work.

    Returns:
        A copy of the matching rows of ``emb`` with an added ``'cosine'``
        column, sorted by that column in descending order.
    """
    # NOTE(review): assumes embed_bert_cls returns a 1-D array-like vector.
    user_input = embed_bert_cls(input, model_k, tokenizer_k)
    label = kmeans.predict(user_input.reshape(1, -1))[0]
    sample_df = emb[emb['labels'] == label].copy()
    # The query norm does not depend on the row, so compute it once.
    query_norm = np.linalg.norm(user_input)
    sample_df['cosine'] = sample_df['embeddings'].apply(
        lambda x: np.dot(x, user_input) / (np.linalg.norm(x) * query_norm)
    )
    return sample_df.sort_values('cosine', ascending=False)


def _render_books(book_recs, limit):
    """Render up to ``limit`` recommendations as cover + description rows.

    Args:
        book_recs: Table-like result exposing the columns ``image_url``,
            ``title``, ``author``, ``annotation`` and ``page_url``.
        limit: Maximum number of books to show.
    """
    # A search may yield fewer rows than requested (e.g. a small cluster);
    # clamp the count so positional access cannot raise IndexError.
    shown = min(limit, len(book_recs['title']))
    for i in range(shown):
        col1, col2 = st.columns([2, 7])
        with col1:
            image = book_recs['image_url'].iloc[i]
            st.image(image)
        with col2:
            title = book_recs['title'].iloc[i]
            author = book_recs['author'].iloc[i]
            try:
                author = author.rstrip()
            except AttributeError:
                # Non-string author (e.g. a missing value): show it unchanged.
                pass
            annotation = book_recs['annotation'].iloc[i]
            page_url = book_recs['page_url'].iloc[i]
            st.subheader(title)
            st.markdown(f'_{author}_')
            st.caption(annotation)
            st.markdown(f"[ΠŸΠΎΠ΄Ρ€ΠΎΠ±Π½Π΅Π΅...]({page_url})")


st.title('Π£ΠΌΠ½Ρ‹ΠΉ поиск ΠΊΠ½ΠΈΠ³')

with st.sidebar:
    st.markdown(
        'Π”ΠΎΠ±Ρ€ΠΎ ΠΏΠΎΠΆΠ°Π»ΠΎΠ²Π°Ρ‚ΡŒ Π² ΠΌΠΈΡ€ **FindMyBook** - самого ΡƒΠΌΠ½ΠΎΠ³ΠΎ поисковика ΠΊΠ½ΠΈΠ³! Π­Ρ‚ΠΎ ΠΊΠ°ΠΊ Ρ‚Π²ΠΎΠΉ Π»ΠΈΡ‡Π½Ρ‹ΠΉ Π±ΠΈΠ±Π»ΠΈΠΎΡ‚Π΅ΠΊΠ°Ρ€ΡŒ, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π·Π½Π°Π΅Ρ‚ всС ΠΎ Ρ‚Π΅Π±Π΅ ΠΈ Ρ‚Π²ΠΎΠΈΡ… прСдпочтСниях Π² Π»ΠΈΡ‚Π΅Ρ€Π°Ρ‚ΡƒΡ€Π΅! '
        'Π­Ρ‚ΠΎ Π½Π΅ просто ΠΎΠ±Ρ‹Ρ‡Π½Ρ‹ΠΉ поисковик, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ ΠΈΡ‰Π΅Ρ‚ ΠΊΠ½ΠΈΠ³ΠΈ ΠΏΠΎ Π°Π²Ρ‚ΠΎΡ€Π°ΠΌ ΠΈΠ»ΠΈ названиям, это настоящий Π»ΠΈΡ‚Π΅Ρ€Π°Ρ‚ΡƒΡ€Π½Ρ‹ΠΉ Π΄Π΅Ρ‚Π΅ΠΊΡ‚ΠΈΠ², ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ ΠΏΡ€ΠΎΠ½ΠΈΠΊΠ°Π΅Ρ‚ Π² Π³Π»ΡƒΠ±ΡŒ содСрТания ΠΊΠ½ΠΈΠ³ ΠΈ ΠΏΠΎΠΌΠΎΠ³Π°Π΅Ρ‚ Π½Π°ΠΉΡ‚ΠΈ ΠΈΠΌΠ΅Π½Π½ΠΎ Ρ‚Π΅, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ оставят Π½Π΅ΠΈΠ·Π³Π»Π°Π΄ΠΈΠΌΠΎΠ΅ Π²ΠΏΠ΅Ρ‡Π°Ρ‚Π»Π΅Π½ΠΈΠ΅.'
    )
    st.markdown('**FindMyBook** Ρ€Π°Π±ΠΎΡ‚Π°Π΅Ρ‚ Π½Π° основС ΠΏΠ΅Ρ€Π΅Π΄ΠΎΠ²Ρ‹Ρ… Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠΎΠ² искусствСнного ΠΈΠ½Ρ‚Π΅Π»Π»Π΅ΠΊΡ‚Π°, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ ΠΏΠΎΠ·Π²ΠΎΠ»ΡΡŽΡ‚ Π΅ΠΌΡƒ Π°Π½Π°Π»ΠΈΠ·ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ содСрТаниС ΠΊΠ½ΠΈΠ³ ΠΈ Π½Π°Ρ…ΠΎΠ΄ΠΈΡ‚ΡŒ связи ΠΌΠ΅ΠΆΠ΄Ρƒ Π½ΠΈΠΌΠΈ. Π­Ρ‚ΠΎΡ‚ поисковик смоТСт Π½Π°ΠΉΡ‚ΠΈ ΠΊΠ½ΠΈΠ³Ρƒ, которая понравится ΠΈΠΌΠ΅Π½Π½ΠΎ Ρ‚Π΅Π±Π΅, учитывая Ρ‚Π²ΠΎΠΈ прСдпочтСния ΠΈ интСрСсы.')
    st.markdown('НС Π½ΡƒΠΆΠ½ΠΎ Ρ‚Ρ€Π°Ρ‚ΠΈΡ‚ΡŒ врСмя Π½Π° бСсконСчный поиск ΠΊΠ½ΠΈΠ³ Π² ΠΎΠ³Ρ€ΠΎΠΌΠ½Ρ‹Ρ… ΠΎΠ½Π»Π°ΠΉΠ½-Π±ΠΈΠ±Π»ΠΈΠΎΡ‚Π΅ΠΊΠ°Ρ…. ΠŸΡ€ΠΎΡΡ‚ΠΎ Π²Π²Π΅Π΄ΠΈ Ρ‚Π΅ΠΌΡƒ, которая тСбя интСрСсуСт, ΠΈ **FindMyBook** ΡƒΠΆΠ΅ Π½Π°Ρ‡Π½Π΅Ρ‚ ΠΈΡΠΊΠ°Ρ‚ΡŒ ΠΊΠ½ΠΈΠ³ΠΈ, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ подходят ΠΈΠΌΠ΅Π½Π½ΠΎ Ρ‚Π΅Π±Π΅!')

user_prompt = st.text_area(label='Π’Π²Π΅Π΄ΠΈΡ‚Π΅ запрос:', placeholder="Π₯ΠΎΡ‡Ρƒ ΠΏΡ€ΠΎΡ‡ΠΈΡ‚Π°Ρ‚ΡŒ ΠΎ...", height=200)
books_per_page = st.number_input('ΠšΠΎΠ»ΠΈΡ‡Π΅ΡΡ‚Π²ΠΎ Ρ€Π΅ΠΊΠΎΠΌΠ΅Π½Π΄Π°Ρ†ΠΈΠΉ:', min_value=1, max_value=5, value=3)
button = st.button("Найти")

# Search only when the button was pressed and the prompt has at least two
# characters; both tabs share this condition.
search_requested = button and len(user_prompt) > 1

tab1, tab2 = st.tabs(["Faiss Search", "K-Mean"])

with tab1:
    if search_requested:
        book_recs = recommend(user_prompt, books_per_page)
        _render_books(book_recs, books_per_page)

with tab2:
    # The Faiss search is deliberately NOT run here: its result was never
    # rendered in this tab and only cost a model call on every rerun.
    if search_requested:
        book_recs = recomendation(user_prompt)
        _render_books(book_recs, books_per_page)