remzicam committed on
Commit
26540ec
1 Parent(s): 0df610f

Upload 3 files

Browse files
app.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pandas import read_pickle
2
+ import streamlit as st
3
+ from sentence_transformers import SentenceTransformer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ from streamlit_extras.add_vertical_space import add_vertical_space
6
+ from streamlit_extras.colored_header import colored_header
7
+ from streamlit_option_menu import option_menu
8
+
9
# Value assigned to the embedding model's ``max_seq_length`` attribute after loading.
max_seq_length = 256
# Model identifier handed to ``SentenceTransformer``.
repo_id = "all-MiniLM-L6-v2"
# Pickled dataframe that is read when the app starts.
data_path = "detailed_movies_top_250_embeds.pkl.xz"
# Dataframe columns fetched for each displayed movie; the order matters because
# the values are unpacked positionally when a result row is rendered.
output_column_names = [
    "year", "duration", "genre", "stars",
    "summary", "poster_url", "trailer_url",
]
21
+
22
# Page-wide layout; this is the first Streamlit call the script makes.
st.set_page_config(layout="wide")

# Banner shown at the top of the page.
header_text = """Discover the best movies from the IMDB Top 250 list with advanced semantic search engine and movie recommender.
Simply enter a keyword, phrase, or even plot.
It provides you with a personalized selection of top-rated films!"""
colored_header(
    label="SEARCH ENGINE&MOVIE RECOMMENDER: IMDB TOP 250 MOVIES",
    description=header_text,
    color_name="blue-70",
)

# CSS snippet that sets the ``#MainMenu`` and ``footer`` elements to hidden.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
39
+
40
+
41
# ``st.cache`` is deprecated in favour of ``st.cache_resource`` for objects such
# as ML models that should be shared as-is between reruns. Fall back to the
# legacy decorator only on Streamlit releases that do not offer the new one.
_cache_loader = (
    st.cache_resource
    if hasattr(st, "cache_resource")
    else st.cache(suppress_st_warning=True, allow_output_mutation=True)
)


@_cache_loader
def load_data_model():
    """
    Load the movie dataframe and the sentence embedding model.

    The result is cached by Streamlit, so script reruns reuse the same
    dataframe and model objects instead of reloading them.

    Returns:
        A tuple ``(df, embed_model)``: the dataframe read from ``data_path``
        and the ``SentenceTransformer`` built from ``repo_id`` with its
        ``max_seq_length`` set to the module-level ``max_seq_length``.
    """
    # NOTE: ``read_pickle`` unpickles the file, which can execute arbitrary
    # code; ``data_path`` must always point at a trusted, app-bundled file.
    df = read_pickle(data_path)
    embed_model = SentenceTransformer(repo_id)
    embed_model.max_seq_length = max_seq_length
    return df, embed_model
54
+
55
+
56
def top_n_retriever(
    titles: list[str],
    similarity_scores: list[float],
    n: int,
    query_type: str,
    *,
    exclude_title: "str | None" = None,
) -> list[str]:
    """
    Rank titles by similarity score and return the best ``n`` of them.

    Args:
        titles: Movie titles, aligned element-by-element with
            ``similarity_scores``. Any iterable of strings works.
        similarity_scores: One score per title; higher means more similar.
        n: The number of results to return.
        query_type: Either "Search Engine" or "Similar Movies". Any other
            value returns the complete ranking (behaviour kept from the
            original implementation).
        exclude_title: Only used for "Similar Movies". When given, that exact
            title is removed from the ranking before the top ``n`` are taken.
            When omitted, the first-ranked entry is dropped instead, on the
            assumption that it is the query movie itself; that assumption is
            wrong if the query movie is not among ``titles``, so callers that
            know the query title should pass it here.

    Returns:
        The selected titles, ordered from most to least similar.
    """
    # ``sorted`` is stable, so titles with equal scores keep their input order.
    ranked = sorted(
        zip(titles, similarity_scores), key=lambda pair: pair[1], reverse=True
    )

    if query_type == "Search Engine":
        ranked = ranked[:n]
    elif query_type == "Similar Movies":
        if exclude_title is None:
            # Legacy path: skip the top hit, presumed to be the query movie.
            ranked = ranked[1 : n + 1]
        else:
            ranked = [pair for pair in ranked if pair[0] != exclude_title][:n]

    return [title for title, _ in ranked]
82
+
83
+
84
def grid_maker(movie_recs: list[str], df: object):
    """
    Render one row per recommended movie: poster and trailer button on the
    left, title line, cast and summary on the right.

    Args:
        movie_recs (List[str]): titles of the movies to display, in display order
        df (object): dataframe holding a ``title`` column plus every column
            listed in ``output_column_names``
    """
    # Local import so the block does not depend on a file-level import.
    from html import escape

    for movie in movie_recs:
        poster_col, title_col = st.columns([1, 8])
        # The positional unpacking needs exactly one row matching the title.
        (year, duration, genre, stars, summary, poster_url, trailer_url) = (
            df[output_column_names][df.title == movie]
        ).values.flatten()
        poster_col.image(poster_url)
        # The URL is placed inside an HTML attribute, so it must be quoted and
        # escaped; an unquoted value breaks on characters such as "=", "&",
        # spaces or ">", all of which may appear in query strings.
        trailer_href = escape(str(trailer_url), quote=True)
        poster_col.markdown(
            f'<a href="{trailer_href}"><button style="background-color:GreenYellow;">🎥Trailer</button></a>',
            unsafe_allow_html=True,
        )

        title_col.markdown(
            f""" #### **:blue[{movie}]** | {year} | {duration} | {genre} """
        )
        # NOTE(review): ``stars`` and ``summary`` are inserted as raw HTML;
        # that is only safe while the dataset stays trusted.
        title_col.markdown(
            f""" <span style="background-color:rgba(0, 0, 0, 0.1);">{stars}</span>
<span style="word-wrap:break-word;font-family:roboto;font-weight: 700;">
<br>{summary}</span>""",
            unsafe_allow_html=True,
        )
114
+
115
+
116
def filter_df(df: object, selected_page: str):
    """
    Draw the filter widgets (text/movie input, genre box, result-count slider)
    and return the user's choices together with the genre-filtered dataframe.

    Args:
        df (object): the dataframe; its ``genre`` column holds ", "-separated
            genre names
        selected_page (str): "Search Engine" or "Similar Movies"

    Returns:
        ``(query_or_movie, filtered_df, top_n)``. The first element is the
        free-text query on the "Search Engine" page and the chosen movie
        title on the "Similar Movies" page.

    Raises:
        ValueError: if ``selected_page`` is neither of the two known pages
            (previously ``None`` was returned, which made callers fail later
            while unpacking the result).
    """
    text_input, genre_box, top_n_rec = st.columns([3, 1, 2])
    with genre_box:
        selected_genre = st.selectbox("Genre", genres_list)
    with top_n_rec:
        top_n = st.slider("Number of Recommendations", 1, 15, 5)

    if selected_genre == "All":
        filtered_df = df.copy()
    else:
        # Match whole genre names. A substring/regex test such as
        # ``str.contains("Music")`` would also select "Musical" movies.
        has_genre = df.genre.str.split(", ").apply(
            lambda genres: selected_genre in genres
        )
        filtered_df = df[has_genre]

    if selected_page == "Similar Movies":
        with text_input:
            selected_movie = st.selectbox("Movie", movie_list)
        return selected_movie, filtered_df, top_n

    if selected_page == "Search Engine":
        with text_input:
            query = st.text_input("Query", value="Mafia")
        return query, filtered_df, top_n

    raise ValueError(f"Unknown page: {selected_page!r}")
147
+
148
+
149
def get_results_button():
    """
    Place a "Get Results ◀" button in the middle of a five-column row.

    Returns:
        Whatever ``button`` returns for that widget; callers use it as a
        truthy "results were requested" flag.
    """
    # Five columns are created and only the third (centre) one is used.
    columns = st.columns(5)
    centre = columns[2]
    return centre.button("Get Results ◀")
158
+
159
+
160
df, embed_model = load_data_model()
# Trailer URLs are interpolated into HTML when results are rendered, so make
# sure every value is a string.
df["trailer_url"] = df["trailer_url"].astype(str)
movie_list = df["title"].values
# Gather each distinct genre once and sort the names, so the dropdown order is
# alphabetical and identical on every start (plain ``set`` iteration order is
# arbitrary). "All" stays first as the "no filter" choice. The set
# comprehension also avoids repeatedly concatenating lists with ``.sum()``.
genres_list = ["All"] + sorted(
    {genre for genres in df["genre"].str.split(", ") for genre in genres}
)
165
+
166
+
167
# Styling for the horizontal page switcher, keyed by the menu part it applies to.
menu_styles = {
    "container": {"padding": "0!important", "background-color": "#fafafa"},
    "icon": {"color": "orange", "font-size": "25px"},
    "nav-link": {
        "font-size": "25px",
        "text-align": "left",
        "margin": "0px",
        "--hover-color": "#eee",
    },
    "nav-link-selected": {"background-color": "#0068C9"},
}

# Page switcher; ``selected_page`` receives the label of the chosen option.
selected_page = option_menu(
    menu_title=None,  # required argument
    options=["Search Engine", "Similar Movies"],  # required argument
    icons=["search", "film"],
    menu_icon="cast",
    default_index=0,
    orientation="horizontal",
    styles=menu_styles,
)
186
+
187
if selected_page == "Search Engine":
    # Free-text semantic search over the (optionally genre-filtered) movies.
    query, genre_df, top_n = filter_df(df, selected_page)

    bt = get_results_button()

    if bt:
        # Treat whitespace-only input the same as an empty query.
        if not query.strip():
            st.warning("You should type something", icon="⚠️")
        else:
            # Encode only when results are actually requested; doing it at
            # the top of the page ran the model on every widget interaction.
            query_embed = embed_model.encode(query)
            # One call scores the query against every candidate embedding;
            # row 0 of the result holds the scores for the single query.
            semantic_sims = cosine_similarity(
                [query_embed], list(genre_df.embedding)
            )[0]
            movie_recs = top_n_retriever(
                genre_df.title, semantic_sims, top_n, selected_page
            )
            add_vertical_space(2)
            grid_maker(movie_recs, genre_df)
207
+
208
+
209
if selected_page == "Similar Movies":
    # Recommend movies whose embedding is closest to the selected movie's.
    st.info("Movies are recommended based on plot similarity!")
    selected_movie, genre_df, top_n = filter_df(df, selected_page)

    bt = get_results_button()
    if bt:
        # Look the selected movie up once instead of once per candidate row.
        is_selected = df.title == selected_movie
        selected_embed = list(df.embedding[is_selected])

        # "Similar Movies" ranking drops the top hit, expecting it to be the
        # selected movie itself. Keep that movie in the candidate pool even
        # when the genre filter would exclude it; otherwise the best genuine
        # recommendation would be the one thrown away.
        candidates = df[df.title.isin(genre_df.title) | is_selected]

        # Single call; row 0 holds the scores for the selected movie.
        movie_sims = cosine_similarity(
            selected_embed, list(candidates.embedding)
        )[0]
        movie_recs = top_n_retriever(
            candidates.title, movie_sims, top_n, selected_page
        )
        add_vertical_space(2)
        grid_maker(movie_recs, candidates)
detailed_movies_top_250_embeds.pkl.xz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2c2da07b2a8a8f28c3b0b7969da829d6f837251729c8a284327431b2ba11db
3
+ size 434052
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ --find-links https://download.pytorch.org/whl/torch_stable.html
2
+ torch==1.13.1+cpu
3
+ sentence-transformers
4
+ pandas
5
+ streamlit-option-menu
6
+ streamlit-extras