Spaces:

remzicam
/

movie_search_engine

Sleeping

App Files Files Community

remzicam commited on Jan 3, 2023

Commit

26540ec

•

1 Parent(s): 0df610f

Upload 3 files

Browse files

Files changed (3) hide show

app.py +223 -0
detailed_movies_top_250_embeds.pkl.xz +3 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,223 @@

+from pandas import read_pickle
+import streamlit as st
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+from streamlit_extras.add_vertical_space import add_vertical_space
+from streamlit_extras.colored_header import colored_header
+from streamlit_option_menu import option_menu
+max_seq_length = 256
+repo_id = "all-MiniLM-L6-v2"
+data_path = "detailed_movies_top_250_embeds.pkl.xz"
+output_column_names = [
+    "year",
+    "duration",
+    "genre",
+    "stars",
+    "summary",
+    "poster_url",
+    "trailer_url",
+]
+st.set_page_config(layout="wide")
+colored_header(
+    label="SEARCH ENGINE&MOVIE RECOMMENDER: IMDB TOP 250 MOVIES",
+    description="""Discover the best movies from the IMDB Top 250 list with advanced semantic search engine and movie recommender.
+                    Simply enter a keyword, phrase, or even plot.
+                    It provides you with a personalized selection of top-rated films!""",
+    color_name="blue-70",
+)
+hide_streamlit_style = """
+            <style>
+            #MainMenu {visibility: hidden;}
+            footer {visibility: hidden;}
+            </style>
+            """
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+@st.cache(suppress_st_warning=True, allow_output_mutation=True)
+def load_data_model():
+    """
+    It loads the dataframe and the sentence embedding model.
+    Returns:
+      A tuple of the dataframe and the embedding model
+    """
+    df = read_pickle(data_path)
+    embed_model = SentenceTransformer(repo_id)
+    embed_model.max_seq_length = max_seq_length
+    return df, embed_model
+def top_n_retriever(titles: list[str], similarity_scores: object, n: int, query_type: str) -> list[str] :
+    """
+    It takes in a list of titles, a numpy array of similarity scores, the number of results to return,
+    and the type of query (search engine or similar movies). It then returns the top n results
+    Args:
+      titles (List[str]): List of movie titles
+      similarity_scores (ndarray): The cosine similarity scores of the query movie with all the movies
+    in the dataset.
+      n (int): The number of results to return
+      query_type (str): This is the type of query. It can be either "Search Engine" or "Similar Movies".
+    Returns:
+      The top n movies that are similar to the query movie.
+    """
+    sim_scores = zip(titles, similarity_scores)
+    sorted_sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
+    if query_type == "Search Engine":
+        sorted_sim_scores = sorted_sim_scores[:n]
+    if query_type == "Similar Movies":
+        sorted_sim_scores = sorted_sim_scores[1 : n + 1]
+    return [i[0] for i in sorted_sim_scores]
+def grid_maker(movie_recs: list[str], df: object):
+    """
+    It takes the list of recommended movies and the dataframe as input and outputs a grid of movie
+    posters and details
+    Args:
+      movie_recs (List[str]): - a list of movie titles
+      df (object): the dataframe containing the movie data
+    """
+    for movie in movie_recs:
+        poster_col, title_col = st.columns([1, 8])
+        (year, duration, genre, stars, summary, poster_url, trailer_url) = (
+            df[output_column_names][df.title == movie]
+        ).values.flatten()
+        poster_col.image(poster_url)
+        poster_col.markdown(
+            f'<a href={trailer_url}><button style="background-color:GreenYellow;">🎥Trailer</button></a>',
+            unsafe_allow_html=True,
+        )
+        title_col.markdown(
+            f""" #### **:blue[{movie}]** | {year} | {duration} | {genre} """
+        )
+        title_col.markdown(
+            f""" <span style="background-color:rgba(0, 0, 0, 0.1);">{stars}</span>
+                                <span style="word-wrap:break-word;font-family:roboto;font-weight: 700;">
+                                <br>{summary}</span>""",
+            unsafe_allow_html=True,
+        )
+def filter_df(df: object, selected_page: str):
+    """
+    The function takes in a dataframe, and the selected page, and returns the selected movie, the
+    filtered dataframe, and the top_n number of recommendations
+    Args:
+      df (object): the dataframe
+      selected_page (str): the page that the user is on
+    Returns:
+      selected_movie, filtered_df, top_n
+    """
+    filtered_df = df.copy()
+    text_input, genre_box, top_n_rec = st.columns([3, 1, 2])
+    with genre_box:
+        selected_genre = st.selectbox("Genre", genres_list)
+    with top_n_rec:
+        top_n = st.slider("Number of Recommendations", 1, 15, 5)
+    if selected_genre != "All":
+        filtered_df = df[df.genre.str.contains(selected_genre)]
+    if selected_page == "Similar Movies":
+        with text_input:
+            selected_movie = st.selectbox("Movie", movie_list)
+        return selected_movie, filtered_df, top_n
+    if selected_page == "Search Engine":
+        with text_input:
+            query = st.text_input("Query", value="Mafia")
+        return query, filtered_df, top_n
+def get_results_button():
+    """
+    It creates a button that says "Get Results ◀" and returns it
+    Returns:
+      A button object.
+    """
+    _, _, col_center, _, _ = st.columns(5)
+    return col_center.button("Get Results ◀")
+df, embed_model = load_data_model()
+df["trailer_url"] = df["trailer_url"].astype(str)
+movie_list = df["title"].values
+genres_list = list(set(df["genre"].str.split(", ").sum()))
+genres_list.insert(0, "All")
+selected_page = option_menu(
+    menu_title=None,  # required
+    options=["Search Engine", "Similar Movies"],  # required
+    icons=["search", "film"],  # optional
+    menu_icon="cast",  # optional
+    default_index=0,  # optional
+    orientation="horizontal",
+    styles={
+        "container": {"padding": "0!important", "background-color": "#fafafa"},
+        "icon": {"color": "orange", "font-size": "25px"},
+        "nav-link": {
+            "font-size": "25px",
+            "text-align": "left",
+            "margin": "0px",
+            "--hover-color": "#eee",
+        },
+        "nav-link-selected": {"background-color": "#0068C9"},
+    },
+)
+if selected_page == "Search Engine":
+    query, genre_df, top_n = filter_df(df, selected_page)
+    query_embed = embed_model.encode(query)
+    bt = get_results_button()
+    if bt:
+        if query == "":
+            st.warning("You should type something", icon="⚠️")
+        else:
+            semantic_sims = [
+                cosine_similarity([query_embed], [movie_embed]).item()
+                for movie_embed in genre_df.embedding
+            ]
+            movie_recs = top_n_retriever(
+                genre_df.title, semantic_sims, top_n, selected_page
+            )
+            add_vertical_space(2)
+            grid_maker(movie_recs, genre_df)
+if selected_page == "Similar Movies":
+    st.info("Movies are recommended based on plot similarity!")
+    selected_movie, genre_df, top_n = filter_df(df, selected_page)
+    bt = get_results_button()
+    if bt:
+        movie_sims = [
+            cosine_similarity(
+                list(df.embedding[df.title == selected_movie]), [movie_embed]
+            ).item()
+            for movie_embed in genre_df.embedding
+        ]
+        movie_recs = top_n_retriever(genre_df.title, movie_sims, top_n, selected_page)
+        add_vertical_space(2)
+        grid_maker(movie_recs, genre_df)

detailed_movies_top_250_embeds.pkl.xz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed2c2da07b2a8a8f28c3b0b7969da829d6f837251729c8a284327431b2ba11db
+size 434052

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+--find-links https://download.pytorch.org/whl/torch_stable.html
+torch==1.13.1+cpu
+sentence-transformers
+pandas
+streamlit-option-menu
+streamlit-extras