import streamlit as st
from keybert import KeyBERT

# Create a KeyBERT instance once at module level; main() reuses it.
kw_model = KeyBERT()

# Weight given to relevance in the relevance/diversity trade-off.
# KeyBERT's `diversity` argument is the complementary weight, so the value
# handed to extract_keywords() is 1 - MMR_RELEVANCE_WEIGHT.
MMR_RELEVANCE_WEIGHT = 0.7


def main():
    """Render the keyword-extraction page and show results on request.

    The page collects a document, a stop-word toggle, an MMR toggle and the
    desired number of keywords. When the "Extract Keywords" button is
    pressed, the keywords returned by KeyBERT are written out together with
    their scores.
    """
    st.title("Keyword Extraction")
    st.write("Enter your document below:")

    # Get user input
    doc = st.text_area("Document")

    # Get user choice for stopwords removal (checked by default)
    remove_stopwords = st.checkbox("Remove Stopwords", value=True)

    # Get user choice for MMR (checked by default)
    apply_mmr = st.checkbox("Apply Maximal Marginal Relevance (MMR)", value=True)

    # Get user choice for number of results (slider)
    num_results = st.slider(
        "Number of Results", min_value=1, max_value=30, value=5, step=1
    )

    if not st.button("Extract Keywords"):
        return

    # Guard against blank input: there is nothing to extract from.
    if not doc or not doc.strip():
        st.warning("Please enter a document first.")
        return

    # Ticking the checkbox must ENABLE the English stop-word list.
    # MMR is delegated to KeyBERT itself via use_mmr/diversity, and top_n
    # makes the slider value take effect in both modes.
    keywords = kw_model.extract_keywords(
        doc,
        stop_words="english" if remove_stopwords else None,
        top_n=num_results,
        use_mmr=apply_mmr,
        diversity=1 - MMR_RELEVANCE_WEIGHT,
    )

    if not keywords:
        st.warning("No keywords could be extracted from this document.")
        return

    # Report the number actually returned; short documents may yield fewer
    # keywords than requested.
    st.write(f"Top {len(keywords)} Keywords:")
    for keyword, score in keywords:
        st.write(f"- {keyword} (Score: {score})")


# Run the app
if __name__ == "__main__":
    main()