Spaces:

zayedupal
/

google_ads_space

Running

App Files Files Community

zayed-upal committed on Sep 5, 2023

Commit

4c25316

•

1 Parent(s): 325466a

Google ads format download added, topic name rename option added

Browse files

Files changed (2) hide show

Functionalities/TopicClustering.py +24 -3
pages/2_Topic_Cluster.py +28 -1

Functionalities/TopicClustering.py CHANGED Viewed

@@ -9,14 +9,16 @@ import plotly.graph_objects as go
 class TopicClustering:
     def __init__(self, keyword_df, text_col, representation_model, sentence_model):
         self.topic_model = None
         self.embeddings = None
         self.keyword_df, self.text_col = keyword_df, text_col
         self.sentence_model = SentenceTransformer(sentence_model)
         self.representation_model = NLP_Helper.get_bertopic_representation(representation_model)
     def topic_cluster_bert(self) -> None:
-        self.embeddings = self.sentence_model.encode(self.keyword_df[self.text_col], show_progress_bar=False)
         self.topic_model = BERTopic(representation_model=self.representation_model,
                                     embedding_model=self.sentence_model,
                                     n_gram_range=(1, 3), top_n_words=2)
@@ -34,7 +36,7 @@ class TopicClustering:
         self.keyword_df = pd.merge(topic_info, self.keyword_df, on=['Topic'])
         self.keyword_df.rename(columns={'Name': 'Topic Name'}, inplace=True)
         self.keyword_df.drop(columns=['CustomName'], inplace=True)
     def visualize_documents(self, n_neighbors) -> go.Figure:
         reduced_embeddings = UMAP(n_neighbors=n_neighbors, n_components=2, min_dist=0.0, metric='cosine').fit_transform(
@@ -46,5 +48,24 @@ class TopicClustering:
         return fig
     def visualize_topic_distribution(self) -> go.Figure:
-        fig = self.topic_model.visualize_barchart(custom_labels=True, top_n_topics=5, n_words=20, title='Topic Distribution')
         return fig

 class TopicClustering:
     def __init__(self, keyword_df, text_col, representation_model, sentence_model):
         """Store the inputs, reset clustering state and build the models.

         Args:
             keyword_df: Data frame holding the keywords to cluster.
             text_col: Name of the column in ``keyword_df`` that holds the text.
             representation_model: Value handed to
                 ``NLP_Helper.get_bertopic_representation`` to obtain the
                 representation model.
             sentence_model: Value handed to the ``SentenceTransformer``
                 constructor (presumably a model name or path -- confirm
                 against the caller).
         """
         # Inputs supplied by the caller.
         self.keyword_df = keyword_df
         self.text_col = text_col

         # State that stays empty until clustering / renaming has happened.
         self.topic_model = None
         self.embeddings = None
         self.topic_names = None
         self.topic_name_mapping = {}

         # Models derived from the constructor arguments.
         self.sentence_model = SentenceTransformer(sentence_model)
         self.representation_model = NLP_Helper.get_bertopic_representation(representation_model)
     def topic_cluster_bert(self) -> None:
+        self.embeddings = self.sentence_model.encode(self.keyword_df[self.text_col], show_progress_bar=True)
         self.topic_model = BERTopic(representation_model=self.representation_model,
                                     embedding_model=self.sentence_model,
                                     n_gram_range=(1, 3), top_n_words=2)
         self.keyword_df = pd.merge(topic_info, self.keyword_df, on=['Topic'])
         self.keyword_df.rename(columns={'Name': 'Topic Name'}, inplace=True)
         self.keyword_df.drop(columns=['CustomName'], inplace=True)
+        self.topic_names = topic_labels
     def visualize_documents(self, n_neighbors) -> go.Figure:
         reduced_embeddings = UMAP(n_neighbors=n_neighbors, n_components=2, min_dist=0.0, metric='cosine').fit_transform(
         return fig
     def visualize_topic_distribution(self) -> go.Figure:
         """Return the topic model's bar chart titled 'Topic Distribution'.

         The chart is requested with custom labels, the top 5 topics and
         20 words per topic. ``self.topic_model`` must already be set; it is
         ``None`` right after construction.
         """
         barchart_options = {
             'custom_labels': True,
             'top_n_topics': 5,
             'n_words': 20,
             'title': 'Topic Distribution',
         }
         return self.topic_model.visualize_barchart(**barchart_options)
+    def update_topic_names(self):
+        for k in self.topic_name_mapping:
+            self.keyword_df['Topic Name'][self.keyword_df['Topic Name'] == k] = self.topic_name_mapping[k]
+        self.topic_names = self.topic_name_mapping.values()
+        self.topic_name_mapping = {}
+    def get_df_in_google_ads_format(self, campaign_name):
+        keyword_df_google_ads = pd.DataFrame(
+            columns=['Action', 'Keyword status', 'Campaign', 'Ad group', 'Keyword', 'Match Type'])
+        keyword_df_google_ads['Ad group'] = self.keyword_df['Topic Name']
+        keyword_df_google_ads['Keyword'] = self.keyword_df[self.text_col]
+        keyword_df_google_ads['Match Type'] = 'Phrase'
+        keyword_df_google_ads['Action'] = 'Add'
+        keyword_df_google_ads['Keyword status'] = 'Enabled'
+        keyword_df_google_ads['Campaign'] = campaign_name
+        return keyword_df_google_ads

pages/2_Topic_Cluster.py CHANGED Viewed

@@ -4,6 +4,7 @@ from Functionalities import NLP_Helper
 from Functionalities.TopicClustering import TopicClustering
 from streamlit_extras.dataframe_explorer import dataframe_explorer
 class TopicClusterView:
     def __init__(self):
         self.n_neighbors = 10
@@ -60,14 +61,39 @@ class TopicClusterView:
         if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
             filtered_df = dataframe_explorer(st.session_state.topic_cluster.keyword_df)
             st.dataframe(filtered_df)
             st.download_button(
-                "Press to Download",
                 st.session_state.topic_cluster.keyword_df.to_csv(index=False).encode('utf-8'),
                 "Clustered.csv",
                 "text/csv",
                 key='download-csv'
             )
     def visualize_clusters(self):
         if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
             self.n_neighbors = st.slider(label='Size of the local neighborhood', min_value=2, max_value=100, step=1)
@@ -85,6 +111,7 @@ class TopicClusterView:
                 fig = st.session_state.topic_cluster.visualize_topic_distribution()
                 st.plotly_chart(fig, use_container_width=True, theme=None)
 if __name__ == '__main__':
     topic_cluster_view = TopicClusterView()
     # tab1, tab2, tab3 = st.tabs(['Clustering Process', 'Cluster Visualization', 'Topic Distribution'])

 from Functionalities.TopicClustering import TopicClustering
 from streamlit_extras.dataframe_explorer import dataframe_explorer
 class TopicClusterView:
     def __init__(self):
         self.n_neighbors = 10
         if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
             filtered_df = dataframe_explorer(st.session_state.topic_cluster.keyword_df)
             st.dataframe(filtered_df)
+            with st.expander("Rename Topics"):
+                for topic_name in st.session_state.topic_cluster.topic_names:
+                    cur_topic_col, new_topic_col = st.columns(2)
+                    with cur_topic_col:
+                        cur_topic_col.write(topic_name)
+                    with new_topic_col:
+                        st.session_state.topic_cluster.topic_name_mapping[topic_name] = \
+                            st.text_input("New topic name", topic_name)
+                if st.button("Update Topic Names"):
+                    st.session_state.topic_cluster.update_topic_names()
+                    st.experimental_rerun()
             st.download_button(
+                "Press to Download as CSV",
                 st.session_state.topic_cluster.keyword_df.to_csv(index=False).encode('utf-8'),
                 "Clustered.csv",
                 "text/csv",
                 key='download-csv'
             )
+            with st.expander("Download as CSV for Bulk upload in Google Ads"):
+                campaign_name = st.text_input("Campaign Name", "Demo Campaign")
+                st.dataframe(st.session_state.topic_cluster.get_df_in_google_ads_format(campaign_name))
+                st.download_button(
+                    "Download as CSV for Bulk upload in Google Ads",
+                    st.session_state.topic_cluster.get_df_in_google_ads_format(campaign_name).to_csv(
+                        index=False).encode('utf-8'),
+                    f"{campaign_name}_keywords_upload.csv",
+                    "text/csv",
+                    key='download-google-csv'
+                )
     def visualize_clusters(self):
         if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
             self.n_neighbors = st.slider(label='Size of the local neighborhood', min_value=2, max_value=100, step=1)
                 fig = st.session_state.topic_cluster.visualize_topic_distribution()
                 st.plotly_chart(fig, use_container_width=True, theme=None)
 if __name__ == '__main__':
     topic_cluster_view = TopicClusterView()
     # tab1, tab2, tab3 = st.tabs(['Clustering Process', 'Cluster Visualization', 'Topic Distribution'])