zayed-upal committed on
Commit
4c25316
1 Parent(s): 325466a

Google ads format download added, topic name rename option added

Browse files
Functionalities/TopicClustering.py CHANGED
@@ -9,14 +9,16 @@ import plotly.graph_objects as go
9
 
10
  class TopicClustering:
11
  def __init__(self, keyword_df, text_col, representation_model, sentence_model):
 
12
  self.topic_model = None
13
  self.embeddings = None
 
14
  self.keyword_df, self.text_col = keyword_df, text_col
15
  self.sentence_model = SentenceTransformer(sentence_model)
16
  self.representation_model = NLP_Helper.get_bertopic_representation(representation_model)
17
 
18
  def topic_cluster_bert(self) -> None:
19
- self.embeddings = self.sentence_model.encode(self.keyword_df[self.text_col], show_progress_bar=False)
20
  self.topic_model = BERTopic(representation_model=self.representation_model,
21
  embedding_model=self.sentence_model,
22
  n_gram_range=(1, 3), top_n_words=2)
@@ -34,7 +36,7 @@ class TopicClustering:
34
  self.keyword_df = pd.merge(topic_info, self.keyword_df, on=['Topic'])
35
  self.keyword_df.rename(columns={'Name': 'Topic Name'}, inplace=True)
36
  self.keyword_df.drop(columns=['CustomName'], inplace=True)
37
-
38
 
39
  def visualize_documents(self, n_neighbors) -> go.Figure:
40
  reduced_embeddings = UMAP(n_neighbors=n_neighbors, n_components=2, min_dist=0.0, metric='cosine').fit_transform(
@@ -46,5 +48,24 @@ class TopicClustering:
46
  return fig
47
 
48
  def visualize_topic_distribution(self) -> go.Figure:
49
- fig = self.topic_model.visualize_barchart(custom_labels=True, top_n_topics=5, n_words=20, title='Topic Distribution')
 
50
  return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  class TopicClustering:
11
  def __init__(self, keyword_df, text_col, representation_model, sentence_model):
12
+ self.topic_names = None
13
  self.topic_model = None
14
  self.embeddings = None
15
+ self.topic_name_mapping = {}
16
  self.keyword_df, self.text_col = keyword_df, text_col
17
  self.sentence_model = SentenceTransformer(sentence_model)
18
  self.representation_model = NLP_Helper.get_bertopic_representation(representation_model)
19
 
20
  def topic_cluster_bert(self) -> None:
21
+ self.embeddings = self.sentence_model.encode(self.keyword_df[self.text_col], show_progress_bar=True)
22
  self.topic_model = BERTopic(representation_model=self.representation_model,
23
  embedding_model=self.sentence_model,
24
  n_gram_range=(1, 3), top_n_words=2)
 
36
  self.keyword_df = pd.merge(topic_info, self.keyword_df, on=['Topic'])
37
  self.keyword_df.rename(columns={'Name': 'Topic Name'}, inplace=True)
38
  self.keyword_df.drop(columns=['CustomName'], inplace=True)
39
+ self.topic_names = topic_labels
40
 
41
  def visualize_documents(self, n_neighbors) -> go.Figure:
42
  reduced_embeddings = UMAP(n_neighbors=n_neighbors, n_components=2, min_dist=0.0, metric='cosine').fit_transform(
 
48
  return fig
49
 
50
  def visualize_topic_distribution(self) -> go.Figure:
51
+ fig = self.topic_model.visualize_barchart(custom_labels=True, top_n_topics=5, n_words=20,
52
+ title='Topic Distribution')
53
  return fig
54
+
55
def update_topic_names(self):
    """Apply the pending topic renames to ``keyword_df`` and reset the mapping.

    Reads ``self.topic_name_mapping`` (current topic name -> new topic name)
    and rewrites the ``'Topic Name'`` column of ``self.keyword_df``
    accordingly. Afterwards ``self.topic_names`` holds the new names as a
    list (duplicates removed, first-seen order kept) and
    ``self.topic_name_mapping`` is reset to an empty dict.

    If the mapping is empty nothing is changed, so the existing topic names
    are not wiped out.
    """
    renames = self.topic_name_mapping
    if not renames:
        return

    # Translate every row in a single pass through the *original* names.
    # A per-key loop would cascade (A -> B followed by B -> C turns A rows
    # into C), and chained indexing (df[col][mask] = ...) is not guaranteed
    # to write back to the frame under pandas Copy-on-Write.
    current_names = self.keyword_df['Topic Name']
    self.keyword_df['Topic Name'] = current_names.map(
        lambda name: renames.get(name, name))

    # Materialise as a list and drop duplicates: two topics renamed to the
    # same label must show up only once to whoever iterates topic_names.
    self.topic_names = list(dict.fromkeys(renames.values()))
    self.topic_name_mapping = {}
61
+
62
def get_df_in_google_ads_format(self, campaign_name):
    """Build a keyword table laid out for a Google Ads bulk upload.

    Each row of ``self.keyword_df`` becomes one output row: the row's
    ``'Topic Name'`` is used as the ad group and the value in
    ``self.text_col`` as the keyword. Every row gets the constant values
    ``'Add'`` (Action), ``'Enabled'`` (Keyword status), ``'Phrase'``
    (Match Type) and the given ``campaign_name`` (Campaign).

    :param campaign_name: value written to the ``'Campaign'`` column.
    :return: a new DataFrame with the columns ``Action``, ``Keyword status``,
        ``Campaign``, ``Ad group``, ``Keyword`` and ``Match Type``.
    """
    source = self.keyword_df
    # Scalars are broadcast along the index of the two Series columns;
    # dict order fixes the column order of the result.
    ads_columns = {
        'Action': 'Add',
        'Keyword status': 'Enabled',
        'Campaign': campaign_name,
        'Ad group': source['Topic Name'],
        'Keyword': source[self.text_col],
        'Match Type': 'Phrase',
    }
    return pd.DataFrame(ads_columns)
pages/2_Topic_Cluster.py CHANGED
@@ -4,6 +4,7 @@ from Functionalities import NLP_Helper
4
  from Functionalities.TopicClustering import TopicClustering
5
  from streamlit_extras.dataframe_explorer import dataframe_explorer
6
 
 
7
  class TopicClusterView:
8
  def __init__(self):
9
  self.n_neighbors = 10
@@ -60,14 +61,39 @@ class TopicClusterView:
60
  if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
61
  filtered_df = dataframe_explorer(st.session_state.topic_cluster.keyword_df)
62
  st.dataframe(filtered_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  st.download_button(
64
- "Press to Download",
65
  st.session_state.topic_cluster.keyword_df.to_csv(index=False).encode('utf-8'),
66
  "Clustered.csv",
67
  "text/csv",
68
  key='download-csv'
69
  )
70
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def visualize_clusters(self):
72
  if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
73
  self.n_neighbors = st.slider(label='Size of the local neighborhood', min_value=2, max_value=100, step=1)
@@ -85,6 +111,7 @@ class TopicClusterView:
85
  fig = st.session_state.topic_cluster.visualize_topic_distribution()
86
  st.plotly_chart(fig, use_container_width=True, theme=None)
87
 
 
88
  if __name__ == '__main__':
89
  topic_cluster_view = TopicClusterView()
90
  # tab1, tab2, tab3 = st.tabs(['Clustering Process', 'Cluster Visualization', 'Topic Distribution'])
 
4
  from Functionalities.TopicClustering import TopicClustering
5
  from streamlit_extras.dataframe_explorer import dataframe_explorer
6
 
7
+
8
  class TopicClusterView:
9
  def __init__(self):
10
  self.n_neighbors = 10
 
61
  if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
62
  filtered_df = dataframe_explorer(st.session_state.topic_cluster.keyword_df)
63
  st.dataframe(filtered_df)
64
+ with st.expander("Rename Topics"):
65
+ for topic_name in st.session_state.topic_cluster.topic_names:
66
+ cur_topic_col, new_topic_col = st.columns(2)
67
+ with cur_topic_col:
68
+ cur_topic_col.write(topic_name)
69
+ with new_topic_col:
70
+ st.session_state.topic_cluster.topic_name_mapping[topic_name] = \
71
+ st.text_input("New topic name", topic_name)
72
+
73
+ if st.button("Update Topic Names"):
74
+ st.session_state.topic_cluster.update_topic_names()
75
+ st.experimental_rerun()
76
+
77
  st.download_button(
78
+ "Press to Download as CSV",
79
  st.session_state.topic_cluster.keyword_df.to_csv(index=False).encode('utf-8'),
80
  "Clustered.csv",
81
  "text/csv",
82
  key='download-csv'
83
  )
84
 
85
+ with st.expander("Download as CSV for Bulk upload in Google Ads"):
86
+ campaign_name = st.text_input("Campaign Name", "Demo Campaign")
87
+ st.dataframe(st.session_state.topic_cluster.get_df_in_google_ads_format(campaign_name))
88
+ st.download_button(
89
+ "Download as CSV for Bulk upload in Google Ads",
90
+ st.session_state.topic_cluster.get_df_in_google_ads_format(campaign_name).to_csv(
91
+ index=False).encode('utf-8'),
92
+ f"{campaign_name}_keywords_upload.csv",
93
+ "text/csv",
94
+ key='download-google-csv'
95
+ )
96
+
97
  def visualize_clusters(self):
98
  if (st.session_state.topic_cluster is not None) and (st.session_state.topic_cluster.topic_model is not None):
99
  self.n_neighbors = st.slider(label='Size of the local neighborhood', min_value=2, max_value=100, step=1)
 
111
  fig = st.session_state.topic_cluster.visualize_topic_distribution()
112
  st.plotly_chart(fig, use_container_width=True, theme=None)
113
 
114
+
115
  if __name__ == '__main__':
116
  topic_cluster_view = TopicClusterView()
117
  # tab1, tab2, tab3 = st.tabs(['Clustering Process', 'Cluster Visualization', 'Topic Distribution'])