Bayhaqy committed
Commit 65d3732 • 1 Parent(s): 82508ca

Create πŸ“±_X_Scrapping.py

Files changed (1)
  1. pages/πŸ“±_X_Scrapping.py +534 -0
pages/πŸ“±_X_Scrapping.py ADDED
@@ -0,0 +1,534 @@
+ # Data Analysis and Profiling
+ import pandas as pd
+ from ydata_profiling import ProfileReport
+ from streamlit_pandas_profiling import st_profile_report
+
+ # Streamlit for Building the Dashboard
+ import streamlit as st
+ import streamlit_pandas_profiling
+
+ # Language Detection
+ from langdetect import detect
+
+ # NLP and Text Processing
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from deep_translator import GoogleTranslator
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.stem import WordNetLemmatizer
+ from bs4 import BeautifulSoup
+
+ # Sentiment Analysis
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+ from textblob import TextBlob
+
+ # URL Parsing
+ from urllib.parse import urlparse
+
+ # Data Visualization
+ import plotly.express as px
+ import matplotlib.pyplot as plt
+
+ # Word Cloud Generation
+ from wordcloud import WordCloud
+
+ # Other Libraries
+ import torch
+ import requests
+ import subprocess
+ import logging
+ import re
+ import os
+
+ # NLTK Data Download ('stopwords' is required by the preprocessing step below)
+ nltk.download('wordnet')
+ nltk.download('punkt')
+ nltk.download('stopwords')
+
+ ## ............................................... ##
+ # Set page configuration (call this once and make changes as needed)
+ st.set_page_config(page_title='(Tweet) X Scrapper Dashboard', layout='wide', page_icon=':rocket:')
+
+
+ ## ............................................... ##
+ with st.container():
+     # Define Streamlit app title and introduction
+     st.title("(Tweet) X Scrapper Dashboard")
+     st.write("Created by Bayhaqy")
+
+     # Sidebar content
+     st.sidebar.subheader("About the app")
+     st.sidebar.info("This app lets you collect tweets and run analysis and predictions with the (Tweet) X Scrapper tool.")
+
+     url = "https://blogs.bayhaqy.my.id/2023/10/auth-token-twitter.html"
+     st.sidebar.markdown("Check this [link](%s) for a guide on how to get your own X Auth Token" % url)
+
+     st.sidebar.write("\n\n")
+     st.sidebar.markdown("**Please contact me if you have any questions**")
+     st.sidebar.write("\n\n")
+     st.sidebar.divider()
+     st.sidebar.markdown("© 2023 (Tweet) X Scrapper Dashboard")
+
+ ## ............................................... ##
+ # Function to install Node.js (tweet-harvest needs Node.js 20 or newer)
+ @st.cache_data
+ def install_nodejs():
+     node_major_version = int(subprocess.check_output(['node', '-v']).decode("utf-8").split('.')[0][1:])
+
+     if node_major_version < 20:
+         #st.markdown('Update OS')
+         subprocess.check_call(['sudo', 'apt-get', 'update'])
+
+         st.markdown('Downloading NodeSource prerequisites')
+         subprocess.check_call(['sudo', 'apt-get', 'install', '-y', 'ca-certificates', 'curl', 'gnupg'])
+         subprocess.check_call(['sudo', 'mkdir', '-p', '/etc/apt/keyrings'])
+         subprocess.check_call('curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg', shell=True)
+
+         NODE_MAJOR = 20
+         node_source_entry = f"deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_{NODE_MAJOR}.x nodistro main"
+         subprocess.check_call(f'echo "{node_source_entry}" | sudo tee /etc/apt/sources.list.d/nodesource.list', shell=True)
+
+         #st.markdown('Install Node.js')
+         subprocess.check_call(['sudo', 'apt-get', 'update'])
+         subprocess.check_call(['sudo', 'apt-get', 'install', 'nodejs', '-y'])
+
+         result = subprocess.check_output(['node', '-v']).decode("utf-8")
+         #st.markdown(f'Node.js version: {result}')
+     else:
+         #st.markdown('Node.js version already installed')
+         result = subprocess.check_output(['node', '-v']).decode("utf-8")
+         #st.markdown(f'Node.js version already updated to {result}')
+
+ ## ............................................... ##
+ # Function to run tweet-harvest
+ @st.cache_data
+ def run_X_scrapping(search_keyword, from_date, to_date, limit, delay, token, filename):
+     # Run tweet-harvest with the provided parameters
+     #st.markdown('Check Tweet')
+     command = f'npx --yes tweet-harvest@latest -s "{search_keyword}" -f "{from_date}" -t "{to_date}" -l {limit} -d {delay} --token "{token}" -o "{filename}"'
+     try:
+         result = subprocess.run(command, shell=True, capture_output=True, text=True, check=True)
+         #st.markdown("Command executed successfully.")
+         #st.markdown(result.stdout)  # Display the standard output; uncomment to show it
+     except subprocess.CalledProcessError as e:
+         st.markdown("Error: The command returned a non-zero exit status.")
+         st.markdown(f"Error message: {e}")
+         st.markdown(f'Standard output: {e.stdout}')
+         st.markdown(f'Standard error: {e.stderr}')
+
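+ # Note: tweet-harvest is expected to write its CSV output under the
+ # tweets-data/ folder, which is where selection_data() below reads it from.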
+ ## ............................................... ##
+ # Function to get the model and tokenizer
+ @st.cache_resource
+ def get_models_and_tokenizers():
+     model_name = 'distilbert-base-uncased-finetuned-sst-2-english'
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
+     #model.eval()
+
+     return model, tokenizer
+
+ ## ............................................... ##
+ # Function for sentiment analysis
+ @st.cache_resource
+ def analyze_sentiment_distilbert(text, _model, _tokenizer):
+     try:
+         tokens_info = _tokenizer(text, truncation=True, return_tensors="pt")
+         with torch.no_grad():
+             raw_predictions = _model(**tokens_info).logits
+
+         predicted_class_id = raw_predictions.argmax().item()
+         predict = _model.config.id2label[predicted_class_id]
+
+         softmaxed = int(torch.nn.functional.softmax(raw_predictions[0], dim=0)[1] * 100)
+         if softmaxed > 70:
+             status = 'Not trust'
+         elif softmaxed > 40:
+             status = 'Not sure'
+         else:
+             status = 'Trust'
+         return status, predict
+
+     except Exception as e:
+         logging.error(f"Sentiment analysis error: {str(e)}")
+         return 'N/A', 'N/A'
+
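+ # For this SST-2 checkpoint, id2label maps 0 -> 'NEGATIVE' and 1 -> 'POSITIVE',
+ # so `softmaxed` above is the positive-class probability in percent; the 70/40
+ # cutoffs turn it into the three 'Fake Check' trust levels.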
+ ## ............................................... ##
+ # Function for sentiment analysis using VADER
+ @st.cache_resource
+ def analyze_sentiment_vader(text):
+     analyzer = SentimentIntensityAnalyzer()
+     sentiment = analyzer.polarity_scores(text)
+     compound_score = sentiment['compound']
+     if compound_score >= 0.05:
+         return 'Positive'
+     elif compound_score <= -0.05:
+         return 'Negative'
+     else:
+         return 'Neutral'
+
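+ # The +/-0.05 compound-score cutoffs are the thresholds recommended by the
+ # VADER documentation for splitting text into positive/negative/neutral.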
+ ## ............................................... ##
+ # Function for sentiment analysis using TextBlob
+ @st.cache_resource
+ def analyze_sentiment_textblob(text):
+     analysis = TextBlob(text)
+     polarity = analysis.sentiment.polarity
+     if polarity > 0:
+         return 'Positive'
+     elif polarity < 0:
+         return 'Negative'
+     else:
+         return 'Neutral'
+
+ ## ............................................... ##
+ # Function for translation
+ @st.cache_data
+ def translate_text(text, source='auto', target='en'):
+     try:
+         if source != target:
+             text = GoogleTranslator(source=source, target=target).translate(text)
+         return text
+
+     except Exception as e:
+         logging.error(f"Translation error: {str(e)}")
+         return text
+
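+ # deep_translator's GoogleTranslator also accepts source='auto' to let Google
+ # detect the source language; the caller below passes the langdetect code
+ # explicitly instead.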
+ ## ............................................... ##
+ # Function to load and transform the data
+ @st.cache_data
+ def selection_data(filename):
+     file_path = f"tweets-data/{filename}"
+     df = pd.read_csv(file_path, delimiter=";")
+
+     # Rename columns
+     column_mapping = {
+         'created_at': 'Created Date',
+         'user_id_str': 'User ID',
+         'username': 'Username',
+         'full_text': 'Tweet',
+         'tweet_url': 'Tweet URL',
+         'id_str': 'Tweet ID',
+         'conversation_id_str': 'Conversation ID',
+         'lang': 'App Language',
+         'quote_count': 'Quote Count',
+         'reply_count': 'Reply Count',
+         'retweet_count': 'Retweet Count',
+         'favorite_count': 'Favorite Count',
+     }
+
+     df = df.rename(columns=column_mapping)
+
+     # Add a new column for detected language
+     df['Detect Language'] = df['Tweet'].apply(lambda tweet: detect(tweet))
+
+     # Mapping language codes to language names
+     language_to_country = {
+         'af': 'Afrikaans',
+         'ar': 'Arabic',
+         'bg': 'Bulgarian',
+         'bn': 'Bengali',
+         'ca': 'Catalan',
+         'cs': 'Czech',
+         'cy': 'Welsh',
+         'da': 'Danish',
+         'de': 'German',
+         'el': 'Greek',
+         'en': 'English',
+         'es': 'Spanish',
+         'et': 'Estonian',
+         'fa': 'Persian',
+         'fi': 'Finnish',
+         'fr': 'French',
+         'gu': 'Gujarati',
+         'he': 'Hebrew',
+         'hi': 'Hindi',
+         'hr': 'Croatian',
+         'hu': 'Hungarian',
+         'id': 'Indonesian',
+         'it': 'Italian',
+         'ja': 'Japanese',
+         'kn': 'Kannada',
+         'ko': 'Korean',
+         'lt': 'Lithuanian',
+         'lv': 'Latvian',
+         'mk': 'Macedonian',
+         'ml': 'Malayalam',
+         'mr': 'Marathi',
+         'ne': 'Nepali',
+         'nl': 'Dutch',
+         'no': 'Norwegian',
+         'pa': 'Punjabi',
+         'pl': 'Polish',
+         'pt': 'Portuguese',
+         'ro': 'Romanian',
+         'ru': 'Russian',
+         'sk': 'Slovak',
+         'sl': 'Slovenian',
+         'so': 'Somali',
+         'sq': 'Albanian',
+         'sv': 'Swedish',
+         'sw': 'Swahili',
+         'ta': 'Tamil',
+         'te': 'Telugu',
+         'th': 'Thai',
+         'tl': 'Tagalog',
+         'tr': 'Turkish',
+         'uk': 'Ukrainian',
+         'ur': 'Urdu',
+         'vi': 'Vietnamese',
+         'zh-cn': 'Simplified Chinese',
+         'zh-tw': 'Traditional Chinese'
+     }
+
+     # Add a 'Language' column to df
+     df['Language'] = df['Detect Language'].map(language_to_country)
+
+     # Sort columns
+     desired_columns = ['Created Date', 'User ID', 'Username', 'Tweet', 'Language', 'Detect Language', 'App Language', 'Tweet URL', 'Tweet ID', 'Conversation ID', 'Quote Count', 'Reply Count', 'Retweet Count', 'Favorite Count']
+     df = df[desired_columns]
+
+     # Set data types
+     data_types = {
+         'Created Date': 'datetime64[ns]',
+         'User ID': 'int64',
+         'Username': 'object',
+         'Tweet': 'object',
+         'Language': 'object',
+         'Detect Language': 'object',
+         'App Language': 'object',
+         'Tweet URL': 'object',
+         'Tweet ID': 'int64',
+         'Conversation ID': 'int64',
+         'Quote Count': 'int64',
+         'Reply Count': 'int64',
+         'Retweet Count': 'int64',
+         'Favorite Count': 'int64',
+     }
+
+     df = df.astype(data_types)
+
+     return df
+
+ ## ............................................... ##
+ # Function to preprocess the data
+ @st.cache_data
+ def preprocessing_data(df):
+     # Remove duplicates
+     df = df.drop_duplicates(subset='Translation')
+
+     # Function to clean and preprocess text
+     def clean_text(text):
+         # Remove mentions (e.g., @username)
+         text = re.sub(r'@[\w]+', '', text)
+
+         # Remove URLs
+         text = re.sub(r'http\S+', '', text)
+
+         # Remove HTML tags
+         text = BeautifulSoup(text, 'html.parser').get_text()
+
+         # Convert to lowercase
+         text = text.lower()
+
+         # Remove everything except letters and whitespace
+         text = re.sub(r'[^a-zA-Z\s]', '', text)
+
+         # Tokenize text
+         words = nltk.word_tokenize(text)
+
+         # Remove stopwords
+         stop_words = set(stopwords.words('english'))
+         words = [word for word in words if word not in stop_words]
+
+         # Lemmatize words
+         lemmatizer = WordNetLemmatizer()
+         words = [lemmatizer.lemmatize(word) for word in words]
+
+         return ' '.join(words)
+
+     # Apply the clean_text function to the "Translation" column
+     df['Cleaned Translation'] = df['Translation'].apply(clean_text)
+
+     return df
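+ # Example: clean_text('@user Check https://t.co/x <b>Great</b> day!!!')
+ # returns 'check great day' (mention, URL, and tags stripped, lowercased,
+ # punctuation dropped, stopwords removed, tokens lemmatized and re-joined).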
+
+ ## ............................................... ##
+ # Function to create a Word Cloud
+ @st.cache_data
+ def create_wordcloud(df):
+     # Combine all text
+     text = ' '.join(df['Cleaned Translation'])
+
+     # Create a Word Cloud
+     wordcloud = WordCloud(width=700, height=400, max_words=50).generate(text)
+
+     # Convert the word cloud to an image
+     wordcloud_image = wordcloud.to_image()
+
+     # Display the Word Cloud using st.image
+     st.write("Word Cloud by Tweets")
+     st.image(wordcloud_image, use_column_width=True)
+
+
+ ## ............................................... ##
+ # IMPORTANT: Cache the conversion to prevent computation on every rerun
+ @st.cache_data
+ def convert_df(df):
+     return df.to_csv().encode('utf-8')
+
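+ # Note: DataFrame.to_csv() includes the index column by default; pass
+ # index=False inside convert_df() to drop it from the download.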
+ ## ............................................... ##
+ # Set up logging
+ logging.basicConfig(filename='tweet_harvest.log', level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+ ## ............................................... ##
+ with st.container():
+     # Input search parameters
+     search_keyword = st.text_input("Enter search keyword", "Jakarta")
+
+     col1, col2 = st.columns(2)
+
+     with col1:
+         from_date = st.date_input('From Date :', pd.to_datetime('2023-01-01'))
+         to_date = st.date_input('To Date :', pd.to_datetime('2023-12-01'))
+     with col2:
+         limit = st.number_input("Enter limit", min_value=10, value=10, max_value=100)
+         delay = st.number_input("Enter delay in seconds", min_value=1, value=3)
+
+     token = st.text_input("Enter your X Auth Token", type="password")
+
+ ## ............................................... ##
+ with st.container():
+     col1, col2 = st.columns(2)
+
+     with col1:
+         # Checkbox options for different processing steps
+         include_translation = st.checkbox("Include Translation", value=False)
+         include_sentiment_analysis = st.checkbox("Include Sentiment Analysis", value=False)
+     with col2:
+         include_sentiment_vader = st.checkbox("Include VADER Sentiment Analysis", value=False)
+         include_sentiment_textblob = st.checkbox("Include TextBlob Sentiment Analysis", value=False)
+
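+ # Note: all three sentiment options operate on the 'Translation' column,
+ # which is only created when "Include Translation" is checked above.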
+ ## ............................................... ##
+ # Install Node.js if it is missing or outdated
+ install_nodejs()
+
+ # Initialize model and tokenizer
+ model, tokenizer = get_models_and_tokenizers()
+
+ # Create a variable to track whether the data has been processed
+ data_processed = False
+
+ ## ............................................... ##
+ # Create a button to trigger tweet-harvest
+ with st.container():
+     if st.button("Run it"):
+         # Format the dates as "DD-MM-YYYY"
+         from_date = from_date.strftime("%d-%m-%Y")
+         to_date = to_date.strftime("%d-%m-%Y")
+
+         filename = 'tweets_data.csv'
+
+         run_X_scrapping(search_keyword, from_date, to_date, limit, delay, token, filename)
+
+         df = selection_data(filename)
+
+         # Conditionally apply translation function to the 'Translation' column
+         if include_translation:
+             df['Translation'] = df.apply(lambda row: translate_text(row['Tweet'], source=row['Detect Language'], target='en'), axis=1)
+             df = preprocessing_data(df)
+
+         # Conditionally apply sentiment analysis function to the 'Translation' column
+         if include_sentiment_analysis:
+             df[['Fake Check', 'Sentiment Distilbert']] = df['Translation'].apply(lambda text: pd.Series(analyze_sentiment_distilbert(text, model, tokenizer))).apply(lambda x: x.str.title())
+
+         # Conditionally apply VADER sentiment analysis to the 'Translation' column
+         if include_sentiment_vader:
+             df['Sentiment VADER'] = df['Translation'].apply(analyze_sentiment_vader)
+
+         # Conditionally apply TextBlob sentiment analysis to the 'Translation' column
+         if include_sentiment_textblob:
+             df['Sentiment TextBlob'] = df['Translation'].apply(analyze_sentiment_textblob)
+
+         # Set data_processed to True when the data has been successfully processed
+         data_processed = True
+
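+     # Note: because run_X_scrapping() is cached with @st.cache_data, pressing
+     # "Run it" again with identical parameters reuses the cached result and
+     # skips re-scraping.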
+     ## ............................................... ##
+     # Add a button to download the data as a CSV file
+     if data_processed:
+         st.markdown("### Download Processed Data as CSV")
+         st.write("Click the button below to download the processed data as a CSV file.")
+         csv_data = convert_df(df)
+
+         # Create a downloadable link
+         st.download_button(
+             label="Download data as CSV",
+             data=csv_data,
+             file_name='processed_data.csv',
+             mime='text/csv',
+         )
+
+         with st.expander("See Table"):
+             ## ............................................... ##
+             # Display processed data
+             st.dataframe(df)
+
+         # Display exploratory data analysis
+         with st.expander("See EDA"):
+             ## ............................................... ##
+             # Tweet data visualizations
+             st.subheader("Tweet Data Visualization")
+
+             col1, col2 = st.columns(2)
+             with col1:
+                 ## ............................................... ##
+                 # Create a new column with a count of 1 for each tweet
+                 df_date = pd.DataFrame(df['Created Date'])
+                 df_date['Tweet Count'] = 1
+
+                 # Resample the data per second and calculate the count
+                 data_resampled = df_date.resample('S', on='Created Date')['Tweet Count'].count().reset_index()
+
+                 # Create a time series plot with custom styling
+                 fig = px.line(data_resampled, x='Created Date', y='Tweet Count', title='Tweet Counts Over Time')
+                 fig.update_xaxes(title_text='Time')
+                 fig.update_yaxes(title_text='Tweet Count')
+                 fig.update_layout(xaxis_rangeslider_visible=True)
+
+                 # Render the chart, sized to the container
+                 st.plotly_chart(fig, use_container_width=True)
+
+                 ## ............................................... ##
+                 # Group by Sentiment columns and get the count
+                 sentiment_counts = df[['Sentiment Distilbert', 'Sentiment VADER', 'Sentiment TextBlob']].apply(lambda x: x.value_counts()).T
+
+                 # Reset index to get Sentiment as a column
+                 sentiment_counts = sentiment_counts.reset_index()
+
+                 # Melt the DataFrame for easier plotting
+                 sentiment_counts = pd.melt(sentiment_counts, id_vars='index', var_name='Sentiment', value_name='Count')
+
+                 # Create the plot
+                 fig = px.bar(sentiment_counts, x='Sentiment', y='Count', color='index', barmode='group', title='Total Tweets per Sentiment')
+
+                 # Render the chart, sized to the container
+                 st.plotly_chart(fig, use_container_width=True)
+
+             with col2:
+                 ## ............................................... ##
+                 # Create a DataFrame to count the number of tweets by language
+                 language_counts = df['Language'].value_counts().reset_index()
+                 language_counts.columns = ['Language', 'Tweet Count']
+
+                 # Create an attractive Plotly bar chart
+                 fig = px.bar(language_counts, x='Language', y='Tweet Count', text='Tweet Count', title='Total Tweets by Language')
+                 fig.update_xaxes(title_text='Language')
+                 fig.update_yaxes(title_text='Total Tweets')
+
+                 # Render the chart, sized to the container
+                 st.plotly_chart(fig, use_container_width=True)
+
+             ## ............................................... ##
+             # Create wordcloud
+             create_wordcloud(df)
+
+             ## ............................................... ##
+             # Show dataset information
+             pr = ProfileReport(df)
+             st_profile_report(pr)