Spaces:
Runtime error
Runtime error
aus10powell
committed on
Commit
•
07768ac
1
Parent(s):
8ba48e6
Upload twitter_scraper.py
Browse files- scripts/twitter_scraper.py +39 -1
scripts/twitter_scraper.py
CHANGED
@@ -3,7 +3,44 @@ import pandas as pd
|
|
3 |
import datetime as dt
|
4 |
from tqdm import tqdm
|
5 |
import requests
|
|
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
def get_tweets(
|
9 |
query: str,
|
@@ -131,13 +168,14 @@ def get_twitter_account_info(twitter_handle: str) -> dict:
|
|
131 |
|
132 |
# Get the user's profile information
|
133 |
user_profile = user_scraper.entity
|
|
|
134 |
return {
|
135 |
"name": user_profile.displayname,
|
136 |
"username": user_profile.username,
|
137 |
"user_id": user_profile.id,
|
138 |
"follower_count": user_profile.followersCount,
|
139 |
"friends_count": user_profile.friendsCount,
|
140 |
-
"verified": user_profile.verified,
|
141 |
}
|
142 |
|
143 |
|
|
|
3 |
import datetime as dt
|
4 |
from tqdm import tqdm
|
5 |
import requests
|
6 |
+
from scripts import sentiment
|
7 |
|
8 |
+
def get_latest_account_tweets(handle, count=200):
    """Fetch the most recent tweets of an account as a DataFrame.

    Credentials are read from the ``AUTHENTICATION`` section of
    ``tweepy_auth.ini`` in the current working directory.

    Args:
        handle: Twitter screen name whose timeline is requested.
        count: Number of tweets to request. The v1.1 ``user_timeline``
            endpoint serves at most 200 tweets per request, so larger
            values do not yield more rows.

    Returns:
        A DataFrame with one row per tweet (the raw JSON fields returned by
        the API), ordered oldest to newest, plus two added columns:
        ``clean_text`` (output of ``sentiment.tweet_cleaner`` on the full
        tweet text) and ``handle`` (screen name reported by the API).
        When the timeline is empty, an empty DataFrame with the columns
        ``created_at``, ``clean_text`` and ``handle`` is returned.

    Raises:
        KeyError: If ``tweepy_auth.ini`` is missing (``ConfigParser.read``
            silently skips unreadable files) or lacks the expected
            section/keys.
    """
    import configparser

    import tweepy

    config = configparser.ConfigParser()
    config.read("tweepy_auth.ini")

    # Get the authentication details
    credentials = config["AUTHENTICATION"]
    auth = tweepy.OAuthHandler(
        credentials["twitter_consumer_key"],
        credentials["twitter_consumer_secret"],
    )
    auth.set_access_token(
        credentials["twitter_access_token"],
        credentials["twitter_access_token_secret"],
    )

    # Create the API object
    api = tweepy.API(auth)

    # Load the tweets from a specific user
    tweets = api.user_timeline(
        screen_name=handle, count=count, tweet_mode="extended"
    )

    df_tweets = pd.DataFrame(data=[t._json for t in tweets])
    if df_tweets.empty:
        # Nothing to clean or sort; keep the columns callers rely on.
        return pd.DataFrame(columns=["created_at", "clean_text", "handle"])

    # Attach the cleaned text BEFORE sorting: assigning a plain list is
    # positional, so doing it after the sort would pair texts with the
    # wrong tweets.
    df_tweets["clean_text"] = [
        sentiment.tweet_cleaner(t.full_text) for t in tweets
    ]

    # "created_at" arrives as text like "Wed Oct 10 20:19:24 +0000 2018";
    # sort on the parsed timestamps so the order is chronological rather
    # than alphabetical, while leaving the column's stored values as-is.
    df_tweets = df_tweets.sort_values(
        "created_at",
        key=lambda col: pd.to_datetime(
            col, format="%a %b %d %H:%M:%S %z %Y", utc=True
        ),
    )

    df_tweets["handle"] = df_tweets["user"].iloc[0]["screen_name"]
    return df_tweets
|
44 |
|
45 |
def get_tweets(
|
46 |
query: str,
|
|
|
168 |
|
169 |
# Get the user's profile information
|
170 |
user_profile = user_scraper.entity
|
171 |
+
check_string = lambda s: "false" if str(s).lower() == "false" else "true"
|
172 |
return {
|
173 |
"name": user_profile.displayname,
|
174 |
"username": user_profile.username,
|
175 |
"user_id": user_profile.id,
|
176 |
"follower_count": user_profile.followersCount,
|
177 |
"friends_count": user_profile.friendsCount,
|
178 |
+
"verified": check_string(user_profile.verified),
|
179 |
}
|
180 |
|
181 |
|