Spaces:

aus10powell
/

TwitterAccounts

Runtime error

File size: 5,732 Bytes
import snscrape.modules.twitter as sntwitter
import pandas as pd
import datetime as dt
from tqdm import tqdm
import requests


def get_tweets(
    query: str,
) -> list:
    """
    Runs a Twitter search and collects every result as a plain dictionary.

    Args:
        query (str): The search string handed to snscrape's TwitterSearchScraper.

    Returns:
        A list with one dictionary per scraped tweet, each built by
        extract_tweet_info, in the order the scraper yields them.
    """
    print(f"Fetching tweets with query: {query}")

    scraper = sntwitter.TwitterSearchScraper(query)
    collected = []
    # tqdm only reports progress; the scraper is consumed until it is exhausted.
    for item in tqdm(scraper.get_items()):
        collected.append(extract_tweet_info(item))
    return collected


def get_replies(username: str, conversation_id: str, max_tweets: int) -> list:
    """
    Fetches tweets addressed to a user that are newer than a given tweet ID,
    and returns a list of extracted tweet information.

    The search query used is
    "to:{username} since_id:{conversation_id} filter:safe".

    Args:
        username (str): The username the fetched tweets are addressed to.
        conversation_id (str): The tweet ID interpolated into the query's
            since_id: operator.
        max_tweets (int): The maximum number of tweets to return. Zero or a
            negative value yields an empty list.

    Returns:
        A list of at most max_tweets dictionaries, each built by
        extract_tweet_info.
    """
    from itertools import islice

    print(
        f"Fetching replies for username {username} and conversation {conversation_id}"
    )
    # NOTE(review): since_id looks like a "newer than this ID" filter rather
    # than a strict "belongs to this conversation" filter - confirm that this
    # query returns the intended set of replies.
    query = f"to:{username} since_id:{conversation_id} filter:safe"

    found = sntwitter.TwitterSearchScraper(query).get_items()
    # islice stops after exactly max_tweets items. The previous `i > max_tweets`
    # check let one extra tweet through and pulled a further item from the
    # scraper before breaking.
    capped = islice(found, max(max_tweets, 0))
    return [extract_tweet_info(tweet) for tweet in tqdm(capped)]


def get_tweet_by_id_and_username(username: str, tweet_id: str):
    """
    Builds the status URL of a tweet and runs it through the search scraper.

    Args:
        username (str): The username of the Twitter user who posted the tweet.
        tweet_id (str): The ID of the tweet to look up.

    Returns:
        The object returned by TwitterSearchScraper.get_items() for the
        tweet's URL. Nothing is consumed here; elsewhere in this file the
        same kind of object is iterated to obtain tweets.
    """
    # NOTE(review): the status URL is passed as a search query, so this
    # presumably yields search hits for that URL rather than the tweet
    # itself - confirm against snscrape's behaviour.
    scraper = sntwitter.TwitterSearchScraper(
        f"https://twitter.com/{username}/status/{tweet_id}"
    )
    return scraper.get_items()


def extract_tweet_info(tweet):
    """
    Flattens a tweet object into a dictionary of the fields used by this module.

    Args:
        tweet: An object exposing the attributes read below (date, user.username,
            rawContent, retweetCount, id, likeCount, replyCount,
            inReplyToTweetId, conversationId, viewCount).

    Returns:
        A dictionary with snake_case keys holding the values of those
        attributes, in the order listed below.
    """
    # Keyword order fixes the key order of the result (and therefore the
    # column order of any DataFrame built from it).
    return dict(
        date=tweet.date,
        username=tweet.user.username,
        content=tweet.rawContent,
        retweet_count=tweet.retweetCount,
        tweet_id=tweet.id,
        like_count=tweet.likeCount,
        reply_count=tweet.replyCount,
        in_reply_to_tweet_id=tweet.inReplyToTweetId,
        conversation_id=tweet.conversationId,
        view_count=tweet.viewCount,
    )


def get_follower_ids(
    username: str,
    limit: int = 20,
    start_date: dt.date = dt.date(year=2023, month=3, day=10),
    end_date: dt.date = dt.date(year=2023, month=3, day=22),
) -> tuple:
    """
    Picks one tweet posted by a user in a date window and fetches replies to it.

    Despite its name, this function does not return follower IDs: it searches
    for the user's tweets between start_date and end_date, takes the last
    tweet of that result, and requests replies using that tweet's ID.

    Args:
        username (str): The Twitter handle whose tweets are searched.
        limit (int): Currently unused; kept so existing callers keep working.
        start_date (datetime.date): Value for the query's since: operator.
        end_date (datetime.date): Value for the query's until: operator.

    Returns:
        A tuple (one_tweet, replies): the selected tweet as a dictionary, and
        the list returned by get_replies for it (requested with
        max_tweets=1000).

    Raises:
        IndexError: If the search returns no tweets for the date window.
    """
    query = f"from:{username} since:{start_date} until:{end_date}"
    tweets = get_tweets(query=query)
    if not tweets:
        # Same exception type the bare tweets[-1] would raise, but with a
        # message that explains what went wrong.
        raise IndexError(f"No tweets found for query: {query}")

    one_tweet = tweets[-1]
    replies = get_replies(
        username=username, conversation_id=one_tweet["tweet_id"], max_tweets=1000
    )

    return one_tweet, replies


def get_twitter_account_info(twitter_handle: str) -> dict:
    """
    Looks up a Twitter user's profile with snscrape and returns selected fields.

    Args:
        twitter_handle (str): The Twitter username to retrieve information for.

    Returns:
        dict: A dictionary with the keys name, username, user_id,
        follower_count, friends_count and verified, read from the profile
        entity of a TwitterUserScraper.
    """
    profile = sntwitter.TwitterUserScraper(twitter_handle).entity

    # (output key, profile attribute) pairs, listed in output order.
    field_map = (
        ("name", "displayname"),
        ("username", "username"),
        ("user_id", "id"),
        ("follower_count", "followersCount"),
        ("friends_count", "friendsCount"),
        ("verified", "verified"),
    )
    return {key: getattr(profile, attr) for key, attr in field_map}


if __name__ == "__main__":
    ## Testing extracting tweets from an account
    # Set the search variables (dates for when the account tweeted; replies
    # are not taken into account)
    account = "taylorlorenz"
    start_date = dt.date(year=2023, month=2, day=1)
    end_date = dt.date(year=2023, month=3, day=11)

    # Format the query string
    query = f"from:{account} since:{start_date} until:{end_date}"
    print(f"query: {query}")
    tweets = get_tweets(query=query)

    df_tweets = pd.DataFrame(data=tweets)
    # An empty result produces a frame without columns; sorting it by a
    # missing column would raise KeyError, so only sort when there is data.
    if not df_tweets.empty:
        df_tweets = df_tweets.sort_values("in_reply_to_tweet_id")
    # Save output (comment out to skip writing the CSV)
    df_tweets.to_csv("df_tweets.csv")

    print(df_tweets.head(2))
    print(df_tweets.tail(2))
    print(f"Total Tweets: {len(tweets)}")

    ## Testing extracting conversation threads from a conversation ID
    conversation_id = (
        1620650202305798144  # A tweet from elon musk about turbulent times
    )
    max_tweets = 3000
    tweets = get_replies(
        username="elonmusk", conversation_id=conversation_id, max_tweets=max_tweets
    )
    df_replies = pd.DataFrame(data=tweets)

    # Uncomment to save output
    # df_replies.to_csv("df_replies.csv")
    print(
        f"Number of extracted tweets from conversation_id: {conversation_id}, {len(tweets)}"
    )