Spaces:
Runtime error
Runtime error
Upload with huggingface_hub
Browse files- Dockerfile +20 -0
- app.py +30 -0
- requirements.txt +2 -0
- workcell.yaml +10 -0
Dockerfile
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.8

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user so the remaining steps do not run as root
USER user
# Set home to the user's home directory and expose pip's per-user bin dir
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
# Set the working directory to the app directory inside the user's home
WORKDIR $HOME/app

# Install dependencies before copying the rest of the sources, so this layer
# is rebuilt only when requirements.txt changes rather than on every code edit
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r $HOME/app/requirements.txt

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

# Serve the workcell described by workcell.yaml on port 7860
CMD ["workcell", "serve", "--config", "workcell.yaml", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from tweety.bot import Twitter
|
2 |
+
from pydantic import BaseModel, Field
|
3 |
+
import pandas as pd
|
4 |
+
from workcell.integrations.types import PerspectiveTable
|
5 |
+
|
6 |
+
|
7 |
+
class Input(BaseModel):
    # Request model for twitter_scraper: a single field naming the account
    # whose tweets should be fetched. Falls back to "sama" when omitted.
    username: str = Field(
        default="sama",
        description="Twitter username of the person you want to scrape",
    )
|
9 |
+
|
10 |
+
def fetch_twitter_by_id(username):
    """Fetch tweets for the given account through tweety's ``Twitter`` client.

    Despite the function name, the argument is a username; it is passed
    unchanged to ``Twitter(...)``.

    Args:
        username: Account name handed to the ``Twitter`` constructor.

    Returns:
        Whatever ``Twitter(username).get_tweets()`` returns.
    """
    client = Twitter(username)
    # NOTE(review): the original comment said this yields 20 tweets; that
    # would come from get_tweets() defaults -- confirm against tweety.
    return client.get_tweets()
|
16 |
+
|
17 |
+
def process_tweets(tweets):
    """Convert tweet objects into a DataFrame restricted to a fixed column set.

    Args:
        tweets: Iterable of objects exposing a ``to_dict()`` method.

    Returns:
        A ``pandas.DataFrame`` with the columns ``created_on``, ``text``,
        ``likes``, ``reply_counts``, ``retweet_counts`` and ``id`` (in that
        order). When ``tweets`` is empty the frame has those columns and
        zero rows.

    Raises:
        KeyError: If tweets are present but their dicts lack one of the
            expected columns.
    """
    filter_columns = ['created_on', 'text', 'likes', 'reply_counts', 'retweet_counts', 'id']
    all_tweets = [tweet.to_dict() for tweet in tweets]
    if not all_tweets:
        # A DataFrame built from an empty list has no columns, so selecting
        # filter_columns from it would raise KeyError; return an empty table
        # with the expected schema instead.
        return pd.DataFrame(columns=filter_columns)
    df = pd.DataFrame(all_tweets)
    # Keep only the columns of interest, dropping everything else.
    return df[filter_columns]
|
25 |
+
|
26 |
+
def twitter_scraper(input: Input) -> PerspectiveTable:
    """Returns latest 20 tweets of given username, such as 'elonmusk'."""
    # NOTE(review): the "20" above is not enforced here; it depends on what
    # fetch_twitter_by_id returns -- confirm against the tweety client.
    # `input` shadows the builtin but is kept: it is the entrypoint's
    # parameter name.
    all_tweets = fetch_twitter_by_id(username=input.username)
    # Flatten the tweets into a DataFrame and wrap it in a PerspectiveTable.
    df = process_tweets(all_tweets)
    return PerspectiveTable(data=df)
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
workcell
|
2 |
+
tweety-ns
|
workcell.yaml
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
workcell_name: twitter_scraper
|
2 |
+
workcell_provider: huggingface
|
3 |
+
workcell_id: weanalyze/twitter_scraper
|
4 |
+
workcell_version: latest
|
5 |
+
workcell_runtime: python3.8
|
6 |
+
workcell_entrypoint: app:twitter_scraper
|
7 |
+
workcell_code:
|
8 |
+
ImageUri: ''
|
9 |
+
workcell_tags: {}
|
10 |
+
workcell_envs: {}
|