{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "730ba509", "metadata": {}, "outputs": [], "source": [ "from IPython.core.interactiveshell import InteractiveShell\n", "InteractiveShell.ast_node_interactivity = \"all\"" ] }, { "cell_type": "code", "execution_count": 2, "id": "d9acd4b6", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "import sys\n", "proj_dir = Path.cwd().parent\n", "\n", "sys.path.append(str(proj_dir))\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "62452860", "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset" ] }, { "cell_type": "code", "execution_count": 28, "id": "00affc9a", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a106bb47c1194b15bc289d2ef24258af", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading readme: 0%| | 0.00/804 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
scorenum_commentstitlepermalinkselftexturlcreated_utcauthoriddownsupsdatetime
024Reddit, if someone had to describe you to a st.../r/AskReddit/comments/15sn6y/reddit_if_someone...They would be talking about you without your p...http://www.reddit.com/r/AskReddit/comments/15s...2013-01-01 23:59:40+00:00[deleted]15sn6y022013-01-0123:59:40
1524What kind of car does the average \\nRedditor d.../r/AskReddit/comments/15sn6m/what_kind_of_car_...I've always wanted to know what kind of car th...http://www.reddit.com/r/AskReddit/comments/15s...2013-01-01 23:59:31+00:00PaytonAdams15sn6m052013-01-0123:59:31
215What movies have made you go back to the theat.../r/AskReddit/comments/15sn6b/what_movies_have_...http://www.reddit.com/r/AskReddit/comments/15s...2013-01-01 23:59:20+00:00[deleted]15sn6b012013-01-0123:59:20
3018Worst fear(s)?/r/AskReddit/comments/15sn4u/worst_fears/So what is your worst fear, reddit?http://www.reddit.com/r/AskReddit/comments/15s...2013-01-01 23:58:37+00:00[deleted]15sn4u002013-01-0123:58:37
41129If there was a type of ink that lasted only fo.../r/AskReddit/comments/15sn44/if_there_was_a_ty...http://www.reddit.com/r/AskReddit/comments/15s...2013-01-01 23:58:15+00:00Honeybeard15sn440112013-01-0123:58:15
..........................................
3267011Smokers of Reddit- What are your reasons for s.../r/AskReddit/comments/15qzen/smokers_of_reddit...I'm very curious as to what causes someone to ...http://www.reddit.com/r/AskReddit/comments/15q...2013-01-01 00:01:36+00:00kelsofb15qzen002013-01-0100:01:36
326814Hi/r/AskReddit/comments/15qzei/hi/http://www.reddit.com/r/AskReddit/comments/15q...2013-01-01 00:01:34+00:00ImJE5US15qzei012013-01-0100:01:34
326912At the stroke of midnight I was writing this p.../r/AskReddit/comments/15qzdx/at_the_stroke_of_...http://www.reddit.com/r/AskReddit/comments/15q...2013-01-01 00:01:15+00:00Sangfroid_Sonder15qzdx012013-01-0100:01:15
327012With all the rape stories in the news, why don.../r/AskReddit/comments/15qzdc/with_all_the_rape...http://www.reddit.com/r/AskReddit/comments/15q...2013-01-01 00:00:58+00:00[deleted]15qzdc012013-01-0100:00:58
327103Do beautiful people have low entropy?/r/AskReddit/comments/15qzd3/do_beautiful_peop...I have been reading about entropy and arrows o...http://www.reddit.com/r/AskReddit/comments/15q...2013-01-01 00:00:53+00:00[deleted]15qzd3002013-01-0100:00:53
\n", "

3272 rows × 13 columns

\n", "" ], "text/plain": [ " score num_comments title \\\n", "0 2 4 Reddit, if someone had to describe you to a st... \n", "1 5 24 What kind of car does the average \\nRedditor d... \n", "2 1 5 What movies have made you go back to the theat... \n", "3 0 18 Worst fear(s)? \n", "4 11 29 If there was a type of ink that lasted only fo... \n", "... ... ... ... \n", "3267 0 11 Smokers of Reddit- What are your reasons for s... \n", "3268 1 4 Hi \n", "3269 1 2 At the stroke of midnight I was writing this p... \n", "3270 1 2 With all the rape stories in the news, why don... \n", "3271 0 3 Do beautiful people have low entropy? \n", "\n", " permalink \\\n", "0 /r/AskReddit/comments/15sn6y/reddit_if_someone... \n", "1 /r/AskReddit/comments/15sn6m/what_kind_of_car_... \n", "2 /r/AskReddit/comments/15sn6b/what_movies_have_... \n", "3 /r/AskReddit/comments/15sn4u/worst_fears/ \n", "4 /r/AskReddit/comments/15sn44/if_there_was_a_ty... \n", "... ... \n", "3267 /r/AskReddit/comments/15qzen/smokers_of_reddit... \n", "3268 /r/AskReddit/comments/15qzei/hi/ \n", "3269 /r/AskReddit/comments/15qzdx/at_the_stroke_of_... \n", "3270 /r/AskReddit/comments/15qzdc/with_all_the_rape... \n", "3271 /r/AskReddit/comments/15qzd3/do_beautiful_peop... \n", "\n", " selftext \\\n", "0 They would be talking about you without your p... \n", "1 I've always wanted to know what kind of car th... \n", "2 \n", "3 So what is your worst fear, reddit? \n", "4 \n", "... ... \n", "3267 I'm very curious as to what causes someone to ... \n", "3268 \n", "3269 \n", "3270 \n", "3271 I have been reading about entropy and arrows o... \n", "\n", " url \\\n", "0 http://www.reddit.com/r/AskReddit/comments/15s... \n", "1 http://www.reddit.com/r/AskReddit/comments/15s... \n", "2 http://www.reddit.com/r/AskReddit/comments/15s... \n", "3 http://www.reddit.com/r/AskReddit/comments/15s... \n", "4 http://www.reddit.com/r/AskReddit/comments/15s... \n", "... ... \n", "3267 http://www.reddit.com/r/AskReddit/comments/15q... 
\n", "3268 http://www.reddit.com/r/AskReddit/comments/15q... \n", "3269 http://www.reddit.com/r/AskReddit/comments/15q... \n", "3270 http://www.reddit.com/r/AskReddit/comments/15q... \n", "3271 http://www.reddit.com/r/AskReddit/comments/15q... \n", "\n", " created_utc author id downs ups \\\n", "0 2013-01-01 23:59:40+00:00 [deleted] 15sn6y 0 2 \n", "1 2013-01-01 23:59:31+00:00 PaytonAdams 15sn6m 0 5 \n", "2 2013-01-01 23:59:20+00:00 [deleted] 15sn6b 0 1 \n", "3 2013-01-01 23:58:37+00:00 [deleted] 15sn4u 0 0 \n", "4 2013-01-01 23:58:15+00:00 Honeybeard 15sn44 0 11 \n", "... ... ... ... ... ... \n", "3267 2013-01-01 00:01:36+00:00 kelsofb 15qzen 0 0 \n", "3268 2013-01-01 00:01:34+00:00 ImJE5US 15qzei 0 1 \n", "3269 2013-01-01 00:01:15+00:00 Sangfroid_Sonder 15qzdx 0 1 \n", "3270 2013-01-01 00:00:58+00:00 [deleted] 15qzdc 0 1 \n", "3271 2013-01-01 00:00:53+00:00 [deleted] 15qzd3 0 0 \n", "\n", " date time \n", "0 2013-01-01 23:59:40 \n", "1 2013-01-01 23:59:31 \n", "2 2013-01-01 23:59:20 \n", "3 2013-01-01 23:58:37 \n", "4 2013-01-01 23:58:15 \n", "... ... ... 
\n", "3267 2013-01-01 00:01:36 \n", "3268 2013-01-01 00:01:34 \n", "3269 2013-01-01 00:01:15 \n", "3270 2013-01-01 00:00:58 \n", "3271 2013-01-01 00:00:53 \n", "\n", "[3272 rows x 13 columns]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Convert the 'all_days' entry of `dataset` to a pandas DataFrame and display it.\n", "df = dataset['all_days'].to_pandas()\n", "df" ] }, { "cell_type": "code", "execution_count": 16, "id": "28df4b06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "score Int64\n", "num_comments Int64\n", "title string\n", "permalink string\n", "selftext string\n", "url string\n", "created_utc string\n", "author string\n", "id string\n", "downs Int64\n", "ups Int64\n", "dtype: object" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect the dtypes pandas would infer per column; the result is only displayed, df is not reassigned.\n", "df.convert_dtypes().dtypes" ] }, { "cell_type": "code", "execution_count": 18, "id": "e322b6c0", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 21, "id": "ed1b06c3", "metadata": {}, "outputs": [], "source": [ "# created_utc is parsed into timezone-aware datetimes.\n", "# utc=True guarantees a single datetime64[ns, UTC] column even if offsets vary,\n", "# so the .dt accessor used below is always available.\n", "df['created_utc'] = pd.to_datetime(df['created_utc'], utc=True)\n", "# Split the timestamp into its calendar-date and time-of-day parts.\n", "df['date'] = df['created_utc'].dt.date\n", "df['time'] = df['created_utc'].dt.time" ] }, { "cell_type": "code", "execution_count": 33, "id": "ff477737", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2013-01-01 3272\n", "Name: date, dtype: int64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of rows per calendar date.\n", "df['date'].value_counts()" ] }, { "cell_type": "code", "execution_count": 26, "id": "1d11b967", "metadata": {}, "outputs": [], "source": [ "# Keep only the first row seen for each id.\n", "new_df = df.drop_duplicates(subset=['id'], keep=\"first\")" ] }, { "cell_type": "code", "execution_count": 27, "id": "eec00dd6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Histogram of the de-duplicated rows over calendar dates.\n", "ax = new_df['date'].hist(bins=400)\n", "# Label the figure so it stands alone; the trailing ';' suppresses the repr of the returned artists.\n", "ax.set(title='Rows per date (de-duplicated by id)', xlabel='date', ylabel='row count');" ] }, { "cell_type": "code", "execution_count": null, "id": "1acf60dc", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }