diff --git "a/fin_rl_policy_gradiant_v1.ipynb" "b/fin_rl_policy_gradiant_v1.ipynb" new file mode 100644--- /dev/null +++ "b/fin_rl_policy_gradiant_v1.ipynb" @@ -0,0 +1,1378 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "nwaAZRu1NTiI" + }, + "source": [ + "# PolicyGradiant\n", + "\n", + "\n", + "#### This version implements PolicyGradiant using a custom enviroment (Unit 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install talib-binary\n", + "!pip install yfinance" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "LNXxxKojNTiL" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-12-27 12:47:16.481995: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "\n" + ] + } + ], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.keras import layers\n", + "from tensorflow.keras.utils import to_categorical\n", + "import gym\n", + "from gym import spaces\n", + "from gym.utils import seeding\n", + "from gym import wrappers\n", + "\n", + "from tqdm.notebook import tqdm\n", + "from collections import deque\n", + "import numpy as np\n", + "import random\n", + "from matplotlib import pyplot as plt\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "import joblib\n", + "import talib as ta\n", + "import yfinance as yf\n", + "import pandas as pd\n", + "\n", + "import io\n", + "import base64\n", + "from IPython.display import HTML, Video\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "class Policy:\n", + " def __init__(self, env=None):\n", + "\n", + " self.action_size = env.action_space.n\n", + "\n", + " # Hyperparameters\n", + " self.gamma = 0.95 # Discount rate\n", + " self.epsilon = 1.0 # Exploration rate\n", + " self.epsilon_min = 0.001 # Minimal exploration rate (epsilon-greedy)\n", + " self.epsilon_decay = 0.95 # Decay rate for epsilon\n", + " self.update_rate = 5 # Number of steps until updating the target network\n", + " self.batch_size = 200\n", + " self.learning_rate = 1e-4\n", + " \n", + " self.model = self._build_model()\n", + " self.model.summary()\n", + " self.env = env\n", + "\n", + " self.history = None\n", + " self.scaler = None\n", + "\n", + " def _build_model(self):\n", + " model = tf.keras.Sequential()\n", + " \n", + " model.add(tf.keras.Input(shape=(4,)))\n", + " model.add(layers.Dense(256, activation = 'relu'))\n", + " model.add(layers.Dense(128, activation = 'relu'))\n", + " model.add(layers.Dense(64, activation = 'relu'))\n", + " model.add(layers.Dense(self.action_size, activation = 'softmax'))\n", + " \n", + " optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)\n", + " model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics = ['accuracy'])\n", + " # model.compile(loss='mse', optimizer=optimizer, metrics = ['mse'])\n", + " return model\n", + "\n", + " def act(self, state):\n", + " probs = self.model.predict([state])\n", + " action = tf.random.categorical(tf.math.log(probs), 1)\n", + " return action.numpy()[0], tf.math.log(probs[0][action])\n", + "\n", + " def reinforce(self, n_training_episodes, max_t, gamma, 
+    "    def reinforce(self, n_training_episodes, max_t, gamma, print_every):\n",
+    "        # Helps us to calculate the score during training\n",
+    "        scores_deque = deque(maxlen=100)\n",
+    "        scores = []\n",
+    "        # Line 3 of pseudocode\n",
+    "        for i_episode in range(1, n_training_episodes+1):\n",
+    "            saved_states = []\n",
+    "            saved_actions = []\n",
+    "            rewards = []\n",
+    "            state = self.env.reset()\n",
+    "            # Line 4 of pseudocode: roll out one episode with the current policy\n",
+    "            for t in range(max_t):\n",
+    "                action, _ = self.act(state)\n",
+    "                saved_states.append(state)\n",
+    "                saved_actions.append(action)\n",
+    "                state, reward, done, _ = self.env.step(action)\n",
+    "                rewards.append(reward)\n",
+    "                if done:\n",
+    "                    break\n",
+    "            scores_deque.append(sum(rewards))\n",
+    "            scores.append(sum(rewards))\n",
+    "\n",
+    "            # Line 6 of pseudocode: calculate the return at each timestep.\n",
+    "            # The discounted return G_t = r_(t+1) + gamma*G_(t+1) (Sutton & Barto, 2017 2nd draft)\n",
+    "            # is computed backwards in O(N): iterating from the last timestep to the first,\n",
+    "            # each G_t reuses the already computed G_(t+1) (dynamic programming).\n",
+    "            # appendleft() keeps the returns deque in chronological order (t=0..n_steps)\n",
+    "            # in O(1) per insertion, where a plain Python list would need O(N).\n",
+    "            returns = deque(maxlen=max_t)\n",
+    "            n_steps = len(rewards)\n",
+    "            for t in range(n_steps)[::-1]:\n",
+    "                disc_return_t = (returns[0] if len(returns)>0 else 0)\n",
+    "                returns.appendleft( gamma*disc_return_t + rewards[t] )\n",
+    "\n",
+    "            # Standardization of the returns makes training more stable; eps, the smallest\n",
+    "            # representable float, is added to the standard deviation to avoid dividing by zero.\n",
+    "            eps = np.finfo(np.float32).eps.item()\n",
+    "            returns = np.array(returns, dtype=np.float32)\n",
+    "            returns = (returns - returns.mean()) / (returns.std() + eps)\n",
+    "\n",
+    "            # Line 7: policy loss = - sum_t log pi(a_t|s_t) * G_t.\n",
+    "            # The log-probabilities are recomputed inside a GradientTape so that gradients\n",
+    "            # can flow through the network (model.predict() in act() is outside the tape\n",
+    "            # and cannot be differentiated).\n",
+    "            states_t = tf.convert_to_tensor(np.array(saved_states), dtype=tf.float32)\n",
+    "            actions_t = tf.convert_to_tensor(saved_actions, dtype=tf.int32)\n",
+    "            returns_t = tf.convert_to_tensor(returns, dtype=tf.float32)\n",
+    "            with tf.GradientTape() as tape:\n",
+    "                probs = self.model(states_t, training=True)\n",
+    "                idx = tf.stack([tf.range(tf.shape(actions_t)[0]), actions_t], axis=1)\n",
+    "                log_probs = tf.math.log(tf.gather_nd(probs, idx) + 1e-10)\n",
+    "                policy_loss = -tf.reduce_sum(log_probs * returns_t)\n",
+    "\n",
+    "            # Line 8: gradient descent step, reusing the Adam optimizer attached by compile()\n",
+    "            grads = tape.gradient(policy_loss, self.model.trainable_variables)\n",
+    "            self.model.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))\n",
+    "\n",
+    "            if i_episode % print_every == 0:\n",
+    "                print('Episode {}\tAverage Score: 
{:.2f}'.format(i_episode, np.mean(scores_deque)))\n", + " \n", + " return scores\n", + "\n", + "\n", + " #\n", + " # Loads a saved model\n", + " #\n", + " def load(self, name):\n", + " self.model = tf.keras.models.load_model(name)\n", + " # self.scaler = joblib.load(name+\".scaler\") \n", + "\n", + " #\n", + " # Saves parameters of a trained model\n", + " #\n", + " def save(self, name):\n", + " self.model.save(name)\n", + " # joblib.dump(self.scaler, name+\".scaler\") \n", + "\n", + " def play(self, state):\n", + " # state = self._get_scaled_state(state)\n", + " return np.argmax(self.model.predict(np.array([state]), verbose=0)[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from enum import Enum\n", + "class Actions(Enum):\n", + " Sell = 0\n", + " Buy = 1\n", + " Do_nothing = 2\n", + "\n", + "class CustTradingEnv(gym.Env):\n", + "\n", + " def __init__(self, df, max_steps=0, seed=8, random_start=True, scaler=None):\n", + " self.seed(seed=seed)\n", + " self.df = df\n", + " if scaler is None:\n", + " self.scaler = MinMaxScaler()\n", + " else:\n", + " self.scaler = scaler\n", + " self.prices, self.signal_features = self._process_data()\n", + "\n", + " # spaces\n", + " self.action_space = spaces.Discrete(3)\n", + " self.observation_space = spaces.Box(low=0, high=1, shape=(1,) , dtype=np.float64)\n", + "\n", + " # episode\n", + " self._start_tick = 0\n", + " self._end_tick = 0\n", + " self._done = None\n", + " self._current_tick = None\n", + " self._last_trade_tick = None\n", + " self._position = None\n", + " self._position_history = None\n", + " self._total_reward = None\n", + " self._total_profit = None\n", + " self._first_rendering = None\n", + " self.history = None\n", + " self._max_steps = max_steps\n", + " self._start_episode_tick = None\n", + " self._trade_history = None\n", + " self._random_start = random_start\n", + "\n", + "\n", + " def reset(self):\n", + " self._done = False\n", + " if self._random_start:\n", + " self._start_episode_tick = np.random.randint(1,high=len(self.df)- self._max_steps )\n", + " self._end_tick = self._start_episode_tick + self._max_steps\n", + " else:\n", + " self._start_episode_tick = 1\n", + " self._end_tick = len(self.df)-1\n", + "\n", + " self._current_tick = self._start_episode_tick\n", + " self._last_trade_tick = self._current_tick - 1\n", + " self._position = 0\n", + " self._position_history = []\n", + " # self._position_history = (self.window_size * [None]) + [self._position]\n", + " self._total_reward = 0.\n", + " self._total_profit = 0.\n", + " self._trade_history = []\n", + " self.history = {}\n", + " return self._get_observation()\n", + "\n", + "\n", + " def step(self, action):\n", + " self._done = False\n", + " self._current_tick += 1\n", + "\n", + " if self._current_tick == self._end_tick:\n", + " self._done = True\n", + "\n", + " step_reward = self._calculate_reward(action)\n", + " self._total_reward += step_reward\n", + "\n", + " observation = self._get_observation()\n", + " info = dict(\n", + " total_reward = self._total_reward,\n", + " total_profit = self._total_profit,\n", + " position = self._position,\n", + " action = action\n", + " )\n", + " self._update_history(info)\n", + "\n", + " return observation, step_reward, self._done, info\n", + "\n", + " def seed(self, seed=None):\n", + " self.np_random, seed = seeding.np_random(seed)\n", + " return [seed]\n", + " \n", + " def _get_observation(self):\n", + " return self.signal_features[self._current_tick]\n", + "\n", + " def 
_update_history(self, info):\n", + " if not self.history:\n", + " self.history = {key: [] for key in info.keys()}\n", + "\n", + " for key, value in info.items():\n", + " self.history[key].append(value)\n", + "\n", + "\n", + " def render(self, mode='human'):\n", + " window_ticks = np.arange(len(self._position_history))\n", + " prices = self.prices[self._start_episode_tick:self._end_tick+1]\n", + " plt.plot(prices)\n", + "\n", + " open_buy = []\n", + " close_buy = []\n", + " open_sell = []\n", + " close_sell = []\n", + " do_nothing = []\n", + "\n", + " for i, tick in enumerate(window_ticks):\n", + " if self._position_history[i] == 1:\n", + " open_buy.append(tick)\n", + " elif self._position_history[i] == 2 :\n", + " close_buy.append(tick)\n", + " elif self._position_history[i] == 3 :\n", + " open_sell.append(tick)\n", + " elif self._position_history[i] == 4 :\n", + " close_sell.append(tick)\n", + " elif self._position_history[i] == 0 :\n", + " do_nothing.append(tick)\n", + "\n", + " plt.plot(open_buy, prices[open_buy], 'go', marker=\"^\")\n", + " plt.plot(close_buy, prices[close_buy], 'go', marker=\"v\")\n", + " plt.plot(open_sell, prices[open_sell], 'ro', marker=\"v\")\n", + " plt.plot(close_sell, prices[close_sell], 'ro', marker=\"^\")\n", + " \n", + " plt.plot(do_nothing, prices[do_nothing], 'yo')\n", + "\n", + " plt.suptitle(\n", + " \"Total Reward: %.6f\" % self._total_reward + ' ~ ' +\n", + " \"Total Profit: %.6f\" % self._total_profit\n", + " )\n", + "\n", + " def _calculate_reward(self, action):\n", + " step_reward = 0\n", + "\n", + " current_price = self.prices[self._current_tick]\n", + " last_price = self.prices[self._current_tick - 1]\n", + " price_diff = current_price - last_price\n", + "\n", + " penalty = -1 * last_price * 0.01\n", + " # OPEN BUY - 1\n", + " if action == Actions.Buy.value and self._position == 0:\n", + " self._position = 1\n", + " step_reward += price_diff\n", + " self._last_trade_tick = self._current_tick - 1\n", + " self._position_history.append(1)\n", + "\n", + " elif action == Actions.Buy.value and self._position > 0:\n", + " step_reward += penalty\n", + " self._position_history.append(-1)\n", + " # CLOSE SELL - 4\n", + " elif action == Actions.Buy.value and self._position < 0:\n", + " self._position = 0\n", + " step_reward += -1 * (self.prices[self._current_tick -1] - self.prices[self._last_trade_tick]) \n", + " self._total_profit += step_reward\n", + " self._position_history.append(4)\n", + " self._trade_history.append(step_reward)\n", + "\n", + " # OPEN SELL - 3\n", + " elif action == Actions.Sell.value and self._position == 0:\n", + " self._position = -1\n", + " step_reward += -1 * price_diff\n", + " self._last_trade_tick = self._current_tick - 1\n", + " self._position_history.append(3)\n", + " # CLOSE BUY - 2\n", + " elif action == Actions.Sell.value and self._position > 0:\n", + " self._position = 0\n", + " step_reward += self.prices[self._current_tick -1] - self.prices[self._last_trade_tick] \n", + " self._total_profit += step_reward\n", + " self._position_history.append(2)\n", + " self._trade_history.append(step_reward)\n", + " elif action == Actions.Sell.value and self._position < 0:\n", + " step_reward += penalty\n", + " self._position_history.append(-1)\n", + "\n", + " # DO NOTHING - 0\n", + " elif action == Actions.Do_nothing.value and self._position > 0:\n", + " step_reward += price_diff\n", + " self._position_history.append(0)\n", + " elif action == Actions.Do_nothing.value and self._position < 0:\n", + " step_reward += -1 * price_diff\n", + " 
self._position_history.append(0)\n", + " elif action == Actions.Do_nothing.value and self._position == 0:\n", + " step_reward += -1 * abs(price_diff)\n", + " self._position_history.append(0)\n", + "\n", + " return step_reward\n", + "\n", + " def get_scaler(self):\n", + " return self.scaler\n", + "\n", + " def set_scaler(self, scaler):\n", + " self.scaler = scaler\n", + " \n", + " def _process_data(self):\n", + " timeperiod = 14\n", + " self.df = self.df.copy()\n", + " \n", + " self.df['mfi_r'] = ta.MFI(self.df['High'], self.df['Low'], self.df['Close'],self.df['Volume'], timeperiod=timeperiod)\n", + " _, self.df['stoch_d_r'] = ta.STOCH(self.df['High'], self.df['Low'], self.df['Close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)\n", + " self.df['adx_r'] = ta.ADX(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", + " self.df['p_di'] = ta.PLUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", + " self.df['m_di'] = ta.MINUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n", + " self.df['di'] = np.where( self.df['p_di'] > self.df['m_di'], 1, 0)\n", + "\n", + " self.df = self.df.dropna()\n", + " # self.df['di_s']=self.df['di']\n", + " # self.df['mfi_s']=self.df['mfi_r']\n", + " # self.df['stoch_d_s']=self.df['stoch_d_r']\n", + " # self.df['adx_s']=self.df['adx_r']\n", + "\n", + " self.df[['di_s','mfi_s','stoch_d_s','adx_s']] = self.scaler.fit_transform(self.df[['di','mfi_r','stoch_d_r','adx_r']])\n", + "\n", + " def f1(row):\n", + " row['state'] = [row['di_s'], row['mfi_s'], row['stoch_d_s'], row['adx_s']]\n", + " return row\n", + "\n", + " self.df = self.df.apply(f1, axis=1 )\n", + "\n", + " prices = self.df.loc[:, 'Close'].to_numpy()\n", + " # print(self.df.head(30))\n", + "\n", + " signal_features = np.stack(self.df.loc[:, 'state'].to_numpy())\n", + "\n", + " return prices, signal_features" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3024\n", + "1875\n" + ] + } + ], + "source": [ + "# Get data\n", + "eth_usd = yf.Ticker(\"ETH-USD\")\n", + "eth = eth_usd.history(period=\"max\")\n", + "\n", + "btc_usd = yf.Ticker(\"BTC-USD\")\n", + "btc = btc_usd.history(period=\"max\")\n", + "print(len(btc))\n", + "print(len(eth))\n", + "\n", + "btc_train = eth[-3015:-200]\n", + "# btc_test = eth[-200:]\n", + "eth_train = eth[-1864:-200]\n", + "eth_test = eth[-200:]\n", + "# len(eth_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create env\n", + "max_steps = 20 \n", + "env = CustTradingEnv(df=eth_train, max_steps=max_steps)\n", + "\n", + "model = Policy(env=env)\n", + "#n_training_episodes, max_t, gamma, print_every\n", + "model.reinforce(1000, 1000, 0.95, 100)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.save(\"./alt/fin_rl_dqn_v1\")\n", + "joblib.dump(env.get_scaler(),\"./alt/fin_rl_dqn_v1.h5_scaler\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def evaluate_agent(env, max_steps, n_eval_episodes, model, random=False):\n", + " \"\"\"\n", + " Evaluate the agent for ``n_eval_episodes`` episodes and returns average reward and std of reward.\n", + " :param env: The evaluation environment\n", + " :param n_eval_episodes: Number of episode 
to evaluate the agent\n", + " :param model: The DQN model\n", + " \"\"\"\n", + " episode_rewards = []\n", + " episode_profits = []\n", + " for episode in tqdm(range(n_eval_episodes), disable=random):\n", + " state = env.reset()\n", + " step = 0\n", + " done = False\n", + " total_rewards_ep = 0\n", + " total_profit_ep = 0\n", + " \n", + " for step in range(max_steps):\n", + " # Take the action (index) that have the maximum expected future reward given that state\n", + " if random:\n", + " action = env.action_space.sample()\n", + " else:\n", + " action = model.play(state)\n", + " # print(action)\n", + " \n", + " new_state, reward, done, info = env.step(action)\n", + " total_rewards_ep += reward\n", + " \n", + " if done:\n", + " break\n", + " state = new_state\n", + "\n", + " episode_rewards.append(total_rewards_ep)\n", + " episode_profits.append(env.history['total_profit'][-1])\n", + " # print(env.history)\n", + " # env.render()\n", + " # assert 0\n", + "\n", + " mean_reward = np.mean(episode_rewards)\n", + " std_reward = np.std(episode_rewards)\n", + " mean_profit = np.mean(episode_profits)\n", + " std_profit = np.std(episode_profits)\n", + "\n", + " return mean_reward, std_reward, mean_profit, std_profit" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f0eff2ef3b0a4e12a23709db72722a25", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1000 [00:00here for more info. View Jupyter log for further details." + ] + } + ], + "source": [ + "max_steps = 20 \n", + "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True, scaler=env.get_scaler())\n", + "n_eval_episodes = 1000\n", + "\n", + "evaluate_agent(env_test, max_steps, n_eval_episodes, model)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a7b0edb264fe43edbe5cea55fac21688", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1 [00:00" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(15,6))\n", + "plt.cla()\n", + "env_l.render()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(-156.66986416870117,\n", + " 394.94783990529805,\n", + " 4.957175903320312,\n", + " 211.59187866264426)" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test for random n_eval_episodes\n", + "max_steps = 20 \n", + "env_test_rand = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True, scaler=env.get_scaler())\n", + "n_eval_episodes = 1000\n", + "\n", + "evaluate_agent(env_test_rand, max_steps, n_eval_episodes, model, random=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean profit 3.7792178955078124\n" + ] + } + ], + "source": [ + "# trade sequentially with random actions \n", + "max_steps = len(eth_test)\n", + "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=False, scaler=env.get_scaler())\n", + "n_eval_episodes = 1\n", + "\n", + "all_profit=[]\n", + "for i in range(1000):\n", + " _,_,profit,_=evaluate_agent(env_test, max_steps, n_eval_episodes, model, 
random=True)\n", + " all_profit.append(profit)\n", + "print(f\"Mean profit {np.mean(all_profit)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results\n", + "\n", + "| Model | 1000 trades 20 steps | Sequential trading | 1000 trades 20 steps random actions | Sequential random|\n", + "|------------|----------------------|--------------------|-------------------------------------|------------------|\n", + "|Q-learning | 113.14 | 563.67 | -18.10 | 39.30 |\n", + "|DQN | 87.62 | 381.17 | 4.95 | 3.77 |\n", + "|Policy Gradi| | | | |\n", + "\n", + "\n", + "#### Actions are: Buy/Sell/Hold 1 ETH \n", + "1000 trades 20 steps - Made 1000 episodes, 20 trades each episode, result is the mean return of each episode \n", + "\n", + "Sequential trading (175 days)- Trade the test set sequentially from start to end day \n", + "\n", + "1000 trades 20 steps random actions - Made 1000 episodes, 20 trades each episode taking random actions \n", + "\n", + "Sequential random (175 days)- Trade the test set sequentially from start to end day with random actions " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3.8.13 ('rl2')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "cd60ab8388a66026f336166410d6a8a46ddf65ece2e85ad2d46c8b98d87580d1" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "01a2dbcb714e40148b41c761fcf43147": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "20b0f38ec3234ff28a62a286cd57b933": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "PasswordModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "PasswordModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "PasswordView", + "continuous_update": true, + "description": "Token:", + 
"description_tooltip": null, + "disabled": false, + "layout": "IPY_MODEL_01a2dbcb714e40148b41c761fcf43147", + "placeholder": "​", + "style": "IPY_MODEL_90c874e91b304ee1a7ef147767ac00ce", + "value": "" + } + }, + "270cbb5d6e9c4b1e9e2f39c8b3b0c15f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a02224a43d8d4af3bd31d326540d25da", + "IPY_MODEL_20b0f38ec3234ff28a62a286cd57b933", + "IPY_MODEL_f6c845330d6743c0b35c2c7ad834de77", + "IPY_MODEL_f1675c09d16a4251b403f9c56255f168", + "IPY_MODEL_c1a82965ae26479a98e4fdbde1e64ec2" + ], + "layout": "IPY_MODEL_3fa248114ac24656ba74923936a94d2d" + } + }, + "2dc5fa9aa3334dfcbdee9c238f2ef60b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3e753b0212644990b558c68853ff2041": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3fa248114ac24656ba74923936a94d2d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": "center", + "align_self": null, + "border": null, + "bottom": null, + "display": "flex", + "flex": null, + "flex_flow": "column", + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "50%" + } + }, + "42d140b838b844819bc127afc1b7bc84": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "90c874e91b304ee1a7ef147767ac00ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9d847f9a7d47458d8cd57d9b599e47c6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a02224a43d8d4af3bd31d326540d25da": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_caef095934ec47bbb8b64eab22049284", + "placeholder": "​", + "style": "IPY_MODEL_2dc5fa9aa3334dfcbdee9c238f2ef60b", + "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" + } + }, + "a2cfb91cf66447d7899292854bd64a07": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c1a82965ae26479a98e4fdbde1e64ec2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9d847f9a7d47458d8cd57d9b599e47c6", + "placeholder": "​", + "style": "IPY_MODEL_42d140b838b844819bc127afc1b7bc84", + "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" + } + }, + "caef095934ec47bbb8b64eab22049284": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eaba3f1de4444aabadfea2a3dadb1d80": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ee4a21bedc504171ad09d205d634b528": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "button_color": null, + "font_weight": "" + } + }, + "f1675c09d16a4251b403f9c56255f168": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ButtonView", + "button_style": "", + "description": "Login", + "disabled": false, + "icon": "", + "layout": "IPY_MODEL_a2cfb91cf66447d7899292854bd64a07", + "style": "IPY_MODEL_ee4a21bedc504171ad09d205d634b528", + "tooltip": "" + } + }, + "f6c845330d6743c0b35c2c7ad834de77": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "CheckboxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "CheckboxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "CheckboxView", + "description": "Add token as git credential?", + "description_tooltip": null, + "disabled": false, + "indent": true, + "layout": "IPY_MODEL_3e753b0212644990b558c68853ff2041", + "style": "IPY_MODEL_eaba3f1de4444aabadfea2a3dadb1d80", + "value": true 
+ } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}