{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading settings from ../../env/ai.json\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "# Optional settings file that lives outside of the project.\n",
    "pathToSettings = '../../env/ai.json'\n",
    "if os.path.exists(pathToSettings):\n",
    "    # Load setting from Json outside of project.\n",
    "    print(f'Reading settings from {pathToSettings}')\n",
    "    # The context manager closes the file even if json.load raises.\n",
    "    with open(pathToSettings) as f:\n",
    "        settingsJson = json.load(f)\n",
    "    del f\n",
    "\n",
    "    # Export each top-level setting as an environment variable.\n",
    "    for key in settingsJson:\n",
    "        os.environ[key] = settingsJson[key]\n",
    "\n",
    "    del settingsJson"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PyPdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pypdf import PdfReader\n",
    "from pypdf.generic import Destination\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "# PDF to process; uncomment one of the alternatives to switch books.\n",
    "#path = '../my-data/finance/FINANCE Essencial finance.pdf'\n",
    "#path = '../my-data/finance/CoreCourseFinancialAccounting.pdf'\n",
    "path = '../my-data/finance/Liquidity Markets and Trading in Action An Interdisciplinary.pdf'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Strings to strip from page text, keyed by PDF path.\n",
    "# The variant ending in an ellipsis is listed first so it is removed as a\n",
    "# whole before the shorter variant, which is its prefix.\n",
    "pathFooterDict = {\n",
    "    '../my-data/finance/Liquidity Markets and Trading in Action An Interdisciplinary.pdf':\n",
    "        ['2 Liquidity, Trading, and Price Determination in Equity Markets: A Finance Course…',\n",
    "         '2 Liquidity, Trading, and Price Determination in Equity Markets: A Finance Course'\n",
    "        ]\n",
    "    }\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "\n",
    "def clean_characters(text):\n",
    "    \"\"\"Replace non-ASCII characters and tabs with spaces, then collapse space runs.\n",
    "\n",
    "    Args:\n",
    "        text: The string to normalise.\n",
    "\n",
    "    Returns:\n",
    "        The string with every character outside the 7-bit ASCII range and every\n",
    "        tab turned into a space, and each run of consecutive spaces reduced to\n",
    "        a single space. Newlines are left untouched.\n",
    "    \"\"\"\n",
    "    text = re.sub(r'[^\\x00-\\x7F]', ' ', text)\n",
    "    text = text.replace('\\t', ' ')\n",
    "    # A single regex pass always terminates, unlike a replace-until-stable loop.\n",
    "    return re.sub(r' {2,}', ' ', text)\n",
    "\n",
    "\n",
    "def clean_footer_out(filename, text):\n",
    "    \"\"\"Remove the footer strings registered for `filename` from `text`.\n",
    "\n",
    "    Args:\n",
    "        filename: Key into `pathFooterDict`.\n",
    "        text: The string to clean.\n",
    "\n",
    "    Returns:\n",
    "        `text` with every listed string removed, in list order. The text is\n",
    "        returned unchanged when `filename` has no entry.\n",
    "\n",
    "    Note:\n",
    "        Apply this before `clean_characters`: one registered string ends with a\n",
    "        non-ASCII ellipsis, which `clean_characters` would replace with a space.\n",
    "    \"\"\"\n",
    "    # .get avoids a KeyError for PDFs that have no footer list.\n",
    "    for t in pathFooterDict.get(filename, []):\n",
    "        text = text.replace(t, '')\n",
    "    return text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "../my-data/finance/Liquidity Markets and Trading in Action An Interdisciplinary.pdf\n"
     ]
    }
   ],
   "source": [
    "def get_bookmarks(pdf):\n",
    "    \"\"\"Flatten the outline of a PDF into a list of bookmark dicts.\n",
    "\n",
    "    Args:\n",
    "        pdf: Passed straight to `PdfReader` (a file path in this notebook).\n",
    "\n",
    "    Returns:\n",
    "        A list of dicts in outline order, each with the keys 'page' (value of\n",
    "        `get_destination_page_number`), 'level' (nesting depth, 0 for the top\n",
    "        level) and 'bookmark' (the raw outline title).\n",
    "    \"\"\"\n",
    "    bookmarks = []\n",
    "    rd = PdfReader(pdf)\n",
    "\n",
    "    def recursive(outline, level=0):\n",
    "        # A nested list holds the children of the entry that precedes it.\n",
    "        for i in outline:\n",
    "            if isinstance(i, Destination):\n",
    "                d = {\n",
    "                    'page': rd.get_destination_page_number(i),\n",
    "                    'level': level,\n",
    "                    'bookmark': i.title  # clean_characters(i.title),\n",
    "                }\n",
    "                bookmarks.append(d)\n",
    "            elif isinstance(i, list):\n",
    "                recursive(i, level + 1)\n",
    "\n",
    "    recursive(rd.outline)\n",
    "    return bookmarks\n",
    "\n",
    "result = get_bookmarks(path)\n",
    "print(path)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reader = PdfReader(path)\n",
    "# print(reader.pages[33].extract_text())\n",
    "\n",
    "# print(\"#########################\")\n",
    "# value = clean_footer_out(path, reader.pages[33].extract_text())\n",
    "# print(value)\n",
    "\n",
    "# # print('########################################')\n",
    "# # print(reader.pages[34].extract_text())\n",
    "# # print('########################################')\n",
    "# # print(reader.pages[100].extract_text())\n",
    "# # print('########################################')\n",
    "# # print(reader.pages[101].extract_text())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract the text of every page, then write it next to the PDF as '<path>.txt'.\n",
    "pages = []\n",
    "reader = PdfReader(path)\n",
    "for p in reader.pages:\n",
    "    page_text = p.extract_text()\n",
    "    pages.append(page_text)\n",
    "\n",
    "# Explicit UTF-8: the text can contain non-ASCII characters that the platform\n",
    "# default codec may be unable to encode.\n",
    "with open(f'{path}.txt', 'w', encoding='utf-8') as f:\n",
    "    for page_text in pages:\n",
    "        #page_text = clean_characters(page_text)\n",
    "        #page_text = clean_footer_out(path, page_text)\n",
    "        f.write(f\"{page_text}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'page': 0, 'level': 0, 'bookmark': 'Liquidity, Markets and Trading in Action'}\n", "*********************\n", "{'page': 5, 'level': 1, 'bookmark': 'Preface'}\n", "*********************\n", "{'page': 5, 'level': 2, 'bookmark': 'Microeconomics, Chap. 1'}\n", "*********************\n", "{'page': 6, 'level': 2, 'bookmark': 'Finance, Chap. 2'}\n", "*********************\n", "{'page': 6, 'level': 2, 'bookmark': 'Macroeconomics, Chap. 3'}\n", "*********************\n", "{'page': 7, 'level': 2, 'bookmark': 'Information Systems, Chap. 4'}\n", "*********************\n", "{'page': 7, 'level': 2, 'bookmark': 'Simulated Trading, Chap. 5'}\n", "*********************\n", "{'page': 7, 'level': 2, 'bookmark': 'Aim of\\xa0This Book'}\n", "*********************\n", "{'page': 9, 'level': 1, 'bookmark': 'Acknowledgment'}\n", "*********************\n", "{'page': 10, 'level': 1, 'bookmark': 'Contents'}\n", "*********************\n", "{'page': 11, 'level': 1, 'bookmark': '1: Economics and\\xa0the\\xa0Equity Market: A\\xa0Microeconomics Course Application'}\n", "*********************\n", "{'page': 12, 'level': 2, 'bookmark': '1.1\\t Microeconomics in\\xa0a\\xa0Nutshell'}\n", "*********************\n", "{'page': 13, 'level': 2, 'bookmark': '1.2\\t Microeconomic Analysis Goes to\\xa0an\\xa0Equity Market'}\n", "*********************\n", "{'page': 14, 'level': 2, 'bookmark': '1.3\\t Risk, Return Indifference Curves'}\n", "*********************\n", "{'page': 15, 'level': 2, 'bookmark': '1.4\\t The Constraint'}\n", "*********************\n", "{'page': 19, 'level': 2, 'bookmark': '1.5\\t Demand Curve to\\xa0Hold Shares of\\xa0the\\xa0Market Portfolio'}\n", "*********************\n", "{'page': 24, 'level': 2, 'bookmark': '1.6\\t What About the\\xa0Supply Curve?'}\n", "*********************\n", "{'page': 24, 'level': 2, 'bookmark': '1.7\\t Buy and\\xa0Sell Curves'}\n", "*********************\n", "{'page': 27, 
'level': 2, 'bookmark': '1.8\\t The Non-frictionless Market'}\n", "*********************\n", "{'page': 28, 'level': 2, 'bookmark': '1.9\\t Wrap Up: Microeconomics in\\xa0a\\xa0Non-frictionless Financial Market'}\n", "*********************\n", "{'page': 30, 'level': 1, 'bookmark': '2: Liquidity, Trading, and\\xa0Price Determination in\\xa0Equity Markets: A\\xa0Finance Course Application'}\n", "*********************\n", "{'page': 33, 'level': 2, 'bookmark': '2.1\\t Order Types'}\n", "*********************\n", "{'page': 34, 'level': 2, 'bookmark': '2.2\\t Trading Costs'}\n", "*********************\n", "{'page': 36, 'level': 2, 'bookmark': '2.3\\t What Drives Trading?'}\n", "*********************\n", "{'page': 37, 'level': 2, 'bookmark': '2.4\\t Price Discovery: A\\xa0Major Function of\\xa0a\\xa0Marketplace'}\n", "*********************\n", "{'page': 37, 'level': 2, 'bookmark': '2.5\\t Trading: The\\xa0Implementation of\\xa0an\\xa0Investment Decision'}\n", "*********************\n", "{'page': 38, 'level': 2, 'bookmark': '2.6\\t Intraday Price Volatility'}\n", "*********************\n", "{'page': 39, 'level': 2, 'bookmark': '2.7\\t Liquidity'}\n", "*********************\n", "{'page': 40, 'level': 2, 'bookmark': '2.8\\t Equity Market Structures'}\n", "*********************\n", "{'page': 43, 'level': 3, 'bookmark': '2.8.1\\t Hybrid Markets'}\n", "*********************\n", "{'page': 43, 'level': 3, 'bookmark': '2.8.2\\t Handling Large Orders'}\n", "*********************\n", "{'page': 44, 'level': 2, 'bookmark': '2.9\\t Financial Markets and\\xa0the\\xa0Process of\\xa0Turning Orders into Trades'}\n", "*********************\n", "{'page': 44, 'level': 3, 'bookmark': '2.9.1\\t Trades in\\xa0Continuous Order-Driven Markets'}\n", "*********************\n", "{'page': 46, 'level': 3, 'bookmark': '2.9.2\\t Trades in\\xa0Call Auction Markets'}\n", "*********************\n", "{'page': 48, 'level': 3, 'bookmark': '2.9.3\\t Trades in\\xa0Continuous Dealer Markets'}\n", 
"*********************\n", "{'page': 48, 'level': 2, 'bookmark': '2.10\\t Regulation, Technology, and the Quality of Market Structure\\x04'}\n", "*********************\n", "{'page': 55, 'level': 2, 'bookmark': '2.11\\t Wrapping It Up: Market Efficiency in\\xa0a\\xa0Non-frictionless World'}\n", "*********************\n", "{'page': 59, 'level': 1, 'bookmark': '3: Liquidity and\\xa0the\\xa0Impact of\\xa0Information Shocks: A\\xa0Macroeconomics Course Application'}\n", "*********************\n", "{'page': 60, 'level': 2, 'bookmark': '3.1\\t Economic Conditions, Business Cycles, and\\xa0the\\xa0Role of\\xa0Interest Rates'}\n", "*********************\n", "{'page': 61, 'level': 2, 'bookmark': '3.2\\t The Federal Reserve and\\xa0the\\xa0Link Between the\\xa0Macroeconomy and\\xa0Financial Markets'}\n", "*********************\n", "{'page': 62, 'level': 2, 'bookmark': '3.3\\t The Impact of\\xa0Information Shocks on\\xa0Divergent Expectations and\\xa0Price Discovery'}\n", "*********************\n", "{'page': 63, 'level': 2, 'bookmark': '3.4\\t The Various Types of\\xa0Financial Markets'}\n", "*********************\n", "{'page': 66, 'level': 2, 'bookmark': '3.5\\t Example of\\xa0an\\xa0Information Shock Based on\\xa0the\\xa0Fed’s Actions and\\xa0the\\xa0Financial Market’s Reactions'}\n", "*********************\n", "{'page': 72, 'level': 2, 'bookmark': '3.6\\t Using the\\xa0TraderEx Simulation to\\xa0Understand Interactions Between Financial Markets and\\xa0the\\xa0Macroeconomy'}\n", "*********************\n", "{'page': 73, 'level': 2, 'bookmark': '3.7\\t Making the\\xa0Trade: Combining Macroeconomics with\\xa0Fundamental Analysis and\\xa0Technical Analysis'}\n", "*********************\n", "{'page': 76, 'level': 2, 'bookmark': '3.8\\t Wrapping Up: How Information Shocks Affect Financial Markets and\\xa0the\\xa0Economy'}\n", "*********************\n", "{'page': 78, 'level': 1, 'bookmark': '4: Trading and\\xa0Technology: An\\xa0Information Systems Course Application'}\n", 
"*********************\n", "{'page': 79, 'level': 2, 'bookmark': '4.1\\t IT Innovations: Disruptive Versus Incremental'}\n", "*********************\n", "{'page': 80, 'level': 2, 'bookmark': '4.2\\t IT Infrastructure for\\xa0Financial Markets'}\n", "*********************\n", "{'page': 81, 'level': 2, 'bookmark': '4.3\\t IT Support for\\xa0the\\xa0Economic Functions of\\xa0Financial Markets'}\n", "*********************\n", "{'page': 82, 'level': 2, 'bookmark': '4.4\\t Instruments and\\xa0Market Data'}\n", "*********************\n", "{'page': 84, 'level': 2, 'bookmark': '4.5\\t Foundational Technologies for\\xa0Trading'}\n", "*********************\n", "{'page': 85, 'level': 2, 'bookmark': '4.6\\t IT Functions in\\xa0Trading'}\n", "*********************\n", "{'page': 87, 'level': 2, 'bookmark': '4.7\\t Managing Trading Technology'}\n", "*********************\n", "{'page': 92, 'level': 2, 'bookmark': '4.8\\t Conclusion'}\n", "*********************\n", "{'page': 94, 'level': 1, 'bookmark': '5: Experiencing Market Dynamics with\\xa0TraderEx: A\\xa0Trading Decision-Making Simulation'}\n", "*********************\n", "{'page': 95, 'level': 2, 'bookmark': '5.1\\t Conceptual Features of\\xa0the\\xa0TraderEx Trading Simulation'}\n", "*********************\n", "{'page': 98, 'level': 2, 'bookmark': '5.2\\t Using the\\xa0TraderEx Trading Simulation'}\n", "*********************\n", "{'page': 99, 'level': 2, 'bookmark': '5.3\\t Orders Types'}\n", "*********************\n", "{'page': 101, 'level': 2, 'bookmark': '5.4\\t Running a\\xa0TraderEx Simulation'}\n", "*********************\n", "{'page': 105, 'level': 2, 'bookmark': '5.5\\t Diving Deeper'}\n", "*********************\n", "{'page': 106, 'level': 2, 'bookmark': '5.6\\t Conclusion'}\n", "*********************\n", "{'page': 108, 'level': 1, 'bookmark': 'Index'}\n", "*********************\n" ] } ], "source": [ "# print(result[0])\n", "# print('###################################')\n", "# print(result[1])\n", "# 
print('###################################')\n", "# print(result[5])\n", "# print('###################################')\n", "# print(len(result))\n", "\n", "# Print every entry of result, each followed by a divider line.\n", "for p in result:\n", " print(p)\n", " print('*********************')" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" } }, "nbformat": 4, "nbformat_minor": 2 }