{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data24.csv parse_description_test.ipynb\n",
      "google_job_rwtest.ipynb\n"
     ]
    }
   ],
   "source": [
    "ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/leowalker/Documents/Projects/ml_project_job_analysis\n"
     ]
    }
   ],
   "source": [
    "cd .."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# import sys\n",
    "# sys.path.append('../utils')\n",
    "\n",
    "from utils import parse_description\n",
    "\n",
    "import pprint\n",
    "import os\n",
    "from urllib.parse import quote\n",
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_data_from_db(table_name):\n",
    "    \"\"\"Read every row of a PostgreSQL table into a pandas DataFrame.\n",
    "\n",
    "    The connection uses the PSQL_MASTER_NAME (user), PSQL_KEY (password) and\n",
    "    RDS_ENDPOINT (host) environment variables, port 5432 and the `postgres`\n",
    "    database.\n",
    "\n",
    "    Args:\n",
    "        table_name: Table to read; it is embedded in the SELECT statement as a\n",
    "            double-quoted identifier.\n",
    "\n",
    "    Returns:\n",
    "        The table contents as a DataFrame, or None when a connection setting is\n",
    "        missing or the read fails (the error is printed, not raised).\n",
    "    \"\"\"\n",
    "    env_names = ('PSQL_MASTER_NAME', 'PSQL_KEY', 'RDS_ENDPOINT')\n",
    "    settings = {name: os.getenv(name) for name in env_names}\n",
    "    missing = [name for name in env_names if settings[name] is None]\n",
    "    if missing:\n",
    "        # os.getenv returns None for an unset variable; report that clearly\n",
    "        # instead of attempting a connection with incomplete settings.\n",
    "        print(f\"Error occurred while reading data from the database: missing environment variable(s): {', '.join(missing)}\")\n",
    "        return None\n",
    "\n",
    "    # Percent-encode the credentials so characters such as '@', ':' or '/' cannot break the URL.\n",
    "    user = quote(settings['PSQL_MASTER_NAME'], safe='')\n",
    "    password = quote(settings['PSQL_KEY'], safe='')\n",
    "    host = settings['RDS_ENDPOINT']\n",
    "    engine = create_engine(f\"postgresql://{user}:{password}@{host}:5432/postgres\")\n",
    "\n",
    "    try:\n",
    "        # Double any embedded double quote so the name cannot end the quoted identifier early.\n",
    "        quoted_table = str(table_name).replace('\"', '\"\"')\n",
    "        with engine.connect() as conn:\n",
    "            return pd.read_sql(f'SELECT * FROM \"{quoted_table}\"', conn)\n",
    "    except Exception as e:\n",
    "        print(f\"Error occurred while reading data from the database: {str(e)}\")\n",
    "        return None\n",
    "    finally:\n",
    "        # A new engine is built on every call; dispose it so its pooled connections are closed.\n",
    "        engine.dispose()\n",
    "\n",
    "data24_df = read_data_from_db('usajobstest')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | title | \n", "company_name | \n", "location | \n", "description | \n", "extensions | \n", "job_id | \n", "retrieve_date | \n", "
---|---|---|---|---|---|---|---|
330 | \n", "Data Scientist 3 | \n", "United Launch Alliance | \n", "Denver, CO | \n", "Your Role: What you'll be doing\\n\\nULA is look... | \n", "{\"111,700–174,504 a year\",Full-time,\"Paid time... | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCAzIiwiY2... | \n", "2024-05-07 | \n", "
399 | \n", "VP ENG, Gen AI | \n", "Voicera | \n", "San Francisco, CA | \n", "Job description\\n\\nJob Title: VP Engineering, ... | \n", "{\"6 days ago\",Full-time} | \n", "eyJqb2JfdGl0bGUiOiJWUCBFTkcsIEdlbiBBSSIsImNvbX... | \n", "2024-05-07 | \n", "
409 | \n", "Fraud Strategy Data Scientist | \n", "Softworld, Inc. | \n", "Mountain View, CA | \n", "Job Title: Fraud Strategy Data Scientist\\n\\nJo... | \n", "{\"1 day ago\",Contractor} | \n", "eyJqb2JfdGl0bGUiOiJGcmF1ZCBTdHJhdGVneSBEYXRhIF... | \n", "2024-05-07 | \n", "
420 | \n", "Data Scientist at Remedly in Mountain View, CA | \n", "Remedly | \n", "Mountain View, CA | \n", "Rhombus is purposefully transforming the natio... | \n", "{\"22 hours ago\",\"20–28 an hour\",Full-time} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCBhdCBSZW... | \n", "2024-05-07 | \n", "
421 | \n", "Principal Data Scientist | \n", "Microsoft | \n", "Mountain View, CA | \n", "The Search + Distribution (S+D) team is the le... | \n", "{\"24 days ago\",\"134K–257K a year\",Full-time,\"H... | \n", "eyJqb2JfdGl0bGUiOiJQcmluY2lwYWwgRGF0YSBTY2llbn... | \n", "2024-05-07 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
907 | \n", "Data Engineer | \n", "Bonfy.AI | \n", "Mountain View, CA | \n", "At Bonfy.AI, we're working behind the scenes o... | \n", "{\"21 days ago\",Full-time} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG... | \n", "2024-05-07 | \n", "
908 | \n", "Data Engineer - Onsite - Mountain View, CA | \n", "MethodHub | \n", "Mountain View, CA | \n", "Job Details\\n\\nRequirements...\\n• Bachelor s d... | \n", "{\"27 days ago\",Full-time} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIC0gT25zaX... | \n", "2024-05-07 | \n", "
909 | \n", "Data Engineer | \n", "Ampcus Incorporated | \n", "Mountain View, CA | \n", "Location: Mountain View, CA (Hybrid)\\n\\nExperi... | \n", "{\"22 days ago\",Contractor,\"No degree mentioned\"} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG... | \n", "2024-05-07 | \n", "
910 | \n", "AWS Data Engineer (Mountainview, CA; ) | \n", "CEDENT | \n", "Mountain View, CA | \n", "5+ years of data engineer experience in develo... | \n", "{\"4 days ago\",Contractor,\"No degree mentioned\"} | \n", "eyJqb2JfdGl0bGUiOiJBV1MgRGF0YSBFbmdpbmVlciAoTW... | \n", "2024-05-07 | \n", "
911 | \n", "Data Infrastructure Engineer | \n", "Applied Intuition | \n", "Mountain View, CA | \n", "About the role\\n\\nWe are looking for infrastru... | \n", "{Full-time,\"No degree mentioned\"} | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEluZnJhc3RydWN0dXJlIE... | \n", "2024-05-07 | \n", "
495 rows × 7 columns
\n", "\n", " | title | \n", "company_name | \n", "location | \n", "description | \n", "job_id | \n", "
---|---|---|---|---|---|
409 | \n", "Fraud Strategy Data Scientist | \n", "Softworld, Inc. | \n", "Mountain View, CA | \n", "Job Title: Fraud Strategy Data Scientist\\n\\nJo... | \n", "eyJqb2JfdGl0bGUiOiJGcmF1ZCBTdHJhdGVneSBEYXRhIF... | \n", "
420 | \n", "Data Scientist at Remedly in Mountain View, CA | \n", "Remedly | \n", "Mountain View, CA | \n", "Rhombus is purposefully transforming the natio... | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCBhdCBSZW... | \n", "
421 | \n", "Principal Data Scientist | \n", "Microsoft | \n", "Mountain View, CA | \n", "The Search + Distribution (S+D) team is the le... | \n", "eyJqb2JfdGl0bGUiOiJQcmluY2lwYWwgRGF0YSBTY2llbn... | \n", "
422 | \n", "Software Engineer - Agent AI | \n", "Applied Intuition | \n", "Mountain View, CA | \n", "About the role\\n\\nWe are looking for a softwar... | \n", "eyJqb2JfdGl0bGUiOiJTb2Z0d2FyZSBFbmdpbmVlciAtIE... | \n", "
423 | \n", "DATA SCIENTIST | \n", "Mythical Games | \n", "Mountain View, CA | \n", "We are looking for a highly-skilled Data Scien... | \n", "eyJqb2JfdGl0bGUiOiJEQVRBIFNDSUVOVElTVCIsImNvbX... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
907 | \n", "Data Engineer | \n", "Bonfy.AI | \n", "Mountain View, CA | \n", "At Bonfy.AI, we're working behind the scenes o... | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG... | \n", "
908 | \n", "Data Engineer - Onsite - Mountain View, CA | \n", "MethodHub | \n", "Mountain View, CA | \n", "Job Details\\n\\nRequirements...\\n• Bachelor s d... | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIC0gT25zaX... | \n", "
909 | \n", "Data Engineer | \n", "Ampcus Incorporated | \n", "Mountain View, CA | \n", "Location: Mountain View, CA (Hybrid)\\n\\nExperi... | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG... | \n", "
910 | \n", "AWS Data Engineer (Mountainview, CA; ) | \n", "CEDENT | \n", "Mountain View, CA | \n", "5+ years of data engineer experience in develo... | \n", "eyJqb2JfdGl0bGUiOiJBV1MgRGF0YSBFbmdpbmVlciAoTW... | \n", "
911 | \n", "Data Infrastructure Engineer | \n", "Applied Intuition | \n", "Mountain View, CA | \n", "About the role\\n\\nWe are looking for infrastru... | \n", "eyJqb2JfdGl0bGUiOiJEYXRhIEluZnJhc3RydWN0dXJlIE... | \n", "
76 rows × 5 columns
\n", "\n", " | about | \n", "mission_and_values | \n", "remote | \n", "responsibilities | \n", "required_qualifications | \n", "preferred_qualifications | \n", "salary_or_pay_range | \n", "benefits_and_perks | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "\n", " | \n", " | \n", " | [Design rules to detect/mitigate fraud., Devel... | \n", "[Minimum 2 years of experience in risk analyti... | \n", "[AWS, fraud investigations, payment rule syste... | \n", "\n", " | [] | \n", "
1 | \n", "Rhombus Power Inc. (Rhombus) is a NASA Researc... | \n", "Transforming the nation's defense and national... | \n", "Not specified | \n", "[Discover datasets that could help in solution... | \n", "[Masters or Ph.D. in Sciences, Mathematics or ... | \n", "[Experience with Cloud Computing environments ... | \n", "$100K -- $150K | \n", "[Not specified] | \n", "
2 | \n", "Microsoft is an equal opportunity employer. Co... | \n", "Microsoft’s mission is to empower every person... | \n", "Not specified | \n", "[Define, invent, and deliver online and offlin... | \n", "[Doctorate in Data Science, Mathematics, Stati... | \n", "[6+ years of experience coding in Python, C++,... | \n", "USD $133,600 - $256,800 per year | \n", "[Inclusive work environment, Collaborative cul... | \n", "
3 | \n", "Applied Intuition is a company focused on auto... | \n", "The company culture is dynamic and customer-fo... | \n", "Not specified | \n", "[Design and implement platform capabilities an... | \n", "[Passion for turning domain expertise into too... | \n", "[MSc or PhD in planning, control, or closely r... | \n", "$65,000 USD to $400,000 USD annually | \n", "[Not specified] | \n", "
4 | \n", "Unspecified | \n", "Unspecified | \n", "Unspecified | \n", "[Participate in the full lifecycle of a model ... | \n", "[Bachelor's Degree in a quantitative field, 3 ... | \n", "[Ph.D.] | \n", "$80K -- $100K | \n", "[Unspecified] | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
71 | \n", "Bonfy.AI is a rapidly growing startup working ... | \n", "Make AI trustworthy | \n", "\n", " | [Design and construct robust data pipelines to... | \n", "[Bachelor's or Master's degree in Computer Sci... | \n", "[] | \n", "\n", " | [] | \n", "
72 | \n", "\n", " | \n", " | \n", " | [] | \n", "[Bachelor's degree in computer science, Engine... | \n", "[AWS Certification, Experience with other AWS ... | \n", "\n", " | [] | \n", "
73 | \n", "\n", " | \n", " | Hybrid | \n", "[] | \n", "[10+ yrs...] | \n", "[Python (Numpy, Pandas), SQL, Hadoop, Hive, Py... | \n", "\n", " | [] | \n", "
74 | \n", "Company overview, industry, products, services... | \n", "Company mission, vision, values, and culture, ... | \n", "Remote work options for the role (full, hybrid... | \n", "[List of responsibilities, including tasks, du... | \n", "[Essential educational qualifications and prof... | \n", "[Any additional qualifications that a candidat... | \n", "Salary range or hourly pay range for the role | \n", "[List of benefits and perks offered for the ro... | \n", "
75 | \n", "Applied Intuition is a company that uses data ... | \n", "The company encourages engineers to take owner... | \n", "Not specified | \n", "[Develop and deploy event-driven pipelines usi... | \n", "[Experience with large-scale open source data ... | \n", "[Expertise with modern programming languages (... | \n", "$65,000 USD to $400,000 USD annually | \n", "[Not specified] | \n", "
76 rows × 8 columns
\n", "\n", " | about | \n", "mission_and_values | \n", "remote | \n", "responsibilities | \n", "required_qualifications | \n", "preferred_qualifications | \n", "salary_or_pay_range | \n", "benefits_and_perks | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "\n", " | \n", " | \n", " | [Design rules to detect/mitigate fraud., Devel... | \n", "[Minimum 2 years of experience in risk analyti... | \n", "[AWS, fraud investigations, payment rule syste... | \n", "\n", " | [] | \n", "
1 | \n", "Rhombus Power Inc. (Rhombus) is a NASA Researc... | \n", "Transforming the nation's defense and national... | \n", "Not specified | \n", "[Discover datasets that could help in solution... | \n", "[Masters or Ph.D. in Sciences, Mathematics or ... | \n", "[Experience with Cloud Computing environments ... | \n", "$100K -- $150K | \n", "[Not specified] | \n", "
2 | \n", "Microsoft is an equal opportunity employer. Co... | \n", "Microsoft’s mission is to empower every person... | \n", "Not specified | \n", "[Define, invent, and deliver online and offlin... | \n", "[Doctorate in Data Science, Mathematics, Stati... | \n", "[6+ years of experience coding in Python, C++,... | \n", "USD $133,600 - $256,800 per year | \n", "[Inclusive work environment, Collaborative cul... | \n", "
3 | \n", "Applied Intuition is a company focused on auto... | \n", "The company culture is dynamic and customer-fo... | \n", "Not specified | \n", "[Design and implement platform capabilities an... | \n", "[Passion for turning domain expertise into too... | \n", "[MSc or PhD in planning, control, or closely r... | \n", "$65,000 USD to $400,000 USD annually | \n", "[Not specified] | \n", "
4 | \n", "Unspecified | \n", "Unspecified | \n", "Unspecified | \n", "[Participate in the full lifecycle of a model ... | \n", "[Bachelor's Degree in a quantitative field, 3 ... | \n", "[Ph.D.] | \n", "$80K -- $100K | \n", "[Unspecified] | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
71 | \n", "Bonfy.AI is a rapidly growing startup working ... | \n", "Make AI trustworthy | \n", "\n", " | [Design and construct robust data pipelines to... | \n", "[Bachelor's or Master's degree in Computer Sci... | \n", "[] | \n", "\n", " | [] | \n", "
72 | \n", "\n", " | \n", " | \n", " | [] | \n", "[Bachelor's degree in computer science, Engine... | \n", "[AWS Certification, Experience with other AWS ... | \n", "\n", " | [] | \n", "
73 | \n", "\n", " | \n", " | Hybrid | \n", "[] | \n", "[10+ yrs...] | \n", "[Python (Numpy, Pandas), SQL, Hadoop, Hive, Py... | \n", "\n", " | [] | \n", "
74 | \n", "Company overview, industry, products, services... | \n", "Company mission, vision, values, and culture, ... | \n", "Remote work options for the role (full, hybrid... | \n", "[List of responsibilities, including tasks, du... | \n", "[Essential educational qualifications and prof... | \n", "[Any additional qualifications that a candidat... | \n", "Salary range or hourly pay range for the role | \n", "[List of benefits and perks offered for the ro... | \n", "
75 | \n", "Applied Intuition is a company that uses data ... | \n", "The company encourages engineers to take owner... | \n", "Not specified | \n", "[Develop and deploy event-driven pipelines usi... | \n", "[Experience with large-scale open source data ... | \n", "[Expertise with modern programming languages (... | \n", "$65,000 USD to $400,000 USD annually | \n", "[Not specified] | \n", "
76 rows × 8 columns
\n", "\n", " | index | \n", "title | \n", "company_name | \n", "location | \n", "description | \n", "about | \n", "mission_and_values | \n", "remote | \n", "responsibilities | \n", "required_qualifications | \n", "preferred_qualifications | \n", "salary_or_pay_range | \n", "benefits_and_perks | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
69 | \n", "905 | \n", "Data Engineer- Mountain View, CA | \n", "Georgia IT, Inc. | \n", "Mountain View, CA | \n", "Position: Data Engineer\\n\\nLocation: Mountain ... | \n", "Company in Mountain View, CA | \n", "\n", " | \n", " | [Ability to work with Business and technical s... | \n", "[10+ years of overall experience in data manag... | \n", "[] | \n", "DOE | \n", "[] | \n", "
57 | \n", "843 | \n", "Business Intelligence Analyst | \n", "BayOne | \n", "Mountain View, CA | \n", "Gather and analyze data, reason logically, dra... | \n", "Overview of the company, industry, products, s... | \n", "Company mission, vision, values, and culture, ... | \n", "Remote work options for the role (full, hybrid... | \n", "[List of responsibilities, including tasks, du... | \n", "[Essential educational qualifications and prof... | \n", "[Any additional qualifications that a candidat... | \n", "Salary range or hourly pay range for the role | \n", "[List of benefits and perks offered for the ro... | \n", "
54 | \n", "831 | \n", "Business Intelligence Engineer | \n", "Amazon.com Services LLC | \n", "Palo Alto, CA | \n", "Job summaryAre you passionate about delighting... | \n", "Amazon | \n", "Delight hundreds of millions of customers and ... | \n", "Not specified | \n", "[Analyze billions of customer feedback and beh... | \n", "[Strong business acumen, Written and verbal co... | \n", "[Strong technical expertise, Strong problem-so... | \n", "Hourly | \n", "[Competitive salary] | \n", "
59 | \n", "845 | \n", "Business Intelligence Analyst II | \n", "Akorbi | \n", "Mountain View, CA | \n", "Role Title: Data Analyst\\n\\nDuration: 6 months... | \n", "No information provided | \n", "No information provided | \n", "Hybrid | \n", "[Serving as a technical SME for Team Graph for... | \n", "[BA in Engineering or CS, 5+ years professiona... | \n", "[Strong communication and presentation skills] | \n", "No information provided | \n", "[] | \n", "
15 | \n", "563 | \n", "Senior Software Engineer, AI | \n", "Recruiting from Scratch | \n", "Mountain View, CA | \n", "Who is Recruiting from Scratch :\\n\\nRecruiting... | \n", "Recruiting from Scratch is a premier talent fi... | \n", "Our team is 100% remote and we work with teams... | \n", "On-site / In-office (≥4 days a week) | \n", "[Building edge applications processing vision ... | \n", "[3+ years of experience writing production sof... | \n", "[Experience with Docker, CI / CD pipelines, Ex... | \n", "$150,000-$220,000 base | \n", "[Flexible PTO & Sick Policy, Medical, Dental, ... | \n", "
29 | \n", "638 | \n", "Senior Technical Data Analyst | \n", "Intuit | \n", "Mountain View, CA | \n", "We are looking for an experienced technical da... | \n", "Intuit | \n", "Delivering data-driven insights, driving chang... | \n", "Not specified | \n", "[Driving data models and definitions, Communic... | \n", "[Expertise in analytics methodologies, Deep kn... | \n", "[Deep knowledge of ETL and data warehousing, E... | \n", "Not specified | \n", "[Competitive salary, Benefits package, Opportu... | \n", "
23 | \n", "632 | \n", "Data Analyst I - FT - Days - Concern: EAP | \n", "El Camino Health | \n", "Mountain View, CA | \n", "El Camino Health is committed to hiring, retai... | \n", "El Camino Health is committed to hiring, retai... | \n", "Compassion, innovation, collaboration, and del... | \n", "Not specified | \n", "[Understand the database structure and schema ... | \n", "[Bachelor’s degree in a technology or science ... | \n", "[Experience collaborating with software develo... | \n", "$36.13 - $54.20 USD Hourly | \n", "[Not specified] | \n", "
36 | \n", "666 | \n", "Senior Data Analyst | \n", "Scilex Pharmaceuticals Inc. | \n", "Palo Alto, CA | \n", "Salary Range: $114,000 - $131,000\\n\\nThe pay r... | \n", "Scilex Holding Company is an innovative revenu... | \n", "Committed to social, environmental, economic, ... | \n", "Not specified | \n", "[Collaborate with Commercial team to help synt... | \n", "[Working knowledge of pharmaceutical industry ... | \n", "[Bachelor's degree (business discipline prefer... | \n", "$114,000 - $131,000/annually | \n", "[Medical benefits, 401(k) eligibility, Vacatio... | \n", "
64 | \n", "852 | \n", "Business Intelligence/Data Analyst at Sandy Sp... | \n", "Sandy Spring Bancorp, Inc. | \n", "Mountain View, CA | \n", "Sandy Spring Bank is currently recruiting for ... | \n", "Sandy Spring Bank is a financial institution | \n", "Provides strategic, analytical, and technical ... | \n", "Unknown | \n", "[Provides strategic, analytical, and technical... | \n", "[Bachelor's Degree in Computer Science, Inform... | \n", "[Experience with managing data stores and data... | \n", "$80K -- $100K | \n", "[Salary range from $80K to $100K, Benefits pac... | \n", "
52 | \n", "828 | \n", "Business Intelligence Manager | \n", "Ford Motor | \n", "Palo Alto, CA | \n", "The Business Intelligence Manager role will ma... | \n", "Ford Motor is an Equal Opportunity Employer. W... | \n", "\n", " | \n", " | [Manage projects for our team, Lead user analy... | \n", "[] | \n", "[] | \n", "\n", " | [] | \n", "
\n", " | title | \n", "company_name | \n", "location | \n", "description | \n", "
---|---|---|---|---|
401 | \n", "Senior Business Intelligence Analyst, Operatio... | \n", "Rivian | \n", "Palo Alto, CA | \n", "About Rivian:\\n\\nRivian is on a mission to kee... | \n", "
217 | \n", "Generative AI Engineer | \n", "Knitit.ai | \n", "Palo Alto, CA | \n", "We are looking for a AI/ML Engineer to join a ... | \n", "
235 | \n", "Senior Data Analyst | \n", "DynPro Inc. | \n", "Mountain View, CA | \n", "Duration: 6 Months\\n\\nLocation: Bay Area, CA M... | \n", "