{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data24.csv                    parse_description_test.ipynb\n",
      "google_job_rwtest.ipynb\n"
     ]
    }
   ],
   "source": [
    "ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/leowalker/Documents/Projects/ml_project_job_analysis\n"
     ]
    }
   ],
   "source": [
    "cd .."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# import sys\n",
    "# sys.path.append('../utils')\n",
    "\n",
    "from utils import parse_description\n",
    "\n",
    "import pprint\n",
    "import os\n",
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_data_from_db(table_name):\n",
    "    \"\"\"Load every row of a PostgreSQL table into a pandas DataFrame.\n",
    "\n",
    "    Connection settings are read from the PSQL_MASTER_NAME, PSQL_KEY and\n",
    "    RDS_ENDPOINT environment variables; port 5432 and the postgres\n",
    "    database are fixed.\n",
    "\n",
    "    Args:\n",
    "        table_name: Name of the table to read. It is emitted as a quoted\n",
    "            identifier, with embedded double quotes doubled.\n",
    "\n",
    "    Returns:\n",
    "        The table as a DataFrame, or None when a setting is missing or the\n",
    "        read fails (the reason is printed).\n",
    "    \"\"\"\n",
    "    from urllib.parse import quote\n",
    "\n",
    "    user = os.getenv('PSQL_MASTER_NAME')\n",
    "    password = os.getenv('PSQL_KEY')\n",
    "    host = os.getenv('RDS_ENDPOINT')\n",
    "    if user is None or password is None or host is None:\n",
    "        print(\"Error occurred while reading data from the database: PSQL_MASTER_NAME, PSQL_KEY and RDS_ENDPOINT must all be set\")\n",
    "        return None\n",
    "\n",
    "    # Percent-encode the credentials so characters such as '@', ':' or '/'\n",
    "    # in the user name or password cannot corrupt the connection URL.\n",
    "    safe_user = quote(user, safe='')\n",
    "    safe_password = quote(password, safe='')\n",
    "    engine = create_engine(f\"postgresql://{safe_user}:{safe_password}@{host}:5432/postgres\")\n",
    "    # Double embedded quotes so table_name always stays one quoted identifier.\n",
    "    quoted_table = '\"' + str(table_name).replace('\"', '\"\"') + '\"'\n",
    "\n",
    "    try:\n",
    "        with engine.connect() as conn:\n",
    "            return pd.read_sql(f'SELECT * FROM {quoted_table}', conn)\n",
    "    except Exception as e:\n",
    "        print(f\"Error occurred while reading data from the database: {str(e)}\")\n",
    "        return None\n",
    "    finally:\n",
    "        # Release the connection pool owned by this single-use engine.\n",
    "        engine.dispose()\n",
    "\n",
    "data24_df = read_data_from_db('usajobstest')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>company_name</th>\n",
       "      <th>location</th>\n",
       "      <th>description</th>\n",
       "      <th>extensions</th>\n",
       "      <th>job_id</th>\n",
       "      <th>retrieve_date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>330</th>\n",
       "      <td>Data Scientist 3</td>\n",
       "      <td>United Launch Alliance</td>\n",
       "      <td>Denver, CO</td>\n",
       "      <td>Your Role: What you'll be doing\\n\\nULA is look...</td>\n",
       "      <td>{\"111,700–174,504 a year\",Full-time,\"Paid time...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCAzIiwiY2...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399</th>\n",
       "      <td>VP ENG, Gen AI</td>\n",
       "      <td>Voicera</td>\n",
       "      <td>San Francisco, CA</td>\n",
       "      <td>Job description\\n\\nJob Title: VP Engineering, ...</td>\n",
       "      <td>{\"6 days ago\",Full-time}</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJWUCBFTkcsIEdlbiBBSSIsImNvbX...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>409</th>\n",
       "      <td>Fraud Strategy Data Scientist</td>\n",
       "      <td>Softworld, Inc.</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Job Title: Fraud Strategy Data Scientist\\n\\nJo...</td>\n",
       "      <td>{\"1 day ago\",Contractor}</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJGcmF1ZCBTdHJhdGVneSBEYXRhIF...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>420</th>\n",
       "      <td>Data Scientist at Remedly in Mountain View, CA</td>\n",
       "      <td>Remedly</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Rhombus is purposefully transforming the natio...</td>\n",
       "      <td>{\"22 hours ago\",\"20–28 an hour\",Full-time}</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCBhdCBSZW...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>421</th>\n",
       "      <td>Principal Data Scientist</td>\n",
       "      <td>Microsoft</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>The Search + Distribution (S+D) team is the le...</td>\n",
       "      <td>{\"24 days ago\",\"134K–257K a year\",Full-time,\"H...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJQcmluY2lwYWwgRGF0YSBTY2llbn...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>907</th>\n",
       "      <td>Data Engineer</td>\n",
       "      <td>Bonfy.AI</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>At Bonfy.AI, we're working behind the scenes o...</td>\n",
       "      <td>{\"21 days ago\",Full-time}</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>908</th>\n",
       "      <td>Data Engineer - Onsite - Mountain View, CA</td>\n",
       "      <td>MethodHub</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Job Details\\n\\nRequirements...\\n• Bachelor s d...</td>\n",
       "      <td>{\"27 days ago\",Full-time}</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIC0gT25zaX...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>909</th>\n",
       "      <td>Data Engineer</td>\n",
       "      <td>Ampcus Incorporated</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Location: Mountain View, CA (Hybrid)\\n\\nExperi...</td>\n",
       "      <td>{\"22 days ago\",Contractor,\"No degree mentioned\"}</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>910</th>\n",
       "      <td>AWS Data Engineer (Mountainview, CA; )</td>\n",
       "      <td>CEDENT</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>5+ years of data engineer experience in develo...</td>\n",
       "      <td>{\"4 days ago\",Contractor,\"No degree mentioned\"}</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJBV1MgRGF0YSBFbmdpbmVlciAoTW...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>911</th>\n",
       "      <td>Data Infrastructure Engineer</td>\n",
       "      <td>Applied Intuition</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>About the role\\n\\nWe are looking for infrastru...</td>\n",
       "      <td>{Full-time,\"No degree mentioned\"}</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIEluZnJhc3RydWN0dXJlIE...</td>\n",
       "      <td>2024-05-07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>495 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              title            company_name  \\\n",
       "330                                Data Scientist 3  United Launch Alliance   \n",
       "399                                  VP ENG, Gen AI                 Voicera   \n",
       "409                   Fraud Strategy Data Scientist         Softworld, Inc.   \n",
       "420  Data Scientist at Remedly in Mountain View, CA                 Remedly   \n",
       "421                        Principal Data Scientist               Microsoft   \n",
       "..                                              ...                     ...   \n",
       "907                                   Data Engineer                Bonfy.AI   \n",
       "908      Data Engineer - Onsite - Mountain View, CA               MethodHub   \n",
       "909                                   Data Engineer     Ampcus Incorporated   \n",
       "910          AWS Data Engineer (Mountainview, CA; )                  CEDENT   \n",
       "911                    Data Infrastructure Engineer       Applied Intuition   \n",
       "\n",
       "              location                                        description  \\\n",
       "330         Denver, CO  Your Role: What you'll be doing\\n\\nULA is look...   \n",
       "399  San Francisco, CA  Job description\\n\\nJob Title: VP Engineering, ...   \n",
       "409  Mountain View, CA  Job Title: Fraud Strategy Data Scientist\\n\\nJo...   \n",
       "420  Mountain View, CA  Rhombus is purposefully transforming the natio...   \n",
       "421  Mountain View, CA  The Search + Distribution (S+D) team is the le...   \n",
       "..                 ...                                                ...   \n",
       "907  Mountain View, CA  At Bonfy.AI, we're working behind the scenes o...   \n",
       "908  Mountain View, CA  Job Details\\n\\nRequirements...\\n• Bachelor s d...   \n",
       "909  Mountain View, CA  Location: Mountain View, CA (Hybrid)\\n\\nExperi...   \n",
       "910  Mountain View, CA  5+ years of data engineer experience in develo...   \n",
       "911  Mountain View, CA  About the role\\n\\nWe are looking for infrastru...   \n",
       "\n",
       "                                            extensions  \\\n",
       "330  {\"111,700–174,504 a year\",Full-time,\"Paid time...   \n",
       "399                           {\"6 days ago\",Full-time}   \n",
       "409                           {\"1 day ago\",Contractor}   \n",
       "420         {\"22 hours ago\",\"20–28 an hour\",Full-time}   \n",
       "421  {\"24 days ago\",\"134K–257K a year\",Full-time,\"H...   \n",
       "..                                                 ...   \n",
       "907                          {\"21 days ago\",Full-time}   \n",
       "908                          {\"27 days ago\",Full-time}   \n",
       "909   {\"22 days ago\",Contractor,\"No degree mentioned\"}   \n",
       "910    {\"4 days ago\",Contractor,\"No degree mentioned\"}   \n",
       "911                  {Full-time,\"No degree mentioned\"}   \n",
       "\n",
       "                                                job_id retrieve_date  \n",
       "330  eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCAzIiwiY2...    2024-05-07  \n",
       "399  eyJqb2JfdGl0bGUiOiJWUCBFTkcsIEdlbiBBSSIsImNvbX...    2024-05-07  \n",
       "409  eyJqb2JfdGl0bGUiOiJGcmF1ZCBTdHJhdGVneSBEYXRhIF...    2024-05-07  \n",
       "420  eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCBhdCBSZW...    2024-05-07  \n",
       "421  eyJqb2JfdGl0bGUiOiJQcmluY2lwYWwgRGF0YSBTY2llbn...    2024-05-07  \n",
       "..                                                 ...           ...  \n",
       "907  eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG...    2024-05-07  \n",
       "908  eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIC0gT25zaX...    2024-05-07  \n",
       "909  eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG...    2024-05-07  \n",
       "910  eyJqb2JfdGl0bGUiOiJBV1MgRGF0YSBFbmdpbmVlciAoTW...    2024-05-07  \n",
       "911  eyJqb2JfdGl0bGUiOiJEYXRhIEluZnJhc3RydWN0dXJlIE...    2024-05-07  \n",
       "\n",
       "[495 rows x 7 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data24_df.loc[data24_df['retrieve_date'] == '2024-05-07']  # rows whose retrieve_date is 2024-05-07"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows retrieved on 2024-05-07, reduced to the columns needed for parsing;\n",
    "# rows identical across those columns are dropped.\n",
    "title_company = data24_df.loc[\n",
    "    data24_df['retrieve_date'] == '2024-05-07',\n",
    "    ['title', 'company_name', 'location', 'description', 'job_id'],\n",
    "].drop_duplicates()\n",
    "# Keep only the two locations of interest.\n",
    "filtered_title_company = title_company[title_company['location'].isin(['Mountain View, CA', 'Palo Alto, CA'])]\n",
    "# Fixed seed so a re-run draws the same rows; n is capped so a frame with\n",
    "# fewer than 3 rows cannot make sample() raise.\n",
    "filtered_sample_df = filtered_title_company.sample(n=min(3, len(filtered_title_company)), random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>company_name</th>\n",
       "      <th>location</th>\n",
       "      <th>description</th>\n",
       "      <th>job_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>409</th>\n",
       "      <td>Fraud Strategy Data Scientist</td>\n",
       "      <td>Softworld, Inc.</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Job Title: Fraud Strategy Data Scientist\\n\\nJo...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJGcmF1ZCBTdHJhdGVneSBEYXRhIF...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>420</th>\n",
       "      <td>Data Scientist at Remedly in Mountain View, CA</td>\n",
       "      <td>Remedly</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Rhombus is purposefully transforming the natio...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCBhdCBSZW...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>421</th>\n",
       "      <td>Principal Data Scientist</td>\n",
       "      <td>Microsoft</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>The Search + Distribution (S+D) team is the le...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJQcmluY2lwYWwgRGF0YSBTY2llbn...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>422</th>\n",
       "      <td>Software Engineer - Agent AI</td>\n",
       "      <td>Applied Intuition</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>About the role\\n\\nWe are looking for a softwar...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJTb2Z0d2FyZSBFbmdpbmVlciAtIE...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423</th>\n",
       "      <td>DATA SCIENTIST</td>\n",
       "      <td>Mythical Games</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>We are looking for a highly-skilled Data Scien...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEQVRBIFNDSUVOVElTVCIsImNvbX...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>907</th>\n",
       "      <td>Data Engineer</td>\n",
       "      <td>Bonfy.AI</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>At Bonfy.AI, we're working behind the scenes o...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>908</th>\n",
       "      <td>Data Engineer - Onsite - Mountain View, CA</td>\n",
       "      <td>MethodHub</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Job Details\\n\\nRequirements...\\n• Bachelor s d...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIC0gT25zaX...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>909</th>\n",
       "      <td>Data Engineer</td>\n",
       "      <td>Ampcus Incorporated</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Location: Mountain View, CA (Hybrid)\\n\\nExperi...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>910</th>\n",
       "      <td>AWS Data Engineer (Mountainview, CA; )</td>\n",
       "      <td>CEDENT</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>5+ years of data engineer experience in develo...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJBV1MgRGF0YSBFbmdpbmVlciAoTW...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>911</th>\n",
       "      <td>Data Infrastructure Engineer</td>\n",
       "      <td>Applied Intuition</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>About the role\\n\\nWe are looking for infrastru...</td>\n",
       "      <td>eyJqb2JfdGl0bGUiOiJEYXRhIEluZnJhc3RydWN0dXJlIE...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>76 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              title         company_name  \\\n",
       "409                   Fraud Strategy Data Scientist      Softworld, Inc.   \n",
       "420  Data Scientist at Remedly in Mountain View, CA              Remedly   \n",
       "421                        Principal Data Scientist            Microsoft   \n",
       "422                    Software Engineer - Agent AI    Applied Intuition   \n",
       "423                                  DATA SCIENTIST       Mythical Games   \n",
       "..                                              ...                  ...   \n",
       "907                                   Data Engineer             Bonfy.AI   \n",
       "908      Data Engineer - Onsite - Mountain View, CA            MethodHub   \n",
       "909                                   Data Engineer  Ampcus Incorporated   \n",
       "910          AWS Data Engineer (Mountainview, CA; )               CEDENT   \n",
       "911                    Data Infrastructure Engineer    Applied Intuition   \n",
       "\n",
       "              location                                        description  \\\n",
       "409  Mountain View, CA  Job Title: Fraud Strategy Data Scientist\\n\\nJo...   \n",
       "420  Mountain View, CA  Rhombus is purposefully transforming the natio...   \n",
       "421  Mountain View, CA  The Search + Distribution (S+D) team is the le...   \n",
       "422  Mountain View, CA  About the role\\n\\nWe are looking for a softwar...   \n",
       "423  Mountain View, CA  We are looking for a highly-skilled Data Scien...   \n",
       "..                 ...                                                ...   \n",
       "907  Mountain View, CA  At Bonfy.AI, we're working behind the scenes o...   \n",
       "908  Mountain View, CA  Job Details\\n\\nRequirements...\\n• Bachelor s d...   \n",
       "909  Mountain View, CA  Location: Mountain View, CA (Hybrid)\\n\\nExperi...   \n",
       "910  Mountain View, CA  5+ years of data engineer experience in develo...   \n",
       "911  Mountain View, CA  About the role\\n\\nWe are looking for infrastru...   \n",
       "\n",
       "                                                job_id  \n",
       "409  eyJqb2JfdGl0bGUiOiJGcmF1ZCBTdHJhdGVneSBEYXRhIF...  \n",
       "420  eyJqb2JfdGl0bGUiOiJEYXRhIFNjaWVudGlzdCBhdCBSZW...  \n",
       "421  eyJqb2JfdGl0bGUiOiJQcmluY2lwYWwgRGF0YSBTY2llbn...  \n",
       "422  eyJqb2JfdGl0bGUiOiJTb2Z0d2FyZSBFbmdpbmVlciAtIE...  \n",
       "423  eyJqb2JfdGl0bGUiOiJEQVRBIFNDSUVOVElTVCIsImNvbX...  \n",
       "..                                                 ...  \n",
       "907  eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG...  \n",
       "908  eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIC0gT25zaX...  \n",
       "909  eyJqb2JfdGl0bGUiOiJEYXRhIEVuZ2luZWVyIiwiY29tcG...  \n",
       "910  eyJqb2JfdGl0bGUiOiJBV1MgRGF0YSBFbmdpbmVlciAoTW...  \n",
       "911  eyJqb2JfdGl0bGUiOiJEYXRhIEluZnJhc3RydWN0dXJlIE...  \n",
       "\n",
       "[76 rows x 5 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_title_company"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def filtered_parse_desc(text, job_id):\n",
    "    \"\"\"Extract selected structured fields from one job description.\n",
    "\n",
    "    Args:\n",
    "        text: Job-description text passed to parse_description.extract_desc_fields.\n",
    "        job_id: Identifier of the posting; returned as the first element so\n",
    "            the result can be matched back to its row.\n",
    "\n",
    "    Returns:\n",
    "        A 9-element list: [job_id, about, mission_and_values, remote,\n",
    "        responsibilities, required_qualifications, preferred_qualifications,\n",
    "        salary_or_pay_range, benefits_and_perks]. If extraction fails, the\n",
    "        eight values after job_id are None and the error is printed.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        parsed = parse_description.extract_desc_fields(text)\n",
    "        overview = parsed.company_overview\n",
    "        role = parsed.role_summary\n",
    "        duties = parsed.responsibilities_and_qualifications\n",
    "        compensation = parsed.compensation_and_benefits\n",
    "        fields = [\n",
    "            overview.about,\n",
    "            overview.mission_and_values,\n",
    "            role.remote,\n",
    "            duties.responsibilities,\n",
    "            duties.required_qualifications,\n",
    "            duties.preferred_qualifications,\n",
    "            compensation.salary_or_pay_range,\n",
    "            compensation.benefits_and_perks,\n",
    "        ]\n",
    "    except Exception as e:\n",
    "        # Best effort: keep the row, but report why it failed instead of hiding it.\n",
    "        print(f\"Error occurred while parsing description for job_id {job_id}: {e}\")\n",
    "        return [job_id] + [None] * 8\n",
    "    return [job_id] + fields"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The output of `extract_desc_fields` is a `JobDescription` object defined in `job_desc_pydantic.py`. We need to map the fields of that object to columns in our table."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='San Jose, CA') role_summary=RoleSummary(title='Fraud Strategy Data Scientist', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design rules to detect/mitigate fraud.', 'Develop python scripts and models that support strategies.', 'Investigate novel/large cases.', 'Identify root cause.', 'Set strategy for different risk types.', 'Work with product/engineering to improve control capabilities.', 'Develop and present strategies and guide execution'], required_qualifications=['Minimum 2 years of experience in risk analytics, data analysis, and data science within relevant industry experience in eCommerce, online payments, user trust/risk/fraud, or investigation/product abuse.', \"Bachelor's degree in computer science, Engineering, Mathematics, Statistics, Data Mining or related field or equivalent practical experience\", 'Experience using statistics and data science to solve complex business problems.', 'Proficiency in SQL, Python, Excel including key data science libraries.', 'Proficiency in data visualization including Tableau.', 'Experience working with large datasets.', 'Ability to clearly communicate complex results to technical experts, business partners, and executives including development of dashboards and visualizations, i.e. 
Tableau.', 'Comfortable with ambiguity and yet able to steer analytics projects toward clear business goals, testable hypotheses, and action-oriented outcomes.', 'Demonstrated analytical thinking through data-driven decisions, as well as the technical know-how, and ability to work with your team to make a big impact.'], preferred_qualifications=['AWS', 'fraud investigations', 'payment rule systems', 'working with ML teams', 'fraud typologies']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Rhombus Power Inc. (Rhombus) is a NASA Research Park startup located in the heart of Silicon Valley.', mission_and_values=\"Transforming the nation's defense and national security enterprises with Guardian, its Artificial Intelligence platform for strategic, operational, and tactical decision-making at the speed of relevance.\", size='Not specified', locations='San Francisco Bay Area') role_summary=RoleSummary(title='Data Scientist', team_or_department='Product team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Discover datasets that could help in solution development', 'Data curation, analysis, quantitative modeling', 'Validation and quality assurance of data, models and results', 'Deploy and implement solutions in collaboration with product team', 'Interact with the product team on current and upcoming user requirements'], required_qualifications=['Masters or Ph.D. in Sciences, Mathematics or Engineering, especially numerical methods and simulations', 'Strong background in database management solutions', 'Familiarity with databases such as MySQL and Oracle'], preferred_qualifications=['Experience with Cloud Computing environments (AWS, GCloud, Azure)', 'Background in Statistics', 'Experience with Machine Learning']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$100K -- $150K', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Microsoft is an equal opportunity employer. Consistent with applicable law, all qualified applicants will receive consideration for employment without regard to age, ancestry, citizenship, color, family or medical care leave, gender identity or expression, genetic information, immigration status, marital status, medical condition, national origin, physical or mental disability, political affiliation, protected veteran or military status, race, ethnicity, religion, sex (including pregnancy), sexual orientation, or any other characteristic protected by applicable local laws, regulations and ordinances.', mission_and_values='Microsoft’s mission is to empower every person and every organization on the planet to achieve more.', size='500M+ monthly active users', locations='Various locations') role_summary=RoleSummary(title='Principal Data Scientist', team_or_department='Search + Distribution (S+D) team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Define, invent, and deliver online and offline behavioral and human labeled metrics', 'Apply behavioral game theory and social science understanding', 'Develop deep understanding of business metrics'], required_qualifications=['Doctorate in Data Science, Mathematics, Statistics, Econometrics, Economics, Operations Research, Computer Science, or related field AND 5+ year(s) data-science experience', \"Master's Degree in Data Science, Mathematics, Statistics, Econometrics, Economics, Operations Research, Computer Science, or related field AND 7+ years data-science experience\", \"Bachelor's Degree in Data Science, Mathematics, Statistics, Econometrics, Economics, Operations Research, Computer Science, or related field AND 10+ years data-science experience\"], preferred_qualifications=['6+ years of experience coding in Python, C++, C#, C or Java', 'Customer focused, strategic, drives for 
results, is self-motivated, and has a propensity for action', 'Organizational, analytical, data science skills and intuition']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='USD $133,600 - $256,800 per year', bonus_and_equity='Competitive package', benefits_and_perks=['Inclusive work environment', 'Collaborative culture', 'Growth opportunities'])\n",
      "company_overview=CompanyOverview(about='Applied Intuition is a company focused on autonomous systems.', mission_and_values='The company culture is dynamic and customer-focused, with a focus on excellence.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Software Engineer', team_or_department='Not specified', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design and implement platform capabilities and product workflows', 'Build out foundational libraries that push the platform beyond the state of the art', 'Work closely with in-house and customer engineers to design and build supporting toolchains'], required_qualifications=['Passion for turning domain expertise into tooling that boosts productivity', 'Experience building software components or (sub) systems that address real-world planning and control challenges', 'Hands-on experience with more than one domain of relevant software framework or tools'], preferred_qualifications=['MSc or PhD in planning, control, or closely related field', 'Deep hands-on expertise in relevant algorithms or methods', 'Experience building and shipping software frameworks or tools that are used by others outside of the authors of the framework']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$65,000 USD to $400,000 USD annually', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Unspecified', mission_and_values='Unspecified', size='Unspecified', locations='Unspecified') role_summary=RoleSummary(title='Data Scientist', team_or_department='Data Science & Machine Learning', role_type='Full-time', remote='Unspecified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Participate in the full lifecycle of a model including design, coding, testing, and release', 'Collaborate with the data and analytics team, marketing team, and marketplace teams', 'Work on models such as fraud, player churn, or causal inference', 'Utilizing experimentation, help analyze and project optimal values', 'ML visualizations', 'Predictive analytics'], required_qualifications=[\"Bachelor's Degree in a quantitative field\", '3 years of professional experience with Python', 'Experience with Bayesian techniques', 'Experience with tools and frameworks like Pytorch, TensorFlow, Scikit-Learn, SQL, etc', 'Familiarity with some of the following models: outlier detection, lifetime value modeling, or causal inference'], preferred_qualifications=['Ph.D.']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K -- $100K', bonus_and_equity='Unspecified', benefits_and_perks=['Unspecified'])\n",
      "company_overview=CompanyOverview(about='Google Ads Marketing aims to help advertisers of all sizes succeed with digital marketing.', mission_and_values=\"Advance the science of Marketing to customers that use Google's advertising solutions.\", size='Large company', locations='US') role_summary=RoleSummary(title='Business Data Scientist', team_or_department='Google Ads Marketing', role_type='Full-time', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work with large, complex data sets', 'Solve complex analysis problems, applying advanced problem-solving methods (such as statistical and machine learning models) as needed', 'Conduct analysis that includes problem formulation, data gathering and requirements specification, processing, analysis, ongoing deliverables, and presentations', 'Design and analyze controlled experiments or counterfactual causal inference studies to examine the incremental impact of Ads marketing programs', 'Build and prototype analysis pipelines iteratively to provide insights at scale', 'Develop comprehensive knowledge of Google data structures and metrics, advocating for changes where needed', 'Interact cross-functionally, making business recommendations (e.g., cost-benefit, forecasting, experiment analysis) with effective presentations of findings at multiple levels of stakeholders through visual displays of quantitative information', 'Develop and automate reports, iteratively build and prototype dashboards to provide insights at scale, solving for business priorities'], required_qualifications=[\"Master's degree in Statistics, Mathematics, Bioinformatics, Economics, a related field, or equivalent practical experience\", '2 years of experience in a data science field', 'Experience with statistical software (e.g., R, Python, MATLAB) and database languages (i.e., SQL)', 'Experience leveraging data insights into storytelling for business stakeholders'], preferred_qualifications=['PhD 
in Statistics or a related field', '2 years of experience with statistical data analysis such as generalized linear models, multivariate analysis, clustering / segmentation, and sampling methods', 'Experience with machine learning on large-scale computing systems like Hadoop, MapReduce, or similar environments', 'Experience in controlled experiment design and causal inference methods', 'Ability to prioritize requests and partner well in an environment with competing demands from stakeholders', 'Excellent communication skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$127,000-$187,000', bonus_and_equity='Bonus and equity benefits', benefits_and_perks=['Bonus', 'Equity', 'Benefits'])\n",
      "company_overview=CompanyOverview(about='TikTok is the leading destination for short-form mobile video. Our mission is to inspire creativity and bring joy.', mission_and_values='To inspire creativity and bring joy. To us, every challenge, no matter how difficult, is an opportunity; to learn, to innovate, and to grow as one team.', size='Not specified', locations='U.S.') role_summary=RoleSummary(title='Risk Analyst', team_or_department='USDS-Platform and Community Integrity (PaCI) team', role_type='Full-time', remote='Hybrid work schedule (3 days in office, 2 days remote)') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Build rules, algorithms and machine learning models, to respond to and mitigate business risks in TikTok products/platforms', 'Analyze business and security data, uncover evolving attack motion, identify weaknesses and opportunities in risk defense solutions, explore new space from the discoveries', 'Define risk control measurements. Quantify, generalize and monitor risk related business and operational metrics. 
Align risk teams and their stakeholders on risk control numeric goals, promote impact-oriented, data-driven data science practices for risks'], required_qualifications=['Bachelor or degrees above in computer science, statistics, math, internet security or other relevant STEM majors', 'Solid data science skills', 'Proficiency in statistical analytical tools, such as SQL, R and Python'], preferred_qualifications=['Familiarity with machine learning or social/content online platform analytics', 'Proficiency in modern machine learning applications']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$144000 - $312444 annually', bonus_and_equity='Discretionary bonuses/incentives, and restricted stock units', benefits_and_perks=['100% premium coverage for employee medical insurance', 'Flexible Spending Account(FSA) Options like Health Care, Limited Purpose and Dependent Care', 'Mental and emotional health benefits through our EAP and Lyra', 'Gym and cellphone service reimbursements', '401K company match'])\n",
      "company_overview=CompanyOverview(about='Qventus is a real-time decision making platform for hospital operations. Our mission is to simplify how healthcare operates, so that hospitals and caregivers can focus on delivering the best possible care to patients.', mission_and_values='Simplify how healthcare operates, so that hospitals and caregivers can focus on delivering the best possible care to patients.', size='Not specified', locations='Multiple locations') role_summary=RoleSummary(title='Senior Data Scientist', team_or_department='Data Science', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Solve operational challenges in healthcare through the development, operationalization, and tuning of impactful models developed thoughtfully using a variety of machine learning, mathematical, and statistical approaches.', 'Collaborate with Product and Clinical partners to investigate, assess, and deploy POC solutions in strategic business areas', 'Design comprehensive experiments and analytics to measure the net impact of our interventions in healthcare settings', 'Create tools and resources to improve transparency in Data Science technical architecture and increase collaboration with engineering and analytics partners'], required_qualifications=['3+ years of research experience using a wide variety of statistical and machine learning techniques - particularly in NLP, explainable ML', '2+ years of relative industry experience developing, launching, and iterating on machine learning models and/or developing the core data science platform', 'Expertise with Python including an expansive of available data science libraries and util', 'Hands-on data engineering experience manipulating data sets, data cleaning, and pipelines.', 'Hands-on experience building and maintaining production machine learning pipelines (experience with Sagemaker preferred)', 'Strong software development 
foundations - dedication to high code quality, stable architecture, and an eye toward maintainability'], preferred_qualifications=['Dedication to mentorship and growing strong collaborative data science teams', 'Experience working with productionalized Generative AI (ChatGPT etc.)', 'Proven ability to work with domain experts to design relevant features and design, develop and tailor algorithmic solutions to problems in healthcare, particularly in the hospital setting', 'Strong cross-functional communication with technical and non-technical partners', 'Experience with healthcare data and industry']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$170,000 to $190,000', bonus_and_equity='Not specified', benefits_and_perks=['Competitive medical, dental, and vision coverage', 'Generous HSA contribution', 'Employer-provided Short Term and Long Term Disability insurance and Basic Life and AD&D insurance', '100% paid Parental and Pregnancy Leave', 'Monthly Wellness and Technology stipend', 'Ability to participate in the 401(k) plan', 'Generous Stock Option awards'])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='Data Science & Machine Learning', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Solve ambiguous, challenging business problems using data-driven approaches', 'Design the framework of data analysis to drive business insight and facilitate decisions', 'Establish the standard work process and best practices for data science including analytics, experimentation, and modeling', 'Work closely with data engineers to build and improve the reliability... accuracy of the data logging and data pipelines', \"Work with stakeholders including senior executives, products, and design teams to improve the team's productivity and quality of output\"], required_qualifications=['MS or PhD degree in Statistics, Computer Science, Econometrics or equivalent quantitative field', '3 years of experience in analytical or data warehousing experience', 'Strong strategic thinking and problem solving skills', 'Experience with A/B test experiment design and analysis', 'Experience with exploratory data analysis, statistical analysis and testing, and machine learning models', 'Advanced SQL skills', 'Ability to use languages and tools like Python, R, Hive, Spark to work with large data sets', 'Excellent communication skills, both written and verbal'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary Range: $80K -- $100K', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Walmart U.S. offers an extensive selection that customers value, whether they shop online at Walmart.com, through one of our mobile apps, or in-store.', mission_and_values='Improve the lives of customers by empowering team members, stores, and merchants with technological innovation.', size='Undisclosed', locations='Sunnyvale, CA') role_summary=RoleSummary(title='Staff Data Scientist', team_or_department='Data Science', role_type='Full-time', remote='Hybrid work') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Building advanced machine learning and deep learning models with various applications in marketing', 'Building solutions that can be easily integrated into external frameworks or existing applications', 'Selecting appropriate ML modeling techniques for complex problems with large-scale data'], required_qualifications=[\"Master's degree in Computer Science, Statistics, Optimization, or related field plus 3 years' experience in a machine learning / deep learning related field\"], preferred_qualifications=['PhD in Machine Learning, Computer Science, Information Technology, Operations Research, Statistics, Applied Mathematics, Econometrics', 'Successful completion of one or more assessments in Python, Spark, Scala, or R', 'Using open source frameworks (for example, scikit learn, tensorflow, torch)']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$143,000.00-$286,000.00', bonus_and_equity='Annual or quarterly performance bonuses, and stock', benefits_and_perks=['401(k) match', 'stock purchase plan', 'paid maternity and parental leave', 'PTO', 'multiple health plans'])\n",
      "company_overview=CompanyOverview(about='Online payments industry', mission_and_values='Not provided', size='Not provided', locations='Draper, UT or San Jose, CA') role_summary=RoleSummary(title='Credit Strategy Data Scientist', team_or_department='Credit Risk Strategy team', role_type='Full-time', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design rules to detect/mitigate loss', 'Investigate novel/large cases', 'Identify root cause', 'Set strategy for different risk types', 'Work with product/engineering to improve control capabilities', 'Develop and present strategies and guide execution', 'Drive results that maximize eligible customers while controlling losses'], required_qualifications=[\"Bachelor's degree in computer science, Engineering, Mathematics, Statistics, Data Mining or related field or equivalent practical experience\", 'Minimum 2 years of experience in risk analytics, data analysis, or data science within the Fintech or online payments industry', 'Proficiency in SQL, Python, Excel including key data science libraries'], preferred_qualifications=['Experience with AWS', 'Experience with payment rule systems', 'Knowledge of credit products']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not provided', bonus_and_equity='Not provided', benefits_and_perks=['Hybrid work environment'])\n",
      "company_overview=CompanyOverview(about='Walmart builds and deploys core AI assistant experiences across Walmart.', mission_and_values='To make life easier for hundreds of millions of people.', size='Tens of millions of active users across multiple countries', locations='Sunnyvale, CA') role_summary=RoleSummary(title='Distinguished Engineer - Conversational AI', team_or_department='Conversational AI team', role_type='Distinguished Engineer', remote='Hybrid work') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Define the AI assistant architecture leveraging generative AI in a rapidly evolving ecosystem', 'Partner with multiple teams as the senior platform architect and evangelize the architecture, enable integrations and support development of new AI assistants', 'Be up-to-date on industry trends in the latest generative AI and AI assistant architecture patterns', 'Work with the applied scientists, ML engineers, software engineers and product managers to develop next-generation of AI assistant experiences'], required_qualifications=[\"Bachelor's degree in computer science, computer engineering, computer information systems, software engineering, or related area and 6 years' experience in software engineering or related area\", \"8 years' experience in software engineering or related area\"], preferred_qualifications=[\"Master's degree in computer science, computer engineering, computer information systems, software engineering, or related area\", 'Background in creating inclusive digital experiences, demonstrating knowledge in implementing Web Content Accessibility Guidelines (WCAG) 2.2 AA standards, assistive technologies, and integrating digital accessibility seamlessly']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$169,000.00-$338,000.00', bonus_and_equity='Annual or quarterly performance bonuses, Stock', benefits_and_perks=['401(k) match', 'Stock purchase plan', 'Paid maternity and 
parental leave', 'PTO', 'Multiple health plans'])\n",
      "company_overview=CompanyOverview(about='Samsung Research America Digital Health Team', mission_and_values='Empower people to live healthier lives by leveraging wearables, smartphones, medical devices, AI, and health services', size='Millions of Samsung users’ lives around the world have been touched by our products', locations='Samsung Research America') role_summary=RoleSummary(title='Senior Research Engineer', team_or_department='Samsung Research America Digital Health Team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Prepare proof-of-concept, demo / prototypes, design and execute pilot studies using Generative AI and Large Language Models', 'Work with project team and contribute to dataset/document preparation, prompt engineering, customization, training, fine-tuning and deployment of healthcare LLMs', 'Lead the development of innovative ideas and generative AI algorithms based on noisy signals from sensors including, but not limited to, light, audio, physiological, and inertial sensor data collected both in lab and field settings', 'Generate patents and scientific research papers for top-tier publications'], required_qualifications=['Ph.D. 
in Computer Science, Computer Engineering, Biomedical Engineering, Electrical Engineering, Biophysics, Mathematics, health sciences or related areas or related field, or equivalent combination of education, training, and experience', 'Demonstrable knowledge and experience in Generative AI, Large Language Models, and their applications on mobile sensor data collected in lab and in the wild', '10+years of experience in one or more of the following programming languages: Python, Java, Android, MATLAB, R', 'Strong interpersonal and collaboration skills, ability to present complex information in an understandable and compelling manner, and comfortable working with multi-disciplinary teams', 'Publications in top-tier AI computing venues including but not limited to NeurIPS, CVPR, ICML, ICLR, ICCV, and ACL'], preferred_qualifications=['Experience in NLP and Conversational AI', 'Experience in LLM validation, reliability, toxicity/harmfulness avoidance', 'Experience with building knowledge graphs from unstructured texts', 'Experience with vector DB', 'Strong mathematics background, especially statistics', 'Turn the analyzed data into actionable insight and/or understandable visualization', 'On-device implementation of ML/AI algorithms in C/C++/Java', 'Clinical study design, IRB development, in-lab and/or in-the-wild data collection studies', 'Product development and prototyping experience in order to implement and validate solutions', 'Experience in designing and conducting studies of wearable devices or clinical-trials with human subjects both in lab and field settings', 'Previous experience developing algorithms in healthcare research/industry using EMR/EHR and mobile/wearable sensor data', 'Have working knowledge of the healthcare industry and experience curating and analyzing healthcare and wellness data', 'Proficiency in the Android/Tizen/iOS development environment with the ability to create fully working prototypes', 'Experience in collaborating on software 
implementations of algorithms and computing models with client and cloud engineers', 'Understanding of human factors, usability and comfort with wearable sensors', 'Experience operating under HIPAA is a plus']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['exciting technical challenges', 'rewarding career experience'])\n",
      "company_overview=CompanyOverview(about='LinkedIn is the world’s largest professional network, built to help members of all backgrounds and experiences achieve more in their careers.', mission_and_values='Create economic opportunity for every member of the global workforce.', size='Not specified', locations='Mountain View, CA, San Francisco, CA, or Bellevue, WA') role_summary=RoleSummary(title='Staff Software Engineer', team_or_department='AI Training Infra team', role_type='Staff Software Engineer', remote='Hybrid work options') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Owning the technical strategy for broad or complex requirements', 'Designing, implementing, and optimizing the performance of large-scale distributed serving or training for personalized recommendation as well as large language models', 'Improving the observability and understandability of various systems'], required_qualifications=['Bachelor’s Degree in Computer Science or related technical discipline, or equivalent practical experience', '4+ years of experience in the industry with leading/building deep learning systems', '4+ years of experience with Java, C++, Python, Go, Rust, C# and/or Functional languages such as Scala or other relevant coding languages'], preferred_qualifications=['BS and 8+ years of relevant work experience', 'MS and 7+ years of relevant work experience, or PhD and 4+ years of relevant work experience', 'Previous experience working with geographically distributed co-workers', 'Outstanding interpersonal communication skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$156,000 - $255,000', bonus_and_equity='Not specified', benefits_and_perks=['Hybrid work options', 'Opportunities for career growth', 'Comprehensive health and wellness programs'])\n",
      "company_overview=CompanyOverview(about='Gen is a global company powering Digital Freedom through consumer brands including Norton, Avast, LifeLock, Avira, AVG, ReputationDefender, and CCleaner.', mission_and_values='Powering Digital Freedom by protecting consumers and giving them control of their digital lives.', size='More than 500 million users in 150 countries', locations='Global') role_summary=RoleSummary(title='AI Technical Director', team_or_department='AI research and innovation team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Provide technical leadership in adopting the latest AI advances into solutions for the benefit of consumers.', 'Research, architect and implement state-of-the-art AI techniques.', 'Establish best AI practices and follow them across project portfolio.', 'Define AI performance, accuracy and reliability metrics and implement their consistent measurement.', 'Define and implement AI safety measures.', 'Architect, develop and maintain scalable systems and infrastructures for data ingestion, transformation, analysis, model hosting, and inference.', 'Build and deploy AI solutions in multi-cloud environment.', 'Work on solutions with diverse teams from various functions of the company.', 'Document and communicate the results and insights of AI projects to exec-level audience.'], required_qualifications=['Ph.D. 
level degree in artificial intelligence, computer science, engineering, mathematics, or related field;', 'Experience in developing and deploying industrial grade AI solutions', 'Experience in using AI frameworks, trending models and libraries.', 'Experience in using cloud services and platforms.', 'Enthusiasm, strong analytical mind, willingness to learn and desire to make things real.', 'Excellent communication and presentation skills.', 'Ability to work independently and in a team.'], preferred_qualifications=['Large software project experience with multi-threaded, multi-process distributed computing environments and correct Software Engineering practices', 'Hands-on experience with Linux, Bash, GIT, Docker, Kubernetes, continuous integration, package deployment and dependency management', 'Knowledge of cybersecurity and digital safety concepts.']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary Ranges: $249,000.00 - $290,100.00', bonus_and_equity='On Target Earnings (annual base salary + target annual commission)', benefits_and_perks=['Competitive benefits package', 'Opportunity for bonus incentives'])\n",
      "company_overview=CompanyOverview(about='Recruiting from Scratch is a premier talent firm that focuses on placing the best product managers, software, and hardware talent at innovative companies.', mission_and_values='Our team is 100% remote and we work with teams across the United States to help them hire.', size='Not specified', locations='Sunnyvale, US') role_summary=RoleSummary(title='Edge Computing Engineer', team_or_department='Engineering team', role_type='Full-time', remote='On-site / In-office (≥4 days a week)') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Building edge applications processing vision data and communication layers for the compute-constrained edge devices', 'Deploying machine learning models to production', 'Optimizing the platform runtime for maximum performance', 'Building observability and telemetry'], required_qualifications=['3+ years of experience writing production software in C++ and Python', 'Experience using various profiling tools (e.g., gdb, Nsight, Valgrind, flame graph) to optimize the code', 'Experience building applications processing real-time data and optimizing them for latency and memory'], preferred_qualifications=['Experience with Docker, CI / CD pipelines', 'Experience with infrastructure management (Salt)', 'Experience with monitoring (Grafana)', 'Experience with video processing & Streaming (Gstreamer)', 'Experience Interfacing ML Models (PyTorch)']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$150,000-$220,000 base', bonus_and_equity='Not specified', benefits_and_perks=['Flexible PTO & Sick Policy', 'Medical, Dental, and Vision insurance', '401k'])\n",
      "company_overview=CompanyOverview(about='Gen is a global company powering Digital Freedom through consumer brands including Norton, Avast, LifeLock, Avira, AVG, ReputationDefender, and CCleaner.', mission_and_values='Powering Digital Freedom by protecting consumers and giving them control of their digital lives.', size='More than 500 million users in 150 countries', locations='Multiple locations') role_summary=RoleSummary(title='AI Technical Director', team_or_department='AI Research and Innovation Team', role_type='Full-time', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Provide technical leadership in adopting the latest AI advances into solutions for the benefit of consumers.', 'Research, architect and implement state-of-the-art AI techniques, such as large generative models, multi-modal models, reinforcement learning, natural language processing, computer vision, etc., for digital safety applications.', 'Establish best AI practices and follow them across project portfolio.', 'Define AI performance, accuracy and reliability metrics and implement their consistent measurement.', 'Define and implement AI safety measures.', 'Architect, develop and maintain scalable systems and infrastructures for data ingestion, transformation, analysis, model hosting, and inference.', 'Build and deploy AI solutions in multi-cloud environment.', 'Work on solutions with diverse teams from various functions of the company.', 'Document and communicate the results and insights of AI projects to exec-level audience.'], required_qualifications=['Ph.D. 
level degree in artificial intelligence, computer science, engineering, mathematics, or related field', 'Experience in developing and deploying industrial grade AI solutions', 'Experience in using AI frameworks, trending models and libraries, such as TensorFlow, PyTorch, GPT, Gemini, LaMDA, LangChain, Vertex AI, SageMaker, etc.', 'Experience in using cloud services and platforms, such as GCP, AWS, etc.', 'Enthusiasm, strong analytical mind, willingness to learn and desire to make things real', 'Excellent communication and presentation skills', 'Ability to work independently and in a team'], preferred_qualifications=['Large software project experience with multi-threaded, multi-process distributed computing environments and correct Software Engineering practices', 'Hands-on experience with Linux, Bash, GIT, Docker, Kubernetes, continuous integration, package deployment and dependency management', 'Knowledge of cybersecurity and digital safety concepts such as threat detection, scam detection, social engineering, incident response, identity management, etc.']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary range: $249,000.00 - $290,100.00', bonus_and_equity='Opportunity for bonus incentives', benefits_and_perks=['Competitive benefits package', 'Opportunity for bonus incentives'])\n",
      "company_overview=CompanyOverview(about='Recruiting from Scratch is a premier talent firm that focuses on placing the best product managers, software, and hardware talent at innovative companies.', mission_and_values='Helping companies hire the best talent', size='Not specified', locations='Palo Alto') role_summary=RoleSummary(title='AI/ML Engineer', team_or_department='AI research and development', role_type='Full-time', remote='No') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=['3-8 years of experience', 'Bachelors of Science in Computer Science', 'Expertise in cloud services (AWS, Azure, GCP) and deploying applications in cloud environments', 'Experience with containerization and orchestration technologies (Docker, Kubernetes)', 'Proficiency in programming languages such as Python, C++, or Go', 'Familiarity with AI and machine learning concepts, particularly in deploying ML models'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$150-200k base', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Amazon Web Services (AWS) is the world’s most comprehensive and broadly adopted cloud platform.', mission_and_values='AWS values diverse experiences.', size='large scale', locations='Atlanta, GA, USA | Austin, TX, USA | Boston, MA, USA | Chicago, IL, USA | Dallas, TX, USA | Herndon, VA, USA | Jamaica Plain, MA, USA | Jersey City, NJ, USA | Mountain View, CA, USA | Seattle, WA, USA') role_summary=RoleSummary(title='Machine Learning Engineer', team_or_department='Sales, Marketing and Global Services (SMGS)', role_type='full-time', remote='flexible work hours and arrangements') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design, implement, test, deploy and maintain innovative ML solutions', 'Research implementations that deliver the best possible experiences for customers', 'Build high-quality, highly available, always-on products'], required_qualifications=['Master’s degree in computer science or equivalent'], preferred_qualifications=['3+ years of non-internship professional software development experience', '3+ years of programming with at least one software programming language experience', '3+ years of leading design or architecture (design patterns, reliability and scaling) of new and existing machine learning systems experience']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$115,000/year - $223,600/year', bonus_and_equity='equity, sign-on payments, and other forms of compensation', benefits_and_perks=['health insurance', 'retirement plans', 'paid time off'])\n",
      "company_overview=CompanyOverview(about='Quickbooks', mission_and_values='Driving marketing decisions through data-backed insights', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Staff Business Data Analyst', team_or_department='Quickbooks Marketing Analytics team', role_type='Staff Business Data Analyst', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Drive opportunity identification and sizing in our web funnel', 'Work closely with our business performance analytics, web marketing, and web analytics teams', 'Own and improve the core set of dashboards for web funnel', 'Drive improvements to web analytics instrumentation and data architecture', 'Lead the construction of web funnel reporting and data narratives to senior leaders'], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Intuit is the global financial technology platform that powers prosperity for the people and communities we serve.', mission_and_values='We believe that everyone should have the opportunity to prosper.', size='Approximately 100 million customers worldwide', locations='Mountain View, California') role_summary=RoleSummary(title='Staff Business Data Analyst', team_or_department='QuickBooks Marketing Analytics team', role_type='Full-time', remote='Not mentioned') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Drive opportunity identification and sizing in our web funnel', 'Work closely with business performance analytics, web marketing, and web analytics teams', 'Own and improve the core set of dashboards for web funnel', 'Drive improvements to web analytics instrumentation and data architecture', 'Lead the construction of web funnel reporting and data narratives to senior leaders'], required_qualifications=[\"Bachelor's degree in a quantitative field or equivalent work experience\", 'Advanced degree preferred'], preferred_qualifications=['5+ years of experience in web analytics', 'Experience in advanced statistical analysis, test design, and marketing attribution', 'Practical experience constructing data pipelines and ETL utilizing SQL and Python', 'Strong data storytelling skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Base pay range: $152,500-206,000', bonus_and_equity='Expected base pay range: $152,500-206,000', benefits_and_perks=['Competition compensation package', 'Strong pay for performance rewards approach', 'Cash bonus', 'Equity rewards', 'Benefits'])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Understand the day-to-day issues that our business faces, which can be better understood with data', \"Compile and analyze data related to business' issues\", 'Develop clear visualizations to convey complicated data in a straightforward fashion'], required_qualifications=[\"Bachelor's or Master's degree in Statistics or Applied Mathematics or equivalent experience\", \"1 - 2 years' Data Analysis experience\", 'Proficient in SQL'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Intuit is a financial software company', mission_and_values='To power prosperity around the world', size='Large company', locations='Various') role_summary=RoleSummary(title='Finance and Operations Analytics', team_or_department='Finance and Operations Analytics team', role_type='Full-time', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Developing Machine Learning models', 'Building reporting dashboards', 'Generating insights from data'], required_qualifications=['Strong analytical skills', 'Experience with data analytics'], preferred_qualifications=['Experience with machine learning', 'Strong communication skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Competitive salary', bonus_and_equity='Performance-based bonus and equity', benefits_and_perks=['Health insurance', 'Retirement plan', 'Paid time off'])\n",
      "company_overview=CompanyOverview(about='El Camino Health is committed to hiring, retaining and growing the best and brightest professionals who will carry our mission and vision forward. We are proud of our reputation in the community: One built on compassion, innovation, collaboration and delivering high-quality care.', mission_and_values='Compassion, innovation, collaboration, and delivering high-quality care', size='Not specified', locations='Santa Clara') role_summary=RoleSummary(title='Data Analyst', team_or_department='Not specified', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Understand the database structure and schema of our case management software, design and run reports in the system or with additional tools such as SQL or Tableau, analyze key data and make meaningful recommendations to the Concern management team', 'Provide key insights into the business processes and any improvements required by mining the data and gathering trends and insights', 'Collaborate with the development teams to test features and functionality and identify the root cause of issues as well as track and communicate status of defects'], required_qualifications=['Bachelor’s degree in a technology or science related field', 'Three (3) years in an analyst position having related responsibilities and duties', 'Advanced Excel and Office suite skills', 'Statistical computational skills', 'Advanced knowledge of relational databases such as Access, SQL, Amazon Aurora', 'Ability to understand complex data and interpret business needs in simple terms', 'Ability to understand the data, database structure, schema and functionality of case management software and client-facing digital platform and design, query, run reports', 'Ability to conceptualize and design data dashboards and reports', 'Excellent communication and interpersonal skills', 'Strong organizational skills and ability to 
problem solve, prioritize tasks and think critically', 'Ability to understand customer needs'], preferred_qualifications=['Experience collaborating with software development teams to test, track, and communicate status of defects', 'Experience working with Jira or other project management tools']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$36.13 - $54.20 USD Hourly', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='', team_or_department='', role_type='full-time', remote='full-time entry level option for remote job') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Extract, organize and analyze media buying performance data', 'Interface with the data engineering team', 'Investigate and present results of requests for ad hoc data analysis'], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='EROS Technologies was founded with a simple motive of offering the clients exactly what they want, how they want and when they want it. By leveraging for its clients its technological edge and right-sourcing advantage, EROS in a short period of time has grown to become one of the most trusted strategic technology partners. Treating every client as the top priority, we customize our solutions and services to align with the unique needs of each client.', mission_and_values='', size='', locations='Mountain view-CA') role_summary=RoleSummary(title='Data Analyst', team_or_department='', role_type='Full time only', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=['5-7 years of experience', 'Good Communication', 'Data Analysis Skills and excellent SQL knowledge'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80-95k/annum + benefits', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Georgia IT Inc', mission_and_values='', size='', locations='Mountain View, CA 94043') role_summary=RoleSummary(title='Jr Data Analyst', team_or_department='', role_type='Contract', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Gather business requirements and determine most effective and efficient reporting platform', 'Design and develop metrics and reports, dashboards and analyses to drive key business decisions', 'Conduct large scale data analysis and modeling to derive actionable insights', 'Review outputs for trends', 'Work with team to conduct root cause analyses and communicate improvement recommendations'], required_qualifications=['Coding experience with Javascript, Apps Script, SQL and/or BigQuery', 'Strong background in KPI Dashboards development and automation and information systems management', 'Strong visualization experience – charts, bars, images, dynamic reports etc.', 'Some exposure to Procurement, Planning and Product Lifecycle Management functions', 'Excellent Oral/Written Communication and Stakeholder management Skills', 'Excellent analytical and problem solving skills', 'Self driven, ability to work in an unstructured environment with minimal supervision'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Rate: Doe', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Spin Analytics and Strategy LLC', mission_and_values='', size='', locations='Palo Alto, CA') role_summary=RoleSummary(title='Data Analyst (Red shift) & Mode Analytics', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Write queries against Red shift', 'Produce reports on Mode Analytics', 'Automate publishing reports to slack'], required_qualifications=['BS degree in specific technical fields like computer science, math, statistics'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='Mountain View, CA') role_summary=RoleSummary(title='Data Analyst focused on Warehousing', team_or_department='', role_type='Full-time', remote='In person') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Developing and maintaining databases, and data systems – reorganizing data in a readable format', 'Performing analysis to assess the quality and meaning of data', 'Using statistical tools to identify, analyze, and interpret patterns and trends in complex data sets', 'Preparing reports for the management stating trends, patterns, and predictions using relevant data', 'Working with programmers, engineers, and management heads to identify process improvement opportunities, propose system modifications, and devise data governance strategies', 'Preparing final analysis reports for the stakeholders to understand the data-analysis steps, enabling them to make important decisions based on various facts and trends'], required_qualifications=['Demonstrated expertise in SQL programming', 'Experience in creating technical documentation such as Requirement Documents and Source to Target Sheets', 'Proficiency in developing User Acceptance Tests for metrics validation', 'Strong collaboration and communication skills to work effectively with cross-functional teams'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$4,000.00 - $6,000.00 per month', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Intuit', mission_and_values='Delivering data-driven insights, driving change management, and bringing together products and GenAI LLMs to make a tangible impact on customer experience', size='650+ analysts', locations='Various') role_summary=RoleSummary(title='Technical Data Analyst', team_or_department='Analytics', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Driving data models and definitions', 'Communication and Change Management', 'Analytics Methodologies'], required_qualifications=['Expertise in analytics methodologies', 'Deep knowledge of Gen AI measurement, incrementality measurement frameworks'], preferred_qualifications=['Deep knowledge of ETL and data warehousing', 'Excellent understanding of data architectures', 'Passion for delivering data-driven insights']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Competitive salary', 'Benefits package', 'Opportunity to shape analytics for GenAI'])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Accurately label machine learning data through various methods to refine AI models', 'Collaborate with team members to identify improvements to the labeling interface or labeling processes in order to drive efficiency and high-quality', 'Complete assignments by deadlines while meeting team expectations and goals'], required_qualifications=[\"Bachelor's degree in English, Literature, Creative Writing, Journalism, or a related field (or equivalent work experience)\", 'Proficiency in reading comprehension and possessive of strong writing skills, with the ability to understand and interpret complex and diverse texts', 'Ability to work independently, follow instructions, manage time effectively, and meet deadlines in a dynamic and fast-paced environment', 'Ability to manage ambiguity, uncertainty, and changing priorities with limited resources', 'Ability to accept and integrate continuous feedback while remaining professional', 'Excellent verbal and written communication skills and the ability to collaborate within a multi-disciplinary team', 'Quick learner and self-starter with strong problem-solving skills'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Palo Alto, CA', mission_and_values='', size='', locations='Palo Alto, CA') role_summary=RoleSummary(title='Data Analyst', team_or_department='Supply Chain Management', role_type='Full-time', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Analyze and visualize data to help with the successful launch and scale of various programs', 'Create reports and data visualizations to guide decision-making across the supply chain team', 'Respond promptly, accurately and efficiently to data requests and questions', 'Support supply chain with master data updates as needed', 'Drive data integrity including identifying and acquiring missing information', 'Communicate findings effectively to anyone in the company from executives to engineers', 'Proactively identify inconsistencies in the data through visualizations and with statistical tests', 'Continuously improve SQL and python skills, helping automate repetitive work and multiply yourself', 'Experiment and learn rapidly'], required_qualifications=[\"Bachelor's degree in a quantitative field and/or equivalent experience or evidence of exceptional deductive ability\", 'Expert in data analysis, visualization, and communication, with prior full-time work experience strongly preferred', 'Proficient in Advanced MS Excel, SQL and Python', 'Data visualization programs such as Tableau and PowerBi are preferred', 'Experience in supply chain / manufacturing Program Management helpful', 'Ability to work in a fast-paced start-up technology environment and deliver excellent results', 'Strong communication skills, and ability to work with multiple stakeholders', 'Experienced in Sourcing, MRP and fulfilment systems is plus', 'Highly proficient in automation and supply chain analytics'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$72.3K - $91.5K a year', bonus_and_equity='', 
benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Amazon via Allegis Global Solutions', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='Data Analyst', team_or_department='', role_type='Contract', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Support effective decision-making through economic, financial and market analysis of critical issues, including capacity planning, price and cost modeling, and returns on investment', 'Support the capacity forecasting, budgeting and reporting efforts for operating expense, capital and resource allocation', 'Design and coordinate the development of systems and tools to keep pace with the group’s growing and rapidly changing businesses'], required_qualifications=['Undergraduate degree in business, finance, engineering, or a related field', 'Strong analytic skills and a demonstrated ability to build and manage financial models for business forecasting, variance analysis, and problem solving', 'Fluency in standard software including MS Excel', 'Knowledge of SQL is a strong plus'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about=\"Argo AI is a global self-driving products and services company on a mission to make the world's streets and roadways safe, accessible, and useful for all.\", mission_and_values='Building self-driving technology you can trust', size='Not specified', locations='Various') role_summary=RoleSummary(title='Data Analyst', team_or_department=\"Argo's Fleet Operations team\", role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Gather data, structure problems, and perform analyses', 'Research technical and business issues', 'Work cross-functionally to analyze opportunities, synthesize findings, and drive insights', 'Act as part of the ongoing team to translate recommendations into results', 'Present data-driven recommendations and optimize our current go-to-market strategy', \"Track and report key performance metrics regarding fleet utilization and output, with detailed analysis of how each is supporting Argo's mission\", 'Build analytics dashboards using mostly off-the-shelf tools', 'Using dashboards, create regular reports that will be presented and reviewed by the data services team', 'Help analyze and understand gaps in reporting', 'Document process and playbooks for analysis', \"Create 'source of truth' tables and dashboards that the company (and our team) can use to identify, diagnose and address issues\", 'Understand input data and data roadmaps from other teams across the company to ensure that the top-level funnels are well defined and understood', 'Build scalable analytical frameworks; partner with engineering teams to drive instrumentation of new metrics, if/where we determine this is needed', 'Work with cross-functional teams such as engineering, product management, various data teams to deploy data quality across critical pipelines and to set up processes to triage data issues', 'Effectively and proactively communicate insights, 
contribute to team presentations and leadership reviews, and drive projects to completion to hit team goals', 'Create and drive data quality standards and frameworks to ensure inclusion into pipeline engineering efforts', 'Assist with new product experimentation including plan creation, roll-out, and monitoring'], required_qualifications=[\"Bachelor's Degree\", '5 years of experience in analytics or a closely related field', 'High proficiency with business intelligence tools and SQL'], preferred_qualifications=['Masters Degree with some focus or concentration in statistic, data analysis, data science', 'Experience working with Data Warehouses like Redshift, BigQuery, or Snowflake', 'Strong understanding of statistics and experience with business intelligence and data analysis', 'Ability to manage and prioritize conflicting objectives', 'Excellent problem solving and troubleshooting, implementing solutions, and documenting results', 'Strong desire to learn new skills related to technology and software', 'Excellent communication skills with the ability to span a large and varied workforce', 'Proven self-starter mindset and the ability to work independently or with minimal supervision', 'Detail oriented/good organizational skills', 'Occasional travel to remote offices and test facilities', 'Established strong analysis and presentation skills including advanced use of Excel or G Sheets and Powerpoint or G Slides', 'Solid written and verbal communication skills, including presentation skills', 'Basic understanding of experimental design (such as A/B experiments) and statistical methods', 'Ability and experience in extracting insights from data, and summarizing learnings / takeaways', 'Experience with Excel and some dashboarding/data visualization (i.e. 
Tableau, Mixpanel, Looker, or similar)']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K', bonus_and_equity='Not specified', benefits_and_perks=['High-quality individual and family medical, dental, and vision insurance', 'Competitive compensation packages', 'Employer-matched 401(k) retirement plan with immediate vesting', 'Employer-paid group term life insurance and the option to elect voluntary life insurance', 'Paid parental leave', 'Adoption & Surrogacy Assistance Program', 'Paid medical leave', '30 day paid sabbatical upon 5 years of employment', 'Unlimited vacation', 'Complimentary daily lunches, beverages, and snacks', 'Pre-tax commuter benefits', 'Monthly wellness stipend', 'Professional development reimbursement', 'Employee assistance program', 'Discounted programs that include legal services, identity theft protection, pet insurance, and more'])\n",
      "company_overview=CompanyOverview(about='ScoreData is a company that specializes in providing data analytics solutions.', mission_and_values='Focused on customer success', size='Not specified', locations='Palo Alto, USA') role_summary=RoleSummary(title='Hands-on Data Scientist', team_or_department='Not specified', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work on specific customer data analytics project from start to finish', 'Understand, document and deliver customer projects using the ScoreFast(™) technology', 'Focus on customer satisfaction and success', 'Work closely with other team members to deliver projects', 'Explore new uses cases / ideas to solve customer challenge', 'May require travel to customer sites based on need basis'], required_qualifications=['B.S or or higher in the area of data science, computer science, statistics, mathematics, physics, engineering, operations research or other quantitative analytical field from a reputed school', '3 years of working experience with at least 2 years’ experience with performing customer analytics projects', '2+ years’ experience with Machine Learning and Data Mining', '2+ years using R or Python for data analytics', '1+ year working with data in Hadoop and /or Spark ecosystem', 'Comfortable articulating technical problems at a business value level', 'Languages: Python, Java/Scala', 'DataBase: MySQL, HBase'], preferred_qualifications=['Knowledge of Github, JIRA, Confluence, Jenkins']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Stanford University is committed to diversity, equity and inclusion.', mission_and_values='Promoting anti-racism and equity through policies, programs, and practices at all levels.', size='Not specified', locations='Stanford, California, United States') role_summary=RoleSummary(title='Research Data Analyst', team_or_department='Department of Pediatrics', role_type='Part-time', remote='Hybrid eligible') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Manage and analyze large amounts of information', 'Employ new and existing tools to interpret, analyze, and visualize multivariate relationships in data', 'Create databases and reports, develop algorithms and statistical models, and perform statistical analyses', 'Collaborate with faculty and research staff on data collection and analysis methods', 'Provide documentation based on audit and reporting criteria to investigators and research staff', 'Communicate with government officials, grant agencies and industry representatives'], required_qualifications=['Bachelor’s degree or a combination of education and relevant experience', 'Experience in a quantitative discipline such as economics, finance, statistics or engineering', 'Substantial experience with MS Office and analytical programs', 'Strong writing and analytical skills', 'Ability to prioritize workload'], preferred_qualifications=['Master’s degree or a combination of education and relevant experience in computational biology or bioinformatics', 'Experience in DNA, RNA extraction and quality control, library preparation, sequencing, and data analysis', 'Familiarities to machine learning and deep learning algorithms']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$32,240 - $48,500 per year', bonus_and_equity='Not specified', benefits_and_perks=['Employee benefits', 'Diversity, equity and inclusion', 'Continuous learning and improvement'])\n",
      "company_overview=CompanyOverview(about='Scilex Holding Company is an innovative revenue-generating company focused on acquiring, developing and commercializing non-opioid pain management products for the treatment of acute and chronic pain.', mission_and_values='Committed to social, environmental, economic, and ethical principles to responsibly develop pharmaceutical products to maximize quality of life.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Senior Data Analyst', team_or_department='Commercial Department', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Collaborate with Commercial team to help synthesize insights in a streamlined manner', 'Architect complex data-centric and business intelligence solutions', 'Support advanced analytics efforts'], required_qualifications=['Working knowledge of pharmaceutical industry data sources', 'Prior experience developing business requirements documents and managing data projects'], preferred_qualifications=[\"Bachelor's degree (business discipline preferred)\", \"5-7 years' experience as a Data Analyst or in a similar role working directly with clinical healthcare data\"]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$114,000 - $131,000/annually', bonus_and_equity='Not specified', benefits_and_perks=['Medical benefits', '401(k) eligibility', 'Vacation', 'Sick time', 'Parental leave'])\n",
      "company_overview=CompanyOverview(about='Tesla', mission_and_values='ClimateTech', size='Not specified', locations='Palo Alto, California') role_summary=RoleSummary(title='Data Analyst, Supply Chain', team_or_department='Supply Chain Management', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Analyze and visualize data', 'Ensure effective utilization of large supply chain datasets', 'Support global supply management team'], required_qualifications=['Data analysis skills', 'Supply chain management knowledge', 'Experience with large datasets'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$68,000 - $234,000/annual salary', bonus_and_equity='Not specified', benefits_and_perks=['cash and stock awards', 'benefits'])\n",
      "company_overview=CompanyOverview(about=\"Activehours is reinventing the way people get paid by giving them immediate access to the money they've earned.\", mission_and_values=\"We celebrate victory, don't shy from failure, and are always learning.\", size='Rapidly growing company', locations='Palo Alto') role_summary=RoleSummary(title='Senior Data Analyst', team_or_department='Data Analyst team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Deep-dive, synthesize, and articulate actionable next steps', 'Hypothesis-driven testing', 'Define strategy and execution for business intelligence and live operations reporting', 'Recommend analytical tools to implement in partnership with developers and the Head of Growth', 'Lifetime value and cohort tracking'], required_qualifications=['3 years of demonstrable experience in digital data and measurement', \"Bachelor's degree in mathematics, statistics, computer science, or related quantitative field\", 'Deep expertise in SQL'], preferred_qualifications=['Python', 'R', \"Master's degree\"]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Kofi Group places software engineers and machine learning engineers on a direct-hire basis with VC-backed startups in San Francisco/Silicon Valley, New York, and Austin. Startups we have recruited for are backed by Andreessen Horowitz, Lightspeed Venture Partners, Wing Venture Capital, Index Ventures, Redpoint, Amplify Partners, Foundation Capital, GV, and several other top venture capital firms.', mission_and_values='None', size='None', locations='San Francisco/Silicon Valley, New York, and Austin') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Engineering', role_type='Full-time', remote='None') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Develop, design, refine, adjust, and evaluate ETL pipelines or systems to enhance our Machine Learning and Analytics capabilities', 'Ensure data integrity throughout its lifecycle by implementing safeguards, health checks, and alerts', 'Collaborate closely with clients to gain profound insights into their data ecosystems, effectively consolidating our data models for predictive analysis', 'Foster a deep understanding of relevant data domains, codebase, and systems', 'Demonstrate expertise in data engineering, data architecture, programming, and software engineering', 'Exercise discretion in choosing methods and techniques to achieve solutions', 'Operate independently, utilizing available resources to overcome challenges and meet deadlines by employing sound judgment and problem-solving abilities', 'Become proficient in internal development standards, progressively managing tasks and projects of increasing complexity from coding to code release', 'Collaborate with Data Science, Product Managers, and Software Engineers to develop robust ETL pipelines that empower the Product Support team to deliver compelling user experiences', 'Display empathy for customers to ensure that data processes and workflows meet their 
needs'], required_qualifications=[\"Bachelor's degree in Computer Science, Data Science, Information Systems, Data Mining, Mathematics, Statistics, Physics, Applied Sciences, or a related field\", 'At least 2 years of hands-on industry experience in data engineering', 'Proficiency in event backbone and job pool platforms (e.g., Kafka)', 'Over 2 years of experience with Typescript, Python (Django), and the development of complex ETL pipelines', 'Hands-on experience in SQL database design, data modeling, and data mining'], preferred_qualifications=['Demonstrated passion for data and machine learning to drive positive outcomes']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='None', bonus_and_equity='None', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about=\"Amazon Web Services (AWS) has been the world's most comprehensive and broadly adopted cloud platform for over 11 years.\", mission_and_values='To power their infrastructure and drive continuous improvements in field productivity.', size='Millions of active customers around the world', locations='Palo Alto, CA USA, Seattle, WA USA, Denver, CO USA, San Francisco, CA USA, Austin, TX USA, New York, NY USA, Herndon, VA USA, Boston, MA USA, Atlanta, GA USA') role_summary=RoleSummary(title='Data Engineer', team_or_department='Sales Strategy, Operations, and Customer Programs organization', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design, develop and maintain at scale automated, user-friendly systems, reports, dashboards', 'Detect trends in AWS and 3rd party data to guide decision making and predict outcomes', 'Work directly within the global sales data to orchestrate metrics, key performance indicators and decision criteria'], required_qualifications=['Experience in data engineering', 'Ability to work hands-on and pioneer new solutions'], preferred_qualifications=['Strong analytical acumen', 'Ability to earn trust and communicate effectively across multiple and non-business units in a global organization']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='Not specified', benefits_and_perks=['Relocation assistance from within the United States'])\n",
      "company_overview=CompanyOverview(about='Rhombus is purposefully transforming the nation’s defense and national security enterprises with Guardian, its Artificial Intelligence platform for strategic, operational, and tactical decision-making at the speed of relevance.', mission_and_values='Transforming national security enterprises with Artificial Intelligence', size='Startup', locations='Palo Alto, CA') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Engineering', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Develop code using various programming and scripting languages to automate data ingestion and improve data management processes', 'Architect data repositories, stand up data platforms and develop data pipelines for ingestion, transformation, and aggregation', 'Review existing architecture, data strategy, and improve processes for data governance, data quality, and metadata management', 'Extract and analyze raw data from multiple data sources via APIs, SQL Stored Procedures, or Python scripts', 'Collaborate with a multi-disciplinary team of analysts, data scientists, data engineers, developers, and data consumers in a fast-paced, agile environment'], required_qualifications=['A Bachelor’s degree in Data Analytics, Computer Science, Computer Engineering, Information Systems/Sciences, or other relevant area (or equivalent experience) and at least 1 year of professional experience, or a Master’s degree with strong academic project experience', 'Ability to obtain and maintain a US security clearance'], preferred_qualifications=['Experience with 1 or more programming and scripting languages', 'Experience with 1 or more of the following relational, noSQL and/or file based storage', 'Experience building and maintaining ETL data pipelines', 'Experience with software development life cycle including testing, documenting, delivery and support', 
'Working knowledge of AWS/cloud technologies', 'Experience using query optimization as well as data modeling techniques', 'Familiarity with machine learning frameworks']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Bonus and other incentive programs', benefits_and_perks=['Full medical, dental, vision coverage for employee and dependents', '401k matching program', 'PTO and Holidays', 'Bonus and other incentive programs'])\n",
      "company_overview=CompanyOverview(about='Kofi Group places software engineers and machine learning engineers on a direct-hire basis with VC-backed startups in San Francisco/Silicon Valley, New York, and Austin.', mission_and_values='No information available', size='No information available', locations='San Francisco/Silicon Valley, New York, Austin') role_summary=RoleSummary(title='Software Engineer/Machine Learning Engineer Recruiter', team_or_department='No information available', role_type='No information available', remote='No information available') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='No information available', bonus_and_equity='No information available', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Rivian is on a mission to keep the world adventurous forever.', mission_and_values='Protect the outdoors for future generations', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Sr. Data Visualization Engineer', team_or_department='Service organization', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work within the Service organization to understand needs, goals, and objectives', 'Design and develop secure, scalable, high-performance and reliable (cost effective) big data and analytics solutions', 'Use Data & Analytics to answer business questions that lead to insights and actionable outcomes', 'Deep data investigation to project the trends in business', 'Define templates and process for the analysis of data models, data flows, and integration patterns', 'Perform ad hoc data analysis to provide efficient analytical support for on-the-spot business inquiries and program management adjustments', 'Aid in requirements gathering and development prioritization where necessary', 'Filter incoming data requests and help ensure orderly creation and management of reports and dashboards', 'Perform research and analysis in support of operations'], required_qualifications=['Business Administration, Finance, Data Science, or related quantitative major', '5 years of strong data analysis experience in Finance, Supply Chain, Engineering, Sales, Marketing, or Manufacturing organizations', '5 years of hands-on experience creating complex dashboards and data stories using Tableau', '5 years of hands-on IT experience in Alteryx, Data Warehouse, ETL & Reporting', '5 years of experience in understanding variety of complex business use cases and modelling the data in the data warehouse'], preferred_qualifications=['MBA', 'Previous experience in Management consulting firm, start-up environment, Automotive OEM or equivalent 
experience']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K -- $100K', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Tesla is a climate tech company', mission_and_values='Not provided', size='Not provided', locations='Palo Alto, California') role_summary=RoleSummary(title='Sr. Data Engineer, Automation and Analytics', team_or_department='Electrical component team in Supply Chain', role_type='Full-time', remote='Not provided') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['plan effective data storage, security, sharing, and publishing', 'maintain large supply chain datasets', 'formulate applicable data-driven solutions'], required_qualifications=['self-motivated for data analytics', 'experience with batch and real-time processing frameworks'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80,000 - $258,000/annual salary', bonus_and_equity='Not provided', benefits_and_perks=['cash and stock awards', 'benefits'])\n",
      "company_overview=CompanyOverview(about='Headquartered in New Jersey (U.S), Cygnus Professionals Inc. is a next generation global information technology Solution and Consulting company powered by strong management and leadership team with over 30 person years of experience.', mission_and_values='extend our presence across industries and geographies with our industry-focused business excellence.', size='over 30 person years of experience', locations='New Jersey, U.S') role_summary=RoleSummary(title='Big Data Engineer w/Spark', team_or_department='not specified', role_type='Contract to Hire', remote='not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['not specified'], required_qualifications=['Experience with Java, Spark and Hadoop', 'A minimum of 2 years of experience working with distributed systems', 'Knowledge in distributed system design, data pipelining, and implementation', 'Knowledge in machine learning algorithms', 'Knowledge and experience in building large scale applications using various software design patterns and OO design principles', 'Experience with either distributed computing (Hadoop/Spark/Cloud) or parallel processing (CUDA/threads/MPI)', 'Expertise in design pattern (UML diagrams) and data modeling of large scale analytic systems', 'Experience in research, analysis, and the conversion of large amount of raw collected data and content into new sets of data that is structured and does not reduce data context in order to enable the Productization of new products', 'Worked with data warehousing and distributed/parallel processing of large data sets using parallel computing system to map/reduce computation and Linux clusters (e.g. 
Hadoop/Cloud technologies, HDFS); cluster;', 'Experienced in modern development methodology such as Agile, Scrum and SDLC', 'Ability to work in a research oriented, fast pace, and highly technical environment', 'Quick thinker and a fast learner, collaborative spirit, and excellent communication and interpersonal skills'], preferred_qualifications=['not specified']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='not specified', bonus_and_equity='not specified', benefits_and_perks=['not specified'])\n",
      "company_overview=CompanyOverview(about=\"Aisera offers the world's first AI-driven service experience solution that automates operations and support for IT, Sales and customer service, making businesses and customers successful by offering consumer-like self-service resolutions to users.\", mission_and_values='Aisera is a top-tier, VC-funded startup headquartered in Palo Alto, Calif. and a strategic partner with AWS, Microsoft Azure, Google Cloud, ServiceNow and Salesforce.', size='100 employees', locations='Palo Alto, Calif.') role_summary=RoleSummary(title='Senior Data Engineer', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work with teammates, leadership and product management to design and deliver the data platform and connector features as per roadmap', 'Deliver testable, modular, highly scalable and reusable code, test cases and documentation on time', 'Work in an agile development environment'], required_qualifications=[\"Bachelor's Degree in Computer Science or Computer Engineering or Electrical Engineering\", 'At least 3 years professional experience in similar positions in software development', 'Strong Java skill', 'Strong database skills', 'Good experience in microservices', 'Good experience in Elasticsearch', 'Good experience in using one or more public cloud environments (AWS, Azure, Google Cloud, etc.)', 'Very keen on quality in any aspect', 'Responsible over projects quality we deliver', 'Very good verbal and written communication skills (English)'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary Range: $80K -- $100K', bonus_and_equity='', benefits_and_perks=['Medical, dental, and vision benefits', 'Holidays and flexible PTO', 'Paid family leave', '401(k) plan', 'Stock Options', 'Employment Assistance Program'])\n",
      "company_overview=CompanyOverview(about='Intapp provides equal employment opportunities to all qualified applicants and will make hiring decisions without regard to race, color, sex, sexual orientation, gender identity or expression, religion, national origin or ancestry, age, disability, marital status, pregnancy, protected veteran status, protected genetic information, political affiliation, or any other characteristic protected by federal, state or local laws.', mission_and_values='Our culture at Intapp emphasizes accountability, responsibility, and growth. We support each other in a positive, open atmosphere that fosters creativity, approachability, and teamwork.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Data Engineer', team_or_department='Not specified', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Develop and implement data pipelines and ETL processes to ensure data quality, reliability, and availability for analysis', 'Perform exploratory data analysis to gain insights into data patterns, trends, and relationships, using statistical methods', 'Identify relevant features and variables for predictive modeling and analysis', 'Data Engineering (ETL/ELT) Development', 'Data Pipeline development in Python or similar languages', 'Develop and implement machine learning models and algorithms to solve business problems and extract actionable insights from data', 'Evaluate model performance, fine-tune parameters, and optimize algorithms to achieve desired outcomes', 'Work closely with cross-functional teams, including data engineers, business analysts, and stakeholders, to understand business requirements and translate them into data-driven solutions'], required_qualifications=[\"Bachelor's degree in computer science, Data Science, Statistics, or a related field\", 'Strong background, 3+ years, in data engineering, including 
experience with data collection, preprocessing, and ETL processes', 'Desired 5+ years of proficiency in programming languages such as Python, SQL, and/or R, and experience with data manipulation and analysis libraries (e.g., pandas, NumPy, scikit-learn, requests, etc..)', 'Knowledge of machine learning techniques and algorithms, with practical experience in model development, evaluation, and deployment'], preferred_qualifications=['Experience with big data technologies and frameworks (e.g., Hadoop, Spark, Kafka)', 'Familiarity with cloud platforms and services (e.g., AWS, Azure, Google Cloud)', 'Knowledge of data visualization tools (e.g., Matplotlib, Tableau) and techniques', 'Understanding of software development principles and practices, including version control, testing, and deployment']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Competitive base salary plus variable compensation and equity', 'Generous paid parental leave, including adoptive leave', 'Traditional comprehensive benefits', 'Generous Paid Time Off', 'Tuition reimbursement plan', 'Family Formation benefit offered by Carrot', 'Wellness programs and benefits provided by Modern Health', 'Paid volunteer time off and donation matching for the causes you care about', 'Home office stipend'])\n",
      "company_overview=CompanyOverview(about='HP is a technology company that operates in more than 170 countries around the world united in creating technology that makes life better for everyone, everywhere.', mission_and_values=\"HP's commitment to diversity, equity and inclusion - it's just who we are.\", size='Global company with operations in more than 170 countries', locations='Multiple locations') role_summary=RoleSummary(title='Business Intelligence & Analytics', team_or_department='Data Engineering', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Link between data engineers who ingest data and the business units that depend on that data to make decisions', 'Collaborate with various business teams, understand their data needs and build plans to address those with intelligent data solutions', 'Deliver data solutions that tell a story and provide insights to the businesses, enabling them to make better decisions'], required_qualifications=[\"Bachelor's degree in Computer Science, Software Engineering, or equivalent practical experience\", '3-5 years of relevant work experience'], preferred_qualifications=['Experience with building data infrastructure', 'Knowledge of programming languages such as SQL or Python', 'Visualization expertise in tools such as Tableau, Power BI, Dataiku etc.', 'Conceptual, logical data modeling and data architecture knowledge']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K -- $100K', bonus_and_equity='Not specified', benefits_and_perks=['Competitive salary', 'Benefits package'])\n",
      "company_overview=CompanyOverview(about='The City of Palo Alto is a local government organization in the US.', mission_and_values='To create a model for local democracy using innovation and technology.', size='N/A', locations='Palo Alto') role_summary=RoleSummary(title='Senior BI Analyst', team_or_department='Information Technology Department', role_type='Full-time', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Designing, planning, implementation, and support/administration of SAP BI Application', 'Leading BW/BI projects in collaboration with Business, IT/Basis, IS functional/technical/security analysts / external BW consultants', 'Facilitating and gathering reporting requirements for assigned projects', 'Establishing strong business relationships to identify, plan and scope future BW projects', 'Writing functional design documents and detailed technical documents', 'Providing ongoing maintenance, assistance and support for BW/BI', 'Investigating and resolving data discrepancies with IS functional and technical analysts'], required_qualifications=[\"Bachelor's Degree in Information Technology or a related field\", '5+ years of SAP Business Warehouse/Intelligence development experience', 'Expertise in BW Solution Architecture, BW APD development, Process Chain Maintenance, Bex Report Enhancements, Extraction – ABAP development, Data sources, ABAP Routines development, Transformations, Transfer Rules & DTP', 'Experience in business requirements gathering, data analysis, data mapping, and dimensional data modeling', 'Excellent interpersonal skills including verbal and written communication, teamwork, and issue resolution'], preferred_qualifications=['IS-Utility experience', 'Project management skills', 'Knowledge of BW on HANA', 'BEX Broadcasting']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='N/A', bonus_and_equity='N/A', benefits_and_perks=['Being part of a game-changing 
agency', 'Opportunity to work on high-priority customer initiatives', 'Challenging and complex data challenges'])\n",
      "company_overview=CompanyOverview(about='Amazon Search team creates powerful, customer-focused search solutions and technologies.', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='Search Capacity team', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Scale the Amazon Search service by working closely with service owners in engineering and operations to understand the service in depth and drive optimal scaling and capacity planning.', 'Identify and track key performance metrics around efficiency and costs.', 'Manage planning, ordering, and budgeting for hardware and other computational resources.', 'Develop and improve tools for automating the foregoing responsibilities wherever possible.', 'Analyze resource utilization and performance test data to identify variables impacting performance and scalability.', 'Develop models for required hardware resources to meet current and future SLAs.'], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Estimated Salary: $20 to $28 per hour based on qualifications', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Tesla is a pioneering electric vehicle and clean energy company.', mission_and_values=\"Accelerating the world's transition to sustainable energy\", size='A multinational corporation with over 100,000 employees', locations='Palo Alto, California') role_summary=RoleSummary(title='Business Intelligence Analyst Intern', team_or_department='North America Sales & Service Infrastructure Development', role_type='Intern', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Analyze sales data to identify trends and opportunities for growth', 'Develop and maintain databases and data systems', 'Collaborate with cross-functional teams to drive business decisions'], required_qualifications=['Currently pursuing a degree in Business, Computer Science, or related field', 'Strong analytical and problem-solving skills', 'Experience with data visualization tools and programming languages'], preferred_qualifications=['Experience with data warehousing and ETL processes', 'Knowledge of machine learning algorithms and statistical modeling']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Competitive hourly rate', bonus_and_equity='Not provided', benefits_and_perks=['Opportunity to work with a pioneering company in the electric vehicle and clean energy industry', 'Collaborative and dynamic work environment', 'Professional development and growth opportunities'])\n",
      "company_overview=CompanyOverview(about='Ford Motor is an Equal Opportunity Employer. We celebrate diversity and are committed to creating an inclusive environment for all employees.', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='Business Intelligence Manager', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Manage projects for our team', 'Lead user analytics and business metrics development', 'Design customer and market research and benchmarking analysis', 'Identify and recommend value metrics to measure our products', 'Work closely with internal stakeholders to identify business models that support our products', 'Bring customer and market data to life through rich storytelling', \"Build analytic capabilities to grow our organization's knowledge\"], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Alexa Shopping team at Amazon', mission_and_values='Instantly order whatever customers need using smart devices', size='Not specified', locations='Remote') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='Alexa Shopping team', role_type='Full-time', remote='Yes') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Understand and mine large amounts of data', 'Build metrics and drive customer adoption', 'Partner with customers to answer key business questions'], required_qualifications=['Experience with BI tools', 'Analytical and creative skills'], preferred_qualifications=['Not specified']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Estimated $20 to $28 per hour based on qualifications', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Amazon', mission_and_values='Delight hundreds of millions of customers and build the best search experience', size='Large scale', locations='Various') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='Search Organization', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Analyze billions of customer feedback and behavioral data', 'Implement principles of data mining, data modeling and analytical skills', 'Work closely with ML & data scientists to extract, integrate and work on critical data', 'Develop and maintain customer segmentation and predictive models', 'Design and drive experiments to form actionable recommendations', 'Present to business leaders in both ad-hoc forums and routine business reviews'], required_qualifications=['Strong business acumen', 'Written and verbal communication skills'], preferred_qualifications=['Strong technical expertise', 'Strong problem-solving skills', 'Excellent leadership skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Hourly', bonus_and_equity='Estimated $20 to $28 per hour based on qualifications', benefits_and_perks=['Competitive salary'])\n",
      "company_overview=CompanyOverview(about='Amazon Web Services (AWS)', mission_and_values='Culture of data-driven decision making', size='Not specified', locations='Sunnyvale, CA, Palo Alto, CA, Cupertino, CA, San Francisco, CA') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='EC2 Capacity Intelligence & Enablement team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Owning the design, development, and maintenance of scalable solutions for ongoing metrics, reports, analyses, dashboards, etc.', 'Translating basic business problem statements into analysis requirements', 'Using analytical and statistical rigor to answer business questions and drive business decisions', 'Finding and creating ways to measure the customer experience to drive business outcomes', 'Developing queries and visualizations for ad-hoc requests and projects, as well as ongoing reporting', 'Writing queries and output efficiently', 'Troubleshooting operational data-quality issues', 'Reviewing and auditing existing ETL jobs and queries', 'Recommending improvements to back-end data sources for increased accuracy and simplicity'], required_qualifications=['Highly collaborative and organized', 'Capable of prioritizing multiple deliverables and effectively communicating progress and blockers', 'Comfortable working across multiple data-sources'], preferred_qualifications=['Not specified']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Amazon Advertising is dedicated to driving measurable outcomes for brand advertisers, agencies, authors, and entrepreneurs.', mission_and_values='Find, attract, and engage intended audiences throughout their daily journeys.', size='Not specified', locations='Palo Alto, California') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='Advertising Forecasting Science team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design, develop, and maintain scalable, automated metrics reports and dashboards', 'Provide insights on data analytics to influence model development roadmap', \"Work closely with stakeholders and leadership to define and present the team's business operational metrics\", \"Work closely with scientists to define and present the team's model-related metrics\", 'Recognize and adopt best practices in reporting and analytics'], required_qualifications=['Strong analytical skills', 'Business acumen and judgement', 'Intellectual curiosity', 'Technical skills', 'Excellent written and verbal communications'], preferred_qualifications=['Not specified']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour', bonus_and_equity='Estimated Salary: $20 to $28 per hour based on qualifications', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Overview of the company, industry, products, services, and notable achievements', mission_and_values='Company mission, vision, values, and culture, including commitments to diversity, inclusion, social responsibility, and work-life balance', size='Details about company size, such as number of employees', locations='City, State where this position is based.') role_summary=RoleSummary(title='Title of the job role', team_or_department='Team, department, or business unit the role belongs to, including any collaborations with other teams', role_type='Type of role (full-time, part-time, contract, etc.)', remote='Remote work options for the role (full, hybrid, none)') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['List of responsibilities, including tasks, duties, and expectations for the role'], required_qualifications=['Essential educational qualifications and professional experience required for the role.'], preferred_qualifications=['Any additional qualifications that a candidate may possess to stand out or excel in the role.']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary range or hourly pay range for the role', bonus_and_equity='Information about bonus and equity compensation', benefits_and_perks=['List of benefits and perks offered for the role, such as insurance, retirement plans, and paid time off.'])\n",
      "company_overview=CompanyOverview(about='Google Play offers music, movies, books, apps and games for devices, powered by the cloud. It syncs across devices and on the web.', mission_and_values='Enable third-party app and game developers to have successful businesses using the Android platform and Google Play.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Business Intelligence Analyst', team_or_department='Google Play Partnerships team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Manage an extended workforce team responsible for building and maintaining business intelligence infrastructure for the Google Play Partnerships team', 'Contribute to project scoping, manage project priorities, and allocate resources within the project and informing/escalating appropriately when external factors impact execution', 'Manage changes and launched of data products, navigating testing, approvals, access, roll-out, training, and/or user feedback', 'Own relationships with Engineering/Technical Analyst teams', 'Ensure the alignment of activities and deliverables with other related projects/teams', \"Contribute to the development of the Google Play Partnerships team's tools, skills, culture, and impact\"], required_qualifications=[\"Bachelor's degree or equivalent practical experience\", '8 years of experience as a business intelligence analyst or similar analytical role'], preferred_qualifications=[\"Master's degree in Engineering, Business, or a quantitative field\", 'Experience developing new models, methods, analysis, and approaches', 'Experience identifying opportunities for business/product improvement and defining/measuring the success of initiatives']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$100K -- $150K', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='No information provided', mission_and_values='No information provided', size='No information provided', locations='In office 3x days a week') role_summary=RoleSummary(title='Data Analyst', team_or_department='No information provided', role_type='Contract (6 months)', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Serving as a technical SME for Team Graph for access management and structural changes', 'Build out g3 documentation (website)', 'Collecting and analyzing team graph data sheets to ensure seamless transition of assets from one team node to another', 'Preparing bulk upload sheets for Team Graph changes (ad hoc and scheduled)', 'Building out Search Platforms Team Graph G3 doc site with all relevant FAQs, technical documents and links', 'Developing process flow documentation and 1-pager docs providing guidance on Team Graph as it pertains to Search Platforms', 'Coordinate with TPgMs on any AIs as it relates to their respective orgs for Team Graph adjustments'], required_qualifications=['BA in Engineering or CS', '5+ years professional experience', 'Google Suite, SQL, Web Dev (HTML, Python, etc)'], preferred_qualifications=['Strong communication and presentation skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='No information provided', bonus_and_equity='No information provided', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Georgia IT Inc', mission_and_values='', size='', locations='Mountain View, CA') role_summary=RoleSummary(title='Business Intelligence / Data Science Analyst', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=['Data Warehousing / Business Intelligence knowledge', 'Advanced SQL skills', 'Advanced Machine Learning skills in Python or GCP products like Tensor Flow', 'Dashboard development skills'], preferred_qualifications=['Autonomy and independence', 'Communication skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$60/hr. on w2', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='TikTok is the leading destination for short-form mobile video. Our mission is to inspire creativity and bring joy.', mission_and_values='Humility, intelligence, compassion, and creativity. We create to inspire - for you, for us, and for more than 1 billion users on our platform.', size='Global offices including Los Angeles, New York, London, Paris, Berlin, Dubai, Singapore, Jakarta, Seoul and Tokyo', locations='Los Angeles, New York, London, Paris, Berlin, Dubai, Singapore, Jakarta, Seoul and Tokyo') role_summary=RoleSummary(title='HR Data Analyst', team_or_department='Human Resources Business Intelligence (HRBI)', role_type='Full-time', remote='Not mentioned') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Understand the business deeply, through data analysis, research, interviews and other methods to proactively identify and predict strategic issues and come up with feasible recommendations to the stakeholders', 'Cultivate strong relationships with Human Resources Business Partners and other stakeholders that enable the successful delivery of reports, dashboards and analytics', 'Establish and iterate our people analytics approach to business problems via advanced analysis and understanding the data infrastructure at TikTok', 'As a promoter of HR data delivery applications, cooperate with related businesses and functions to continuously iterate people analytics product offerings', 'Work effectively in a complex, global, fast-paced matrixed environment, filled with tight deliverable timeframes and multiple stakeholders'], required_qualifications=[\"Bachelor's degree in management, statistics, human resource management, economics, organizational behavior, psychology, industrial psychology, financial management, accounting, or a related field\", '5 years of HR data related experience'], preferred_qualifications=['Prior experience in people analytics, business analytics, industry 
research or consulting firm background', 'Using SQL, Tableau, R, Python or other analysis tools']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$124800 - $228000 annually', bonus_and_equity='Eligible for additional discretionary bonuses/incentives, and restricted stock units', benefits_and_perks=['100% premium coverage for employee medical insurance', 'Dental, Vision, Short/Long term Disability, Basic Life, Voluntary Life and AD&D insurance plans', 'Flexible Spending Account(FSA) Options like Health Care, Limited Purpose and Dependent Care', '401K company match', 'gym and cellphone service reimbursements'])\n",
      "company_overview=CompanyOverview(about='Intuit...', mission_and_values='', size='', locations='Mountain View, CA') role_summary=RoleSummary(title='BI Reporting Analyst IV', team_or_department='', role_type='Contract', remote='Onsite') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Performs business analysis using various techniques, e.g., statistical analysis, explanatory and predictive modeling, data mining.', 'Determines best practices and develops actionable insights and recommendations for the current business operations.', 'Works directly with the internal or external client to identify analytical requirements.', 'May help to produce ad hoc data and reports.', 'May assist in implementing or developing systems to capture business operation information.', 'May occasionally guide less experienced business data analysts.'], required_qualifications=['Qlik', 'SQL', 'Python or R', 'AWS Redshift', 'Athena', 'MS PowerPoint', 'Google Presentation', 'Analytical mindset', 'Ability to dig deeper into the data to generate actionable insights', '4-6 yrs of related experience', 'Tableau, Qlik (2+ years)', 'SQL- extensive knowledge (4+yrs)', 'Python or R', 'Bachelors in related field'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$60-65/hr. C2C', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Sandy Spring Bank is a financial institution', mission_and_values='Provides strategic, analytical, and technical support for business intelligence activities', size='Unknown', locations='Maryland') role_summary=RoleSummary(title='Business Intelligence/Data Analyst', team_or_department='Information Technology Department', role_type='Full-time', remote='Unknown') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Provides strategic, analytical, and technical support for business intelligence activities', 'Develops and summarizes business information to present to stakeholders', 'Manages data analysis and usage requirements'], required_qualifications=[\"Bachelor's Degree in Computer Science, Information Technology, Business Information Systems, Mathematics/Analytics, Engineering or relevant experience\", '3 years of experience in business intelligence/data analyst role involving data store development, programming, reporting, analytics and/or architecture'], preferred_qualifications=['Experience with managing data stores and data ingestion workflow', 'Working knowledge of statistical analysis for hypothesis testing and prediction', 'Experience with one or more programming languages such as Python']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K -- $100K', bonus_and_equity='Unknown', benefits_and_perks=['Salary range from $80K to $100K', 'Benefits package'])\n",
      "company_overview=CompanyOverview(about='Russell Tobin is an equal-opportunity employer and values diversity in the workplace.', mission_and_values='None', size='None', locations='Mountain View, CA/San Diego CA') role_summary=RoleSummary(title='BI Reporting Analyst', team_or_department='None', role_type='Contract', remote='None') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Performs business analysis using various techniques, e.g. statistical analysis, explanatory and predictive modeling, data mining.', 'Determines best practices and develops actionable insights and recommendations for the current business operations.', 'Works directly with the internal or external client to identify analytical requirements.', 'May help to produce ad hoc data and reports.', 'May assist in implementing or developing systems to capture business operation information.', 'May occasionally guide less experienced business data analysts.'], required_qualifications=['Analytical background'], preferred_qualifications=['Advanced spreadsheets (Google Sheets ideally)', 'Intermediate SQL', 'Tableau or other BI knowledge']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$65/hr to $75hr W2 (DOE)', bonus_and_equity='None', benefits_and_perks=['Comprehensive healthcare coverage (medical, dental, and vision plans)', 'Supplemental coverage (accident insurance, critical illness insurance and hospital indemnity)', '401(k)-retirement savings', 'Life & disability insurance', 'Employee assistance program', 'Legal support', 'Auto, home insurance', 'Pet insurance', 'Employee discounts with preferred vendors'])\n",
      "company_overview=CompanyOverview(about='TikTok is the leading destination for short-form mobile video.', mission_and_values='To inspire creativity and bring joy.', size='Not specified', locations='U.S.') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Cycling Center', role_type='Full-time', remote='Hybrid work schedule, 3 days a week in the office') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work with business stakeholders, engineering, product and SRE teams to understand business requirements', 'Extract data from various sources', 'Design, build, and maintain data pipelines', 'Implement and monitor quality control measures', 'Create and maintain technical documentation', 'Optimize pipelines, dashboards, frameworks, and systems', 'Collaborate with engineers, product managers, and data scientists', 'Analyze and visualize data to provide business stakeholders with impactful, actionable insights'], required_qualifications=['Bachelors degree in Statistics, Economics, Computer Science or another quantitative field', '5+ years of experience working with data analytics and data engineering', '2+ years experience building dashboards in Tableau, Power BI or any similar visualization tool', 'Proficiency in distributed data processing using Big Data technologies like Spark/Scala, Java, Hadoop/HDFS/AWS/S3, Cassandra and Kafka', 'Proficiency in data modeling, data design, SQL, and NoSQL databases'], preferred_qualifications=['Experience in a consumer web or mobile company', 'Strong background in algorithms and data structures', 'Experience working with PII and GDPR data']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Base salary range: $145000 - $355000 annually', bonus_and_equity='Discretionary bonuses/incentives, and restricted stock units', benefits_and_perks=['100% premium coverage for employee medical insurance', 'Flexible Spending Account(FSA) Options like Health Care, 
Limited Purpose and Dependent Care', 'Gym and cellphone service reimbursements', '401K company match', 'Mental and emotional health benefits through EAP and Lyra'])\n",
      "company_overview=CompanyOverview(about='Databricks Information Technology is a product led organization transforming the way data is sourced, designed and used to help us scale seamlessly in face of incredible growth.', mission_and_values='Databricks is the data and AI company.', size='More than 10,000 organizations worldwide', locations='San Francisco, CA or Mountain View, CA') role_summary=RoleSummary(title='Data Engineer', team_or_department='IT Data team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Influence decision-making for business teams through data, analytics, and business intelligence', 'Guide the business in identifying data needs and delivering mechanisms for acquiring and reporting such information and addressing the actual needs', 'Gather and maintain best practices that can be adopted across the business', 'Be considered the go-to expert for the business for data analysis, reporting, data warehousing, and business intelligence', 'Be responsible for the daily operations inclusive of troubleshooting and job monitoring'], required_qualifications=[\"2+ years of related experience with a Bachelor's or Master's degree in Computer Science, Statistics, Information Systems or another quantitative field\"], preferred_qualifications=[\"2+ years of related experience with a Bachelor's or Master's degree in Computer Science, Statistics, Information Systems or another quantitative field\", \"Experience building and optimizing 'big data' data pipelines, architectures and data sets\", 'Knowledge of data modeling and design of schemas for read and write performance', 'Advanced working knowledge and experience working with relational databases, query authoring as well as working familiarity with a variety of databases', 'Experience performing root cause analysis on internal and external data and processes to answer specific business questions and identify 
opportunities for improvement', 'Build processes supporting data transformation, data structures, metadata, dependency and workload management', \"Knowledge of message queuing, stream processing, API based extraction and highly scalable 'big data' data stores\", 'Experience supporting and working with cross-functional teams', 'Experience with Python and SQL', 'Experience with building data pipelines from business applications like Salesforce, Marketo, NetSuite, Workday etc.', 'Knowledge of Databricks Platform', 'Knowledge of AI/ML/Data Science', 'Knowledge of BI Tools like Tableau, Looker etc']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Equity awards', benefits_and_perks=['Comprehensive health coverage including medical, dental, and vision', '401(k) Plan', 'Equity awards', 'Flexible time off', 'Paid parental leave', 'Family Planning', 'Gym reimbursement', 'Annual personal development fund', 'Employee Assistance Program (EAP)', 'Mental wellness resources'])\n",
      "company_overview=CompanyOverview(about='Walmart is a retail company with a large scale of data', mission_and_values='To help people save money and live better', size='Large company with a big data set', locations='Sunnyvale, California US-08479 and BENTONVILLE, Arkansas US-09050') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Ventures', role_type='Full-time', remote='Hybrid work: office and virtual') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Data Transformation and Integration', 'Data Source Identification', 'Data Modeling', 'Code Development and Testing', 'Applied Business Acumen', 'Data Governance'], required_qualifications=[\"Bachelor's degree in Computer Science and 2 years' experience in software engineering or related field\", \"4 years' experience in software engineering or related field\", \"Master's degree in Computer Science\"], preferred_qualifications=['Data engineering, database engineering, business intelligence, or business analytics', \"Master's degree in Computer Science or related field and 2 years' experience in software engineering or related field\", 'Knowledge of accessibility best practices']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$117,000.00-$234,000.00 or $90,000.00-$180,000.00', bonus_and_equity='Annual or quarterly performance bonuses', benefits_and_perks=['401(k) match', 'stock purchase plan', 'paid maternity and parental leave', 'PTO', 'multiple health plans'])\n",
      "company_overview=CompanyOverview(about='Company in Mountain View, CA', mission_and_values='', size='', locations='Mountain View, CA') role_summary=RoleSummary(title='Data Engineer', team_or_department='', role_type='Contract', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Ability to work with Business and technical stake holders independently with minimal guidance', 'Strong written and verbal communication and ability to articulate data with the business stake holders'], required_qualifications=['10+ years of overall experience in data management space', 'At least 5 years of working in large data sets in a data lake environment', 'Highly proficient in SQL', 'Solid understanding of Spark including performance tuning', 'Solid understanding of the AWS Platform', 'Experience in Python'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='DOE', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Cepheid is passionate about improving health care through fast, accurate diagnostic testing.', mission_and_values=\"Our mission drives us, every moment of every day, as we develop scalable, groundbreaking solutions to solve the world's most complex health challenges.\", size='Part of the Danaher Diagnostics companies', locations='Sunnyvale, CA') role_summary=RoleSummary(title='Senior Staff Data Engineer', team_or_department='Biochip R&D Team', role_type='Full-time', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design, implement, maintain and continually improve a central cloud platform for data science operations', 'Build scalable, high performance data pipelines', 'Execute technical troubleshooting and improve the maintainability, quality and stability of our codebase', 'Evaluate emerging technologies and develop architectural changes to enhance the performance, scalability, and maintainability of existing data ecosystem', 'Collaborate cross-functionally with internal teams to suggest and implement new functionalities or identify areas for improvement'], required_qualifications=[\"Bachelor's degree in software engineering, computer science or a similar field with 10 years of related work experience, OR Master's degree with 8 years of related work experience\"], preferred_qualifications=['Hands on experience with cloud (AWS and Azure)', 'Experience deploying commercial, production-grade software platforms, services and applications']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$170K-$210K', bonus_and_equity='Bonus/incentive pay', benefits_and_perks=['paid time off', 'medical/dental/vision insurance', '401(k)'])\n",
      "company_overview=CompanyOverview(about='Bonfy.AI is a rapidly growing startup working on game-changing innovations to make AI trustworthy.', mission_and_values='Make AI trustworthy', size='Rapidly growing startup', locations='Mountain View, California') role_summary=RoleSummary(title='Data Engineer', team_or_department='Machine Learning Operations', role_type='Full-time', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design and construct robust data pipelines to collect, integrate, and prepare large volumes of data for machine learning', 'Work closely with ML engineers and data scientists to understand their data needs and implement systems that support model training, validation, and deployment', 'Develop and maintain an efficient data warehouse to store and manage data effectively, ensuring data quality and accessibility', 'Implement automated processes for data cleaning, validation, and preprocessing to improve data accuracy and usability for ML purposes', 'Optimize data retrieval and develop dashboards for visualizing data metrics and insights to aid in the ML model development process', 'Assist in the creation and maintenance of a scalable architecture for ML model testing and deployment', 'Troubleshoot and resolve issues in the data pipelines and advocate for improvements to enhance performance and reliability', 'Document all data engineering procedures and create reports for management detailing data usage, data integrity, and the impact of data quality on ML outcomes'], required_qualifications=[\"Bachelor's or Master's degree in Computer Science, Engineering, Information Technology, or a related field\", 'Minimum of 3 years of experience in a data engineering role', 'Strong programming skills in Python', 'Proficient in SQL and experience with relational databases, query authoring, as well as working familiarity with a variety of databases', 'Experience with data modeling, data warehousing, and 
building ETL pipelines', 'Familiarity with machine learning frameworks (e.g., TensorFlow, PyTorch) and understanding of data needs for ML model training and testing', 'Strong analytical skills and ability to work with large, complex data sets', 'Excellent problem-solving skills and attention to detail'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=[\"Bachelor's degree in computer science, Engineering, or related field.\", 'Proven experience working with Amazon Aurora and/or PostgreSQL in a production environment.', 'Strong SQL skills and experience with SQL tuning techniques.', 'Proficiency in AWS services such as EC2, Route 53, VPC, IAM, and CloudFormation.', 'Hands-on experience with scripting languages (e.g., Python, Bash) for automation.', 'Familiarity with database security concepts and best practices.', 'Excellent problem-solving skills and attention to detail.'], preferred_qualifications=['AWS Certification', 'Experience with other AWS database services such as RDS.', 'Knowledge of containerization technologies (e.g., Docker, Kubernetes).', 'Experience with DevOps practices and tools (e.g., CI/CD pipelines, Git).']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='Mountain View, CA') role_summary=RoleSummary(title='', team_or_department='', role_type='', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=['10+ yrs...'], preferred_qualifications=['Python (Numpy, Pandas)', 'SQL', 'Hadoop, Hive, Pyspark', 'RDBMS', 'Tableau/Qliksense/Power BI', 'Advanced Excel Visuals (Pivot and Regression)']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Company overview, industry, products, services, and notable achievements', mission_and_values='Company mission, vision, values, and culture, including commitments to diversity, inclusion, social responsibility, and work-life balance', size='Details about company size, such as number of employees', locations='City, State where this position is based') role_summary=RoleSummary(title='Data Engineer', team_or_department='Team, department, or business unit the role belongs to', role_type='Type of role (full-time, part-time, contract, etc.)', remote='Remote work options for the role (full, hybrid, none)') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['List of responsibilities, including tasks, duties, and expectations for the role'], required_qualifications=['Essential educational qualifications and professional experience required for the role'], preferred_qualifications=['Any additional qualifications that a candidate may possess to stand out or excel in the role']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary range or hourly pay range for the role', bonus_and_equity='Information about bonus and equity compensation', benefits_and_perks=['List of benefits and perks offered for the role, such as insurance, retirement plans, and paid time off'])\n",
      "company_overview=CompanyOverview(about='Applied Intuition is a company that uses data and machine learning to improve its products and services.', mission_and_values='The company encourages engineers to take ownership of technical and product decisions, interact with users to collect feedback, and contribute to a dynamic team culture.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Infrastructure Engineer - Data Pipelines', team_or_department='Data & ML infra group', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Develop and deploy event-driven pipelines using extract, load and transform (ELT) architecture focused on distributed ingestion', \"Build features to tune processing pipeline for fast data ingestion and indexing depending on customer's needs and workloads\", 'Enable product workflows that expose performant query interfaces and offer easy-to-use integration hooks', 'Develop and deploy high-quality software using modern tooling and frameworks', 'Encourage change, especially in support of data engineering best practices, and maintain a high standard of excellence', 'Work with products and teams across Applied Intuition'], required_qualifications=['Experience with large-scale open source data processing frameworks (Spark, Kafka, Airflow, Flink, Hudi, etc.)', 'Experience with containerization and other modern software development workflows', 'Knowledge of the open source landscape with judgment on when to choose open source versus build in-house', 'Strong knowledge of data concepts, including experience in using a big data warehouse'], preferred_qualifications=['Expertise with modern programming languages (Python, C++, GoLang, etc.)', 'Experience with enterprise software, including on-prem and/or cloud environments', 'Deep knowledge of data quality, data profiling and cleansing techniques']) 
compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$65,000 USD to $400,000 USD annually', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n"
     ]
    }
   ],
   "source": [
    "# Row-wise apply: each call receives one row's description text together with its job_id.\n",
    "output = filtered_title_company.apply(\n",
    "    lambda posting: filtered_parse_desc(posting.description, posting.job_id),\n",
    "    axis=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Move the current index into a regular column and renumber the rows from 0.\n",
    "# Rebinding the name (instead of inplace=True) leaves any other reference to the old frame untouched.\n",
    "# NOTE(review): not idempotent - every re-run inserts another index column (or raises ValueError\n",
    "# on a column-name clash), so run this cell once per fresh kernel.\n",
    "filtered_title_company = filtered_title_company.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>about</th>\n",
       "      <th>mission_and_values</th>\n",
       "      <th>remote</th>\n",
       "      <th>responsibilities</th>\n",
       "      <th>required_qualifications</th>\n",
       "      <th>preferred_qualifications</th>\n",
       "      <th>salary_or_pay_range</th>\n",
       "      <th>benefits_and_perks</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[Design rules to detect/mitigate fraud., Devel...</td>\n",
       "      <td>[Minimum 2 years of experience in risk analyti...</td>\n",
       "      <td>[AWS, fraud investigations, payment rule syste...</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Rhombus Power Inc. (Rhombus) is a NASA Researc...</td>\n",
       "      <td>Transforming the nation's defense and national...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Discover datasets that could help in solution...</td>\n",
       "      <td>[Masters or Ph.D. in Sciences, Mathematics or ...</td>\n",
       "      <td>[Experience with Cloud Computing environments ...</td>\n",
       "      <td>$100K -- $150K</td>\n",
       "      <td>[Not specified]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Microsoft is an equal opportunity employer. Co...</td>\n",
       "      <td>Microsoft’s mission is to empower every person...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Define, invent, and deliver online and offlin...</td>\n",
       "      <td>[Doctorate in Data Science, Mathematics, Stati...</td>\n",
       "      <td>[6+ years of experience coding in Python, C++,...</td>\n",
       "      <td>USD $133,600 - $256,800 per year</td>\n",
       "      <td>[Inclusive work environment, Collaborative cul...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Applied Intuition is a company focused on auto...</td>\n",
       "      <td>The company culture is dynamic and customer-fo...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Design and implement platform capabilities an...</td>\n",
       "      <td>[Passion for turning domain expertise into too...</td>\n",
       "      <td>[MSc or PhD in planning, control, or closely r...</td>\n",
       "      <td>$65,000 USD to $400,000 USD annually</td>\n",
       "      <td>[Not specified]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Unspecified</td>\n",
       "      <td>Unspecified</td>\n",
       "      <td>Unspecified</td>\n",
       "      <td>[Participate in the full lifecycle of a model ...</td>\n",
       "      <td>[Bachelor's Degree in a quantitative field, 3 ...</td>\n",
       "      <td>[Ph.D.]</td>\n",
       "      <td>$80K -- $100K</td>\n",
       "      <td>[Unspecified]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>Bonfy.AI is a rapidly growing startup working ...</td>\n",
       "      <td>Make AI trustworthy</td>\n",
       "      <td></td>\n",
       "      <td>[Design and construct robust data pipelines to...</td>\n",
       "      <td>[Bachelor's or Master's degree in Computer Sci...</td>\n",
       "      <td>[]</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "      <td>[Bachelor's degree in computer science, Engine...</td>\n",
       "      <td>[AWS Certification, Experience with other AWS ...</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>Hybrid</td>\n",
       "      <td>[]</td>\n",
       "      <td>[10+ yrs...]</td>\n",
       "      <td>[Python (Numpy, Pandas), SQL, Hadoop, Hive, Py...</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74</th>\n",
       "      <td>Company overview, industry, products, services...</td>\n",
       "      <td>Company mission, vision, values, and culture, ...</td>\n",
       "      <td>Remote work options for the role (full, hybrid...</td>\n",
       "      <td>[List of responsibilities, including tasks, du...</td>\n",
       "      <td>[Essential educational qualifications and prof...</td>\n",
       "      <td>[Any additional qualifications that a candidat...</td>\n",
       "      <td>Salary range or hourly pay range for the role</td>\n",
       "      <td>[List of benefits and perks offered for the ro...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>Applied Intuition is a company that uses data ...</td>\n",
       "      <td>The company encourages engineers to take owner...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Develop and deploy event-driven pipelines usi...</td>\n",
       "      <td>[Experience with large-scale open source data ...</td>\n",
       "      <td>[Expertise with modern programming languages (...</td>\n",
       "      <td>$65,000 USD to $400,000 USD annually</td>\n",
       "      <td>[Not specified]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>76 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                about  \\\n",
       "0                                                       \n",
       "1   Rhombus Power Inc. (Rhombus) is a NASA Researc...   \n",
       "2   Microsoft is an equal opportunity employer. Co...   \n",
       "3   Applied Intuition is a company focused on auto...   \n",
       "4                                         Unspecified   \n",
       "..                                                ...   \n",
       "71  Bonfy.AI is a rapidly growing startup working ...   \n",
       "72                                                      \n",
       "73                                                      \n",
       "74  Company overview, industry, products, services...   \n",
       "75  Applied Intuition is a company that uses data ...   \n",
       "\n",
       "                                   mission_and_values  \\\n",
       "0                                                       \n",
       "1   Transforming the nation's defense and national...   \n",
       "2   Microsoft’s mission is to empower every person...   \n",
       "3   The company culture is dynamic and customer-fo...   \n",
       "4                                         Unspecified   \n",
       "..                                                ...   \n",
       "71                                Make AI trustworthy   \n",
       "72                                                      \n",
       "73                                                      \n",
       "74  Company mission, vision, values, and culture, ...   \n",
       "75  The company encourages engineers to take owner...   \n",
       "\n",
       "                                               remote  \\\n",
       "0                                                       \n",
       "1                                       Not specified   \n",
       "2                                       Not specified   \n",
       "3                                       Not specified   \n",
       "4                                         Unspecified   \n",
       "..                                                ...   \n",
       "71                                                      \n",
       "72                                                      \n",
       "73                                             Hybrid   \n",
       "74  Remote work options for the role (full, hybrid...   \n",
       "75                                      Not specified   \n",
       "\n",
       "                                     responsibilities  \\\n",
       "0   [Design rules to detect/mitigate fraud., Devel...   \n",
       "1   [Discover datasets that could help in solution...   \n",
       "2   [Define, invent, and deliver online and offlin...   \n",
       "3   [Design and implement platform capabilities an...   \n",
       "4   [Participate in the full lifecycle of a model ...   \n",
       "..                                                ...   \n",
       "71  [Design and construct robust data pipelines to...   \n",
       "72                                                 []   \n",
       "73                                                 []   \n",
       "74  [List of responsibilities, including tasks, du...   \n",
       "75  [Develop and deploy event-driven pipelines usi...   \n",
       "\n",
       "                              required_qualifications  \\\n",
       "0   [Minimum 2 years of experience in risk analyti...   \n",
       "1   [Masters or Ph.D. in Sciences, Mathematics or ...   \n",
       "2   [Doctorate in Data Science, Mathematics, Stati...   \n",
       "3   [Passion for turning domain expertise into too...   \n",
       "4   [Bachelor's Degree in a quantitative field, 3 ...   \n",
       "..                                                ...   \n",
       "71  [Bachelor's or Master's degree in Computer Sci...   \n",
       "72  [Bachelor's degree in computer science, Engine...   \n",
       "73                                       [10+ yrs...]   \n",
       "74  [Essential educational qualifications and prof...   \n",
       "75  [Experience with large-scale open source data ...   \n",
       "\n",
       "                             preferred_qualifications  \\\n",
       "0   [AWS, fraud investigations, payment rule syste...   \n",
       "1   [Experience with Cloud Computing environments ...   \n",
       "2   [6+ years of experience coding in Python, C++,...   \n",
       "3   [MSc or PhD in planning, control, or closely r...   \n",
       "4                                             [Ph.D.]   \n",
       "..                                                ...   \n",
       "71                                                 []   \n",
       "72  [AWS Certification, Experience with other AWS ...   \n",
       "73  [Python (Numpy, Pandas), SQL, Hadoop, Hive, Py...   \n",
       "74  [Any additional qualifications that a candidat...   \n",
       "75  [Expertise with modern programming languages (...   \n",
       "\n",
       "                              salary_or_pay_range  \\\n",
       "0                                                   \n",
       "1                                  $100K -- $150K   \n",
       "2                USD $133,600 - $256,800 per year   \n",
       "3            $65,000 USD to $400,000 USD annually   \n",
       "4                                   $80K -- $100K   \n",
       "..                                            ...   \n",
       "71                                                  \n",
       "72                                                  \n",
       "73                                                  \n",
       "74  Salary range or hourly pay range for the role   \n",
       "75           $65,000 USD to $400,000 USD annually   \n",
       "\n",
       "                                   benefits_and_perks  \n",
       "0                                                  []  \n",
       "1                                     [Not specified]  \n",
       "2   [Inclusive work environment, Collaborative cul...  \n",
       "3                                     [Not specified]  \n",
       "4                                       [Unspecified]  \n",
       "..                                                ...  \n",
       "71                                                 []  \n",
       "72                                                 []  \n",
       "73                                                 []  \n",
       "74  [List of benefits and perks offered for the ro...  \n",
       "75                                    [Not specified]  \n",
       "\n",
       "[76 rows x 8 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Build a DataFrame from the elements of the `output` Series, with the eight\n",
    "# columns listed below.\n",
    "df = pd.DataFrame(\n",
    "    output.tolist(),\n",
    "    columns=[\n",
    "        'about',\n",
    "        'mission_and_values',\n",
    "        'remote',\n",
    "        'responsibilities',\n",
    "        'required_qualifications',\n",
    "        'preferred_qualifications',\n",
    "        'salary_or_pay_range',\n",
    "        'benefits_and_perks',\n",
    "    ],\n",
    ")\n",
    "\n",
    "# Leave the frame as the last expression so the notebook renders it.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export `df` to CSV, omitting the row index.\n",
    "# NOTE(review): a later cell writes `clean_full_df` to this same path, so on a\n",
    "# full top-to-bottom run this file is overwritten - confirm this export is still needed.\n",
    "df.to_csv(path_or_buf='sf_recent_jobs.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>about</th>\n",
       "      <th>mission_and_values</th>\n",
       "      <th>remote</th>\n",
       "      <th>responsibilities</th>\n",
       "      <th>required_qualifications</th>\n",
       "      <th>preferred_qualifications</th>\n",
       "      <th>salary_or_pay_range</th>\n",
       "      <th>benefits_and_perks</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[Design rules to detect/mitigate fraud., Devel...</td>\n",
       "      <td>[Minimum 2 years of experience in risk analyti...</td>\n",
       "      <td>[AWS, fraud investigations, payment rule syste...</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Rhombus Power Inc. (Rhombus) is a NASA Researc...</td>\n",
       "      <td>Transforming the nation's defense and national...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Discover datasets that could help in solution...</td>\n",
       "      <td>[Masters or Ph.D. in Sciences, Mathematics or ...</td>\n",
       "      <td>[Experience with Cloud Computing environments ...</td>\n",
       "      <td>$100K -- $150K</td>\n",
       "      <td>[Not specified]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Microsoft is an equal opportunity employer. Co...</td>\n",
       "      <td>Microsoft’s mission is to empower every person...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Define, invent, and deliver online and offlin...</td>\n",
       "      <td>[Doctorate in Data Science, Mathematics, Stati...</td>\n",
       "      <td>[6+ years of experience coding in Python, C++,...</td>\n",
       "      <td>USD $133,600 - $256,800 per year</td>\n",
       "      <td>[Inclusive work environment, Collaborative cul...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Applied Intuition is a company focused on auto...</td>\n",
       "      <td>The company culture is dynamic and customer-fo...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Design and implement platform capabilities an...</td>\n",
       "      <td>[Passion for turning domain expertise into too...</td>\n",
       "      <td>[MSc or PhD in planning, control, or closely r...</td>\n",
       "      <td>$65,000 USD to $400,000 USD annually</td>\n",
       "      <td>[Not specified]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Unspecified</td>\n",
       "      <td>Unspecified</td>\n",
       "      <td>Unspecified</td>\n",
       "      <td>[Participate in the full lifecycle of a model ...</td>\n",
       "      <td>[Bachelor's Degree in a quantitative field, 3 ...</td>\n",
       "      <td>[Ph.D.]</td>\n",
       "      <td>$80K -- $100K</td>\n",
       "      <td>[Unspecified]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>Bonfy.AI is a rapidly growing startup working ...</td>\n",
       "      <td>Make AI trustworthy</td>\n",
       "      <td></td>\n",
       "      <td>[Design and construct robust data pipelines to...</td>\n",
       "      <td>[Bachelor's or Master's degree in Computer Sci...</td>\n",
       "      <td>[]</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "      <td>[Bachelor's degree in computer science, Engine...</td>\n",
       "      <td>[AWS Certification, Experience with other AWS ...</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>Hybrid</td>\n",
       "      <td>[]</td>\n",
       "      <td>[10+ yrs...]</td>\n",
       "      <td>[Python (Numpy, Pandas), SQL, Hadoop, Hive, Py...</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74</th>\n",
       "      <td>Company overview, industry, products, services...</td>\n",
       "      <td>Company mission, vision, values, and culture, ...</td>\n",
       "      <td>Remote work options for the role (full, hybrid...</td>\n",
       "      <td>[List of responsibilities, including tasks, du...</td>\n",
       "      <td>[Essential educational qualifications and prof...</td>\n",
       "      <td>[Any additional qualifications that a candidat...</td>\n",
       "      <td>Salary range or hourly pay range for the role</td>\n",
       "      <td>[List of benefits and perks offered for the ro...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>Applied Intuition is a company that uses data ...</td>\n",
       "      <td>The company encourages engineers to take owner...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Develop and deploy event-driven pipelines usi...</td>\n",
       "      <td>[Experience with large-scale open source data ...</td>\n",
       "      <td>[Expertise with modern programming languages (...</td>\n",
       "      <td>$65,000 USD to $400,000 USD annually</td>\n",
       "      <td>[Not specified]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>76 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                about  \\\n",
       "0                                                       \n",
       "1   Rhombus Power Inc. (Rhombus) is a NASA Researc...   \n",
       "2   Microsoft is an equal opportunity employer. Co...   \n",
       "3   Applied Intuition is a company focused on auto...   \n",
       "4                                         Unspecified   \n",
       "..                                                ...   \n",
       "71  Bonfy.AI is a rapidly growing startup working ...   \n",
       "72                                                      \n",
       "73                                                      \n",
       "74  Company overview, industry, products, services...   \n",
       "75  Applied Intuition is a company that uses data ...   \n",
       "\n",
       "                                   mission_and_values  \\\n",
       "0                                                       \n",
       "1   Transforming the nation's defense and national...   \n",
       "2   Microsoft’s mission is to empower every person...   \n",
       "3   The company culture is dynamic and customer-fo...   \n",
       "4                                         Unspecified   \n",
       "..                                                ...   \n",
       "71                                Make AI trustworthy   \n",
       "72                                                      \n",
       "73                                                      \n",
       "74  Company mission, vision, values, and culture, ...   \n",
       "75  The company encourages engineers to take owner...   \n",
       "\n",
       "                                               remote  \\\n",
       "0                                                       \n",
       "1                                       Not specified   \n",
       "2                                       Not specified   \n",
       "3                                       Not specified   \n",
       "4                                         Unspecified   \n",
       "..                                                ...   \n",
       "71                                                      \n",
       "72                                                      \n",
       "73                                             Hybrid   \n",
       "74  Remote work options for the role (full, hybrid...   \n",
       "75                                      Not specified   \n",
       "\n",
       "                                     responsibilities  \\\n",
       "0   [Design rules to detect/mitigate fraud., Devel...   \n",
       "1   [Discover datasets that could help in solution...   \n",
       "2   [Define, invent, and deliver online and offlin...   \n",
       "3   [Design and implement platform capabilities an...   \n",
       "4   [Participate in the full lifecycle of a model ...   \n",
       "..                                                ...   \n",
       "71  [Design and construct robust data pipelines to...   \n",
       "72                                                 []   \n",
       "73                                                 []   \n",
       "74  [List of responsibilities, including tasks, du...   \n",
       "75  [Develop and deploy event-driven pipelines usi...   \n",
       "\n",
       "                              required_qualifications  \\\n",
       "0   [Minimum 2 years of experience in risk analyti...   \n",
       "1   [Masters or Ph.D. in Sciences, Mathematics or ...   \n",
       "2   [Doctorate in Data Science, Mathematics, Stati...   \n",
       "3   [Passion for turning domain expertise into too...   \n",
       "4   [Bachelor's Degree in a quantitative field, 3 ...   \n",
       "..                                                ...   \n",
       "71  [Bachelor's or Master's degree in Computer Sci...   \n",
       "72  [Bachelor's degree in computer science, Engine...   \n",
       "73                                       [10+ yrs...]   \n",
       "74  [Essential educational qualifications and prof...   \n",
       "75  [Experience with large-scale open source data ...   \n",
       "\n",
       "                             preferred_qualifications  \\\n",
       "0   [AWS, fraud investigations, payment rule syste...   \n",
       "1   [Experience with Cloud Computing environments ...   \n",
       "2   [6+ years of experience coding in Python, C++,...   \n",
       "3   [MSc or PhD in planning, control, or closely r...   \n",
       "4                                             [Ph.D.]   \n",
       "..                                                ...   \n",
       "71                                                 []   \n",
       "72  [AWS Certification, Experience with other AWS ...   \n",
       "73  [Python (Numpy, Pandas), SQL, Hadoop, Hive, Py...   \n",
       "74  [Any additional qualifications that a candidat...   \n",
       "75  [Expertise with modern programming languages (...   \n",
       "\n",
       "                              salary_or_pay_range  \\\n",
       "0                                                   \n",
       "1                                  $100K -- $150K   \n",
       "2                USD $133,600 - $256,800 per year   \n",
       "3            $65,000 USD to $400,000 USD annually   \n",
       "4                                   $80K -- $100K   \n",
       "..                                            ...   \n",
       "71                                                  \n",
       "72                                                  \n",
       "73                                                  \n",
       "74  Salary range or hourly pay range for the role   \n",
       "75           $65,000 USD to $400,000 USD annually   \n",
       "\n",
       "                                   benefits_and_perks  \n",
       "0                                                  []  \n",
       "1                                     [Not specified]  \n",
       "2   [Inclusive work environment, Collaborative cul...  \n",
       "3                                     [Not specified]  \n",
       "4                                       [Unspecified]  \n",
       "..                                                ...  \n",
       "71                                                 []  \n",
       "72                                                 []  \n",
       "73                                                 []  \n",
       "74  [List of benefits and perks offered for the ro...  \n",
       "75                                    [Not specified]  \n",
       "\n",
       "[76 rows x 8 columns]"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Place the columns of `filtered_title_company` side by side with the fields taken\n",
    "# from `output`, keeping each of those fields as its own column in the result.\n",
    "# NOTE(review): a column-wise concat aligns rows on index labels, and the frame built\n",
    "# from `output.tolist()` gets a fresh 0..n-1 index. This presumably relies on\n",
    "# `filtered_title_company` also having a default index - confirm.\n",
    "clean_full_df = pd.concat(\n",
    "    [\n",
    "        filtered_title_company,\n",
    "        pd.DataFrame(\n",
    "            output.tolist(),\n",
    "            columns=[\n",
    "                'about',\n",
    "                'mission_and_values',\n",
    "                'remote',\n",
    "                'responsibilities',\n",
    "                'required_qualifications',\n",
    "                'preferred_qualifications',\n",
    "                'salary_or_pay_range',\n",
    "                'benefits_and_perks',\n",
    "            ],\n",
    "        ),\n",
    "    ],\n",
    "    axis='columns',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export `clean_full_df` to CSV, omitting the row index.\n",
    "# NOTE(review): in notebook order this cell comes before the one that drops rows with\n",
    "# empty `responsibilities`, but the execution counts (59 here vs 51 there) show it was\n",
    "# run after it. Confirm which version of `clean_full_df` the CSV should contain.\n",
    "clean_full_df.to_csv(path_or_buf='sf_recent_jobs.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only rows whose `responsibilities` entry has at least one item.\n",
    "# `.str.len()` is used instead of `.apply(len)` because it returns NaN for missing\n",
    "# values (NaN/None) rather than raising TypeError; NaN > 0 is False, so rows with a\n",
    "# missing entry are dropped together with the empty ones.\n",
    "has_responsibilities = clean_full_df['responsibilities'].str.len().gt(0)\n",
    "clean_full_df = clean_full_df[has_responsibilities]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>title</th>\n",
       "      <th>company_name</th>\n",
       "      <th>location</th>\n",
       "      <th>description</th>\n",
       "      <th>about</th>\n",
       "      <th>mission_and_values</th>\n",
       "      <th>remote</th>\n",
       "      <th>responsibilities</th>\n",
       "      <th>required_qualifications</th>\n",
       "      <th>preferred_qualifications</th>\n",
       "      <th>salary_or_pay_range</th>\n",
       "      <th>benefits_and_perks</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>905</td>\n",
       "      <td>Data Engineer- Mountain View, CA</td>\n",
       "      <td>Georgia IT, Inc.</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Position: Data Engineer\\n\\nLocation: Mountain ...</td>\n",
       "      <td>Company in Mountain View, CA</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[Ability to work with Business and technical s...</td>\n",
       "      <td>[10+ years of overall experience in data manag...</td>\n",
       "      <td>[]</td>\n",
       "      <td>DOE</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>843</td>\n",
       "      <td>Business Intelligence Analyst</td>\n",
       "      <td>BayOne</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Gather and analyze data, reason logically, dra...</td>\n",
       "      <td>Overview of the company, industry, products, s...</td>\n",
       "      <td>Company mission, vision, values, and culture, ...</td>\n",
       "      <td>Remote work options for the role (full, hybrid...</td>\n",
       "      <td>[List of responsibilities, including tasks, du...</td>\n",
       "      <td>[Essential educational qualifications and prof...</td>\n",
       "      <td>[Any additional qualifications that a candidat...</td>\n",
       "      <td>Salary range or hourly pay range for the role</td>\n",
       "      <td>[List of benefits and perks offered for the ro...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>831</td>\n",
       "      <td>Business Intelligence Engineer</td>\n",
       "      <td>Amazon.com Services LLC</td>\n",
       "      <td>Palo Alto, CA</td>\n",
       "      <td>Job summaryAre you passionate about delighting...</td>\n",
       "      <td>Amazon</td>\n",
       "      <td>Delight hundreds of millions of customers and ...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Analyze billions of customer feedback and beh...</td>\n",
       "      <td>[Strong business acumen, Written and verbal co...</td>\n",
       "      <td>[Strong technical expertise, Strong problem-so...</td>\n",
       "      <td>Hourly</td>\n",
       "      <td>[Competitive salary]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>845</td>\n",
       "      <td>Business Intelligence Analyst II</td>\n",
       "      <td>Akorbi</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Role Title: Data Analyst\\n\\nDuration: 6 months...</td>\n",
       "      <td>No information provided</td>\n",
       "      <td>No information provided</td>\n",
       "      <td>Hybrid</td>\n",
       "      <td>[Serving as a technical SME for Team Graph for...</td>\n",
       "      <td>[BA in Engineering or CS, 5+ years professiona...</td>\n",
       "      <td>[Strong communication and presentation skills]</td>\n",
       "      <td>No information provided</td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>563</td>\n",
       "      <td>Senior Software Engineer, AI</td>\n",
       "      <td>Recruiting from Scratch</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Who is Recruiting from Scratch :\\n\\nRecruiting...</td>\n",
       "      <td>Recruiting from Scratch is a premier talent fi...</td>\n",
       "      <td>Our team is 100% remote and we work with teams...</td>\n",
       "      <td>On-site / In-office (≥4 days a week)</td>\n",
       "      <td>[Building edge applications processing vision ...</td>\n",
       "      <td>[3+ years of experience writing production sof...</td>\n",
       "      <td>[Experience with Docker, CI / CD pipelines, Ex...</td>\n",
       "      <td>$150,000-$220,000 base</td>\n",
       "      <td>[Flexible PTO &amp; Sick Policy, Medical, Dental, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>638</td>\n",
       "      <td>Senior Technical Data Analyst</td>\n",
       "      <td>Intuit</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>We are looking for an experienced technical da...</td>\n",
       "      <td>Intuit</td>\n",
       "      <td>Delivering data-driven insights, driving chang...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Driving data models and definitions, Communic...</td>\n",
       "      <td>[Expertise in analytics methodologies, Deep kn...</td>\n",
       "      <td>[Deep knowledge of ETL and data warehousing, E...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Competitive salary, Benefits package, Opportu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>632</td>\n",
       "      <td>Data Analyst I - FT - Days - Concern: EAP</td>\n",
       "      <td>El Camino Health</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>El Camino Health is committed to hiring, retai...</td>\n",
       "      <td>El Camino Health is committed to hiring, retai...</td>\n",
       "      <td>Compassion, innovation, collaboration, and del...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Understand the database structure and schema ...</td>\n",
       "      <td>[Bachelor’s degree in a technology or science ...</td>\n",
       "      <td>[Experience collaborating with software develo...</td>\n",
       "      <td>$36.13 - $54.20 USD Hourly</td>\n",
       "      <td>[Not specified]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>666</td>\n",
       "      <td>Senior Data Analyst</td>\n",
       "      <td>Scilex Pharmaceuticals Inc.</td>\n",
       "      <td>Palo Alto, CA</td>\n",
       "      <td>Salary Range: $114,000 - $131,000\\n\\nThe pay r...</td>\n",
       "      <td>Scilex Holding Company is an innovative revenu...</td>\n",
       "      <td>Committed to social, environmental, economic, ...</td>\n",
       "      <td>Not specified</td>\n",
       "      <td>[Collaborate with Commercial team to help synt...</td>\n",
       "      <td>[Working knowledge of pharmaceutical industry ...</td>\n",
       "      <td>[Bachelor's degree (business discipline prefer...</td>\n",
       "      <td>$114,000 - $131,000/annually</td>\n",
       "      <td>[Medical benefits, 401(k) eligibility, Vacatio...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>852</td>\n",
       "      <td>Business Intelligence/Data Analyst at Sandy Sp...</td>\n",
       "      <td>Sandy Spring Bancorp, Inc.</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Sandy Spring Bank is currently recruiting for ...</td>\n",
       "      <td>Sandy Spring Bank is a financial institution</td>\n",
       "      <td>Provides strategic, analytical, and technical ...</td>\n",
       "      <td>Unknown</td>\n",
       "      <td>[Provides strategic, analytical, and technical...</td>\n",
       "      <td>[Bachelor's Degree in Computer Science, Inform...</td>\n",
       "      <td>[Experience with managing data stores and data...</td>\n",
       "      <td>$80K -- $100K</td>\n",
       "      <td>[Salary range from $80K to $100K, Benefits pac...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>828</td>\n",
       "      <td>Business Intelligence Manager</td>\n",
       "      <td>Ford Motor</td>\n",
       "      <td>Palo Alto, CA</td>\n",
       "      <td>The Business Intelligence Manager role will ma...</td>\n",
       "      <td>Ford Motor is an Equal Opportunity Employer. W...</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>[Manage projects for our team, Lead user analy...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td></td>\n",
       "      <td>[]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    index                                              title  \\\n",
       "69    905                   Data Engineer- Mountain View, CA   \n",
       "57    843                      Business Intelligence Analyst   \n",
       "54    831                     Business Intelligence Engineer   \n",
       "59    845                   Business Intelligence Analyst II   \n",
       "15    563                       Senior Software Engineer, AI   \n",
       "29    638                      Senior Technical Data Analyst   \n",
       "23    632          Data Analyst I - FT - Days - Concern: EAP   \n",
       "36    666                                Senior Data Analyst   \n",
       "64    852  Business Intelligence/Data Analyst at Sandy Sp...   \n",
       "52    828                      Business Intelligence Manager   \n",
       "\n",
       "                   company_name           location  \\\n",
       "69             Georgia IT, Inc.  Mountain View, CA   \n",
       "57                       BayOne  Mountain View, CA   \n",
       "54      Amazon.com Services LLC      Palo Alto, CA   \n",
       "59                       Akorbi  Mountain View, CA   \n",
       "15      Recruiting from Scratch  Mountain View, CA   \n",
       "29                       Intuit  Mountain View, CA   \n",
       "23             El Camino Health  Mountain View, CA   \n",
       "36  Scilex Pharmaceuticals Inc.      Palo Alto, CA   \n",
       "64   Sandy Spring Bancorp, Inc.  Mountain View, CA   \n",
       "52                   Ford Motor      Palo Alto, CA   \n",
       "\n",
       "                                          description  \\\n",
       "69  Position: Data Engineer\\n\\nLocation: Mountain ...   \n",
       "57  Gather and analyze data, reason logically, dra...   \n",
       "54  Job summaryAre you passionate about delighting...   \n",
       "59  Role Title: Data Analyst\\n\\nDuration: 6 months...   \n",
       "15  Who is Recruiting from Scratch :\\n\\nRecruiting...   \n",
       "29  We are looking for an experienced technical da...   \n",
       "23  El Camino Health is committed to hiring, retai...   \n",
       "36  Salary Range: $114,000 - $131,000\\n\\nThe pay r...   \n",
       "64  Sandy Spring Bank is currently recruiting for ...   \n",
       "52  The Business Intelligence Manager role will ma...   \n",
       "\n",
       "                                                about  \\\n",
       "69                       Company in Mountain View, CA   \n",
       "57  Overview of the company, industry, products, s...   \n",
       "54                                             Amazon   \n",
       "59                            No information provided   \n",
       "15  Recruiting from Scratch is a premier talent fi...   \n",
       "29                                             Intuit   \n",
       "23  El Camino Health is committed to hiring, retai...   \n",
       "36  Scilex Holding Company is an innovative revenu...   \n",
       "64       Sandy Spring Bank is a financial institution   \n",
       "52  Ford Motor is an Equal Opportunity Employer. W...   \n",
       "\n",
       "                                   mission_and_values  \\\n",
       "69                                                      \n",
       "57  Company mission, vision, values, and culture, ...   \n",
       "54  Delight hundreds of millions of customers and ...   \n",
       "59                            No information provided   \n",
       "15  Our team is 100% remote and we work with teams...   \n",
       "29  Delivering data-driven insights, driving chang...   \n",
       "23  Compassion, innovation, collaboration, and del...   \n",
       "36  Committed to social, environmental, economic, ...   \n",
       "64  Provides strategic, analytical, and technical ...   \n",
       "52                                                      \n",
       "\n",
       "                                               remote  \\\n",
       "69                                                      \n",
       "57  Remote work options for the role (full, hybrid...   \n",
       "54                                      Not specified   \n",
       "59                                             Hybrid   \n",
       "15               On-site / In-office (≥4 days a week)   \n",
       "29                                      Not specified   \n",
       "23                                      Not specified   \n",
       "36                                      Not specified   \n",
       "64                                            Unknown   \n",
       "52                                                      \n",
       "\n",
       "                                     responsibilities  \\\n",
       "69  [Ability to work with Business and technical s...   \n",
       "57  [List of responsibilities, including tasks, du...   \n",
       "54  [Analyze billions of customer feedback and beh...   \n",
       "59  [Serving as a technical SME for Team Graph for...   \n",
       "15  [Building edge applications processing vision ...   \n",
       "29  [Driving data models and definitions, Communic...   \n",
       "23  [Understand the database structure and schema ...   \n",
       "36  [Collaborate with Commercial team to help synt...   \n",
       "64  [Provides strategic, analytical, and technical...   \n",
       "52  [Manage projects for our team, Lead user analy...   \n",
       "\n",
       "                              required_qualifications  \\\n",
       "69  [10+ years of overall experience in data manag...   \n",
       "57  [Essential educational qualifications and prof...   \n",
       "54  [Strong business acumen, Written and verbal co...   \n",
       "59  [BA in Engineering or CS, 5+ years professiona...   \n",
       "15  [3+ years of experience writing production sof...   \n",
       "29  [Expertise in analytics methodologies, Deep kn...   \n",
       "23  [Bachelor’s degree in a technology or science ...   \n",
       "36  [Working knowledge of pharmaceutical industry ...   \n",
       "64  [Bachelor's Degree in Computer Science, Inform...   \n",
       "52                                                 []   \n",
       "\n",
       "                             preferred_qualifications  \\\n",
       "69                                                 []   \n",
       "57  [Any additional qualifications that a candidat...   \n",
       "54  [Strong technical expertise, Strong problem-so...   \n",
       "59     [Strong communication and presentation skills]   \n",
       "15  [Experience with Docker, CI / CD pipelines, Ex...   \n",
       "29  [Deep knowledge of ETL and data warehousing, E...   \n",
       "23  [Experience collaborating with software develo...   \n",
       "36  [Bachelor's degree (business discipline prefer...   \n",
       "64  [Experience with managing data stores and data...   \n",
       "52                                                 []   \n",
       "\n",
       "                              salary_or_pay_range  \\\n",
       "69                                            DOE   \n",
       "57  Salary range or hourly pay range for the role   \n",
       "54                                         Hourly   \n",
       "59                        No information provided   \n",
       "15                         $150,000-$220,000 base   \n",
       "29                                  Not specified   \n",
       "23                     $36.13 - $54.20 USD Hourly   \n",
       "36                   $114,000 - $131,000/annually   \n",
       "64                                  $80K -- $100K   \n",
       "52                                                  \n",
       "\n",
       "                                   benefits_and_perks  \n",
       "69                                                 []  \n",
       "57  [List of benefits and perks offered for the ro...  \n",
       "54                               [Competitive salary]  \n",
       "59                                                 []  \n",
       "15  [Flexible PTO & Sick Policy, Medical, Dental, ...  \n",
       "29  [Competitive salary, Benefits package, Opportu...  \n",
       "23                                    [Not specified]  \n",
       "36  [Medical benefits, 401(k) eligibility, Vacatio...  \n",
       "64  [Salary range from $80K to $100K, Benefits pac...  \n",
       "52                                                 []  "
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clean_full_df.sample(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'The Business Intelligence Manager role will manage projects for our team while leading user analytics and business metrics development, supporting the organization’s ambitious growth strategy. Design customer and market research and benchmarking analysis. Identify and recommend value metrics to measure our products. Work closely with internal stakeholders to identify business models that support... our products. Bring customer and market data to life through rich storytelling. Build analytic capabilities to grow our organization’s knowledge.\\n\\nFord Motor is an Equal Opportunity Employer. We celebrate diversity and are committed to creating an inclusive environment for all employees. We do not discriminate based upon race, religion, color, national origin, sex, sexual orientation, gender identity, age, status as a protected veteran, status as an individual with a disability, or other applicable legally protected characteristics'"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clean_full_df.loc[52, 'description']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "company_overview=CompanyOverview(about='eCommerce, online payments, user trust/risk/fraud, or investigation/product abuse', mission_and_values='Supporting key projects associated with fraud detection, risk analysis and loss mitigation', size='Not specified', locations='San Jose, CA') role_summary=RoleSummary(title='Fraud Strategy Data Scientist', team_or_department='Fraud Risk Strategy team', role_type='Full-time', remote='Onsite') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design rules to detect/mitigate fraud', 'Develop python scripts and models that support strategies', 'Investigate novel/large cases', 'Identify root cause', 'Set strategy for different risk types', 'Work with product/engineering to improve control capabilities', 'Develop and present strategies and guide execution'], required_qualifications=['Minimum 2 years of experience in risk analytics, data analysis, and data science within relevant industry experience in eCommerce, online payments, user trust/risk/fraud, or investigation/product abuse', \"Bachelor's degree in computer science, Engineering, Mathematics, Statistics, Data Mining or related field or equivalent practical experience\"], preferred_qualifications=['AWS', 'fraud investigations', 'payment rule systems', 'working with ML teams', 'fraud typologies']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Rhombus Power Inc. (Rhombus) is a NASA Research Park startup located in the heart of Silicon Valley.', mission_and_values=\"Transforming the nation's defense and national security enterprises with Guardian, its Artificial Intelligence platform for strategic, operational, and tactical decision-making at the speed of relevance.\", size='Not specified', locations='San Francisco Bay Area') role_summary=RoleSummary(title='Data Scientist', team_or_department='Product team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[\"Design, develop, and launch efficient and reliable data pipelines to move, analyze and model data and to provide intuitive analytics from Rhombus' large and complex datasets.\", 'Discover datasets that could help in solution development', 'Data curation, analysis, quantitative modeling', 'Validation and quality assurance of data, models and results', 'Deploy and implement solutions in collaboration with product team', 'Interact with the product team on current and upcoming user requirements'], required_qualifications=['Masters or Ph.D. in Sciences, Mathematics or Engineering, especially numerical methods and simulations', 'Strong background in database management solutions, familiarity with databases such as MySQL and Oracle', 'Background in Statistics is expected, and Experience with Machine Learning is a bonus'], preferred_qualifications=['Experience with Cloud Computing environments (AWS, GCloud, Azure)']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$100K -- $150K', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Microsoft is a leading technology company responsible for delivering quality experiences to over 500M+ monthly active users around the world in Microsoft’s search engine, Bing.', mission_and_values='Microsoft’s mission is to empower every person and every organization on the planet to achieve more.', size='Large company', locations='Multiple locations') role_summary=RoleSummary(title='Principal Data Scientist', team_or_department='Search + Distribution (S+D) team', role_type='Full-time', remote='Not mentioned') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Define, invent, and deliver online and offline behavioral and human labeled metrics which accurately measure the satisfaction and success of our customers interacting with Search.', 'Apply behavioral game theory and social science understanding to get the quality work out of crowd workers from around the world', 'Develop deep understanding of business metrics such as daily active users, query share, click share and query volume across all the relevant entry points'], required_qualifications=['Doctorate in Data Science, Mathematics, Statistics, Econometrics, Economics, Operations Research, Computer Science, or related field AND 5+ year(s) data-science experience', 'Organizational, analytical, data science skills and intuition.'], preferred_qualifications=['6+ years of experience coding in Python, C++, C#, C or Java.', 'Customer focused, strategic, drives for results, is self-motivated, and has a propensity for action.']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Base pay range for this role across the U.S. is USD $133,600 - $256,800 per year.', bonus_and_equity='Base pay range for this role across the U.S. is USD $133,600 - $256,800 per year.', benefits_and_perks=['Competitive pay', 'Benefits and perks'])\n",
      "company_overview=CompanyOverview(about='Applied Intuition is a company working on autonomous systems.', mission_and_values='The company focuses on excellence and fast-paced innovation.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Software Engineer', team_or_department='Planning and control for autonomous vehicles or mobile robots', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design and implement platform capabilities and product workflows', 'Build out foundational libraries that push our platform beyond the state of the art', 'Work closely with in-house and customer engineers to design and build supporting toolchains'], required_qualifications=['Experience building software components or (sub) systems that address real-world planning and control challenges', 'Hands-on experience with more than one domain of relevant software framework or tools'], preferred_qualifications=['MSc or PhD in planning, control, or closely related field', 'Deep hands-on expertise in relevant algorithms or methods']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$65,000 USD to $400,000 USD annually', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Data Science & Machine Learning company', mission_and_values='Not specified', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Data Scientist', team_or_department='Data Science & Machine Learning', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Participate in the full lifecycle of a model including design, coding, testing, and release', 'Collaborate with the data and analytics team, marketing team, and marketplace teams', 'Work on models such as fraud, player churn, or causal inference', 'Utilizing experimentation, help analyze and project optimal values', 'ML visualizations', 'Predictive analytics'], required_qualifications=[\"Bachelor's Degree in a quantitative field\", '3 years of professional experience with Python', 'Experience with Bayesian techniques', 'Experience with tools and frameworks like Pytorch, TensorFlow, Scikit-Learn, SQL, etc', 'Familiarity with some of the following models: outlier detection, lifetime value modeling, or causal inference', 'The ability to ELI5', 'Drive to learn and improve'], preferred_qualifications=['Ph.D. in a quantitative field']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K -- $100K per year, or $20 to $28 per hour based on qualifications', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Google Ads Marketing aims to help advertisers of all sizes succeed with digital marketing.', mission_and_values='Not specified', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Business Data Scientist', team_or_department='Google Ads Marketing', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work with large, complex data sets', 'Solve complex analysis problems, applying advanced problem-solving methods', 'Conduct analysis that includes problem formulation, data gathering and requirements specification, processing, analysis, ongoing deliverables, and presentations', 'Design and analyze controlled experiments or counterfactual causal inference studies', 'Build and prototype analysis pipelines iteratively to provide insights at scale', 'Interact cross-functionally, making business recommendations with effective presentations of findings at multiple levels of stakeholders'], required_qualifications=[\"Master's degree in Statistics, Mathematics, Bioinformatics, Economics, a related field, or equivalent practical experience\", '2 years of experience in a data science field', 'Experience with statistical software (e.g., R, Python, MATLAB) and database languages (i.e., SQL)', 'Experience leveraging data insights into storytelling for business stakeholders'], preferred_qualifications=['PhD in Statistics or a related field', '2 years of experience with statistical data analysis', 'Experience with machine learning on large-scale computing systems', 'Experience in controlled experiment design and causal inference methods', 'Ability to prioritize requests and partner well in an environment with competing demands from stakeholders', 'Excellent communication skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary range: $127,000-$187,000', bonus_and_equity='Salary range: 
$127,000-$187,000', benefits_and_perks=['bonus', 'equity', 'benefits'])\n",
      "company_overview=CompanyOverview(about='TikTok is the leading destination for short-form mobile video. Our mission is to inspire creativity and bring joy.', mission_and_values='To us, every challenge, no matter how difficult, is an opportunity; to learn, to innovate, and to grow as one team. Status quo? Never. Courage? Always.', size='Millions of Americans', locations='U.S.') role_summary=RoleSummary(title='Job Title not specified', team_or_department='USDS-Platform and Community Integrity (PaCI) team', role_type='Full-time', remote='Hybrid work schedule (3 days a week in office)') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Build rules, algorithms and machine learning models, to respond to and mitigate business risks in TikTok products/platforms', 'Analyze business and security data, uncover evolving attack motion, identify weaknesses and opportunities in risk defense solutions, explore new space from the discoveries', 'Define risk control measurements. Quantify, generalize and monitor risk related business and operational metrics. 
Align risk teams and their stakeholders on risk control numeric goals, promote impact-oriented, data-driven data science practices for risks'], required_qualifications=['Bachelor or degrees above in computer science, statistics, math, internet security or other relevant STEM majors'], preferred_qualifications=['Solid data science skills', 'Proficiency in statistical analytical tools, such as SQL, R and Python', 'Familiarity with machine learning or social/content online platform analytics', 'Bonus given to proficiency in modern machine learning applications']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$144000 - $312444 annually', bonus_and_equity='Discretionary bonuses/incentives, and restricted stock units', benefits_and_perks=['100% premium coverage for employee medical insurance', 'approximately 75% premium coverage for dependents', 'Health Savings Account(HSA) with a company match', 'Dental, Vision, Short/Long term Disability, Basic Life, Voluntary Life and AD&D insurance plans', 'Flexible Spending Account(FSA) Options like Health Care, Limited Purpose and Dependent Care', '10 paid holidays per year', '17 days of Paid Personal Time Off (PPTO)', '10 paid sick days per year', '12 weeks of paid Parental leave', '8 weeks of paid Supplemental Disability', 'Mental and emotional health benefits through our EAP and Lyra', '401K company match', 'Gym and cellphone service reimbursements'])\n",
      "company_overview=CompanyOverview(about='Qventus is a real-time decision making platform for hospital operations.', mission_and_values='Our mission is to simplify how healthcare operates, so that hospitals and caregivers can focus on delivering the best possible care to patients.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Senior Data Scientist', team_or_department='Data Science', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Solve operational challenges in healthcare through the development, operationalization, and tuning of impactful models developed thoughtfully using a variety of machine learning, mathematical, and statistical approaches.', 'Collaborate with Product and Clinical partners to investigate, assess, and deploy POC solutions in strategic business areas', 'Design comprehensive experiments and analytics to measure the net impact of our interventions in healthcare settings', 'Create tools and resources to improve transparency in Data Science technical architecture and increase collaboration with engineering and analytics partners'], required_qualifications=['3+ years of research experience using a wide variety of statistical and machine learning techniques - particularly in NLP, explainable ML', '2+ years of relative industry experience developing, launching, and iterating on machine learning models and/or developing the core data science platform', 'Expertise with Python including an expansive of available data science libraries and util', 'Hands-on data engineering experience manipulating data sets, data cleaning, and pipelines.', 'Hands-on experience building and maintaining production machine learning pipelines (experience with Sagemaker preferred)', 'Strong software development foundations - dedication to high code quality, stable architecture, and an eye toward maintainability'], 
preferred_qualifications=['Dedication to mentorship and growing strong collaborative data science teams', 'Experience working with productionalized Generative AI (ChatGPT etc.)', 'Proven ability to work with domain experts to design relevant features and design, develop and tailor algorithmic solutions to problems in healthcare, particularly in the hospital setting', 'Strong cross-functional communication with technical and non-technical partners', 'Experience with healthcare data and industry', 'Practical hands-on experience with:']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$170,000 to $190,000', bonus_and_equity='Not specified', benefits_and_perks=['Competitive medical, dental, and vision coverage with a 90% employer-paid premiums for employees option', 'Generous HSA contribution, when elected and participating in an eligible plan, up to $1,500 annual company contribution', 'Employer-provided (100% paid) Short Term and Long Term Disability insurance and Basic Life and AD&D insurance', '100% paid Parental and Pregnancy Leave', 'Monthly Wellness and Technology stipend up to $50 per month', 'Ability to participate in the 401(k) plan', 'Generous Stock Option awards'])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='Data Science & Machine Learning', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Solve ambiguous, challenging business problems using data-driven approaches', 'Design the framework of data analysis to drive business insight and facilitate decisions', 'Establish the standard work process and best practices for data science including analytics, experimentation, and modeling', 'Work closely with data engineers to build and improve the reliability and accuracy of the data logging and data pipelines', \"Work with stakeholders including senior executives, products, and design teams to improve the team's productivity and quality of output\"], required_qualifications=['MS or PhD degree in Statistics, Computer Science, Econometrics or equivalent quantitative field', '3 years of experience in analytical or data warehousing experience', 'Strong strategic thinking and problem solving skills', 'Experience with A/B test experiment design and analysis', 'Experience with exploratory data analysis, statistical analysis and testing, and machine learning models', 'Advanced SQL skills', 'Ability to use languages and tools like Python, R, Hive, Spark to work with large data sets', 'Excellent communication skills, both written and verbal'], preferred_qualifications=['Tech lead or management experience', 'Experience with ELT data processing and/or Lakehouse systems', 'Experience in productionization of algorithms and data pipelines', 'Experience in FP&A']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K -- $100K', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Walmart U.S. offers an extensive selection that our customers value, whether they shop online at Walmart.com, through one of our mobile apps, or in-store.', mission_and_values='We are people-led and tech-empowered. We train our team in the skillsets of the future and bring in experts like you to help us grow.', size='Hundreds of millions of people', locations='680 WEST CALIFORNIA AVENUE, SUNNYVALE, CA 94086-4834, United States of America') role_summary=RoleSummary(title='Staff Data Scientist', team_or_department='Marketing', role_type='Staff Data Scientist', remote='Flexible, Hybrid Work') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Building advanced machine learning and deep learning models with various applications in marketing, such as customer targeting and personalized recommendation', 'Build solutions that can be easily integrated into external frameworks or existing applications', 'Select appropriate ML modeling techniques for complex problems with large-scale data'], required_qualifications=[\"Master's degree / PhD in computer science, Statistics, Optimization, or related field plus 3 years' experience in a machine learning / deep learning related field\", 'Strong hands-on skills in sourcing, cleaning, manipulating, and analyzing large volumes of data using distributed computing platform (Python, R, SQL, Spark, Hive etc.)'], preferred_qualifications=['PhD in Machine Learning, Computer Science, Information Technology, Operations Research, Statistics, Applied Mathematics, Econometrics', 'Successful completion of one or more assessments in Python, Spark, Scala, or R', 'Using open source frameworks (for example, scikit learn, tensorflow, torch)']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$143,000.00-$286,000.00', bonus_and_equity='annual or quarterly performance bonuses', benefits_and_perks=['Flexible, Hybrid Work', '401(k) match', 'stock 
purchase plan', 'paid maternity and parental leave', 'PTO', 'multiple health plans'])\n",
      "company_overview=CompanyOverview(about='Credit risk assessment and loss mitigation in the Fintech or online payments industry', mission_and_values='Work closely with team members and stakeholders to consult, design, develop, and manage credit trends and losses that not only solve emerging loss trends but also provide a great experience to end customers.', size='Not specified', locations='Draper, UT or San Jose, CA') role_summary=RoleSummary(title='Credit Strategy Data Scientist', team_or_department='Credit Risk Strategy team', role_type='Full-time', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design rules to detect/mitigate loss', 'Investigate novel/large cases', 'Identify root cause', 'Set strategy for different risk types', 'Work with product/engineering to improve control capabilities', 'Develop and present strategies and guide execution', 'Drive results that maximize eligible customers while controlling losses'], required_qualifications=['Minimum 2 years of experience in risk analytics, data analysis, or data science within the Fintech or online payments industry', \"Bachelor's degree in computer science, Engineering, Mathematics, Statistics, Data Mining or related field or equivalent practical experience\", 'Proficiency in SQL, Python, Excel including key data science libraries'], preferred_qualifications=['Experience with AWS, payment rule systems, and knowledge of credit products']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Hybrid work environment'])\n",
      "company_overview=CompanyOverview(about='Walmart is the largest retailer in the country, builds and deploys core AI assistant experiences across Walmart.', mission_and_values='Walmart is an Equal Opportunity Employer - By Choice. We believe we are best equipped to help our associates, customers and the communities we serve live better when we really know them.', size='Tens of millions of active users across multiple countries', locations='Sunnyvale, CA') role_summary=RoleSummary(title='Distinguished Engineer - Conversational AI', team_or_department='Conversational AI team', role_type='Distinguished Engineer', remote='Flexible, hybrid work') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Define the AI assistant architecture leveraging generative AI in a rapidly evolving ecosystem', 'Partner with multiple teams as the senior platform architect and evangelize the architecture, enable integrations and support development of new AI assistants', 'Be up-to-date on industry trends in the latest generative AI and AI assistant architecture patterns'], required_qualifications=[\"Bachelor's degree in computer science, computer engineering, computer information systems, software engineering, or related area and 6 years' experience in software engineering or related area\", \"8 years' experience in software engineering or related area\"], preferred_qualifications=[\"Master's degree in computer science, computer engineering, computer information systems, software engineering, or related area\", 'Background in creating inclusive digital experiences, demonstrating knowledge in implementing Web Content Accessibility Guidelines (WCAG) 2.2 AA standards, assistive technologies, and integrating digital accessibility seamlessly']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$169,000.00-$338,000.00', bonus_and_equity='Annual or quarterly performance bonuses', benefits_and_perks=['401(k) match', 'stock purchase 
plan', 'paid maternity and parental leave', 'PTO', 'multiple health plans'])\n",
      "company_overview=CompanyOverview(about='Samsung Research America Digital Health Team', mission_and_values='Empower people to live healthier lives by leveraging wearables, smartphones, medical devices, AI, and health services', size='Samsung Research America', locations='Various') role_summary=RoleSummary(title='Senior Research Engineer', team_or_department='Samsung Research America Digital Health Team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Prepare proof-of-concept, demo / prototypes, design and execute pilot studies using Generative AI and Large Language Models', 'Work with project team and contribute to dataset/document preparation, prompt engineering, customization, training, fine-tuning and deployment of healthcare LLMs', 'Lead the development of innovative ideas and generative AI algorithms based on noisy signals from sensors including, but not limited to, light, audio, physiological, and inertial sensor data collected both in lab and field settings', 'Generate patents and scientific research papers for top-tier publications'], required_qualifications=['Ph.D. 
in Computer Science, Computer Engineering, Biomedical Engineering, Electrical Engineering, Biophysics, Mathematics, health sciences or related areas or equivalent combination of education, training, and experience', 'Demonstrable knowledge and experience in Generative AI, Large Language Models, and their applications on mobile sensor data collected in lab and in the wild', '10+years of experience in one or more of the following programming languages: Python, Java, Android, MATLAB, R', 'Strong interpersonal and collaboration skills, ability to present complex information in an understandable and compelling manner, and comfortable working with multi-disciplinary teams', 'Publications in top-tier AI computing venues including but not limited to NeurIPS, CVPR, ICML, ICLR, ICCV, and ACL'], preferred_qualifications=['Experience in NLP and Conversational AI', 'Experience in LLM validation, reliability, toxicity/harmfulness avoidance', 'Experience with building knowledge graphs from unstructured texts', 'Experience with vector DB', 'Strong mathematics background, especially statistics', 'Turn the analyzed data into actionable insight and/or understandable visualization', 'On-device implementation of ML/AI algorithms in C/C++/Java', 'Clinical study design, IRB development, in-lab and/or in-the-wild data collection studies', 'Product development and prototyping experience in order to implement and validate solutions', 'Experience in designing and conducting studies of wearable devices or clinical-trials with human subjects both in lab and field settings', 'Previous experience developing algorithms in healthcare research/industry using EMR/EHR and mobile/wearable sensor data', 'Have working knowledge of the healthcare industry and experience curating and analyzing healthcare and wellness data', 'Proficiency in the Android/Tizen/iOS development environment with the ability to create fully working prototypes', 'Experience in collaborating on software implementations of 
algorithms and computing models with client and cloud engineers', 'Understanding of human factors, usability and comfort with wearable sensors', 'Experience operating under HIPAA is a plus']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not disclosed', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Overview of the company, industry, products, services, and notable achievements', mission_and_values='Company mission, vision, values, and culture, including commitments to diversity, inclusion, social responsibility, and work-life balance', size='Details about company size, such as number of employees', locations='Mountain View, CA, San Francisco, CA, or Bellevue, WA') role_summary=RoleSummary(title='Staff Software Engineer', team_or_department='AI Model Training Infrastructure', role_type='Full-time', remote='Hybrid work options, meaning you can work from home and commute to a LinkedIn office, depending on what’s best for you and when your team needs to be together') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Owning the technical strategy for broad or complex requirements with insightful and forward-looking approaches that go beyond the direct team and solve large open-ended problems', 'Designing, implementing, and optimizing the performance of large-scale distributed serving or training for personalized recommendation as well as large language models', 'Improving the observability and understandability of various systems with a focus on improving developer productivity and system sustenance', 'Mentoring other engineers, defining our challenging technical culture, and helping to build a fast-growing team', 'Working closely with the open-source community to participate and influence cutting edge open-source projects'], required_qualifications=['Bachelor’s Degree in Computer Science or related technical discipline, or equivalent practical experience', '4+ years of experience in the industry with leading/building deep learning systems', '4+ years of experience with Java, C++, Python, Go, Rust, C# and/or Functional languages such as Scala or other relevant coding languages'], preferred_qualifications=['BS and 8+ years of relevant work experience, MS and 7+ years of relevant 
work experience, or PhD and 4+ years of relevant work experience', 'Previous experience working with geographically distributed co-workers', 'Outstanding interpersonal communication skills (including listening, speaking, and writing) and ability to work well in a diverse, team-focused environment with other SRE/SWE Engineers, Project Managers, etc.', 'Experience building ML applications, LLM serving, GPU serving', 'Experience with search systems or similar large-scale distributed systems']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='156,000 - 255,000', bonus_and_equity='Annual performance bonus, stock, benefits and/or other applicable incentive compensation plans', benefits_and_perks=['Generous health and wellness programs', 'Time away for employees of all levels'])\n",
      "company_overview=CompanyOverview(about='Gen is a global company powering Digital Freedom through consumer brands including Norton, Avast, LifeLock, Avira, AVG, ReputationDefender, and CCleaner.', mission_and_values='We bring leading technology solutions in cybersecurity, privacy and identity protection to more than 500 million users in 150 countries so they can live their digital lives safely, privately, and confidently today and for generations to come.', size='More than 500 million users', locations='Global') role_summary=RoleSummary(title='AI Technical Director', team_or_department='AI research and innovation team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Provide technical leadership in adopting the latest AI advances into solutions for the benefit of consumers', 'Research, architect and implement state-of-the-art AI techniques', 'Establish best AI practices and follow them across project portfolio', 'Define AI performance, accuracy and reliability metrics and implement their consistent measurement', 'Define and implement AI safety measures', 'Architect, develop and maintain scalable systems and infrastructures for data ingestion, transformation, analysis, model hosting, and inference', 'Build and deploy AI solutions in multi-cloud environment', 'Work on solutions with diverse teams from various functions of the company', 'Document and communicate the results and insights of AI projects to exec-level audience'], required_qualifications=['Ph.D. 
level degree in artificial intelligence, computer science, engineering, mathematics, or related field', 'Experience in developing and deploying industrial grade AI solutions', 'Experience in using AI frameworks, trending models and libraries', 'Experience in using cloud services and platforms'], preferred_qualifications=['Large software project experience with multi-threaded, multi-process distributed computing environments and correct Software Engineering practices', 'Hands-on experience with Linux, Bash, GIT, Docker, Kubernetes, continuous integration, package deployment and dependency management', 'Knowledge of cybersecurity and digital safety concepts']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary range: $249,000.00 - $290,100.00', bonus_and_equity='Salary range: $249,000.00 - $290,100.00', benefits_and_perks=['Competitive benefits package', 'Bonus incentives'])\n",
      "company_overview=CompanyOverview(about='Recruiting from Scratch is a premier talent firm that focuses on placing the best product managers, software, and hardware talent at innovative companies.', mission_and_values='Started in 2021, our client is building the best business AI video system on the market.', size='Not specified', locations='Sunnyvale, US') role_summary=RoleSummary(title='Edge Computing Engineer', team_or_department='Engineering team', role_type='Full-time', remote='On-site / In-office (≥4 days a week)') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Building edge applications processing vision data and communication layers for the compute-constrained edge devices', 'Deploying machine learning models to production', 'Optimizing the platform runtime for maximum performance', 'Building observability and telemetry'], required_qualifications=['3+ years of experience writing production software in C++ and Python', 'Experience using various profiling tools', 'Experience with Docker, CI / CD pipelines'], preferred_qualifications=['Edge/IoT computing', 'Infrastructure management', 'Monitoring', 'Video processing & Streaming', 'Experience Interfacing ML Models']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$150,000-$220,000 base', bonus_and_equity='Not specified', benefits_and_perks=['Flexible PTO & Sick Policy', 'Medical, Dental, and Vision insurance', '401k'])\n",
      "company_overview=CompanyOverview(about='Gen is a global company powering Digital Freedom through consumer brands including Norton, Avast, LifeLock, Avira, AVG, ReputationDefender, and CCleaner.', mission_and_values='Our combined heritage is rooted in providing safety for the first digital generations.', size='500 million users in 150 countries', locations='Global') role_summary=RoleSummary(title='AI Technical Director', team_or_department='AI Team', role_type='Full-time', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Provide technical leadership in adopting the latest AI advances', 'Research, architect and implement state-of-the-art AI techniques', 'Establish best AI practices and follow them across project portfolio', 'Define AI performance, accuracy and reliability metrics and implement their consistent measurement', 'Define and implement AI safety measures', 'Architect, develop and maintain scalable systems and infrastructures for data ingestion, transformation, analysis, model hosting, and inference', 'Build and deploy AI solutions in multi-cloud environment', 'Work on solutions with diverse teams from various functions of the company', 'Document and communicate the results and insights of AI projects to exec-level audience'], required_qualifications=['Ph.D. 
level degree in artificial intelligence, computer science, engineering, mathematics, or related field', 'Experience in developing and deploying industrial grade AI solutions', 'Experience in using AI frameworks, trending models and libraries', 'Experience in using cloud services and platforms'], preferred_qualifications=['Large software project experience with multi-threaded, multi-process distributed computing environments', 'Hands-on experience with Linux, Bash, GIT, Docker, Kubernetes, continuous integration, package deployment and dependency management', 'Knowledge of cybersecurity and digital safety concepts']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$249,000.00 - $290,100.00', bonus_and_equity='On Target Earnings (annual base salary + target annual commission)', benefits_and_perks=['Competitive benefits package', 'Bonus incentives'])\n",
      "company_overview=CompanyOverview(about='Recruiting from Scratch is a premier talent firm that focuses on placing the best product managers, software, and hardware talent at innovative companies.', mission_and_values='Our team is 100% remote and we work with teams across the United States to help them hire.', size='N/A', locations='Downtown Palo Alto') role_summary=RoleSummary(title='N/A', team_or_department='N/A', role_type='Full-time', remote='In person') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['N/A'], required_qualifications=['3-8 years of experience', 'Bachelors of Science in Computer Science', 'Expertise in cloud services (AWS, Azure, GCP) and deploying applications in cloud environments', 'Experience with containerization and orchestration technologies (Docker, Kubernetes)', 'Proficiency in programming languages such as Python, C++, or Go', 'Familiarity with AI and machine learning concepts, particularly in deploying ML models'], preferred_qualifications=[\"Experience deploying AI models on cloud platforms (AWS, Azure, GCP) and within customers' VPCs\"]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$150-200k base', bonus_and_equity='N/A', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Amazon Web Services (AWS) is the world’s most comprehensive and broadly adopted cloud platform.', mission_and_values='AWS values diverse experiences.', size='Large company', locations='Multiple locations') role_summary=RoleSummary(title='Machine Learning Engineer', team_or_department='GAI Innovation Center', role_type='Full-time', remote='Flexible work hours and arrangements') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Solve complex technical problems', 'Design, implement, test, deploy and maintain innovative ML solutions', 'Build high-quality, highly available, always-on products'], required_qualifications=['Master’s degree in computer science or equivalent'], preferred_qualifications=['3+ years of non-internship professional software development experience', '3+ years of programming with at least one software programming language experience', '3+ years of leading design or architecture (design patterns, reliability and scaling) of new and existing machine learning systems experience']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$115,000/year - $223,600/year', bonus_and_equity='Equity and sign-on payments', benefits_and_perks=['Flexible work hours and arrangements', 'Mentorship and career growth resources'])\n",
      "company_overview=CompanyOverview(about='Intuit is a global technology platform that helps consumers and small businesses overcome their most important financial challenges.', mission_and_values='We are dedicated to powering prosperity for our customers and communities, and we are always looking for talented individuals to join our team.', size='Over 10,000 employees', locations='Mountain View, CA') role_summary=RoleSummary(title='Staff Business Data Analyst', team_or_department='Quickbooks Marketing Analytics', role_type='Full-time', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Drive opportunity identification and sizing in our web funnel', 'Work closely with our business performance analytics, web marketing, and web analytics teams', 'Own and improve the core set of dashboards for web funnel', 'Drive improvements to web analytics instrumentation and data architecture', 'Lead the construction of web funnel reporting and data narratives to senior leaders'], required_qualifications=[\"Bachelor's degree in a quantitative field\", '2+ years of experience in data analysis'], preferred_qualifications=['Experience with data analysis', 'Strong communication skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Competitive salary', bonus_and_equity='Stock options', benefits_and_perks=['Health insurance', 'Retirement plans', 'Paid time off'])\n",
      "company_overview=CompanyOverview(about='Intuit is the global financial technology platform that powers prosperity for the people and communities we serve. With approximately 100 million customers worldwide using products such as TurboTax, Credit Karma, QuickBooks, and Mailchimp, we believe that everyone should have the opportunity to prosper.', mission_and_values=\"Intuit's company culture is rooted in our core values, which promote an innovative and inclusive environment where diverse perspectives and new ideas are celebrated.\", size='Approximately 100 million customers worldwide', locations='Mountain View, California') role_summary=RoleSummary(title='Staff Business Data Analyst', team_or_department='Quickbooks Marketing Analytics team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Drive opportunity identification and sizing in our web funnel, unlocking critical web optimization and experimentation efforts to drive new customer acquisition growth', 'Work closely with our business performance analytics, web marketing, and web analytics teams to understand and quantify web drivers of ongoing Quickbooks customer acquisition and size performance of web acquisition projects', 'Own and improve the core set of dashboards for web funnel to unlock speed of insights across the analytics and marketing organization', 'Drive improvements to web analytics instrumentation and data architecture to unlock new metrics and insights, as well as improving the durability of data pipelines and data signals', 'Lead the construction of web funnel reporting and data narratives to senior leaders to power strategic web marketing decisions'], required_qualifications=['5+ years of experience in web analytics, analyzing web traffic patterns, segment performance, and conversion funnels', \"Bachelor's degree in a quantitative field or equivalent work experience, advanced degree preferred\"], 
preferred_qualifications=['Advanced statistical analysis, test design, and marketing attribution', 'Practical experience constructing data pipelines and ETL utilizing SQL and Python, as well as data solutions from cloud platforms', 'Strong data storytelling skills, with a proven ability to rapidly construct impactful visualization, communicate insights and influence marketing leadership', 'Deep understanding of AI/ML models in marketing attribution and personalization applications']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Bay Area California $152,500-206,000', bonus_and_equity='Expected base pay range for this position is Bay Area California $152,500-206,000', benefits_and_perks=['Award-winning culture, best-in-class benefits', 'Competitive compensation package with a strong pay for performance rewards approach', 'Cash bonus, equity rewards and benefits'])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='Data Analyst', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Understand the day-to-day issues that our business faces, which can be better understood with data', \"Compile and analyze data related to business' issues\", 'Develop clear visualizations to convey complicated data in a straightforward fashion'], required_qualifications=[\"Bachelor's or Master's degree in Statistics or Applied Mathematics or equivalent experience\", \"1 - 2 years' Data Analysis experience\", 'Proficient in SQL'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Intuit is a leader in the finance and accounting software industry.', mission_and_values='Not provided', size='Not provided', locations='Not provided') role_summary=RoleSummary(title='Finance and Operations Analytics', team_or_department='Finance and Operations Analytics', role_type='Full-time', remote='Not provided') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Performing company-level analyses', 'Identifying and implementing AI/ML opportunities', 'Executing data-based decisions', 'Building reporting dashboards', 'Deriving insights from existing datasets', 'Defining success metrics for new initiatives', 'Building models to enable accurate forecasting'], required_qualifications=['Highly analytical, strategic, excellent communicator, comfortable with ambiguity'], preferred_qualifications=['Not provided']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not provided', bonus_and_equity='Not provided', benefits_and_perks=['Not provided'])\n",
      "company_overview=CompanyOverview(about='El Camino Health is committed to hiring, retaining and growing the best and brightest professionals who will carry our mission and vision forward.', mission_and_values='Compassion, innovation, collaboration and delivering high-quality care.', size='Not specified', locations='Santa Clara') role_summary=RoleSummary(title='Data Analyst', team_or_department='Not specified', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design and run reports in the system or with additional tools such as SQL or Tableau', 'Analyze key data and make meaningful recommendations to the Concern management team', 'Collaborate with the development teams to test features and functionality and identify the root cause of issues as well as track and communicate status of defects'], required_qualifications=['Bachelor’s degree in a technology or science related field.', 'Three (3) years in an analyst position having related responsibilities and duties preferred', 'Advanced Excel and Office suite skills.', 'Statistical computational skills.', 'Advanced knowledge of relational databases such as Access, SQL, Amazon Aurora'], preferred_qualifications=['Experience collaborating with software development teams to test, track, and communicate status of defects', 'Experience working with Jira or other project management tools']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$36.13 - $54.20 USD Hourly', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='', team_or_department='', role_type='Full-time', remote='Full-time entry level option for remote job') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Extract, organize and analyze media buying performance data while using various tools', 'interface with the data engineering team', 'investigate and present results of requests for ad hoc data analysis'], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Estimated Salary: $20 to $28 per hour based on qualifications', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='EROS Technologies was founded with a simple motive of offering the clients exactly what they want, how they want and when they want it. By leveraging for its clients its technological edge and right-sourcing advantage, EROS in a short period of time has grown to become one of the most trusted strategic technology partners.', mission_and_values='Treating every client as the top priority, we customize our solutions and services to align with the unique needs of each client.', size='', locations='Mountain view-CA') role_summary=RoleSummary(title='Data Analyst', team_or_department='', role_type='Full time', remote='Full time only') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=[\"5-7 years' experience\", 'Good Communication', 'Data Analysis Skills', 'excellent SQL knowledge'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80-95k/annum', bonus_and_equity='', benefits_and_perks=['benefits'])\n",
      "company_overview=CompanyOverview(about='Georgia IT Inc', mission_and_values='', size='', locations='Mountain View, CA 94043') role_summary=RoleSummary(title='Jr Data Analyst', team_or_department='', role_type='Contract', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Gather business requirements and determine most effective and efficient reporting platform', 'Design and develop metrics and reports, dashboards and analyses to drive key business decisions', 'Conduct large scale data analysis and modeling to derive actionable insights', 'Review outputs for trends', 'Work with team to conduct root cause analyses and communicate improvement recommendations'], required_qualifications=['Coding experience with Javascript, Apps Script, SQL and/or BigQuery', 'Strong background in KPI Dashboards development and automation and information systems management', 'Strong visualization experience – charts, bars, images, dynamic reports etc.', 'Some exposure to Procurement, Planning and Product Lifecycle Management functions', 'Excellent Oral/Written Communication and Stakeholder management Skills', 'Excellent analytical and problem solving skills', 'Self driven, ability to work in an unstructured environment with minimal supervision'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Rate : Doe', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Spin Analytics and Strategy LLC', mission_and_values='', size='', locations='Palo Alto, CA') role_summary=RoleSummary(title='Data Analyst (Red shift) & Mode Analytics', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Write queries against Red shift', 'Produce reports on Mode Analytics', 'Automate publishing those reports to slack'], required_qualifications=['BS degree in specific technical fields like computer science, math, statistics'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='N/A', mission_and_values='N/A', size='N/A', locations='Mountain View, CA') role_summary=RoleSummary(title='Data Analyst focused on Warehousing', team_or_department='N/A', role_type='Full-time', remote='In person') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Developing and maintaining databases, and data systems – reorganizing data in a readable format', 'Performing analysis to assess the quality and meaning of data', 'Using statistical tools to identify, analyze, and interpret patterns and trends in complex data sets', 'Preparing reports for the management stating trends, patterns, and predictions using relevant data', 'Working with programmers, engineers, and management heads to identify process improvement opportunities, propose system modifications, and devise data governance strategies', 'Preparing final analysis reports for the stakeholders to understand the data-analysis steps, enabling them to make important decisions based on various facts and trends'], required_qualifications=['Demonstrated expertise in SQL programming', 'Experience in creating technical documentation such as Requirement Documents and Source to Target Sheets', 'Proficiency in developing User Acceptance Tests for metrics validation', 'Strong collaboration and communication skills to work effectively with cross-functional teams'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$4,000.00 - $6,000.00 per month', bonus_and_equity='N/A', benefits_and_perks=['Benefits can be discussed in detail during the interview'])\n",
      "company_overview=CompanyOverview(about='Intuit is a company building Gen AI capabilities across a team of 650+ analysts', mission_and_values='To shape analytics for GenAI, bringing together products and GenAI LLMs to make a tangible impact on customer experience', size='650+ analysts', locations='Not specified') role_summary=RoleSummary(title='Technical Data Analyst', team_or_department='Analytics Team', role_type='Technical Data Analyst', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Driving data models and definitions', 'Communication and Change Management', 'Analytics Methodologies'], required_qualifications=['Deep knowledge of ETL and data warehousing', 'Expertise in analytics methodologies', 'Experience with Gen AI measurement and incrementality measurement frameworks'], preferred_qualifications=['Excellent understanding of data architectures', 'Acumen to map business needs to technology', 'Analytical skills', 'Passion for delivering data-driven insights']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Accurately label machine learning data through various methods to refine AI models', 'Collaborate with team members to identify improvements to the labeling interface or labeling processes in order to drive efficiency and high-quality', 'Complete assignments by deadlines while meeting team expectations and goals'], required_qualifications=[\"Bachelor's degree in English, Literature, Creative Writing, Journalism, or a related field (or equivalent work experience)\"], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Palo Alto, CA', mission_and_values='To be determined', size='To be determined', locations='Palo Alto, CA') role_summary=RoleSummary(title='Data Analyst', team_or_department='Supply Chain Management', role_type='Full-time', remote='To be determined') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Create reports and data visualizations to guide decision-making across the supply chain team', 'Respond promptly, accurately and efficiently to data requests and questions', 'Support supply chain with master data updates as needed', 'Drive data integrity including identifying and acquiring missing information', 'Communicate your findings effectively to anyone in the company from executives to engineers', 'Proactively identify inconsistencies in the data through your visualizations and with statistical tests', 'Continuously improve SQL and python skills, helping you automate repetitive work and multiply yourself', 'Experiment and learn rapidly'], required_qualifications=[\"Bachelor's degree in a quantitative field and/or equivalent experience or evidence of exceptional deductive ability\", 'Expert in data analysis, visualization, and communication, with prior full-time work experience strongly preferred', 'Proficient in Advanced MS Excel, SQL and Python', 'Data visualization programs such as Tableau and PowerBi are preferred'], preferred_qualifications=['Experience creating dashboards/visualizations using Tableau (or similar)', 'Experience in supply chain / manufacturing Program Management', 'Experienced in Sourcing, MRP and fulfilment systems']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$72.3K - $91.5K a year', bonus_and_equity='To be determined', benefits_and_perks=['To be determined'])\n",
      "company_overview=CompanyOverview(about='AGS is an equal Opportunity/Affirmative Action Employer (M/F/Disability/Veterans).', mission_and_values='Not specified', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Data Analyst', team_or_department='Not specified', role_type='Contract', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Support effective decision-making through economic, financial and market analysis of critical issues, including capacity planning, price and cost modeling, and returns on investment.', 'Support the capacity forecasting, budgeting and reporting efforts for operating expense, capital and resource allocation.', 'Design and coordinate the development of systems and tools to keep pace with the group’s growing and rapidly changing businesses.'], required_qualifications=['Undergraduate degree in business, finance, engineering, or a related field.', 'Strong analytic skills and a demonstrated ability to build and manage financial models for business forecasting, variance analysis, and problem solving.', 'Fluency in standard software including MS Excel', 'Strong communication skills – both written and verbal', 'Ability to maintain a high level of attention to detail, ability to work independently, adjust priorities, and work in a continuously changing environment'], preferred_qualifications=['Direct experience in operations or capacity planning']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about=\"Argo AI is a global self-driving products and services company on a mission to make the world's streets and roadways safe, accessible, and useful for all.\", mission_and_values=\"Make the world's streets and roadways safe, accessible, and useful for all\", size='Global company', locations='Various') role_summary=RoleSummary(title='Data Analyst', team_or_department='Argo Vehicle Operations team', role_type='Data Analyst', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Gather data, structure problems, and perform analyses', 'Research technical and business issues', 'Work cross-functionally to analyze opportunities, synthesize findings, and drive insights', 'Act as part of the ongoing team to translate recommendations into results', 'Present data-driven recommendations and optimize our current go-to-market strategy', \"Track and report key performance metrics regarding fleet utilization and output, with detailed analysis of how each is supporting Argo's mission\", 'Build analytics dashboards using mostly off-the-shelf tools', 'Create regular reports that will be presented and reviewed by the data services team', 'Help analyze and understand gaps in reporting', 'Document process and playbooks for analysis', \"Create 'source of truth' tables and dashboards that the company (and our team) can use to identify, diagnose and address issues\", 'Understand input data and data roadmaps from other teams across the company to ensure that the top-level funnels are well defined and understood', 'Build scalable analytical frameworks; partner with engineering teams to drive instrumentation of new metrics, if/where we determine this is needed', 'Work with cross-functional teams such as engineering, product management, various data teams to deploy data quality across critical pipelines and to set up processes to triage data issues', 'Effectively and proactively communicate insights, 
contribute to team presentations and leadership reviews, and drive projects to completion to hit team goals', 'Create and drive data quality standards and frameworks to ensure inclusion into pipeline engineering efforts', 'Assist with new product experimentation including plan creation, roll-out, and monitoring'], required_qualifications=['Ability to manage and prioritize conflicting objectives', 'Excellent problem solving and troubleshooting, implementing solutions, and documenting results', 'Strong desire to learn new skills related to technology and software', 'Excellent communication skills with the ability to span a large and varied workforce', 'Proven self-starter mindset and the ability to work independently or with minimal supervision', 'Detail oriented/good organizational skills', 'Occasional travel to remote offices and test facilities', 'Established strong analysis and presentation skills including advanced use of Excel or G Sheets and Powerpoint or G Slides', 'Solid written and verbal communication skills, including presentation skills', 'Basic understanding of experimental design (such as A/B experiments) and statistical methods', 'Ability and experience in extracting insights from data, and summarizing learnings / takeaways', 'Experience with Excel and some dashboarding/data visualization (i.e. 
Tableau, Mixpanel, Looker, or similar)'], preferred_qualifications=[\"Bachelor's Degree\", 'Masters Degree with some focus or concentration in statistic, data analysis, data science', '5 years of experience in analytics or a closely related field', 'High proficiency with business intelligence tools and SQL', 'Experience working with Data Warehouses like Redshift, BigQuery, or Snowflake', 'Strong understanding of statistics and experience with business intelligence and data analysis']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='80K', bonus_and_equity='Competitive compensation packages', benefits_and_perks=['High-quality individual and family medical, dental, and vision insurance', 'Competitive compensation packages', 'Employer-matched 401(k) retirement plan with immediate vesting', 'Employer-paid group term life insurance and the option to elect voluntary life insurance', 'Paid parental leave', 'Adoption & Surrogacy Assistance Program', 'Paid medical leave', '30 day paid sabbatical upon 5 years of employment', 'Unlimited vacation', 'Complimentary daily lunches, beverages, and snacks', 'Pre-tax commuter benefits', 'Monthly wellness stipend', 'Professional development reimbursement', 'Employee assistance program', 'Discounted programs that include legal services, identity theft protection, pet insurance, and more'])\n",
      "company_overview=CompanyOverview(about='ScoreData is a company that offers data analytics services using its ScoreFast(™) technology stack.', mission_and_values='ScoreData values customer satisfaction and success.', size='Not specified', locations='Palo Alto, USA') role_summary=RoleSummary(title='Hands-on Data Scientist', team_or_department='Data Science', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work on specific customer data analytics project from start to finish', 'Understand, document and deliver customer projects using the ScoreFast(™) technology', 'Focus on customer satisfaction and success', 'Work closely with other team members to deliver projects', 'Explore new uses cases / ideas to solve customer challenge', 'May require travel to customer sites based on need basis'], required_qualifications=['B.S or higher in the area of data science, computer science, statistics, mathematics, physics, engineering, operations research or other quantitative analytical field from a reputed school', '3 years of working experience with at least 2 years’ experience with performing customer analytics projects', '2+ years’ experience with Machine Learning and Data Mining', '2+ years using R or Python for data analytics'], preferred_qualifications=['Knowledge of Github, JIRA, Confluence, Jenkins', 'Experience with Hadoop and/or Spark ecosystem']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Competitive salary', 'Opportunities for professional growth'])\n",
      "company_overview=CompanyOverview(about='Stanford University', mission_and_values='Stanford University is committed to diversity, equity and inclusion.', size='Not specified', locations='Stanford, CA') role_summary=RoleSummary(title='Research Data Analyst', team_or_department='Not specified', role_type='Part-time', remote='Hybrid eligible') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Manage and analyze large amounts of information', 'Employ new and existing tools to interpret, analyze, and visualize multivariate relationships in data', 'Create databases and reports, develop algorithms and statistical models, and perform statistical analyses appropriate to data and reporting requirements'], required_qualifications=[\"Bachelor's degree or a combination of education and relevant experience in a quantitative discipline such as economics, finance, statistics or engineering\"], preferred_qualifications=[\"Master's degree or a combination of education and relevant experience in computational biology or bioinformatics\"]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$32,240 - 48,500 per year', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Scilex Holding Company is an innovative revenue-generating company focused on acquiring, developing and commercializing non-opioid pain management products for the treatment of acute and chronic pain.', mission_and_values='Scilex Holding Company is committed to equal employment opportunities, and a workplace that embraces diversity and inclusion for qualified individuals of all backgrounds.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Senior Data Analyst', team_or_department='Commercial Department', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Conducts thorough data analysis and develops data flows aligned to strategic vision', 'Collaborate with Commercial team to help synthesize insights in a streamlined manner', 'Architect complex data-centric and business intelligence solutions', 'Support advanced analytics efforts', 'Develop first-hand knowledge of datasets and function as data expert to navigate emerging business needs and evolving data landscape'], required_qualifications=['Bachelor’s degree (business discipline preferred)', '5-7 years’ experience as a Data Analyst or in a similar role working directly with clinical healthcare data', 'Working knowledge of pharmaceutical industry data sources', 'Prior experience developing business requirements documents and managing data projects'], preferred_qualifications=['Bachelor’s degree (business discipline preferred)', '5-7 years’ experience as a Data Analyst or in a similar role working directly with clinical healthcare data', 'Working knowledge of pharmaceutical industry data sources', 'Prior experience developing business requirements documents and managing data projects', 'Experience designing enterprise reporting solutions driven by a data lake, data warehouse or operational data store', 'Strong analytical/critical thinking skills', 'Coding 
skills and/or Analytical Tools experience', 'Experience with using retail and specialty pharmacy data', 'Adept communication skills', 'Cross-functional collaboration skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$114,000 - $131,000/annually', bonus_and_equity='Not specified', benefits_and_perks=['medical benefits', '401(k) eligibility', 'vacation', 'sick time', 'parental leave'])\n",
      "company_overview=CompanyOverview(about='Tesla is a pioneering company in the ClimateTech industry', mission_and_values=\"Tesla's mission is to accelerate the world's transition to sustainable energy\", size='Large company with a global presence', locations='Palo Alto, California') role_summary=RoleSummary(title='Data Analyst, Supply Chain', team_or_department='Supply Chain Management', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Analyze and visualize data', 'Ensure effective utilization of large supply chain datasets', 'Support the global supply management team'], required_qualifications=['Data analysis skills', 'Supply chain management knowledge'], preferred_qualifications=['job-related knowledge', 'skills', 'experience']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$68,000 - $234,000/annual salary', bonus_and_equity='stock awards', benefits_and_perks=['cash and stock awards', 'benefits'])\n",
      "company_overview=CompanyOverview(about=\"Activehours is reinventing the way people get paid by giving them immediate access to the money they've earned.\", mission_and_values=\"Celebrate victory, don't shy from failure, and are always learning.\", size='Rapidly growing company', locations='Palo Alto') role_summary=RoleSummary(title='Senior Data Analyst', team_or_department='', role_type='Full-time', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Deep-dive, synthesize, and articulate actionable next steps by leveraging our full ecosystem of data sources', 'Hypothesis-driven testing, including defining, facilitating, and analyzing A/B testing', 'Define strategy and execution for business intelligence and live operations reporting, including dashboards and alerts', 'Recommend analytical tools to implement in partnership with developers and the Head of Growth', 'Lifetime value and cohort tracking'], required_qualifications=['Minimum of 3 years of demonstrable experience in digital data and measurement', 'Deep expertise in SQL', \"Bachelor's degree in mathematics, statistics, computer science, or related quantitative field\"], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Kofi Group places software engineers and machine learning engineers on a direct-hire basis with VC-backed startups in San Francisco/Silicon Valley, New York, and Austin.', mission_and_values='drive positive outcomes', size='NA', locations='San Francisco/Silicon Valley, New York, and Austin') role_summary=RoleSummary(title='Data Engineer', team_or_department='NA', role_type='Full-time', remote='NA') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Develop, design, refine, adjust, and evaluate ETL pipelines or systems to enhance our Machine Learning and Analytics capabilities', 'Ensure data integrity throughout its lifecycle by implementing safeguards, health checks, and alerts', 'Collaborate closely with clients to gain profound insights into their data ecosystems, effectively consolidating our data models for predictive analysis', 'Foster a deep understanding of relevant data domains, codebase, and systems', 'Demonstrate expertise in data engineering, data architecture, programming, and software engineering', 'Exercise discretion in choosing methods and techniques to achieve solutions', 'Operate independently, utilizing available resources to overcome challenges and meet deadlines by employing sound judgment and problem-solving abilities', 'Become proficient in internal development standards, progressively managing tasks and projects of increasing complexity from coding to code release', 'Collaborate with Data Science, Product Managers, and Software Engineers to develop robust ETL pipelines that empower the Product Support team to deliver compelling user experiences', 'Display empathy for customers to ensure that data processes and workflows meet their needs'], required_qualifications=[\"Bachelor's degree in Computer Science, Data Science, Information Systems, Data Mining, Mathematics, Statistics, Physics, Applied Sciences, or a related field\", 'At least 2 years of hands-on 
industry experience in data engineering', 'Proficiency in event backbone and job pool platforms (e.g., Kafka)', 'Willingness to engage extensively with various internal and client software solutions', 'Experience in client interaction and translating client requirements into valuable features', 'Over 2 years of experience with Typescript, Python (Django), and the development of complex ETL pipelines', 'Hands-on experience in SQL database design, data modeling, and data mining', 'Demonstrated ability to quickly grasp new technologies'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='NA', bonus_and_equity='NA', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about=\"Amazon Web Services (AWS) has been the world's most comprehensive and broadly adopted cloud platform.\", mission_and_values='Deliver results, drive effectiveness, predictability, and continuous improvement in field productivity.', size='Millions of active customers around the world', locations='Palo Alto, CA USA; Seattle, WA USA; Denver, CO USA; San Francisco, CA USA; Austin, TX USA; New York, NY USA; Herndon, VA USA; Boston, MA USA; Atlanta, GA USA') role_summary=RoleSummary(title='Data Engineer', team_or_department='Sales Strategy, Operations, and Customer Programs organization', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design, develop and maintain at scale automated, user-friendly systems', 'Write high-quality SQL queries', 'Detect trends in AWS and 3rd party data', 'Collaborate effectively with internal teams', 'Set-up the framework and effectively analyzing data and feedback for insights'], required_qualifications=['Experience as a Data Engineer', 'Ability to design, develop and maintain automated systems'], preferred_qualifications=['Strong analytical acumen', 'Ability to earn trust and communicate effectively']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='Not specified', benefits_and_perks=['Relocation assistance from within the United States'])\n",
      "company_overview=CompanyOverview(about='Rhombus is purposefully transforming the nation’s defense and national security enterprises with Guardian, its Artificial Intelligence platform for strategic, operational, and tactical decision-making at the speed of relevance.', mission_and_values='Transform national security', size='Start-up', locations='Palo Alto, CA') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Engineering', role_type='Full-time', remote='Undisclosed') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Develop code using various programming and scripting languages to automate data ingestion and improve data management processes', 'Architect data repositories, stand up data platforms and develop data pipelines for ingestion, transformation, and aggregation', 'Review existing architecture, data strategy, and improve processes for data governance, data quality, and metadata management', 'Extract and analyze raw data from multiple data sources via APIs, SQL Stored Procedures, or Python scripts', 'Ability to develop scripts and programs for converting various types of data into usable formats and support project team to scale, monitor and operate data platforms', 'Collaborate with a multi-disciplinary team of analysts, data scientists, data engineers, developers, and data consumers in a fast-paced, agile environment', 'Communicate project status and results to various levels of leadership'], required_qualifications=['A Bachelor’s degree in Data Analytics, Computer Science, Computer Engineering, Information Systems/Sciences, or other relevant area (or equivalent experience)', 'At least 1 year of professional experience', 'Ability to obtain and maintain a US security clearance', 'U.S. 
citizenship'], preferred_qualifications=['Experience with 1 or more programming and scripting languages', 'Experience with 1 or more of the following relational, noSQL and/or file based storage', 'Experience building and maintaining ETL data pipelines', 'Experience with software development life cycle including testing, documenting, delivery and support', 'Working knowledge of AWS/cloud technologies', 'Experience using query optimization as well as data modeling techniques', 'Familiarity with machine learning frameworks']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Undisclosed', bonus_and_equity='Bonus and other incentive programs', benefits_and_perks=['Full medical, dental, vision coverage for employee and dependents', '401k matching program', 'PTO and Holidays', 'Bonus and other incentive programs'])\n",
      "company_overview=CompanyOverview(about='Kofi Group is a recruiting firm that places software engineers and machine learning engineers at VC-backed startups in San Francisco/Silicon Valley, New York, and Austin.', mission_and_values='', size='', locations='San Francisco/Silicon Valley, New York, Austin') role_summary=RoleSummary(title='Software Engineer/Machine Learning Engineer', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Rivian is on a mission to keep the world adventurous forever.', mission_and_values='Operate comfortably in areas that are unknown, protect the outdoors for future generations.', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Sr. Data Visualization Engineer', team_or_department='Service organization', role_type='Sr. Data Visualization Engineer', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work within the Service organization to understand needs, goals, and objectives', 'Design and develop secure, scalable, high-performance and reliable (cost effective) big data and analytics solutions', 'Use Data & Analytics to answer business questions that lead to insights and actionable outcomes', 'Perform ad hoc data analysis to provide efficient analytical support for on-the-spot business inquiries and program management adjustments'], required_qualifications=['5 years of strong data analysis experience', '5 years of hands-on experience creating complex dashboards and data stories using Tableau', '5 years of hands-on IT experience in Alteryx, Data Warehouse, ETL & Reporting'], preferred_qualifications=['MBA', 'Previous experience in Management consulting firm, start-up environment, Automotive OEM or equivalent experience']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80,000 - $100,000', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Tesla is an electric vehicle and clean energy company', mission_and_values='ClimateTechList gathers 33,000+ job openings from over 1,809 climate tech companies and updates them daily', size='Not specified', locations='Palo Alto, California') role_summary=RoleSummary(title='Sr. Data Engineer, Automation and Analytics', team_or_department='Electrical component team in Supply Chain', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Plan effective data storage, security, sharing and publishing within the organization', 'Maintain large supply chain datasets and formulate applicable data-driven solutions'], required_qualifications=['Data analytics', 'Process raw, unstructured data using batch and real time processing frameworks'], preferred_qualifications=['Not specified']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80,000 - $258,000/annual salary', bonus_and_equity='Not specified', benefits_and_perks=['cash and stock awards', 'benefits'])\n",
      "company_overview=CompanyOverview(about='Cygnus Professionals Inc. is a next generation global information technology Solution and Consulting company powered by strong management and leadership team with over 30 person years of experience.', mission_and_values='extend our presence across industries and geographies with our industry-focused business excellence', size='more than 25 satisfied customers', locations='New Jersey, U.S') role_summary=RoleSummary(title='Big Data Engineer w/Spark', team_or_department='', role_type='Contract to Hire', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=['Experience with Java, Spark and Hadoop', 'A minimum of 2 years of experience working with distributed systems', 'Knowledge in distributed system design, data pipelining, and implementation', 'Knowledge in machine learning algorithms', 'Knowledge and experience in building large scale applications using various software design patterns and OO design principles', 'Experience with either distributed computing (Hadoop/Spark/Cloud) or parallel processing (CUDA/threads/MPI)', 'Expertise in design pattern (UML diagrams) and data modeling of large scale analytic systems', 'Experience in research, analysis, and the conversion of large amount of raw collected data and content into new sets of data that is structured and does not reduce data context in order to enable the Productization of new products', 'Worked with data warehousing and distributed/parallel processing of large data sets using parallel computing system to map/reduce computation and Linux clusters (e.g. 
Hadoop/Cloud technologies, HDFS); cluster;', 'Experienced in modern development methodology such as Agile, Scrum and SDLC', 'Ability to work in a research oriented, fast pace, and highly technical environment', 'Quick thinker and a fast learner, collaborative spirit, and excellent communication and interpersonal skills'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about=\"Aisera offers the world's first AI-driven service experience solution that automates operations and support for IT, Sales and customer service, making businesses and customers successful by offering consumer-like self-service resolutions to users.\", mission_and_values=\"Aisera's mission is to make businesses and customers successful by offering consumer-like self-service resolutions to users.\", size='100', locations='Palo Alto, Calif.') role_summary=RoleSummary(title='Senior Data Engineer', team_or_department='Platform Engineering Team', role_type='Full-time', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work with teammates, leadership and product management to design and deliver the data platform and connector features as per roadmap.', 'Deliver testable, modular, highly scalable and reusable code, test cases and documentation on time.'], required_qualifications=[\"Bachelor's Degree in Computer Science or Computer Engineering or Electrical Engineering\", 'At least 3 years professional experience in similar positions in software development', 'Strong Java skill', 'Strong database skills', 'Good experience in microservices', 'Good experience in Elasticsearch', 'Good experience in using one or more public cloud environments (AWS, Azure, Google Cloud, etc.)', 'Very keen on quality in any aspect', 'Very good verbal and written communication skills (English)'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K -- $100K', bonus_and_equity='', benefits_and_perks=['Medical, dental, and vision benefits', 'Holidays and flexible PTO', 'Paid family leave', '401(k) plan', 'Stock Options', 'Employment Assistance Program'])\n",
      "company_overview=CompanyOverview(about='Intapp provides equal employment opportunities to all qualified applicants and will make hiring decisions without regard to race, color, sex, sexual orientation, gender identity or expression, religion, national origin or ancestry, age, disability, marital status, pregnancy, protected veteran status, protected genetic information, political affiliation, or any other characteristic protected by federal, state or local laws.', mission_and_values='Our culture at Intapp emphasizes accountability, responsibility, and growth. We support each other in a positive, open atmosphere that fosters creativity, approachability, and teamwork.', size='Growing public company', locations='Remote work options available') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Engineering Team', role_type='Full-time', remote='Remote work options available') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Develop and implement data pipelines and ETL processes to ensure data quality, reliability, and availability for analysis', 'Perform exploratory data analysis to gain insights into data patterns, trends, and relationships, using statistical methods', 'Identify relevant features and variables for predictive modeling and analysis', 'Develop and implement machine learning models and algorithms to solve business problems and extract actionable insights from data', 'Evaluate model performance, fine-tune parameters, and optimize algorithms to achieve desired outcomes', 'Work closely with cross-functional teams, including data engineers, business analysts, and stakeholders, to understand business requirements and translate them into data-driven solutions'], required_qualifications=[\"Bachelor's degree in computer science, Data Science, Statistics, or a related field\", 'Strong background, 3+ years, in data engineering, including experience with data collection, preprocessing, and ETL 
processes', 'Desired 5+ years of proficiency in programming languages such as Python, SQL, and/or R, and experience with data manipulation and analysis libraries (e.g., pandas, NumPy, scikit-learn, requests, etc..)', 'Knowledge of machine learning techniques and algorithms, with practical experience in model development, evaluation, and deployment'], preferred_qualifications=['Experience with big data technologies and frameworks (e.g., Hadoop, Spark, Kafka)', 'Familiarity with cloud platforms and services (e.g., AWS, Azure, Google Cloud)', 'Knowledge of data visualization tools (e.g., Matplotlib, Tableau) and techniques', 'Understanding of software development principles and practices, including version control, testing, and deployment']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Competitive base salary plus variable compensation and equity', benefits_and_perks=['Generous paid parental leave, including adoptive leave', 'Traditional comprehensive benefits', 'Paid Time Off', 'Tuition reimbursement plan', 'Family Formation benefit offered by Carrot', 'Wellness programs and benefits provided by Modern Health', 'Paid volunteer time off and donation matching for the causes you care about', 'Home office stipend'])\n",
      "company_overview=CompanyOverview(about='HP is a technology company that operates in more than 170 countries around the world united in creating technology that makes life better for everyone, everywhere.', mission_and_values=\"HP's commitment to diversity, equity and inclusion - it's just who we are.\", size='More than 170 countries', locations='Multiple locations') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Engineering', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Link between data engineers who ingest data and the business units that depend on that data to make decisions', 'Collaborate with various business teams, understand their data needs and build plans to address those with intelligent data solutions in a timely and sustainable fashion', 'Deliver data solutions that tell a story and provide insights to the businesses, enabling them to make better decisions', 'Collaborate internal analytical leaders to identify opportunities to improve data quality', 'Work on pipelines, data models, and data visualizations with your team'], required_qualifications=['Knowledge & experience with building data infrastructure', 'Experience in programming such as SQL or Python', 'Visualization expertise in tools such as Tableau, Power BI, Dataiku etc.', 'Conceptual, logical data modeling and data architecture knowledge'], preferred_qualifications=[\"Bachelor's degree in Computer Science, Software Engineering, other technical degree, or equivalent practical experience\", '3-5 years of relevant work experience']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Salary Range: $80K -- $100K', bonus_and_equity='Not specified', benefits_and_perks=['Salary Range: $80K -- $100K', 'Estimated Salary: $20 to $28 per hour based on qualifications'])\n",
      "company_overview=CompanyOverview(about='The City of Palo Alto is undergoing radical change and much of it is being powered by new technologies.', mission_and_values='Leading the use of innovation to power the next generation of government services.', size='Not specified', locations='Palo Alto') role_summary=RoleSummary(title='Senior BI Analyst', team_or_department='Information Technology Department', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[\"Supporting the City's existing SAP BI Application\", 'Designing, planning, implementation, and support/administration', 'Leading BW/BI projects in collaboration with Business, IT/Basis, IS functional/technical/security analysts', 'Facilitating and gathering reporting requirements for assigned projects', 'Establishing strong business relationships to identify, plan and scope future BW projects', 'Writing functional design document and detailed technical document', 'Providing ongoing maintenance, assistance and support for BW/BI', 'Providing ongoing business user training and support as needed'], required_qualifications=[\"Bachelor's Degree in Information Technology or a related field\", '5 years of progressively responsible, professional Information Technology experience', '8+ years of SAP Business Warehouse/Intelligence development experience'], preferred_qualifications=['IS-Utility experience']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Amazon is a leading e-commerce company', mission_and_values='Customer-focused, innovative, and customer-obsessed', size='Large company with millions of customers', locations='Multiple locations worldwide') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='Search Capacity team', role_type='Contract', remote='Not mentioned') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Scale the Amazon Search service', 'Identify and track key performance metrics', 'Manage planning, ordering, and budgeting for hardware and other computational resources', 'Develop and improve tools for automating tasks', 'Analyze resource utilization and performance test data', 'Develop models for required hardware resources'], required_qualifications=['Not mentioned'], preferred_qualifications=['Not mentioned']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='Not mentioned', benefits_and_perks=['Competitive salary'])\n",
      "company_overview=CompanyOverview(about='Tesla is a pioneer in the electric vehicle and clean energy industry.', mission_and_values=\"Accelerating the world's transition to sustainable energy\", size='Global company with over 100,000 employees', locations='Palo Alto, California') role_summary=RoleSummary(title='Business Intelligence Analyst', team_or_department='North America Sales & Service Infrastructure Development', role_type='Internship', remote='Hybrid work arrangement') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work with cross-functional teams to analyze and visualize business data', 'Develop and maintain dashboards and reports to drive business insights', 'Conduct data analysis and provide recommendations to stakeholders'], required_qualifications=[\"Bachelor's degree in Business Administration, Computer Science, or related field\", 'Prior internship or co-op experience in business intelligence or analytics', 'Strong analytical and problem-solving skills'], preferred_qualifications=['Experience with data analysis and visualization tools', 'Knowledge of business intelligence and analytics', 'Strong communication and problem-solving skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Competitive salary range', bonus_and_equity='Performance-based bonuses and equity opportunities', benefits_and_perks=['Competitive salary and bonus structure', 'Comprehensive health insurance', '401(k) matching', 'Flexible work arrangements'])\n",
      "company_overview=CompanyOverview(about='Ford Motor is an Equal Opportunity Employer. We celebrate diversity and are committed to creating an inclusive environment for all employees.', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='Business Intelligence Manager', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Manage projects for our team', 'Lead user analytics and business metrics development', 'Support the organization’s ambitious growth strategy', 'Design customer and market research and benchmarking analysis', 'Identify and recommend value metrics to measure our products', 'Work closely with internal stakeholders to identify business models that support our products', 'Bring customer and market data to life through rich storytelling', 'Build analytic capabilities to grow our organization’s knowledge'], required_qualifications=[], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Alexa Shopping team, building experiences to allow customers to instantly order products using smart devices', mission_and_values='Provide instant ordering capabilities to customers using smart devices', size='N/A', locations='N/A') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='Alexa Shopping team', role_type='Business Intelligence Engineer', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Build metrics and drive customer adoption', 'Provide analytical advice to the team', 'Implement creative approaches to complex data sets'], required_qualifications=['Experience with SQL (Redshift, Oracle)', 'Ability to use a major programming and/or scripting language'], preferred_qualifications=['Experience with BI tools', 'Analytical and creative thinking', 'Ability to work with large data sets']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20-$28 per hour', bonus_and_equity='N/A', benefits_and_perks=['Competitive salary ($20-$28 per hour)'])\n",
      "company_overview=CompanyOverview(about='Amazon', mission_and_values='Delight hundreds of millions of customers and build the best search experience to help customers make well-informed purchase decisions on Amazon', size='Large company', locations='Seattle, WA') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='Search Organization', role_type='Full-time', remote='Unknown') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Own, design, develop, document, and manage scalable solutions for new and ongoing metrics, reports, analyses, visualizations, and dashboards', 'Work closely with ML & data scientists to extract, integrate and work on critical data', 'Present to business leaders in both ad-hoc forums and routine business reviews (MBR/QBR) to drive decisions'], required_qualifications=['Strong technical expertise', 'Strong problem-solving skills', 'Excellent leadership skills', 'Strong business acumen', 'Written and verbal communication skills'], preferred_qualifications=['Experience with MySQL, Redshift, Oracle, and Quicksight', 'Experience with data mining, data modeling, and data warehousing']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='Performance-based bonus and equity opportunities', benefits_and_perks=['Competitive salary', 'Benefits package', 'Opportunities for growth and development'])\n",
      "company_overview=CompanyOverview(about='Amazon Web Services (AWS) is a leading cloud computing platform', mission_and_values='Customer-obsessed company culture with a focus on data-driven decision making', size='Large technology company', locations='Sunnyvale, CA, Palo Alto, CA, Cupertino, CA, San Francisco, CA') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='EC2 Capacity Intelligence & Enablement team', role_type='full-time', remote='not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['design, develop, and maintain scalable solutions for business intelligence', 'translate business problems into analysis requirements', 'develop queries and visualizations for ad-hoc requests and projects'], required_qualifications=[\"bachelor's degree in computer science or related field\", '3+ years of experience in business intelligence engineering'], preferred_qualifications=['experience with data analytics', 'knowledge of cloud computing']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$20 to $28 per hour based on qualifications', bonus_and_equity='performance-based bonuses and stock options', benefits_and_perks=['competitive salary', 'health insurance', '401(k) matching', 'paid time off'])\n",
      "company_overview=CompanyOverview(about='Amazon Advertising is dedicated to driving measurable outcomes for brand advertisers, agencies, authors, and entrepreneurs.', mission_and_values=\"Amazon's innovations and insights to find, attract, and engage intended audiences throughout their daily journeys.\", size='Not specified', locations='Palo Alto, California') role_summary=RoleSummary(title='Business Intelligence Engineer', team_or_department='Advertising Forecasting Science team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design, develop, and maintain scalable, automated metrics reports and dashboards to support our analytical and business needs', 'Provide insights on data analytics to influence model development roadmap', \"Work closely with stakeholders and leadership to define and present the team's business operational metrics\", \"Work closely with scientists to define and present the team's model-related metrics\"], required_qualifications=['Not specified'], preferred_qualifications=['Strong analytical skills', 'Business acumen and judgement', 'Intellectual curiosity', 'Technical skills', 'Excellent written and verbal communications']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Estimated Salary: $20 to $28 per hour based on qualifications', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Please provide the company overview', mission_and_values='Please provide the mission and values', size='Please provide the company size', locations='Please provide the locations') role_summary=RoleSummary(title='Please provide the title', team_or_department='Please provide the team or department', role_type='Please provide the role type', remote='Please provide the remote work options') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Please provide the responsibilities'], required_qualifications=['Please provide the required qualifications'], preferred_qualifications=['Please provide the preferred qualifications']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Please provide the salary or pay range', bonus_and_equity='Please provide the bonus and equity information', benefits_and_perks=['Please provide the benefits and perks'])\n",
      "company_overview=CompanyOverview(about=\"Overview of Google Play, Android and Mobile ecosystem and Google's mission to connect third-party app and game developers with the right opportunities.\", mission_and_values=\"Google Play's mission is connecting the third-party app and game developers with the right opportunities at the right time.\", size='N/A', locations='Remote') role_summary=RoleSummary(title='Business Intelligence Analyst', team_or_department='Google Play Partnerships team, Strategy and Operations team', role_type='Full-time', remote='Remote') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Manage an extended workforce team responsible for building and maintaining business intelligence infrastructure for the Google Play Partnerships team', 'Contribute to project scoping, manage project priorities, and allocate resources within the project and informing/escalating appropriately when external factors impact execution', 'Manage changes and launched of data products, navigating testing, approvals, access, roll-out, training, and/or user feedback', 'Own relationships with Engineering/Technical Analyst teams', 'Ensure the alignment of activities and deliverables with other related projects/teams', \"Contribute to the development of the Google Play Partnerships team's tools, skills, culture, and impact\"], required_qualifications=[\"Bachelor's degree or equivalent practical experience\", '8 years of experience as a business intelligence analyst or similar analytical role'], preferred_qualifications=[\"Master's degree in Engineering, Business, or a quantitative field\", 'Experience developing new models, methods, analysis, and approaches', 'Experience identifying opportunities for business/product improvement and defining/measuring the success of initiatives']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Estimated Salary: $20 to $28 per hour based on qualifications', 
bonus_and_equity='Salary Range: $100K -- $150K', benefits_and_perks=['N/A'])\n",
      "company_overview=CompanyOverview(about='Company overview not provided in the job description.', mission_and_values='Not provided in the job description.', size='Not provided in the job description.', locations='In office 3x days a week') role_summary=RoleSummary(title='Data Analyst', team_or_department='Not provided in the job description.', role_type='Contract (6 months)', remote='Hybrid (in office 3x days a week)') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Serving as a technical SME for Team Graph for access management and structural changes', 'Build out g3 documentation (website)', 'Collecting and analyzing team graph data sheets to ensure seamless transition of assets from one team node to another', 'Preparing bulk upload sheets for Team Graph changes (ad hoc and scheduled)', 'Building out Search Platforms Team Graph G3 doc site with all relevant FAQs, technical documents and links', 'Developing process flow documentation and 1-pager docs providing guidance on Team Graph as it pertains to Search Platforms', 'Coordinate with TPgMs on any AIs as it relates to their respective orgs for Team Graph adjustments'], required_qualifications=['BA in Engineering or CS', '5+ years professional experience', 'Google Suite, SQL, Web Dev (HTML, Python, etc) experience'], preferred_qualifications=['Strong communication and presentation skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not provided in the job description.', bonus_and_equity='Not provided in the job description.', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Overview of Pure Storage, industry, products, services, and notable achievements', mission_and_values='Company mission, vision, values, and culture, including commitments to diversity, inclusion, social responsibility, and work-life balance', size='Details about company size, such as number of employees', locations='Mountain View, CA') role_summary=RoleSummary(title='Business Intelligence (BI) Analyst', team_or_department='Go-To-Market Strategy, BI and Analytics', role_type='Full-time', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Gather and document requirements from stakeholders', 'Develop executive ready reporting and visualizations', 'Present work to functional leaders and executives on a regular basis'], required_qualifications=['8+ years of experience in/or supporting Sales or Revenue Operations', 'BA degree required (degrees in business, financial or analytical fields)'], preferred_qualifications=['Ability to manage multiple priorities simultaneously', 'Ability to partner cross-functionally']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Annual base salary range: $114,000-191,000', bonus_and_equity='Information about bonus and equity compensation', benefits_and_perks=['Flexible time off', 'Wellness resources', 'Company-sponsored team events'])\n",
      "company_overview=CompanyOverview(about='Georgia IT Inc', mission_and_values='', size='', locations='Mountain View, CA') role_summary=RoleSummary(title='Business Intelligence / Data Science Analyst', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=['Data Warehousing / Business Intelligence knowledge', 'Advanced SQL skills', 'Advanced Machine Learning skills in Python or GCP products like Tensor Flow', 'Dashboard development skills'], preferred_qualifications=['Autonomy and independence', 'Communication skills']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$60/hr. on w2', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='TikTok is the leading destination for short-form mobile video. Our mission is to inspire creativity and bring joy.', mission_and_values='To inspire creativity and bring joy.', size='Over 1 billion users on our platform', locations='Los Angeles, New York, London, Paris, Berlin, Dubai, Singapore, Jakarta, Seoul and Tokyo') role_summary=RoleSummary(title='Human Resources Business Intelligence', team_or_department='Human Resources Business Intelligence (HRBI)', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Understand the business deeply through data analysis, research, interviews and other methods to proactively identify and predict strategic issues and come up with feasible recommendations to the stakeholders', 'Cultivate strong relationships with Human Resources Business Partners and other stakeholders that enable the successful delivery of reports, dashboards and analytics', 'Establish and iterate our people analytics approach to business problems via advanced analysis and understanding the data infrastructure at TikTok', 'As a promoter of HR data delivery applications, cooperate with related businesses and functions to continuously iterate people analytics product offerings', 'Work effectively in a complex, global, fast-paced matrixed environment, filled with tight deliverable timeframes and multiple stakeholders'], required_qualifications=['Analytical mindset and framework thinking', 'Ability to accurately define problems, propose hypotheses and provide solutions', 'Experience with SQL, Tableau, R, Python or other analysis tools'], preferred_qualifications=[\"Bachelor's degree in management, statistics, human resource management, economics, organizational behavior, psychology, industrial psychology, financial management, accounting, or a related field\", '5 years of HR data related experience', 'Prior experience in people 
analytics, business analytics, industry research or consulting firm background', 'Interest in solving management and strategic problems with numbers', 'Experience in partnering with a variety of different HR and business stakeholders', 'Excellent communication skills and ability to influence others', 'Good at storytelling']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$124800 - $228000 annually', bonus_and_equity='Discretionary bonuses/incentives and restricted stock units', benefits_and_perks=['100% premium coverage for employee medical insurance', 'Flexible Spending Account(FSA) Options like Health Care, Limited Purpose and Dependent Care', 'Gym and cellphone service reimbursements', 'Mental and emotional health benefits through EAP and Lyra', '401K company match'])\n",
      "company_overview=CompanyOverview(about='Intuit', mission_and_values='', size='', locations='Mountain View, CA') role_summary=RoleSummary(title='BI Reporting Analyst IV', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Performs business analysis using various techniques', 'Determines best practices and develops actionable insights and recommendations', 'Works directly with the internal or external client to identify analytical requirements', 'May help to produce ad hoc data and reports', 'May assist in implementing or developing systems to capture business operation information', 'May occasionally guide less experienced business data analysts'], required_qualifications=[\"Bachelor's in related field\", '4-6 years of related experience', 'Tableau, Qlik (2+ years)', 'SQL - extensive knowledge (4+ years)', 'Python or R'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$60-65/hr. C2C', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Sandy Spring Bank is a banking institution.', mission_and_values='To provide excellent banking services', size='Large company', locations='Various locations') role_summary=RoleSummary(title='Business Intelligence/Data Analyst', team_or_department='Information Technology Department', role_type='Full-time', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Provides strategic, analytical, and technical support for business intelligence activities', \"Develops, summarizes, and presents business information to the bank's business lines\", 'Works with stakeholders, process specialists, and users to elicit, analyze and document business requirements'], required_qualifications=[\"Bachelor's Degree in Computer Science, Information Technology, Business Information Systems, Mathematics/Analytics, Engineering or relevant experience\", '3 years of experience in business intelligence/data analyst role'], preferred_qualifications=['Experience with business intelligence tools', 'Knowledge of ETL (Extract-Transform-Load) concepts', 'Working knowledge of statistical analysis']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$80K -- $100K', bonus_and_equity='Information about bonus and equity compensation', benefits_and_perks=['Insurance', 'Retirement plans', 'Paid time off'])\n",
      "company_overview=CompanyOverview(about='Russell Tobin is an equal-opportunity employer and values diversity in the workplace.', mission_and_values='', size='', locations='Mountain View, CA/San Diego CA') role_summary=RoleSummary(title='BI Reporting Analyst', team_or_department='', role_type='Contract', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Performs business analysis using various techniques', 'Determines best practices and develops actionable insights and recommendations for the current business operations', 'Works directly with the internal or external client to identify analytical requirements', 'May help to produce ad hoc data and reports', 'May assist in implementing or developing systems to capture business operation information', 'May occasionally guide less experienced business data analysts'], required_qualifications=['Analytical background'], preferred_qualifications=['Advanced spreadsheets (Google Sheets ideally)', 'Intermediate SQL', 'Tableau or other BI knowledge']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$65/hr to $75/hr W2 (DOE)', bonus_and_equity='', benefits_and_perks=['Comprehensive healthcare coverage (medical, dental, and vision plans)', 'Supplemental coverage (accident insurance, critical illness insurance and hospital indemnity)', '401(k)-retirement savings', 'Life & disability insurance', 'Employee assistance program', 'Legal support', 'Auto, home insurance', 'Pet insurance', 'Employee discounts with preferred vendors'])\n",
      "company_overview=CompanyOverview(about='TikTok is the leading destination for short-form mobile video. Our mission is to inspire creativity and bring joy.', mission_and_values='To us, every challenge, no matter how difficult, is an opportunity; to learn, to innovate, and to grow as one team.', size='Millions of users', locations='U.S.') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Cycling Center', role_type='Full-time', remote='Hybrid work schedule, 3 days a week in office') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Work with business stakeholders, engineering, product and SRE teams to understand business requirements and convert that into technical requirements, including developing prototypes to demonstrate the feasibility of data and analytics solutions.', 'Extract data from various sources such as APIs, HIVE tables and other structured and unstructured data sources to process and store large volumes of data ensuring data accuracy, consistency, and security.', 'Design, build, and maintain data pipelines utilizing optimal ETL patterns, frameworks, query techniques, sourcing from structured and unstructured data sources to ensure data is easily accessible and can be used effectively by other members of the organization.', 'Implement and monitor quality control measures to ensure data accuracy, completeness, and consistency.', 'Create and maintain technical documentation, such as data dictionaries, data flow diagrams, and system documentation, to ensure efficient and effective data management and analysis.', 'Optimize pipelines, dashboards, frameworks, and systems to facilitate easier development of data artifacts', 'Collaborate with engineers, product managers, and data scientists to understand data needs, representing key data insights in a meaningful way', 'Ability to analyze and visualize data to provide business stakeholders with impactful, actionable insights'], 
required_qualifications=['Bachelors degree in Statistics, Economics, Computer Science or another quantitative field', '5+ years of experience working with data analytics and data engineering, including experience with data cleaning and preprocessing, data analysis and dashboard development.', '2+ years experience building dashboards in Tableau, Power BI or any similar visualization tool.', 'Proficiency in distributed data processing using Big Data technologies like Spark/Scala, Java, Hadoop/HDFS/AWS/S3, Cassandra and Kafka', 'Proficiency in data modeling, data design, SQL, and NoSQL databases'], preferred_qualifications=['Experience in a consumer web or mobile company', 'Strong background in algorithms and data structures', 'Experience working with PII and GDPR data', 'Ability to communicate effectively, both written and verbal, with technical and non-technical partners', 'Ability to deliver consistent high quality results while working in a fast environment', 'Passionate, curious, and seeking to tackle every day problems with innovation']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$145000 - $355000 annually', bonus_and_equity='Discretionary bonuses/incentives, and restricted stock units', benefits_and_perks=['Medical insurance', 'Dental insurance', 'Vision insurance', 'Short/Long term Disability', 'Basic Life', 'Voluntary Life and AD&D insurance plans', 'Flexible Spending Account(FSA) Options', 'Paid holidays', 'Paid Personal Time Off (PPTO)', 'Paid sick days', 'Paid Parental leave', 'Paid Supplemental Disability', 'Mental and emotional health benefits', '401K company match', 'Gym reimbursement', 'Cellphone service reimbursement'])\n",
      "company_overview=CompanyOverview(about='Databricks is the data and AI company.', mission_and_values='Transforming the way data is sourced, designed and used to help us scale seamlessly in face of incredible growth.', size='Over 10,000 organizations worldwide', locations='San Francisco, CA or Mountain View, CA') role_summary=RoleSummary(title='Data Engineer', team_or_department='IT Data team', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[\"Design/Strategy: Design and support the business's database and table schemas for new and existing data sources for the data warehouse.\", 'Collaboration: Work closely with analysts, data scientists, and other data consumers within the business in an attempt to gather and deliver high-quality data for business-cases.', 'Analytics: Play an analytical role in quickly and thoroughly analyzing business requirements and subsequently translating the emanating results into good technical data designs.'], required_qualifications=[\"2+ years of related experience with a Bachelor's or Master's degree in Computer Science, Statistics, Information Systems or another quantitative field.\"], preferred_qualifications=[\"2+ years of related experience with a Bachelor's or Master's degree in Computer Science, Statistics, Information Systems or another quantitative field.\", \"Experience building and optimizing 'big data' data pipelines, architectures and data sets.\", 'Knowledge of data modeling and design of schemas for read and write performance.', 'Advanced working knowledge and experience working with relational databases, query authoring as well as working familiarity with a variety of databases.', 'Experience performing root cause analysis on internal and external data and processes to answer specific business questions and identify opportunities for improvement.', 'Build processes supporting data transformation, data structures, metadata, 
dependency and workload management.', \"Knowledge of message queuing, stream processing, API based extraction and highly scalable 'big data' data stores.\", 'Experience supporting and working with cross-functional teams', 'Experience with Python and SQL', 'Experience with building data pipelines from business applications like Salesforce, Marketo, NetSuite, Workday etc.', 'Knowledge of Databricks Platform', 'Knowledge of AI/ML/Data Science', 'Knowledge of BI Tools like Tableau, Looker etc']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Equity awards', benefits_and_perks=['Comprehensive health coverage including medical, dental, and vision', '401(k) Plan', 'Equity awards', 'Flexible time off', 'Paid parental leave', 'Family Planning', 'Gym reimbursement', 'Annual personal development fund', 'Employee Assistance Program (EAP)', 'Mental wellness resources'])\n",
      "company_overview=CompanyOverview(about=\"Walmart's environment comes with the biggest of big data sets.\", mission_and_values=\"Walmart's Everyday Low-Cost philosophy.\", size='Details about company size, such as number of employees', locations='Sunnyvale, California US-08479, BENTONVILLE, Arkansas US-09050') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Ventures', role_type='Full-time', remote='Hybrid work') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Data Transformation and Integration', 'Data Source Identification', 'Data Modeling', 'Code Development and Testing', 'Applied Business Acumen', 'Data Governance'], required_qualifications=['Well versed with Hadoop, Spark, Cloud, Python/PySpark and Java, Streaming, Kafka, Backend', 'Proven track record coding with at least one programming language', 'Experienced in one of cloud computing platforms', 'Skilled in data modeling & data migration protocols'], preferred_qualifications=['Knowledge of Databricks', 'Hands on knowledge in NoSQL like Cosmos DB along with RDBMS like MySQL, Postgres', 'Hands on working experience in any messaging platform like Kafka']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$117,000.00-$234,000.00', bonus_and_equity=None, benefits_and_perks=['Paid maternity and parental leave', 'PTO', 'Multiple health plans', '401(k) match', 'Stock purchase plan'])\n",
      "company_overview=CompanyOverview(about='Company not specified', mission_and_values='Not specified', size='Not specified', locations='Mountain View, CA') role_summary=RoleSummary(title='Data Engineer', team_or_department='Not specified', role_type='Contract', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=['10+ years of overall experience in data management space', '5 years of working in large data sets in a data lake environment', 'Highly proficient in SQL', 'Solid understanding of Spark including performance tuning', 'Solid understanding of the AWS Platform', 'Experience in Python'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='DOE', bonus_and_equity='Not specified', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='Cepheid is passionate about improving health care through fast, accurate diagnostic testing.', mission_and_values=\"Our mission drives us, every moment of every day, as we develop scalable, groundbreaking solutions to solve the world's most complex health challenges.\", size='Part of the Danaher Diagnostics companies', locations='Sunnyvale, CA') role_summary=RoleSummary(title='Senior Staff Data Engineer', team_or_department='Biochip R&D Team', role_type='Full-time', remote='N/A') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design, implement, maintain and continually improve a central cloud platform for data science operations', 'Build scalable, high performance data pipelines (data layer, services, APIs, database integration and web applications) to enable new data analytics workflows', 'Execute technical troubleshooting and improve the maintainability, quality and stability of our codebase by performing code reviews, fixing issues and adding unit tests and other necessary enhancements to the existing backend systems and services', 'Evaluate emerging technologies and develop architectural changes to enhance the performance, scalability, and maintainability of existing data ecosystem', 'Collaborate cross-functionally with internal teams to suggest and implement new functionalities or identify areas for improvement'], required_qualifications=[\"Bachelor's degree in software engineering, computer science or a similar field with 10 years of related work experience, OR Master's degree with 8 years of related work experience\", 'Hands on experience with cloud (AWS and Azure), database technologies (SQL server, PostgreSQL, DynamoDB, Redshift, etc.), server technologies (event-pipeline, distributed computing and storage, etc.) 
and designing high performing microservice based solutions', 'Strong programming fundamentals using Python', 'Experience deploying commercial, production-grade software platforms, services and applications', 'Understanding of modern automated test frameworks and CI/CD tooling'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$170K-$210K', bonus_and_equity='Eligible for bonus/incentive pay', benefits_and_perks=['Paid time off', 'Medical/dental/vision insurance', '401(k)'])\n",
      "company_overview=CompanyOverview(about='Bonfy.AI is working behind the scenes on game-changing innovations to make AI trustworthy.', mission_and_values='Make AI trustworthy', size='Rapidly growing startup', locations='Mountain View, California') role_summary=RoleSummary(title='Data Engineer', team_or_department='Data Engineering', role_type='Full-time', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Design and construct robust data pipelines to collect, integrate, and prepare large volumes of data for machine learning', 'Work closely with ML engineers and data scientists to understand their data needs and implement systems that support model training, validation, and deployment', 'Develop and maintain an efficient data warehouse to store and manage data effectively, ensuring data quality and accessibility', 'Implement automated processes for data cleaning, validation, and preprocessing to improve data accuracy and usability for ML purposes', 'Optimize data retrieval and develop dashboards for visualizing data metrics and insights to aid in the ML model development process', 'Assist in the creation and maintenance of a scalable architecture for ML model testing and deployment', 'Troubleshoot and resolve issues in the data pipelines and advocate for improvements to enhance performance and reliability', 'Document all data engineering procedures and create reports for management detailing data usage, data integrity, and the impact of data quality on ML outcomes'], required_qualifications=[\"Bachelor's or Master's degree in Computer Science, Engineering, Information Technology, or a related field\", 'Minimum of 3 years of experience in a data engineering role', 'Strong programming skills in Python', 'Proficient in SQL and experience with relational databases, query authoring, as well as working familiarity with a variety of databases', 'Experience with data modeling, data warehousing, and building ETL 
pipelines', 'Familiarity with machine learning frameworks (e.g., TensorFlow, PyTorch) and understanding of data needs for ML model training and testing', 'Strong analytical skills and ability to work with large, complex data sets', 'Excellent problem-solving skills and attention to detail'], preferred_qualifications=[]) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='') role_summary=RoleSummary(title='', team_or_department='', role_type='', remote='') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=[\"Bachelor's degree in computer science, Engineering, or related field.\", 'Proven experience working with Amazon Aurora and/or PostgreSQL in a production environment.', 'Strong SQL skills and experience with SQL tuning techniques.', 'Proficiency in AWS services such as EC2, Route 53, VPC, IAM, and CloudFormation.', 'Hands-on experience with scripting languages (e.g., Python, Bash) for automation.', 'Familiarity with database security concepts and best practices.', 'Excellent problem-solving skills and attention to detail.', 'Strong communication and collaboration skills, with the ability to work effectively in a team environment.'], preferred_qualifications=['AWS Certification', 'Experience with other AWS database services such as RDS.', 'Knowledge of containerization technologies (e.g., Docker, Kubernetes).', 'Experience with DevOps practices and tools (e.g., CI/CD pipelines, Git).']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='', mission_and_values='', size='', locations='Mountain View, CA') role_summary=RoleSummary(title='', team_or_department='', role_type='', remote='Hybrid') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=[], required_qualifications=['10+ yrs of experience'], preferred_qualifications=['Python (Numpy, Pandas)', 'SQL', 'Hadoop, Hive, Pyspark', 'RDBMS', 'Tableau/Qliksense/Power BI - Any one of those', 'Advanced Excel Visuals (Pivot and Regression']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='', bonus_and_equity='', benefits_and_perks=[])\n",
      "company_overview=CompanyOverview(about='The company is a preferred vendor', mission_and_values='Not specified', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='AWS Data Engineer', team_or_department='Not specified', role_type='Contract', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Analyze, define, and document system requirements for data, workflow, logical processes, interfaces with other systems, auditing, reporting requirements, and production configuration', 'Design, develop, implement, deliver and manage ETL processes or data pipelines to enable data accessibility to application, reporting and analytics', 'Support the creation of the new cloud infrastructure and data ecosystem in the cloud'], required_qualifications=['5+ years of data engineer experience', 'Expertise in designing, developing, and maintaining database schema objects', 'Proficiency in cloud data technologies', 'Knowledge of Snowflake/AWS Redshift as a data warehousing solution', 'Advanced knowledge in designing, developing, implementing and managing data pipelines', 'Strong experience creating and maintaining functional and technical specifications documents', 'Strong experience creating test plans, test data sets, and automated testing', 'Strong SQL technical experience', 'Programming skill in Python or Scala'], preferred_qualifications=['5+ years of data engineer experience', 'Expertise in designing, developing, and maintaining database schema objects', 'Proficiency in cloud data technologies', 'Knowledge of Snowflake/AWS Redshift as a data warehousing solution', 'Advanced knowledge in designing, developing, implementing and managing data pipelines', 'Strong experience creating and maintaining functional and technical specifications documents', 'Strong experience creating test plans, test data sets, and automated testing', 'Strong SQL technical experience', 'Programming skill in Python 
or Scala', 'Bonus skills: Strong experience in NoSQL database, Strong experience in streaming technology, Strong experience in working in the healthcare industry including PHI, HIPAA regulations, and BAA processes']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='Not specified', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n",
      "company_overview=CompanyOverview(about='Applied Intuition is a company that provides data engineering services', mission_and_values='Encourage change and maintain a high standard of excellence', size='Not specified', locations='Not specified') role_summary=RoleSummary(title='Infrastructure Engineer - Data Pipelines', team_or_department='Data & ML infra group', role_type='Full-time', remote='Not specified') responsibilities_and_qualifications=ResponsibilitiesAndQualifications(responsibilities=['Develop and deploy event-driven pipelines using extract, load and transform (ELT) architecture focused on distributed ingestion', \"Build features to tune processing pipeline for fast data ingestion and indexing depending on customer's needs and workloads\", 'Enable product workflows that expose performant query interfaces and offer easy-to-use integration hooks', 'Develop and deploy high-quality software using modern tooling and frameworks', 'Encourage change, especially in support of data engineering best practices, and maintain a high standard of excellence', 'Work with products and teams across Applied Intuition'], required_qualifications=['Experience with large-scale open source data processing frameworks (Spark, Kafka, Airflow, Flink, Hudi, etc.)', 'Experience with containerization and other modern software development workflows', 'Knowledge of the open source landscape with judgment on when to choose open source versus build in-house', 'Strong knowledge of data concepts, including experience in using a big data warehouse'], preferred_qualifications=['Expertise with modern programming languages (Python, C++, GoLang, etc.)', 'Experience with enterprise software, including on-prem and/or cloud environments', 'Deep knowledge of data quality, data profiling and cleansing techniques']) compensation_and_benefits=CompensationAndBenefits(salary_or_pay_range='$65,000 USD to $400,000 USD annually', bonus_and_equity='Not specified', benefits_and_perks=['Not specified'])\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "Length of values (76) does not match length of index (3)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[18], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiltered_title_company\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdescription\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiltered_parse_desc\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtolist\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m      2\u001b[0m \u001b[43m                      \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mabout\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmission_and_values\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mremote\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mresponsibilities\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m                               \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrequired_qualifications\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpreferred_qualifications\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m                               
\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msalary_or_pay_range\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbenefits_and_perks\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m                      \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfiltered_sample_df\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[38;5;66;03m# Join the output DataFrame with filtered_sample_df based on the index\u001b[39;00m\n\u001b[1;32m      8\u001b[0m filtered_title_company_df \u001b[38;5;241m=\u001b[39m filtered_title_company\u001b[38;5;241m.\u001b[39mjoin(output)\n",
      "File \u001b[0;32m~/anaconda3/envs/datajobs/lib/python3.11/site-packages/pandas/core/frame.py:859\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m    850\u001b[0m         columns \u001b[38;5;241m=\u001b[39m ensure_index(columns)\n\u001b[1;32m    851\u001b[0m     arrays, columns, index \u001b[38;5;241m=\u001b[39m nested_data_to_arrays(\n\u001b[1;32m    852\u001b[0m         \u001b[38;5;66;03m# error: Argument 3 to \"nested_data_to_arrays\" has incompatible\u001b[39;00m\n\u001b[1;32m    853\u001b[0m         \u001b[38;5;66;03m# type \"Optional[Collection[Any]]\"; expected \"Optional[Index]\"\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    857\u001b[0m         dtype,\n\u001b[1;32m    858\u001b[0m     )\n\u001b[0;32m--> 859\u001b[0m     mgr \u001b[38;5;241m=\u001b[39m \u001b[43marrays_to_mgr\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    860\u001b[0m \u001b[43m        \u001b[49m\u001b[43marrays\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    861\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    862\u001b[0m \u001b[43m        \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    863\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    864\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtyp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmanager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    865\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    866\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    867\u001b[0m     mgr \u001b[38;5;241m=\u001b[39m ndarray_to_mgr(\n\u001b[1;32m    868\u001b[0m         data,\n\u001b[1;32m    869\u001b[0m         index,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    873\u001b[0m         typ\u001b[38;5;241m=\u001b[39mmanager,\n\u001b[1;32m   
 874\u001b[0m     )\n",
      "File \u001b[0;32m~/anaconda3/envs/datajobs/lib/python3.11/site-packages/pandas/core/internals/construction.py:119\u001b[0m, in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, columns, index, dtype, verify_integrity, typ, consolidate)\u001b[0m\n\u001b[1;32m    116\u001b[0m         index \u001b[38;5;241m=\u001b[39m ensure_index(index)\n\u001b[1;32m    118\u001b[0m     \u001b[38;5;66;03m# don't force copy because getting jammed in an ndarray anyway\u001b[39;00m\n\u001b[0;32m--> 119\u001b[0m     arrays, refs \u001b[38;5;241m=\u001b[39m \u001b[43m_homogenize\u001b[49m\u001b[43m(\u001b[49m\u001b[43marrays\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    120\u001b[0m     \u001b[38;5;66;03m# _homogenize ensures\u001b[39;00m\n\u001b[1;32m    121\u001b[0m     \u001b[38;5;66;03m#  - all(len(x) == len(index) for x in arrays)\u001b[39;00m\n\u001b[1;32m    122\u001b[0m     \u001b[38;5;66;03m#  - all(x.ndim == 1 for x in arrays)\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    125\u001b[0m \n\u001b[1;32m    126\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    127\u001b[0m     index \u001b[38;5;241m=\u001b[39m ensure_index(index)\n",
      "File \u001b[0;32m~/anaconda3/envs/datajobs/lib/python3.11/site-packages/pandas/core/internals/construction.py:630\u001b[0m, in \u001b[0;36m_homogenize\u001b[0;34m(data, index, dtype)\u001b[0m\n\u001b[1;32m    627\u001b[0m         val \u001b[38;5;241m=\u001b[39m lib\u001b[38;5;241m.\u001b[39mfast_multiget(val, oindex\u001b[38;5;241m.\u001b[39m_values, default\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mnan)\n\u001b[1;32m    629\u001b[0m     val \u001b[38;5;241m=\u001b[39m sanitize_array(val, index, dtype\u001b[38;5;241m=\u001b[39mdtype, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m--> 630\u001b[0m     \u001b[43mcom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequire_length_match\u001b[49m\u001b[43m(\u001b[49m\u001b[43mval\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    631\u001b[0m     refs\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m    633\u001b[0m homogenized\u001b[38;5;241m.\u001b[39mappend(val)\n",
      "File \u001b[0;32m~/anaconda3/envs/datajobs/lib/python3.11/site-packages/pandas/core/common.py:573\u001b[0m, in \u001b[0;36mrequire_length_match\u001b[0;34m(data, index)\u001b[0m\n\u001b[1;32m    569\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    570\u001b[0m \u001b[38;5;124;03mCheck the length of data matches the length of the index.\u001b[39;00m\n\u001b[1;32m    571\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    572\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(index):\n\u001b[0;32m--> 573\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    574\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLength of values \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    575\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    576\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdoes not match length of index \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    577\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(index)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    578\u001b[0m     )\n",
      "\u001b[0;31mValueError\u001b[0m: Length of values (76) does not match length of index (3)"
     ]
    }
   ],
   "source": [
    "output = pd.DataFrame(filtered_title_company['description'].apply(filtered_parse_desc).tolist(), \n",
    "                      \n",
    "                      columns=['about', 'mission_and_values', 'remote', 'responsibilities', \n",
    "                               'required_qualifications', 'preferred_qualifications', \n",
    "                               'salary_or_pay_range', 'benefits_and_perks'],\n",
    "                      index=filtered_sample_df.index)\n",
    "\n",
    "# Join the output DataFrame with filtered_sample_df based on the index\n",
    "filtered_title_company_df = filtered_title_company.join(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>company_name</th>\n",
       "      <th>location</th>\n",
       "      <th>description</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>401</th>\n",
       "      <td>Senior Business Intelligence Analyst, Operatio...</td>\n",
       "      <td>Rivian</td>\n",
       "      <td>Palo Alto, CA</td>\n",
       "      <td>About Rivian:\\n\\nRivian is on a mission to kee...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>217</th>\n",
       "      <td>Generative AI Engineer</td>\n",
       "      <td>Knitit.ai</td>\n",
       "      <td>Palo Alto, CA</td>\n",
       "      <td>We are looking for a AI/ML Engineer to join a ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235</th>\n",
       "      <td>Senior Data Analyst</td>\n",
       "      <td>DynPro Inc.</td>\n",
       "      <td>Mountain View, CA</td>\n",
       "      <td>Duration: 6 Months\\n\\nLocation: Bay Area, CA M...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 title company_name  \\\n",
       "401  Senior Business Intelligence Analyst, Operatio...       Rivian   \n",
       "217                             Generative AI Engineer    Knitit.ai   \n",
       "235                                Senior Data Analyst  DynPro Inc.   \n",
       "\n",
       "              location                                        description  \n",
       "401      Palo Alto, CA  About Rivian:\\n\\nRivian is on a mission to kee...  \n",
       "217      Palo Alto, CA  We are looking for a AI/ML Engineer to join a ...  \n",
       "235  Mountain View, CA  Duration: 6 Months\\n\\nLocation: Bay Area, CA M...  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_sample_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Salary Range: $114,000 - $131,000\\n\\nThe pay range for this position is expected to be between $114,000 - $131,000/annually; however, the base pay offered may vary depending on multiple individualized factors, including market location, job-related knowledge, skills and experience. The total compensation package for this position also includes medical benefits, 401(k) eligibility, vacation, sick... time, and parental leave. Additional details of participation in these benefit plans will be provided if an employee receives an offer of employment.\\n\\nIf hired, employee will be in an “at-will position” and the Company reserves the right to modify base salary (as well as any other payment or compensation program) at any time, including reasons related to individual performance, Company or individual department/team performance, and market factors.\\n\\nScilex Holding Company’s pay range data is provided in accordance with local state pay transparency regulations. Scilex Holding Company may post different minimum wage ranges for permanent residency petitions pursuant to US Department of Labor requirements.\\n\\nScilex Holding Company is committed to equal employment opportunities, and a workplace that embraces diversity and inclusion for qualified individuals of all backgrounds.\\n\\nAbout Scilex\\n\\nSCILEX HOLDING COMPANY (Nasdaq: SCLX, “Scilex”), is an innovative revenue-generating company focused on acquiring, developing and commercializing non-opioid pain management products for the treatment of acute and chronic pain. We are uncompromising in our focus to become the global pain management leader committed to social, environmental, economic, and ethical principles to responsibly develop pharmaceutical products to maximize quality of life.\\n\\nThe company’s lead product ZTlido® (lidocaine topical system) 1.8%, is a marketed prescription lidocaine topical product approved by the U.S. 
Food and Drug Administration for the relief of pain associated with Post-Herpetic Neuralgia (PHN), which is a form of post-shingles nerve pain. ZTlido® possesses novel delivery and adhesion technology designed to address many of the limitations of current prescription lidocaine patches by providing significantly improved adhesion and continuous pain relief throughout the 12-hour administration period. Click here for ZTLido’s Important Safety Information and US Prescribing Information.\\n\\nWe have acquired two FDA approved non-opioid pain products, GLOPERBA and ELYXYBTM, for the treatment of gout in adults and oral solution for the acute treatment of migraine, with or without aura, in adults. We launched ELYXYBTM in the U.S. in April 2023 and are planning to commercialize GLOPERBA in 2024.\\n\\nOur three product candidates are (i) SP-102 (10 mg, dexamethasone sodium phosphate viscous gel) (“SEMDEXATM”), a Phase 3, novel, viscous gel formulation of a widely used corticosteroid for epidural injections to treat lumbosacral radicular pain, or sciatica and has received Fast Track Status from the FDA. Currently there is no FDA approved non-opioid epidural injection to treat lumbosacral radicular pain, or sciatica; (ii) SP-103 (lidocaine topical system) 5.4%, (“SP-103”), a Phase 2, next-generation, triple-strength formulation of ZTlido, for the treatment of low back pain (“LBP”) and has received Fast Track Status from the FDA; and (iii) SP-104 (4.5 mg, low-dose naltrexone hydrochloride delayed-release capsules) (“SP-104”), a novel low-dose delayed-release naltrexone hydrochloride being developed for the treatment of fibromyalgia. If these product candidates are approved by the FDA, we believe each of them could become the treatment option for their respective indications in the United States.\\n\\nSCILEX is committed to harnessing the power of revolutionary delivery technologies designed to safely and effectively provide therapies to those who need them the most. 
Scilex Pharmaceuticals and Semnur Pharmaceuticals are wholly-owned subsidiaries of Scilex Holding Company.\\n\\nIf you’re a high-energy, compassionate, and collaborative individual with a strong desire to make a difference in patient’s lives, we encourage you to consider joining the Scilex Holding team!\\n\\nRole and Responsibilities\\n\\nThe Senior Data Analyst acts as the SME for commercial data, reporting and analytics workstreams for Scilex portfolio, including in-market, late stage, and early-stage pipeline products. This position will report to Associate Director, Sales Ops & Commercial Analytics within the Commercial Department. Will be primarily responsible for collection, curation of commercial data, creation, and delivery of standard reports & dashboards for executive and other home office decision makers and targeting/secondary data analytics.\\n\\nConducts thorough data analysis and develops data flows aligned to strategic vision. Understands and considers the business implications of data and communicates with impacted groups, as necessary. Implements appropriate processes for defining data requirements with external and internal partners. Coordinates internally & externally to define, document, and execute the setup, quality, cadence, and method of delivery of data files. These efforts will provide a fact-based way to measure and monitor business performance. As such, the Sr. 
Data Analyst will help set the foundation to identify and address business-critical questions along with other insights and outputs delivered by the larger Commercial team.\\n\\nKey Responsibilities\\n• Collaborate with Commercial team to help synthesize insights in a streamlined manner.\\n• Architect complex data-centric and business intelligence solutions.\\n• Support advanced analytics efforts, such as HCP/account segmentation/targeting, subnational field effort analytics, marketing mix optimization, commercial ROI studies, and operational specialty pharmacy/specialty distributor channel analytics.\\n• Develop first-hand knowledge of datasets and function as data expert to navigate emerging business needs and evolving data landscape.\\n• Execute reporting suite for Commercial decision makers to enable “one view” of the market, ensuring single-source-of-truth reporting in collaboration with Commercial stakeholders. Master all Commercial data via governance and stewardship.\\n• Continuously improve quality of data content, context, and associated business rules to ensure front-end usability of commercial data.\\n• Consult with data custodians/IT to ensure safe custody, transport, and storage of commercial data and implementation of business rules.\\n• Manage complexities of fragmented data sets from Scilex Holding’s distribution partners in context of specialty distribution model for applicable products.\\n• Partner with IT and other functions to design and operate the customer master data management process, including business rule definitions and process flows to ensure data integrity and quality.\\n• Identify and track key performance indicators, such as product market share, duration of therapy, patient compliance, etc., using select claims and electronic health record data sources.\\n• Ensure that data flows and calculations are properly implemented on an ongoing basis.\\n• Ensure that the reporting suite is continuously relevant and meets emerging business 
needs by performing periodical audit.\\n• Monitor automated data collection, analytics, and cleansing infrastructure.\\n• Capture, develop, and document data definitions, business rules, and data quality requirements. Perform Quality Checks and implement controls for Sales and Marketing data.\\n• Lead KPI definitions effort in partnership with various functional and operational leaders. Document and manage KPI definitions.\\n\\nQualifications and Education Requirements\\n• Bachelor’s degree (business discipline preferred).\\n• 5-7 years’ experience as a Data Analyst or in a similar role working directly with clinical healthcare data (e.g., medical claims, pharmacy claims, population-health data, EMR etc.)\\n• Working knowledge of pharmaceutical industry data sources (e.g., IQVIA/ICON/Veeva Compass, BioMed Tracker, Analysource).\\n• Prior experience developing business requirements documents and managing data projects.\\n• Experience designing enterprise reporting solutions driven by a data lake, data warehouse or operational data store.\\n• Strong analytical/critical thinking skills to address situations with operational complexity.\\n• Coding skills and/or Analytical Tools experience (Alteryx/R/SAS/SQL/Tableau etc.) preferred.\\n• Experience with using retail and specialty pharmacy data preferred.\\n• Adept communication skills, including ability to summarize complex issues concisely and listen effectively to concerns and problems.\\n• Cross-functional collaboration skills. 
Ability to work with people at various levels of the organization, including but not limited to, Marketing, Sales, Sales Operations, Managed Markets, Access Teams, Finance, IT, and other functions within and outside of Commercial.\\n• Experience in implementation and/or management of CRM/SFA platforms, Sales Reports & Analytics, and business systems.\\n• Good understanding of data security concepts.\\n• Experience in coding ETL extraction tools.\\n• Experience working with AWS databases, especially Redshift and EMR.\\n• The role may require occasional travel (0-25%).\\n\\nPreferred Skills\\n• Experience with business intelligence tools / platforms is a plus.\\n• Experience using Power Query, Power Pivot, Alteryx a plus.\\n• Experience analyzing healthcare data a plus.\\n• Understanding of medical coding system (CPT, ICD-10, DRG, etc.) a plus.\\n• Experience managing data and/or analytics vendors preferred of business rules.\\n• Manage complexities of fragmented data sets from Scilex Holding’s distribution partners in context of specialty distribution model for applicable products.\\n• Partner with IT and other functions to design and operate the customer master data management process, including business rule definitions and process flows to ensure data integrity and quality.\\n• Identify and track key performance indicators, such as product market share, duration of therapy, patient compliance, etc., using select claims and electronic health record data sources.\\n• Ensure that data flows and calculations are properly implemented on an ongoing basis.\\n• Ensure that the reporting suite is continuously relevant and meets emerging business needs by performing periodical audit.\\n• Monitor automated data collection, analytics, and cleansing infrastructure.\\n• Capture, develop, and document data definitions, business rules, and data quality requirements. 
Perform Quality Checks and implement controls for Sales and Marketing data.\\n• Lead KPI definitions effort in partnership with various functional and operational leaders. Document and manage KPI definitions'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_sample_df.loc[246, 'description']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the filtered sample frame.\n",
    "# This cell previously referenced `filtered_title_company_df`, which was not\n",
    "# defined when the cell last ran (NameError) and stopped Restart & Run All.\n",
    "# NOTE(review): `filtered_sample_df` is assumed to be the intended frame - confirm.\n",
    "filtered_sample_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import asyncio\n",
    "# from typing import List\n",
    "# import pandas as pd\n",
    "\n",
    "# def extract_job_fields(job_description):\n",
    "#     return {\n",
    "#         'about': job_description.company_overview.about,\n",
    "#         'mission_and_values': job_description.company_overview.mission_and_values,\n",
    "#         'remote': job_description.role_summary.remote,\n",
    "#         'title': job_description.role_summary.title,\n",
    "#         'responsibilities': ', '.join(job_description.responsibilities_and_qualifications.responsibilities),\n",
    "#         'required_qualifications': ', '.join(job_description.responsibilities_and_qualifications.required_qualifications),\n",
    "#         'preferred_qualifications': ', '.join(job_description.responsibilities_and_qualifications.preferred_qualifications),\n",
    "#         'salary_or_pay_range': job_description.compensation_and_benefits.salary_or_pay_range,\n",
    "#         'benefits_and_perks': ', '.join(job_description.compensation_and_benefits.benefits_and_perks) if job_description.compensation_and_benefits.benefits_and_perks else None\n",
    "#     }\n",
    "\n",
    "# async def process_job_description(text: str) -> dict:\n",
    "#     description_response = await parse_description.extract_desc_fields(text)\n",
    "#     return extract_job_fields(description_response)\n",
    "\n",
    "# async def process_job_descriptions(texts: List[str], max_concurrent_calls: int = 2) -> List[dict]:\n",
    "#     semaphore = asyncio.Semaphore(max_concurrent_calls)\n",
    "    \n",
    "#     async def process_text(text: str) -> dict:\n",
    "#         async with semaphore:\n",
    "#             return await process_job_description(text)\n",
    "    \n",
    "#     tasks = [process_text(text) for text in texts]\n",
    "#     results = await asyncio.gather(*tasks)\n",
    "    \n",
    "#     return results\n",
    "\n",
    "# # Usage\n",
    "# async def main():\n",
    "#     texts = filtered_sample_df['description'].tolist()\n",
    "#     job_data = await process_job_descriptions(texts, max_concurrent_calls=2)\n",
    "#     df = pd.DataFrame(job_data)\n",
    "#     return df\n",
    "\n",
    "# # Run the main function and capture the result\n",
    "# result_future = asyncio.ensure_future(main())\n",
    "\n",
    "# # Wait for the main function to complete\n",
    "# await asyncio.wait([result_future])\n",
    "\n",
    "# # Get the result from the future\n",
    "# df = result_future.result()\n",
    "\n",
    "# # Display the DataFrame\n",
    "# print(df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Initialize the new columns in the DataFrame\n",
    "# new_columns = ['about', 'mission_and_values', 'remote', 'responsibilities', \n",
    "#                'required_qualifications', 'preferred_qualifications', \n",
    "#                'salary_or_pay_range', 'benefits']\n",
    "\n",
    "# for col in new_columns:\n",
    "#     filtered_sample_df[col] = None\n",
    "\n",
    "# # Iterate over the DataFrame rows one by one\n",
    "# for index, row in filtered_sample_df.iterrows():\n",
    "#     # Call the function and get the result\n",
    "#     result = filtered_parse_desc(row['description'])\n",
    "    \n",
    "#     # Assign the result to the corresponding columns\n",
    "#     for i, col in enumerate(new_columns):\n",
    "#         filtered_sample_df.at[index, col] = result[i]\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "datajobs",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}