dsmueller commited on
Commit
5d09bfd
1 Parent(s): 609bf51

Update model_name in train_llm.py

Browse files
Files changed (2) hide show
  1. train_llm.ipynb +0 -83
  2. train_llm.py +2 -2
train_llm.ipynb DELETED
@@ -1,83 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 4,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import os\n",
10
- "import jsonlines\n",
11
- "from uuid import uuid4\n",
12
- "import pandas as pd\n",
13
- "\n",
14
- "from datasets import load_dataset\n",
15
- "import subprocess\n",
16
- "\n",
17
- "from dotenv import load_dotenv,find_dotenv\n",
18
- "load_dotenv(find_dotenv(),override=True)\n"
19
- ]
20
- },
21
- {
22
- "cell_type": "code",
23
- "execution_count": 6,
24
- "metadata": {},
25
- "outputs": [
26
- {
27
- "ename": "ValueError",
28
- "evalue": "Invalid pattern: '**' can only be an entire path component",
29
- "output_type": "error",
30
- "traceback": [
31
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
32
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
33
- "Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m dataset_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mai-aerospace/ams_data_train_generic_v0.1_100\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 2\u001b[0m dataset\u001b[38;5;241m=\u001b[39m\u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset_name\u001b[49m\u001b[43m)\u001b[49m\n",
34
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:2112\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m 2107\u001b[0m verification_mode \u001b[38;5;241m=\u001b[39m VerificationMode(\n\u001b[1;32m 2108\u001b[0m (verification_mode \u001b[38;5;129;01mor\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mBASIC_CHECKS) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m save_infos \u001b[38;5;28;01melse\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS\n\u001b[1;32m 2109\u001b[0m )\n\u001b[1;32m 2111\u001b[0m \u001b[38;5;66;03m# Create a dataset builder\u001b[39;00m\n\u001b[0;32m-> 2112\u001b[0m builder_instance \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset_builder\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2113\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2114\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2115\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2116\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2117\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2118\u001b[0m \u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2119\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2120\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2121\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2122\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2123\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2124\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2125\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2127\u001b[0m \u001b[38;5;66;03m# Return iterable dataset in case of streaming\u001b[39;00m\n\u001b[1;32m 2128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m streaming:\n",
35
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1798\u001b[0m, in \u001b[0;36mload_dataset_builder\u001b[0;34m(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, use_auth_token, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m 1796\u001b[0m download_config \u001b[38;5;241m=\u001b[39m download_config\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mif\u001b[39;00m download_config \u001b[38;5;28;01melse\u001b[39;00m DownloadConfig()\n\u001b[1;32m 1797\u001b[0m download_config\u001b[38;5;241m.\u001b[39mstorage_options\u001b[38;5;241m.\u001b[39mupdate(storage_options)\n\u001b[0;32m-> 1798\u001b[0m dataset_module \u001b[38;5;241m=\u001b[39m \u001b[43mdataset_module_factory\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1799\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1800\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1801\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1802\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1803\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1805\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1806\u001b[0m \u001b[38;5;66;03m# Get dataset builder class from the processing script\u001b[39;00m\n\u001b[1;32m 1807\u001b[0m builder_kwargs \u001b[38;5;241m=\u001b[39m dataset_module\u001b[38;5;241m.\u001b[39mbuilder_kwargs\n",
36
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1495\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1490\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e1, \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m):\n\u001b[1;32m 1491\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1492\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1493\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m on the Hugging Face Hub either: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(e1)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me1\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1494\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1495\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e1 \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1499\u001b[0m )\n",
37
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1479\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1464\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m HubDatasetModuleFactoryWithScript(\n\u001b[1;32m 1465\u001b[0m path,\n\u001b[1;32m 1466\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1469\u001b[0m dynamic_modules_path\u001b[38;5;241m=\u001b[39mdynamic_modules_path,\n\u001b[1;32m 1470\u001b[0m )\u001b[38;5;241m.\u001b[39mget_module()\n\u001b[1;32m 1471\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1472\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mHubDatasetModuleFactoryWithoutScript\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1473\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1474\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1475\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1476\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1477\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1478\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 1479\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1480\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\n\u001b[1;32m 1481\u001b[0m \u001b[38;5;167;01mException\u001b[39;00m\n\u001b[1;32m 1482\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m e1: \u001b[38;5;66;03m# noqa all the attempts failed, before raising the error we should check if the module is already cached.\u001b[39;00m\n\u001b[1;32m 1483\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
38
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1034\u001b[0m, in \u001b[0;36mHubDatasetModuleFactoryWithoutScript.get_module\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1029\u001b[0m metadata_configs \u001b[38;5;241m=\u001b[39m MetadataConfigs\u001b[38;5;241m.\u001b[39mfrom_dataset_card_data(dataset_card_data)\n\u001b[1;32m 1030\u001b[0m dataset_infos \u001b[38;5;241m=\u001b[39m DatasetInfosDict\u001b[38;5;241m.\u001b[39mfrom_dataset_card_data(dataset_card_data)\n\u001b[1;32m 1031\u001b[0m patterns \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1032\u001b[0m sanitize_patterns(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files)\n\u001b[1;32m 1033\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m \u001b[43mget_data_patterns\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1035\u001b[0m )\n\u001b[1;32m 1036\u001b[0m data_files \u001b[38;5;241m=\u001b[39m DataFilesDict\u001b[38;5;241m.\u001b[39mfrom_patterns(\n\u001b[1;32m 1037\u001b[0m patterns,\n\u001b[1;32m 1038\u001b[0m base_path\u001b[38;5;241m=\u001b[39mbase_path,\n\u001b[1;32m 1039\u001b[0m allowed_extensions\u001b[38;5;241m=\u001b[39mALL_ALLOWED_EXTENSIONS,\n\u001b[1;32m 1040\u001b[0m download_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_config,\n\u001b[1;32m 1041\u001b[0m )\n\u001b[1;32m 1042\u001b[0m module_name, default_builder_kwargs \u001b[38;5;241m=\u001b[39m infer_module_for_data_files(\n\u001b[1;32m 1043\u001b[0m data_files\u001b[38;5;241m=\u001b[39mdata_files,\n\u001b[1;32m 1044\u001b[0m path\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname,\n\u001b[1;32m 1045\u001b[0m download_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_config,\n\u001b[1;32m 1046\u001b[0m )\n",
39
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:457\u001b[0m, in \u001b[0;36mget_data_patterns\u001b[0;34m(base_path, download_config)\u001b[0m\n\u001b[1;32m 455\u001b[0m resolver \u001b[38;5;241m=\u001b[39m partial(resolve_pattern, base_path\u001b[38;5;241m=\u001b[39mbase_path, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 457\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_get_data_files_patterns\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresolver\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 459\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EmptyDatasetError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe directory at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbase_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt contain any data files\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
40
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:248\u001b[0m, in \u001b[0;36m_get_data_files_patterns\u001b[0;34m(pattern_resolver)\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m pattern \u001b[38;5;129;01min\u001b[39;00m patterns:\n\u001b[1;32m 247\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 248\u001b[0m data_files \u001b[38;5;241m=\u001b[39m \u001b[43mpattern_resolver\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n",
41
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:332\u001b[0m, in \u001b[0;36mresolve_pattern\u001b[0;34m(pattern, base_path, allowed_extensions, download_config)\u001b[0m\n\u001b[1;32m 330\u001b[0m base_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 331\u001b[0m pattern, storage_options \u001b[38;5;241m=\u001b[39m _prepare_path_and_storage_options(pattern, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[0;32m--> 332\u001b[0m fs, _, _ \u001b[38;5;241m=\u001b[39m \u001b[43mget_fs_token_paths\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 333\u001b[0m fs_base_path \u001b[38;5;241m=\u001b[39m base_path\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m::\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m://\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;129;01mor\u001b[39;00m fs\u001b[38;5;241m.\u001b[39mroot_marker\n\u001b[1;32m 334\u001b[0m fs_pattern \u001b[38;5;241m=\u001b[39m pattern\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m::\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m://\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
42
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/core.py:653\u001b[0m, in \u001b[0;36mget_fs_token_paths\u001b[0;34m(urlpath, mode, num, name_function, storage_options, protocol, expand)\u001b[0m\n\u001b[1;32m 651\u001b[0m paths \u001b[38;5;241m=\u001b[39m _expand_paths(paths, name_function, num)\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m paths:\n\u001b[0;32m--> 653\u001b[0m paths \u001b[38;5;241m=\u001b[39m [f \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mglob\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpaths\u001b[49m\u001b[43m)\u001b[49m) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fs\u001b[38;5;241m.\u001b[39misdir(f)]\n\u001b[1;32m 654\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 655\u001b[0m paths \u001b[38;5;241m=\u001b[39m [paths]\n",
43
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/spec.py:606\u001b[0m, in \u001b[0;36mAbstractFileSystem.glob\u001b[0;34m(self, path, maxdepth, **kwargs)\u001b[0m\n\u001b[1;32m 602\u001b[0m depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 604\u001b[0m allpaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfind(root, maxdepth\u001b[38;5;241m=\u001b[39mdepth, withdirs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, detail\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 606\u001b[0m pattern \u001b[38;5;241m=\u001b[39m \u001b[43mglob_translate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mends_with_sep\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 607\u001b[0m pattern \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mcompile(pattern)\n\u001b[1;32m 609\u001b[0m out \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 610\u001b[0m p: info\n\u001b[1;32m 611\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m p, info \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(allpaths\u001b[38;5;241m.\u001b[39mitems())\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 618\u001b[0m )\n\u001b[1;32m 619\u001b[0m }\n",
44
- "File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/utils.py:734\u001b[0m, in \u001b[0;36mglob_translate\u001b[0;34m(pat)\u001b[0m\n\u001b[1;32m 732\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 733\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m part:\n\u001b[0;32m--> 734\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 735\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid pattern: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m can only be an entire path component\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 736\u001b[0m )\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m part:\n\u001b[1;32m 738\u001b[0m results\u001b[38;5;241m.\u001b[39mextend(_translate(part, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_sep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m, not_sep))\n",
45
- "\u001b[0;31mValueError\u001b[0m: Invalid pattern: '**' can only be an entire path component"
46
- ]
47
- }
48
- ],
49
- "source": [
50
- "dataset_name = 'ai-aerospace/ams_data_train_generic_v0.1_100'\n",
51
- "dataset=load_dataset(dataset_name)"
52
- ]
53
- },
54
- {
55
- "cell_type": "code",
56
- "execution_count": null,
57
- "metadata": {},
58
- "outputs": [],
59
- "source": []
60
- }
61
- ],
62
- "metadata": {
63
- "kernelspec": {
64
- "display_name": ".venv",
65
- "language": "python",
66
- "name": "python3"
67
- },
68
- "language_info": {
69
- "codemirror_mode": {
70
- "name": "ipython",
71
- "version": 3
72
- },
73
- "file_extension": ".py",
74
- "mimetype": "text/x-python",
75
- "name": "python",
76
- "nbconvert_exporter": "python",
77
- "pygments_lexer": "ipython3",
78
- "version": "3.11.1"
79
- }
80
- },
81
- "nbformat": 4,
82
- "nbformat_minor": 2
83
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_llm.py CHANGED
@@ -33,8 +33,8 @@ username='ai-aerospace'
33
  project_name='./llms/'+'ams_data_train-100_'+str(uuid4())
34
  repo_name='ams_data_train-100_'+str(uuid4())
35
 
36
- model_name='TinyLlama/TinyLlama-1.1B-Chat-v0.1'
37
- # model_name='mistralai/Mistral-7B-v0.1'
38
 
39
  # Save parameters to environment variables
40
  os.environ["project_name"] = project_name
 
33
  project_name='./llms/'+'ams_data_train-100_'+str(uuid4())
34
  repo_name='ams_data_train-100_'+str(uuid4())
35
 
36
+ # model_name='TinyLlama/TinyLlama-1.1B-Chat-v0.1'
37
+ model_name='mistralai/Mistral-7B-v0.1'
38
 
39
  # Save parameters to environment variables
40
  os.environ["project_name"] = project_name