Spaces:
Runtime error
Runtime error
Update model_name in train_llm.py
Browse files- train_llm.ipynb +0 -83
- train_llm.py +2 -2
train_llm.ipynb
DELETED
@@ -1,83 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": 4,
|
6 |
-
"metadata": {},
|
7 |
-
"outputs": [],
|
8 |
-
"source": [
|
9 |
-
"import os\n",
|
10 |
-
"import jsonlines\n",
|
11 |
-
"from uuid import uuid4\n",
|
12 |
-
"import pandas as pd\n",
|
13 |
-
"\n",
|
14 |
-
"from datasets import load_dataset\n",
|
15 |
-
"import subprocess\n",
|
16 |
-
"\n",
|
17 |
-
"from dotenv import load_dotenv,find_dotenv\n",
|
18 |
-
"load_dotenv(find_dotenv(),override=True)\n"
|
19 |
-
]
|
20 |
-
},
|
21 |
-
{
|
22 |
-
"cell_type": "code",
|
23 |
-
"execution_count": 6,
|
24 |
-
"metadata": {},
|
25 |
-
"outputs": [
|
26 |
-
{
|
27 |
-
"ename": "ValueError",
|
28 |
-
"evalue": "Invalid pattern: '**' can only be an entire path component",
|
29 |
-
"output_type": "error",
|
30 |
-
"traceback": [
|
31 |
-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
32 |
-
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
33 |
-
"Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m dataset_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mai-aerospace/ams_data_train_generic_v0.1_100\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 2\u001b[0m dataset\u001b[38;5;241m=\u001b[39m\u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset_name\u001b[49m\u001b[43m)\u001b[49m\n",
|
34 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:2112\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m 2107\u001b[0m verification_mode \u001b[38;5;241m=\u001b[39m VerificationMode(\n\u001b[1;32m 2108\u001b[0m (verification_mode \u001b[38;5;129;01mor\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mBASIC_CHECKS) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m save_infos \u001b[38;5;28;01melse\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS\n\u001b[1;32m 2109\u001b[0m )\n\u001b[1;32m 2111\u001b[0m \u001b[38;5;66;03m# Create a dataset builder\u001b[39;00m\n\u001b[0;32m-> 2112\u001b[0m builder_instance \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset_builder\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2113\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2114\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2115\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2116\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2117\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2118\u001b[0m \u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2119\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2120\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2121\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2122\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2123\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2124\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2125\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2127\u001b[0m \u001b[38;5;66;03m# Return iterable dataset in case of streaming\u001b[39;00m\n\u001b[1;32m 2128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m streaming:\n",
|
35 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1798\u001b[0m, in \u001b[0;36mload_dataset_builder\u001b[0;34m(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, use_auth_token, storage_options, **config_kwargs)\u001b[0m\n\u001b[1;32m 1796\u001b[0m download_config \u001b[38;5;241m=\u001b[39m download_config\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mif\u001b[39;00m download_config \u001b[38;5;28;01melse\u001b[39;00m DownloadConfig()\n\u001b[1;32m 1797\u001b[0m download_config\u001b[38;5;241m.\u001b[39mstorage_options\u001b[38;5;241m.\u001b[39mupdate(storage_options)\n\u001b[0;32m-> 1798\u001b[0m dataset_module \u001b[38;5;241m=\u001b[39m \u001b[43mdataset_module_factory\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1799\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1800\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1801\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1802\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1803\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1805\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1806\u001b[0m \u001b[38;5;66;03m# Get dataset builder class from the processing script\u001b[39;00m\n\u001b[1;32m 1807\u001b[0m builder_kwargs \u001b[38;5;241m=\u001b[39m dataset_module\u001b[38;5;241m.\u001b[39mbuilder_kwargs\n",
|
36 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1495\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1490\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e1, \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m):\n\u001b[1;32m 1491\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1492\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1493\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m on the Hugging Face Hub either: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(e1)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me1\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1494\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1495\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e1 \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1499\u001b[0m )\n",
|
37 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1479\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1464\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m HubDatasetModuleFactoryWithScript(\n\u001b[1;32m 1465\u001b[0m path,\n\u001b[1;32m 1466\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1469\u001b[0m dynamic_modules_path\u001b[38;5;241m=\u001b[39mdynamic_modules_path,\n\u001b[1;32m 1470\u001b[0m )\u001b[38;5;241m.\u001b[39mget_module()\n\u001b[1;32m 1471\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1472\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mHubDatasetModuleFactoryWithoutScript\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1473\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1474\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1475\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1476\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1477\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1478\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 1479\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1480\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\n\u001b[1;32m 1481\u001b[0m \u001b[38;5;167;01mException\u001b[39;00m\n\u001b[1;32m 1482\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m e1: \u001b[38;5;66;03m# noqa all the attempts failed, before raising the error we should check if the module is already cached.\u001b[39;00m\n\u001b[1;32m 1483\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
|
38 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/load.py:1034\u001b[0m, in \u001b[0;36mHubDatasetModuleFactoryWithoutScript.get_module\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1029\u001b[0m metadata_configs \u001b[38;5;241m=\u001b[39m MetadataConfigs\u001b[38;5;241m.\u001b[39mfrom_dataset_card_data(dataset_card_data)\n\u001b[1;32m 1030\u001b[0m dataset_infos \u001b[38;5;241m=\u001b[39m DatasetInfosDict\u001b[38;5;241m.\u001b[39mfrom_dataset_card_data(dataset_card_data)\n\u001b[1;32m 1031\u001b[0m patterns \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1032\u001b[0m sanitize_patterns(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files)\n\u001b[1;32m 1033\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1034\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m \u001b[43mget_data_patterns\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1035\u001b[0m )\n\u001b[1;32m 1036\u001b[0m data_files \u001b[38;5;241m=\u001b[39m DataFilesDict\u001b[38;5;241m.\u001b[39mfrom_patterns(\n\u001b[1;32m 1037\u001b[0m patterns,\n\u001b[1;32m 1038\u001b[0m base_path\u001b[38;5;241m=\u001b[39mbase_path,\n\u001b[1;32m 1039\u001b[0m allowed_extensions\u001b[38;5;241m=\u001b[39mALL_ALLOWED_EXTENSIONS,\n\u001b[1;32m 1040\u001b[0m download_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_config,\n\u001b[1;32m 1041\u001b[0m )\n\u001b[1;32m 1042\u001b[0m module_name, default_builder_kwargs \u001b[38;5;241m=\u001b[39m infer_module_for_data_files(\n\u001b[1;32m 1043\u001b[0m data_files\u001b[38;5;241m=\u001b[39mdata_files,\n\u001b[1;32m 1044\u001b[0m path\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname,\n\u001b[1;32m 1045\u001b[0m download_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_config,\n\u001b[1;32m 1046\u001b[0m )\n",
|
39 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:457\u001b[0m, in \u001b[0;36mget_data_patterns\u001b[0;34m(base_path, download_config)\u001b[0m\n\u001b[1;32m 455\u001b[0m resolver \u001b[38;5;241m=\u001b[39m partial(resolve_pattern, base_path\u001b[38;5;241m=\u001b[39mbase_path, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 457\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_get_data_files_patterns\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresolver\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 459\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EmptyDatasetError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe directory at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbase_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt contain any data files\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
40 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:248\u001b[0m, in \u001b[0;36m_get_data_files_patterns\u001b[0;34m(pattern_resolver)\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m pattern \u001b[38;5;129;01min\u001b[39;00m patterns:\n\u001b[1;32m 247\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 248\u001b[0m data_files \u001b[38;5;241m=\u001b[39m \u001b[43mpattern_resolver\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n",
|
41 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/datasets/data_files.py:332\u001b[0m, in \u001b[0;36mresolve_pattern\u001b[0;34m(pattern, base_path, allowed_extensions, download_config)\u001b[0m\n\u001b[1;32m 330\u001b[0m base_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 331\u001b[0m pattern, storage_options \u001b[38;5;241m=\u001b[39m _prepare_path_and_storage_options(pattern, download_config\u001b[38;5;241m=\u001b[39mdownload_config)\n\u001b[0;32m--> 332\u001b[0m fs, _, _ \u001b[38;5;241m=\u001b[39m \u001b[43mget_fs_token_paths\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 333\u001b[0m fs_base_path \u001b[38;5;241m=\u001b[39m base_path\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m::\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m://\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;129;01mor\u001b[39;00m fs\u001b[38;5;241m.\u001b[39mroot_marker\n\u001b[1;32m 334\u001b[0m fs_pattern \u001b[38;5;241m=\u001b[39m pattern\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m::\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m://\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
|
42 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/core.py:653\u001b[0m, in \u001b[0;36mget_fs_token_paths\u001b[0;34m(urlpath, mode, num, name_function, storage_options, protocol, expand)\u001b[0m\n\u001b[1;32m 651\u001b[0m paths \u001b[38;5;241m=\u001b[39m _expand_paths(paths, name_function, num)\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m paths:\n\u001b[0;32m--> 653\u001b[0m paths \u001b[38;5;241m=\u001b[39m [f \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mglob\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpaths\u001b[49m\u001b[43m)\u001b[49m) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fs\u001b[38;5;241m.\u001b[39misdir(f)]\n\u001b[1;32m 654\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 655\u001b[0m paths \u001b[38;5;241m=\u001b[39m [paths]\n",
|
43 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/spec.py:606\u001b[0m, in \u001b[0;36mAbstractFileSystem.glob\u001b[0;34m(self, path, maxdepth, **kwargs)\u001b[0m\n\u001b[1;32m 602\u001b[0m depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 604\u001b[0m allpaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfind(root, maxdepth\u001b[38;5;241m=\u001b[39mdepth, withdirs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, detail\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 606\u001b[0m pattern \u001b[38;5;241m=\u001b[39m \u001b[43mglob_translate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mends_with_sep\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 607\u001b[0m pattern \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mcompile(pattern)\n\u001b[1;32m 609\u001b[0m out \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 610\u001b[0m p: info\n\u001b[1;32m 611\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m p, info \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(allpaths\u001b[38;5;241m.\u001b[39mitems())\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 618\u001b[0m )\n\u001b[1;32m 619\u001b[0m }\n",
|
44 |
-
"File \u001b[0;32m~/Documents/HuggingFace/autotrain-playground/.venv/lib/python3.11/site-packages/fsspec/utils.py:734\u001b[0m, in \u001b[0;36mglob_translate\u001b[0;34m(pat)\u001b[0m\n\u001b[1;32m 732\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 733\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m part:\n\u001b[0;32m--> 734\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 735\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid pattern: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m can only be an entire path component\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 736\u001b[0m )\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m part:\n\u001b[1;32m 738\u001b[0m results\u001b[38;5;241m.\u001b[39mextend(_translate(part, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_sep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m, not_sep))\n",
|
45 |
-
"\u001b[0;31mValueError\u001b[0m: Invalid pattern: '**' can only be an entire path component"
|
46 |
-
]
|
47 |
-
}
|
48 |
-
],
|
49 |
-
"source": [
|
50 |
-
"dataset_name = 'ai-aerospace/ams_data_train_generic_v0.1_100'\n",
|
51 |
-
"dataset=load_dataset(dataset_name)"
|
52 |
-
]
|
53 |
-
},
|
54 |
-
{
|
55 |
-
"cell_type": "code",
|
56 |
-
"execution_count": null,
|
57 |
-
"metadata": {},
|
58 |
-
"outputs": [],
|
59 |
-
"source": []
|
60 |
-
}
|
61 |
-
],
|
62 |
-
"metadata": {
|
63 |
-
"kernelspec": {
|
64 |
-
"display_name": ".venv",
|
65 |
-
"language": "python",
|
66 |
-
"name": "python3"
|
67 |
-
},
|
68 |
-
"language_info": {
|
69 |
-
"codemirror_mode": {
|
70 |
-
"name": "ipython",
|
71 |
-
"version": 3
|
72 |
-
},
|
73 |
-
"file_extension": ".py",
|
74 |
-
"mimetype": "text/x-python",
|
75 |
-
"name": "python",
|
76 |
-
"nbconvert_exporter": "python",
|
77 |
-
"pygments_lexer": "ipython3",
|
78 |
-
"version": "3.11.1"
|
79 |
-
}
|
80 |
-
},
|
81 |
-
"nbformat": 4,
|
82 |
-
"nbformat_minor": 2
|
83 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
train_llm.py
CHANGED
@@ -33,8 +33,8 @@ username='ai-aerospace'
|
|
33 |
project_name='./llms/'+'ams_data_train-100_'+str(uuid4())
|
34 |
repo_name='ams_data_train-100_'+str(uuid4())
|
35 |
|
36 |
-
model_name='TinyLlama/TinyLlama-1.1B-Chat-v0.1'
|
37 |
-
|
38 |
|
39 |
# Save parameters to environment variables
|
40 |
os.environ["project_name"] = project_name
|
|
|
33 |
project_name='./llms/'+'ams_data_train-100_'+str(uuid4())
|
34 |
repo_name='ams_data_train-100_'+str(uuid4())
|
35 |
|
36 |
+
# model_name='TinyLlama/TinyLlama-1.1B-Chat-v0.1'
|
37 |
+
model_name='mistralai/Mistral-7B-v0.1'
|
38 |
|
39 |
# Save parameters to environment variables
|
40 |
os.environ["project_name"] = project_name
|