{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "aacb45c2-eecc-4ab0-983f-0f459d3eb59f",
   "metadata": {},
   "source": [
    "# IDEFICS_ROCOv2 (checkpoint test)\n",
    "\n",
    "This notebook fine-tunes the [Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) model. The source model is fine-tuned on the [Radiology Objects in Context (ROCO)](https://huggingface.co/datasets/eltorio/ROCOv2-radiology) dataset, a large-scale medical and multimodal imaging collection.\n",
    "\n",
    "The fine-tuning process stores the model checkpoints on a regular basis. Re-run the notebook from the last checkpoint to continue the fine-tuning process."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d0e6780c-00e5-4617-a4e8-b76e08233dac",
   "metadata": {
    "executionInfo": {
     "elapsed": 1459,
     "status": "ok",
     "timestamp": 1730997027344,
     "user": {
      "displayName": "Ronan Le Meillat",
      "userId": "09161391957806824350"
     },
     "user_tz": -60
    },
    "id": "8F3w0kcbAMtC"
   },
   "outputs": [],
   "source": [
    "# Notebook-wide configuration: dataset, prompt, model ids and checkpoint directory.\n",
    "dataset_id = \"eltorio/ROCOv2-radiology\"\n",
    "prompt = \"You are an expert radiologist certified with over 15 years of experience in diagnostic imaging, describe this image\"\n",
    "source_model_id = \"HuggingFaceM4/Idefics3-8B-Llama3\"\n",
    "destination_model_id = \"eltorio/IDEFICS3_ROCOv2\"\n",
    "output_dir = \"IDEFICS3_ROCOv2\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "020afb19-c0ee-406b-a0ee-ba0e64aeaddd",
   "metadata": {},
   "source": [
    "### Log into Hugging Face"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cfe7c2dc-fb94-43f1-a6c8-486282886727",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hugging Face token found in environment variable\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "from huggingface_hub import login\n",
    "\n",
    "MISSING_TOKEN_MESSAGE = \"Please set your Hugging Face token in the user data panel, or pass it as an environment variable\"\n",
    "\n",
    "# Always bind HF_TOKEN (to None when the variable is absent or empty) so the\n",
    "# checks below can never hit an unbound name.\n",
    "HF_TOKEN = os.environ.get(\"HF_TOKEN\") or None\n",
    "\n",
    "if HF_TOKEN is not None:\n",
    "    print(\"Hugging Face token found in environment variable\")\n",
    "else:\n",
    "    # No token in the environment: on Google Colab, fall back to the user data panel.\n",
    "    try:\n",
    "        from google.colab import userdata\n",
    "    except ModuleNotFoundError:\n",
    "        userdata = None  # not running on Colab, the environment was the only source\n",
    "    if userdata is not None:\n",
    "        HF_TOKEN = userdata.get(\"HF_TOKEN\") or None\n",
    "\n",
    "# Fail with the explicit message whichever source was tried.\n",
    "if HF_TOKEN is None:\n",
    "    raise ValueError(MISSING_TOKEN_MESSAGE)\n",
    "\n",
    "login(\n",
    "    token=HF_TOKEN,\n",
    "    add_to_git_credential=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5826da8d-e57c-434a-b856-3d22d10dd2fb",
   "metadata": {},
   "source": [
    "### Load the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a74d47eb-f798-47cc-8d98-9fd589ee60b0",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000,
     "referenced_widgets": [
      "9f1fb7edb868495b831e12c535c4ce77", "92e501753b0c49388ce317d0cbad704c", "5c0db4ce097c4a7d9235c48b71e83a76", "6b491ec2f7604976b347a5feb66a2a2d", "98e545ecb77b461290d24e5ccd3cc1d5", "18908dfa00534f9b99fd5973c070bbdc", "ee551beb730743cc8fcf98c8d467de22", "38b31d54ad634e19b41544ba6fa182d6", "e6600d6b8cfe429480cbdb2b993c1aa6", "69d0bffcb7eb4a0d95c743a99dd06725", "a784b176de684e0a8be7995f16212a3a", "1bd6753e9e69481992048d1eafe5849e", "3e3859d3e3c240119a0eb791e611494d", "af2693b9442e4c4896396db4d8336ea5", "031dd614106946d79cd0dfe67cefa023", "5f2589177ecd47908828ac2b19a1d0df", "58304c739def48518cd0a2e51e6c8b9f", "8853d39b97ab429ba5863c0a22197886", "68da45f8ef1f40e89f2ecb7ae7a2617a", "ae11688c634c47d89182e70801f145c3", "0ca8ec79f2dc43d0897b56a3b78ee98d", "e7544f37527c4bf4b6baea5e92d03ec6", "bfce5ea5eec24d8e97a6188164ed31f7", "c6c6989b2b8849d29b516660c375d375", "505b1265c9ea426f8bde4dabba85cc0a", "e5ed0c1c6a2f4202b29a2896e8e31236", "eeb11f35ceb547d9aa71fb08e6a03715",
"90f9e80bb8bd4c39b658420517ce6df5", "45d8f2f4cd2142df90b7185ee2ed75c2", "2c32cc37b949475f9231d20b3ae96680", "87367e46663c4a7fbfeceb5c4ecaa0b5", "d2feb51c2e8347f5912b8b1f7d6252e4", "0e475163ce174be1b3d41edc3330baeb", "0fa957f7e02d4494b090ee9c446d7e04", "ab01d3aeb23248b0bfa3b0576e43ea70", "0f23267d51f84de9b621a84cf6799bf0", "52c45aa3e8e5458db2fdd04955b16864", "34b464cada7144bcb3753b23e1d7765d", "4388b903acbe4fd189297ce4b2fd0ff5", "805a520992af4639848412359eabfff5", "8ea57e014d3d40fab57c058b1328d004", "944df001d5d7483a92e6bc683c09701a", "8bc04b1675d14cc6baa8db4a0fe8180c", "d02f2bab1f2f44d1a383a100f46b49b9", "058bdcd5a23240ddb7a4efa8e8334fc5", "a859d2cec2d14756a93c2a90d6e7271c", "38158bc387464c18a79943cbf2ff8d82", "8dd54003710c4e38915d6b99faa2627f", "4e8e310530ee4a0f86ea3da6541bfea4", "42499379fbf3405daa80ad24127934fc", "edb31fdbbd60426ea1b95aa8aadedcba", "7dbaa240dd8e4f8fa8dd010be1da1e57", "1331a9fe5736466e8367f7bf9d4273d2", "be86432fa7b444759028c5ad2afbb7d1", "e9d0cf692c5044faa9b60ac0858d3bb4", "293f5a2a539443aa99764a291405d896", "2cb70998cb6749aba86f4f797366ee4c", "fde431fce3714221a6fb4f25ec00fc1d", "c9d337da383a4cfaae8dbf1a7bda834f", "cfa6776359d94782ad0abcb0eaa544d5", "2dfc1bb79179445d863c17d84acfb4c9", "8c34328ed5d24f22837f73f4accb84a4", "58a8969990ee4da39acfccbe6f4bab81", "052493c48f8c4dddadc76e845b0eef22", "04d3a35c248d46c4b40efe2d8a8adb27", "4ec1ab6f09cc40638ad3028494e24a90", "af495be62c0f4af89c2126c40bc29573", "318d5d1dd67d4e23873a3482cb198a14", "a6f0b3010b414beca703085170734b3a", "dabaf608430448e594c978622e6329ea", "534b998a45cb4b44975dd40a5e452013", "03702c12315f4be79e32f61bae0c577a", "192e6a14527349ea98ece35e3064bcfc", "052904cc91134bcd9d608dd8e2e725d4", "a20a026b5d374c5488100b2b8e322985", "94db136a74734d4386d34d6ac1a99268", "5df7c75e57dc469cab54c4f88f689b12", "ee8b76aad5c9435b9cae1300bf06bc11", "ede1d8b2f1fd4517b297b22b05bc7007", "32e72b9ecd4f4c478ac0f5597fd80830", "fcf96cc71d1341918263e61d8386c998", "8e0d94ea75f6457c9e77f39cdfdad903", 
"793dbb65d5cf4e1daf5879dd37c20dd7", "7cbdad6324024877a6c319722c055708", "6697cfcb147742cf9303e55fce484da1", "38d7358cf4ed4d588c28bb06d2e44030", "db4bdf52d8d54343a41da4cecaae8317", "235f704533f14d8eb9cde8ee9d7ce347", "49e1bbe36f8d46408b657315fb1378cb", "7364d57f39c94338ac84e92f1c246e82", "3dd1fcae8ee24ac7b54810d93be13b79", "b1f982c6e36a4b0ca88f52fe298bff77", "9125f8f6022c449f933a0f14d33fda0a", "9c5ecd24df5d44309cd3431b15443afe", "300001e07fd6428ea8e0437121d7dee0", "d948e3be81d945648b23c4735cc2a39d", "9558d08bb2ce45fba074623960b5e2cd", "558d79f87f584c05bd00f678f3754e8a", "714d7b0467b94f9f8c0fac2116636d89", "1c05996cd3da4d62801b1cca484fc679", "1a6e7db9705a40c592f124dc3e1bbcb5", "e939831f76e948c4aabd351d9f4f5983", "8ba416ec3c2c41fcb979e23dfd3d2197", "95c9c17fc5a449cf934ee234be3ad456", "e614d797ef214a18bbc21ac5fd5d19d1", "119d54ebbf6543cc9eb46f1c5eac455d", "7b9ed1105d7c411e8776ef87579b1885", "5e2af1d91bb84b1ea6d9d9e78810aedd", "c7350d8bc53e409a9757c07a3338a091", "cdd0fd1afbf5473ea5c700a696ec2714", "2711e1ecd6ba4655a65aed610b2c9bb2", "1a6d5bb2bf36435cb3177426776100f1", "b20003b963b341fea843d78ab5fb9536", "3845e901b2254703802ffa7941100d72", "d006625e56394770bfa7251e758e30cf", "ced9665945454cfd9e39538db2b27d5a", "f2149ee0b7454651a3d4848520b9bf14", "e8e9fb9c23ab40d3bd830b2098f9dc3a", "141cf7d500a74e28894a43f6d4e1460e", "4586b647ccbf4aaeaa76cc18d6e665cb", "7aeccd8d652e454e921c42ecb967e0f5", "b49f56ae212e490987b3b36a77e5aa3b", "b829d0a81fa44ea7974e5289e70c8a2d", "57dda77517494ddaac9628c3f0afcfa2", "6b453755737643398e2e4706a7ed7e4f", "c5adb0e0fe324a9cb27ef1db3becc403", "0c8b3c409bee43849ceefb2536478638", "57f2c0896222444bb083b1cc2cfee55e", "def963efaa98452eb42a71b180e003ef", "ba000563c0b040c782ed66260bd3720e", "cfaa170556f244918584103683cbcc9f", "df20e5eb9c5143a388f7fb87a66ae0b1", "9743ce9b636748dca9d53b8edf7bba4b", "30ce39cf5ec84243b079f56a7f48c346", "3cc9fddbfff6402aafa8b1f95309e868", "e4127c3b1e914646a0c1c284f5dda155", "1f27b696c12a413886594c6a3321132e", 
"cf6e25dedb3342d2aa78fe790886294f", "9592793d73be44ee9b94e4666b66eee8", "49858f181c35493fad5a78b5714553ca", "4ecc9d6bb9fe42d1822dd3c51ccd796c", "d395d8bea8ec45f586687222550ffbb9", "fa4182c2d5b0454899a06f01cb5a50ed", "a37760b13c624d9494844be3304895ec", "606988656da247d0a397f51485b2e74c", "ea825d67cd2a4753af417c345fde8c27", "ce8bf8faf7904743962166085d6e8cac", "f5886ee593cf4eaeba77704f0f945da0", "cbe25015ecef494a81efba20c8824f85", "5f663c5a363741ddb98385fe6d993382", "5dd10c0678844bd88c7d61628b66d884", "b12122a3d354486587e2d6677c3afb93", "099bf7f4825a432f8536392b9c9a92f9", "f2db7c87f141450faedc81abf9c1e2b9", "69a712d0f248444d8415e3187a08ea66", "e84dc792c822437fa285c88a6bd5f593", "4498e66146c24da9972f1be3effdae35", "ea10a2d4f6714286a611dc319232be74", "b1be925dd9d042c487b4ed2edc5575b0", "e7736b2982ee412782de039b83d6043a", "2cd745b3c6314e5eb3910db8bb060dcd", "fc3f4ed6aa504b60a478d91f0e74c367", "6c6dd664b1ba49bcab55033b4728a1ac", "8852ca4c5ce4483fb3a68e4162fdd7c2", "eda2491baaaa40a0ba478f18d919f0d2", "6c2d3b5ddc464c84976ab22fd427bf64", "976f3fee3e4d4bc49359e65525100cb7", "234b3b4fc84b45bd825bf31edfcaa609", "760917fbf98c4c64bf3ed91771f90c5b", "31e7f2a17f704c0587f65ab935a6474c", "b8e7ee5655274cf8a95c3546f8d35a66", "f30a823e93424a9f88d60a773aa48127", "8dae6c92a4874b369e709cda626b9e16", "f50b6e2f01304cecb4cc3bae4b973689", "b1f5135ec2ed4fb39e6670fee13ae2ac", "ba6f67f2fb47472eb9306d382450a639", "89ddc068529c40acb5f15e1288acbf08", "159ff3152c48436099bc3935b68bc5ff", "4d2cc00cbda049b493e70a34de52f31d", "17fb66b43c8a4b6894e13850230dad50", "f2c8bf3d270b47fc98a2a73176b38e0e", "44234d57983943d5a05d594aea4e2920", "75d0a848fd584e3f900930f1f0284b09", "9757000fbeee463287c139ff92ec1739", "674e2c8442244f47942d8a46455379f3", "5d9da2ae9a444bffa51ac0eeb2466c2a", "f2eca44d0ca54d47bd583c708145f2d7", "602931dff9364a3bb8879726fe9ba637", "9d4319a9e4534cdaab0473fdb58fd040", "b10f679d7f674190a71ee70feb568f2e", "4d040b9b8369407b80d9acee933ead6a", "c7993393d2e24ef693f66ee024f1e149", 
"da133d2847b94ae89220c6f18ead0020", "04f2a883e2c349ac90e7242a80b27066", "41f12316e4b54358bf98ddf919c897a7", "54509e9406474869a2f459b3abc5aa22", "0e605fc1126849a5a83f5833de8ff3ab", "849bfba20fde4288ba38685cebdfb35c", "048a2a63e0374bcf9208e195c421957a", "b63bd174450a4b8097d2dd83306533c8", "060aa178f0e44659942667115f2a0e43", "13bde01a38044aa082a5f236facbeb5f", "77a9761fbb764ff4b3d543afff4f26d2", "a46a8c9089754927923d84ddd9ab167e", "1b7190e5a2984695852e8cfa2aa5894a", "fa97b3ea652d48a4b9942abb23f46259", "dbeef4558d2147b289fa108ab47a2dcc", "a36f2eb9970340ec96c37f2dec04d3bc", "cc613172d44a42f1bb24f94660710cc3", "94dee2553d274b119d0c8ef589c7f40d", "04bd7ac45116475fbfcf63210e27bbc1", "41884cd70c104ac38ca29e566b9e3d97", "25f1f07563a040749690b9447181267e", "a8da49836fcb43b990ac5f38559e6221", "91e5719eb4744510a58cd1b2236afd69", "709e43eb493e4808b9fb0bdf8d8c70c3", "6264cbd8bcdc4cf4b98871ec6d62d7c0", "ebed8d6a297a41c19bf8adb998428218", "7ebc2f1ce5b84d0b91d0697849330a71", "aeca0598f8e64f8bb16b5dec78f6e2cd", "8a393051dda141f2a107f453cb3a21bd", "b67a33887f2c457c805df04eaea25f12", "501f10738ea742c59a423c00131631e2", "88cb916758314187b0d880df7740ff51", "957d482f0b164233a6c0884d6804e99c", "cee3a6b3b1774e6e8426afe66d738678", "2ec30c90fde44cfe83c8a4fec8f4f0c8", "c831e1725b234647afd35bcdf5b60a48", "7ce0aed330c948bcae8e8c14a5c84b1f", "f8d99fe78fb2479d851bb3be2521539e", "e8e91eae7b374756a873c1a011d5fecb", "37052784502e46d6b50702eeb7786c5d", "33f434725edc4305ab70bc12976e1ee1", "7517ae4b420645e5928928cced44b1ba", "8e9b3df7fb7044c5b3d53b2d05fa1237", "e932dd7eaafe45288452c61ec6e69e10", "ae05630c20a145ff934264a196bb53c5", "cf862ffe6dd8447495d2eb24fc32c54e", "8e2470267fbb4328a18071a6b1a88bdb", "570f0f87519f43f08e28286e0f7cc9aa", "516e1569f9c644b5b7f9680ad1836a0e", "224f44f0c138439599b64b0a22e18561", "2f030628be5944e59bbbc71845b459e5", "6e3835d20e014fa0a156d37c7cdca45b", "6562b0c2d2324f589b91f5d73b1572ea", "5399443d85a941ca8c1d8de3c82d474e", "caa506dfc4b840a9b12dbad5c369d2a9", 
"98a3259b6668496191c42b4d938989b1", "ce6b97944af3468da286531314b1be55", "f06d40ec0b78474e8e00b8c3337b7c37", "a25f7ac10ff6440ca254c9d649835282", "1b213b4516b2474a81dadc8b62891b40", "4e5cea218561410ab566ee3f8250c7a7", "a0ca9d5fe07841c7a763dff2c09e3723", "c96b087decc04e7f9570524baeee690f", "634d38780680487c9fb4c5602ad49721", "39614acd98c744aea09a933c28b4203e", "159b2fcf518248e8b0e2e3a78c76f05a", "4915320c740c4ae5baeb2d343987823e", "7766e3ff19b345c782bc23c74b115fd6", "4dd83966ac5340b6b5565178106a660e", "a720bb7a7e154611a0fc2f4c645b86a2", "67487b9f5b7e4a5b94731b21337150c1", "9375eff4ffad4c5cb5a7a23868ee2ccd", "8c0f22f1c7364865aa277ade8c3968bd", "df76bba09bc649689cb529159a0f53ec", "369ef8edc5b74e2a87a1ac280db04c76", "7e4f4dd037b547a897a51d3795138ae9", "797cbb6d462540b68dc7c941aebf6696", "8b68d6fa26664dfba7fcb25f12eda0b2", "a4d2e340bd37407cab41900028db60d1", "0ec459dfb233410599d999ac0ff93cb5", "9a3ea32e4533406fb3296ae024187bde", "afaae56bdb7243adaef805fb12381171", "1fb54fa6223d4434a87ed156bd0da59e", "356dfbb75d1647ee84592bc3acdb97c4", "7b98988a584a48d8835e2bafc6656244", "433b1e1da2f044539d3583c4e7c87277", "44f9756ad81c4cba8bbb389dfa346a21", "dc92b0a050864c518bb3edc9cc95d097", "d968f143ad264470b6153e285522d20e", "94ba39aae77c4f5da9ed764d8bbbb787", "5308870c262e4ddd9951dc773fc4ff82", "afe8ec13518348588fb7c121d88bbede", "ee87d5335f1a4902856d9810e7405d86", "b556c220a2f644b0be33bedfc4ab37da", "16f05787e29d4d9ea49607bc59c8635f", "b7945dcbda23460695df21785d842e32", "e25a53f84ed74e259c9c8dca384acfdd", "c7c89190bbdc4ecf85b235f28cd260c4", "0781b655bb6b45d895efc78d1d8800e8", "2e75922cfe6e4a01ae1d754cce683b77", "269f1a47aaa745d688bff8634002d6e4", "21230790d0ad4aeeb06f27befaa94f57", "918b680796214199ba9fda7a28b7b48a", "d90021999afd4b989ac59fdedcc44df4", "2c31a59376a041f9a70bdbcd95b196ac", "0669c278494941728fe0795e9c6f4867", "757195231ca54f5a95d0f35246c98ac1", "28b98661f1094a37989eb170de5ff9fb", "22b31e5d19b54aa993a6fa74a2d8676f", "59382d76dd9c4a0ebeec230e0943d152", 
"d4292fe89a44486aa120ede14d7f54f7", "2b0d4caf302c4238b82ab41a8218a2ae", "94850b29b8724feba55999563ca5394a", "cd19571be77440b58dc378718acdcfe9", "252fdfd71eb3457eb16bc2ad78bf7c92", "48d07314744544eda49d7d64df228069", "78c07bf46ec142e7be5efea449352368", "4469befba04a46be90a1ada54be23d2f", "cdb4edb6863742cbbfaee7a004f44a06", "8ee664d228f24099855490ca1a6ec511", "3e1cebf6d59747e69eecbebb223a46e6", "8d8e96c495e1455b8bad70e4ec82e707", "4b8cc06cfad64c139f680dc37ce9c0d2", "0912bb3fdea04b91a18b852341586b72", "bb293ee3925f4283be569177c81058cc", "991c717a23104fe4a63ddbe5622bb602", "42a249a3b8004fd9ac1fd6e160a76eb9", "d77a6697f1094585922d186f94d4cded", "d071a8c35b064d489eb3c0302389807d", "8759e89091bc4ef89e6f7eec08cf0060", "7c467e66ba1d4cb6a3e86732eb3808da", "648baf6ccba14e5c9bd220500ac4d907", "3070d7cdf5c24da982b3d067a956853d", "fc543c8083454e05925f0f4a3d576a0b", "f50adaf856074b539b0008a0b8eb2f8d", "ec29599c1281435eb61c59b4ed36b10d", "3c263202204b433ab6d2535f17c388cc", "cd98582b26294c16842c6450e6745ea0", "95bf9b8754eb49998481f7b7ec6de83b", "93b9cd4e86034a62b03cf4a12d830561", "7b3222f646c440869c4b99db56dc06f1", "2f0541b6a7824b86ac2c70eae49366c3", "c0ae8a54c6ef4200ab5ebbaae9d40f51", "acea5f3faec04f1dbdd94b70894d675c", "3602a9e748d34a3faf079bcd6bd1e4c3", "1f21f3ad1e294d6f8dd6fd1b32f21f7f", "f9bedd2a11ee49719830ef2132c47b4b", "f1abe23757c54eb98df5e5995c02e57e", "5b07414f6728479fa54ca08c9b1762d0", "a908cfdd55bd4dd7bcb265b3fc543cd2", "657669383cef4f1babe66f2788cf3793", "99f122891e564a6187cdf1b40c630bc5", "f1876ec512174551a531eedfe7f9f3f7", "b43de568a9624b3dafed127ec05fb33d", "a0c1768cf64d4a5e8df52aac24e3539b", "1e0f5205bd884bd784cc710983cf223a", "d48af41f002448198d23ed3564078478", "0516ed2dcc714d21ad62450a8aefaa3c", "888ca195e590415d895b60370b1dd904", "f620293d10cd45afaf5ed61e2d8597ae", "d918763d272549268b6f6822bc669998", "2c3524bdbf2c4d3f9757dfca9f801433", "81116b6fa75c4d25bd528818532227d0", "cdb3c5d2068344388342c079bd45b1fe", "533464f5ce984ff99ec2d53a393eb336", 
"ae436c99493c4cc3ad4842358b3bc3ff", "42fe587493cc49c28264d821f1557d35", "d514b8c9d66d48039beb37e28add0a93", "fb48433364dd42778a3e045f9d39c95e", "0d7b3708d2894a49bfe98ebef0aa50a6", "3288c16e67c64fc081a7343018a5b564", "7aeb5b44ef00474284b20a8c07b356bc", "a110709e1dc249d188c7e1967006bf1d", "ea7f6ed74c8b40c3813cfdd1c01a5283", "9deafef19f214d929337ca012dd653d0", "62f32b6f17df46038b938cef9bc1b673", "587a748eb72e4b9fbe1d27612fb37e79", "350ab0925630433992d667a465ac1b11", "9a5a69cba9b94d169bf767bb2bd75d6a", "6002450696db4a15958f39f5014be380", "a203f631dd374d8aa11b1face537e6be", "f93d6bf133434e7899b1786e729b9f47", "d5f3cb4539db47fbae84122a96acdf21", "f0b3536703134a22a57b9427463b12ee", "4dc1b1a7fa68433196bcb7bc30aef421", "274587f016014a68bb20557d05f6e8d3", "15f77c07bb6446ab9c5d3b206b1357bc", "9d25562d85764b3da733616ec8bb68e1", "3b13bd4ff067435586508d91868f3b63", "10248cbdaa7d4ea99830d3dae0838f0f", "c971e53f10b54a8faadb179df71597e7", "290b76e842e84e198c70c606fe20899e", "d974fa71d05148729e08d893095bad24", "669ee8f1b5fc408d81c2ddd2de5f6f9c", "08311eecb34641f5b602cc8a0888d86d", "82c64dba67694d889f0a0c01cf3830e7", "a6210ad7ceb247b0b136db6810605168", "1ae53773abea471580087a823e5301c1", "356430f29d3044acb0da3a7d12f611be", "a14307cb17164b57bdd09e7373681272", "17fe943fdb4546138856cececadbdc41", "cac374bd58e14b50bac063685ed741ab", "a9452c25f1664eb5a87d3e954f8768c8", "17e5bc7c84f846879478f68cc26a3603" ] }, "executionInfo": { "elapsed": 506620, "status": "ok", "timestamp": 1730997570445, "user": { "displayName": "Ronan Le Meillat", "userId": "09161391957806824350" }, "user_tz": -60 }, "id": "QBOsKy-jAMtF", "outputId": "4ccdbf0a-f96f-42e3-8825-6d63dc4a025a" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0c622eb62cbe4679be84993a83e55aa6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/4.50k [00:00\")\n", " ]\n", "\n", " def __call__(self, samples):\n", " texts = []\n", " images = []\n", " for sample in samples:\n", " 
image = sample[\"image\"]\n", " answer = sample[\"caption\"]\n", " messages = [\n", " {\n", " \"role\": \"system\",\n", " \"content\": [\n", " {\"type\": \"text\", \"text\": prompt}\n", " ]\n", "\n", " },\n", " {\n", " \"role\": \"user\",\n", " \"content\": [\n", " {\"type\": \"image\"},\n", " ]\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": [\n", " {\"type\": \"text\", \"text\": answer}\n", " ]\n", " }\n", " ]\n", " text = processor.apply_chat_template(messages, add_generation_prompt=False)\n", " texts.append(text.strip())\n", " images.append([image.convert('RGB')])\n", "\n", " batch = processor(text=texts, images=images, return_tensors=\"pt\", padding=True)\n", "\n", " labels = batch[\"input_ids\"].clone()\n", " labels[labels == processor.tokenizer.pad_token_id] = self.image_token_id\n", " batch[\"labels\"] = labels\n", "\n", " return batch\n", "\n", "data_collator = MyDataCollator(processor)" ] }, { "cell_type": "markdown", "id": "e6467b63-06c1-4227-ab02-2aa5074c8231", "metadata": { "id": "vsq4TtIJAMtH" }, "source": [ "### Step 6: Setup training parameters" ] }, { "cell_type": "code", "execution_count": 12, "id": "1f06690e-4b81-4db7-882e-3f24a33350c6", "metadata": { "executionInfo": { "elapsed": 1008, "status": "ok", "timestamp": 1730998601172, "user": { "displayName": "Ronan Le Meillat", "userId": "09161391957806824350" }, "user_tz": -60 }, "id": "Q_WKQFfoAMtH" }, "outputs": [], "source": [ "from transformers import TrainingArguments, Trainer\n", "\n", "training_args = TrainingArguments(\n", " output_dir = output_dir,\n", " overwrite_output_dir = False,\n", " auto_find_batch_size = True,\n", " learning_rate = 2e-4,\n", " fp16 = True,\n", " per_device_train_batch_size = 2,\n", " per_device_eval_batch_size = 2,\n", " gradient_accumulation_steps = 8,\n", " dataloader_pin_memory = False,\n", " save_total_limit = 3,\n", " eval_strategy = \"steps\",\n", " save_strategy = \"steps\",\n", " eval_steps = 100,\n", " save_steps = 10, # checkpoint each 
10 steps\n",
    "    # Informational only: Trainer does not read this field; resumption is decided\n",
    "    # by the argument passed to trainer.train().\n",
    "    resume_from_checkpoint = True,\n",
    "    logging_steps = 5,\n",
    "    remove_unused_columns = False,\n",
    "    push_to_hub = False,\n",
    "    label_names = [\"labels\"],\n",
    "    load_best_model_at_end = False,\n",
    "    report_to = \"none\",\n",
    "    optim = \"paged_adamw_8bit\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "3dcca8d8-4d4e-49a4-9af5-4958edb9e0fc",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 426,
     "status": "ok",
     "timestamp": 1730998605441,
     "user": {
      "displayName": "Ronan Le Meillat",
      "userId": "09161391957806824350"
     },
     "user_tz": -60
    },
    "id": "vSIo17mgAMtH",
    "outputId": "3bebd35a-ed7f-49ee-e1bc-91594e8dcd24"
   },
   "outputs": [],
   "source": [
    "trainer = Trainer(\n",
    "    model = model,\n",
    "    args = training_args,\n",
    "    data_collator = data_collator,\n",
    "    train_dataset = train_dataset,\n",
    "    eval_dataset = eval_dataset,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff256dc8-6f5a-423a-9607-81cd9ef7735f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from transformers.trainer_utils import get_last_checkpoint\n",
    "\n",
    "# trainer.train(resume_from_checkpoint=True) raises\n",
    "# \"ValueError: No valid checkpoint found in output directory\" on the very first\n",
    "# run, when nothing has been saved yet. Look the checkpoint up explicitly and\n",
    "# start from scratch when there is none.\n",
    "checkpoint_dir = training_args.output_dir\n",
    "last_checkpoint = get_last_checkpoint(checkpoint_dir) if os.path.isdir(checkpoint_dir) else None\n",
    "\n",
    "if last_checkpoint is None:\n",
    "    print(f\"No checkpoint found in {checkpoint_dir!r}: starting fine-tuning from scratch\")\n",
    "else:\n",
    "    print(f\"Resuming fine-tuning from {last_checkpoint}\")\n",
    "\n",
    "trainer.train(resume_from_checkpoint=last_checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "bd15f877-ed10-4a6d-8b36-ebaaab875526",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Copy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n",
      "\n",
      "- `transformers` version: 4.47.0.dev0\n",
      "- Platform: Linux-5.15.167.4-microsoft-standard-WSL2-x86_64-with-glibc2.31\n",
      "- Python version: 3.12.7\n",
      "- Huggingface_hub version: 0.26.2\n",
      "- Safetensors version: 0.4.5\n",
      "- Accelerate version: 1.1.1\n",
      "- Accelerate config: \tnot found\n",
      "- PyTorch version (GPU?): 2.5.1+cu124 (True)\n",
      "- Tensorflow version (GPU?): not installed (NA)\n",
      "- Flax version (CPU?/GPU?/TPU?): not installed (NA)\n",
      "- Jax version: not installed\n",
      "- JaxLib version: not installed\n",
      "- Using distributed or parallel set-up in script?: \n",
      "- Using GPU in script?: \n",
      "- GPU type: NVIDIA GeForce RTX 2060\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!transformers-cli env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f82b7c75-c859-451f-9c40-fe9a039bf769",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}