diff --git a/merged/config.json b/merged/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3aefaefc9a3728d7226abc3646c3cc71728735cf --- /dev/null +++ b/merged/config.json @@ -0,0 +1 @@ +{"vocab_size": 128256, "max_position_embeddings": 131072, "hidden_size": 8192, "intermediate_size": 28672, "num_hidden_layers": 80, "num_attention_heads": 64, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 500000.0, "rope_scaling": {"factor": 8.0, "low_freq_factor": 1.0, "high_freq_factor": 4.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "attention_bias": false, "attention_dropout": 0.0, "mlp_bias": false, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": "bfloat16", "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": false, "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["LlamaForCausalLM"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "bos_token_id": 128000, "pad_token_id": null, "eos_token_id": [128001, 128008, 128009], "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "meta-llama/Meta-Llama-3.1-70B-Instruct", "transformers_version": "4.44.2", "model_type": "llama"} \ No newline at end of file diff --git a/merged/model-00001-of-00030.safetensors b/merged/model-00001-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4395921665d6a712253ccaf2acfd8bb47b143e5 --- /dev/null +++ b/merged/model-00001-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1d3e89540b4f0b743d6ccd327ea4f3eb598fab23adcfced970d23f69db5fd1 +size 4584408808 diff --git a/merged/model-00002-of-00030.safetensors b/merged/model-00002-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af64112e8966eee5b8b503f9523bac8dc74c1017 --- /dev/null +++ b/merged/model-00002-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e29958810a5aa1f63b65f692498fbd1332e9d32eff882c64fbe004fdaa3ebf +size 4664167376 diff --git a/merged/model-00003-of-00030.safetensors b/merged/model-00003-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..439d614ebacda969a6e3468c76539b06d1c92114 --- /dev/null +++ b/merged/model-00003-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ed11fa662250cb183389a997938e69a1b868f1b659e5ab8071556dee74a1c6 +size 4999711704 diff --git a/merged/model-00004-of-00030.safetensors b/merged/model-00004-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a08b34862f1a13db9ac88414bbe874bff91a9d73 --- /dev/null +++ b/merged/model-00004-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567a91179a5581e1d136ac6b4811ddd0fbc7550c78f8f215f8920fb4859ab18e +size 4966157032 diff --git a/merged/model-00005-of-00030.safetensors b/merged/model-00005-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f174cd0e22eebb2038b6ebcfa501c89ac31c1b2 --- /dev/null +++ b/merged/model-00005-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f17451db1695f9d7f76f7b71fb00c7915f8dcf37cf6d7eabdc36bd8f24cf09 +size 4664134408 diff --git a/merged/model-00006-of-00030.safetensors b/merged/model-00006-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27fa67fa729258816cfda1f9806a589b691015ea --- /dev/null +++ b/merged/model-00006-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8cda953c8171e32da8475869938a9ca31b4de34827b198e2811b5dd6e27561c +size 4664167408 diff --git a/merged/model-00007-of-00030.safetensors b/merged/model-00007-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..962268578321d0e3408ae781332d620758356bc1 --- /dev/null +++ b/merged/model-00007-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a164949f56698e34ed1454a61474768f38b2117cb434c223d6e22c3f3b65227 +size 4664167408 diff --git a/merged/model-00008-of-00030.safetensors b/merged/model-00008-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96e1dc3ca4f5460e61722e535e58cdbb91d97a00 --- /dev/null +++ b/merged/model-00008-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc3b9feb3ba0a4b0a1fd65d407f5891e781b44b2194b5e58b1f3f1b01c52271 +size 4999711728 diff --git a/merged/model-00009-of-00030.safetensors b/merged/model-00009-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d42ebee9bdabdb577f1ba18eef87a09112e6e705 --- /dev/null +++ b/merged/model-00009-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bb91feebf06dcbb6a6338cb25f7dcb2eca6affa104813c0952f1b1366162c05 +size 4966157056 diff --git a/merged/model-00010-of-00030.safetensors b/merged/model-00010-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1e3cd8606b7a7455c11af977705a9b2c5f47fd1 --- /dev/null +++ b/merged/model-00010-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fdefc7eac3539981a415cc1fabfce9bd8b0225ca7b99040cb8f9650f92bd1e0 +size 4664134408 diff --git a/merged/model-00011-of-00030.safetensors b/merged/model-00011-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..803f09899b0c1ab6fcc8d4b5f75e78c1520e3dbf --- /dev/null +++ b/merged/model-00011-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd277c74f7ba1239b3f60160af7c1fcd16a429b0d8397ebdfcc03196e8d35360 +size 4664167408 diff --git a/merged/model-00012-of-00030.safetensors b/merged/model-00012-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af1386ab5faac0ac6f73f84addcf9c44477455b7 --- /dev/null +++ b/merged/model-00012-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3da99af16b35ff96c2902671058183f3fab4a616d3fe72fba8ba5bb6272e962 +size 4664167408 diff --git a/merged/model-00013-of-00030.safetensors b/merged/model-00013-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8749b4286e7e55acd16368522360a93725a37388 --- /dev/null +++ b/merged/model-00013-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c9ce1e4152d4184a7a047c3c02c4ba21c214bf730786eaf40f4a15dfe8a70b +size 4999711728 diff --git a/merged/model-00014-of-00030.safetensors b/merged/model-00014-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b171582b9d976d0fa3980ebf7a47139b2aeea57 --- /dev/null +++ b/merged/model-00014-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f3877f0403140c6436608f18aa7700286029dd99346315d15e23afb8666893 +size 4966157056 diff --git a/merged/model-00015-of-00030.safetensors b/merged/model-00015-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1ace1bd12f4a243dea9c07b7ee791abaac76f5a --- /dev/null +++ b/merged/model-00015-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae79de7539a6877af7c5fe7b66526ef0269eb63e7c7816dec5c9261e134de366 +size 4664134408 diff --git a/merged/model-00016-of-00030.safetensors b/merged/model-00016-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44837bd418b84b9ab73034cc1a2e2a58255f5035 --- /dev/null +++ b/merged/model-00016-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7f90bd12e79fccc76e8c5f5f8384f0f4769aaf026fbee9610cafeff04cb7028 +size 4664167408 diff --git a/merged/model-00017-of-00030.safetensors b/merged/model-00017-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40d767fee5c5dc3947b5bbf621cb6cf256020a76 --- /dev/null +++ b/merged/model-00017-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8946a1e5b80c2ade8428d2fa3bb9bf888b728063f2307cd76b7a1a06d3cd6e2a +size 4664167408 diff --git a/merged/model-00018-of-00030.safetensors b/merged/model-00018-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e31c9eded9a5e148cbf92ae235d475efb5fadc50 --- /dev/null +++ b/merged/model-00018-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5ceee828f6a03212870f0fed6e172ff00884efea9c28f65494bfe006ecef0f +size 4999711728 diff --git a/merged/model-00019-of-00030.safetensors b/merged/model-00019-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f09de263582fff150c3948b44c0d1ee86acff8c --- /dev/null +++ b/merged/model-00019-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9789998fdaed9ebc3a829f083258ecc5023bcc57a56a0cbd0a8898bd800004da +size 4966157056 diff --git a/merged/model-00020-of-00030.safetensors b/merged/model-00020-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97842d760d5e01fe40b28f81b156ddb4fb4320de --- /dev/null +++ b/merged/model-00020-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac1d1c5d7da953a3e4ef52546958ade28098cd5bfdb571d9ae872c7998b948c9 +size 4664134408 diff --git a/merged/model-00021-of-00030.safetensors b/merged/model-00021-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93e217e12c56cdfefe0a743e9d88637662801a97 --- /dev/null +++ b/merged/model-00021-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165a7f6089c8a748b37a72a3db6bacf4488cd11a1980465cc0d69f87ea1f1994 +size 4664167408 diff --git a/merged/model-00022-of-00030.safetensors b/merged/model-00022-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed0049b0cf95ed847929f126d9bcb0dd9243e377 --- /dev/null +++ b/merged/model-00022-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3bdfcaa03aea1367c67b4287d4ab7e8f49c1adcab2bda2993dc278c1c1aa6f6 +size 4664167408 diff --git a/merged/model-00023-of-00030.safetensors b/merged/model-00023-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecccfec1417a5adca57341780dc0c028feebea47 --- /dev/null +++ b/merged/model-00023-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf48aac80da2bcca890ec6c882b422387ba87f9591af9f881f797ee6d85aa07 +size 4999711728 diff --git a/merged/model-00024-of-00030.safetensors b/merged/model-00024-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba7c403dcb1a834d332cd3478facfbea0544772b --- /dev/null +++ b/merged/model-00024-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36259dd9df0a29b32a6be42755080a31e627270a578e3f66131a1d8c62fc02a3 +size 4966157056 diff --git a/merged/model-00025-of-00030.safetensors b/merged/model-00025-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc428fdb818e53b5c01978206e6c2b1af0f3360d --- /dev/null +++ b/merged/model-00025-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7522894965b74911f39d1f3c7d2046a4d613e44ce8fd14a542934487a520e5 +size 4664134408 diff --git a/merged/model-00026-of-00030.safetensors b/merged/model-00026-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f4893733caa530a5add87350bb71a77e11d81ff --- /dev/null +++ b/merged/model-00026-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081f75d617c0e2ebe5bac8f78ea3e547ad27f737c0999810b544bac61496432f +size 4664167408 diff --git a/merged/model-00027-of-00030.safetensors b/merged/model-00027-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e0a8a666a9944b693456368d784d6cb81f6b1a7 --- /dev/null +++ b/merged/model-00027-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd1320df3711a77ff44c1aa4b8ab3c9bebafb446be5ef886a55d58542e471748 +size 4664167408 diff --git a/merged/model-00028-of-00030.safetensors b/merged/model-00028-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7725d3680650c2ad182f70bb5796bcc381ec5bef --- /dev/null +++ b/merged/model-00028-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea7fb72e9aaaf84853b6340da6c2650b2dddc338106e7e203861c31e51b5fd1 +size 4999711728 diff --git a/merged/model-00029-of-00030.safetensors b/merged/model-00029-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f11da4cee38b3288a49039d5675632579adb705b --- /dev/null +++ b/merged/model-00029-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a17596c917b4e5fb6e38f2b01c564e0b95b114ce67d897cb352e0488a866b23a +size 4966173536 diff --git a/merged/model-00030-of-00030.safetensors b/merged/model-00030-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c978551ff89d4d36cf8919745ad72792865d9264 --- /dev/null +++ b/merged/model-00030-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c845d7ccba30e2e0eb8ab20aa313d5ed45dca6634e214a62ed323b70462fa828 +size 2101346432 diff --git a/vllm_bitblas_bf16/config.json b/vllm_bitblas_bf16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3aefaefc9a3728d7226abc3646c3cc71728735cf --- /dev/null +++ b/vllm_bitblas_bf16/config.json @@ -0,0 +1 @@ +{"vocab_size": 128256, "max_position_embeddings": 131072, "hidden_size": 8192, "intermediate_size": 28672, "num_hidden_layers": 80, "num_attention_heads": 64, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 500000.0, "rope_scaling": {"factor": 8.0, "low_freq_factor": 1.0, "high_freq_factor": 4.0, "original_max_position_embeddings": 8192, "rope_type": "llama3"}, "attention_bias": false, "attention_dropout": 0.0, "mlp_bias": false, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": "bfloat16", "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": false, "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["LlamaForCausalLM"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "bos_token_id": 128000, "pad_token_id": null, "eos_token_id": [128001, 128008, 128009], "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "meta-llama/Meta-Llama-3.1-70B-Instruct", "transformers_version": "4.44.2", "model_type": "llama"} \ No newline at end of file diff --git a/vllm_bitblas_bf16/model-00001-of-00030.safetensors b/vllm_bitblas_bf16/model-00001-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2e6d61d6ccc4a5abe7db9fa62ca8ef998d427d6 --- /dev/null +++ b/vllm_bitblas_bf16/model-00001-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c110777fdbecc75d7bc16a1e246573599b2e80029611a520c76678d877af6b +size 2756485496 diff --git a/vllm_bitblas_bf16/model-00002-of-00030.safetensors b/vllm_bitblas_bf16/model-00002-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ea097ce4d165e0e3aa900ac5a26cb813a84ea32 --- /dev/null +++ b/vllm_bitblas_bf16/model-00002-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06d0f13a735365c579fa40977b46424589e753928c062107677b3c4bdeae19a +size 1003259624 diff --git a/vllm_bitblas_bf16/model-00003-of-00030.safetensors b/vllm_bitblas_bf16/model-00003-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..281e239f8f0883b389942091ac0ffed26021ef68 --- /dev/null +++ b/vllm_bitblas_bf16/model-00003-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14df7bf9ab5b0fd4b82560239b2dbd78f7f53ef1d7dabea439ce2ae732d2cf2 +size 1058350824 diff --git a/vllm_bitblas_bf16/model-00004-of-00030.safetensors b/vllm_bitblas_bf16/model-00004-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3b7d6abdfa603c70aad967629c7fd6c22890af5 --- /dev/null +++ b/vllm_bitblas_bf16/model-00004-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77659aec70798cad40732dfe893a0a85221555d2dec148d58d07c3f2f7199150 +size 1047073184 diff --git a/vllm_bitblas_bf16/model-00005-of-00030.safetensors b/vllm_bitblas_bf16/model-00005-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6f939c605991a1f1152bc0a9acb953756c5ff74 --- /dev/null +++ b/vllm_bitblas_bf16/model-00005-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d25253ffefb31bc8ed3aae61bfd4511d828b9b4cceae342606e3e385187bf0a +size 1003267720 diff --git a/vllm_bitblas_bf16/model-00006-of-00030.safetensors b/vllm_bitblas_bf16/model-00006-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70aa8d0eb263197f18c803cdb21e05d44cd7c51e --- /dev/null +++ b/vllm_bitblas_bf16/model-00006-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e974a245638f00df79acd4e975ae27ab1a40a932ce3a97ae03e79c15387d0d +size 1150060344 diff --git a/vllm_bitblas_bf16/model-00007-of-00030.safetensors b/vllm_bitblas_bf16/model-00007-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddb4d88e4e5fe3cca70f03c08440259ac83d0b82 --- /dev/null +++ b/vllm_bitblas_bf16/model-00007-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ea89159dc74d99ea9a5388caa6193f7a18a98c3fe6a3cdfdd46e87dfbe4d1e +size 1186760544 diff --git a/vllm_bitblas_bf16/model-00008-of-00030.safetensors b/vllm_bitblas_bf16/model-00008-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d8c8b7780b6076460bfb0a852e2b01977b98ee9 --- /dev/null +++ b/vllm_bitblas_bf16/model-00008-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29bb9298c76e93f04a524307da6ea8c709d70a820b6115ed51a9ced0996e9aad +size 1168451432 diff --git a/vllm_bitblas_bf16/model-00009-of-00030.safetensors b/vllm_bitblas_bf16/model-00009-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57941840b66e4127e05a5a100a5dc1405c59c978 --- /dev/null +++ b/vllm_bitblas_bf16/model-00009-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bb45cbdbb592983ece59daed5fa03a19b6e91b411617cc71e88fea7a913c369 +size 1267274216 diff --git a/vllm_bitblas_bf16/model-00010-of-00030.safetensors b/vllm_bitblas_bf16/model-00010-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b87d9af454babc25dd02eb965bbdac456e3d33f5 --- /dev/null +++ b/vllm_bitblas_bf16/model-00010-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7363ff4bc9c21e51e601584a4ae13987f12aecaf1cb58dde1641e2e0b5f3b0 +size 1076668048 diff --git a/vllm_bitblas_bf16/model-00011-of-00030.safetensors b/vllm_bitblas_bf16/model-00011-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d5dca4705c7ea3366d055f583e34aec48e05d14 --- /dev/null +++ b/vllm_bitblas_bf16/model-00011-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:019dbc38118596aa1e10d5eb105e223d8fb23218e0bfed0221be4144bd284230 +size 1076660080 diff --git a/vllm_bitblas_bf16/model-00012-of-00030.safetensors b/vllm_bitblas_bf16/model-00012-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be07ff83127f2f49c0066865c8d3fe9fd6a6cc8c --- /dev/null +++ b/vllm_bitblas_bf16/model-00012-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49d78d66e67f83f30cbeb7e1d5d4514865affa7d0d94ab3cb14042d9a5b91ff +size 1186760544 diff --git a/vllm_bitblas_bf16/model-00013-of-00030.safetensors b/vllm_bitblas_bf16/model-00013-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee9854628addc32847b119140ef3b36e02cbe66b --- /dev/null +++ b/vllm_bitblas_bf16/model-00013-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0a0ca463cc1806d491a011d0c918ada18b97a0bcefc8a09a3ce956b0be4527 +size 1168451432 diff --git a/vllm_bitblas_bf16/model-00014-of-00030.safetensors b/vllm_bitblas_bf16/model-00014-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af66f591bd6065d3ef8430d37aebabbd054a7520 --- /dev/null +++ b/vllm_bitblas_bf16/model-00014-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f117c26ba2dc5b4a377d0e11cf5b2430d7b11e63cb0aa04d0dba134cba9c0a20 +size 1047073296 diff --git a/vllm_bitblas_bf16/model-00015-of-00030.safetensors b/vllm_bitblas_bf16/model-00015-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f05cf96051128461c00601fd7ee901fbf2ec623f --- /dev/null +++ b/vllm_bitblas_bf16/model-00015-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f108174975713894db0b8b610c45be112406324c219125f71b11842089439d +size 1003267720 diff --git a/vllm_bitblas_bf16/model-00016-of-00030.safetensors b/vllm_bitblas_bf16/model-00016-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f73665e682bfb87ca145e1f04e996e21396ad3a3 --- /dev/null +++ b/vllm_bitblas_bf16/model-00016-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8137aab0a5d05be18db173c26284f694d46802a2dd7f50af21172f6eeba01f3 +size 1003259760 diff --git a/vllm_bitblas_bf16/model-00017-of-00030.safetensors b/vllm_bitblas_bf16/model-00017-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cf9b8a050cfb7cd952e50e0642a23d6f7094cef --- /dev/null +++ b/vllm_bitblas_bf16/model-00017-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee3ce9d775d59d56783a33228280bb429b0caf10adc1b78029a1607ac35c7df6 +size 1003259752 diff --git a/vllm_bitblas_bf16/model-00018-of-00030.safetensors b/vllm_bitblas_bf16/model-00018-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6eecb8d869f1372cdf5eb7bd2c85804e85023e22 --- /dev/null +++ b/vllm_bitblas_bf16/model-00018-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce26c47f32e123f8977246acb25817a0c8e6835f12f81ec20bc0683056555ea +size 1058350952 diff --git a/vllm_bitblas_bf16/model-00019-of-00030.safetensors b/vllm_bitblas_bf16/model-00019-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3521725860620739382dab12076828d66177603 --- /dev/null +++ b/vllm_bitblas_bf16/model-00019-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b32cfb32a6b8d490a22d1709321ef93a7bbd73367359bd146ac66f9fff239ac0 +size 1047073296 diff --git a/vllm_bitblas_bf16/model-00020-of-00030.safetensors b/vllm_bitblas_bf16/model-00020-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbff149cdf706fc067e4d0c2c2d46f6e176e1fda --- /dev/null +++ b/vllm_bitblas_bf16/model-00020-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c9057f3452a72d83369d66cbc8a010d126f20e26d1153b10260925fcc4f6a1 +size 1003267720 diff --git a/vllm_bitblas_bf16/model-00021-of-00030.safetensors b/vllm_bitblas_bf16/model-00021-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04df679844c31041583f35bbfaed3ceb8174f057 --- /dev/null +++ b/vllm_bitblas_bf16/model-00021-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b121b4ed7475c0f269cc11cb9c5a825a76ff5637a0892bcb7528c0c4fe82ce +size 1113360248 diff --git a/vllm_bitblas_bf16/model-00022-of-00030.safetensors b/vllm_bitblas_bf16/model-00022-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87550a513608437293280a86e99424213436eaee --- /dev/null +++ b/vllm_bitblas_bf16/model-00022-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b349b7fc70a0a07a63cce3a559d652207afe55eb8ab9691c85413d5a737d45 +size 1113360240 diff --git a/vllm_bitblas_bf16/model-00023-of-00030.safetensors b/vllm_bitblas_bf16/model-00023-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04aa8d9d592b5cf5b82e346489d2853f41d2380c --- /dev/null +++ b/vllm_bitblas_bf16/model-00023-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac0913a1952a9e5007fe1d8efeacc172dd33229e1aa98f2bf3ff5fada08a579 +size 1058350952 diff --git a/vllm_bitblas_bf16/model-00024-of-00030.safetensors b/vllm_bitblas_bf16/model-00024-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75cecbe2287f3e86078f5ba253a942e2db55ee02 --- /dev/null +++ b/vllm_bitblas_bf16/model-00024-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87511f0e58b7e6c625410804f722633736e601e89b5049bf44833eda33b46d11 +size 1047073296 diff --git a/vllm_bitblas_bf16/model-00025-of-00030.safetensors b/vllm_bitblas_bf16/model-00025-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00203e329f52fbaf065f2adcb74638812895d467 --- /dev/null +++ b/vllm_bitblas_bf16/model-00025-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35105ab634186dd867c249cbc51fb50735bac0baa85fde2c55ee65ac0cf00d80 +size 1076668048 diff --git a/vllm_bitblas_bf16/model-00026-of-00030.safetensors b/vllm_bitblas_bf16/model-00026-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ec2b19e8ad17fc9ce0153c8be5363a46cfd3ab5 --- /dev/null +++ b/vllm_bitblas_bf16/model-00026-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4a0b2c3c42f626501ecb470ab3815069b337e8bb133e854544cb62906abaef +size 1076660080 diff --git a/vllm_bitblas_bf16/model-00027-of-00030.safetensors b/vllm_bitblas_bf16/model-00027-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88bdc2f6e75b9796c95a624af426af3156a0c189 --- /dev/null +++ b/vllm_bitblas_bf16/model-00027-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3813d380dfd6b88a893aac27ddffe5b97e395b14f340efe450fd29e3a94beb27 +size 1076660056 diff --git a/vllm_bitblas_bf16/model-00028-of-00030.safetensors b/vllm_bitblas_bf16/model-00028-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c10fccfcc7a7f690943e439ea2d07b15e492ebde --- /dev/null +++ b/vllm_bitblas_bf16/model-00028-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8402fbc94167fd215f39cab546d74da14388a50fa28d48b86045947040c44331 +size 1058350952 diff --git a/vllm_bitblas_bf16/model-00029-of-00030.safetensors b/vllm_bitblas_bf16/model-00029-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..965d38ffded47daa117e97253eb7ee36cba14907 --- /dev/null +++ b/vllm_bitblas_bf16/model-00029-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4731a6133a62a871575935379e14a751b42ff4bc8bc0762dd13d757f62aaf20 +size 1157190248 diff --git a/vllm_bitblas_bf16/model-00030-of-00030.safetensors b/vllm_bitblas_bf16/model-00030-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c978551ff89d4d36cf8919745ad72792865d9264 --- /dev/null +++ b/vllm_bitblas_bf16/model-00030-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c845d7ccba30e2e0eb8ab20aa313d5ed45dca6634e214a62ed323b70462fa828 +size 2101346432 diff --git a/vllm_bitblas_bf16/quantize_config.json b/vllm_bitblas_bf16/quantize_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f1c0fffdbbc9a03246ec88b891a53acc60f29a5f --- /dev/null +++ b/vllm_bitblas_bf16/quantize_config.json @@ -0,0 +1,36 @@ +{ + "group_size": { + "gate_up_proj": 32, + "qkv_proj": 128, + "o_proj": 128, + "down_proj": 32 + }, + "nbits": { + "gate_up_proj": 2, + "qkv_proj": 4, + "o_proj": 4, + "down_proj": 2 + }, + "lora_rank": 64, + "skipped_dora_layers": [], + "block_influence_layers": [ + "layers.0", + "layers.13", + "layers.15", + "layers.17", + "layers.19", + "layers.21", + "layers.23", + "layers.26", + "layers.29", + "layers.31", + "layers.33", + "layers.56", + "layers.59", + "layers.68", + "layers.71", + "layers.79" + ], + "groupsize_4bit": 128, + "bitblas_dtype": "bfloat16" +} \ No newline at end of file