azhiboedova commited on
Commit
de0f783
1 Parent(s): 1144ea2

Upload 4 files

Browse files
Files changed (4) hide show
  1. =0.4.2 +175 -0
  2. config.json +115 -0
  3. generation_config.json +10 -0
  4. model.safetensors +3 -0
=0.4.2 ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Collecting vllm
2
+ Using cached vllm-0.5.3.post1-cp38-abi3-manylinux1_x86_64.whl.metadata (1.8 kB)
3
+ Requirement already satisfied: cmake>=3.21 in /opt/conda/lib/python3.10/site-packages (from vllm) (3.30.1)
4
+ Requirement already satisfied: ninja in /opt/conda/lib/python3.10/site-packages (from vllm) (1.11.1.1)
5
+ Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from vllm) (5.9.0)
6
+ Requirement already satisfied: sentencepiece in /opt/conda/lib/python3.10/site-packages (from vllm) (0.1.99)
7
+ Requirement already satisfied: numpy<2.0.0 in /opt/conda/lib/python3.10/site-packages (from vllm) (1.26.3)
8
+ Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from vllm) (2.32.3)
9
+ Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from vllm) (4.66.4)
10
+ Requirement already satisfied: py-cpuinfo in /opt/conda/lib/python3.10/site-packages (from vllm) (9.0.0)
11
+ Requirement already satisfied: transformers>=4.42.4 in /opt/conda/lib/python3.10/site-packages (from vllm) (4.43.2)
12
+ Requirement already satisfied: tokenizers>=0.19.1 in /opt/conda/lib/python3.10/site-packages (from vllm) (0.19.1)
13
+ Collecting fastapi (from vllm)
14
+ Using cached fastapi-0.111.1-py3-none-any.whl.metadata (26 kB)
15
+ Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from vllm) (3.9.5)
16
+ Collecting openai (from vllm)
17
+ Using cached openai-1.37.1-py3-none-any.whl.metadata (22 kB)
18
+ Requirement already satisfied: uvicorn[standard] in /opt/conda/lib/python3.10/site-packages (from vllm) (0.30.3)
19
+ Collecting pydantic>=2.0 (from vllm)
20
+ Using cached pydantic-2.8.2-py3-none-any.whl.metadata (125 kB)
21
+ Requirement already satisfied: pillow in /opt/conda/lib/python3.10/site-packages (from vllm) (10.2.0)
22
+ Requirement already satisfied: prometheus-client>=0.18.0 in /opt/conda/lib/python3.10/site-packages (from vllm) (0.20.0)
23
+ Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)
24
+ Using cached prometheus_fastapi_instrumentator-7.0.0-py3-none-any.whl.metadata (13 kB)
25
+ Collecting tiktoken>=0.6.0 (from vllm)
26
+ Using cached tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
27
+ Collecting lm-format-enforcer==0.10.3 (from vllm)
28
+ Using cached lm_format_enforcer-0.10.3-py3-none-any.whl.metadata (16 kB)
29
+ Collecting outlines<0.1,>=0.0.43 (from vllm)
30
+ Using cached outlines-0.0.46-py3-none-any.whl.metadata (15 kB)
31
+ Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from vllm) (4.9.0)
32
+ Requirement already satisfied: filelock>=3.10.4 in /opt/conda/lib/python3.10/site-packages (from vllm) (3.13.1)
33
+ Requirement already satisfied: pyzmq in /opt/conda/lib/python3.10/site-packages (from vllm) (26.0.3)
34
+ Collecting ray>=2.9 (from vllm)
35
+ Using cached ray-2.33.0-cp310-cp310-manylinux2014_x86_64.whl.metadata (13 kB)
36
+ Requirement already satisfied: nvidia-ml-py in /opt/conda/lib/python3.10/site-packages (from vllm) (12.555.43)
37
+ Collecting torch==2.3.1 (from vllm)
38
+ Using cached torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
39
+ Collecting torchvision==0.18.1 (from vllm)
40
+ Using cached torchvision-0.18.1-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
41
+ Collecting xformers==0.0.27 (from vllm)
42
+ Using cached xformers-0.0.27-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.0 kB)
43
+ Collecting vllm-flash-attn==2.5.9.post1 (from vllm)
44
+ Using cached vllm_flash_attn-2.5.9.post1-cp310-cp310-manylinux1_x86_64.whl.metadata (482 bytes)
45
+ Requirement already satisfied: interegular>=0.3.2 in /opt/conda/lib/python3.10/site-packages (from lm-format-enforcer==0.10.3->vllm) (0.3.3)
46
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from lm-format-enforcer==0.10.3->vllm) (23.1)
47
+ Requirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from lm-format-enforcer==0.10.3->vllm) (6.0.1)
48
+ Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (1.12)
49
+ Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (3.1)
50
+ Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (3.1.3)
51
+ Requirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (2023.10.0)
52
+ Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (12.1.105)
53
+ Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (12.1.105)
54
+ Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (12.1.105)
55
+ Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.3.1->vllm)
56
+ Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
57
+ Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (12.1.3.1)
58
+ Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (11.0.2.54)
59
+ Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (10.3.2.106)
60
+ Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.3.1->vllm)
61
+ Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
62
+ Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.3.1->vllm)
63
+ Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
64
+ Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (2.20.5)
65
+ Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (12.1.105)
66
+ Requirement already satisfied: triton==2.3.1 in /opt/conda/lib/python3.10/site-packages (from torch==2.3.1->vllm) (2.3.1)
67
+ Requirement already satisfied: nvidia-nvjitlink-cu12 in /opt/conda/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.3.1->vllm) (12.5.82)
68
+ Requirement already satisfied: lark in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (1.1.9)
69
+ Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (1.6.0)
70
+ Collecting cloudpickle (from outlines<0.1,>=0.0.43->vllm)
71
+ Using cached cloudpickle-3.0.0-py3-none-any.whl.metadata (7.0 kB)
72
+ Requirement already satisfied: diskcache in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (5.6.3)
73
+ Requirement already satisfied: numba in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (0.60.0)
74
+ Requirement already satisfied: referencing in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (0.30.2)
75
+ Requirement already satisfied: jsonschema in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (4.19.2)
76
+ Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (2.20.0)
77
+ Requirement already satisfied: pycountry in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (24.6.1)
78
+ Requirement already satisfied: pyairports in /opt/conda/lib/python3.10/site-packages (from outlines<0.1,>=0.0.43->vllm) (2.1.1)
79
+ Collecting starlette<1.0.0,>=0.30.0 (from prometheus-fastapi-instrumentator>=7.0.0->vllm)
80
+ Using cached starlette-0.38.1-py3-none-any.whl.metadata (5.9 kB)
81
+ Collecting annotated-types>=0.4.0 (from pydantic>=2.0->vllm)
82
+ Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
83
+ Requirement already satisfied: pydantic-core==2.20.1 in /opt/conda/lib/python3.10/site-packages (from pydantic>=2.0->vllm) (2.20.1)
84
+ Requirement already satisfied: click>=7.0 in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (8.1.7)
85
+ Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.0.8)
86
+ Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (5.27.2)
87
+ Requirement already satisfied: aiosignal in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.3.1)
88
+ Requirement already satisfied: frozenlist in /opt/conda/lib/python3.10/site-packages (from ray>=2.9->vllm) (1.4.1)
89
+ Requirement already satisfied: regex>=2022.1.18 in /opt/conda/lib/python3.10/site-packages (from tiktoken>=0.6.0->vllm) (2024.7.24)
90
+ Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->vllm) (2.0.4)
91
+ Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->vllm) (3.4)
92
+ Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->vllm) (2.1.0)
93
+ Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->vllm) (2024.2.2)
94
+ Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/conda/lib/python3.10/site-packages (from tokenizers>=0.19.1->vllm) (0.24.2)
95
+ Requirement already satisfied: safetensors>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.42.4->vllm) (0.4.3)
96
+ Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->vllm) (23.1.0)
97
+ Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->vllm) (6.0.5)
98
+ Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->vllm) (1.9.4)
99
+ Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->vllm) (4.0.3)
100
+ Collecting starlette<1.0.0,>=0.30.0 (from prometheus-fastapi-instrumentator>=7.0.0->vllm)
101
+ Using cached starlette-0.37.2-py3-none-any.whl.metadata (5.9 kB)
102
+ Collecting fastapi-cli>=0.0.2 (from fastapi->vllm)
103
+ Using cached fastapi_cli-0.0.4-py3-none-any.whl.metadata (7.0 kB)
104
+ Requirement already satisfied: httpx>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from fastapi->vllm) (0.27.0)
105
+ Requirement already satisfied: python-multipart>=0.0.7 in /opt/conda/lib/python3.10/site-packages (from fastapi->vllm) (0.0.9)
106
+ Requirement already satisfied: email_validator>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from fastapi->vllm) (2.2.0)
107
+ Requirement already satisfied: h11>=0.8 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.14.0)
108
+ Requirement already satisfied: httptools>=0.5.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.6.1)
109
+ Requirement already satisfied: python-dotenv>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (1.0.1)
110
+ Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (0.19.0)
111
+ Collecting watchfiles>=0.13 (from uvicorn[standard]->vllm)
112
+ Using cached watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
113
+ Requirement already satisfied: websockets>=10.4 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]->vllm) (12.0)
114
+ Requirement already satisfied: anyio<5,>=3.5.0 in /opt/conda/lib/python3.10/site-packages (from openai->vllm) (4.4.0)
115
+ Requirement already satisfied: distro<2,>=1.7.0 in /opt/conda/lib/python3.10/site-packages (from openai->vllm) (1.8.0)
116
+ Requirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from openai->vllm) (1.3.1)
117
+ Requirement already satisfied: exceptiongroup>=1.0.2 in /opt/conda/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai->vllm) (1.2.0)
118
+ Requirement already satisfied: dnspython>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from email_validator>=2.0.0->fastapi->vllm) (2.6.1)
119
+ Collecting typer>=0.12.3 (from fastapi-cli>=0.0.2->fastapi->vllm)
120
+ Using cached typer-0.12.3-py3-none-any.whl.metadata (15 kB)
121
+ Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx>=0.23.0->fastapi->vllm) (1.0.5)
122
+ Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch==2.3.1->vllm) (2.1.3)
123
+ Requirement already satisfied: pyarrow>=15.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (17.0.0)
124
+ Requirement already satisfied: pyarrow-hotfix in /opt/conda/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (0.6)
125
+ Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (0.3.7)
126
+ Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (2.2.2)
127
+ Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (3.4.1)
128
+ Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets->outlines<0.1,>=0.0.43->vllm) (0.70.15)
129
+ Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/conda/lib/python3.10/site-packages (from jsonschema->outlines<0.1,>=0.0.43->vllm) (2023.7.1)
130
+ Requirement already satisfied: rpds-py>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from jsonschema->outlines<0.1,>=0.0.43->vllm) (0.10.6)
131
+ Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /opt/conda/lib/python3.10/site-packages (from numba->outlines<0.1,>=0.0.43->vllm) (0.43.0)
132
+ Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch==2.3.1->vllm) (1.3.0)
133
+ Requirement already satisfied: shellingham>=1.3.0 in /opt/conda/lib/python3.10/site-packages (from typer>=0.12.3->fastapi-cli>=0.0.2->fastapi->vllm) (1.5.4)
134
+ Collecting rich>=10.11.0 (from typer>=0.12.3->fastapi-cli>=0.0.2->fastapi->vllm)
135
+ Using cached rich-13.7.1-py3-none-any.whl.metadata (18 kB)
136
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets->outlines<0.1,>=0.0.43->vllm) (2.9.0.post0)
137
+ Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets->outlines<0.1,>=0.0.43->vllm) (2023.3.post1)
138
+ Requirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets->outlines<0.1,>=0.0.43->vllm) (2024.1)
139
+ Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets->outlines<0.1,>=0.0.43->vllm) (1.16.0)
140
+ Collecting markdown-it-py>=2.2.0 (from rich>=10.11.0->typer>=0.12.3->fastapi-cli>=0.0.2->fastapi->vllm)
141
+ Using cached markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)
142
+ Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from rich>=10.11.0->typer>=0.12.3->fastapi-cli>=0.0.2->fastapi->vllm) (2.15.1)
143
+ Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer>=0.12.3->fastapi-cli>=0.0.2->fastapi->vllm) (0.1.2)
144
+ Using cached vllm-0.5.3.post1-cp38-abi3-manylinux1_x86_64.whl (158.3 MB)
145
+ Using cached lm_format_enforcer-0.10.3-py3-none-any.whl (43 kB)
146
+ Using cached torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl (779.1 MB)
147
+ Using cached torchvision-0.18.1-cp310-cp310-manylinux1_x86_64.whl (7.0 MB)
148
+ Using cached vllm_flash_attn-2.5.9.post1-cp310-cp310-manylinux1_x86_64.whl (37.1 MB)
149
+ Using cached xformers-0.0.27-cp310-cp310-manylinux2014_x86_64.whl (164.1 MB)
150
+ Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
151
+ Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)
152
+ Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)
153
+ Using cached outlines-0.0.46-py3-none-any.whl (101 kB)
154
+ Using cached prometheus_fastapi_instrumentator-7.0.0-py3-none-any.whl (19 kB)
155
+ Using cached pydantic-2.8.2-py3-none-any.whl (423 kB)
156
+ Using cached ray-2.33.0-cp310-cp310-manylinux2014_x86_64.whl (64.8 MB)
157
+ Using cached tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
158
+ Using cached fastapi-0.111.1-py3-none-any.whl (92 kB)
159
+ Using cached openai-1.37.1-py3-none-any.whl (337 kB)
160
+ Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)
161
+ Using cached fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)
162
+ Using cached starlette-0.37.2-py3-none-any.whl (71 kB)
163
+ Using cached watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
164
+ Using cached cloudpickle-3.0.0-py3-none-any.whl (20 kB)
165
+ Using cached typer-0.12.3-py3-none-any.whl (47 kB)
166
+ Using cached rich-13.7.1-py3-none-any.whl (240 kB)
167
+ Using cached markdown_it_py-3.0.0-py3-none-any.whl (87 kB)
168
+ Installing collected packages: nvidia-cusparse-cu12, nvidia-cudnn-cu12, markdown-it-py, cloudpickle, annotated-types, watchfiles, tiktoken, starlette, rich, pydantic, nvidia-cusolver-cu12, typer, torch, prometheus-fastapi-instrumentator, openai, lm-format-enforcer, xformers, vllm-flash-attn, torchvision, ray, fastapi-cli, outlines, fastapi, vllm
169
+ Attempting uninstall: torch
170
+ Found existing installation: torch 2.2.1
171
+ Can't uninstall 'torch'. No files were found to uninstall.
172
+ Attempting uninstall: torchvision
173
+ Found existing installation: torchvision 0.17.1
174
+ Can't uninstall 'torchvision'. No files were found to uninstall.
175
+ Successfully installed annotated-types-0.7.0 cloudpickle-3.0.0 fastapi-0.111.1 fastapi-cli-0.0.4 lm-format-enforcer-0.10.3 markdown-it-py-3.0.0 nvidia-cudnn-cu12-8.9.2.26 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 openai-1.37.1 outlines-0.0.46 prometheus-fastapi-instrumentator-7.0.0 pydantic-2.8.2 ray-2.33.0 rich-13.7.1 starlette-0.37.2 tiktoken-0.7.0 torch-2.3.1 torchvision-0.18.1 typer-0.12.3 vllm-0.5.3.post1 vllm-flash-attn-2.5.9.post1 watchfiles-0.22.0 xformers-0.0.27
config.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/content/converted/meta-llama/Meta-Llama-3.1-8B-Instruct",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "hidden_act": "silu",
15
+ "hidden_size": 4096,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 14336,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "llama",
21
+ "num_attention_heads": 32,
22
+ "num_hidden_layers": 32,
23
+ "num_key_value_heads": 8,
24
+ "pretraining_tp": 1,
25
+ "quantization_config": {
26
+ "in_group_size": 8,
27
+ "linear_weights_not_to_quantize": [
28
+ "model.layers.0.input_layernorm.weight",
29
+ "model.layers.0.post_attention_layernorm.weight",
30
+ "model.layers.1.input_layernorm.weight",
31
+ "model.layers.1.post_attention_layernorm.weight",
32
+ "model.layers.2.input_layernorm.weight",
33
+ "model.layers.2.post_attention_layernorm.weight",
34
+ "model.layers.3.input_layernorm.weight",
35
+ "model.layers.3.post_attention_layernorm.weight",
36
+ "model.layers.4.input_layernorm.weight",
37
+ "model.layers.4.post_attention_layernorm.weight",
38
+ "model.layers.5.input_layernorm.weight",
39
+ "model.layers.5.post_attention_layernorm.weight",
40
+ "model.layers.6.input_layernorm.weight",
41
+ "model.layers.6.post_attention_layernorm.weight",
42
+ "model.layers.7.input_layernorm.weight",
43
+ "model.layers.7.post_attention_layernorm.weight",
44
+ "model.layers.8.input_layernorm.weight",
45
+ "model.layers.8.post_attention_layernorm.weight",
46
+ "model.layers.9.input_layernorm.weight",
47
+ "model.layers.9.post_attention_layernorm.weight",
48
+ "model.layers.10.input_layernorm.weight",
49
+ "model.layers.10.post_attention_layernorm.weight",
50
+ "model.layers.11.input_layernorm.weight",
51
+ "model.layers.11.post_attention_layernorm.weight",
52
+ "model.layers.12.input_layernorm.weight",
53
+ "model.layers.12.post_attention_layernorm.weight",
54
+ "model.layers.13.input_layernorm.weight",
55
+ "model.layers.13.post_attention_layernorm.weight",
56
+ "model.layers.14.input_layernorm.weight",
57
+ "model.layers.14.post_attention_layernorm.weight",
58
+ "model.layers.15.input_layernorm.weight",
59
+ "model.layers.15.post_attention_layernorm.weight",
60
+ "model.layers.16.input_layernorm.weight",
61
+ "model.layers.16.post_attention_layernorm.weight",
62
+ "model.layers.17.input_layernorm.weight",
63
+ "model.layers.17.post_attention_layernorm.weight",
64
+ "model.layers.18.input_layernorm.weight",
65
+ "model.layers.18.post_attention_layernorm.weight",
66
+ "model.layers.19.input_layernorm.weight",
67
+ "model.layers.19.post_attention_layernorm.weight",
68
+ "model.layers.20.input_layernorm.weight",
69
+ "model.layers.20.post_attention_layernorm.weight",
70
+ "model.layers.21.input_layernorm.weight",
71
+ "model.layers.21.post_attention_layernorm.weight",
72
+ "model.layers.22.input_layernorm.weight",
73
+ "model.layers.22.post_attention_layernorm.weight",
74
+ "model.layers.23.input_layernorm.weight",
75
+ "model.layers.23.post_attention_layernorm.weight",
76
+ "model.layers.24.input_layernorm.weight",
77
+ "model.layers.24.post_attention_layernorm.weight",
78
+ "model.layers.25.input_layernorm.weight",
79
+ "model.layers.25.post_attention_layernorm.weight",
80
+ "model.layers.26.input_layernorm.weight",
81
+ "model.layers.26.post_attention_layernorm.weight",
82
+ "model.layers.27.input_layernorm.weight",
83
+ "model.layers.27.post_attention_layernorm.weight",
84
+ "model.layers.28.input_layernorm.weight",
85
+ "model.layers.28.post_attention_layernorm.weight",
86
+ "model.layers.29.input_layernorm.weight",
87
+ "model.layers.29.post_attention_layernorm.weight",
88
+ "model.layers.30.input_layernorm.weight",
89
+ "model.layers.30.post_attention_layernorm.weight",
90
+ "model.layers.31.input_layernorm.weight",
91
+ "model.layers.31.post_attention_layernorm.weight",
92
+ "model.embed_tokens.weight",
93
+ "model.norm.weight",
94
+ "lm_head.weight"
95
+ ],
96
+ "nbits_per_codebook": 16,
97
+ "num_codebooks": 1,
98
+ "out_group_size": 1,
99
+ "quant_method": "aqlm"
100
+ },
101
+ "rms_norm_eps": 1e-05,
102
+ "rope_scaling": {
103
+ "factor": 8.0,
104
+ "high_freq_factor": 4.0,
105
+ "low_freq_factor": 1.0,
106
+ "original_max_position_embeddings": 8192,
107
+ "rope_type": "llama3"
108
+ },
109
+ "rope_theta": 500000.0,
110
+ "tie_word_embeddings": false,
111
+ "torch_dtype": "float16",
112
+ "transformers_version": "4.43.2",
113
+ "use_cache": true,
114
+ "vocab_size": 128256
115
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "transformers_version": "4.43.2"
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10e1e62bdf51fdbf652b27a3874df93f09ec0d375d49255386bd954539fda55a
3
+ size 4084429344