ShiyuHuang
committed on
Commit
•
d8db667
1
Parent(s):
c307c31
Update README.md
Browse files
README.md
CHANGED
@@ -13,23 +13,30 @@ inference: false
|
|
13 |
# CogVLM2-Llama3-Caption
|
14 |
|
15 |
<div align="center">
|
16 |
-
<img src=https://raw.githubusercontent.com/THUDM/CogVLM2/cf9cb3c60a871e0c8e5bde7feaf642e3021153e6/resources/logo.svg>
|
17 |
</div>
|
18 |
|
|
|
19 |
# Introduction
|
20 |
|
21 |
Typically, most video data does not come with corresponding descriptive text, so it is necessary to convert the video
|
22 |
-
data into textual descriptions to provide the essential training data for text-to-video models.
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
## Usage
|
25 |
|
26 |
```python
|
27 |
import io
|
|
|
|
|
28 |
import numpy as np
|
29 |
import torch
|
30 |
from decord import cpu, VideoReader, bridge
|
31 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
32 |
-
import argparse
|
33 |
|
34 |
MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
|
35 |
|
@@ -77,7 +84,6 @@ def load_video(video_data, strategy='chat'):
|
|
77 |
tokenizer = AutoTokenizer.from_pretrained(
|
78 |
MODEL_PATH,
|
79 |
trust_remote_code=True,
|
80 |
-
# padding_side="left"
|
81 |
)
|
82 |
|
83 |
model = AutoModelForCausalLM.from_pretrained(
|
@@ -132,7 +138,6 @@ def test():
|
|
132 |
|
133 |
if __name__ == '__main__':
|
134 |
test()
|
135 |
-
|
136 |
```
|
137 |
|
138 |
## License
|
|
|
13 |
# CogVLM2-Llama3-Caption
|
14 |
|
15 |
<div align="center">
|
16 |
+
<img src=https://raw.githubusercontent.com/THUDM/CogVLM2/cf9cb3c60a871e0c8e5bde7feaf642e3021153e6/resources/logo.svg>
|
17 |
</div>
|
18 |
|
19 |
+
|
20 |
# Introduction
|
21 |
|
22 |
Typically, most video data does not come with corresponding descriptive text, so it is necessary to convert the video
|
23 |
+
data into textual descriptions to provide the essential training data for text-to-video models.
|
24 |
+
CogVLM2-Caption is a video captioning model used to generate training data for the CogVideoX model.
|
25 |
+
|
26 |
+
<div align="center">
|
27 |
+
<img width="600px" height="auto" src="./CogVLM2-Caption-example.png">
|
28 |
+
</div>
|
29 |
|
30 |
## Usage
|
31 |
|
32 |
```python
|
33 |
import io
|
34 |
+
|
35 |
+
import argparse
|
36 |
import numpy as np
|
37 |
import torch
|
38 |
from decord import cpu, VideoReader, bridge
|
39 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
40 |
|
41 |
MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
|
42 |
|
|
|
84 |
tokenizer = AutoTokenizer.from_pretrained(
|
85 |
MODEL_PATH,
|
86 |
trust_remote_code=True,
|
|
|
87 |
)
|
88 |
|
89 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
138 |
|
139 |
if __name__ == '__main__':
|
140 |
test()
|
|
|
141 |
```
|
142 |
|
143 |
## License
|