ryanzhangfan committed · Commit 705a7b5 · Parent: 5d56bea

Update README.md

README.md CHANGED
@@ -16,9 +16,11 @@ import sys
 sys.path.append(PATH_TO_BAAI_Emu3-Gen_MODEL)
 from processing_emu3 import Emu3Processor
 
+# model path
 EMU_HUB = "BAAI/Emu3-Gen"
 VQ_HUB = "BAAI/Emu3-VisionTokenizer"
 
+# prepare model and processor
 model = AutoModelForCausalLM.from_pretrained(
     EMU_HUB,
     device_map="cuda:0",
@@ -32,6 +34,7 @@ image_processor = AutoImageProcessor.from_pretrained(VQ_HUB, trust_remote_code=True)
 image_tokenizer = AutoModel.from_pretrained(VQ_HUB, device_map="cuda:0", trust_remote_code=True).eval()
 processor = Emu3Processor(image_processor, image_tokenizer, tokenizer)
 
+# prepare input
 POSITIVE_PROMPT = " masterpiece, film grained, best quality."
 NEGATIVE_PROMPT = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry."
 
@@ -48,6 +51,7 @@ kwargs = dict(
 pos_inputs = processor(text=prompt, **kwargs)
 neg_inputs = processor(text=NEGATIVE_PROMPT, **kwargs)
 
+# prepare hyper parameters
 GENERATION_CONFIG = GenerationConfig(
     use_cache=True,
     eos_token_id=model.config.eos_token_id,
@@ -71,6 +75,7 @@ logits_processor = LogitsProcessorList([
     ),
 ])
 
+# generate
 outputs = model.generate(
     pos_inputs.input_ids.to("cuda:0"),
     GENERATION_CONFIG,
@@ -78,7 +83,6 @@ outputs = model.generate(
 )
 
 mm_list = processor.decode(outputs[0])
-print(mm_list)
 for idx, im in enumerate(mm_list):
     if not isinstance(im, Image.Image):
         continue
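The hunks above show only the changed regions, so for convenience here is a sketch of how the updated README snippet reads once the commit is applied, assembled from the diff context. Everything not visible in the hunks is an assumption and is flagged in the comments: the import block, the local-path placeholder, the model-loading and generation-config keyword arguments beyond those shown, the example prompt, the contents of `kwargs`, the classifier-free-guidance logits processor (suggested by the paired positive/negative inputs), and the final save step. The README.md at 705a7b5 is the authoritative version.

```python
import sys

import torch  # assumed import; the dtype below is an assumption too
from PIL import Image  # implied by the isinstance(im, Image.Image) check
from transformers import (
    AutoImageProcessor,
    AutoModel,
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
)
from transformers.generation import (
    LogitsProcessorList,
    UnbatchedClassifierFreeGuidanceLogitsProcessor,  # assumed CFG processor
)

# The README uses a bare placeholder here; spelled as a valid Python name below.
PATH_TO_BAAI_Emu3_Gen_MODEL = "/path/to/BAAI/Emu3-Gen"  # hypothetical local path
sys.path.append(PATH_TO_BAAI_Emu3_Gen_MODEL)
from processing_emu3 import Emu3Processor

# model path
EMU_HUB = "BAAI/Emu3-Gen"
VQ_HUB = "BAAI/Emu3-VisionTokenizer"

# prepare model and processor
model = AutoModelForCausalLM.from_pretrained(
    EMU_HUB,
    device_map="cuda:0",
    torch_dtype=torch.bfloat16,  # assumed: remaining kwargs fall outside the hunk
    trust_remote_code=True,      # assumed
).eval()
tokenizer = AutoTokenizer.from_pretrained(EMU_HUB, trust_remote_code=True)  # assumed
image_processor = AutoImageProcessor.from_pretrained(VQ_HUB, trust_remote_code=True)
image_tokenizer = AutoModel.from_pretrained(VQ_HUB, device_map="cuda:0", trust_remote_code=True).eval()
processor = Emu3Processor(image_processor, image_tokenizer, tokenizer)

# prepare input
POSITIVE_PROMPT = " masterpiece, film grained, best quality."
NEGATIVE_PROMPT = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry."

prompt = "a portrait of young girl." + POSITIVE_PROMPT  # assumed example prompt

kwargs = dict(  # assumed contents; only `kwargs = dict(` shows in a hunk header
    mode="G",
    ratio="1:1",
    image_area=model.config.image_area,
    return_tensors="pt",
)
pos_inputs = processor(text=prompt, **kwargs)
neg_inputs = processor(text=NEGATIVE_PROMPT, **kwargs)

# prepare hyper parameters
GENERATION_CONFIG = GenerationConfig(
    use_cache=True,
    eos_token_id=model.config.eos_token_id,
    pad_token_id=model.config.pad_token_id,  # assumed tail of the config
    max_new_tokens=40960,                    # assumed
)

# The diff shows a LogitsProcessorList being closed; given the paired
# positive/negative inputs, classifier-free guidance is a natural fit.
logits_processor = LogitsProcessorList([
    UnbatchedClassifierFreeGuidanceLogitsProcessor(
        3.0,  # assumed guidance scale
        model,
        unconditional_ids=neg_inputs.input_ids.to("cuda:0"),
    ),
])

# generate
outputs = model.generate(
    pos_inputs.input_ids.to("cuda:0"),
    GENERATION_CONFIG,
    logits_processor=logits_processor,
)

mm_list = processor.decode(outputs[0])
for idx, im in enumerate(mm_list):
    if not isinstance(im, Image.Image):
        continue
    im.save(f"result_{idx}.png")  # assumed save step; the diff ends at `continue`
```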