Freak-ppa committed
Commit f4d058d
1 Parent(s): 6ffb4a7

Upload 2 files

ComfyUI/custom_nodes/img2txt-comfyui-nodes/src/blip_img2txt.py CHANGED
@@ -1,3 +1,4 @@
+import os
 from PIL import Image
 from transformers import (
     BlipProcessor,
@@ -9,7 +10,7 @@ from transformers import (
 
 import torch
 import model_management
-
+import folder_paths
 
 class BLIPImg2Txt:
     def __init__(
@@ -21,21 +22,24 @@ class BLIPImg2Txt:
         repetition_penalty: float,
         search_beams: int,
         model_id: str = "Salesforce/blip-image-captioning-large",
+        custom_model_path: str = None,
     ):
         self.conditional_caption = conditional_caption
         self.model_id = model_id
+        self.custom_model_path = custom_model_path
+
+        if self.custom_model_path and os.path.exists(self.custom_model_path):
+            self.model_path = self.custom_model_path
+        else:
+            self.model_path = folder_paths.get_full_path("blip", model_id)
 
-        # Determine do_sample and num_beams
         if temperature > 1.1 or temperature < 0.90:
             do_sample = True
-            num_beams = 1  # Sampling does not use beam search
+            num_beams = 1
         else:
             do_sample = False
-            num_beams = (
-                search_beams if search_beams > 1 else 1
-            )  # Use beam search if num_beams > 1
+            num_beams = search_beams if search_beams > 1 else 1
 
-        # Initialize text config kwargs
         self.text_config_kwargs = {
             "do_sample": do_sample,
             "max_length": max_words,
@@ -51,18 +55,25 @@ class BLIPImg2Txt:
         if image.mode != "RGB":
             image = image.convert("RGB")
 
-        processor = BlipProcessor.from_pretrained(self.model_id)
+        if self.model_path and os.path.exists(self.model_path):
+            model_path = self.model_path
+            local_files_only = True
+        else:
+            model_path = self.model_id
+            local_files_only = False
+
+        processor = BlipProcessor.from_pretrained(model_path, local_files_only=local_files_only)
 
-        # Update and apply configurations
-        config_text = BlipTextConfig.from_pretrained(self.model_id)
+        config_text = BlipTextConfig.from_pretrained(model_path, local_files_only=local_files_only)
         config_text.update(self.text_config_kwargs)
-        config_vision = BlipVisionConfig.from_pretrained(self.model_id)
+        config_vision = BlipVisionConfig.from_pretrained(model_path, local_files_only=local_files_only)
         config = BlipConfig.from_text_vision_configs(config_text, config_vision)
 
         model = BlipForConditionalGeneration.from_pretrained(
-            self.model_id,
+            model_path,
             config=config,
             torch_dtype=torch.float16,
+            local_files_only=local_files_only
         ).to(model_management.get_torch_device())
 
         inputs = processor(
@@ -78,4 +89,4 @@ class BLIPImg2Txt:
         del model
         torch.cuda.empty_cache()
 
-        return ret
+        return ret
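
The net effect in blip_img2txt.py is a local-first loading order: an explicit custom_model_path wins when it exists on disk, then the path resolved through ComfyUI's folder_paths registry, and only as a last resort the Hub id with local_files_only=False. A minimal standalone sketch of that resolution order (the function name and the example paths are illustrative assumptions, not part of the commit):

import os

def resolve_model_source(custom_model_path, registered_path, model_id):
    # 1. An explicit local directory wins when it exists on disk.
    if custom_model_path and os.path.exists(custom_model_path):
        return custom_model_path, True  # (source, local_files_only)
    # 2. Next, a path resolved via ComfyUI's folder_paths registry.
    if registered_path and os.path.exists(registered_path):
        return registered_path, True
    # 3. Otherwise fall back to downloading model_id from the Hugging Face Hub.
    return model_id, False

# Illustrative call; the directory is an assumed layout, not a ComfyUI default:
source, local_only = resolve_model_source(
    "ComfyUI/models/blip/blip-image-captioning-large",
    None,
    "Salesforce/blip-image-captioning-large",
)

Passing source and local_files_only=local_only to BlipProcessor.from_pretrained and BlipForConditionalGeneration.from_pretrained then reproduces the loading behavior this commit adds.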
ComfyUI/custom_nodes/img2txt-comfyui-nodes/src/img2txt_node.py CHANGED
@@ -14,6 +14,8 @@ from .mini_cpm_img2txt import MiniPCMImg2Txt
 
 from typing import Tuple
 
+import os
+import folder_paths
 
 class Img2TxtNode:
     CATEGORY = "img2txt"
@@ -145,6 +147,11 @@ class Img2TxtNode:
 
         captions = []
         if use_all_models or use_blip_model:
+            blip_model_path = folder_paths.get_folder_paths("blip")[0]
+            print(f"blip_model_path: {blip_model_path}")
+            if not blip_model_path or not os.path.exists(blip_model_path):
+                raise ValueError("BLIP model 'blip-image-captioning-large' not found in ComfyUI models directory. Please ensure it's in the 'models/blip' folder.")
+
             blip = BLIPImg2Txt(
                 conditional_caption=blip_caption_prefix,
                 min_words=min_words,
@@ -152,6 +159,7 @@ class Img2TxtNode:
                 temperature=temperature,
                 repetition_penalty=repetition_penalty,
                 search_beams=search_beams,
+                custom_model_path=blip_model_path
             )
             captions.append(blip.generate_caption(raw_image))
 
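
Note that folder_paths.get_folder_paths("blip")[0] assumes a "blip" entry already exists in ComfyUI's folder registry; the lookup will fail for unregistered names (it indexes ComfyUI's internal folder table), so the directory has to be registered somewhere, typically the custom node package's __init__.py. A hedged sketch of such a registration, assuming the models/blip layout named in the error message; this registration is not part of this commit:

import os
import folder_paths  # ComfyUI's model-folder registry

# Hypothetical registration, e.g. in the custom node's __init__.py:
blip_dir = os.path.join(folder_paths.models_dir, "blip")
os.makedirs(blip_dir, exist_ok=True)
folder_paths.add_model_folder_path("blip", blip_dir)

# After registration, the node's lookup resolves to the registered directory:
blip_model_path = folder_paths.get_folder_paths("blip")[0]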