Spaces:

dwb2023
/

model_explorer2

Paused

App Files Community

dwb2023 committed on Jun 8

Commit

5f6d3e9

•

1 Parent(s): 79ab92b

Update utils.py

Browse files

refine bnb config and architecture mappings

Files changed (1) hide show

utils.py +53 -51

utils.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import subprocess
-import os, requests
-import torch, torchvision
-import spaces
-from huggingface_hub import login
-from transformers import BitsAndBytesConfig, AutoModelForCausalLM, LlavaNextForConditionalGeneration, LlavaForConditionalGeneration, PaliGemmaForConditionalGeneration, Idefics2ForConditionalGeneration
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
@@ -15,63 +14,66 @@ def install_flash_attn():
         shell=True,
     )
-# Authenticate with Hugging Face
-def authenticate_hf(token):
-    login(token=token, add_to_git_credential=True)
-# Function to get the model summary
-model_cache = {}
-@spaces.GPU
 def get_model_summary(model_name):
-    if model_name in model_cache:
-        return model_cache[model_name], ""
     try:
-        # Fetch the config.json file
-        config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
-        headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
-        response = requests.get(config_url, headers=headers)
-        response.raise_for_status()
-        config = response.json()
-        architecture = config["architectures"][0]
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        # Check if the model is quantized
-        is_quantized = "quantized" in model_name.lower()
         # Set up BitsAndBytesConfig if the model is quantized
-        bnb_config = BitsAndBytesConfig(load_in_4bit=True) if is_quantized else None
-        # Load the model based on its architecture and quantization status
-        if architecture == "LlavaNextForConditionalGeneration":
-            model = LlavaNextForConditionalGeneration.from_pretrained(
-                model_name, config=bnb_config, trust_remote_code=True
-            )
-        elif architecture == "LlavaForConditionalGeneration":
-            model = LlavaForConditionalGeneration.from_pretrained(
-                model_name, config=bnb_config, trust_remote_code=True
-            )
-        elif architecture == "PaliGemmaForConditionalGeneration":
-            model = PaliGemmaForConditionalGeneration.from_pretrained(
-                model_name, config=bnb_config, trust_remote_code=True
-            )
-        elif architecture == "Idefics2ForConditionalGeneration":
-            model = Idefics2ForConditionalGeneration.from_pretrained(
-                model_name, config=bnb_config, trust_remote_code=True
             )
         else:
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name, config=bnb_config, trust_remote_code=True
-            )
         # Move to device only if the model is not quantized
-        if not is_quantized:
-            model = model.to(device)
-        model_summary = str(model)
-        model_cache[model_name] = model_summary
         return model_summary, ""
     except Exception as e:
         return "", str(e)

 import subprocess
+import os
+import torch
+from transformers import BitsAndBytesConfig, AutoConfig, AutoModelForCausalLM, LlavaNextForConditionalGeneration, LlavaForConditionalGeneration, PaliGemmaForConditionalGeneration, Idefics2ForConditionalGeneration
+from functools import lru_cache
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
         shell=True,
     )
+# Architecture to model class mapping
+ARCHITECTURE_MAP = {
+    "LlavaNextForConditionalGeneration": LlavaNextForConditionalGeneration,
+    "LlavaForConditionalGeneration": LlavaForConditionalGeneration,
+    "PaliGemmaForConditionalGeneration": PaliGemmaForConditionalGeneration,
+    "Idefics2ForConditionalGeneration": Idefics2ForConditionalGeneration,
+    "AutoModelForCausalLM": AutoModelForCausalLM
+}
+# Function to get the model summary with caching
+@lru_cache(maxsize=10)
 def get_model_summary(model_name):
+    """
+    Retrieve the model summary for the given model name.
+    Args:
+    model_name (str): The name of the model to retrieve the summary for.
+    Returns:
+    tuple: A tuple containing the model summary (str) and an error message (str), if any.
+    """
     try:
+        # Fetch the model configuration
+        config = AutoConfig.from_pretrained(model_name)
+        architecture = config.architectures[0]
+        quantization_config = getattr(config, 'quantization_config', None)
         # Set up BitsAndBytesConfig if the model is quantized
+        if quantization_config:
+            bnb_config = BitsAndBytesConfig(
+                load_in_4bit=quantization_config.get('load_in_4bit', False),
+                load_in_8bit=quantization_config.get('load_in_8bit', False),
+                bnb_4bit_compute_dtype=quantization_config.get('bnb_4bit_compute_dtype', torch.float16),
+                bnb_4bit_quant_type=quantization_config.get('bnb_4bit_quant_type', 'nf4'),
+                bnb_4bit_use_double_quant=quantization_config.get('bnb_4bit_use_double_quant', False),
+                llm_int8_enable_fp32_cpu_offload=quantization_config.get('llm_int8_enable_fp32_cpu_offload', False),
+                llm_int8_has_fp16_weight=quantization_config.get('llm_int8_has_fp16_weight', False),
+                llm_int8_skip_modules=quantization_config.get('llm_int8_skip_modules', None),
+                llm_int8_threshold=quantization_config.get('llm_int8_threshold', 6.0),
             )
         else:
+            bnb_config = None
+        # Get the appropriate model class from the architecture map
+        model_class = ARCHITECTURE_MAP.get(architecture, AutoModelForCausalLM)
+        # Load the model
+        model = model_class.from_pretrained(
+            model_name, config=bnb_config, trust_remote_code=True
+        )
         # Move to device only if the model is not quantized
+        if model and not quantization_config:
+            model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+        model_summary = str(model) if model else "Model architecture not found."
         return model_summary, ""
+    except ValueError as ve:
+        return "", f"ValueError: {ve}"
+    except EnvironmentError as ee:
+        return "", f"EnvironmentError: {ee}"
     except Exception as e:
         return "", str(e)