Commit: better isolation
Files changed:
- .dockerignore (+2 -1)
- .gitignore (+1 -0)
- app.py (+83 -69)
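The change isolates each conversion request: `app.py` now downloads the source adapter into a temporary directory under `downloads/` and writes the converted GGUF file plus the generated `README.md` into a temporary directory under `outputs/`, both managed by `tempfile.TemporaryDirectory` so they are deleted automatically when the request finishes.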
.dockerignore CHANGED

```diff
@@ -1,3 +1,4 @@
 /downloads
 /llama.cpp
-*.gguf
+*.gguf
+/outputs
```
.gitignore CHANGED

```diff
@@ -162,6 +162,7 @@ cython_debug/
 #.idea/
 
 /downloads
+/outputs
 !/downloads/.keep
 /llama.cpp
 *.gguf
```
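Keeping `/outputs` out of both git and the Docker build context ensures the per-request working directories created by `app.py` never end up in repository history or in the built image.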
app.py CHANGED

```diff
@@ -43,76 +43,90 @@ def process_model(peft_model_id: str, q_method: str, private_repo, oauth_token:
     if not os.path.exists("downloads"):
         os.makedirs("downloads")
 
-[64 removed lines: the previous implementation; its contents are not preserved in the source view]
+    if not os.path.exists("outputs"):
+        os.makedirs("outputs")
+
+    with tempfile.TemporaryDirectory(dir="outputs") as outputdir:
+        gguf_output_path = Path(outputdir)/gguf_output_name
+        readme_output_path = Path(outputdir)/"README.md"
+
+        with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
+            # Keep the model name as the dirname so the model name metadata is populated correctly
+            local_dir = Path(tmpdir)/model_name
+            print(local_dir)
+            api.snapshot_download(repo_id=peft_model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+            print("Model downloaded successfully!")
+            print(f"Current working directory: {os.getcwd()}")
+            print(f"Model directory contents: {os.listdir(local_dir)}")
+
+            adapter_config_dir = local_dir/"adapter_config.json"
+            if not os.path.exists(adapter_config_dir):
+                raise Exception('adapter_config.json not found. Please ensure the selected repo is a PEFT LoRA model.<br/><br/>If you are converting a model (not a LoRA adapter), please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-repo" target="_blank" style="text-decoration:underline">GGUF-my-repo</a> instead.')
+
+            result = subprocess.run([
+                "python",
+                f"llama.cpp/{CONVERSION_SCRIPT}",
+                local_dir,
+                "--outtype",
+                q_method.lower(),
+                "--outfile",
+                gguf_output_path,
+            ], shell=False, capture_output=True)
+            print(result)
+            if result.returncode != 0:
+                raise Exception(f"Error converting to GGUF {q_method}: {result.stderr}")
+            print("Model converted to GGUF successfully!")
+            print(f"Converted model path: {gguf_output_path}")
+
+        # Create empty repo
+        username = whoami(oauth_token.token)["name"]
+        new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
+        new_repo_id = new_repo_url.repo_id
+        print("Repo created successfully!", new_repo_url)
+
+        # Upload the GGUF model
+        api.upload_file(
+            path_or_fileobj=gguf_output_path,
+            path_in_repo=gguf_output_name,
+            repo_id=new_repo_id,
+        )
+        print("Uploaded", gguf_output_name)
+
+        try:
+            card = ModelCard.load(peft_model_id, token=oauth_token.token)
+        except:
+            card = ModelCard("")
+        if card.data.tags is None:
+            card.data.tags = []
+        card.data.tags.append("llama-cpp")
+        card.data.tags.append("gguf-my-lora")
+        card.data.base_model = peft_model_id
+        card.text = dedent(
+            f"""
+            # {new_repo_id}
+            This LoRA adapter was converted to GGUF format from [`{peft_model_id}`](https://huggingface.co/{peft_model_id}) via the ggml.ai's [GGUF-my-lora](https://huggingface.co/spaces/ggml-org/gguf-my-lora) space.
+            Refer to the [original adapter repository](https://huggingface.co/{peft_model_id}) for more details.
+
+            ## Use with llama.cpp
+
+            ```bash
+            # with cli
+            llama-cli -m base_model.gguf --lora {gguf_output_name} (...other args)
+
+            # with server
+            llama-server -m base_model.gguf --lora {gguf_output_name} (...other args)
+            ```
+
+            To know more about LoRA usage with llama.cpp server, refer to the [llama.cpp server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md).
+            """
+        )
+        card.save(readme_output_path)
 
-[5 removed lines: not preserved in the source view]
+        api.upload_file(
+            path_or_fileobj=readme_output_path,
+            path_in_repo="README.md",
+            repo_id=new_repo_id,
+        )
 
     return (
         f'<h1>✅ DONE</h1><br/><br/>Find your repo here: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{new_repo_id}</a>'
```
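The core of the change is the `tempfile.TemporaryDirectory(dir=...)` pattern. Below is a minimal, self-contained sketch of that pattern, not the Space's actual code; the function and file names are illustrative:

```python
import os
import tempfile
from pathlib import Path

# Hypothetical stand-in for one conversion request, showing the isolation
# pattern used in app.py above.
def convert_one_request(gguf_output_name: str) -> None:
    # Parent directory for all per-request workspaces (git- and Docker-ignored).
    os.makedirs("outputs", exist_ok=True)

    # Each call gets its own unique directory, e.g. outputs/tmpk2j9x1q7/.
    with tempfile.TemporaryDirectory(dir="outputs") as outputdir:
        gguf_output_path = Path(outputdir) / gguf_output_name

        # ... download, conversion, and uploads would happen here ...
        gguf_output_path.write_bytes(b"stand-in for the converted model")
        print("working file:", gguf_output_path)

    # Leaving the `with` block removes the whole tree, even if an exception
    # was raised inside it, so a failed conversion cannot leave partial
    # files behind for a later request to trip over.
    assert not Path(outputdir).exists()

if __name__ == "__main__":
    convert_one_request("adapter.gguf")
    convert_one_request("adapter.gguf")  # same filename, no collision
```

Because every request works in its own directory, two concurrent conversions that produce the same output filename can no longer collide, and cleanup is handled by the context manager rather than by manual deletion (the pre-commit implementation is not visible in this view, so the exact paths it used are unknown).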