# ✅ DONE

import os
import subprocess
import signal
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr
import tempfile

from huggingface_hub import HfApi, ModelCard, whoami
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from pathlib import Path
from textwrap import dedent
from apscheduler.schedulers.background import BackgroundScheduler


# Hub token read from the HF_TOKEN environment variable (None when it is unset);
# restart_space() passes it as the token for the Space restart call.
HF_TOKEN = os.environ.get("HF_TOKEN")
# File name of the conversion script; process_model() runs it as llama.cpp/<name>.
CONVERSION_SCRIPT = "convert_lora_to_gguf.py"

def _select_download_patterns(api, repo_id: str) -> list:
    """Return the glob patterns to download: metadata files plus one weights format."""
    has_safetensors = any(
        entry.path.endswith(".safetensors")
        for entry in api.list_repo_tree(repo_id=repo_id, recursive=True)
    )
    # Prefer safetensors weights when the repo ships any; otherwise fall back to *.bin.
    weights_pattern = "*.safetensors" if has_safetensors else "*.bin"
    return ["*.md", "*.json", "*.model", weights_pattern]


def _convert_adapter_to_gguf(adapter_dir: Path, q_method: str, outfile: Path) -> None:
    """Run the llama.cpp conversion script on adapter_dir, writing the GGUF to outfile."""
    # Argument list + no shell: the repo name ends up in these paths and must
    # never be interpreted by a shell.
    command = [
        "python",
        f"llama.cpp/{CONVERSION_SCRIPT}",
        str(adapter_dir),
        "--outtype",
        q_method.lower(),
        "--outfile",
        str(outfile),
    ]
    # text=True so stderr is readable in the error message instead of a bytes repr.
    result = subprocess.run(command, capture_output=True, text=True, errors="replace")
    print(result)
    if result.returncode != 0:
        raise Exception(f"Error converting to GGUF {q_method}: {result.stderr}")


def _build_model_card(peft_model_id: str, new_repo_id: str, gguf_output_name: str, token: str):
    """Build the README card for the converted repo, starting from the source card if loadable."""
    try:
        card = ModelCard.load(peft_model_id, token=token)
    except Exception as err:
        # Best effort: a missing or unreadable source card must not fail the conversion.
        print(f"Could not load model card for {peft_model_id}: {err}")
        card = ModelCard("")
    if card.data.tags is None:
        card.data.tags = []
    card.data.tags.append("llama-cpp")
    card.data.tags.append("gguf-my-lora")
    card.data.base_model = peft_model_id
    card.text = dedent(
        f"""
        # {new_repo_id}
        This LoRA adapter was converted to GGUF format from [`{peft_model_id}`](https://huggingface.co/{peft_model_id}) via the ggml.ai's [GGUF-my-lora](https://huggingface.co/spaces/ggml-org/gguf-my-lora) space.
        Refer to the [original adapter repository](https://huggingface.co/{peft_model_id}) for more details.

        ## Use with llama.cpp

        ```bash
        # with cli
        llama-cli -m base_model.gguf --lora {gguf_output_name} (...other args)

        # with server
        llama-server -m base_model.gguf --lora {gguf_output_name} (...other args)
        ```

        To know more about LoRA usage with llama.cpp server, refer to the [llama.cpp server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md).
        """
    )
    return card


def process_model(peft_model_id: str, q_method: str, private_repo, oauth_token: gr.OAuthToken | None):
    """Convert a PEFT LoRA adapter repo to GGUF and upload it to the user's namespace.

    Args:
        peft_model_id: Hub repo id of the LoRA adapter (e.g. ``user/name``).
        q_method: Output type passed (lower-cased) to the conversion script.
        private_repo: Whether the created repo should be private.
        oauth_token: OAuth token of the logged-in user; may be ``None``.

    Returns:
        An HTML string: a success message linking to the new repo, or an
        error message describing what failed.

    Raises:
        ValueError: If no login token is available.
    """
    # The token object itself may be None (see the annotation), so check it
    # before touching ``.token``.
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use GGUF-my-lora")
    model_name = peft_model_id.split('/')[-1]
    gguf_output_name = f"{model_name}-{q_method.lower()}.gguf"

    try:
        api = HfApi(token=oauth_token.token)
        dl_pattern = _select_download_patterns(api, peft_model_id)

        os.makedirs("downloads", exist_ok=True)

        # Everything produced by this request (download, GGUF, README) lives in
        # the temporary directory so it is removed once the upload finishes.
        with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
            workdir = Path(tmpdir)
            # Keep the model name as the dirname so the model name metadata is populated correctly
            local_dir = workdir / model_name
            print(local_dir)
            api.snapshot_download(repo_id=peft_model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
            print("Model downloaded successfully!")
            print(f"Current working directory: {os.getcwd()}")
            print(f"Model directory contents: {os.listdir(local_dir)}")

            if not (local_dir / "adapter_config.json").exists():
                raise Exception("adapter_config.json not found. Please ensure the selected repo is a PEFT LoRA model.<br/><br/>If you are converting a model (not a LoRA adapter), please use <a href=\"https://huggingface.co/spaces/ggml-org/gguf-my-repo\" target=\"_blank\">GGUF-my-repo</a> instead.")

            gguf_output_path = workdir / gguf_output_name
            _convert_adapter_to_gguf(local_dir, q_method, gguf_output_path)
            print("Model converted to GGUF successfully!")
            print(f"Converted model path: {gguf_output_path}")

            # Create empty repo
            username = whoami(oauth_token.token)["name"]
            new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
            new_repo_id = new_repo_url.repo_id
            print("Repo created successfully!", new_repo_url)

            # Upload the GGUF model
            api.upload_file(
                path_or_fileobj=str(gguf_output_path),
                path_in_repo=gguf_output_name,
                repo_id=new_repo_id,
            )
            print("Uploaded", gguf_output_name)

            # Write and upload the model card
            card = _build_model_card(peft_model_id, new_repo_id, gguf_output_name, oauth_token.token)
            readme_path = workdir / "README.md"
            card.save(readme_path)
            api.upload_file(
                path_or_fileobj=str(readme_path),
                path_in_repo="README.md",
                repo_id=new_repo_id,
            )

        return (
            f'<h1>✅ DONE</h1><br/><br/>Find your repo here: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{new_repo_id}</a>'
        )
    except Exception as e:
        # The message is rendered as-is; some errors above intentionally contain HTML.
        return (f"<h1>❌ ERROR</h1><br/><br/>{e}")


css = """/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""

# Create Gradio interface.
# Components are created in display order; the three inputs are declared first
# and then handed to gr.Interface, which wires them to process_model.
with gr.Blocks(css=css) as demo:
    gr.Markdown("You must be logged in to use GGUF-my-lora.")
    gr.LoginButton(min_width=250)

    peft_model_id = HuggingfaceHubSearch(
        label="PEFT LoRA repository",
        placeholder="Search for repository on Huggingface",
        search_type="model",
    )

    q_method = gr.Dropdown(
        ["F32", "F16", "Q8_0"],
        label="Quantization Method",
        info="(Note: Quantization less than Q8 produces very poor results)",
        value="F16",
        filterable=False,
        visible=True,
    )

    private_repo = gr.Checkbox(
        value=False,
        label="Private Repo",
        info="Create a private repo under your username.",
    )

    iface = gr.Interface(
        fn=process_model,
        inputs=[
            peft_model_id,
            q_method,
            private_repo,
        ],
        outputs=[
            gr.Markdown(label="output"),
        ],
        title="Convert PEFT LoRA adapters to GGUF, blazingly fast ⚡!",
        # The repo is public unless the "Private Repo" box is ticked, so the
        # description must not promise a public repo unconditionally.
        description="The space takes a PEFT LoRA (stored on a HF repo) as an input, converts it to GGUF and creates a repo (public by default, private if selected) under your HF user namespace.",
        api_name=False,
    )


def restart_space():
    """Request a factory reboot of the ggml-org/gguf-my-lora Space, authenticating with HF_TOKEN."""
    hub_client = HfApi()
    hub_client.restart_space(
        repo_id="ggml-org/gguf-my-lora",
        token=HF_TOKEN,
        factory_reboot=True,
    )

# Run restart_space in the background on a fixed interval of six hours
# (6 * 60 * 60 = 21600 seconds).
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=6 * 60 * 60)
scheduler.start()

# Launch the interface: one request processed at a time, at most five waiting.
queued_demo = demo.queue(default_concurrency_limit=1, max_size=5)
queued_demo.launch(debug=True, show_api=False)