import os
import shutil
import subprocess
import signal
import hashlib
from urllib.parse import urlparse

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch
from apscheduler.schedulers.background import BackgroundScheduler
from textwrap import dedent

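# Configuration comes from the Space environment: HF_TOKEN for Hub access and
# OLLAMA_USERNAME as the default namespace in the Ollama Library.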
HOME = os.environ.get("HOME")
token = os.environ.get("HF_TOKEN")
library_username = os.environ.get("OLLAMA_USERNAME", "").lower()
ollama_pubkey = None
ollama_model_name = None
download_gguf_link = None
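# Each logged-in user gets an isolated Ollama instance: the OAuth token is
# hashed and the hash is used as a private HOME directory for `ollama serve`,
# so keys and models from different users never mix.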
def regenerate_pubkey(oauth_token: gr.OAuthToken | None):
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use Ollamafy")

    hash_oauth = hashlib.sha256(oauth_token.token.encode()).hexdigest()
    user_home = f"{HOME}/{hash_oauth}"
    os.makedirs(user_home, exist_ok=True)
    generate_ollama_pid_file = f"echo $! > {user_home}/ollama.pid"
    generate_ollama_host_file = f"echo $(ss -natp | grep $(cat {user_home}/ollama.pid) | awk '{{print $4}}') > {user_home}/ollama.host"
    ollama_pubkey = f"cat {user_home}/.ollama/id_ed25519.pub"
    # Redirect the server's output to a log file so subprocess.run() is not
    # left waiting on the backgrounded process's captured stdout.
    ollama_start = f"HOME={user_home} ollama serve > {user_home}/ollama.log 2>&1 & {generate_ollama_pid_file} & sleep 5"
    ollama_pid = f"cat {user_home}/ollama.pid"
    ollama_stop = f"kill -9 $(cat {user_home}/ollama.pid)"
    delete_home = f"rm -Rf {user_home}/.ollama"
    
    try:
        # Tear down any previous instance for this user before starting fresh;
        # on a first run there is nothing to stop, so these steps only warn.
        result = subprocess.run(ollama_pid, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            print(f"No previous Ollama PID found: {result.stderr}")
        else:
            print(f"Ollama PID retrieved: {result.stdout}")
            result = subprocess.run(ollama_stop, shell=True, capture_output=True)
            print(result)
            if result.returncode != 0:
                print(f"Error stopping Ollama: {result.stderr}")
            else:
                print("Ollama stopped successfully!")

        result = subprocess.run(delete_home, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error removing Ollama HOME folder: {result.stderr}")
        print("Ollama HOME folder removed successfully!")

        result = subprocess.run(ollama_start, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error starting Ollama: {result.stderr}")
        print("Ollama started successfully!")

        result = subprocess.run(ollama_pubkey, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error reading Ollama pubkey: {result.stderr}")
        print("Ollama Pubkey Obtained!")
        pubkey_value = result.stdout.decode()

        result = subprocess.run(generate_ollama_host_file, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error generating Ollama Host File: {result.stderr}")
        print("Ollama Host File Created!")

        print("Ollama Pubkey Generated! Copy it to your user profile in the Ollama Library.")
        return pubkey_value
    except Exception as e:
        return f"Error: {e}"

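# Pipeline: download the Hugging Face repo (or a user-supplied GGUF), convert
# it to an fp16 GGUF with llama.cpp, `ollama create` it at the requested
# quantization against the per-user server, then push it to the Ollama Library.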
def ollamafy_model(login, model_id, ollama_model_name, download_gguf_link, ollama_library_username, ollama_q_method, latest, maintainer, oauth_token: gr.OAuthToken | None):
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use Ollamafy")
    hash_oauth = hashlib.sha256(oauth_token.token.encode()).hexdigest()

    # Fall back to the Space-wide username when none is supplied in the form.
    if not ollama_library_username:
        ollama_library_username = library_username

    # username = whoami(oauth_token.token)["name"]
    model_name = model_id.split('/')[-1]
    fp16 = f"{model_name}-fp16.gguf"
    ollama_pid = f"cat {HOME}/{hash_oauth}/ollama.pid"
    ollama_stop = f"kill -9 $(cat {HOME}/{hash_oauth}/ollama.pid)"
    delete_home = f"rm -Rf {HOME}/{hash_oauth}/.ollama"
    download_gguf = f"wget {download_gguf_link} -O {fp16}"

    try:
        api = HfApi(token=oauth_token.token)
        dl_pattern = ["*.md", "*.json", "*.model"]
        pattern = (
            "*.safetensors"
            if any(
                file.path.endswith(".safetensors")
                for file in api.list_repo_tree(
                    repo_id=model_id,
                    recursive=True,
                )
            )
            else "*.bin"
        )
        dl_pattern += [pattern]

        if not os.path.isfile(fp16) and not download_gguf_link:
            api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
            print("Model downloaded successfully!")
            print(f"Current working directory: {os.getcwd()}")
            print(f"Model directory contents: {os.listdir(model_name)}")

            conversion_script = "convert_hf_to_gguf.py"
            fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
            result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
            print(result)
            if result.returncode != 0:
                raise Exception(f"Error converting to fp16: {result.stderr}")
            print("Model converted to fp16 successfully!")
            print(f"Converted model path: {fp16}")
        elif download_gguf_link:
            # Save the downloaded GGUF under the same name the Modelfile expects.
            if urlparse(download_gguf_link).scheme and download_gguf_link.endswith(".gguf"):
                result = subprocess.run(download_gguf, shell=True, capture_output=True)
                print(result)
                if result.returncode != 0:
                    raise Exception(f"Error downloading GGUF: {result.stderr}")
                print("Downloaded GGUF")
            else:
                raise Exception("Invalid GGUF Download Link")
                

        ### Ollamafy ###
        model_maintainer = model_id.split('/')[-2]
        if not ollama_model_name:
            ollama_model_name = model_maintainer.lower() + '_' + model_name.lower()
        ollama_modelfile_name = model_name + '_modelfile'

        # The Modelfile only needs to point Ollama at the converted GGUF.
        with open(ollama_modelfile_name, "w") as ollama_modelfile:
            ollama_modelfile.write(f"FROM {fp16}")

        # Address of the per-user server, recorded by regenerate_pubkey.
        with open(f"{HOME}/{hash_oauth}/ollama.host") as host_file:
            ollama_host = host_file.read().strip()

        # Maintainers publish under the original model name; everyone else
        # under "<maintainer>_<model>" to avoid name clashes in the Library.
        push_name = model_name.lower() if maintainer else ollama_model_name.lower()

        if ollama_q_method == "FP16":
            ollama_conversion = f"OLLAMA_HOST={ollama_host} ollama create -f {ollama_modelfile_name} {ollama_library_username}/{push_name}:{ollama_q_method.lower()}"
        else:
            ollama_conversion = f"OLLAMA_HOST={ollama_host} ollama create -q {ollama_q_method} -f {ollama_modelfile_name} {ollama_library_username}/{push_name}:{ollama_q_method.lower()}"

        ollama_conversion_result = subprocess.run(ollama_conversion, shell=True, capture_output=True)
        print(ollama_conversion_result)
        if ollama_conversion_result.returncode != 0:
            raise Exception(f"Error converting to Ollama: {ollama_conversion_result.stderr}")
        print("Model converted to Ollama successfully!")

        ollama_push = f"OLLAMA_HOST={ollama_host} ollama push {ollama_library_username}/{push_name}:{ollama_q_method.lower()}"
        ollama_push_result = subprocess.run(ollama_push, shell=True, capture_output=True)
        print(ollama_push_result)
        if ollama_push_result.returncode != 0:
            raise Exception(f"Error pushing to Ollama: {ollama_push_result.stderr}")
        print("Model pushed to Ollama library successfully!")

        if latest:
            # Retag the chosen quantization as :latest and push that tag too.
            ollama_copy = f"OLLAMA_HOST={ollama_host} ollama cp {ollama_library_username}/{push_name}:{ollama_q_method.lower()} {ollama_library_username}/{push_name}:latest"
            ollama_copy_result = subprocess.run(ollama_copy, shell=True, capture_output=True)
            print(ollama_copy_result)
            if ollama_copy_result.returncode != 0:
                raise Exception(f"Error tagging model as :latest: {ollama_copy_result.stderr}")
            print("Model tagged as :latest successfully!")

            ollama_push_latest = f"OLLAMA_HOST={ollama_host} ollama push {ollama_library_username}/{push_name}:latest"
            ollama_push_latest_result = subprocess.run(ollama_push_latest, shell=True, capture_output=True)
            print(ollama_push_latest_result)
            if ollama_push_latest_result.returncode != 0:
                raise Exception(f"Error pushing :latest to Ollama: {ollama_push_latest_result.stderr}")
            print("Model pushed to Ollama library with the :latest tag successfully!")

            ollama_rm_latest = f"OLLAMA_HOST={ollama_host} ollama rm {ollama_library_username}/{push_name}:latest"
            ollama_rm_latest_result = subprocess.run(ollama_rm_latest, shell=True, capture_output=True)
            print(ollama_rm_latest_result)
            if ollama_rm_latest_result.returncode != 0:
                raise Exception(f"Error removing local :latest copy: {ollama_rm_latest_result.stderr}")
            print("Local :latest copy removed successfully!")

        # Remove the local copy only after every push has finished.
        ollama_rm = f"OLLAMA_HOST={ollama_host} ollama rm {ollama_library_username}/{push_name}:{ollama_q_method.lower()}"
        ollama_rm_result = subprocess.run(ollama_rm, shell=True, capture_output=True)
        print(ollama_rm_result)
        if ollama_rm_result.returncode != 0:
            raise Exception(f"Error removing local copy from Ollama: {ollama_rm_result.stderr}")
        print("Local Ollama copy removed successfully!")

        return (f"Pushed {ollama_library_username}/{push_name}:{ollama_q_method.lower()} to the Ollama Library!", None)
    
    except Exception as e:
        return (f"Error: {e}", "error.png")
    finally:
        shutil.rmtree(model_name, ignore_errors=True)
        print("Folder cleaned up successfully!")

        # Best-effort teardown of the per-user Ollama instance; never raise
        # from a finally block.
        result = subprocess.run(ollama_pid, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            print(f"Error retrieving Ollama PID: {result.stderr}")
        else:
            print(f"Ollama PID retrieved: {result.stdout}")

        result = subprocess.run(ollama_stop, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            print(f"Error stopping Ollama: {result.stderr}")
        else:
            print("Ollama stopped successfully!")

        result = subprocess.run(delete_home, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            print(f"Error removing Ollama HOME folder: {result.stderr}")
        else:
            print("Ollama HOME folder removed successfully!")
        
css="""/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""
# Create Gradio interface
with gr.Blocks(css=css) as demo:
    
    login = gr.LoginButton(
        min_width=250,
    )

    generate_pubkey = gr.Button(
        value="Generate Pubkey",
        min_width=250,
    )
    
    model_id = HuggingfaceHubSearch(
        label="Hugging Face Hub Model ID",
        placeholder="Search for model id on Huggingface",
        search_type="model",
    )
    
    download_gguf_link = gr.Textbox(
        label="Download Link",
        info="If you have access to the GGUF, you can insert the downlaod link here.",
    )
    
    ollama_q_method = gr.Dropdown(
        ["FP16", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
        label="Ollama Quantization Method",
        info="Chose which quantization will created and exported to the Ollama Library.",
        value="FP16"
    )

    pubkey = gr.Code(
        ollama_pubkey,
        label="Copy this key and paste it into your Ollama profile.",
    )
    
    ollama_model_name = gr.Textbox(
        label="Ollama Model Name",
        info="Input a Custom Model Name.",
    )

    ollama_library_username = gr.Textbox(
        label="Ollama Library Username",
        info="Input your username from Ollama to Push this model to their Library.",
    )
    
    latest = gr.Checkbox(
        value=False,
        label="Latest",
        info="Push Model to the Ollama Library with the :latest tag."
    )

    maintainer = gr.Checkbox(
        value=False,
        label="Maintainer",
        info="Use this option is your original repository on both Hugging Face and Ollama."
    )
            
    generate_pubkey.click(
        fn=regenerate_pubkey,
        inputs=[],
        outputs=[
            pubkey,
        ],
    )
    
    iface = gr.Interface(
        fn=ollamafy_model,
        inputs=[
            login,
            model_id,
            ollama_model_name,
            download_gguf_link,
            ollama_library_username,
            ollama_q_method,
            latest,
            maintainer,
        ],
        outputs=[
            gr.Markdown(label="output"),
            gr.Image(show_label=False),
        ],
        title="Ollamafy",
        description="Import Hugging Face Models to Ollama and Push them to the Ollama Library 🦙 \n\n Sampled from: \n\n - https://huggingface.co/spaces/ggml-org/gguf-my-repo \n\n - https://huggingface.co/spaces/gingdev/ollama-server",
        api_name=False
    )
    
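# Factory-reboot the Space on a fixed interval (21600 s = 6 h) so any leftover
# Ollama state is cleared.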
def restart_space():
    HfApi().restart_space(repo_id="unclemusclez/ollamafy", token=token, factory_reboot=True)

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()

# Launch the interface
demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)