import os
import shutil
import subprocess
import signal
import hashlib
from urllib.parse import urlparse

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch
from apscheduler.schedulers.background import BackgroundScheduler
from textwrap import dedent

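# Configuration comes from the Space environment: HF_TOKEN for Hub access and
# OLLAMA_USERNAME as the default namespace in the Ollama Library.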
HOME = os.environ.get("HOME")
token = os.environ.get("HF_TOKEN")
library_username = os.environ.get("OLLAMA_USERNAME", "").lower()
ollama_pubkey = None
ollama_model_name = None
download_gguf_link = None
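# Each logged-in user gets an isolated Ollama instance: the OAuth token is
# hashed and the hash is used as a private HOME directory for `ollama serve`,
# so keys and models from different users never mix.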
def regenerate_pubkey(oauth_token: gr.OAuthToken | None):
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use Ollamafy")

    hash_oauth = hashlib.sha256(oauth_token.token.encode()).hexdigest()
    user_home = f"{HOME}/{hash_oauth}"
    os.makedirs(user_home, exist_ok=True)
    generate_ollama_pid_file = f"echo $! > {user_home}/ollama.pid"
    generate_ollama_host_file = f"echo $(ss -natp | grep $(cat {user_home}/ollama.pid) | awk '{{print $4}}') > {user_home}/ollama.host"
    ollama_pubkey = f"cat {user_home}/.ollama/id_ed25519.pub"
    # Redirect the server's output to a log file so subprocess.run() is not
    # left waiting on the backgrounded process's captured stdout.
    ollama_start = f"HOME={user_home} ollama serve > {user_home}/ollama.log 2>&1 & {generate_ollama_pid_file} & sleep 5"
    ollama_pid = f"cat {user_home}/ollama.pid"
    ollama_stop = f"kill -9 $(cat {user_home}/ollama.pid)"
    delete_home = f"rm -Rf {user_home}/.ollama"
    
    try:
        # Tear down any previous instance for this user before starting fresh;
        # on a first run there is nothing to stop, so these steps only warn.
        result = subprocess.run(ollama_pid, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            print(f"No previous Ollama PID found: {result.stderr}")
        else:
            print(f"Ollama PID retrieved: {result.stdout}")
            result = subprocess.run(ollama_stop, shell=True, capture_output=True)
            print(result)
            if result.returncode != 0:
                print(f"Error stopping Ollama: {result.stderr}")
            else:
                print("Ollama stopped successfully!")

        result = subprocess.run(delete_home, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error removing Ollama HOME folder: {result.stderr}")
        print("Ollama HOME folder removed successfully!")

        result = subprocess.run(ollama_start, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error starting Ollama: {result.stderr}")
        print("Ollama started successfully!")

        result = subprocess.run(ollama_pubkey, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error reading Ollama pubkey: {result.stderr}")
        print("Ollama Pubkey Obtained!")
        pubkey_value = result.stdout.decode()

        result = subprocess.run(generate_ollama_host_file, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error generating Ollama Host File: {result.stderr}")
        print("Ollama Host File Created!")

        print("Ollama Pubkey Generated! Copy it to your user profile in the Ollama Library.")
        return pubkey_value
    except Exception as e:
        return f"Error: {e}"

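# Pipeline: download the Hugging Face repo (or a user-supplied GGUF), convert
# it to an fp16 GGUF with llama.cpp, `ollama create` it at the requested
# quantization against the per-user server, then push it to the Ollama Library.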
def ollamafy_model(login, model_id, ollama_model_name, download_gguf_link, ollama_library_username, ollama_q_method, latest, maintainer, oauth_token: gr.OAuthToken | None):
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use Ollamafy")
    hash_oauth = hashlib.sha256(oauth_token.token.encode()).hexdigest()

    # Fall back to the Space-wide username when none is supplied in the form.
    if not ollama_library_username:
        ollama_library_username = library_username

    # username = whoami(oauth_token.token)["name"]
    model_name = model_id.split('/')[-1]
    fp16 = f"{model_name}-fp16.gguf"
    ollama_pid = f"cat {HOME}/{hash_oauth}/ollama.pid"
    ollama_stop = f"kill -9 $(cat {HOME}/{hash_oauth}/ollama.pid)"
    delete_home = f"rm -Rf {HOME}/{hash_oauth}/.ollama"
    download_gguf = f"wget {download_gguf_link} -O {fp16}"

    try:
        api = HfApi(token=oauth_token.token)
        dl_pattern = ["*.md", "*.json", "*.model"]
        pattern = (
            "*.safetensors"
            if any(
                file.path.endswith(".safetensors")
                for file in api.list_repo_tree(
                    repo_id=model_id,
                    recursive=True,
                )
            )
            else "*.bin"
        )
        dl_pattern += [pattern]

        if not os.path.isfile(fp16) and not download_gguf_link:
            api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
            print("Model downloaded successfully!")
            print(f"Current working directory: {os.getcwd()}")
            print(f"Model directory contents: {os.listdir(model_name)}")

            conversion_script = "convert_hf_to_gguf.py"
            fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
            result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
            print(result)
            if result.returncode != 0:
                raise Exception(f"Error converting to fp16: {result.stderr}")
            print("Model converted to fp16 successfully!")
            print(f"Converted model path: {fp16}")
        elif download_gguf_link:
            # Save the downloaded GGUF under the same name the Modelfile expects.
            if urlparse(download_gguf_link).scheme and download_gguf_link.endswith(".gguf"):
                result = subprocess.run(download_gguf, shell=True, capture_output=True)
                print(result)
                if result.returncode != 0:
                    raise Exception(f"Error downloading GGUF: {result.stderr}")
                print("Downloaded GGUF")
            else:
                raise Exception("Invalid GGUF Download Link")
                

        ### Ollamafy ###
        model_maintainer = model_id.split('/')[-2]
        if not ollama_model_name:
            ollama_model_name = model_maintainer.lower() + '_' + model_name.lower()
        ollama_modelfile_name = model_name + '_modelfile'

        # The Modelfile only needs to point Ollama at the converted GGUF.
        with open(ollama_modelfile_name, "w") as ollama_modelfile:
            ollama_modelfile.write(f"FROM {fp16}")

        # Address of the per-user server, recorded by regenerate_pubkey.
        with open(f"{HOME}/{hash_oauth}/ollama.host") as host_file:
            ollama_host = host_file.read().strip()

        # Maintainers publish under the original model name; everyone else
        # under "<maintainer>_<model>" to avoid name clashes in the Library.
        push_name = model_name.lower() if maintainer else ollama_model_name.lower()

        if ollama_q_method == "FP16":
            ollama_conversion = f"OLLAMA_HOST={ollama_host} ollama create -f {ollama_modelfile_name} {ollama_library_username}/{push_name}:{ollama_q_method.lower()}"
        else:
            ollama_conversion = f"OLLAMA_HOST={ollama_host} ollama create -q {ollama_q_method} -f {ollama_modelfile_name} {ollama_library_username}/{push_name}:{ollama_q_method.lower()}"

        ollama_conversion_result = subprocess.run(ollama_conversion, shell=True, capture_output=True)
        print(ollama_conversion_result)
        if ollama_conversion_result.returncode != 0:
            raise Exception(f"Error converting to Ollama: {ollama_conversion_result.stderr}")
        print("Model converted to Ollama successfully!")

        ollama_push = f"OLLAMA_HOST={ollama_host} ollama push {ollama_library_username}/{push_name}:{ollama_q_method.lower()}"
        ollama_push_result = subprocess.run(ollama_push, shell=True, capture_output=True)
        print(ollama_push_result)
        if ollama_push_result.returncode != 0:
            raise Exception(f"Error pushing to Ollama: {ollama_push_result.stderr}")
        print("Model pushed to Ollama library successfully!")

        if latest:
            # Retag the chosen quantization as :latest and push that tag too.
            ollama_copy = f"OLLAMA_HOST={ollama_host} ollama cp {ollama_library_username}/{push_name}:{ollama_q_method.lower()} {ollama_library_username}/{push_name}:latest"
            ollama_copy_result = subprocess.run(ollama_copy, shell=True, capture_output=True)
            print(ollama_copy_result)
            if ollama_copy_result.returncode != 0:
                raise Exception(f"Error tagging model as :latest: {ollama_copy_result.stderr}")
            print("Model tagged as :latest successfully!")

            ollama_push_latest = f"OLLAMA_HOST={ollama_host} ollama push {ollama_library_username}/{push_name}:latest"
            ollama_push_latest_result = subprocess.run(ollama_push_latest, shell=True, capture_output=True)
            print(ollama_push_latest_result)
            if ollama_push_latest_result.returncode != 0:
                raise Exception(f"Error pushing :latest to Ollama: {ollama_push_latest_result.stderr}")
            print("Model pushed to Ollama library with the :latest tag successfully!")

            ollama_rm_latest = f"OLLAMA_HOST={ollama_host} ollama rm {ollama_library_username}/{push_name}:latest"
            ollama_rm_latest_result = subprocess.run(ollama_rm_latest, shell=True, capture_output=True)
            print(ollama_rm_latest_result)
            if ollama_rm_latest_result.returncode != 0:
                raise Exception(f"Error removing local :latest copy: {ollama_rm_latest_result.stderr}")
            print("Local :latest copy removed successfully!")

        # Remove the local copy only after every push has finished.
        ollama_rm = f"OLLAMA_HOST={ollama_host} ollama rm {ollama_library_username}/{push_name}:{ollama_q_method.lower()}"
        ollama_rm_result = subprocess.run(ollama_rm, shell=True, capture_output=True)
        print(ollama_rm_result)
        if ollama_rm_result.returncode != 0:
            raise Exception(f"Error removing local copy from Ollama: {ollama_rm_result.stderr}")
        print("Local Ollama copy removed successfully!")

        return (f"Pushed {ollama_library_username}/{push_name}:{ollama_q_method.lower()} to the Ollama Library!", None)
    
    except Exception as e:
        return (f"Error: {e}", "error.png")
    finally:
        shutil.rmtree(model_name, ignore_errors=True)
        print("Folder cleaned up successfully!")

        # Best-effort teardown of the per-user Ollama instance; never raise
        # from a finally block.
        result = subprocess.run(ollama_pid, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            print(f"Error retrieving Ollama PID: {result.stderr}")
        else:
            print(f"Ollama PID retrieved: {result.stdout}")

        result = subprocess.run(ollama_stop, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            print(f"Error stopping Ollama: {result.stderr}")
        else:
            print("Ollama stopped successfully!")

        result = subprocess.run(delete_home, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            print(f"Error removing Ollama HOME folder: {result.stderr}")
        else:
            print("Ollama HOME folder removed successfully!")
        
css="""/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""
# Create Gradio interface
with gr.Blocks(css=css) as demo:
    
    login = gr.LoginButton(
        min_width=250,
    )

    generate_pubkey = gr.Button(
        value="Generate Pubkey",
        min_width=250,
    )
    
    model_id = HuggingfaceHubSearch(
        label="Hugging Face Hub Model ID",
        placeholder="Search for model id on Huggingface",
        search_type="model",
    )
    
    download_gguf_link = gr.Textbox(
        label="Download Link",
        info="If you have access to the GGUF, you can insert the downlaod link here.",
    )
    
    ollama_q_method = gr.Dropdown(
        ["FP16", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
        label="Ollama Quantization Method",
        info="Chose which quantization will created and exported to the Ollama Library.",
        value="FP16"
    )

    pubkey = gr.Code(
        ollama_pubkey,
        label="Copy this key and paste it into your Ollama profile.",
    )
    
    ollama_model_name = gr.Textbox(
        label="Ollama Model Name",
        info="Input a Custom Model Name.",
    )

    ollama_library_username = gr.Textbox(
        label="Ollama Library Username",
        info="Input your username from Ollama to Push this model to their Library.",
    )
    
    latest = gr.Checkbox(
        value=False,
        label="Latest",
        info="Push Model to the Ollama Library with the :latest tag."
    )

    maintainer = gr.Checkbox(
        value=False,
        label="Maintainer",
        info="Use this option is your original repository on both Hugging Face and Ollama."
    )
            
    generate_pubkey.click(
        fn=regenerate_pubkey,
        inputs=[],
        outputs=[
            pubkey,
        ],
    )
    
    iface = gr.Interface(
        fn=ollamafy_model,
        inputs=[
            login,
            model_id,
            ollama_model_name,
            download_gguf_link,
            ollama_library_username,
            ollama_q_method,
            latest,
            maintainer,
        ],
        outputs=[
            gr.Markdown(label="output"),
            gr.Image(show_label=False),
        ],
        title="Ollamafy",
        description="Import Hugging Face Models to Ollama and Push them to the Ollama Library 🦙 \n\n Sampled from: \n\n - https://huggingface.co/spaces/ggml-org/gguf-my-repo \n\n - https://huggingface.co/spaces/gingdev/ollama-server",
        api_name=False
    )
    
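# Factory-reboot the Space on a fixed interval (21600 s = 6 h) so any leftover
# Ollama state is cleared.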
def restart_space():
    HfApi().restart_space(repo_id="unclemusclez/ollamafy", token=token, factory_reboot=True)

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()

# Launch the interface
demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)