File size: 3,722 Bytes
1a37806
 
0aa6630
1451cd9
1a37806
1451cd9
1a37806
1451cd9
 
f55c257
1a37806
f55c257
3f9cf1b
24266ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aff8b1b
 
 
 
 
 
 
 
 
 
 
 
24266ff
 
1451cd9
1a37806
1451cd9
 
 
 
1a37806
 
 
 
b43a211
1a37806
 
 
 
97a5779
 
 
da07405
 
3b41829
1451cd9
 
 
 
3b41829
1451cd9
 
 
 
24266ff
1451cd9
 
 
24266ff
1a37806
24266ff
3f9cf1b
ef2ecb8
24266ff
 
1a37806
 
 
 
 
 
 
 
 
1451cd9
d1e9c7d
 
 
1451cd9
 
 
24266ff
1a37806
24266ff
 
 
 
 
1a37806
1451cd9
 
 
d1e9c7d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import pickle
import tempfile
import warnings
from io import BytesIO
from pathlib import Path
from uuid import uuid4

import gradio as gr
import joblib
from huggingface_hub import upload_file
from skops import io as sio

# Browser/page title handed to ``gr.Blocks(title=...)``.
title = "skops converter"

# Markdown shown at the top of the page via ``gr.Markdown(desc)``.
# This text is user-facing: edits here change what visitors read.
desc = """
# Pickle to skops converter

This space converts your pickle files to skops format. You can read more on the
skops format [here]( https://skops.readthedocs.io/en/stable/persistence.html).

You can use `skops.io.dump(joblib.load(in_file), out_file)` to do the
conversion yourself, where `in_file` is your source pickle file and `out_file`
is where you want to save the skops file. But only do that **if you trust the
source of the pickle file**.

You can then use `skops.io.load(skops_file, trusted=unknown_types)` to load the
file, where `skops_file` is the converted skops format file, and the
`unknown_types` is what you see in the "Unknown Types" box below. You can also
locally reproduce this list using
`skops.io.get_untrusted_types(file=skops_file)`. You should only load a `skops`
file that you trust all the types included in the `unknown_types` list.

## Requirements

This space assumes you have used the latest `joblib` and `scikit-learn`
versions installed on your environment to create the pickle file.

## Reporting issues

If you encounter an issue, please open an issue on the project's repository
on the [issue tracker](
https://github.com/skops-dev/skops/issues/new?title=CONVERSION+error+from+hf.space&body=Paste+the+error+message+and+a+link+to+your+pickle+file+here+please)

"""


def convert(file, store):
    """Convert an uploaded pickle/joblib file into a skops file.

    Parameters
    ----------
    file
        Uploaded-file object. Only its ``name`` attribute is read and is
        treated as the path of the file on disk.
    store : bool
        When truthy, the input file is first uploaded to the
        ``scikit-learn/pickle-to-skops`` dataset repository, authenticated
        with the ``HF_TOKEN`` environment variable.

    Returns
    -------
    tuple
        ``(out_file, unknown_types, msg)`` on success: the path of the
        written ``.skops`` file, the value returned by
        ``sio.get_untrusted_types`` for that file, and the ``repr`` of every
        warning recorded during the conversion joined by newlines (``""``
        when there were none).
        ``(None, None, repr(error))`` if any step raises an ``Exception``.
    """
    msg = ""
    try:
        with warnings.catch_warnings(record=True) as record:
            in_file = Path(file.name)
            if store:
                upload_file(
                    path_or_fileobj=str(in_file),
                    path_in_repo=f"{uuid4()}/{in_file.name}",
                    repo_id="scikit-learn/pickle-to-skops",
                    repo_type="dataset",
                    token=os.environ["HF_TOKEN"],
                )

            # NOTE(security): unpickling runs arbitrary code embedded in the
            # uploaded file. That is inherent to this converter, so it must
            # only ever run in a disposable/sandboxed environment.
            try:
                obj = joblib.load(in_file)
            except Exception:
                # Fall back to the plain pickle loader. ``Exception`` (not a
                # bare ``except``) so KeyboardInterrupt/SystemExit propagate.
                with open(in_file, "rb") as f:
                    obj = pickle.load(f)

            # Drop the last extension only; a dot-leading name such as
            # ".pkl" would leave an empty base, so keep the full name then.
            base_name = in_file.name.rsplit(".", 1)[0] or in_file.name

            # The directory is intentionally not removed here: the returned
            # path has to stay readable after this function returns.
            out_dir = tempfile.mkdtemp(prefix="gradio-convert-")
            out_file = Path(out_dir) / f"{base_name}.skops"
            sio.dump(obj, out_file)
            unknown_types = sio.get_untrusted_types(file=out_file)
        if record:
            msg = "\n".join(repr(w.message) for w in record)
    except Exception as e:
        # Report the failure as text instead of raising.
        return None, None, repr(e)

    return out_file, unknown_types, msg


# Build the single-page UI: description, opt-in storage checkbox, an upload
# button, and three output widgets that receive the values `convert` returns.
with gr.Blocks(title=title) as iface:
    gr.Markdown(desc)

    store_label = (
        "Store a copy: if you leave this box checked, we store a copy of your"
        " pickle file in a private place, only used for us to find issues and"
        " improve the skops format. Please uncheck this box if your pickle file"
        " includes any personal or sensitive data."
    )
    store = gr.Checkbox(label=store_label, value=True)

    upload_button = gr.UploadButton(
        "Click to Upload a File", file_types=None, file_count="single"
    )

    # Outputs are instantiated in the same order as `convert`'s return tuple:
    # converted file, unknown types, then errors/warnings text.
    file_output = gr.File(label="Converted File")
    unknown_types_box = gr.Text(label="Unknown Types")
    messages_box = gr.Text(label="Errors and Warnings")

    # Register `convert` on the button's upload event.
    upload_button.upload(
        fn=convert,
        inputs=[upload_button, store],
        outputs=[file_output, unknown_types_box, messages_box],
        api_name="upload-file",
    )


# Start the app as soon as this module is executed.
iface.launch(debug=True)