Doron Adler committed on
Commit
d230739
β€’
1 Parent(s): f4adb42

WoWQuestTextGenerator

Browse files
.gitattributes CHANGED
@@ -29,3 +29,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ model/optimizer.pt filter=lfs diff=lfs merge=lfs -text
33
+ model/scheduler.pt filter=lfs diff=lfs merge=lfs -text
34
+ model/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
35
+ model/training_args.bin filter=lfs diff=lfs merge=lfs -text
36
+ model/rng_state.pth filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: WoWQuestTextGenerator
3
- emoji: πŸƒ
4
- colorFrom: blue
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.1.7
8
  app_file: app.py
 
1
  ---
2
+ title: WoW Quest Generator
3
+ emoji: πŸ§β€β™€οΈ
4
+ colorFrom: green
5
+ colorTo: orange
6
  sdk: gradio
7
  sdk_version: 3.1.7
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import random
4
+ import re
5
+
6
+ title = "WoW Quest Text Generator"
7
+ description = "Tap on the \"Submit\" button to generate a random quest text."
8
+ article = "<p>Fine tuned <a href=\"https://huggingface.co/EleutherAI/gpt-neo-125M\">EleutherAI/gpt-neo-125M</a> upon a formatted <a href=\"https://github.com/TrinityCore/TrinityCore\"> TrinityCore – TDB_full_world_927.22082_2022_08_21 Dataset</a></p><p>This generator is fan made and is not affiliated in any way with Blizzard and/or any other company</p>"
9
+
10
+ model_id = "./model"
11
+ text_generator = pipeline("text-generation", model=model_id, tokenizer=model_id)
12
+ max_length = 192
13
+ top_k = 40
14
+ top_p = 0.92
15
+ temperature = 1.0
16
+
17
+ random.seed(None)
18
+
19
+ wow_class_list = ["Death Knight", "Demon Hunter", "Druid", "Hunter", "Mage", "Monk", "Paladin", "Priest", "Rogue", "Shaman", "Warrior", "Warlock"]
20
+ wow_race_list = ["Blood Elf", "Human", "Tauren", "Orc", "Kul Tiran", "Void Elf", "Troll", "Vulpera", "Night Elf", "Zandalari Troll", "Worgen", "Undead", "Goblin", "Highmountain Tauren", "Nightborne", "Dwarf", "Draenei", "Gnome", "Lightforged Draenei", "Pandaren", "Maghar Orc", "Mechagnome", "Dark Iron Dwarf"]
21
+ wow_silly_name_list = ["Glitterstorm", "Sunderwear", "Arrowdynamic", "Sapntap", "Crossblesser", "Praystation", "Healium", "Shocknorris", "Alestrom", "Harryportal", "Merlìn", "Wreckquiem", "Owlcapone"]
22
+
23
+ suggested_text_list = ["Greetings $r", "$c I need your help", "Good to see you $n", "Hey $gBoy:Girl; "]
24
+
25
def parseGenderTokens(text):
    """Resolve WoW gender tokens of the form ``$gMale:Female;`` in *text*.

    A 1-based alternative index is chosen at random when the first token is
    seen and reused for every later token, so one text stays internally
    consistent (all-male or all-female wording).

    Returns *text* with every ``$g...:...;`` token replaced by the chosen
    alternative; text without tokens comes back unchanged.
    """
    # No anchors in the pattern, so re.MULTILINE (present in the original)
    # had no effect and is dropped.
    regex = r"\$[gG]([^:]+):([^;]+);"
    parsed_string = ""
    prev_index = 0
    random_group = -1  # -1 until the first token fixes the choice
    for match in re.finditer(regex, text):
        # Copy the literal text between the previous token and this one.
        parsed_string += text[prev_index:match.start()]
        if random_group == -1:
            random_group = random.randint(1, len(match.groups()))
        parsed_string += match.group(random_group)
        # Resume after the full token (including the trailing ';');
        # match.end() is equivalent to the original's brittle
        # `match.end(group_num) + 1` but does not depend on group layout.
        prev_index = match.end()
    parsed_string += text[prev_index:]
    return parsed_string
41
+
42
def parseSpecialCharacters(text, wow_class_item, wow_race_item, wow_silly_name_item):
    """Substitute WoW placeholder tokens in *text* with concrete values.

    ``$b``/``$B`` become newlines, ``$c``/``$C`` the class name,
    ``$r``/``$R`` the race name and ``$n``/``$N`` the character name.
    Gender tokens are resolved afterwards via parseGenderTokens.
    """
    substitutions = {
        "$B": "\n",
        "$b": "\n",
        "$c": wow_class_item,
        "$C": wow_class_item,
        "$r": wow_race_item,
        "$R": wow_race_item,
        "$n": wow_silly_name_item,
        "$N": wow_silly_name_item,
    }
    parsedText = text
    for token, value in substitutions.items():
        parsedText = parsedText.replace(token, value)
    return parseGenderTokens(parsedText)
45
+
46
def text_generation(input_text = None):
    """Generate one quest text, optionally seeded with *input_text*.

    The prompt is prefixed with the BOS marker ``<|startoftext|>`` when
    missing, sampled from the fine-tuned pipeline, then post-processed:
    generation markers are stripped and WoW placeholder tokens
    ($c, $r, $n, $g...) are replaced with randomly chosen class / race /
    character names.
    """
    # Empty or missing prompt -> generate from the BOS token alone.
    if not input_text:
        input_text = "<|startoftext|>"
    elif not input_text.startswith("<|startoftext|>"):
        input_text = "<|startoftext|>" + input_text
    # NOTE(review): bos_token/eos_token/pad_token/unknown_token are not
    # standard generate() kwargs; kept as-is to avoid behavior changes,
    # but verify they are actually honored by the pipeline.
    generated_text = text_generator(input_text,
                                    max_length=max_length,
                                    top_k=top_k,
                                    top_p=top_p,
                                    temperature=temperature,
                                    do_sample=True,
                                    repetition_penalty=2.0,
                                    bos_token="<|startoftext|>",
                                    eos_token="<|endoftext|>",
                                    pad_token="<|pad|>",
                                    unknown_token="<|unknown|>",
                                    num_return_sequences=1)
    # Strip markers and normalize whitespace / doubled-quote artefacts.
    parsed_text = (generated_text[0]["generated_text"]
                   .replace("<|startoftext|>", "")
                   .replace("\r", "")
                   .replace("\n\n", "\n")
                   .replace("\t", " ")
                   .replace("<|pad|>", " * ")
                   .replace("\"\"", "\""))
    # Pick one concrete value per category so all substitutions in this
    # quest text stay consistent.
    wow_class_item = random.choice(wow_class_list)
    wow_race_item = random.choice(wow_race_list)
    wow_silly_name_item = random.choice(wow_silly_name_list)
    parsed_text = parseSpecialCharacters(parsed_text, wow_class_item, wow_race_item, wow_silly_name_item)
    # The dataset stores literal "\n" sequences; turn them into newlines.
    parsed_text = parsed_text.replace("\\n", "\n")
    return parsed_text
71
+
72
# Build and launch the Gradio UI: a single optional-seed textbox feeding
# text_generation, with the predefined example prompts.
# Fix: label typo "strating" -> "starting".
gr.Interface(
    text_generation,
    [gr.inputs.Textbox(lines=1, label="Enter starting text or leave blank")],
    outputs=[gr.outputs.Textbox(type="auto", label="Generated quest text")],
    title=title,
    description=description,
    article=article,
    examples=suggested_text_list,
    theme="default",
    allow_flagging=False,
).launch()
model/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<|pad|>": 50258,
3
+ "<|startoftext|>": 50257
4
+ }
model/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "EleutherAI/gpt-neo-125M",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPTNeoForCausalLM"
6
+ ],
7
+ "attention_dropout": 0,
8
+ "attention_layers": [
9
+ "global",
10
+ "local",
11
+ "global",
12
+ "local",
13
+ "global",
14
+ "local",
15
+ "global",
16
+ "local",
17
+ "global",
18
+ "local",
19
+ "global",
20
+ "local"
21
+ ],
22
+ "attention_types": [
23
+ [
24
+ [
25
+ "global",
26
+ "local"
27
+ ],
28
+ 6
29
+ ]
30
+ ],
31
+ "bos_token_id": 50256,
32
+ "embed_dropout": 0,
33
+ "eos_token_id": 50256,
34
+ "gradient_checkpointing": false,
35
+ "hidden_size": 768,
36
+ "initializer_range": 0.02,
37
+ "intermediate_size": null,
38
+ "layer_norm_epsilon": 1e-05,
39
+ "max_position_embeddings": 2048,
40
+ "model_type": "gpt_neo",
41
+ "num_heads": 12,
42
+ "num_layers": 12,
43
+ "resid_dropout": 0,
44
+ "summary_activation": null,
45
+ "summary_first_dropout": 0.1,
46
+ "summary_proj_to_labels": true,
47
+ "summary_type": "cls_index",
48
+ "summary_use_proj": true,
49
+ "torch_dtype": "float32",
50
+ "transformers_version": "4.21.2",
51
+ "use_cache": true,
52
+ "vocab_size": 50259,
53
+ "window_size": 256
54
+ }
model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:131c0394c90e7a276a2d60646fb35bd626b0cd949afb946eb9816340a1ca9c45
3
+ size 1001693889
model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06a36064aa3b3fcbc6379befed64965c4a20bde5b343cdaefa0edc0cdd54ea6c
3
+ size 551191249
model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03a3c1141f8bebf59967bea65fa021fcc2ad8a3d7753ae183d1d946d3f5b6d8e
3
+ size 14503
model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a433fc7c4fe2e22525674ece37e19e095986d7600708d2a528f0478472b251
3
+ size 623
model/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|pad|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
model/tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|startoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 2048,
22
+ "name_or_path": "EleutherAI/gpt-neo-125M",
23
+ "pad_token": {
24
+ "__type": "AddedToken",
25
+ "content": "<|pad|>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "special_tokens_map_file": null,
32
+ "tokenizer_class": "GPT2Tokenizer",
33
+ "unk_token": {
34
+ "__type": "AddedToken",
35
+ "content": "<|endoftext|>",
36
+ "lstrip": false,
37
+ "normalized": true,
38
+ "rstrip": false,
39
+ "single_word": false
40
+ },
41
+ "unknown_token": "<|unknown|>"
42
+ }
model/trainer_state.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.2171372930866602,
5
+ "global_step": 5000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "learning_rate": 4.9075899457861016e-05,
13
+ "loss": 2.0374,
14
+ "step": 250
15
+ },
16
+ {
17
+ "epoch": 0.12,
18
+ "learning_rate": 4.753573188762938e-05,
19
+ "loss": 1.6902,
20
+ "step": 500
21
+ },
22
+ {
23
+ "epoch": 0.18,
24
+ "learning_rate": 4.599556431739773e-05,
25
+ "loss": 1.6269,
26
+ "step": 750
27
+ },
28
+ {
29
+ "epoch": 0.24,
30
+ "learning_rate": 4.445539674716609e-05,
31
+ "loss": 1.6167,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 0.3,
36
+ "learning_rate": 4.2915229176934454e-05,
37
+ "loss": 1.568,
38
+ "step": 1250
39
+ },
40
+ {
41
+ "epoch": 0.37,
42
+ "learning_rate": 4.137506160670281e-05,
43
+ "loss": 1.5706,
44
+ "step": 1500
45
+ },
46
+ {
47
+ "epoch": 0.43,
48
+ "learning_rate": 3.983489403647117e-05,
49
+ "loss": 1.558,
50
+ "step": 1750
51
+ },
52
+ {
53
+ "epoch": 0.49,
54
+ "learning_rate": 3.829472646623953e-05,
55
+ "loss": 1.5502,
56
+ "step": 2000
57
+ },
58
+ {
59
+ "epoch": 0.55,
60
+ "learning_rate": 3.675455889600789e-05,
61
+ "loss": 1.534,
62
+ "step": 2250
63
+ },
64
+ {
65
+ "epoch": 0.61,
66
+ "learning_rate": 3.5214391325776246e-05,
67
+ "loss": 1.5069,
68
+ "step": 2500
69
+ },
70
+ {
71
+ "epoch": 0.67,
72
+ "learning_rate": 3.367422375554461e-05,
73
+ "loss": 1.5202,
74
+ "step": 2750
75
+ },
76
+ {
77
+ "epoch": 0.73,
78
+ "learning_rate": 3.213405618531297e-05,
79
+ "loss": 1.4854,
80
+ "step": 3000
81
+ },
82
+ {
83
+ "epoch": 0.79,
84
+ "learning_rate": 3.059388861508132e-05,
85
+ "loss": 1.4739,
86
+ "step": 3250
87
+ },
88
+ {
89
+ "epoch": 0.85,
90
+ "learning_rate": 2.905372104484968e-05,
91
+ "loss": 1.4941,
92
+ "step": 3500
93
+ },
94
+ {
95
+ "epoch": 0.91,
96
+ "learning_rate": 2.751355347461804e-05,
97
+ "loss": 1.4642,
98
+ "step": 3750
99
+ },
100
+ {
101
+ "epoch": 0.97,
102
+ "learning_rate": 2.59733859043864e-05,
103
+ "loss": 1.4524,
104
+ "step": 4000
105
+ },
106
+ {
107
+ "epoch": 1.03,
108
+ "learning_rate": 2.4433218334154756e-05,
109
+ "loss": 1.3755,
110
+ "step": 4250
111
+ },
112
+ {
113
+ "epoch": 1.1,
114
+ "learning_rate": 2.2893050763923117e-05,
115
+ "loss": 1.3779,
116
+ "step": 4500
117
+ },
118
+ {
119
+ "epoch": 1.16,
120
+ "learning_rate": 2.1352883193691475e-05,
121
+ "loss": 1.3758,
122
+ "step": 4750
123
+ },
124
+ {
125
+ "epoch": 1.22,
126
+ "learning_rate": 1.9812715623459833e-05,
127
+ "loss": 1.3637,
128
+ "step": 5000
129
+ }
130
+ ],
131
+ "max_steps": 8216,
132
+ "num_train_epochs": 2,
133
+ "total_flos": 2861768122887168.0,
134
+ "trial_name": null,
135
+ "trial_params": null
136
+ }
model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8fc79109b17bd5de5f6f4ae5ff262a057de0e111539bc97ae382655514eb717
3
+ size 3247
model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers
4
+ tokenizers
5
+