NorHsangPha
committed on
Commit
•
83472e2
1
Parent(s):
02f84e6
Initial: initial commit
Browse files- .gitattributes +1 -0
- app.py +27 -0
- cleaned_stories.txt +3 -0
- ngram_text_gen.py +84 -0
- requirements.txt +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
cleaned_stories.txt filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio front-end for the n-gram text generator.
# Wires the UI controls to ngram_text_gen.generate_output:
# (n_gram slider, seed-text box, token-length number) -> (best text, other candidates as HTML).
import gradio as gr
from ngram_text_gen import (
    generate_output as ngram_text_gen,
    NGRAM_TEXT_GEN_EXAMPLES,
)

ngram_generate = gr.Interface(
    fn=ngram_text_gen,
    inputs=[
        # Slider step=1 keeps the n-gram order an integer between 2 and 4.
        gr.Slider(minimum=2, maximum=4, value=2, step=1, label="N-gram"),
        gr.Textbox(label="Input text"),
        gr.Number(label="Token length", value=10),
    ],
    outputs=[
        gr.Textbox(label="Generated text"),
        # generate_output joins the remaining candidates with <br /> tags,
        # hence an HTML component rather than a Textbox.
        gr.HTML(label="Other possible outputs"),
    ],
    examples=NGRAM_TEXT_GEN_EXAMPLES,
    title="N-gram Text Generator",
    description="Generate text using N-grams.",
    # NOTE(review): allow_flagging was deprecated in newer Gradio releases in
    # favor of flagging_mode — confirm against the pinned gradio version
    # (requirements.txt does not pin one).
    allow_flagging="never",
)

# Render the Interface inside a Blocks context so the layout can be
# extended with more components later without changing the Interface.
with gr.Blocks() as demo:
    ngram_generate.render()

demo.launch()
cleaned_stories.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12c40b2fce072f7664e5fbe6b4ab1908cf49bfdad1796814a5176799bfb682fc
|
3 |
+
size 158068184
|
ngram_text_gen.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
|
3 |
+
|
def make_markov_model(cleaned_stories, n_gram=2):
    """Build an n-gram Markov chain from a token sequence.

    Args:
        cleaned_stories: sequence of tokens to learn transitions from
            (here: the stripped lines of the corpus file).
        n_gram: number of tokens per state (order of the model).

    Returns:
        Dict mapping each state — a tuple of ``n_gram`` consecutive
        tokens — to a dict of ``{next_token: probability}`` whose values
        sum to 1 for that state.
    """
    markov_model = {}

    # Count transitions: each sliding window of n_gram tokens maps to the
    # token that immediately follows it.
    for i in range(len(cleaned_stories) - n_gram):
        curr_state = tuple(cleaned_stories[i : i + n_gram])
        next_state = cleaned_stories[i + n_gram]
        transitions = markov_model.setdefault(curr_state, {})
        transitions[next_state] = transitions.get(next_state, 0) + 1

    # Normalize raw counts into transition probabilities (in place).
    for transitions in markov_model.values():
        total = sum(transitions.values())
        for next_state in transitions:
            transitions[next_state] /= total

    return markov_model
27 |
+
|
28 |
+
|
def generate_text(markov_model, n_gram=2, max_length=100, start=None):
    """Random-walk the Markov chain and return the generated tokens as a string.

    Args:
        markov_model: ``{state_tuple: {next_token: probability}}`` as built
            by ``make_markov_model``.
        n_gram: order of the model; the state is always the last ``n_gram``
            generated tokens.
        max_length: maximum total number of tokens (seed included).
        start: optional whitespace-separated seed text. When ``None`` a
            random state from the model is used as the seed.

    Returns:
        Space-joined generated tokens. Generation stops early when the
        current state has no outgoing transitions in the model.
    """
    if start is None:
        seed_tokens = list(random.choice(list(markov_model.keys())))
    else:
        seed_tokens = start.split()

    generated_text = list(seed_tokens)
    # Fix: key the state on the *last* n_gram tokens of the seed. The old
    # code used the whole seed as the state, so a seed longer than n_gram
    # tokens could never match a model key and generation stopped at once.
    current_state = tuple(generated_text[-n_gram:])

    while len(generated_text) < max_length:
        if current_state not in markov_model:
            break  # dead end: no observed continuation for this state
        transitions = markov_model[current_state]
        # Weighted draw over the observed continuations.
        next_token = random.choices(
            list(transitions.keys()),
            list(transitions.values()),
        )[0]
        generated_text.append(next_token)
        current_state = tuple(generated_text[-n_gram:])

    return " ".join(generated_text)
49 |
+
|
50 |
+
|
def generate_random_story(markov_model, start=None, n_gram=2, limit=100):
    """Generate one random story of at most *limit* tokens.

    Thin convenience wrapper over ``generate_text`` that exposes the token
    budget as ``limit`` and reorders the parameters for callers that think
    "model, seed, order, length".
    """
    return generate_text(markov_model, n_gram=n_gram, max_length=limit, start=start)
54 |
+
|
55 |
+
|
# Load the training corpus at import time: one stripped line per token/entry.
# NOTE(review): in this repo cleaned_stories.txt is tracked by Git LFS
# (see .gitattributes; the pointer records size 158068184 bytes) — the real
# file must be pulled, otherwise this reads the 3-line LFS pointer stub.
with open("cleaned_stories.txt", "r", encoding="utf-8") as file:
    cleaned_stories = [line.strip() for line in file]
59 |
+
|
60 |
+
|
# Cache of Markov models keyed by n-gram order: building a model walks the
# whole corpus, so rebuilding it on every UI call is wasteful. The corpus
# (module-level `cleaned_stories`) never changes at runtime.
_MODEL_CACHE = {}


def generate_output(n_gram, input, token_length):
    """Gradio callback: produce one primary and four alternative generations.

    Args:
        n_gram: order of the Markov model (UI slider, 2–4).
        input: seed text, whitespace-split into starting tokens.
            (NOTE: shadows the ``input`` builtin; name kept so existing
            keyword callers keep working.)
        token_length: maximum number of tokens per generated text.

    Returns:
        Tuple of (first generated text, HTML string of the remaining four
        candidates separated by ``<br />`` tags).
    """
    if n_gram not in _MODEL_CACHE:
        _MODEL_CACHE[n_gram] = make_markov_model(cleaned_stories, n_gram)
    markov_model = _MODEL_CACHE[n_gram]

    text_outputs = []
    for _ in range(5):
        story = generate_random_story(markov_model, input, n_gram, token_length)
        # Collapse any repeated whitespace into single spaces.
        text_outputs.append(" ".join(story.split()))

    other_outputs = "".join(text + "<br />" for text in text_outputs[1:])

    return text_outputs[0], other_outputs
75 |
+
|
76 |
+
|
# Example rows for the Gradio UI, one per Interface input:
# [n_gram order, seed text (Shan script), token length].
NGRAM_TEXT_GEN_EXAMPLES = [
    [2, "မႂ် သုင်", 10],
    [2, "မႂ် သုင်", 50],
    [3, "မႂ် သုင် ၶႃႈ", 10],
    [2, "ၵိၼ် ၶဝ်ႈ", 10],
    [3, "လိၵ်ႈ လၢႆး တႆး", 10],
    [4, "ပွႆး ပီ မႂ်ႇ တႆး", 20],
]
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
gradio
|