NorHsangPha committed on
Commit
83472e2
1 Parent(s): 02f84e6

Initial: initial commit

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +27 -0
  3. cleaned_stories.txt +3 -0
  4. ngram_text_gen.py +84 -0
  5. requirements.txt +1 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ cleaned_stories.txt filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ngram_text_gen import (
3
+ generate_output as ngram_text_gen,
4
+ NGRAM_TEXT_GEN_EXAMPLES,
5
+ )
6
+
7
# Interface wiring: three inputs (n-gram size, seed text, length) are passed
# positionally to the generator, which returns one text and one HTML fragment.
ngram_generate = gr.Interface(
    fn=ngram_text_gen,
    inputs=[
        gr.Slider(minimum=2, maximum=4, value=2, step=1, label="N-gram"),
        gr.Textbox(label="Input text"),
        # precision=0 makes the component deliver an int rather than a float.
        gr.Number(label="Token length", value=10, precision=0),
    ],
    outputs=[
        gr.Textbox(label="Generated text"),
        gr.HTML(label="Other possible outputs"),
    ],
    examples=NGRAM_TEXT_GEN_EXAMPLES,
    title="N-gram Text Generator",
    description="Generate text using N-grams.",
    allow_flagging="never",
)

with gr.Blocks() as demo:
    ngram_generate.render()

# Only start the server when executed as a script, so importing this module
# (for reuse or inspection of `demo`) does not launch a web server.
if __name__ == "__main__":
    demo.launch()
cleaned_stories.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c40b2fce072f7664e5fbe6b4ab1908cf49bfdad1796814a5176799bfb682fc
3
+ size 158068184
ngram_text_gen.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
+
4
def make_markov_model(cleaned_stories, n_gram=2):
    """Build an n-gram Markov transition table from a token sequence.

    Args:
        cleaned_stories: Indexable sequence of tokens in corpus order.
        n_gram: Number of consecutive tokens that form one state. The value
            is coerced with ``int()`` so a float such as ``2.0`` is accepted.

    Returns:
        A dict mapping each state (a tuple of ``n_gram`` tokens) to a dict of
        ``{next_token: relative_frequency}``. The dict is empty when the
        sequence holds ``n_gram`` tokens or fewer.

    Raises:
        ValueError: If ``n_gram`` is smaller than 1.
    """
    n_gram = int(n_gram)
    if n_gram < 1:
        # Zero or negative sizes would slice wrong-sized states silently.
        raise ValueError("n_gram must be at least 1")

    markov_model = {}

    # Count how often each token follows each n-token state.
    for i in range(len(cleaned_stories) - n_gram):
        curr_state = tuple(cleaned_stories[i : i + n_gram])
        next_state = cleaned_stories[i + n_gram]
        transitions = markov_model.setdefault(curr_state, {})
        transitions[next_state] = transitions.get(next_state, 0) + 1

    # Normalise counts to relative frequencies in place, which avoids holding
    # a second copy of a potentially very large table.
    for transitions in markov_model.values():
        total = sum(transitions.values())
        for next_state in transitions:
            transitions[next_state] /= total

    return markov_model
27
+
28
+
29
def generate_text(markov_model, n_gram=2, max_length=100, start=None):
    """Generate a space-joined token sequence by walking a Markov model.

    Args:
        markov_model: Mapping of state tuples to ``{next_token: weight}``
            dicts.
        n_gram: Number of trailing tokens used as the lookup state.
        max_length: Tokens are appended only while the output holds fewer
            tokens than this.
        start: Optional whitespace-separated seed text. When ``None`` or
            blank, a random state from the model is used as the seed.

    Returns:
        The seed tokens followed by any generated tokens, joined by single
        spaces. An empty string when there is no seed and the model is empty.
    """
    n_gram = int(n_gram)
    seed_tokens = start.split() if start else []

    if seed_tokens:
        generated_text = seed_tokens
        # Key on the trailing n_gram tokens so a seed longer than n_gram can
        # still be continued instead of never matching a model state.
        current_state = tuple(seed_tokens[-n_gram:])
    elif markov_model:
        current_state = random.choice(list(markov_model))
        generated_text = list(current_state)
    else:
        # Nothing to seed from and nothing to sample.
        return ""

    while len(generated_text) < max_length:
        transitions = markov_model.get(current_state)
        if not transitions:
            # Unknown state (or no recorded followers): stop generating.
            break
        next_state = random.choices(
            list(transitions),
            list(transitions.values()),
        )[0]
        generated_text.append(next_state)
        current_state = tuple(generated_text[-n_gram:])

    return " ".join(generated_text)
49
+
50
+
51
+ # Function to generate a random story with a specified starting seed and limit
52
def generate_random_story(markov_model, start=None, n_gram=2, limit=100):
    """Delegate to ``generate_text``, passing ``limit`` as its ``max_length``.

    Args:
        markov_model: Transition table forwarded unchanged.
        start: Optional seed text forwarded unchanged.
        n_gram: State size forwarded unchanged.
        limit: Value handed to ``generate_text`` as ``max_length``.

    Returns:
        Whatever ``generate_text`` returns for these arguments.
    """
    story = generate_text(
        markov_model,
        n_gram=n_gram,
        max_length=limit,
        start=start,
    )
    return story
54
+
55
+
56
+ # Load the cleaned stories
57
# Read the corpus once at import time: one whitespace-stripped entry per line.
with open("cleaned_stories.txt", mode="r", encoding="utf-8") as file:
    cleaned_stories = list(map(str.strip, file))
59
+
60
+
61
# Most recently built model, stored as {n_gram: model} with at most one entry.
# Repeated requests with the same n-gram size skip the rebuild, while memory
# use stays at one model at a time.
_MARKOV_MODEL_CACHE = {}


def _get_markov_model(n_gram):
    """Return the model for ``n_gram``, rebuilding only when the size changes."""
    markov_model = _MARKOV_MODEL_CACHE.get(n_gram)
    if markov_model is None:
        # Release the previous model before building the next one.
        _MARKOV_MODEL_CACHE.clear()
        markov_model = make_markov_model(cleaned_stories, n_gram)
        _MARKOV_MODEL_CACHE[n_gram] = markov_model
    return markov_model


def generate_output(n_gram, input, token_length):
    """Generate five texts: one primary result and four alternatives as HTML.

    Args:
        n_gram: State size for the Markov model; coerced with ``int()``.
        input: Seed text passed to the generator. The name shadows the
            builtin but is kept so existing callers keep working.
        token_length: Length limit passed to the generator; coerced with
            ``int()``.

    Returns:
        A ``(text, html)`` tuple: the first generated text, and the remaining
        four texts HTML-escaped and each followed by ``<br />``.
    """
    import html

    # UI widgets may deliver numbers as floats; slicing and range need ints.
    n_gram = int(n_gram)
    token_length = int(token_length)

    markov_model = _get_markov_model(n_gram)

    text_outputs = []
    for _ in range(5):
        text = generate_random_story(markov_model, input, n_gram, token_length)
        # Collapse any run of whitespace to a single space.
        text_outputs.append(" ".join(text.split()))

    # The texts contain the user-supplied seed, so escape them before they
    # are embedded in an HTML fragment.
    other_outputs = "".join(
        html.escape(text) + "<br />" for text in text_outputs[1:]
    )

    return text_outputs[0], other_outputs
75
+
76
+
77
# Example rows in the order [n-gram size, seed text, token length].
NGRAM_TEXT_GEN_EXAMPLES = [
    [2, "မႂ် သုင်", 10],
    [2, "မႂ် သုင်", 50],
    [3, "မႂ် သုင် ၶႃႈ", 10],
    [2, "ၵိၼ် ၶဝ်ႈ", 10],
    [3, "လိၵ်ႈ လၢႆး တႆး", 10],
    [4, "ပွႆး ပီ မႂ်ႇ တႆး", 20],
]
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gradio