Nima Kamali Lassem committed on
Commit
e4afdb3
1 Parent(s): 9db40e8
Files changed (5) hide show
  1. app.py +76 -0
  2. not.py +86 -0
  3. requirements.txt +7 -0
  4. tokenizer_config.json +1 -0
  5. vocab.txt +0 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from transformers.models.bert import BertTokenizer
5
+ from transformers import TFBertModel
6
+ import streamlit as st
7
+ import pandas as pd
8
+ from transformers import TFAutoModel
9
+
10
+
11
+
12
+
13
# Hard-coded 12-epoch training history of the TC32 classifier, used to draw
# the loss/accuracy chart without needing the original training logs.
hist_loss = [0.1971, 0.0732, 0.0465, 0.0319, 0.0232, 0.0167,
             0.0127, 0.0094, 0.0073, 0.0058, 0.0049, 0.0042]
hist_acc = [0.9508, 0.9811, 0.9878, 0.9914, 0.9936, 0.9954,
            0.9965, 0.9973, 0.9978, 0.9983, 0.9986, 0.9988]
hist_val_acc = [0.9804, 0.9891, 0.9927, 0.9956, 0.9981, 0.998,
                0.9991, 0.9997, 0.9991, 0.9998, 0.9998, 0.9998]
hist_val_loss = [0.0759, 0.0454, 0.028, 0.015, 0.0063, 0.0064,
                 0.004, 0.0011, 0.0021, 0.00064548, 0.0010, 0.00042896]
Epochs = list(range(1, 13))

# Rescale every series in place from fractions to percentages.
for _series in (hist_loss, hist_acc, hist_val_acc, hist_val_loss):
    _series[:] = [value * 100 for value in _series]

d = {'val_acc': hist_val_acc, 'acc': hist_acc, 'loss': hist_loss,
     'val_loss': hist_val_loss, 'Epochs': Epochs}
chart_data = pd.DataFrame(d)
chart_data.index = range(1, 13)  # 1-based epoch index for the chart axis
26
+
27
# Cached loader: st.cache keeps the model/tokenizer alive across Streamlit
# reruns; allow_output_mutation skips hashing the (unhashable) Keras model.
@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def load_model(show_spinner=True):
    # NOTE(review): show_spinner is never read in the body — it looks copied
    # from st.cache's own keyword; confirm before removing.
    # First call downloads both artifacts from the HF Hub (network I/O).
    yorum_model = TFAutoModel.from_pretrained("NimaKL/tc32_test")
    tokenizer = BertTokenizer.from_pretrained('NimaKL/tc32_test')
    return yorum_model, tokenizer
32
+
33
# Page scaffold: wide two-column layout — col1 shows the training-history
# chart, col2 takes the complaint text to classify.
st.set_page_config(layout='wide', initial_sidebar_state='expanded')
col1, col2= st.columns(2)
with col1:
    st.title("TC32 Multi-Class Text Classification")
    st.subheader('Model Loss and Accuracy')
    st.area_chart(chart_data)
    # Load (cached) model + tokenizer.
    # NOTE(review): indentation was lost in the diff view — this line may have
    # been at module level rather than inside col1; placement only affects
    # where cache/spinner messages render. Confirm against the raw file.
    yorum_model, tokenizer = load_model()


with col2:
    # Turkish UI: "Enter a complaint to find its class." / "Complaint"
    st.title("Sınıfı bulmak için bir şikayet girin.")
    st.subheader("Şikayet")
    text = st.text_area('', height=240)  # unlabeled 240-px input box
    aButton = st.button('Ara')  # "Search" button
47
+
48
def prepare_data(input_text, tokenizer):
    """Tokenize *input_text* into the fixed-size tensors the model consumes.

    Returns a dict with 'input_ids' and 'attention_mask', each padded or
    truncated to 256 tokens and cast to tf.float64.
    NOTE(review): float64 ids are unusual for BERT — presumably the exported
    model was saved with float inputs; verify against its signature.
    """
    encoded = tokenizer.encode_plus(
        input_text,
        max_length=256,
        truncation=True,
        padding='max_length',
        add_special_tokens=True,
        return_tensors='tf',
    )
    input_ids = tf.cast(encoded.input_ids, tf.float64)
    attention_mask = tf.cast(encoded.attention_mask, tf.float64)
    return {'input_ids': input_ids, 'attention_mask': attention_mask}
61
+
62
def make_prediction(model, processed_data, classes=['Alışveriş','Anne-Bebek','Beyaz Eşya','Bilgisayar','Cep Telefonu','Eğitim','Elektronik','Emlak ve İnşaat','Enerji','Etkinlik ve Organizasyon','Finans','Gıda','Giyim','Hizmet','İçecek','İnternet','Kamu','Kargo-Nakliyat','Kozmetik','Küçük Ev Aletleri','Medya','Mekan ve Eğlence','Mobilya - Ev Tekstili','Mücevher Saat Gözlük','Mutfak Araç Gereç','Otomotiv','Sağlık','Sigorta','Spor','Temizlik','Turizm','Ulaşım']):
    """Return the label of the highest-scoring class.

    Args:
        model: object exposing ``predict``; ``predict(processed_data)[0]``
            must be the per-class score vector.
        processed_data: tokenized inputs as produced by ``prepare_data``.
        classes: label list indexed by the model's output positions.
    """
    scores = model.predict(processed_data)[0]
    winner = int(np.argmax(scores))
    return classes[winner]
65
+
66
+
67
# Run a prediction when there is text or the button was pressed.
# NOTE(review): `text or aButton` fires on every rerun while the box is
# non-empty, not only on 'Ara' clicks — confirm this is the intended UX.
if text or aButton:
    with col2:
        with st.spinner('Wait for it...'):
            processed_data = prepare_data(text, tokenizer)
            result = make_prediction(yorum_model, processed_data=processed_data)
            # Dark rounded HTML card wrapping the predicted label (kept as-is).
            description = '<table style="border: collapse;"><tr><div style="height: 62px;"></div></tr><tr><p style="border-width: medium; border-color: #aa5e70; border-radius: 10px;padding-top: 1px;padding-left: 20px;background:#20212a;font-family:Courier New; color: white;font-size: 36px; font-weight: boldest;">'+result+'</p></tr><table>'
            st.markdown(description, unsafe_allow_html=True)
    with col1:
        st.success("Tahmin başarıyla tamamlandı!")  # "Prediction completed successfully!"
76
+
not.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil

# One-time layout fixup: the SavedModel weight shards belong under
# ./variables/ and the tokenizer files under ./tokenizer/ so that
# tf.keras.models.load_model and BertTokenizer can find them.
# The exists() guards make a second run a no-op.
if not os.path.exists('variables'):
    os.makedirs('variables')
    shutil.move('variables.data-00000-of-00001', 'variables')
    shutil.move('variables.index', 'variables')
if not os.path.exists('tokenizer'):
    os.makedirs('tokenizer')
    # BUG FIX: the destination was misspelled 'tokeizer', which silently
    # RENAMED tokenizer_config.json to a stray file named 'tokeizer' instead
    # of moving it into the ./tokenizer/ directory.
    shutil.move('tokenizer_config.json', 'tokenizer')
    shutil.move('vocab.txt', 'tokenizer')
11
+
12
+ import pandas as pd
13
+ import numpy as np
14
+ import tensorflow as tf
15
+ from transformers.models.bert import BertTokenizer
16
+ from transformers import TFBertModel
17
+ import streamlit as st
18
+ import pandas as pd
19
+ from transformers import TFAutoModel
20
+
21
+
22
+
23
# 12-epoch training history (hard-coded) used to draw the loss/accuracy chart.
hist_loss = [0.1971, 0.0732, 0.0465, 0.0319, 0.0232, 0.0167,
             0.0127, 0.0094, 0.0073, 0.0058, 0.0049, 0.0042]
hist_acc = [0.9508, 0.9811, 0.9878, 0.9914, 0.9936, 0.9954,
            0.9965, 0.9973, 0.9978, 0.9983, 0.9986, 0.9988]
hist_val_acc = [0.9804, 0.9891, 0.9927, 0.9956, 0.9981, 0.998,
                0.9991, 0.9997, 0.9991, 0.9998, 0.9998, 0.9998]
hist_val_loss = [0.0759, 0.0454, 0.028, 0.015, 0.0063, 0.0064,
                 0.004, 0.0011, 0.0021, 0.00064548, 0.0010, 0.00042896]
Epochs = list(range(1, 13))

# Convert fractions to percentages, mutating each list in place.
for _metric in (hist_loss, hist_acc, hist_val_acc, hist_val_loss):
    _metric[:] = [v * 100 for v in _metric]

d = {'val_acc': hist_val_acc, 'acc': hist_acc, 'loss': hist_loss,
     'val_loss': hist_val_loss, 'Epochs': Epochs}
chart_data = pd.DataFrame(d)
chart_data.index = range(1, 13)  # 1-based epoch axis
36
+
37
# Cached loader: st.cache keeps the model/tokenizer alive across Streamlit
# reruns; allow_output_mutation skips hashing the (unhashable) Keras model.
@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def load_model(show_spinner=True):
    """Load the local SavedModel plus the TC32 BERT tokenizer.

    Returns:
        (yorum_model, tokenizer): the Keras model restored from the current
        working directory and the matching tokenizer from the HF Hub.
    """
    # NOTE(review): show_spinner is never read in the body — it looks copied
    # from st.cache's own keyword; confirm before removing.
    # BUG FIX: os.currdir does not exist (AttributeError on first call);
    # the correct constant is os.curdir ('.').
    yorum_model = tf.keras.models.load_model(os.curdir)
    tokenizer = BertTokenizer.from_pretrained('NimaKL/TC32')
    return yorum_model, tokenizer
42
+
43
# Page scaffold: wide two-column layout — col1 shows the training-history
# chart, col2 takes the complaint text to classify.
st.set_page_config(layout='wide', initial_sidebar_state='expanded')
col1, col2= st.columns(2)
with col1:
    st.title("TC32 Multi-Class Text Classification")
    st.subheader('Model Loss and Accuracy')
    st.area_chart(chart_data)
    # Load (cached) model + tokenizer.
    # NOTE(review): indentation was lost in the diff view — this line may have
    # been at module level rather than inside col1; placement only affects
    # where cache/spinner messages render. Confirm against the raw file.
    yorum_model, tokenizer = load_model()



with col2:
    # Turkish UI: "Enter a complaint to find its class." / "Complaint"
    st.title("Sınıfı bulmak için bir şikayet girin.")
    st.subheader("Şikayet")
    text = st.text_area('', height=240)  # unlabeled 240-px input box
    aButton = st.button('Ara')  # "Search" button
58
+
59
def prepare_data(input_text, tokenizer):
    """Encode *input_text* into the fixed 256-token tensors the model expects.

    The returned dict carries 'input_ids' and 'attention_mask' tensors, both
    cast to tf.float64.
    NOTE(review): float64 token ids are unusual for BERT — presumably the
    exported model was saved that way; verify against its input signature.
    """
    encoded = tokenizer.encode_plus(
        input_text,
        max_length=256,
        truncation=True,
        padding='max_length',
        add_special_tokens=True,
        return_tensors='tf',
    )
    return {
        'input_ids': tf.cast(encoded.input_ids, tf.float64),
        'attention_mask': tf.cast(encoded.attention_mask, tf.float64),
    }
72
+
73
def make_prediction(model, processed_data, classes=['Alışveriş','Anne-Bebek','Beyaz Eşya','Bilgisayar','Cep Telefonu','Eğitim','Elektronik','Emlak ve İnşaat','Enerji','Etkinlik ve Organizasyon','Finans','Gıda','Giyim','Hizmet','İçecek','İnternet','Kamu','Kargo-Nakliyat','Kozmetik','Küçük Ev Aletleri','Medya','Mekan ve Eğlence','Mobilya - Ev Tekstili','Mücevher Saat Gözlük','Mutfak Araç Gereç','Otomotiv','Sağlık','Sigorta','Spor','Temizlik','Turizm','Ulaşım']):
    """Pick the complaint category with the highest model score.

    ``model.predict(processed_data)[0]`` must yield the per-class scores;
    the label at the argmax position of *classes* is returned.
    """
    class_scores = model.predict(processed_data)[0]
    best_index = int(np.argmax(class_scores))
    return classes[best_index]
76
+
77
+
78
# Run a prediction when there is text or the button was pressed.
# NOTE(review): `text or aButton` fires on every rerun while the box is
# non-empty, not only on 'Ara' clicks — confirm this is the intended UX.
if text or aButton:
    with col2:
        with st.spinner('Wait for it...'):
            processed_data = prepare_data(text, tokenizer)
            result = make_prediction(yorum_model, processed_data=processed_data)
            # Dark rounded HTML card wrapping the predicted label (kept as-is).
            description = '<table style="border: collapse;"><tr><div style="height: 62px;"></div></tr><tr><p style="border-width: medium; border-color: #aa5e70; border-radius: 10px;padding-top: 1px;padding-left: 20px;background:#20212a;font-family:Courier New; color: white;font-size: 36px; font-weight: boldest;">'+result+'</p></tr><table>'
            st.markdown(description, unsafe_allow_html=True)
    with col1:
        st.success("Tahmin başarıyla tamamlandı!")  # "Prediction completed successfully!"
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ numpy==1.23.4
2
+ tensorflow==2.10.0
3
+ transformers==4.23.1
4
+ pandas==1.5.1
5
+ streamlit==1.14.0
6
+
7
+
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "max_len": 512, "init_inputs": []}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff