última versão
Browse files- create_TIMOB_model.ipynb +232 -0
- create_XD_model.ipynb +232 -0
- create_alloy_model.ipynb +22 -56
- models/tawos/XD/model_tawos_XD_mbr.joblib +3 -0
- models/tawos/XD/model_tawos_XD_median.joblib +3 -0
- models/tawos/XD/model_tawos_XD_neosp_linear.joblib +3 -0
- models/tawos/XD/model_tawos_XD_neosp_svr.joblib +3 -0
- models/tawos/XD/model_tawos_XD_tfidf_linear.joblib +3 -0
- models/tawos/XD/model_tawos_XD_tfidf_svr.joblib +3 -0
- models/tawos/XD/vectorizer_tawos_XD_tfidf.joblib +3 -0
- models/tawos/aloy/{vectorizer_tfidf.joblib → vectorizer_tawos_ALOY_tfidf.joblib} +1 -1
create_TIMOB_model.ipynb
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"project_name = \"TIMOB\"\n",
|
11 |
+
"\n",
|
12 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
13 |
+
"\n",
|
14 |
+
"df.info()"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"cell_type": "markdown",
|
19 |
+
"metadata": {},
|
20 |
+
"source": [
|
21 |
+
"# Pré-Processamento"
|
22 |
+
]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": null,
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [],
|
29 |
+
"source": [
|
30 |
+
"import re\n",
|
31 |
+
"from string import punctuation\n",
|
32 |
+
"\n",
|
33 |
+
"def escape_tags_and_content(text):\n",
|
34 |
+
" \"\"\"Escape tags and their content containing text, which is not written in natural language, such as code snippets\"\"\"\n",
|
35 |
+
"\n",
|
36 |
+
" NO_TEXT_TAGS = \"code\", \"noformat\"\n",
|
37 |
+
" for tag in NO_TEXT_TAGS:\n",
|
38 |
+
" regex_matching_tag = re.compile(\"\\{%s(.*?)\\}(.*?)\\{%s\\}\" % (tag, tag), re.DOTALL)\n",
|
39 |
+
" text = re.sub(regex_matching_tag, \"\", text)\n",
|
40 |
+
"\n",
|
41 |
+
" return text\n",
|
42 |
+
"\n",
|
43 |
+
"def escape_tags(text):\n",
|
44 |
+
" \"\"\"Escape markup tags, but retain their content\"\"\"\n",
|
45 |
+
"\n",
|
46 |
+
" ESCAPE_TAGS = \"color\", \"quote\", \"anchor\", \"panel\"\n",
|
47 |
+
" for tag in ESCAPE_TAGS:\n",
|
48 |
+
" text = re.sub(\"\\{%s(.*?)\\}\" % tag, \"\", text)\n",
|
49 |
+
"\n",
|
50 |
+
" return text\n",
|
51 |
+
"\n",
|
52 |
+
"def escape_strings(text):\n",
|
53 |
+
" \"\"\"Escape line breaks, tabulators, slashes and JIRA heading markup symbols\"\"\"\n",
|
54 |
+
"\n",
|
55 |
+
" ESCAPE_STRINGS = \"\\\\r\", \"\\\\n\", \"\\\\t\", \"\\\\f\", \"\\\\v\", \"\\\"\", \"\\\\\\\\\", \"h1. \", \"h2. \", \"h3. \", \"h4. \", \"h5. \", \"h6. \"\n",
|
56 |
+
" for escape_string in ESCAPE_STRINGS:\n",
|
57 |
+
" text = text.replace(escape_string, \" \")\n",
|
58 |
+
"\n",
|
59 |
+
" return text\n",
|
60 |
+
"\n",
|
61 |
+
"def escape_links(text):\n",
|
62 |
+
" \"\"\"Escape external and internal links, recognized by JIRA markup or leading 'http://' or 'https://' \"\"\"\n",
|
63 |
+
"\n",
|
64 |
+
" LINK_STARTERS = r\"\\#\", r\"\\^\", r\"http\\:\\/\\/\", r\"https\\:\\/\\/\", r\"mailto\\:\", r\"file\\:\", r\"\\~\"\n",
|
65 |
+
" for link_starter in LINK_STARTERS:\n",
|
66 |
+
" text = re.sub(\"\\[(.*?\\\\|)?%s(.*?)\\]\" % link_starter, \"\", text)\n",
|
67 |
+
" text = re.sub(r\"\\bhttps?://\\S+\", \"\", text)\n",
|
68 |
+
"\n",
|
69 |
+
" return text\n",
|
70 |
+
"\n",
|
71 |
+
"def escape_hex_character_codes(text):\n",
|
72 |
+
" \"\"\"Escape characters outside the latin alphabet which are converted to hex code representation\"\"\"\n",
|
73 |
+
"\n",
|
74 |
+
" return re.sub(r\"\\\\x\\w\\w\", \"\", text)\n",
|
75 |
+
"\n",
|
76 |
+
"def escape_punctuation_boundaries(text):\n",
|
77 |
+
" \"\"\"Remove all punctuation marks from the beginning and end of words,\n",
|
78 |
+
" except for trailing period at the end of words\"\"\"\n",
|
79 |
+
"\n",
|
80 |
+
" return \" \".join([word.strip(punctuation.replace(\".\", \"\")).lstrip(\".\") for word in text.split()])\n",
|
81 |
+
"\n",
|
82 |
+
"def escape_odd_spaces(text):\n",
|
83 |
+
" \"\"\"Replace several consecutive spaces with one space\n",
|
84 |
+
" and remove spaces from string start and end\"\"\"\n",
|
85 |
+
"\n",
|
86 |
+
" text = re.sub(r\"\\s+\", \" \", text)\n",
|
87 |
+
" text = text.strip()\n",
|
88 |
+
"\n",
|
89 |
+
" return text"
|
90 |
+
]
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"cell_type": "markdown",
|
94 |
+
"metadata": {},
|
95 |
+
"source": [
|
96 |
+
"# Criação do Modelo"
|
97 |
+
]
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"cell_type": "code",
|
101 |
+
"execution_count": null,
|
102 |
+
"metadata": {},
|
103 |
+
"outputs": [],
|
104 |
+
"source": [
|
105 |
+
"from sklearn.dummy import DummyRegressor\n",
|
106 |
+
"from nltk.corpus import stopwords\n",
|
107 |
+
"from textblob import TextBlob\n",
|
108 |
+
"import textstat\n",
|
109 |
+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
110 |
+
"from sklearn import svm\n",
|
111 |
+
"from sklearn.linear_model import LinearRegression\n",
|
112 |
+
"from sklearn.feature_selection import SelectKBest\n",
|
113 |
+
"import pandas as pd\n",
|
114 |
+
"from joblib import dump\n",
|
115 |
+
"\n",
|
116 |
+
"# carregando os dados\n",
|
117 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
118 |
+
"\n",
|
119 |
+
"# criação de uma nova coluna\n",
|
120 |
+
"df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
|
121 |
+
"\n",
|
122 |
+
"# pré-processamento\n",
|
123 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags_and_content(x))\n",
|
124 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags(x))\n",
|
125 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_strings(x))\n",
|
126 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_links(x))\n",
|
127 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_hex_character_codes(x))\n",
|
128 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_punctuation_boundaries(x))\n",
|
129 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_odd_spaces(x))\n",
|
130 |
+
"\n",
|
131 |
+
"# removendo stop-words\n",
|
132 |
+
"stop = stopwords.words('english')\n",
|
133 |
+
"df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))\n",
|
134 |
+
"\n",
|
135 |
+
"# renomeando as colunas porque senão dá um problema com a extração de features do NEOSP\n",
|
136 |
+
"df = df.rename(columns={ \"issuekey\": \"issuekey_\", \"created\": \"created_\", \"description\": \"description_\", \"title\": \"title_\", \"context\": \"context_\", \"storypoint\": \"storypoint_\"})\n",
|
137 |
+
"y = df[\"storypoint_\"]\n",
|
138 |
+
"df = df.drop(columns=['storypoint_'])\n",
|
139 |
+
"\n",
|
140 |
+
"# 5º coluna -> extração das features para o neosp\n",
|
141 |
+
"df[\"gunning_fog_\"] = df['context_'].apply(textstat.gunning_fog)\n",
|
142 |
+
"df[\"flesch_reading_ease_\"] = df['context_'].apply(textstat.flesch_reading_ease)\n",
|
143 |
+
"df[\"flesch_kincaid_grade_\"] = df['context_'].apply(textstat.flesch_kincaid_grade)\n",
|
144 |
+
"df[\"smog_index_\"] = df['context_'].apply(textstat.smog_index)\n",
|
145 |
+
"df[\"coleman_liau_index_\"] = df['context_'].apply(textstat.coleman_liau_index)\n",
|
146 |
+
"df[\"automated_readability_index_\"] = df['context_'].apply(textstat.automated_readability_index)\n",
|
147 |
+
"df[\"dale_chall_readability_score_\"] = df['context_'].apply(textstat.dale_chall_readability_score)\n",
|
148 |
+
"df[\"difficult_words_\"] = df['context_'].apply(textstat.difficult_words)\n",
|
149 |
+
"df[\"linsear_write_formula_\"] = df['context_'].apply(textstat.linsear_write_formula)\n",
|
150 |
+
"df[\"polarity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.polarity)\n",
|
151 |
+
"df[\"subjectivity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.subjectivity)\n",
|
152 |
+
"# 16º colunas\n",
|
153 |
+
"\n",
|
154 |
+
"# Extração das features para o TFIDF\n",
|
155 |
+
"vectorizer = TfidfVectorizer()\n",
|
156 |
+
"X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
|
157 |
+
"#dump(vectorizer, \"vectorizer_tfidf.joblib\")\n",
|
158 |
+
"dump(vectorizer, \"models/tawos/{}/vectorizer_tawos_{}_tfidf.joblib\".format(project_name, project_name))\n",
|
159 |
+
"\n",
|
160 |
+
"df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
|
161 |
+
"\n",
|
162 |
+
"# Juntando as features do neosp com o tfidf\n",
|
163 |
+
"df = df.join(df_vec)\n",
|
164 |
+
"X = df\n",
|
165 |
+
"\n",
|
166 |
+
"############ MbR\n",
|
167 |
+
"\n",
|
168 |
+
"model = DummyRegressor(strategy=\"mean\")\n",
|
169 |
+
"model.fit(X, y)\n",
|
170 |
+
"#dump(model, \"model_tawos_aloy_mbr.joblib\")\n",
|
171 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_mbr.joblib\".format(project_name, project_name))\n",
|
172 |
+
"\n",
|
173 |
+
"############ Mediana\n",
|
174 |
+
"\n",
|
175 |
+
"model = DummyRegressor(strategy=\"median\")\n",
|
176 |
+
"model.fit(X, y)\n",
|
177 |
+
"#dump(model, \"model_tawos_aloy_median.joblib\")\n",
|
178 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_median.joblib\".format(project_name, project_name))\n",
|
179 |
+
"\n",
|
180 |
+
"########### NEOSP-SVR\n",
|
181 |
+
"\n",
|
182 |
+
"model = svm.SVR()\n",
|
183 |
+
"model.fit(X[X.columns[5:16]], y)\n",
|
184 |
+
"#dump(model, \"model_tawos_aloy_neosp_svr.joblib\")\n",
|
185 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_svr.joblib\".format(project_name, project_name))\n",
|
186 |
+
"\n",
|
187 |
+
"########### NEOSP-LR\n",
|
188 |
+
"\n",
|
189 |
+
"model = LinearRegression()\n",
|
190 |
+
"model.fit(X[X.columns[5:16]], y)\n",
|
191 |
+
"#dump(model, \"model_tawos_aloy_neosp_linear.joblib\")\n",
|
192 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_linear.joblib\".format(project_name, project_name))\n",
|
193 |
+
"\n",
|
194 |
+
"############ TFIDF-SVM\n",
|
195 |
+
"\n",
|
196 |
+
"model = svm.SVR()\n",
|
197 |
+
"model.fit(X[X.columns[16:]], y)\n",
|
198 |
+
"#dump(model, \"model_tawos_aloy_tfidf_svr.joblib\")\n",
|
199 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_svr.joblib\".format(project_name, project_name))\n",
|
200 |
+
"\n",
|
201 |
+
"############ TFIDF-LR\n",
|
202 |
+
"\n",
|
203 |
+
"model = LinearRegression()\n",
|
204 |
+
"model.fit(X[X.columns[16:]], y)\n",
|
205 |
+
"#dump(model, \"model_tawos_aloy_tfidf_linear.joblib\")\n",
|
206 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_linear.joblib\".format(project_name, project_name))"
|
207 |
+
]
|
208 |
+
}
|
209 |
+
],
|
210 |
+
"metadata": {
|
211 |
+
"kernelspec": {
|
212 |
+
"display_name": "Python 3",
|
213 |
+
"language": "python",
|
214 |
+
"name": "python3"
|
215 |
+
},
|
216 |
+
"language_info": {
|
217 |
+
"codemirror_mode": {
|
218 |
+
"name": "ipython",
|
219 |
+
"version": 3
|
220 |
+
},
|
221 |
+
"file_extension": ".py",
|
222 |
+
"mimetype": "text/x-python",
|
223 |
+
"name": "python",
|
224 |
+
"nbconvert_exporter": "python",
|
225 |
+
"pygments_lexer": "ipython3",
|
226 |
+
"version": "3.10.11"
|
227 |
+
},
|
228 |
+
"orig_nbformat": 4
|
229 |
+
},
|
230 |
+
"nbformat": 4,
|
231 |
+
"nbformat_minor": 2
|
232 |
+
}
|
create_XD_model.ipynb
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"project_name = \"XD\"\n",
|
11 |
+
"\n",
|
12 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
13 |
+
"\n",
|
14 |
+
"df.info()"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"cell_type": "markdown",
|
19 |
+
"metadata": {},
|
20 |
+
"source": [
|
21 |
+
"# Pré-Processamento"
|
22 |
+
]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": null,
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [],
|
29 |
+
"source": [
|
30 |
+
"import re\n",
|
31 |
+
"from string import punctuation\n",
|
32 |
+
"\n",
|
33 |
+
"def escape_tags_and_content(text):\n",
|
34 |
+
" \"\"\"Escape tags and their content containing text, which is not written in natural language, such as code snippets\"\"\"\n",
|
35 |
+
"\n",
|
36 |
+
" NO_TEXT_TAGS = \"code\", \"noformat\"\n",
|
37 |
+
" for tag in NO_TEXT_TAGS:\n",
|
38 |
+
" regex_matching_tag = re.compile(\"\\{%s(.*?)\\}(.*?)\\{%s\\}\" % (tag, tag), re.DOTALL)\n",
|
39 |
+
" text = re.sub(regex_matching_tag, \"\", text)\n",
|
40 |
+
"\n",
|
41 |
+
" return text\n",
|
42 |
+
"\n",
|
43 |
+
"def escape_tags(text):\n",
|
44 |
+
" \"\"\"Escape markup tags, but retain their content\"\"\"\n",
|
45 |
+
"\n",
|
46 |
+
" ESCAPE_TAGS = \"color\", \"quote\", \"anchor\", \"panel\"\n",
|
47 |
+
" for tag in ESCAPE_TAGS:\n",
|
48 |
+
" text = re.sub(\"\\{%s(.*?)\\}\" % tag, \"\", text)\n",
|
49 |
+
"\n",
|
50 |
+
" return text\n",
|
51 |
+
"\n",
|
52 |
+
"def escape_strings(text):\n",
|
53 |
+
" \"\"\"Escape line breaks, tabulators, slashes and JIRA heading markup symbols\"\"\"\n",
|
54 |
+
"\n",
|
55 |
+
" ESCAPE_STRINGS = \"\\\\r\", \"\\\\n\", \"\\\\t\", \"\\\\f\", \"\\\\v\", \"\\\"\", \"\\\\\\\\\", \"h1. \", \"h2. \", \"h3. \", \"h4. \", \"h5. \", \"h6. \"\n",
|
56 |
+
" for escape_string in ESCAPE_STRINGS:\n",
|
57 |
+
" text = text.replace(escape_string, \" \")\n",
|
58 |
+
"\n",
|
59 |
+
" return text\n",
|
60 |
+
"\n",
|
61 |
+
"def escape_links(text):\n",
|
62 |
+
" \"\"\"Escape external and internal links, recognized by JIRA markup or leading 'http://' or 'https://' \"\"\"\n",
|
63 |
+
"\n",
|
64 |
+
" LINK_STARTERS = r\"\\#\", r\"\\^\", r\"http\\:\\/\\/\", r\"https\\:\\/\\/\", r\"mailto\\:\", r\"file\\:\", r\"\\~\"\n",
|
65 |
+
" for link_starter in LINK_STARTERS:\n",
|
66 |
+
" text = re.sub(\"\\[(.*?\\\\|)?%s(.*?)\\]\" % link_starter, \"\", text)\n",
|
67 |
+
" text = re.sub(r\"\\bhttps?://\\S+\", \"\", text)\n",
|
68 |
+
"\n",
|
69 |
+
" return text\n",
|
70 |
+
"\n",
|
71 |
+
"def escape_hex_character_codes(text):\n",
|
72 |
+
" \"\"\"Escape characters outside the latin alphabet which are converted to hex code representation\"\"\"\n",
|
73 |
+
"\n",
|
74 |
+
" return re.sub(r\"\\\\x\\w\\w\", \"\", text)\n",
|
75 |
+
"\n",
|
76 |
+
"def escape_punctuation_boundaries(text):\n",
|
77 |
+
" \"\"\"Remove all punctuation marks from the beginning and end of words,\n",
|
78 |
+
" except for trailing period at the end of words\"\"\"\n",
|
79 |
+
"\n",
|
80 |
+
" return \" \".join([word.strip(punctuation.replace(\".\", \"\")).lstrip(\".\") for word in text.split()])\n",
|
81 |
+
"\n",
|
82 |
+
"def escape_odd_spaces(text):\n",
|
83 |
+
" \"\"\"Replace several consecutive spaces with one space\n",
|
84 |
+
" and remove spaces from string start and end\"\"\"\n",
|
85 |
+
"\n",
|
86 |
+
" text = re.sub(r\"\\s+\", \" \", text)\n",
|
87 |
+
" text = text.strip()\n",
|
88 |
+
"\n",
|
89 |
+
" return text"
|
90 |
+
]
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"cell_type": "markdown",
|
94 |
+
"metadata": {},
|
95 |
+
"source": [
|
96 |
+
"# Criação do Modelo"
|
97 |
+
]
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"cell_type": "code",
|
101 |
+
"execution_count": null,
|
102 |
+
"metadata": {},
|
103 |
+
"outputs": [],
|
104 |
+
"source": [
|
105 |
+
"from sklearn.dummy import DummyRegressor\n",
|
106 |
+
"from nltk.corpus import stopwords\n",
|
107 |
+
"from textblob import TextBlob\n",
|
108 |
+
"import textstat\n",
|
109 |
+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
110 |
+
"from sklearn import svm\n",
|
111 |
+
"from sklearn.linear_model import LinearRegression\n",
|
112 |
+
"from sklearn.feature_selection import SelectKBest\n",
|
113 |
+
"import pandas as pd\n",
|
114 |
+
"from joblib import dump\n",
|
115 |
+
"\n",
|
116 |
+
"# carregando os dados\n",
|
117 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
118 |
+
"\n",
|
119 |
+
"# criação de uma nova coluna\n",
|
120 |
+
"df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
|
121 |
+
"\n",
|
122 |
+
"# pré-processamento\n",
|
123 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags_and_content(x))\n",
|
124 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags(x))\n",
|
125 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_strings(x))\n",
|
126 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_links(x))\n",
|
127 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_hex_character_codes(x))\n",
|
128 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_punctuation_boundaries(x))\n",
|
129 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_odd_spaces(x))\n",
|
130 |
+
"\n",
|
131 |
+
"# removendo stop-words\n",
|
132 |
+
"stop = stopwords.words('english')\n",
|
133 |
+
"df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))\n",
|
134 |
+
"\n",
|
135 |
+
"# renomeando as colunas porque senão dá um problema com a extração de features do NEOSP\n",
|
136 |
+
"df = df.rename(columns={ \"issuekey\": \"issuekey_\", \"created\": \"created_\", \"description\": \"description_\", \"title\": \"title_\", \"context\": \"context_\", \"storypoint\": \"storypoint_\"})\n",
|
137 |
+
"y = df[\"storypoint_\"]\n",
|
138 |
+
"df = df.drop(columns=['storypoint_'])\n",
|
139 |
+
"\n",
|
140 |
+
"# 5º coluna -> extração das features para o neosp\n",
|
141 |
+
"df[\"gunning_fog_\"] = df['context_'].apply(textstat.gunning_fog)\n",
|
142 |
+
"df[\"flesch_reading_ease_\"] = df['context_'].apply(textstat.flesch_reading_ease)\n",
|
143 |
+
"df[\"flesch_kincaid_grade_\"] = df['context_'].apply(textstat.flesch_kincaid_grade)\n",
|
144 |
+
"df[\"smog_index_\"] = df['context_'].apply(textstat.smog_index)\n",
|
145 |
+
"df[\"coleman_liau_index_\"] = df['context_'].apply(textstat.coleman_liau_index)\n",
|
146 |
+
"df[\"automated_readability_index_\"] = df['context_'].apply(textstat.automated_readability_index)\n",
|
147 |
+
"df[\"dale_chall_readability_score_\"] = df['context_'].apply(textstat.dale_chall_readability_score)\n",
|
148 |
+
"df[\"difficult_words_\"] = df['context_'].apply(textstat.difficult_words)\n",
|
149 |
+
"df[\"linsear_write_formula_\"] = df['context_'].apply(textstat.linsear_write_formula)\n",
|
150 |
+
"df[\"polarity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.polarity)\n",
|
151 |
+
"df[\"subjectivity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.subjectivity)\n",
|
152 |
+
"# 16º colunas\n",
|
153 |
+
"\n",
|
154 |
+
"# Extração das features para o TFIDF\n",
|
155 |
+
"vectorizer = TfidfVectorizer()\n",
|
156 |
+
"X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
|
157 |
+
"#dump(vectorizer, \"vectorizer_tfidf.joblib\")\n",
|
158 |
+
"dump(vectorizer, \"models/tawos/{}/vectorizer_tawos_{}_tfidf.joblib\".format(project_name, project_name))\n",
|
159 |
+
"\n",
|
160 |
+
"df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
|
161 |
+
"\n",
|
162 |
+
"# Juntando as features do neosp com o tfidf\n",
|
163 |
+
"df = df.join(df_vec)\n",
|
164 |
+
"X = df\n",
|
165 |
+
"\n",
|
166 |
+
"############ MbR\n",
|
167 |
+
"\n",
|
168 |
+
"model = DummyRegressor(strategy=\"mean\")\n",
|
169 |
+
"model.fit(X, y)\n",
|
170 |
+
"#dump(model, \"model_tawos_aloy_mbr.joblib\")\n",
|
171 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_mbr.joblib\".format(project_name, project_name))\n",
|
172 |
+
"\n",
|
173 |
+
"############ Mediana\n",
|
174 |
+
"\n",
|
175 |
+
"model = DummyRegressor(strategy=\"median\")\n",
|
176 |
+
"model.fit(X, y)\n",
|
177 |
+
"#dump(model, \"model_tawos_aloy_median.joblib\")\n",
|
178 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_median.joblib\".format(project_name, project_name))\n",
|
179 |
+
"\n",
|
180 |
+
"########### NEOSP-SVR\n",
|
181 |
+
"\n",
|
182 |
+
"model = svm.SVR()\n",
|
183 |
+
"model.fit(X[X.columns[5:16]], y)\n",
|
184 |
+
"#dump(model, \"model_tawos_aloy_neosp_svr.joblib\")\n",
|
185 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_svr.joblib\".format(project_name, project_name))\n",
|
186 |
+
"\n",
|
187 |
+
"########### NEOSP-LR\n",
|
188 |
+
"\n",
|
189 |
+
"model = LinearRegression()\n",
|
190 |
+
"model.fit(X[X.columns[5:16]], y)\n",
|
191 |
+
"#dump(model, \"model_tawos_aloy_neosp_linear.joblib\")\n",
|
192 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_linear.joblib\".format(project_name, project_name))\n",
|
193 |
+
"\n",
|
194 |
+
"############ TFIDF-SVM\n",
|
195 |
+
"\n",
|
196 |
+
"model = svm.SVR()\n",
|
197 |
+
"model.fit(X[X.columns[16:]], y)\n",
|
198 |
+
"#dump(model, \"model_tawos_aloy_tfidf_svr.joblib\")\n",
|
199 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_svr.joblib\".format(project_name, project_name))\n",
|
200 |
+
"\n",
|
201 |
+
"############ TFIDF-LR\n",
|
202 |
+
"\n",
|
203 |
+
"model = LinearRegression()\n",
|
204 |
+
"model.fit(X[X.columns[16:]], y)\n",
|
205 |
+
"#dump(model, \"model_tawos_aloy_tfidf_linear.joblib\")\n",
|
206 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_linear.joblib\".format(project_name, project_name))"
|
207 |
+
]
|
208 |
+
}
|
209 |
+
],
|
210 |
+
"metadata": {
|
211 |
+
"kernelspec": {
|
212 |
+
"display_name": "Python 3",
|
213 |
+
"language": "python",
|
214 |
+
"name": "python3"
|
215 |
+
},
|
216 |
+
"language_info": {
|
217 |
+
"codemirror_mode": {
|
218 |
+
"name": "ipython",
|
219 |
+
"version": 3
|
220 |
+
},
|
221 |
+
"file_extension": ".py",
|
222 |
+
"mimetype": "text/x-python",
|
223 |
+
"name": "python",
|
224 |
+
"nbconvert_exporter": "python",
|
225 |
+
"pygments_lexer": "ipython3",
|
226 |
+
"version": "3.10.11"
|
227 |
+
},
|
228 |
+
"orig_nbformat": 4
|
229 |
+
},
|
230 |
+
"nbformat": 4,
|
231 |
+
"nbformat_minor": 2
|
232 |
+
}
|
create_alloy_model.ipynb
CHANGED
@@ -2,31 +2,13 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
8 |
-
{
|
9 |
-
"name": "stdout",
|
10 |
-
"output_type": "stream",
|
11 |
-
"text": [
|
12 |
-
"<class 'pandas.core.frame.DataFrame'>\n",
|
13 |
-
"RangeIndex: 241 entries, 0 to 240\n",
|
14 |
-
"Data columns (total 5 columns):\n",
|
15 |
-
" # Column Non-Null Count Dtype \n",
|
16 |
-
"--- ------ -------------- ----- \n",
|
17 |
-
" 0 issuekey 241 non-null object\n",
|
18 |
-
" 1 created 241 non-null object\n",
|
19 |
-
" 2 title 241 non-null object\n",
|
20 |
-
" 3 description 241 non-null object\n",
|
21 |
-
" 4 storypoint 241 non-null int64 \n",
|
22 |
-
"dtypes: int64(1), object(4)\n",
|
23 |
-
"memory usage: 9.5+ KB\n"
|
24 |
-
]
|
25 |
-
}
|
26 |
-
],
|
27 |
"source": [
|
28 |
"import pandas as pd\n",
|
29 |
-
"
|
|
|
30 |
"df.info()"
|
31 |
]
|
32 |
},
|
@@ -39,7 +21,7 @@
|
|
39 |
},
|
40 |
{
|
41 |
"cell_type": "code",
|
42 |
-
"execution_count":
|
43 |
"metadata": {},
|
44 |
"outputs": [],
|
45 |
"source": [
|
@@ -114,20 +96,9 @@
|
|
114 |
},
|
115 |
{
|
116 |
"cell_type": "code",
|
117 |
-
"execution_count":
|
118 |
"metadata": {},
|
119 |
-
"outputs": [
|
120 |
-
{
|
121 |
-
"data": {
|
122 |
-
"text/plain": [
|
123 |
-
"['model_tawos_aloy_tfidf_linear.joblib']"
|
124 |
-
]
|
125 |
-
},
|
126 |
-
"execution_count": 6,
|
127 |
-
"metadata": {},
|
128 |
-
"output_type": "execute_result"
|
129 |
-
}
|
130 |
-
],
|
131 |
"source": [
|
132 |
"from sklearn.dummy import DummyRegressor\n",
|
133 |
"from nltk.corpus import stopwords\n",
|
@@ -141,7 +112,7 @@
|
|
141 |
"from joblib import dump\n",
|
142 |
"\n",
|
143 |
"# carregando os dados\n",
|
144 |
-
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\
|
145 |
"\n",
|
146 |
"# criação de uma nova coluna\n",
|
147 |
"df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
|
@@ -181,8 +152,8 @@
|
|
181 |
"# Extração das features para o TFIDF\n",
|
182 |
"vectorizer = TfidfVectorizer()\n",
|
183 |
"X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
|
184 |
-
"dump(vectorizer, \"vectorizer_tfidf.joblib\")\n",
|
185 |
-
"
|
186 |
"\n",
|
187 |
"df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
|
188 |
"\n",
|
@@ -194,49 +165,44 @@
|
|
194 |
"\n",
|
195 |
"model = DummyRegressor(strategy=\"mean\")\n",
|
196 |
"model.fit(X, y)\n",
|
197 |
-
"dump(model, \"model_tawos_aloy_mbr.joblib\")\n",
|
198 |
-
"
|
199 |
"\n",
|
200 |
"############ Mediana\n",
|
201 |
"\n",
|
202 |
"model = DummyRegressor(strategy=\"median\")\n",
|
203 |
"model.fit(X, y)\n",
|
204 |
-
"dump(model, \"model_tawos_aloy_median.joblib\")\n",
|
205 |
-
"
|
206 |
"\n",
|
207 |
"########### NEOSP-SVR\n",
|
208 |
"\n",
|
209 |
"model = svm.SVR()\n",
|
210 |
"model.fit(X[X.columns[5:16]], y)\n",
|
211 |
-
"dump(model, \"model_tawos_aloy_neosp_svr.joblib\")\n",
|
212 |
-
"
|
213 |
"\n",
|
214 |
"########### NEOSP-LR\n",
|
215 |
"\n",
|
216 |
"model = LinearRegression()\n",
|
217 |
"model.fit(X[X.columns[5:16]], y)\n",
|
218 |
-
"dump(model, \"model_tawos_aloy_neosp_linear.joblib\")\n",
|
219 |
-
"
|
220 |
"\n",
|
221 |
"############ TFIDF-SVM\n",
|
222 |
"\n",
|
223 |
"model = svm.SVR()\n",
|
224 |
"model.fit(X[X.columns[16:]], y)\n",
|
225 |
-
"dump(model, \"model_tawos_aloy_tfidf_svr.joblib\")\n",
|
226 |
-
"
|
227 |
"\n",
|
228 |
"############ TFIDF-LR\n",
|
229 |
"\n",
|
230 |
"model = LinearRegression()\n",
|
231 |
"model.fit(X[X.columns[16:]], y)\n",
|
232 |
-
"dump(model, \"model_tawos_aloy_tfidf_linear.joblib\")\n",
|
233 |
-
"
|
234 |
]
|
235 |
-
},
|
236 |
-
{
|
237 |
-
"cell_type": "markdown",
|
238 |
-
"metadata": {},
|
239 |
-
"source": []
|
240 |
}
|
241 |
],
|
242 |
"metadata": {
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import pandas as pd\n",
|
10 |
+
"project_name = \"ALOY\"\n",
|
11 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
12 |
"df.info()"
|
13 |
]
|
14 |
},
|
|
|
21 |
},
|
22 |
{
|
23 |
"cell_type": "code",
|
24 |
+
"execution_count": null,
|
25 |
"metadata": {},
|
26 |
"outputs": [],
|
27 |
"source": [
|
|
|
96 |
},
|
97 |
{
|
98 |
"cell_type": "code",
|
99 |
+
"execution_count": null,
|
100 |
"metadata": {},
|
101 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
"source": [
|
103 |
"from sklearn.dummy import DummyRegressor\n",
|
104 |
"from nltk.corpus import stopwords\n",
|
|
|
112 |
"from joblib import dump\n",
|
113 |
"\n",
|
114 |
"# carregando os dados\n",
|
115 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
116 |
"\n",
|
117 |
"# criação de uma nova coluna\n",
|
118 |
"df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
|
|
|
152 |
"# Extração das features para o TFIDF\n",
|
153 |
"vectorizer = TfidfVectorizer()\n",
|
154 |
"X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
|
155 |
+
"#dump(vectorizer, \"vectorizer_tfidf.joblib\")\n",
|
156 |
+
"dump(vectorizer, \"models/tawos/{}/vectorizer_tawos_{}_tfidf.joblib\".format(project_name, project_name))\n",
|
157 |
"\n",
|
158 |
"df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
|
159 |
"\n",
|
|
|
165 |
"\n",
|
166 |
"model = DummyRegressor(strategy=\"mean\")\n",
|
167 |
"model.fit(X, y)\n",
|
168 |
+
"#dump(model, \"model_tawos_aloy_mbr.joblib\")\n",
|
169 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_mbr.joblib\".format(project_name, project_name))\n",
|
170 |
"\n",
|
171 |
"############ Mediana\n",
|
172 |
"\n",
|
173 |
"model = DummyRegressor(strategy=\"median\")\n",
|
174 |
"model.fit(X, y)\n",
|
175 |
+
"#dump(model, \"model_tawos_aloy_median.joblib\")\n",
|
176 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_median.joblib\".format(project_name, project_name))\n",
|
177 |
"\n",
|
178 |
"########### NEOSP-SVR\n",
|
179 |
"\n",
|
180 |
"model = svm.SVR()\n",
|
181 |
"model.fit(X[X.columns[5:16]], y)\n",
|
182 |
+
"#dump(model, \"model_tawos_aloy_neosp_svr.joblib\")\n",
|
183 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_svr.joblib\".format(project_name, project_name))\n",
|
184 |
"\n",
|
185 |
"########### NEOSP-LR\n",
|
186 |
"\n",
|
187 |
"model = LinearRegression()\n",
|
188 |
"model.fit(X[X.columns[5:16]], y)\n",
|
189 |
+
"#dump(model, \"model_tawos_aloy_neosp_linear.joblib\")\n",
|
190 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_linear.joblib\".format(project_name, project_name))\n",
|
191 |
"\n",
|
192 |
"############ TFIDF-SVM\n",
|
193 |
"\n",
|
194 |
"model = svm.SVR()\n",
|
195 |
"model.fit(X[X.columns[16:]], y)\n",
|
196 |
+
"#dump(model, \"model_tawos_aloy_tfidf_svr.joblib\")\n",
|
197 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_svr.joblib\".format(project_name, project_name))\n",
|
198 |
"\n",
|
199 |
"############ TFIDF-LR\n",
|
200 |
"\n",
|
201 |
"model = LinearRegression()\n",
|
202 |
"model.fit(X[X.columns[16:]], y)\n",
|
203 |
+
"#dump(model, \"model_tawos_aloy_tfidf_linear.joblib\")\n",
|
204 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_linear.joblib\".format(project_name, project_name))\n"
|
205 |
]
|
|
|
|
|
|
|
|
|
|
|
206 |
}
|
207 |
],
|
208 |
"metadata": {
|
models/tawos/XD/model_tawos_XD_mbr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfd1f37c3a508425a3d21f5dee3ed029754a752a70be1c1a9c2564c96009df98
|
3 |
+
size 383
|
models/tawos/XD/model_tawos_XD_median.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f33655a67e4d587143f615b1604a45fc5cac5b70b0c8e999b47a953a43511e43
|
3 |
+
size 383
|
models/tawos/XD/model_tawos_XD_neosp_linear.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aab222c9966e14bf870d7e25b3339e6d86b185557cbb66fc0a8330d6206523bd
|
3 |
+
size 1280
|
models/tawos/XD/model_tawos_XD_neosp_svr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e764a03294eb8f6461ff716305e35b620fb7c51350946832f99954c45a4fcd3b
|
3 |
+
size 86524
|
models/tawos/XD/model_tawos_XD_tfidf_linear.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dd5c88b409a9a93b4bf754b50569df165c1fca36fd4f53e9f78e50f58d2f493
|
3 |
+
size 170304
|
models/tawos/XD/model_tawos_XD_tfidf_svr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8868f069e4ae8533bb5bafef7d8fefd064006414413a971a0a828d8316fa88a0
|
3 |
+
size 37738316
|
models/tawos/XD/vectorizer_tawos_XD_tfidf.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d12ff0ea3dddc7319dd2fea9547ecc96fc5bdbc15a263fa124b2929db436e3bb
|
3 |
+
size 189024
|
models/tawos/aloy/{vectorizer_tfidf.joblib → vectorizer_tawos_ALOY_tfidf.joblib}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 68159
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6416ad0d2dd928218d5bee8a2f5776d5985b8d464611da8e517cb4b78e4f01c7
|
3 |
size 68159
|