última versão
Browse files
- create_APSTUD_model.ipynb +262 -0
- create_CLI_model.ipynb +262 -0
- create_TIMOB_model.ipynb +35 -5
- create_XD_model.ipynb +36 -6
- create_alloy_model.ipynb +35 -5
- models/tawos/APSTUD/model_tawos_APSTUD_mbr.joblib +3 -0
- models/tawos/APSTUD/model_tawos_APSTUD_median.joblib +3 -0
- models/tawos/APSTUD/model_tawos_APSTUD_neosp_linear.joblib +3 -0
- models/tawos/APSTUD/model_tawos_APSTUD_neosp_svr.joblib +3 -0
- models/tawos/APSTUD/model_tawos_APSTUD_tfidf_linear.joblib +3 -0
- models/tawos/APSTUD/model_tawos_APSTUD_tfidf_svr.joblib +3 -0
- models/tawos/APSTUD/vectorizer_tawos_APSTUD_tfidf.joblib +3 -0
- models/tawos/CLI/model_tawos_CLI_mbr.joblib +3 -0
- models/tawos/CLI/model_tawos_CLI_median.joblib +3 -0
- models/tawos/CLI/model_tawos_CLI_neosp_linear.joblib +3 -0
- models/tawos/CLI/model_tawos_CLI_neosp_svr.joblib +3 -0
- models/tawos/CLI/model_tawos_CLI_tfidf_linear.joblib +3 -0
- models/tawos/CLI/model_tawos_CLI_tfidf_svr.joblib +3 -0
- models/tawos/CLI/vectorizer_tawos_CLI_tfidf.joblib +3 -0
- models/tawos/TIMOB/model_tawos_TIMOB_mbr.joblib +3 -0
- models/tawos/TIMOB/model_tawos_TIMOB_median.joblib +3 -0
- models/tawos/TIMOB/model_tawos_TIMOB_neosp_linear.joblib +3 -0
- models/tawos/TIMOB/model_tawos_TIMOB_neosp_svr.joblib +3 -0
- models/tawos/TIMOB/model_tawos_TIMOB_tfidf_linear.joblib +3 -0
- models/tawos/TIMOB/model_tawos_TIMOB_tfidf_svr.joblib +3 -0
- models/tawos/TIMOB/vectorizer_tawos_TIMOB_tfidf.joblib +3 -0
create_APSTUD_model.ipynb
ADDED
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 4,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
13 |
+
"RangeIndex: 476 entries, 0 to 475\n",
|
14 |
+
"Data columns (total 5 columns):\n",
|
15 |
+
" # Column Non-Null Count Dtype \n",
|
16 |
+
"--- ------ -------------- ----- \n",
|
17 |
+
" 0 issuekey 476 non-null object\n",
|
18 |
+
" 1 created 476 non-null object\n",
|
19 |
+
" 2 title 476 non-null object\n",
|
20 |
+
" 3 description 476 non-null object\n",
|
21 |
+
" 4 storypoint 476 non-null int64 \n",
|
22 |
+
"dtypes: int64(1), object(4)\n",
|
23 |
+
"memory usage: 18.7+ KB\n"
|
24 |
+
]
|
25 |
+
}
|
26 |
+
],
|
27 |
+
"source": [
|
28 |
+
"import pandas as pd\n",
|
29 |
+
"project_name = \"APSTUD\"\n",
|
30 |
+
"\n",
|
31 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
32 |
+
"\n",
|
33 |
+
"df.info()"
|
34 |
+
]
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cell_type": "markdown",
|
38 |
+
"metadata": {},
|
39 |
+
"source": [
|
40 |
+
"# Pré-Processamento"
|
41 |
+
]
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"cell_type": "code",
|
45 |
+
"execution_count": 5,
|
46 |
+
"metadata": {},
|
47 |
+
"outputs": [],
|
48 |
+
"source": [
|
49 |
+
"import re\n",
|
50 |
+
"from string import punctuation\n",
|
51 |
+
"\n",
|
52 |
+
"def escape_tags_and_content(text):\n",
|
53 |
+
" \"\"\"Escape tags and their content containing text, which is not written in natural language, such as code snippets\"\"\"\n",
|
54 |
+
"\n",
|
55 |
+
" NO_TEXT_TAGS = \"code\", \"noformat\"\n",
|
56 |
+
" for tag in NO_TEXT_TAGS:\n",
|
57 |
+
" regex_matching_tag = re.compile(\"\\{%s(.*?)\\}(.*?)\\{%s\\}\" % (tag, tag), re.DOTALL)\n",
|
58 |
+
" text = re.sub(regex_matching_tag, \"\", text)\n",
|
59 |
+
"\n",
|
60 |
+
" return text\n",
|
61 |
+
"\n",
|
62 |
+
"def escape_tags(text):\n",
|
63 |
+
" \"\"\"Escape markup tags, but retain their content\"\"\"\n",
|
64 |
+
"\n",
|
65 |
+
" ESCAPE_TAGS = \"color\", \"quote\", \"anchor\", \"panel\"\n",
|
66 |
+
" for tag in ESCAPE_TAGS:\n",
|
67 |
+
" text = re.sub(\"\\{%s(.*?)\\}\" % tag, \"\", text)\n",
|
68 |
+
"\n",
|
69 |
+
" return text\n",
|
70 |
+
"\n",
|
71 |
+
"def escape_strings(text):\n",
|
72 |
+
" \"\"\"Escape line breaks, tabulators, slashes and JIRA heading markup symbols\"\"\"\n",
|
73 |
+
"\n",
|
74 |
+
" ESCAPE_STRINGS = \"\\\\r\", \"\\\\n\", \"\\\\t\", \"\\\\f\", \"\\\\v\", \"\\\"\", \"\\\\\\\\\", \"h1. \", \"h2. \", \"h3. \", \"h4. \", \"h5. \", \"h6. \"\n",
|
75 |
+
" for escape_string in ESCAPE_STRINGS:\n",
|
76 |
+
" text = text.replace(escape_string, \" \")\n",
|
77 |
+
"\n",
|
78 |
+
" return text\n",
|
79 |
+
"\n",
|
80 |
+
"def escape_links(text):\n",
|
81 |
+
" \"\"\"Escape external and internal links, recognized by JIRA markup or leading 'http://' or 'https://' \"\"\"\n",
|
82 |
+
"\n",
|
83 |
+
" LINK_STARTERS = r\"\\#\", r\"\\^\", r\"http\\:\\/\\/\", r\"https\\:\\/\\/\", r\"malto\\:\", r\"file\\:\", r\"\\~\"\n",
|
84 |
+
" for link_starter in LINK_STARTERS:\n",
|
85 |
+
" text = re.sub(\"\\[(.*?\\\\|)?%s(.*?)\\]\" % link_starter, \"\", text)\n",
|
86 |
+
" text = re.sub(r\"\\bhttps?://\\S+\", \"\", text)\n",
|
87 |
+
"\n",
|
88 |
+
" return text\n",
|
89 |
+
"\n",
|
90 |
+
"def escape_hex_character_codes(text):\n",
|
91 |
+
" \"\"\"Escape characters outside the latin alphabet which are converted to hex code representation\"\"\"\n",
|
92 |
+
"\n",
|
93 |
+
" return re.sub(r\"\\\\x\\w\\w\", \"\", text)\n",
|
94 |
+
"\n",
|
95 |
+
"def escape_punctuation_boundaries(text):\n",
|
96 |
+
" \"\"\"Remove all punctuation marks from the beginning and end of words,\n",
|
97 |
+
" except for trailing period at the end of words\"\"\"\n",
|
98 |
+
"\n",
|
99 |
+
" return \" \".join([word.strip(punctuation.replace(\".\", \"\")).lstrip(\".\") for word in text.split()])\n",
|
100 |
+
"\n",
|
101 |
+
"def escape_odd_spaces(text):\n",
|
102 |
+
" \"\"\"Replace several consequent spaces with one space\n",
|
103 |
+
" and remove spaces from string start and end\"\"\"\n",
|
104 |
+
"\n",
|
105 |
+
" text = re.sub(r\"\\s+\", \" \", text)\n",
|
106 |
+
" text = text.strip()\n",
|
107 |
+
"\n",
|
108 |
+
" return text"
|
109 |
+
]
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"cell_type": "markdown",
|
113 |
+
"metadata": {},
|
114 |
+
"source": [
|
115 |
+
"# Criação do Modelo"
|
116 |
+
]
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"cell_type": "code",
|
120 |
+
"execution_count": 6,
|
121 |
+
"metadata": {},
|
122 |
+
"outputs": [
|
123 |
+
{
|
124 |
+
"data": {
|
125 |
+
"text/plain": [
|
126 |
+
"['models/tawos/APSTUD/model_tawos_APSTUD_tfidf_linear.joblib']"
|
127 |
+
]
|
128 |
+
},
|
129 |
+
"execution_count": 6,
|
130 |
+
"metadata": {},
|
131 |
+
"output_type": "execute_result"
|
132 |
+
}
|
133 |
+
],
|
134 |
+
"source": [
|
135 |
+
"from sklearn.dummy import DummyRegressor\n",
|
136 |
+
"from nltk.corpus import stopwords\n",
|
137 |
+
"from textblob import TextBlob\n",
|
138 |
+
"import textstat\n",
|
139 |
+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
140 |
+
"from sklearn import svm\n",
|
141 |
+
"from sklearn.linear_model import LinearRegression\n",
|
142 |
+
"from sklearn.feature_selection import SelectKBest\n",
|
143 |
+
"import pandas as pd\n",
|
144 |
+
"from joblib import dump\n",
|
145 |
+
"\n",
|
146 |
+
"# carregando os dados\n",
|
147 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
148 |
+
"\n",
|
149 |
+
"# criação de uma nova coluna\n",
|
150 |
+
"df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
|
151 |
+
"\n",
|
152 |
+
"# pré-processamento\n",
|
153 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags_and_content(x))\n",
|
154 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags(x))\n",
|
155 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_strings(x))\n",
|
156 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_links(x))\n",
|
157 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_hex_character_codes(x))\n",
|
158 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_punctuation_boundaries(x))\n",
|
159 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_odd_spaces(x))\n",
|
160 |
+
"\n",
|
161 |
+
"# removendo stop-words\n",
|
162 |
+
"stop = stopwords.words('english')\n",
|
163 |
+
"df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))\n",
|
164 |
+
"\n",
|
165 |
+
"# renomeando as colunas porque senão dá um problema com a extração de features do NEOSP\n",
|
166 |
+
"df = df.rename(columns={ \"issuekey\": \"issuekey_\", \"created\": \"created_\", \"description\": \"description_\", \"title\": \"title_\", \"context\": \"context_\", \"storypoint\": \"storypoint_\"})\n",
|
167 |
+
"y = df[\"storypoint_\"]\n",
|
168 |
+
"df = df.drop(columns=['storypoint_'])\n",
|
169 |
+
"\n",
|
170 |
+
"# 5º coluna -> extração das features para o neosp\n",
|
171 |
+
"df[\"gunning_fog_\"] = df['context_'].apply(textstat.gunning_fog)\n",
|
172 |
+
"df[\"flesch_reading_ease_\"] = df['context_'].apply(textstat.flesch_reading_ease)\n",
|
173 |
+
"df[\"flesch_kincaid_grade_\"] = df['context_'].apply(textstat.flesch_kincaid_grade)\n",
|
174 |
+
"df[\"smog_index_\"] = df['context_'].apply(textstat.smog_index)\n",
|
175 |
+
"df[\"coleman_liau_index_\"] = df['context_'].apply(textstat.coleman_liau_index)\n",
|
176 |
+
"df[\"automated_readability_index_\"] = df['context_'].apply(textstat.automated_readability_index)\n",
|
177 |
+
"df[\"dale_chall_readability_score_\"] = df['context_'].apply(textstat.dale_chall_readability_score)\n",
|
178 |
+
"df[\"difficult_words_\"] = df['context_'].apply(textstat.difficult_words)\n",
|
179 |
+
"df[\"linsear_write_formula_\"] = df['context_'].apply(textstat.linsear_write_formula)\n",
|
180 |
+
"df[\"polarity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.polarity)\n",
|
181 |
+
"df[\"subjectivity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.subjectivity)\n",
|
182 |
+
"# 16º colunas\n",
|
183 |
+
"\n",
|
184 |
+
"# Extração das features para o TFIDF\n",
|
185 |
+
"vectorizer = TfidfVectorizer()\n",
|
186 |
+
"X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
|
187 |
+
"#dump(vectorizer, \"vectorizer_tfidf.joblib\")\n",
|
188 |
+
"dump(vectorizer, \"models/tawos/{}/vectorizer_tawos_{}_tfidf.joblib\".format(project_name, project_name))\n",
|
189 |
+
"\n",
|
190 |
+
"df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
|
191 |
+
"\n",
|
192 |
+
"# Juntando as features do neosp com o tfidf\n",
|
193 |
+
"df = df.join(df_vec)\n",
|
194 |
+
"X = df\n",
|
195 |
+
"\n",
|
196 |
+
"############ MbR\n",
|
197 |
+
"\n",
|
198 |
+
"model = DummyRegressor(strategy=\"mean\")\n",
|
199 |
+
"model.fit(X, y)\n",
|
200 |
+
"#dump(model, \"model_tawos_aloy_mbr.joblib\")\n",
|
201 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_mbr.joblib\".format(project_name, project_name))\n",
|
202 |
+
"\n",
|
203 |
+
"############ Mediana\n",
|
204 |
+
"\n",
|
205 |
+
"model = DummyRegressor(strategy=\"median\")\n",
|
206 |
+
"model.fit(X, y)\n",
|
207 |
+
"#dump(model, \"model_tawos_aloy_median.joblib\")\n",
|
208 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_median.joblib\".format(project_name, project_name))\n",
|
209 |
+
"\n",
|
210 |
+
"########### NEOSP-SVR\n",
|
211 |
+
"\n",
|
212 |
+
"model = svm.SVR()\n",
|
213 |
+
"model.fit(X[X.columns[5:16]], y)\n",
|
214 |
+
"#dump(model, \"model_tawos_aloy_neosp_svr.joblib\")\n",
|
215 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_svr.joblib\".format(project_name, project_name))\n",
|
216 |
+
"\n",
|
217 |
+
"########### NEOSP-LR\n",
|
218 |
+
"\n",
|
219 |
+
"model = LinearRegression()\n",
|
220 |
+
"model.fit(X[X.columns[5:16]], y)\n",
|
221 |
+
"#dump(model, \"model_tawos_aloy_neosp_linear.joblib\")\n",
|
222 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_linear.joblib\".format(project_name, project_name))\n",
|
223 |
+
"\n",
|
224 |
+
"############ TFIDF-SVM\n",
|
225 |
+
"\n",
|
226 |
+
"model = svm.SVR()\n",
|
227 |
+
"model.fit(X[X.columns[16:]], y)\n",
|
228 |
+
"#dump(model, \"model_tawos_aloy_tfidf_svr.joblib\")\n",
|
229 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_svr.joblib\".format(project_name, project_name))\n",
|
230 |
+
"\n",
|
231 |
+
"############ TFIDF-LR\n",
|
232 |
+
"\n",
|
233 |
+
"model = LinearRegression()\n",
|
234 |
+
"model.fit(X[X.columns[16:]], y)\n",
|
235 |
+
"#dump(model, \"model_tawos_aloy_tfidf_linear.joblib\")\n",
|
236 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_linear.joblib\".format(project_name, project_name))"
|
237 |
+
]
|
238 |
+
}
|
239 |
+
],
|
240 |
+
"metadata": {
|
241 |
+
"kernelspec": {
|
242 |
+
"display_name": "Python 3",
|
243 |
+
"language": "python",
|
244 |
+
"name": "python3"
|
245 |
+
},
|
246 |
+
"language_info": {
|
247 |
+
"codemirror_mode": {
|
248 |
+
"name": "ipython",
|
249 |
+
"version": 3
|
250 |
+
},
|
251 |
+
"file_extension": ".py",
|
252 |
+
"mimetype": "text/x-python",
|
253 |
+
"name": "python",
|
254 |
+
"nbconvert_exporter": "python",
|
255 |
+
"pygments_lexer": "ipython3",
|
256 |
+
"version": "3.10.11"
|
257 |
+
},
|
258 |
+
"orig_nbformat": 4
|
259 |
+
},
|
260 |
+
"nbformat": 4,
|
261 |
+
"nbformat_minor": 2
|
262 |
+
}
|
create_CLI_model.ipynb
ADDED
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
13 |
+
"RangeIndex: 293 entries, 0 to 292\n",
|
14 |
+
"Data columns (total 5 columns):\n",
|
15 |
+
" # Column Non-Null Count Dtype \n",
|
16 |
+
"--- ------ -------------- ----- \n",
|
17 |
+
" 0 issuekey 293 non-null object\n",
|
18 |
+
" 1 created 293 non-null object\n",
|
19 |
+
" 2 title 293 non-null object\n",
|
20 |
+
" 3 description 293 non-null object\n",
|
21 |
+
" 4 storypoint 293 non-null int64 \n",
|
22 |
+
"dtypes: int64(1), object(4)\n",
|
23 |
+
"memory usage: 11.6+ KB\n"
|
24 |
+
]
|
25 |
+
}
|
26 |
+
],
|
27 |
+
"source": [
|
28 |
+
"import pandas as pd\n",
|
29 |
+
"project_name = \"CLI\"\n",
|
30 |
+
"\n",
|
31 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
32 |
+
"\n",
|
33 |
+
"df.info()"
|
34 |
+
]
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cell_type": "markdown",
|
38 |
+
"metadata": {},
|
39 |
+
"source": [
|
40 |
+
"# Pré-Processamento"
|
41 |
+
]
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"cell_type": "code",
|
45 |
+
"execution_count": 2,
|
46 |
+
"metadata": {},
|
47 |
+
"outputs": [],
|
48 |
+
"source": [
|
49 |
+
"import re\n",
|
50 |
+
"from string import punctuation\n",
|
51 |
+
"\n",
|
52 |
+
"def escape_tags_and_content(text):\n",
|
53 |
+
" \"\"\"Escape tags and their content containing text, which is not written in natural language, such as code snippets\"\"\"\n",
|
54 |
+
"\n",
|
55 |
+
" NO_TEXT_TAGS = \"code\", \"noformat\"\n",
|
56 |
+
" for tag in NO_TEXT_TAGS:\n",
|
57 |
+
" regex_matching_tag = re.compile(\"\\{%s(.*?)\\}(.*?)\\{%s\\}\" % (tag, tag), re.DOTALL)\n",
|
58 |
+
" text = re.sub(regex_matching_tag, \"\", text)\n",
|
59 |
+
"\n",
|
60 |
+
" return text\n",
|
61 |
+
"\n",
|
62 |
+
"def escape_tags(text):\n",
|
63 |
+
" \"\"\"Escape markup tags, but retain their content\"\"\"\n",
|
64 |
+
"\n",
|
65 |
+
" ESCAPE_TAGS = \"color\", \"quote\", \"anchor\", \"panel\"\n",
|
66 |
+
" for tag in ESCAPE_TAGS:\n",
|
67 |
+
" text = re.sub(\"\\{%s(.*?)\\}\" % tag, \"\", text)\n",
|
68 |
+
"\n",
|
69 |
+
" return text\n",
|
70 |
+
"\n",
|
71 |
+
"def escape_strings(text):\n",
|
72 |
+
" \"\"\"Escape line breaks, tabulators, slashes and JIRA heading markup symbols\"\"\"\n",
|
73 |
+
"\n",
|
74 |
+
" ESCAPE_STRINGS = \"\\\\r\", \"\\\\n\", \"\\\\t\", \"\\\\f\", \"\\\\v\", \"\\\"\", \"\\\\\\\\\", \"h1. \", \"h2. \", \"h3. \", \"h4. \", \"h5. \", \"h6. \"\n",
|
75 |
+
" for escape_string in ESCAPE_STRINGS:\n",
|
76 |
+
" text = text.replace(escape_string, \" \")\n",
|
77 |
+
"\n",
|
78 |
+
" return text\n",
|
79 |
+
"\n",
|
80 |
+
"def escape_links(text):\n",
|
81 |
+
" \"\"\"Escape external and internal links, recognized by JIRA markup or leading 'http://' or 'https://' \"\"\"\n",
|
82 |
+
"\n",
|
83 |
+
" LINK_STARTERS = r\"\\#\", r\"\\^\", r\"http\\:\\/\\/\", r\"https\\:\\/\\/\", r\"malto\\:\", r\"file\\:\", r\"\\~\"\n",
|
84 |
+
" for link_starter in LINK_STARTERS:\n",
|
85 |
+
" text = re.sub(\"\\[(.*?\\\\|)?%s(.*?)\\]\" % link_starter, \"\", text)\n",
|
86 |
+
" text = re.sub(r\"\\bhttps?://\\S+\", \"\", text)\n",
|
87 |
+
"\n",
|
88 |
+
" return text\n",
|
89 |
+
"\n",
|
90 |
+
"def escape_hex_character_codes(text):\n",
|
91 |
+
" \"\"\"Escape characters outside the latin alphabet which are converted to hex code representation\"\"\"\n",
|
92 |
+
"\n",
|
93 |
+
" return re.sub(r\"\\\\x\\w\\w\", \"\", text)\n",
|
94 |
+
"\n",
|
95 |
+
"def escape_punctuation_boundaries(text):\n",
|
96 |
+
" \"\"\"Remove all punctuation marks from the beginning and end of words,\n",
|
97 |
+
" except for trailing period at the end of words\"\"\"\n",
|
98 |
+
"\n",
|
99 |
+
" return \" \".join([word.strip(punctuation.replace(\".\", \"\")).lstrip(\".\") for word in text.split()])\n",
|
100 |
+
"\n",
|
101 |
+
"def escape_odd_spaces(text):\n",
|
102 |
+
" \"\"\"Replace several consequent spaces with one space\n",
|
103 |
+
" and remove spaces from string start and end\"\"\"\n",
|
104 |
+
"\n",
|
105 |
+
" text = re.sub(r\"\\s+\", \" \", text)\n",
|
106 |
+
" text = text.strip()\n",
|
107 |
+
"\n",
|
108 |
+
" return text"
|
109 |
+
]
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"cell_type": "markdown",
|
113 |
+
"metadata": {},
|
114 |
+
"source": [
|
115 |
+
"# Criação do Modelo"
|
116 |
+
]
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"cell_type": "code",
|
120 |
+
"execution_count": 3,
|
121 |
+
"metadata": {},
|
122 |
+
"outputs": [
|
123 |
+
{
|
124 |
+
"data": {
|
125 |
+
"text/plain": [
|
126 |
+
"['models/tawos/CLI/model_tawos_CLI_tfidf_linear.joblib']"
|
127 |
+
]
|
128 |
+
},
|
129 |
+
"execution_count": 3,
|
130 |
+
"metadata": {},
|
131 |
+
"output_type": "execute_result"
|
132 |
+
}
|
133 |
+
],
|
134 |
+
"source": [
|
135 |
+
"from sklearn.dummy import DummyRegressor\n",
|
136 |
+
"from nltk.corpus import stopwords\n",
|
137 |
+
"from textblob import TextBlob\n",
|
138 |
+
"import textstat\n",
|
139 |
+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
140 |
+
"from sklearn import svm\n",
|
141 |
+
"from sklearn.linear_model import LinearRegression\n",
|
142 |
+
"from sklearn.feature_selection import SelectKBest\n",
|
143 |
+
"import pandas as pd\n",
|
144 |
+
"from joblib import dump\n",
|
145 |
+
"\n",
|
146 |
+
"# carregando os dados\n",
|
147 |
+
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
148 |
+
"\n",
|
149 |
+
"# criação de uma nova coluna\n",
|
150 |
+
"df[\"context\"] = df[\"title\"] + df[\"description\"]\n",
|
151 |
+
"\n",
|
152 |
+
"# pré-processamento\n",
|
153 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags_and_content(x))\n",
|
154 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_tags(x))\n",
|
155 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_strings(x))\n",
|
156 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_links(x))\n",
|
157 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_hex_character_codes(x))\n",
|
158 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_punctuation_boundaries(x))\n",
|
159 |
+
"df[\"context\"] = df[\"context\"].apply(lambda x: escape_odd_spaces(x))\n",
|
160 |
+
"\n",
|
161 |
+
"# removendo stop-words\n",
|
162 |
+
"stop = stopwords.words('english')\n",
|
163 |
+
"df['context'] = df['context'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))\n",
|
164 |
+
"\n",
|
165 |
+
"# renomeando as colunas porque senão dá um problema com a extração de features do NEOSP\n",
|
166 |
+
"df = df.rename(columns={ \"issuekey\": \"issuekey_\", \"created\": \"created_\", \"description\": \"description_\", \"title\": \"title_\", \"context\": \"context_\", \"storypoint\": \"storypoint_\"})\n",
|
167 |
+
"y = df[\"storypoint_\"]\n",
|
168 |
+
"df = df.drop(columns=['storypoint_'])\n",
|
169 |
+
"\n",
|
170 |
+
"# 5º coluna -> extração das features para o neosp\n",
|
171 |
+
"df[\"gunning_fog_\"] = df['context_'].apply(textstat.gunning_fog)\n",
|
172 |
+
"df[\"flesch_reading_ease_\"] = df['context_'].apply(textstat.flesch_reading_ease)\n",
|
173 |
+
"df[\"flesch_kincaid_grade_\"] = df['context_'].apply(textstat.flesch_kincaid_grade)\n",
|
174 |
+
"df[\"smog_index_\"] = df['context_'].apply(textstat.smog_index)\n",
|
175 |
+
"df[\"coleman_liau_index_\"] = df['context_'].apply(textstat.coleman_liau_index)\n",
|
176 |
+
"df[\"automated_readability_index_\"] = df['context_'].apply(textstat.automated_readability_index)\n",
|
177 |
+
"df[\"dale_chall_readability_score_\"] = df['context_'].apply(textstat.dale_chall_readability_score)\n",
|
178 |
+
"df[\"difficult_words_\"] = df['context_'].apply(textstat.difficult_words)\n",
|
179 |
+
"df[\"linsear_write_formula_\"] = df['context_'].apply(textstat.linsear_write_formula)\n",
|
180 |
+
"df[\"polarity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.polarity)\n",
|
181 |
+
"df[\"subjectivity_\"] = df[\"context_\"].apply(lambda x: TextBlob(x).sentiment.subjectivity)\n",
|
182 |
+
"# 16º colunas\n",
|
183 |
+
"\n",
|
184 |
+
"# Extração das features para o TFIDF\n",
|
185 |
+
"vectorizer = TfidfVectorizer()\n",
|
186 |
+
"X_vec = vectorizer.fit_transform(df[\"context_\"])\n",
|
187 |
+
"#dump(vectorizer, \"vectorizer_tfidf.joblib\")\n",
|
188 |
+
"dump(vectorizer, \"models/tawos/{}/vectorizer_tawos_{}_tfidf.joblib\".format(project_name, project_name))\n",
|
189 |
+
"\n",
|
190 |
+
"df_vec = pd.DataFrame(data = X_vec.toarray(), columns = vectorizer.get_feature_names_out())\n",
|
191 |
+
"\n",
|
192 |
+
"# Juntando as features do neosp com o tfidf\n",
|
193 |
+
"df = df.join(df_vec)\n",
|
194 |
+
"X = df\n",
|
195 |
+
"\n",
|
196 |
+
"############ MbR\n",
|
197 |
+
"\n",
|
198 |
+
"model = DummyRegressor(strategy=\"mean\")\n",
|
199 |
+
"model.fit(X, y)\n",
|
200 |
+
"#dump(model, \"model_tawos_aloy_mbr.joblib\")\n",
|
201 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_mbr.joblib\".format(project_name, project_name))\n",
|
202 |
+
"\n",
|
203 |
+
"############ Mediana\n",
|
204 |
+
"\n",
|
205 |
+
"model = DummyRegressor(strategy=\"median\")\n",
|
206 |
+
"model.fit(X, y)\n",
|
207 |
+
"#dump(model, \"model_tawos_aloy_median.joblib\")\n",
|
208 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_median.joblib\".format(project_name, project_name))\n",
|
209 |
+
"\n",
|
210 |
+
"########### NEOSP-SVR\n",
|
211 |
+
"\n",
|
212 |
+
"model = svm.SVR()\n",
|
213 |
+
"model.fit(X[X.columns[5:16]], y)\n",
|
214 |
+
"#dump(model, \"model_tawos_aloy_neosp_svr.joblib\")\n",
|
215 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_svr.joblib\".format(project_name, project_name))\n",
|
216 |
+
"\n",
|
217 |
+
"########### NEOSP-LR\n",
|
218 |
+
"\n",
|
219 |
+
"model = LinearRegression()\n",
|
220 |
+
"model.fit(X[X.columns[5:16]], y)\n",
|
221 |
+
"#dump(model, \"model_tawos_aloy_neosp_linear.joblib\")\n",
|
222 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_neosp_linear.joblib\".format(project_name, project_name))\n",
|
223 |
+
"\n",
|
224 |
+
"############ TFIDF-SVM\n",
|
225 |
+
"\n",
|
226 |
+
"model = svm.SVR()\n",
|
227 |
+
"model.fit(X[X.columns[16:]], y)\n",
|
228 |
+
"#dump(model, \"model_tawos_aloy_tfidf_svr.joblib\")\n",
|
229 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_svr.joblib\".format(project_name, project_name))\n",
|
230 |
+
"\n",
|
231 |
+
"############ TFIDF-LR\n",
|
232 |
+
"\n",
|
233 |
+
"model = LinearRegression()\n",
|
234 |
+
"model.fit(X[X.columns[16:]], y)\n",
|
235 |
+
"#dump(model, \"model_tawos_aloy_tfidf_linear.joblib\")\n",
|
236 |
+
"dump(model, \"models/tawos/{}/model_tawos_{}_tfidf_linear.joblib\".format(project_name, project_name))"
|
237 |
+
]
|
238 |
+
}
|
239 |
+
],
|
240 |
+
"metadata": {
|
241 |
+
"kernelspec": {
|
242 |
+
"display_name": "Python 3",
|
243 |
+
"language": "python",
|
244 |
+
"name": "python3"
|
245 |
+
},
|
246 |
+
"language_info": {
|
247 |
+
"codemirror_mode": {
|
248 |
+
"name": "ipython",
|
249 |
+
"version": 3
|
250 |
+
},
|
251 |
+
"file_extension": ".py",
|
252 |
+
"mimetype": "text/x-python",
|
253 |
+
"name": "python",
|
254 |
+
"nbconvert_exporter": "python",
|
255 |
+
"pygments_lexer": "ipython3",
|
256 |
+
"version": "3.10.11"
|
257 |
+
},
|
258 |
+
"orig_nbformat": 4
|
259 |
+
},
|
260 |
+
"nbformat": 4,
|
261 |
+
"nbformat_minor": 2
|
262 |
+
}
|
create_TIMOB_model.ipynb
CHANGED
@@ -2,9 +2,28 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import pandas as pd\n",
|
10 |
"project_name = \"TIMOB\"\n",
|
@@ -23,7 +42,7 @@
|
|
23 |
},
|
24 |
{
|
25 |
"cell_type": "code",
|
26 |
-
"execution_count":
|
27 |
"metadata": {},
|
28 |
"outputs": [],
|
29 |
"source": [
|
@@ -98,9 +117,20 @@
|
|
98 |
},
|
99 |
{
|
100 |
"cell_type": "code",
|
101 |
-
"execution_count":
|
102 |
"metadata": {},
|
103 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
"source": [
|
105 |
"from sklearn.dummy import DummyRegressor\n",
|
106 |
"from nltk.corpus import stopwords\n",
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
13 |
+
"RangeIndex: 3915 entries, 0 to 3914\n",
|
14 |
+
"Data columns (total 5 columns):\n",
|
15 |
+
" # Column Non-Null Count Dtype \n",
|
16 |
+
"--- ------ -------------- ----- \n",
|
17 |
+
" 0 issuekey 3915 non-null object \n",
|
18 |
+
" 1 created 3915 non-null object \n",
|
19 |
+
" 2 title 3915 non-null object \n",
|
20 |
+
" 3 description 3915 non-null object \n",
|
21 |
+
" 4 storypoint 3915 non-null float64\n",
|
22 |
+
"dtypes: float64(1), object(4)\n",
|
23 |
+
"memory usage: 153.1+ KB\n"
|
24 |
+
]
|
25 |
+
}
|
26 |
+
],
|
27 |
"source": [
|
28 |
"import pandas as pd\n",
|
29 |
"project_name = \"TIMOB\"\n",
|
|
|
42 |
},
|
43 |
{
|
44 |
"cell_type": "code",
|
45 |
+
"execution_count": 2,
|
46 |
"metadata": {},
|
47 |
"outputs": [],
|
48 |
"source": [
|
|
|
117 |
},
|
118 |
{
|
119 |
"cell_type": "code",
|
120 |
+
"execution_count": 3,
|
121 |
"metadata": {},
|
122 |
+
"outputs": [
|
123 |
+
{
|
124 |
+
"data": {
|
125 |
+
"text/plain": [
|
126 |
+
"['models/tawos/TIMOB/model_tawos_TIMOB_tfidf_linear.joblib']"
|
127 |
+
]
|
128 |
+
},
|
129 |
+
"execution_count": 3,
|
130 |
+
"metadata": {},
|
131 |
+
"output_type": "execute_result"
|
132 |
+
}
|
133 |
+
],
|
134 |
"source": [
|
135 |
"from sklearn.dummy import DummyRegressor\n",
|
136 |
"from nltk.corpus import stopwords\n",
|
create_XD_model.ipynb
CHANGED
@@ -2,12 +2,31 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import pandas as pd\n",
|
10 |
-
"project_name = \"
|
11 |
"\n",
|
12 |
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
13 |
"\n",
|
@@ -23,7 +42,7 @@
|
|
23 |
},
|
24 |
{
|
25 |
"cell_type": "code",
|
26 |
-
"execution_count":
|
27 |
"metadata": {},
|
28 |
"outputs": [],
|
29 |
"source": [
|
@@ -98,9 +117,20 @@
|
|
98 |
},
|
99 |
{
|
100 |
"cell_type": "code",
|
101 |
-
"execution_count":
|
102 |
"metadata": {},
|
103 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
"source": [
|
105 |
"from sklearn.dummy import DummyRegressor\n",
|
106 |
"from nltk.corpus import stopwords\n",
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
13 |
+
"RangeIndex: 811 entries, 0 to 810\n",
|
14 |
+
"Data columns (total 5 columns):\n",
|
15 |
+
" # Column Non-Null Count Dtype \n",
|
16 |
+
"--- ------ -------------- ----- \n",
|
17 |
+
" 0 issuekey 811 non-null object\n",
|
18 |
+
" 1 created 811 non-null object\n",
|
19 |
+
" 2 title 811 non-null object\n",
|
20 |
+
" 3 description 811 non-null object\n",
|
21 |
+
" 4 storypoint 811 non-null int64 \n",
|
22 |
+
"dtypes: int64(1), object(4)\n",
|
23 |
+
"memory usage: 31.8+ KB\n"
|
24 |
+
]
|
25 |
+
}
|
26 |
+
],
|
27 |
"source": [
|
28 |
"import pandas as pd\n",
|
29 |
+
"project_name = \"XD\"\n",
|
30 |
"\n",
|
31 |
"df = pd.read_csv(\"database\\\\tawos\\\\deep\\\\{}_deep-se.csv\".format(project_name))\n",
|
32 |
"\n",
|
|
|
42 |
},
|
43 |
{
|
44 |
"cell_type": "code",
|
45 |
+
"execution_count": 3,
|
46 |
"metadata": {},
|
47 |
"outputs": [],
|
48 |
"source": [
|
|
|
117 |
},
|
118 |
{
|
119 |
"cell_type": "code",
|
120 |
+
"execution_count": 4,
|
121 |
"metadata": {},
|
122 |
+
"outputs": [
|
123 |
+
{
|
124 |
+
"data": {
|
125 |
+
"text/plain": [
|
126 |
+
"['models/tawos/XD/model_tawos_XD_tfidf_linear.joblib']"
|
127 |
+
]
|
128 |
+
},
|
129 |
+
"execution_count": 4,
|
130 |
+
"metadata": {},
|
131 |
+
"output_type": "execute_result"
|
132 |
+
}
|
133 |
+
],
|
134 |
"source": [
|
135 |
"from sklearn.dummy import DummyRegressor\n",
|
136 |
"from nltk.corpus import stopwords\n",
|
create_alloy_model.ipynb
CHANGED
@@ -2,9 +2,28 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import pandas as pd\n",
|
10 |
"project_name = \"ALOY\"\n",
|
@@ -21,7 +40,7 @@
|
|
21 |
},
|
22 |
{
|
23 |
"cell_type": "code",
|
24 |
-
"execution_count":
|
25 |
"metadata": {},
|
26 |
"outputs": [],
|
27 |
"source": [
|
@@ -96,9 +115,20 @@
|
|
96 |
},
|
97 |
{
|
98 |
"cell_type": "code",
|
99 |
-
"execution_count":
|
100 |
"metadata": {},
|
101 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
"source": [
|
103 |
"from sklearn.dummy import DummyRegressor\n",
|
104 |
"from nltk.corpus import stopwords\n",
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 10,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
13 |
+
"RangeIndex: 241 entries, 0 to 240\n",
|
14 |
+
"Data columns (total 5 columns):\n",
|
15 |
+
" # Column Non-Null Count Dtype \n",
|
16 |
+
"--- ------ -------------- ----- \n",
|
17 |
+
" 0 issuekey 241 non-null object\n",
|
18 |
+
" 1 created 241 non-null object\n",
|
19 |
+
" 2 title 241 non-null object\n",
|
20 |
+
" 3 description 241 non-null object\n",
|
21 |
+
" 4 storypoint 241 non-null int64 \n",
|
22 |
+
"dtypes: int64(1), object(4)\n",
|
23 |
+
"memory usage: 9.5+ KB\n"
|
24 |
+
]
|
25 |
+
}
|
26 |
+
],
|
27 |
"source": [
|
28 |
"import pandas as pd\n",
|
29 |
"project_name = \"ALOY\"\n",
|
|
|
40 |
},
|
41 |
{
|
42 |
"cell_type": "code",
|
43 |
+
"execution_count": 11,
|
44 |
"metadata": {},
|
45 |
"outputs": [],
|
46 |
"source": [
|
|
|
115 |
},
|
116 |
{
|
117 |
"cell_type": "code",
|
118 |
+
"execution_count": 12,
|
119 |
"metadata": {},
|
120 |
+
"outputs": [
|
121 |
+
{
|
122 |
+
"data": {
|
123 |
+
"text/plain": [
|
124 |
+
"['models/tawos/ALOY/model_tawos_ALOY_tfidf_linear.joblib']"
|
125 |
+
]
|
126 |
+
},
|
127 |
+
"execution_count": 12,
|
128 |
+
"metadata": {},
|
129 |
+
"output_type": "execute_result"
|
130 |
+
}
|
131 |
+
],
|
132 |
"source": [
|
133 |
"from sklearn.dummy import DummyRegressor\n",
|
134 |
"from nltk.corpus import stopwords\n",
|
models/tawos/APSTUD/model_tawos_APSTUD_mbr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7acac58dc585421fbd6a591c8ec452275e9c4e48ae37c5dd82497c0ab35cc6b3
|
3 |
+
size 383
|
models/tawos/APSTUD/model_tawos_APSTUD_median.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6571c714502722b037a8acd8cbf088c366257eb1061179d542a657eea7aba33
|
3 |
+
size 383
|
models/tawos/APSTUD/model_tawos_APSTUD_neosp_linear.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e13246fcc693b4894adca7b7bf2eec614fc6a3ab96b58860146471b6b458550
|
3 |
+
size 1280
|
models/tawos/APSTUD/model_tawos_APSTUD_neosp_svr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80ca226d62be4cd400503dbbf0705617bb3b9e9694a2059ce7c41bdeb5ab9a84
|
3 |
+
size 48180
|
models/tawos/APSTUD/model_tawos_APSTUD_tfidf_linear.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b103fa59d6908c65ba7c761c6a316f207b92a043a4ab5b8b45881bae91971c84
|
3 |
+
size 137848
|
models/tawos/APSTUD/model_tawos_APSTUD_tfidf_svr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d7c47cb8057e26871533f974cc1589166ac35ba35b124f961f02de08a07f207
|
3 |
+
size 19491164
|
models/tawos/APSTUD/vectorizer_tawos_APSTUD_tfidf.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61778540a81a78a549cf7c03df66b5eef8cc66202e072d083ab57aef64399649
|
3 |
+
size 155196
|
models/tawos/CLI/model_tawos_CLI_mbr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45703b7823252f7922bc9c977d04b9b120b71304bdce17cc28344caa35fabbbe
|
3 |
+
size 383
|
models/tawos/CLI/model_tawos_CLI_median.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f33655a67e4d587143f615b1604a45fc5cac5b70b0c8e999b47a953a43511e43
|
3 |
+
size 383
|
models/tawos/CLI/model_tawos_CLI_neosp_linear.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a4a3292bebabee20e9e36430e6554f3de8da5fb422571484f6a95c227e81576
|
3 |
+
size 1280
|
models/tawos/CLI/model_tawos_CLI_neosp_svr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f0b063bb04d30f576212a7154e6961c919e75595f6e143c42e4c19bc06a0844
|
3 |
+
size 29171
|
models/tawos/CLI/model_tawos_CLI_tfidf_linear.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfdc99bda09e610d6e429b394fae12c28898e7e28f4467c097d0e314522240ba
|
3 |
+
size 60608
|
models/tawos/CLI/model_tawos_CLI_tfidf_svr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f4e2eef9d29a97978d4690b2502a7c3f2249d4d7a9bb5a2332875634919268c
|
3 |
+
size 5148515
|
models/tawos/CLI/vectorizer_tawos_CLI_tfidf.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcd970eb344f06bd09a584eceb7237b1705c88452b7a1f0ea2e7b434d9c8dbae
|
3 |
+
size 68396
|
models/tawos/TIMOB/model_tawos_TIMOB_mbr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:102c44692f2343dba35472d1eb958558c33726394f70a796bf0b8f4aea4f930e
|
3 |
+
size 383
|
models/tawos/TIMOB/model_tawos_TIMOB_median.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b953e6d331fe351298ed5347ee1248ed7b925103d17579f22e1af38f9969c6a7
|
3 |
+
size 383
|
models/tawos/TIMOB/model_tawos_TIMOB_neosp_linear.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46b123d78db5341847c348fcd4c4717b735e7a56b183d99c4fc2df11ea4cfdfc
|
3 |
+
size 1280
|
models/tawos/TIMOB/model_tawos_TIMOB_neosp_svr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37fc0c3bfcad3e6422f9d764331d739bd99d618449abb1e6c2c8c197d41ae1d4
|
3 |
+
size 392692
|
models/tawos/TIMOB/model_tawos_TIMOB_tfidf_linear.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4ade1379f8b7b1322fe6ec3c8b6a858d9a7f3201f7ee14f2d795df33801f108
|
3 |
+
size 427664
|
models/tawos/TIMOB/model_tawos_TIMOB_tfidf_svr.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d8509d7867bbc0600873a3b423e76541ed9a93131b85e9b518800a8114bb8b6
|
3 |
+
size 447037100
|
models/tawos/TIMOB/vectorizer_tawos_TIMOB_tfidf.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb23410643efcc29e0f566115da31efd5958f03fbfaea577569d2a6c29157110
|
3 |
+
size 455625
|