Upload folder using huggingface_hub

- .README +31 -0
- Analysis.ipynb +0 -0
- Dockerfile +10 -0
- README.md +3 -9
- __pycache__/utils.cpython-310.pyc +0 -0
- app.py +1255 -0
- code/Agency.json +34 -0
- code/Borough.json +12 -0
- code/all_vars.json +35 -0
- code/build_service.py +167 -0
- code/build_weather.py +98 -0
- code/create_maps.py +177 -0
- code/future_features.py +21 -0
- code/past_features.py +21 -0
- code/recurse_predict.py +22 -0
- code/target_features.py +27 -0
- custom.css +43 -0
- data/data_final.csv +0 -0
- data/data_merged_full.csv +0 -0
- data/docs.csv +1315 -0
- data/drop_vars.xlsx +0 -0
- data/weather_aggregated_2010-2018.csv +0 -0
- figures/bounded_map.html +95 -0
- figures/final_map.html +0 -0
- figures/map1.html +0 -0
- figures/map2.html +0 -0
- figures/model_performance.png +0 -0
- models/BERTopic/config.json +17 -0
- models/BERTopic/ctfidf.safetensors +3 -0
- models/BERTopic/ctfidf_config.json +408 -0
- models/BERTopic/topic_embeddings.safetensors +3 -0
- models/BERTopic/topics.json +1671 -0
- models/final_model.json +0 -0
- reports/311_data_1.html +0 -0
- reports/weather_data_after2016_ts.html +0 -0
- reports/weather_data_ts.html +0 -0
- requirements.txt +26 -0
- utils.py +1028 -0
.README
ADDED
@@ -0,0 +1,31 @@
+# Steps to run
+
+## Pip install requirements
+
+```bash
+pip install -r requirements.txt
+```
+
+## Follow the Analysis.ipynb notebook for results in notebook format
+
+### Viewing the Gradio application is recommended for the fuller picture
+
+## Option 1: Run the Gradio app locally
+
+```bash
+python /path/to/app.py
+```
+
+## Option 2: Access the web application at [https://dna-casestudy.com/](https://dna-casestudy.com/)
+
+## Option 3: Build the Docker container
+
+
+```bash
+cd /path/to/folder
+docker build -t my-case-study .
+docker run -p 7860:7860 my-case-study
+```
+
+
+## If run locally, the app will be served at [http://localhost:7860](http://localhost:7860)
Analysis.ipynb
ADDED
The diff for this file is too large to render.
Dockerfile
ADDED
@@ -0,0 +1,10 @@
+FROM python:3.10.2
+
+WORKDIR /app
+
+COPY . .
+RUN python -m pip install -U pip
+RUN pip install -r /app/requirements.txt
+
+EXPOSE 7860
+CMD ["python", "app.py"]
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-emoji: 🐢
-colorFrom: green
-colorTo: yellow
-sdk: gradio
-sdk_version: 4.28.3
+title: dna-casestudy
 app_file: app.py
-
+sdk: gradio
+sdk_version: 4.27.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/utils.cpython-310.pyc
ADDED
Binary file (26.8 kB).
app.py
ADDED
@@ -0,0 +1,1255 @@
+import sys
+import inspect
+import math
+import pandas as pd
+import numpy as np
+import polars as pl
+import seaborn as sns
+import matplotlib
+import utils
+from matplotlib import pyplot as plt
+import sklearn
+import gradio as gr
+from IPython.display import display
+import plotly.figure_factory as ff
+from sklearn.impute import SimpleImputer
+from utils import create_seasons
+from bs4 import BeautifulSoup
+from IPython.display import display, HTML
+from bertopic import BERTopic
+import html
+import xgboost as xgb
+from xgboost import plot_importance
+from sklearn.metrics import r2_score, mean_absolute_percentage_error
+from utils import find_variable_data, build_temporal_features, create_datetime, map_vals
+import plotly.express as px
+import plotly.graph_objects as go
+import plotly.figure_factory as ff
+from plotly.subplots import make_subplots
+import plotly.io as pio
+import folium
+import gc
+import json
+from utils import MyNaiveImputer
+matplotlib.use('agg')
+dark_mode = """
+function refresh() {
+    const url = new URL(window.location);
+
+    if (url.searchParams.get('__theme') !== 'dark') {
+        url.searchParams.set('__theme', 'dark');
+        window.location.href = url.href;
+    }
+}
+"""
+
+# Imputation Variables
+wd_full_local = pd.read_csv("data/weather_aggregated_2010-2018.csv", index_col=0)
+wd_full_local = wd_full_local.reset_index()
+wd_full_local["Datetime"] = pd.to_datetime(wd_full_local["Datetime"], format="%Y-%m-%d")
+wd_full_local = build_temporal_features(wd_full_local, "Datetime")
+impute_cols = ['MeanTemp', 'MinTemp', 'MaxTemp', 'DewPoint',
+               'Percipitation', 'WindSpeed', 'MaxSustainedWind',
+               'Gust', 'Rain', 'SnowDepth', 'SnowIce']
+
+my_imputer = MyNaiveImputer(wd_full_local, time_steps=49+7)
+imputers = {
+    "Mean": my_imputer.impute_all(impute_cols, strategy="mean"),
+    "Median": my_imputer.impute_all(impute_cols, strategy="median"),
+    "Max": my_imputer.impute_all(impute_cols, strategy="max"),
+    "Min": my_imputer.impute_all(impute_cols, strategy="min")
+}
+
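`MyNaiveImputer` comes from utils.py, which is part of this upload but not rendered in the diff. As a rough, assumed sketch of the behaviour described later in the Weather Data: Imputation tab (aggregate each variable over Day Of Year and extend 49 + 7 days past the last observation), a stand-in could look like the following; the class name, constructor signature, and output shape are guesses, not the actual utils.py code.

```python
# Illustrative sketch only: MyNaiveImputer lives in utils.py and may differ.
# Assumed behaviour: fill the next `time_steps` days by aggregating each column
# over Day Of Year (1-366) across the available history.
import pandas as pd


class NaiveDayOfYearImputer:
    def __init__(self, data: pd.DataFrame, date_col: str = "Datetime", time_steps: int = 49 + 7):
        self.data = data
        self.date_col = date_col
        last_date = data[date_col].max()
        # Dates to fill: continue daily from the last observed date.
        self.future_dates = pd.date_range(last_date + pd.Timedelta(days=1),
                                          periods=time_steps, freq="D")

    def impute(self, col: str, strategy: str = "mean") -> pd.Series:
        # Aggregate the column by day of year using the requested strategy.
        by_doy = self.data.groupby(self.data[self.date_col].dt.dayofyear)[col].agg(strategy)
        return pd.Series(by_doy.reindex(self.future_dates.dayofyear).to_numpy(),
                         index=self.future_dates, name=col)

    def impute_all(self, cols, strategy: str = "mean") -> pd.DataFrame:
        out = pd.DataFrame({c: self.impute(c, strategy) for c in cols})
        # Return "Datetime" as a column, matching how imputers[...] is used later.
        return out.rename_axis("Datetime").reset_index()
```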
+# Merged Data Variables
+data_merged = pd.read_csv("data/data_merged_full.csv", index_col=0)
+data_merged = create_datetime(data_merged, "Datetime", format="%Y-%m-%d")
+data_merged["Day Of Week"] = data_merged["Datetime"].dt.day_name()
+data_merged["Year String"] = data_merged["Year"].astype(str)
+data_merged["Month String"] = data_merged["Datetime"].dt.month_name()
+data_merged["Rain Bool"] = data_merged["Rain"].astype(bool)
+data_merged["SnowIce Bool"] = data_merged["SnowIce"].astype(bool)
+data_merged = data_merged.set_index("Datetime")
+weather_full_df = data_merged.loc[data_merged["Year"] <= 2018].copy()
+data_merged_eda = data_merged.loc[(data_merged["Year"] <= 2018) & (data_merged["Year"] >= 2016)]
+
+# Feature Preprocessing
+data_preprocess = data_merged.loc[(data_merged["Year"] >= 2016)].copy()
+data_preprocess["Gust_lin"] = data_preprocess["Gust"].interpolate(method="linear")
+data_preprocess["Gust_spline3"] = data_preprocess["Gust"].interpolate(method="spline", order=3)
+data_preprocess["Gust_spline5"] = data_preprocess["Gust"].interpolate(method="spline", order=5)
+data_preprocess["Gust_quad"] = data_preprocess["Gust"].interpolate(method="quadratic")
+data_preprocess["Gust"] = data_preprocess["Gust"].interpolate(method="linear")
+data_preprocess["DewPoint_old"] = data_preprocess["DewPoint"]
+data_preprocess["DewPoint_diff7d"] = data_preprocess["DewPoint"] - data_preprocess["DewPoint"].shift(7)
+data_preprocess["DewPoint"] = data_preprocess["DewPoint_diff7d"]
+data_preprocess["MinTemp_old"] = data_preprocess["MinTemp"]
+data_preprocess["MinTemp_log"] = data_preprocess["MinTemp"].apply(np.log1p)
+data_preprocess["MinTemp_log_diff7d"] = data_preprocess["MinTemp_log"] - data_preprocess["MinTemp_log"].shift(7)
+data_preprocess["MinTemp"] = data_preprocess["MinTemp_log_diff7d"]
+
+
+# Final Preprocessed Variables
+data_final = pd.read_csv("data/data_final.csv")
+data_final = create_datetime(data_final, "Datetime", format="%Y-%m-%d")
+data_final = data_final.set_index("Datetime")
+test = data_final[-7:]
+dataset = data_final[:-7]
+split_point = int(len(data_final[:-7])*0.75)
+train, val = dataset[:split_point], dataset[split_point:]
+X_train, y_train = train.drop(columns="Target"), train["Target"]
+X_val, y_val = val.drop(columns="Target"), val["Target"]
+X_test, y_test = test.drop(columns="Target"), test["Target"]
+forecast_model = xgb.XGBRegressor()
+forecast_model.load_model("models/final_model.json")
+
+# Current Predictions
+global r2_val, r2_train, mape_train, mape_val
+r2_train = 0.8691238468740025
+mape_train = 0.04889510400934162
+r2_val = 0.6072642783665692
+mape_val = 0.6072642783665692
+
+
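The hard-coded r2_* / mape_* numbers above read like cached results of an offline evaluation. Assuming X_train/X_val carry exactly the features that models/final_model.json was trained on, they could be recomputed with the imports already present in this file; a sketch, not part of app.py:

```python
# Sketch: recompute the cached metrics from the loaded model.
# Assumes X_train / X_val match the feature set final_model.json was fit on.
preds_train = forecast_model.predict(X_train)
preds_val = forecast_model.predict(X_val)

r2_train = r2_score(y_train, preds_train)
mape_train = mean_absolute_percentage_error(y_train, preds_train)
r2_val = r2_score(y_val, preds_val)
mape_val = mean_absolute_percentage_error(y_val, preds_val)
```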
+# Initial Variables
+reports = {
+    "weather_2011-2018": BeautifulSoup(open("reports/weather_data_ts.html"), "html.parser"),
+    "weather_2016-2018": BeautifulSoup(open("reports/weather_data_after2016_ts.html"), "html.parser"),
+    "service_full": BeautifulSoup(open("reports/311_data_1.html"), "html.parser")
+}
+
+iframe_dp_weather, _ = find_variable_data(reports["weather_2011-2018"], "MeanTemp")
+iframe_dp_service, _ = find_variable_data(reports["service_full"], "Created Date")
+
+# Code Variables to show in app
+load_code = """
+# Load Weather Data in pandas
+# No need for polars because data is sufficiently small
+weather_data = pd.read_csv("data/weather_NY_2010_2018Nov.csv")
+
+# Load Service data in polars for speed optimization
+# Loading directly with polars leads to errors
+# Load in pandas then convert to polars
+service_data_pd = pd.read_csv("data/311-2016-2018.csv")
+assert service_data_pd["Unique Key"].nunique() == len(service_data_pd)
+# This casting is done just because of some errors when loading pl from pandas
+service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
+service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
+service_data = pl.DataFrame(service_data_pd)
+
+# Clear some ram
+del service_data_pd
+gc.collect()"""
+
+
+map_code = """
+lat_min = service_data["Latitude"].min()
+lat_max = service_data["Latitude"].max()
+long_min = service_data["Longitude"].min()
+long_max = service_data["Longitude"].max()
+
+mincon_lat = weather_data["Latitude"] >= lat_min
+maxcon_lat = weather_data["Latitude"] <= lat_max
+mincon_long = weather_data["Longitude"] >= long_min
+maxcon_long = weather_data["Longitude"] <= long_max
+wd_localized = weather_data.loc[mincon_lat & maxcon_lat & mincon_long & maxcon_long]
+"""
+
+Closed_Ticket_Code = """
+# Fill null and typo values with the mean time diff (13 days)
+service_data = service_data.with_columns(
+    Closed_Date_New = pl.when(pl.col("Created Date") - pl.col("Closed Date") > pl.duration(days=1))
+    .then(pl.col("Created Date") + pl.duration(days=mean_diff))
+    .otherwise(pl.col("Closed Date")).fill_null(pl.col("Created Date") + pl.duration(days=mean_diff))
+)
+
+# Check for no null values
+assert service_data["Closed_Date_New"].is_null().sum() == 0
+
+# Pairwise GroupBy and Filter
+closed_tickets = (
+    service_data.group_by(["Closed_Date_New", "Created Date"])
+    # Filter Created Date <= Closed Date here
+    .agg((pl.when(pl.col("Created Date") <= pl.col("Closed_Date_New")).then(1).otherwise(0)).sum().alias("count"))
+    # Sort by the new column Closed_Date_New
+    .sort("Closed_Date_New")
+    # Keep only Closed Dates inside the 2016-2018 window
+    .filter((pl.col("Closed_Date_New").dt.year() >= 2016) & (pl.col("Closed_Date_New").dt.year() < 2019))
+    # Final GroupBy on the Closed date after filtering
+    .group_by("Closed_Date_New").agg(pl.col("count").sum().alias("num_closed_tickets"))
+)
+
+ct_df = closed_tickets.with_columns(
+    pl.col("num_closed_tickets")  # Rename Column
+)
+"""
+
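`mean_diff` is used inside `Closed_Ticket_Code` but never defined in the snippet; the Closed Ticket Counting tab later states it is the mean Closed minus Created differential over valid rows (about 13 days). A hedged sketch of how it might be derived, assuming a recent polars release where `Duration.dt.total_days()` is available:

```python
# Sketch: derive mean_diff (mean Closed - Created gap in days) from valid rows only.
# Assumes a recent polars version exposing .dt.total_days(); older releases use .dt.days().
valid = service_data.filter(
    pl.col("Closed Date").is_not_null()
    & (pl.col("Closed Date") >= pl.col("Created Date"))
)
mean_diff = int(
    valid.select(
        (pl.col("Closed Date") - pl.col("Created Date")).dt.total_days().mean()
    ).item()
)  # ~13 days per the analysis described in the app
```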
+global topic_model
+topic_model = BERTopic.load("models/BERTopic")
+
+def plot_imputations(var, data, imputers=imputers):
+    plt.close('all')
+    fig = plt.figure(figsize=(15,5))
+    plt.plot(data["Datetime"][-800:], data[var][-800:], label="Actual")
+    plt.title(f"{var} Imputation")
+    for method in imputers:
+        plt.plot(imputers[method]["Datetime"], imputers[method][var], label=method)
+
+    plt.legend()
+
+    return gr.update(value=fig)
+
+
+def plot_timeseries(data, var, data_name="My", all_vars=[], height=800, width=600):
+    plt.close('all')
+    if var == "":
+        return gr.update()
+
+    from utils import plot_timeseries
+    fig = plot_timeseries(data, var, data_name, all_vars, height, width)
+
+    return gr.update(value=fig)
+
+
+def plot_bivariate(data, x, y, subset=None, trendline=True):
+    plt.close('all')
+    map_var = {
+        "Year": "Year String",
+        "Season": "Season",
+        "Month": "Month String",
+        "Day Of Week": "Day Of Week",
+        "Weekend": "is_weekend",
+        "Holiday": "is_holiday",
+        "Rain": "Rain Bool",
+        "SnowIce": "SnowIce Bool",
+        "None": None,
+        "": None,
+    }
+    subset = map_var[subset]
+
+    from utils import plot_bivariate
+    fig = plot_bivariate(data, x, y, subset, trendline)
+
+    return gr.update(value=fig)
+
+
+def plot_seasonality(data, x, y, show_box=True, show_outliers=False):
+    plt.close('all')
+    map_var = {
+        "Year": "Year String",
+        "Season": "Season",
+        "Month": "Month String",
+        "Day Of Week": "Day Of Week",
+        "Weekend": "is_weekend",
+        "Holiday": "is_holiday",
+        "Rain": "Rain Bool",
+        "SnowIce": "SnowIce Bool",
+        "None": None,
+    }
+    x = map_var[x]
+
+    from utils import plot_seasonality
+    fig = plot_seasonality(data, x, y, show_box, show_outliers)
+
+    return gr.update(value=fig)
+
+
+def plot_correlations(data, covar, target="Target", lags=[0,1,2,3,4,5,6,7,8,13,14,15,21], method="pearson"):
+    plt.close('all')
+    from utils import plot_correlations
+    fig = plot_correlations(data, covar, target, lags, method)
+
+    return gr.update(value=fig)
+
+
+def plot_autocorr(data, var, apply=None):
+    plt.close('all')
+    from utils import plot_acf, plot_pacf
+    time_series = data.loc[:, var].to_frame().copy()
+    if apply:
+        time_series[var] = time_series[var].apply(apply)
+    fig, ax = plt.subplots(2, 1, figsize=(12, 8))
+    _ = plot_acf(time_series[var], lags=30, ax=ax[0])
+    _ = plot_pacf(time_series[var], lags=30, method="ols-adjusted", ax=ax[1])
+    _ = plt.suptitle(f"{var}", y=0.95)
+
+    return gr.update(value=fig)
+
+
+def plot_all_correlations(data, data_name="weather", method="pearson"):
+    plt.close('all')
+    from utils import plot_all_correlations
+    fig = plot_all_correlations(data, data_name, method)
+
+    return fig
+
+
+
+def run_report(report_base, variable_name, report_category="full"):
+    report_name = report_base + "_" + report_category
+    iframe, _ = find_variable_data(reports[report_name], variable_name)
+    return gr.update(value=iframe)
+
+
+def test_stationary(data, var):
+    from utils import test_stationary
+    df = test_stationary(data, var)
+
+    return df
+
+
+def plot_interpolation(data):
+    plt.close('all')
+    from utils import plot_gust_interpolation
+    fig = plot_gust_interpolation(data)
+
+    return fig
+
+
+def plot_model_feature_importance():
+
+    plt.close('all')
+    from utils import plot_final_feature_importance
+    fig = plot_final_feature_importance(forecast_model)
+
+    return fig
+
+
+def plot_final_predictions():
+    plt.close('all')
+    from utils import predict_recurse
+    next_7_day_prediction = predict_recurse(dataset, test, forecast_model)
+    fig = plt.subplots(figsize=(15, 5))
+    data_final.loc[data_final.index[-7:], "Target"] = next_7_day_prediction
+    ax = data_final.loc[data_final.index[-96:-6], "Target"].plot(label="Real", title="311 Service Volume: 7 Day Prediction")
+    data_final.loc[data_final.index[-7:], "Target"].plot(label="Forecast", ax=ax)
+    ax.legend()
+
+    curr_fig = plt.gcf()
+    plt.close()
+
+    return curr_fig
+
+
+def plot_train_split():
+    plt.close('all')
+    from utils import plot_train_split
+    fig = plot_train_split(train, val)
+
+    return fig
+
+
+def plot_val_predicitons():
+    data = val.copy()
+    data["Prediction"] = preds_val
+
+    from utils import plot_predictions
+
+    fig = plot_predictions(train, val, preds_val)
+
+    return fig
+
+
+curr_theme = gr.themes.Default(
+    text_size=gr.themes.sizes.text_lg
+)
+
+with gr.Blocks(theme=curr_theme, js=dark_mode, css=open("custom.css", "r").read()) as app:
+    title = gr.HTML("""<h1 align="center">Point72 Case Study</h1>""")
+    with gr.Tabs() as pages:
+
+        with gr.Tab("Overview") as toc_page:
+            gr.Markdown("# My Point72 Case Study Results")
+            gr.Markdown("""
+* Please follow the tabs sequentially left to right to get the full story of my work
+* There will be many interactive parts where you will be able to test and view different parameters
+* This app may also be built and run locally
+* This app is hosted and served from a cloud server VM Instance
+* Any questions please email me: [email protected]
+""")
+
+
+        with gr.Tab("Data Preprocessing") as data_preprocessing_page:
+
+            with gr.Tab("Data Loading") as dp_overview:
+                gr.HTML("<h1 style=\"text-align: center;\">Loading the Data</h1>")
+                gr.Markdown("## Goal: Load the Data as efficiently as possible")
+                gr.Markdown("""
+* Using Pandas alone is **slow and inefficient**.
+* With small datasets, pandas is great because the API is robust.
+* With medium datasets, using a library like polars (a Rust-based module with roughly 10x pandas speed) is much faster.
+* As data gets even larger, multi-processing frameworks like Spark are required.
+* For this dataset, I use pandas for the weather data and polars for the 311 data. After the aggregation and merge, I revert back to pandas for API compatibility.
+""")
+
+                with gr.Accordion("Code", open=False):
+                    gr.Code(load_code, language="python")
+
+
+            with gr.Tab("Location Mapping") as dp_overview:
+                src_doc = html.escape(open("figures/map1.html","r").read())
+                iframe1 = f'<iframe width="500px" height="500px" srcdoc="{src_doc}" frameborder="0"></iframe>'
+                src_doc = html.escape(open("figures/map2.html","r").read())
+                iframe2 = f'<iframe width="500px" height="500px" srcdoc="{src_doc}" frameborder="0"></iframe>'
+                src_doc = html.escape(open("figures/bounded_map.html","r").read())
+                iframe3 = f'<iframe width="500px" height="500px" srcdoc="{src_doc}" frameborder="0"></iframe>'
+                src_doc = html.escape(open("figures/final_map.html","r").read())
+                iframe4 = f'<iframe width="500px" height="500px" srcdoc="{src_doc}" frameborder="0"></iframe>'
+
+                gr.HTML("<h1 style=\"text-align: center;\">Location Mapping for Both Datasets</h1>")
+                with gr.Row(elem_classes="map-legend"):
+                    gr.Markdown("""
+**Legend:**
+* <span style=\"color: red\">Red:</span> Weather records
+* <span style=\"color: #5989ff\">Blue:</span> 311 Service records
+""", elem_classes="map-legend-text")
+
+                with gr.Row():
+                    with gr.Column():
+                        gr.HTML("<h1 style=\"text-align: center; margin: 0px;\">Map of New York State</h1>")
+                        map1 = gr.HTML(iframe1, elem_classes="map")
+                    with gr.Column():
+                        gr.HTML("<h1 style=\"text-align: center; margin: 0px;\">Map of New York City</h1>")
+                        map2 = gr.HTML(iframe2, elem_classes="map")
+
+                with gr.Row():
+                    gr.Markdown("""
+Juxtaposing these two maps and seeing the approximate distributions of data observations,
+it's easy to see the problem: the weather dataset encompasses a larger area than the 311 Service call dataset.
+Once this problem was diagnosed, the solution was simple. First, you find the max coordinate (Lat, Long) bounds
+of the 311 Service Dataset. Then, you filter the weather dataset to only include points from within
+these bounds. This was one of my initial discoveries when analyzing the dataset and crucial to ensure
+congruity between the two. **Below you can see the bounding box I created and how the new weather data
+observations fit in this bounding box.**
+""")
+
+                with gr.Row():
+                    with gr.Column():
+                        map3 = gr.HTML(iframe3, elem_classes="map")
+                    with gr.Column():
+                        map4 = gr.HTML(iframe4, elem_classes="map")
+
+                with gr.Accordion("Code", open=False):
+                    gr.Code(map_code, language="python")
+
+
+            with gr.Tab("Variable Pruning") as var_pruning:
+                gr.HTML("<h1 style=\"text-align: center;\">How I pruned the datasets</h1>")
+                gr.Markdown("## Goal: Remove as many useless features as possible")
+                gr.HTML("<h3 style=\"color: darkorange;\">Key Factors for Feature Removal</h3>")
+                gr.Markdown("""
+* Percentage of missing data points
+* Distribution Imbalance
+* Irrelevance
+* Number of distinct categories
+* Another variable was chosen as replacement <br/><br/>
+NOTE: Look in the appendix for visualizations of individual variables
+""")
+                droped_var_df = pd.read_excel("data/drop_vars.xlsx")
+                gr.Dataframe(
+                    droped_var_df,
+                    wrap=True,
+                    label="Dropped Variables & Justification (Weather on Bottom)"
+                )
+
+
+            with gr.Tab("Time Aggregation") as time_agg:
+                gr.HTML("<h1 style=\"text-align: center;\">Aggregate Data by Date</h1>")
+                gr.Markdown("## Goal: Aggregate data by Date")
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: 311 Service data is not inherently formatted to provide Created Ticket Counts</h3>")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Data must be aggregated by day to find ticket counts</li>
+<li>Covariate features need a special transformation</li>
+<li>Final Aggregations Mapping</li>
+<ul style="padding-inline-start: 40px;">
+<li>Created Date ==> groupby.count ==> Target (Created ticket count)</li>
+<li>Closed Date ==> Agg* ==> Number of closed tickets (Agg* explained in next tabs)</li>
+<li>Agency ==> Agg* ==> Number of tickets by Agency (Agg* explained in next tabs)</li>
+<li>Borough ==> Agg* ==> Number of tickets by Borough (Agg* explained in next tabs)</li>
+<li>Descriptor ==> Agg* ==> Number of tickets by Descriptor Group/Category (Agg* explained in next tabs)</li>
+</ul>
+</ul>""")
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Weather data is not aggregated by day</h3>")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>To merge with 311 Service data, both datasets must be aggregated</li>
+<li>Additional transformations may be applied only after time aggregation</li>
+<li>Aggregation function needs to be handled feature by feature</li>
+<li>Final Aggregation Mapping</li>
+<ul style="padding-inline-start: 40px;">
+<li>MaxTemp, MaxSustainedWind ==> groupby.max ==> Variables have an inherent max feature</li>
+<li>MinTemp ==> groupby.min ==> Variable has an inherent min feature</li>
+<li>Rain, SnowIce ==> groupby.mean.round ==> Binary variables are first aggregated then rounded back to binary</li>
+<li>All Other Variables ==> groupby.mean ==> Mean used by default as it is the least lossy pooling method</li>
+</ul>
+</ul>""")
+
+
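The committed implementation of the mapping above lives in code/build_weather.py (viewable in the All Code tab). Purely as an illustration, a pandas version of the per-column daily aggregation could look like this; `weather_data` and its column names are assumptions borrowed from elsewhere in the app, not the actual build script:

```python
# Sketch of the daily aggregation mapping described above (real logic: code/build_weather.py).
agg_map = {
    "MaxTemp": "max", "MaxSustainedWind": "max",           # variables with an inherent max
    "MinTemp": "min",                                       # variable with an inherent min
    "Rain": "mean", "SnowIce": "mean",                      # binary: mean now, rounded below
    "MeanTemp": "mean", "DewPoint": "mean", "Percipitation": "mean",
    "WindSpeed": "mean", "Gust": "mean", "SnowDepth": "mean",
}
weather_daily = weather_data.groupby(weather_data["Datetime"].dt.date).agg(agg_map)
# Round the binary columns back to 0/1 after the mean pooling.
weather_daily[["Rain", "SnowIce"]] = weather_daily[["Rain", "SnowIce"]].round().astype(int)
```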
+            with gr.Tab("Weather Data: Imputation") as wd_impute:
+                gr.HTML("<h1 style=\"text-align: center;\">Data Imputation</h1>")
+                gr.Markdown("## Goal: Impute missing values in Weather Data")
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue: Weather data is incomplete, 49 days are missing in 2018</h3>")
+                gr.Markdown("#### Proposed Solution: Use a simple imputer to fill these missing days + 7 more days into the \"future\"")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Use a simple imputer rather than a robust imputation method to reduce model complexity</li>
+<ul style="padding-inline-start: 40px;">
+<li>Using a robust imputer = Conducting a multivariate forecast, Very complex & can be slow</li>
+<li>Using a simple imputer = Low complexity, low latency</li>
+</ul>
+<li>Simple imputer applies an aggregate function using Day Of Year (1-366) as the interval</li>
+<li>4 different Imputation Methods: Mean, Median, Min, Max</li>
+<li>7 additional days are imputed so the weather data can be used as a future covariate in our model</li>
+<li>Final Aggregation Mapping</li>
+<ul style="padding-inline-start: 40px;">
+<li>WindSpeed, MaxSustainedWind, Gust, SnowDepth => Use Mean => Noisy Variables, Non-Mean/Median methods are too biased, curve best fit with Mean</li>
+<li>Rain => Use Max => Binary Variables with noise, min/mean/median imputes 0, which does not follow the trend</li>
+<li>SnowIce => Use Min (impute 0) => Binary variables but mostly 0's, any other imputation is visually inaccurate</li>
+<li>MeanTemp, MinTemp, MaxTemp, DewPoint, Percipitation => Use Min => Perhaps helping to remove non-stationarity (global warming), Winter is colder now than before, Curve best fits with min</li>
+</ul>
+</ul>""")
+
+                gr.Markdown("Use the dropdown below to view the plots that justify the reasoning above")
+                with gr.Accordion("Show Plots", open=False):
+                    impute_data = gr.State(wd_full_local)
+                    impute_choices = ["None"]
+                    impute_choices.extend(impute_cols)
+                    wd_impute_col = gr.Dropdown(
+                        choices=impute_choices,
+                        value="None",
+                        label="Choose a Variable to plot all imputation methods"
+                    )
+
+                    wd_impute_plot = gr.Plot()
+
+                    wd_impute_col.change(
+                        plot_imputations,
+                        [wd_impute_col, impute_data],
+                        [wd_impute_plot]
+                    )
+
+
+            with gr.Tab("311: Closed Ticket Counting") as ct_date:
+                gr.HTML("<h1 style=\"text-align: center;\">Closed Ticket Feature</h1>")
+                gr.Markdown("## The Closed Ticket Feature is built from the Closed Date column, similarly to how Created Date was used to generate the 311 Call Volume target")
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Data Errors, Typos, and/or Null values</h3>")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Number of Null Values: </li>
+<li>Number of Closed Dates where Closed Date > Created Date: </li>
+<ul style="padding-inline-start: 40px;">
+<li>These values were most likely typos/data recording errors</li>
+<li>For instance, some of these values dated to 1900</li>
+</ul>
+<li>SOLUTION: For every data error, impute with the mean difference (recompute Closed Date based off Created)</li>
+<li>Mean is calculated as the mean time differential between all valid Closed & Created Dates</li>
+<li>Mean Time Differential: 13 Days</li>
+</ul>""")
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Data Leakage - Future into Past</h3>")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Most of the Closed Date values are 13 days ahead relative to Created Date</li>
+<li>GroupBy Closed Date only will lead to some closed ticket counts leaking into future created dates</li>
+<li>SOLUTION: GroupBy [Closed Date, Created Date] pairwise, filter so Created Date < Closed Date</li>
+</ul>""")
+                with gr.Accordion("Code", open=False):
+                    gr.Code(Closed_Ticket_Code, language="python")
+
+
+            with gr.Tab("311: Categorical Grouping") as cat_groups:
+                BERTopic = gr.State(BERTopic.load("models/BERTopic"))
+                gr.HTML("<h1 style=\"text-align: center;\">Categorical Features</h1>")
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Categorical Features have too many categories</h3>")
+                gr.Markdown("#### Create a mapping of categories into groups to reduce the total number (Viewable at the bottom of the page)")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Borough:</li>
+<ul style="padding-inline-start: 40px;">
+<li>Only 9 Categories without grouping</li>
+<li>Four Categories are either typos or just null => Group all into OTHER</li>
+</ul>
+<li>Agency:</li>
+<ul style="padding-inline-start: 40px;">
+<li>30 Agencies in total are listed</li>
+<li>Manual Research to group each Agency by Category of what they typically do</li>
+<li>30 Agencies down to 7 Agency Groupings, based on frequency and research</li>
+</ul>
+<li>Complaint Type: Removed because analysis showed complaints were too related to the agency</li>
+<ul style="padding-inline-start: 40px;">
+<li>299 unique pairs out of 271 unique complaints => only ~10% difference in distribution</li>
+</ul>
+<li>Descriptor: 1000+ unique categories. The only way to realistically group them is to use NLP</li>
+<ul style="padding-inline-start: 40px;">
+<li>Pretrained a BERTopic model to extract topics from the text</li>
+<li>BERTopic uses TF-IDF & Transformers to extract topics from text</li>
+<li>BERTopic reduced 1000+ categories into 8 groups</li>
+</ul>
+</ul>""")
+
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: How do we aggregate these features by day when there are multiple repeated categories per day</h3>")
+                gr.Markdown("#### One Hot Encode and Sum per category")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Step 1: One hot encode all the features before aggregation</li>
+<li>Step 2: GroupBy date and Sum for each encoding</li>
+<ul style="padding-inline-start: 40px;">
+<li>Example: A categorical group with 4 categories</li>
+<li>One Sum column per category representing the frequency of that category per day</li>
+</ul>
+<li>Main Downside: Highly correlated with Created Ticket data; aggregation method was essentially the same</li>
+<ul style="padding-inline-start: 40px;">
+<li>Summing across the four feature categories in the example above would just equal the ticket count</li>
+</ul>
+<li>Solution: Leave some categories out of the final vector to reduce bias (Shown in feature engineering stage)</li>
+</ul>""")
+
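A pandas illustration of the two steps above, with Borough as the example category; the committed aggregation is the polars code in code/build_service.py, and `service_df` here is a hypothetical frame with parsed dates:

```python
# Sketch: one-hot encode a categorical column, then sum per day to get daily counts.
import pandas as pd

borough_ohe = pd.get_dummies(service_df["Borough"], prefix="Borough")
borough_daily = (
    borough_ohe
    .groupby(service_df["Created Date"].dt.date)
    .sum()          # one count column per Borough category per day
)
```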
+                with gr.Accordion("View Feature Groups", open=False):
+                    with gr.Accordion("Borough", open=False):
+                        gr.JSON(json.loads(open("code/Borough.json", "r").read()))
+
+                    with gr.Accordion("Agency", open=False):
+                        gr.JSON(open("code/Agency.json", "r").read())
+
+                    with gr.Accordion("Descriptor", open=False):
+                        gr.Dataframe(topic_model.get_topic_info().loc[:, ["Count", "Name", "Representation"]])
+                        gr.Plot(topic_model.visualize_barchart(list(range(-1,6,1))))
+
+
+            with gr.Tab("All Code") as code_preprocess:
+                gr.Markdown("# View Full Code for building Weather Data")
+                with gr.Accordion(open=False):
+                    gr.Code(open("code/build_weather.py", "r").read())
+
+                gr.Markdown("# View Full Code for building 311 Service Data")
+                with gr.Accordion(open=False):
+                    gr.Code(open("code/build_service.py", "r").read())
+
+
+        with gr.Tab("Exploratory Data Analysis", id="eda_page") as eda_page:
+            bivar_data = gr.State(data_merged_eda)
+            with gr.Tab("Overview", id="eda_overview") as eda_overview:
+                gr.Markdown("# The EDA Section is intended to be a set of interactive visualizations")
+                gr.Markdown("The tabs are interactive plots and tables that were used to generate the key insights below.")
+                gr.HTML("<h3 style=\"color: darkorange;\">Key Insights</h3>")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Missing Values:</li>
+<ul style="padding-inline-start: 40px; font-size: 18px;">
+<li>Gust, if used, may need interpolation to fill missing values</li>
+</ul>
+<li>Stationarity</li>
+<ul style="padding-inline-start: 40px; font-size: 18px;">
+<li>Weather variables exhibit various levels of non-stationarity (mostly based on trend but some constant)</li>
+<ul style="padding-inline-start: 60px; font-size: 18px;">
+<li>Trends are clear for some like Temperature and DewPoint</li>
+<li>Possible causes of constant non-stationarity are factors such as global warming</li>
+</ul>
+<li>311 Calls may exhibit some forms of weekly non-stationarity</li>
+<ul style="padding-inline-start: 60px; font-size: 18px;">
+<li>Potentially weekly and monthly non-stationarity</li>
+<li>Affected by Holidays and Weekends</li>
+<li>More robust tests needed</li>
+</ul>
+<li>Action Item: Test for stationarity and remove it</li>
+</ul>
+<li>Bivariate Interactions:</li>
+<ul style="padding-inline-start: 40px; font-size: 18px;">
+<li>311 Calls have stronger relationships with certain Agency, Borough and Descriptor categories</li>
+<li>311 calls exhibit weak overall linear relationships with weather</li>
+<ul style="padding-inline-start: 60px; font-size: 18px;">
+<li>Monthly and Seasonal relationship is strongest in winter months</li>
+<li>Month of January: strongest linear relationship between MinTemp, DewPoint</li>
+</ul>
+</ul>
+<li>Seasonality:</li>
+<ul style="padding-inline-start: 40px; font-size: 18px;">
+<li>Weather variables exhibit a strong Yearly and Seasonal seasonality</li>
+<li>311 Service Variables exhibit Weekly Seasonality</li>
+<li>311 Variables affected strongly by holidays and weekends (fewer 311 calls on weekends and holidays)</li>
+</ul>
+<li>Correlation:</li>
+<ul style="padding-inline-start: 40px; font-size: 18px;">
+<li>Heavy Collinearity among weather variables (especially Min, Mean, MaxTemp)</li>
+<li>Varying degrees of correlation among 311 covariates and 311 volume</li>
+</ul>
+<li>Lags & Autocorrelation:</li>
+<ul style="padding-inline-start: 40px; font-size: 18px;">
+<li>311 Service Calls have the highest correlation with the 7, 14, 21 day weekly lags</li>
+<li>6, 8 day lag intervals have the second strongest relationship; the 8 day lag exhibits some negative correlation</li>
+<li>1 day lag exhibits similar correlation to the 6, 7 day lags</li>
+</ul>
+</ul>""")
+
+
+            with gr.Tab("Univariate", id="eda_univar") as eda_univar:
+
+                with gr.Tab("Weather Data") as eda_uni_weather:
+                    eda_univar_weatherdf = gr.State(weather_full_df)
+                    gr.Markdown("# Use the Interactive plot below")
+                    eda_uni_weather_name = gr.State("Weather")
+                    weather_vars = [
+                        "", 'MeanTemp', 'DewPoint', 'Percipitation', 'WindSpeed', 'Gust', 'SnowDepth',
+                        'MinTemp', 'MaxTemp', 'MaxSustainedWind'
+                    ]
+                    select_weather_var = gr.Dropdown(
+                        choices=weather_vars,
+                        value="",
+                        label="Select a Variable to View"
+                    )
+
+                    weather_uniplot = gr.Plot()
+
+                    select_weather_var.change(
+                        plot_timeseries,
+                        inputs=[
+                            eda_univar_weatherdf,
+                            select_weather_var,
+                            eda_uni_weather_name
+                        ],
+                        outputs=[
+                            weather_uniplot
+                        ]
+                    )
+
+                with gr.Tab("311 Service Data") as eda_uni_weather:
+                    eda_univar_servicedf = gr.State(data_merged_eda)
+                    gr.Markdown("# Use the Interactive plot below")
+                    gr.Markdown("**NOTE: Target is the count of 311 service records**")
+                    eda_uni_service_name = gr.State("Weather")
+                    service_vars = [
+                        "", 'Target', 'num_closed_tickets',
+                        # Agency Group Counts
+                        'AG_Buildings', 'AG_Environment & Sanitation', 'AG_Health',
+                        'AG_Parks', 'AG_Security', 'AG_Transportation',
+                        'AG_Other',
+                        # Borough Counts
+                        'Borough_BRONX', 'Borough_BROOKLYN', 'Borough_MANHATTAN',
+                        'Borough_QUEENS', 'Borough_STATEN ISLAND',
+                        'Borough_OTHER',
+                        # Descriptor Group Counts
+                        'DG_damaged_sign_sidewalk_missing',
+                        'DG_english_emergency_spanish_chinese',
+                        'DG_exemption_commercial_tax_business',
+                        'DG_license_complaint_illegal_violation', 'DG_noise_animal_truck_dead',
+                        'DG_odor_food_air_smoke', 'DG_order_property_inspection_condition',
+                        'DG_water_basin_litter_missed'
+                    ]
+                    select_service_var = gr.Dropdown(
+                        choices=service_vars,
+                        value="",
+                        label="Select a Variable to View"
+                    )
+
+                    service_uniplot = gr.Plot()
+
+                    select_service_var.change(
+                        plot_timeseries,
+                        inputs=[
+                            eda_univar_servicedf,
+                            select_service_var,
+                            eda_uni_service_name
+                        ],
+                        outputs=[
+                            service_uniplot
+                        ]
+                    )
+
+
+            with gr.Tab("Bivariate", id="eda_bivar") as eda_bivar:
+                gr.Markdown("# Use the Interactive plot below")
+                gr.Markdown("Use this tab to view relationships between the Target variable (number of tickets created daily) and a Covariate")
+                with gr.Column():
+                    with gr.Row() as bivar_params:
+                        bivar_dist_target = gr.Dropdown(
+                            choices=["Target"],
+                            value="Target",
+                            label="Target Variable (One option)"
+                        )
+
+                        all_bivars = ['num_closed_tickets', "Agency", "Borough", "Descriptor"]
+                        all_bivars.extend(weather_vars)
+                        all_bivars = sorted(all_bivars)
+                        all_bivars = all_bivars[1:]
+                        bivar_dist_cov = gr.Dropdown(
+                            choices=all_bivars,
+                            value="MeanTemp",
+                            label="Select Covariate"
+                        )
+                        bivar_trendline = gr.Dropdown(
+                            choices=[True, False],
+                            value=True,
+                            label="Graph with OLS Trendline"
+                        )
+
+                    with gr.Accordion("Add Seasonality", open=False):
+                        bivar_subset = gr.Dropdown(
+                            choices=["None", "Year", "Season", "Month", "Day Of Week", "Weekend", "Holiday"],
+                            value="None",
+                            label="Seasonality Options (Disabled for Agency, Borough and Descriptor)"
+                        )
+
+                    bivar_submit = gr.Button("Run")
+                    bivar_plot = gr.Plot()
+                    bivar_submit.click(
+                        plot_bivariate,
+                        [bivar_data, bivar_dist_cov, bivar_dist_target, bivar_subset, bivar_trendline],
+                        bivar_plot
+                    )
+
+
+            with gr.Tab("Seasonality") as bivar_season:
+                gr.Markdown("## Exploring the effect of Seasonality")
+
+                with gr.Row() as bivar_season_params:
+                    bivar_season_var = gr.Dropdown(
+                        choices=["Target", 'MeanTemp', 'DewPoint',
+                                 'Percipitation', 'WindSpeed', 'Gust', 'SnowDepth',
+                                 'MinTemp', 'MaxTemp', 'MaxSustainedWind'],
+                        value="Target",
+                        label="Variable"
+                    )
+
+                    bivar_season_cov = gr.Dropdown(
+                        choices=["Year", "Season", "Month", "Day Of Week", "Weekend", "Holiday", "Rain", "SnowIce"],
+                        value="Year",
+                        label="Seasonality"
+                    )
+
+                    with gr.Column():
+                        season_boxplot = gr.Checkbox(value=True, label="Show Boxplot")
+                        season_outlier = gr.Checkbox(value=False, label="Show Outliers")
+
+                bivar_season_btn = gr.Button("Run")
+
+                bivar_season_plot = gr.Plot()
+
+                bivar_season_btn.click(
+                    plot_seasonality,
+                    [bivar_data, bivar_season_cov, bivar_season_var, season_boxplot, season_outlier],
+                    [bivar_season_plot]
+                )
+
+
+            with gr.Tab("Correlation") as corr:
+
+                with gr.Tab("Weather Correlations") as corr_weather:
+                    gr.Plot(plot_all_correlations(data_merged_eda, "weather", method="pearson"))
+
+
+                with gr.Tab("311 Service Correlations") as corr_service:
+                    gr.Plot(plot_all_correlations(data_merged_eda, "service", method="pearson"))
+
+
+                with gr.Tab("Lag Correlations") as corr_dynamic:
+                    gr.Markdown("## Use this to dynamically view correlations based on Lag")
+                    gr.Markdown("By default, we will analyze lags of [0,1,2,3,4,5,6,7,8,13,14,15,21] days for the chosen variable")
+                    gr.Markdown("Scroll Down For AutoCorrelation Graphs")
+                    with gr.Row():
+                        corr_vars = [
+                            "None", 'Target', 'num_closed_tickets',
+                            # Weather Variables
+                            'MeanTemp', 'DewPoint', 'Percipitation',
+                            'WindSpeed', 'Gust', 'SnowDepth',
+                            'MinTemp', 'MaxTemp', 'MaxSustainedWind',
+                            # Agency Group Counts
+                            'AG_Buildings', 'AG_Environment & Sanitation', 'AG_Health',
+                            'AG_Parks', 'AG_Security', 'AG_Transportation',
+                            'AG_Other',
+                            # Borough Counts
+                            'Borough_BRONX', 'Borough_BROOKLYN', 'Borough_MANHATTAN',
+                            'Borough_QUEENS', 'Borough_STATEN ISLAND',
+                            'Borough_OTHER',
+                            # Descriptor Group Counts
+                            'DG_damaged_sign_sidewalk_missing',
+                            'DG_english_emergency_spanish_chinese',
+                            'DG_exemption_commercial_tax_business',
+                            'DG_license_complaint_illegal_violation', 'DG_noise_animal_truck_dead',
+                            'DG_odor_food_air_smoke', 'DG_order_property_inspection_condition',
+                            'DG_water_basin_litter_missed'
+                        ]
+                        corr_vars = gr.Dropdown(
+                            choices=corr_vars,
+                            value="Target",
+                            label="Variable"
+                        )
+
+                    corr_btn = gr.Button("Run")
+                    corr_plot = gr.Plot()
+                    autocorr_plot = gr.Plot()
+
+                    corr_btn.click(
+                        plot_correlations,
+                        [bivar_data, corr_vars],
+                        [corr_plot]
+                    )
+
+                    corr_btn.click(
+                        plot_autocorr,
+                        [bivar_data, corr_vars],
+                        [autocorr_plot]
+                    )
+
+
+        with gr.Tab("Feature Engineering") as feature_engineer_page:
+
+
+            with gr.Tab("Feature Selection") as feature_select:
+                gr.HTML("<h1 style=\"text-align: center;\">Select Features Based on EDA</h1>")
+                gr.Markdown("### Below is the logic used in our model feature selection")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Weather Covariates</li>
+<ul style="padding-inline-start: 30px; font-size: 18px;">
+<li>Weather variables exhibit various levels of non-stationarity (mostly based on trend but some constant)</li>
+<li>MeanTemp, MaxTemp: High collinearity with MinTemp. MinTemp has the highest correlation of the 3 => REMOVE</li>
+<ul style="padding-inline-start: 50px; font-size: 18px;">
+<li>Possible Reason: High temps, people stay indoors. A/C doesn't break nowadays. Lower Temps lead to building/tech failure more often</li>
+</ul>
+<li>Percipitation: Bivariate plot shows weak relationship, outliers have no effect on 311 => REMOVE</li>
+<li>SnowDepth: High number of missing values, low correlation => REMOVE</li>
+<li>Rain, SnowIce: Binary, plots (look in Seasonality Tab) show weak relationship, SnowIce heavily imbalanced (99% 0's) => REMOVE</li>
+</ul>
+<li>311 Service Covariates:</li>
+<ul style="padding-inline-start: 30px; font-size: 18px;">
+<li>LOO (Leave One - or many - Out) Encoding:</li>
+<ul style="padding-inline-start: 50px; font-size: 18px;">
+<li>Remove the weakest features from our categorical covariates</li>
+<li>Reduces bias and removes multicollinearity inherent to One-Hot Encoding</li>
+<li>Candidates For Removal:</li>
+<ul style="padding-inline-start: 70px; font-size: 18px;">
+<li>AG_Health, AG_Other: Lowest Correlation, lowest counts => REMOVE</li>
+<li>AG_Parks: Lowest Correlation, but low multi-collinearity => KEEP</li>
+<li>Borough_OTHER: Weakest Correlation, lowest count => REMOVE</li>
+<li>DG_english_emergency, DG_exemption_commercial: Weakest Correlation, lowest counts => REMOVE</li>
+<li>DG_odor_food_air_smoke: Lowest Count, but high correlation => KEEP</li>
+</ul>
+</ul>
+</ul>
+</ul>""")
+
+                with gr.Accordion("Show Final Variable List", open=False):
+                    gr.JSON(json.loads(open("code/all_vars.json","r").read()))
+
+
+            with gr.Tab("Feature Preprocessing") as feature_prep:
+                data_feature_prep = gr.State(data_preprocess)
+                gr.HTML("<h1 style=\"text-align: center;\">Preprocess Features</h1>")
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Missing Values</h3>")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Only one variable has missing values to impute: Gust</li>
+<ul style="padding-inline-start: 30px; font-size: 18px;">
+<li>Various interpolation methods were tested</li>
+<li>Methods like Spline and Polynomial over-estimated some values, breaking inherent data ranges</li>
+<li>It turns out simple Linear interpolation was best</li>
+</ul>
+<li>SOLUTION: Interpolate Gust with the Linear method</li>
+</ul>""")
+
+                with gr.Accordion("Show Interpolation Plots", open=False):
+                    gr.Plot(plot_interpolation(data_preprocess))
+
+                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Remove Non-Stationarity</h3>")
+                gr.HTML("""
+<ul style="font-size: 18px">
+<li>Variables that are non-stationary change over time; they have a trend</li>
+<li>It is ideal to transform non-stationary variables for modeling</li>
+<li>Ignore Categorical Variables (simply to keep model complexity low)</li>
+<li>Numerical Variables were tested for Non-Stationarity using two methods: ADF and KPSS</li>
+<ul style="padding-inline-start: 30px; font-size: 18px;">
+<li>Using ADF and KPSS together can reveal what kind of trend exists in the data</li>
+<li>Only 1 Case Met: Pass KPSS, Fail ADF = Trend Stationary (most likely by season)</li>
+</ul>
+<li>Only two variables failed the tests: DewPoint & MinTemp</li>
+<li>SOLUTION: Use Differencing (7d lag) + Log for MinTemp and Differencing (7d lag) for DewPoint (Log caused many NaNs)</li>
+</ul>""")
+
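utils.test_stationary is not rendered in this diff; a sketch of what an ADF + KPSS check along the lines described above could look like with statsmodels (an assumption about the helper, not its actual code):

```python
# Sketch: ADF + KPSS stationarity summary for one column.
import pandas as pd
from statsmodels.tsa.stattools import adfuller, kpss


def test_stationary_sketch(data, var, alpha=0.05):
    series = data[var].dropna()
    adf_stat, adf_p, *_ = adfuller(series, autolag="AIC")
    kpss_stat, kpss_p, *_ = kpss(series, regression="c", nlags="auto")
    return pd.DataFrame({
        "Test": ["ADF", "KPSS"],
        "Statistic": [adf_stat, kpss_stat],
        "p-value": [adf_p, kpss_p],
        # ADF: rejecting H0 (unit root) suggests stationary.
        # KPSS: rejecting H0 suggests non-stationary.
        "Looks stationary": [adf_p < alpha, kpss_p >= alpha],
    })
```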
+                with gr.Accordion("View Results Below", open=False):
+                    gr.Markdown("### MinTemp (Log) Tests Before and After Transformation")
+                    with gr.Row():
+                        with gr.Column():
+                            gr.Dataframe(test_stationary(data_preprocess, "MinTemp_old"), label="MinTemp No Augments")
+                        with gr.Column():
+                            gr.Dataframe(test_stationary(data_preprocess, "MinTemp"), label="Log + 7 Day Lag Differencing")
+
+                    gr.Markdown("### DewPoint Tests Before and After Transformation")
+                    with gr.Row():
+                        with gr.Column():
+                            gr.Dataframe(test_stationary(data_preprocess, "DewPoint_old"), label="DewPoint No Augments")
+                        with gr.Column():
+                            gr.Dataframe(test_stationary(data_preprocess, "DewPoint"), label="7 Day Lag Differencing")
+
+
+            with gr.Tab("Feature Engineering") as feature_eng:
+
+
+                with gr.Tab("Past Covariates") as fe_past:
+                    gr.HTML("<h1 style=\"text-align: center;\">Past Covariate Features</h1>")
+                    gr.Markdown("""
+* Past Covariates are data points that are implied to be related only to past information
+* For instance, using past sales of product B to predict future sales of product A
+* There are two ways to use past covariates
+* *Option 1:* Build a multi-variate forecast to predict these variables simultaneously
+* *Option 2:* Use a sliding window and lags to provide past data (especially for multi-step forecasts)
+""")
+                    gr.Markdown("**I will use Option 2 to avoid building a very complex multi-variate model**")
+                    gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Leaking Future Data into the past</h3>")
+                    gr.Markdown("""
+* By using lags, I can shift my data in a way that avoids leaking future data into the past
+* For predicting 7 days into the future, I must lag my data by at least 7 days
+* Use a rolling window that will reset over time
+""")
+                    gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Curse of Dimensionality</h3>")
+                    gr.Markdown("""
+* It is possible to use many variations of lags, rolling windows and differences to generate many features
+* Too many features leads to the curse of dimensionality, i.e. Overfitting
+* Thus, I keep my Feature Set as simple as possible
+""")
+                    gr.Markdown("""
+### Feature Set
+* Lags: 7D, 14D, 21D
+* Rolling (Shifted 7 Days forward): Mean of 14D (14 because mean(Created - Closed Date) = 13 days)
+* Differencing (7D difference = 7D lag - 14D lag): 7D
+""")
+
+
1010 |
+
with gr.Accordion("Open to view implementation code", open=False):
|
1011 |
+
gr.Code(open("code/past_features.py","r").read())
|
1012 |
+
|
1013 |
+
|
1014 |
+
with gr.Tab("Future Covariates") as fe_past:
|
1015 |
+
gr.HTML("<h1 style=\"text-align: center;\">Past Covariate Features</h1>")
|
1016 |
+
gr.Markdown("""
|
1017 |
+
* Future Covariates are data that I have about the future
|
1018 |
+
* For Instance, I can use the projected revenue of Company A to predict daily sales
|
1019 |
+
* For Future Covariates, I do not need to shift variables. I will provide a shift up to 2 days.
|
1020 |
+
* I apply a rolling and expanding window as more features
|
1021 |
+
* Also, I use mean and min to follow the logic learned in EDA. Minimum temp values seem to be more impactful on 311 volume
|
1022 |
+
""")
|
1023 |
+
gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Curse of Dimensionality</h3>")
|
1024 |
+
gr.Markdown("""
|
1025 |
+
* Similar to the Past Covaraiates, I keep my features as simple as possible with as little as possible
|
1026 |
+
* The more features, the more we may overfit
|
1027 |
+
""")
|
1028 |
+
gr.Markdown("""
|
1029 |
+
### Feature Set
|
1030 |
+
* Lags: 0D, 1D, 2D
|
1031 |
+
* Rolling: Mean & Min of last 14D
|
1032 |
+
* Expanding Window: Max, Min (min-length of 14)
|
1033 |
+
* Differencing already performed to remove trends
|
1034 |
+
""")
|
1035 |
+
|
1036 |
+
with gr.Accordion("Open to view implementation code", open=False):
|
1037 |
+
gr.Code(open("code/future_features.py","r").read())
|
1038 |
+
|
1039 |
+
|
1040 |
+
with gr.Tab("Target Variable") as fe_past:
|
1041 |
+
gr.HTML("<h1 style=\"text-align: center;\">311 Service Calls Features</h1>")
|
1042 |
+
gr.Markdown("""
|
1043 |
+
* For providing feature transformations of our Target, we can follow a similar process as above
|
1044 |
+
* Main Difference: Lags of < prediction window need to be recomputed at each iteration
|
1045 |
+
* So, for predicting at time (t+1) we need the predicted value at time (t)
|
1046 |
+
* For a recursive prediction model, this means the model cannot make batch predictions without iterating
|
1047 |
+
""")
|
1048 |
+
gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: More variables increase complexity for prediction</h3>")
|
1049 |
+
gr.Markdown("""
|
1050 |
+
* The more features, the more overfitting & more computation
|
1051 |
+
* As I will use a recursive model, these values must be recomputed at each step t+1
|
1052 |
+
* In favor of a less complex model, I will choose as minimal features as possible (excluding rolling features as its prone to error with recalculation)
|
1053 |
+
""")
|
1054 |
+
gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Leaking Future Data into the past</h3>")
|
1055 |
+
gr.Markdown("""
|
1056 |
+
* Must be careful about how these features are computed
|
1057 |
+
* For instance, for rolling mean, I would shift the data up by 1 lag first then compute the rolling sum
|
1058 |
+
* For differencing, a 7D lag difference is really the 1D - 8D lag. (For t=8, 7D diff = t7-t1 not t8-t2)
|
1059 |
+
""")
|
1060 |
+
gr.Markdown("""
|
1061 |
+
### Feature Set
|
1062 |
+
* Lags: 1D, 6D, 7D, 8D, 14D, 21D (based on highest correlations and weekly seasonality)
|
1063 |
+
* Differencing: 7D, 14D
|
1064 |
+
""")
|
1065 |
+
|
1066 |
+
with gr.Accordion("Open to view implementation code", open=False):
|
1067 |
+
gr.Code(open("code/target_features.py","r").read())
|
1068 |
+
|
1069 |
+
|
1070 |
+
with gr.Tab("Forecast Model") as model_select_train_page:
|
1071 |
+
|
1072 |
+
|
1073 |
+
with gr.Tab("Splitting the data") as model_data_split:
|
1074 |
+
gr.HTML("<h1 style=\"text-align: center;\">Splitting Time-Series Data</h1>")
|
1075 |
+
gr.HTML("""
|
1076 |
+
<ul style="font-size: 18px">
|
1077 |
+
<li>Splitting Time-Series Data is different than splitting other data</li>
|
1078 |
+
<li>Rather than splitting on random samples, you split the data by time with order consistent</li>
|
1079 |
+
<li>I took a 75% splitting approach where I split my data at the date that sits on the 75% of data length</li>
|
1080 |
+
</ul>""")
|
1081 |
+
gr.Markdown("#### As an example, I provide a graph showing exactly how I split my data")
|
1082 |
+
gr.Plot(plot_train_split())
|
1083 |
+
|
1084 |
+
|
1085 |
+
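            # --- Illustrative sketch (not part of the original app) --------------------
            # A minimal example of the time-ordered 75/25 split described above;
            # `example_time_split` is a hypothetical helper and `df` is assumed to be a
            # DataFrame indexed by Datetime in ascending order.
            def example_time_split(df, train_frac=0.75):
                split_idx = int(len(df) * train_frac)         # row that sits at 75% of the length
                train, valid = df.iloc[:split_idx], df.iloc[split_idx:]
                return train, valid, df.index[split_idx]      # also return the split date
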
with gr.Tab("Model Selection") as model_data_split:
|
1086 |
+
gr.HTML("<h1 style=\"text-align: center;\">Choosing the Right Model</h1>")
|
1087 |
+
gr.Markdown("### Types of Forecast Models for Multi-Step Prediction")
|
1088 |
+
gr.HTML("""
|
1089 |
+
<ul style="font-size: 18px">
|
1090 |
+
<li>Parallel Models: Train a model for each prediction (one for 1 day ahead, another for 2, etc.)</li>
|
1091 |
+
<li>Recursive Models: Model makes a forecast, fills any values it needs for the next prediction, predicts again</li>
|
1092 |
+
<ul style="padding-inline-start: 40px; font-size: 18px;">
|
1093 |
+
<li>One of the assumptions was to build a model that was reasonable for production</li>
|
1094 |
+
<li>Parallel models are hard to maintain as the steps of prediction increase</li>
|
1095 |
+
</ul>
|
1096 |
+
<li>Decision: Recursive Modele</li>
|
1097 |
+
</ul>""")
|
1098 |
+
gr.Markdown("### My Model Choice: XGBoost")
|
1099 |
+
gr.HTML("""
|
1100 |
+
<ul style="font-size: 18px">
|
1101 |
+
<li>Reasons for choosing:</li>
|
1102 |
+
<ul style="padding-inline-start: 40px; font-size: 18px;">
|
1103 |
+
<li>Industry standard for regression</li>
|
1104 |
+
<li>Lightweight and relatively fast</li>
|
1105 |
+
<li>Many parameters to tune, such as tree depth and regularization</li>
|
1106 |
+
<li>Scale invariant - Data does not have to be scaled</li>
|
1107 |
+
<li>Allows NaN values and categorical features without encodings (unused in my implementation)</li>
|
1108 |
+
<li>Provides key explainability in its feature importance metrics</li>
|
1109 |
+
</ul>
|
1110 |
+
<li>Decision: Use XGBoost</li>
|
1111 |
+
</ul>""")
|
1112 |
+
|
1113 |
+
|
1114 |
+
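            # --- Illustrative sketch (not part of the original app) --------------------
            # The difference between the two strategies above, in miniature. All names
            # here are hypothetical; the repo's actual recursive loop lives in
            # code/recurse_predict.py.
            def example_parallel_forecast(models, X):
                # one fitted model per horizon step: models[0] predicts t+1, models[1] t+2, ...
                return [m.predict(X)[-1] for m in models]

            def example_recursive_forecast(model, X, make_next_row, n_steps=7):
                preds = []
                for _ in range(n_steps):
                    y_hat = model.predict(X)[-1]      # predict one step ahead
                    preds.append(y_hat)
                    X = make_next_row(X, y_hat)       # fold the prediction back into the features
                return preds
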
with gr.Tab("Model Training") as model_data_split:
|
1115 |
+
gr.HTML("<h1 style=\"text-align: center;\">Training the Model</h1>")
|
1116 |
+
gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Overfitting</h3>")
|
1117 |
+
gr.HTML("""
|
1118 |
+
<ul style="font-size: 18px">
|
1119 |
+
<li>Main Cause: High number of variables and XGBoost's tendency to overfit without tuning</li>
|
1120 |
+
<li>While training, effort was made to watch the validation and training set's relative performance</li>
|
1121 |
+
<li>Steps Taken to avoid Overfitting</li>
|
1122 |
+
<ul style="padding-inline-start: 40px; font-size: 18px;">
|
1123 |
+
<li>Low Learning Rate</li>
|
1124 |
+
<li>Low Tree Depth</li>
|
1125 |
+
<li>Keeping Val score relatively close to Training score</li>
|
1126 |
+
<li>Increased l2-lambda parameter, boosting regularization</li>
|
1127 |
+
<li>Many trials to get best set of parameters</li>
|
1128 |
+
<li>Implementing Early Stopping</li>
|
1129 |
+
</ul>
|
1130 |
+
</ul>""")
|
1131 |
+
gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Choosing a Metric</h3>")
|
1132 |
+
gr.HTML("""
|
1133 |
+
<ul style="font-size: 18px">
|
1134 |
+
<li>Three metrics I considered: MAPE, MAE and MSE</li>
|
1135 |
+
<li>MAPE seemed to show the most consistent and visually accurate results</li>
|
1136 |
+
<li>Decision: MAPE</li>
|
1137 |
+
<li>Justification: 311 Service volume is quite noisy and MAPE better estimates fit to a very noisy curve than the others</li>
|
1138 |
+
</ul>""")
|
1139 |
+
|
1140 |
+
|
1141 |
+
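            # --- Illustrative sketch (not part of the original app) --------------------
            # A minimal example, using the xgboost sklearn API, of the kind of
            # conservative configuration described above (low learning rate, shallow
            # trees, stronger L2 regularization, MAPE as the eval metric, early
            # stopping). The parameter values are placeholders; the repo's actual model
            # is stored in models/final_model.json.
            def example_train_xgb(X_train, y_train, X_val, y_val):
                from xgboost import XGBRegressor
                model = XGBRegressor(
                    n_estimators=2000,
                    learning_rate=0.01,           # low learning rate
                    max_depth=3,                  # low tree depth
                    reg_lambda=5.0,               # larger L2 (lambda) regularization
                    eval_metric="mape",
                    early_stopping_rounds=50,     # stop when validation MAPE stalls
                )
                model.fit(
                    X_train, y_train,
                    eval_set=[(X_train, y_train), (X_val, y_val)],
                    verbose=False,
                )
                return model
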
with gr.Tab("Model Prediction") as model_data_split:
|
1142 |
+
gr.HTML("<h1 style=\"text-align: center;\">Recursive Model Prediction</h1>")
|
1143 |
+
gr.Markdown("""
|
1144 |
+
* Below is the code I wrote to implement the Recursive prediction explained in previous tabs
|
1145 |
+
* Predictions are made one step at a time, where the prediction t depends on prediction t-1
|
1146 |
+
* To view the final predictions made by the model see below
|
1147 |
+
""")
|
1148 |
+
gr.Code(open("code/recurse_predict.py","r").read())
|
1149 |
+
with gr.Accordion("View 7 Day Model Forecast", open=False):
|
1150 |
+
gr.Plot(plot_final_predictions())
|
1151 |
+
|
1152 |
+
|
1153 |
+
with gr.Tab("Model Evaluation") as model_eval_page:
|
1154 |
+
gr.HTML("<h1 style=\"text-align: center;\">Forecast Results</h1>")
|
1155 |
+
gr.Markdown("Overall, the model seemed to have performed pretty well. The MAPE is also <10% for both Validation and Training sets.")
|
1156 |
+
gr.Markdown("The model did suffer from a low validation R2, but this was difficult to resolve without compromising overall performance of the model.")
|
1157 |
+
gr.Markdown("The predictions seem to visually pass most backtests, which can be viewed in the graph below.")
|
1158 |
+
with gr.Accordion("Model Prediction Scores", open=False):
|
1159 |
+
gr.JSON({"Train R2": r2_train, "Train MAPE": mape_train, "Validation R2": r2_val, "Validation MAPE": mape_val})
|
1160 |
+
gr.Image("figures/model_performance.png", show_download_button=False)
|
1161 |
+
|
1162 |
+
|
1163 |
+
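        # --- Illustrative sketch (not part of the original app) ------------------------
        # How R2 / MAPE scores like the ones reported above could be computed with
        # scikit-learn; `example_scores` is a hypothetical helper.
        def example_scores(y_true, y_pred):
            from sklearn.metrics import r2_score, mean_absolute_percentage_error
            return {
                "R2": r2_score(y_true, y_pred),
                "MAPE": mean_absolute_percentage_error(y_true, y_pred),
            }
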
with gr.Tab("Feature Importance") as model_eval_page:
|
1164 |
+
gr.HTML("<h1 style=\"text-align: center;\">Feature Importance</h1>")
|
1165 |
+
gr.Markdown("""
|
1166 |
+
* Below you can view the feature importance metrics from the XGBoost model
|
1167 |
+
* It seems there is significant impact of the weather variables on 311 Service Call Volume
|
1168 |
+
* Interestingly, it seems some categories were more impactful than others as well
|
1169 |
+
""")
|
1170 |
+
gr.Plot(plot_model_feature_importance())
|
1171 |
+
|
1172 |
+
|
1173 |
+
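        # --- Illustrative sketch (not part of the original app) ------------------------
        # Feature importances can be read straight off a fitted XGBoost regressor; a
        # hypothetical example of the kind of ranking plotted above.
        def example_feature_importance(model, feature_names, top_n=20):
            import pandas as pd
            imp = pd.Series(model.feature_importances_, index=feature_names)
            return imp.sort_values(ascending=False).head(top_n)
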
with gr.Tab("Future Work & Limitations") as future_limitations_page:
|
1174 |
+
gr.Markdown("# Future Work")
|
1175 |
+
gr.Markdown("""
|
1176 |
+
* **Multi-Variate Time Series Forecasting** rather than imputing values naively
|
1177 |
+
* Testing more kinds of models such as LightGBM
|
1178 |
+
* Robustly testing parameters of current model using GridSearchCV
|
1179 |
+
* Comparing performance of my forecast model to others
|
1180 |
+
* More Data! Having more 311 Call data may help find other indicators
|
1181 |
+
""")
|
1182 |
+
gr.Markdown("# Future Deployments")
|
1183 |
+
gr.Markdown("""
|
1184 |
+
* Containerize the model and load onto an API for ingestion
|
1185 |
+
* Containerize data preprocessing and load into a Spark Cluster
|
1186 |
+
* Create triggers and view tables to verify data preprocessing
|
1187 |
+
* Create functions to monitor model performance
|
1188 |
+
""")
|
1189 |
+
|
1190 |
+
with gr.Tab("Appendix") as future_limitations_page:
|
1191 |
+
|
1192 |
+
with gr.Tab("Weather Data Analysis") as dp_weather:
|
1193 |
+
dp_weather_state = gr.State("weather")
|
1194 |
+
with gr.Column():
|
1195 |
+
with gr.Row():
|
1196 |
+
dp_weather_category = gr.Dropdown(
|
1197 |
+
choices=["2011-2018", "2016-2018"],
|
1198 |
+
value="2011-2018",
|
1199 |
+
label="Time Range"
|
1200 |
+
)
|
1201 |
+
|
1202 |
+
dp_weather_var = gr.Dropdown(
|
1203 |
+
choices = ["MeanTemp", "MinTemp", "MaxTemp", "DewPoint", "Percipitation", "WindSpeed", "MaxSustainedWind", "Gust", "Rain", "SnowDepth", "SnowIce"],
|
1204 |
+
value = "MeanTemp",
|
1205 |
+
label = "Variable"
|
1206 |
+
)
|
1207 |
+
|
1208 |
+
dp_weather_btn = gr.Button("Run")
|
1209 |
+
|
1210 |
+
dp_weather_report = gr.HTML(value=iframe_dp_weather)
|
1211 |
+
|
1212 |
+
dp_weather_btn.click(
|
1213 |
+
run_report,
|
1214 |
+
[dp_weather_state, dp_weather_var, dp_weather_category],
|
1215 |
+
dp_weather_report,
|
1216 |
+
)
|
1217 |
+
|
1218 |
+
with gr.Tab("Service Data Analysis") as dp_service:
|
1219 |
+
dp_service_state = gr.State("service")
|
1220 |
+
dp_service_category = gr.State("full")
|
1221 |
+
with gr.Column():
|
1222 |
+
dp_service_var = gr.Dropdown(
|
1223 |
+
choices = [
|
1224 |
+
"Created Date", "Closed Date", "Agency", "Agency Name",
|
1225 |
+
"Complaint Type", "Descriptor", "Location Type", "Landmark",
|
1226 |
+
"Facility Type", "Status", "Community Board", "Borough",
|
1227 |
+
"Open Data Channel Type", "Park Facility Name", "Park Borough",
|
1228 |
+
"Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
|
1229 |
+
"Bridge Highway Name", "Bridge Highway Direction", "Road ramp",
|
1230 |
+
"Bridge Highway Segment"
|
1231 |
+
],
|
1232 |
+
value = "Created Date",
|
1233 |
+
label = "Select Variable and Run"
|
1234 |
+
)
|
1235 |
+
dp_service_btn = gr.Button("Run")
|
1236 |
+
|
1237 |
+
dp_service_report = gr.HTML(value=iframe_dp_service)
|
1238 |
+
|
1239 |
+
dp_service_btn.click(
|
1240 |
+
run_report,
|
1241 |
+
[dp_service_state, dp_service_var, dp_service_category],
|
1242 |
+
dp_service_report,
|
1243 |
+
)
|
1244 |
+
|
1245 |
+
def main():
|
1246 |
+
|
1247 |
+
|
1248 |
+
app.launch(share=False)
|
1249 |
+
return app
|
1250 |
+
|
1251 |
+
|
1252 |
+
if __name__=="__main__":
|
1253 |
+
|
1254 |
+
|
1255 |
+
main()
|
code/Agency.json
ADDED
@@ -0,0 +1,34 @@
{
    "Agency": {
        "NYPD": "Security",
        "HPD": "Buildings",
        "DOT": "Transportation",
        "DSNY": "Environment & Sanitation",
        "DEP": "Environment & Sanitation",
        "DOB": "Buildings",
        "DOE": "Buildings",
        "DPR": "Parks",
        "DOHMH": "Health",
        "DOF": "Other",
        "DHS": "Security",
        "TLC": "Transportation",
        "HRA": "Other",
        "DCA": "Other",
        "DFTA": "Other",
        "EDC": "Other",
        "DOITT": "Other",
        "DCAS": "Other",
        "NYCEM": "Other",
        "ACS": "Other",
        "3-1-1": "Other",
        "TAX": "Other",
        "DCP": "Other",
        "DORIS": "Other",
        "FDNY": "Other",
        "TAT": "Other",
        "COIB": "Other",
        "CEO": "Other",
        "MOC": "Other",
        "OMB": "Other"
    }
}
code/Borough.json
ADDED
@@ -0,0 +1,12 @@
{
    "Borough": {
        "BRONX": "BRONX",
        "BROOKLYN": "BROOKLYN",
        "QUEENS": "QUEENS",
        "STATEN ISLAND": "STATEN ISLAND",
        "2017": "OTHER",
        "2018": "OTHER",
        "undefined": "OTHER",
        "null": "OTHER"
    }
}
code/all_vars.json
ADDED
@@ -0,0 +1,35 @@
{
    "y": ["Target"],
    "past_covariates": [
        "num_closed_tickets",
        "AG_Buildings", "AG_Environment & Sanitation", "AG_Health",
        "AG_Parks", "AG_Security", "AG_Transportation",
        "AG_Other",
        "Borough_BRONX", "Borough_BROOKLYN", "Borough_MANHATTAN",
        "Borough_QUEENS", "Borough_STATEN ISLAND",
        "Borough_OTHER",
        "DG_damaged_sign_sidewalk_missing",
        "DG_english_emergency_spanish_chinese",
        "DG_exemption_commercial_tax_business",
        "DG_license_complaint_illegal_violation", "DG_noise_animal_truck_dead",
        "DG_odor_food_air_smoke", "DG_order_property_inspection_condition",
        "DG_water_basin_litter_missed"
    ],
    "future_covariates": [
        "DewPoint",
        "WindSpeed",
        "Gust",
        "SnowDepth",
        "MinTemp"
    ],
    "temporal": [
        "Year",
        "Month",
        "Day",
        "DayOfWeek",
        "DayOfYear",
        "is_weekend",
        "is_holiday",
        "Season"
    ]
}
code/build_service.py
ADDED
@@ -0,0 +1,167 @@
# NOTE: this module assumes `pandas as pd`, `polars as pl`, `gc`, `BERTopic`
# (bertopic) and `defaultdict` (collections) are imported, and that the
# `create_datetime` helper from this repo's utils.py is in scope.
def build_service_data(filename):
    # Loading data directly with polars leads to errors
    # Some rows end up missing for an unknown reason
    # FIX: Load in pandas then convert to polars
    service_data_pd = pd.read_csv(filename)

    # Quick test to assure the unique key is in fact unique
    assert service_data_pd["Unique Key"].nunique() == len(service_data_pd)

    # Load from pandas Dataframe
    service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
    service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
    service_data = pl.DataFrame(service_data_pd)

    # Clear some ram
    del service_data_pd
    gc.collect()

    drop_cols = [
        "Unique Key", "Agency Name", "Location Type", "Incident Zip",
        "Incident Address", "Street Name", "Cross Street 1",
        "Cross Street 2", "Intersection Street 1", "Intersection Street 2",
        "Address Type", "City", "Landmark", "Facility Type",
        "Status", "Due Date", "Resolution Description",
        "Resolution Action Updated Date", "Community Board",
        "BBL", "X Coordinate (State Plane)", "Y Coordinate (State Plane)",
        "Open Data Channel Type", "Park Facility Name", "Park Borough",
        "Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
        "Bridge Highway Name", "Bridge Highway Direction", "Road Ramp",
        "Bridge Highway Segment", "Location", "Created Year"
    ]

    # Drop columns and create the date variable
    service_data = service_data.drop(drop_cols)
    service_data = create_datetime(service_data, "Created Date")
    service_data = create_datetime(service_data, "Closed Date")

    # Group by date to get the number of Created tickets (as target)
    sd_grouped = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
        pl.len().alias("Target"),
    ).sort(by="Datetime")

    # Calculate the number of closed tickets
    # Mean diff used to filter service data
    # mean_diff = service_data.with_columns(
    #     diff_created_closed = pl.col("Closed Date") - pl.col("Created Date")
    # ).filter((pl.col("Closed Date").dt.year() >= 2016) & (pl.col("Closed Date").dt.year() < 2020))["diff_created_closed"].mean().days
    # Mean diff precalculated as
    mean_diff = 13

    # Create new Closed date with errors filled using the mean diff above
    service_data = service_data.with_columns(
        Closed_Date_New = pl.when(pl.col("Created Date") - pl.col("Closed Date") > pl.duration(days=1))
        .then(pl.col("Created Date") + pl.duration(days=mean_diff))
        .otherwise(pl.col("Closed Date")).fill_null(pl.col("Created Date") + pl.duration(days=mean_diff))
    )

    # Filter tickets such that the closed date < the created date to prevent future data leakage in our dataset
    # We want to make sure future data is not accidentally leaked across other points in our data
    closed_tickets = service_data.group_by(["Closed_Date_New", "Created Date"]) \
        .agg((pl.when(pl.col("Created Date") <= pl.col("Closed_Date_New")).then(1).otherwise(0)).sum().alias("count")) \
        .sort("Closed_Date_New") \
        .filter((pl.col("Closed_Date_New").dt.year() >= 2016) & (pl.col("Closed_Date_New").dt.year() < 2019)) \
        .group_by("Closed_Date_New").agg(pl.col("count").sum().alias("num_closed_tickets"))

    # Rename this column to num closed tickets
    ct_df = closed_tickets.with_columns(
        pl.col("num_closed_tickets")
    )

    # Concat the new columns into our data
    sd_df = pl.concat([sd_grouped, ct_df.drop("Closed_Date_New")], how="horizontal")

    assert len(sd_grouped) == len(ct_df)

    # CATEGORICAL FEATURE MAPPING
    # MAPPING FOR BOROUGH
    Borough_Map = {
        "Unspecified": "OTHER",
        "2017": "OTHER",
        None: "OTHER",
        "2016": "OTHER"
    }
    service_data = service_data.with_columns(
        pl.col("Borough").replace(Borough_Map)
    )

    # MAPPING FOR AGENCY
    # This mapping was done manually
    Agency_Map = {
        "NYPD": "Security", "HPD": "Buildings", "DOT": "Transportation",
        "DSNY": "Environment & Sanitation", "DEP": "Environment & Sanitation",
        "DOB": "Buildings", "DOE": "Buildings", "DPR": "Parks",
        "DOHMH": "Health", "DOF": "Other", "DHS": "Security",
        "TLC": "Transportation", "HRA": "Other", "DCA": "Other",
        "DFTA": "Other", "EDC": "Other", "DOITT": "Other", "OMB": "Other",
        "DCAS": "Other", "NYCEM": "Other", "ACS": "Other", "3-1-1": "Other",
        "TAX": "Other", "DCP": "Other", "DORIS": "Other", "FDNY": "Other",
        "TAT": "Other", "COIB": "Other", "CEO": "Other", "MOC": "Other",
    }

    service_data = service_data.with_columns(
        pl.col("Agency").replace(Agency_Map).alias("AG")  # AG shorthand for Agency Groups
    )

    # Mapping for Descriptor using BERTopic
    # Store descriptors as pandas dataframe (polars not supported)
    # Drop any nan values, and we only care about the unique values
    descriptor_docs = service_data["Descriptor"].unique().to_numpy()

    # Build our topic mapping using the pretrained BERTopic model
    # Load model and get predictions
    topic_model = BERTopic.load("models/BERTopic")
    topics, probs = topic_model.transform(descriptor_docs)

    # Visualize if wanted
    # topic_model.visualize_barchart(list(range(-1,6,1)))

    # Create a topic to ID map
    topic_df = topic_model.get_topic_info()
    topic_id_map = {row["Topic"]: row["Name"][2:] for _, row in topic_df.iterrows()}
    topic_id_map[-1] = topic_id_map[-1][1:]  # Fix for the -1 topic case

    # For each document (descriptor string) get a mapping of topics
    doc_to_topic_map = defaultdict(str)
    for topic_id, doc in zip(topics, descriptor_docs):
        topic = topic_id_map[topic_id]
        doc_to_topic_map[doc] = topic

    service_data = service_data.with_columns(
        pl.col("Descriptor").replace(doc_to_topic_map).alias("DG")  # DG shorthand for Descriptor Groups
    )

    # One Hot Encode Features
    cat_features = ["AG", "Borough", "DG"]
    service_data = service_data.to_dummies(columns=cat_features)

    # Group by Date and create our Category Feature Vector
    cat_df = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
        # Categorical Features Sum
        pl.col('^AG_.*$').sum(),
        pl.col('^Borough_.*$').sum(),
        pl.col('^DG_.*$').sum(),
    ).sort(by="Datetime")

    # Concat our category features to our current dataframe
    sd_df = pl.concat([sd_df, cat_df.drop("Datetime")], how="horizontal")

    # Now that our dataframe is significantly reduced in size
    # We can finally convert back to a pandas dataframe
    # as pandas is usable across more python packages
    sd_df = sd_df.to_pandas()

    # Set index to datetime
    sd_df = sd_df.set_index("Datetime")

    # NOTE we added 7 new rows to our weather df
    # These 7 new rows will essentially be our final pred set
    # The Target for these rows will be null -> indicating it needs to be predicted
    # Add these rows to the service dataframe
    preds_df = pd.DataFrame({'Datetime': pd.date_range(start=sd_df.index[-1], periods=8, freq='D')})[1:]
    sd_df = pd.concat([sd_df, preds_df.set_index("Datetime")], axis=0)

    return sd_df
code/build_weather.py
ADDED
@@ -0,0 +1,98 @@
# NOTE: this module assumes `pandas as pd` and `numpy as np` are imported, and that the
# helpers `create_datetime`, `build_temporal_features` and `impute_missing_weather`
# from this repo's utils.py are in scope. ("Percipitation" matches the column name used
# in the raw weather file.)

# Build all weather data from file
def build_weather_data(filename):
    # Use pandas to read file
    weather_data = pd.read_csv(filename)

    # Quickly aggregate Year, Month, Day into a datetime object
    # This is because the 311 data uses datetime
    weather_data["Datetime"] = weather_data["Year"].astype("str") + "-" + weather_data["Month"].astype("str") + "-" + weather_data["Day"].astype("str")
    weather_data = create_datetime(weather_data, "Datetime", format="%Y-%m-%d")

    # LOCALIZE
    # Pre-recorded min/max values from the service data (so we don't need again)
    lat_min = 40.49804421521046
    lat_max = 40.91294056699566
    long_min = -74.25521082506387
    long_max = -73.70038354802529

    # Create the conditions for location matching
    mincon_lat = weather_data["Latitude"] >= lat_min
    maxcon_lat = weather_data["Latitude"] <= lat_max
    mincon_long = weather_data["Longitude"] >= long_min
    maxcon_long = weather_data["Longitude"] <= long_max

    # Localize our data to match the service data
    wd_localized = weather_data.loc[mincon_lat & maxcon_lat & mincon_long & maxcon_long]
    drop_cols = [
        "USAF",
        "WBAN",
        "StationName",
        "State",
        "Latitude",
        "Longitude"
    ]
    wd_localized = wd_localized.drop(columns=drop_cols)

    # AGGREGATE
    # Map columns with aggregation method
    mean_cols = [
        'MeanTemp',
        'DewPoint',
        'Percipitation',
        'WindSpeed',
        'Gust',
        'SnowDepth',
    ]
    min_cols = [
        'MinTemp'
    ]
    max_cols = [
        'MaxTemp',
        'MaxSustainedWind'
    ]
    round_cols = [
        'Rain',
        'SnowIce'
    ]

    # Perform Aggregation
    mean_df = wd_localized.groupby("Datetime")[mean_cols].mean()
    min_df = wd_localized.groupby("Datetime")[min_cols].min()
    max_df = wd_localized.groupby("Datetime")[max_cols].max()
    round_df = wd_localized.groupby("Datetime")[round_cols].mean().round().astype(np.int8)
    wd_full = pd.concat([mean_df, min_df, max_df, round_df], axis=1)

    # Add seasonal features
    wd_full = build_temporal_features(wd_full, "Datetime")
    wd_full["Season"] = wd_full["Season"].astype("category")
    wd_full = wd_full.set_index("Datetime")

    # We will calculate the imputation for the next 7 days after 12/31/2018
    # Along with the 49 missing days
    # This will act as our "Weather Forecast"
    time_steps = 49 + 7

    # Impute Cols
    impute_cols = [
        'MeanTemp', 'MinTemp', 'MaxTemp', 'DewPoint',
        'Percipitation', 'WindSpeed', 'MaxSustainedWind',
        'Gust', 'Rain', 'SnowDepth', 'SnowIce',
    ]

    # Mean Vars
    mean_vars = ["WindSpeed", "MaxSustainedWind", "Gust", "SnowDepth"]
    min_vars = ["SnowIce", "MeanTemp", "MinTemp", "MaxTemp", "DewPoint", "Percipitation"]
    max_vars = ["Rain"]

    # Use the imported function to create the imputed data
    preds_mean = impute_missing_weather(wd_full, strategy="mean", time_steps=time_steps, impute_cols=mean_vars)
    preds_min = impute_missing_weather(wd_full, strategy="min", time_steps=time_steps, impute_cols=min_vars)
    preds_max = impute_missing_weather(wd_full, strategy="max", time_steps=time_steps, impute_cols=max_vars)
    all_preds = pd.concat([preds_mean, preds_min, preds_max], axis=1)
    all_preds = build_temporal_features(all_preds.loc[:, impute_cols], "Datetime")
    all_preds = all_preds.set_index("Datetime")

    wd_curr = wd_full.loc[wd_full["Year"] >= 2016]
    wd_df = pd.concat([wd_full, all_preds], axis=0, join="outer")

    return wd_df
code/create_maps.py
ADDED
@@ -0,0 +1,177 @@
import gc
import math

import pandas as pd
import numpy as np
import polars as pl
import folium
from utils import map_vals
from matplotlib import pyplot as plt

# NOTE
# This only needed to be run once to generate the maps
# Maps are saved in the figures folder and loaded as html

service_data_pd = pd.read_csv("data/311-2016-2018.csv")
service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
service_data_raw = pl.DataFrame(service_data_pd)
# service_data_raw = pl.read_csv("data/311-2016-2018.csv", null_values="", infer_schema_length=0)
# service_data_raw = service_data_raw.with_columns(
#     pl.col("Latitude").cast(pl.Float64),
#     pl.col("Longitude").cast(pl.Float64)
# )
# Clear some ram
del service_data_pd
gc.collect()

weather_data_raw = pd.read_csv("data/weather_NY_2010_2018Nov.csv")


def get_map_1():
    fig, weather_map = map_vals(
        weather_data_raw.loc[weather_data_raw["Year"] >= 2016],
        cols=["Latitude", "Longitude"],
        label_cols=["StationName"],
        sample_size=1000,
        color='red',
        radius=3,
        weight=4
    )
    fig, combined_map = map_vals(
        service_data_raw,
        cols=["Latitude", "Longitude"],
        color="blue", submap=weather_map,
        sample_size=1000,
        weight=2,
        radius=1
    )

    fig.save("figures/map1.html")

    return fig


def get_map_2():
    fig, service_map = map_vals(
        service_data_raw,
        cols=["Latitude", "Longitude"],
        color="blue",
        weight=2,
        radius=1,
        start_loc=[40.7128, -74.0060],
        sample_size=1000,
        zoom_start=10
    )
    fig, weather_map = map_vals(
        weather_data_raw.loc[weather_data_raw["Year"] >= 2016],
        cols=["Latitude", "Longitude"],
        submap=service_map,
        label_cols=["StationName"],
        color='red',
        radius=5,
        weight=2,
        sample_size=1000,
    )

    fig.save("figures/map2.html")

    return fig


def get_bounded_map():
    # Get prerecorded coords for the mins/max to maximize speed here
    # In notebook this is recorded via code
    lat_min = 40.49804421521046
    lat_max = 40.91294056699566
    long_min = -74.25521082506387
    long_max = -73.70038354802529

    fig = folium.Figure(height=500, width=750)
    service_bounds_map = folium.Map(
        location=[40.7128, -74.0060],
        zoom_start=10,
        tiles='cartodbpositron',
        zoom_control=False,
        scrollWheelZoom=False,
        dragging=False
    )

    kw = {
        "color": "#F1807E",
        "line_cap": "round",
        "fill": True,
        "fill_color": "blue",
        "weight": 3,
        "popup": "Service Data Coverage Zone",
    }

    folium.Rectangle(
        bounds=[[lat_min, long_min], [lat_max, long_max]],
        line_join="round",
        dash_array="5 5",
        **kw,
    ).add_to(service_bounds_map)

    fig.add_child(service_bounds_map)

    fig.save("figures/bounded_map.html")

    return fig


def get_final_map():
    lat_min = 40.49804421521046
    lat_max = 40.91294056699566
    long_min = -74.25521082506387
    long_max = -73.70038354802529

    mincon_lat = weather_data_raw["Latitude"] >= lat_min
    maxcon_lat = weather_data_raw["Latitude"] <= lat_max
    mincon_long = weather_data_raw["Longitude"] >= long_min
    maxcon_long = weather_data_raw["Longitude"] <= long_max

    service_bounds_map = folium.Map(
        location=[40.7128, -74.0060],
        zoom_start=10,
        tiles='cartodbpositron',
        zoom_control=False,
        scrollWheelZoom=False,
        dragging=False
    )

    kw = {
        "color": "#F1807E",
        "line_cap": "round",
        "fill": True,
        "fill_color": "blue",
        "weight": 3,
        "popup": "Service Data Coverage Zone",
    }

    folium.Rectangle(
        bounds=[[lat_min, long_min], [lat_max, long_max]],
        line_join="round",
        dash_array="5 5",
        **kw,
    ).add_to(service_bounds_map)

    wd_localized = weather_data_raw.loc[mincon_lat & maxcon_lat & mincon_long & maxcon_long]
    fig, wd_local_map = map_vals(
        wd_localized,
        submap=service_bounds_map,
        label_cols=["StationName"],
        color='red',
        radius=5,
        weight=2,
        sample_size=1000,
    )

    fig.save("figures/final_map.html")

    return fig


def build_maps():
    get_map_1()
    get_map_2()
    get_bounded_map()
    get_final_map()

build_maps()
code/future_features.py
ADDED
@@ -0,0 +1,21 @@
# NOTE: snippet from the analysis notebook; assumes `pandas as pd` is imported and that
# `data_preprocess` (a DataFrame with a daily DatetimeIndex) and the `FEATURES` dict
# already exist in scope.
FEATURES["future_covariates_final"] = []
for col in FEATURES["future_covariates"]:
    new_features = data_preprocess[col].to_frame().copy()
    # Lag Features
    new_features[col+"_L0D"] = new_features[col].shift(0)
    new_features[col+"_L1D"] = new_features[col].shift(1)
    new_features[col+"_L2D"] = new_features[col].shift(2)

    # Rolling Features (No shift needed for future vars)
    new_features[col+"_RMean14D"] = new_features[col].rolling('14D').mean()
    new_features[col+"_RMin14D"] = new_features[col].rolling('14D').min()

    # Expanding Window (No shift needed for future vars)
    new_features[col+"_EMean14D"] = new_features[col].expanding(min_periods=14).mean()
    new_features[col+"_EMin14D"] = new_features[col].expanding(min_periods=14).min()

    FEATURES["future_covariates_final"].extend([col+"_L0D", col+"_L1D", col+"_L2D", col+"_RMean14D", col+"_RMin14D", col+"_EMean14D", col+"_EMin14D"])
    new_features = new_features.drop(columns=col)
    data_preprocess = pd.concat([data_preprocess, new_features], axis=1)

assert len(data_preprocess.loc[:, FEATURES["future_covariates_final"]].columns) == len(FEATURES["future_covariates"])*7
code/past_features.py
ADDED
@@ -0,0 +1,21 @@
# NOTE: snippet from the analysis notebook; assumes `pandas as pd` is imported and that
# `data_preprocess` (a DataFrame with a daily DatetimeIndex) and the `FEATURES` dict
# already exist in scope.
FEATURES["past_covariates_final"] = []
for col in FEATURES["past_covariates"]:
    new_features = data_preprocess[col].to_frame().copy()
    # Lag Features
    new_features[col+"_L7D"] = new_features[col].shift(7)
    new_features[col+"_L14D"] = new_features[col].shift(14)
    new_features[col+"_L21D"] = new_features[col].shift(21)

    # Rolling Features
    # Shift to move the new features into the prediction space (2019-01-01 to 2019-01-07)
    new_features[col+"_RMean14D"] = new_features[col].shift(7).rolling('14D').mean()

    # Differencing Features
    # Shift to move the new features into the prediction space (2019-01-01 to 2019-01-07)
    new_features[col+"_Diff7D"] = (new_features[col].shift(7) - new_features[col].shift(7).shift(7))

    FEATURES["past_covariates_final"].extend([col+"_L7D", col+"_L14D", col+"_L21D", col+"_RMean14D", col+"_Diff7D"])
    new_features = new_features.drop(columns=col)
    data_preprocess = pd.concat([data_preprocess, new_features], axis=1)

assert len(data_preprocess.loc[:, FEATURES["past_covariates_final"]].columns) == len(FEATURES["past_covariates"])*5
code/recurse_predict.py
ADDED
@@ -0,0 +1,22 @@
# NOTE: snippet from the analysis notebook; assumes `pandas as pd` is imported.
# `dataset` is the full feature DataFrame, `test` holds the rows to forecast, and
# `model` is the fitted regressor (the original used the global `final_model` here;
# the passed-in `model` parameter is used instead so the function is self-contained).
def predict_recurse(dataset, test, model, features_to_impute=['Target_L1D', 'Target_Diff7D', 'Target_Diff14D'], last_feature='Target_L6D'):
    n_steps = len(test)
    merged_data = pd.concat([dataset[-14:], test], axis=0)
    all_index = merged_data.index
    X_test = test.drop(columns="Target")
    sd = -6  # Starting point for filling next value

    # For each step, get the predictions
    for i in range(n_steps-1):
        pred = model.predict(X_test)[i]
        # For the three features needed, compute the new value
        X_test.loc[all_index[sd+i], features_to_impute[0]] = pred
        X_test.loc[all_index[sd+i], features_to_impute[1]] = pred - merged_data.loc[all_index[sd+i-7], features_to_impute[1]]
        X_test.loc[all_index[sd+i], features_to_impute[2]] = pred - merged_data.loc[all_index[sd+i-14], features_to_impute[2]]

        # In the last iteration compute the Lag6D value
        if i == 5:
            X_test.loc[all_index[sd+i], last_feature] = pred - merged_data.loc[all_index[sd+i-6], last_feature]

    final_preds = model.predict(X_test)
    return final_preds
code/target_features.py
ADDED
@@ -0,0 +1,27 @@
# NOTE: snippet from the analysis notebook; assumes `pandas as pd` and `numpy as np`
# are imported and that `data_preprocess` and the `FEATURES` dict exist in scope.
FEATURES["y_features"] = []
col = FEATURES["y"][0]
new_features = data_preprocess[col].to_frame().copy()

# Lag Features
new_features[col+"_L1D"] = new_features[col].shift(1)
new_features[col+"_L6D"] = new_features[col].shift(6)
new_features[col+"_L7D"] = new_features[col].shift(7)
new_features[col+"_L8D"] = new_features[col].shift(8)
new_features[col+"_L14D"] = new_features[col].shift(14)

# Rolling Features
# After computing, shift by 1 to indicate it is computed based off a 1 day lag
new_features[col+"_RMean14D"] = new_features[col].shift(1).rolling(window='14D').mean()
# For the last 6 days, I need the prediction from time t-1
# For now set to nan
new_features[col+"_RMean14D"][-6:] = np.nan

# Differencing features
new_features[col+"_Diff7D"] = (new_features[col].shift(1) - new_features[col].shift(1).shift(7))
new_features[col+"_Diff14D"] = (new_features[col].shift(1) - new_features[col].shift(1).shift(14))

new_features = new_features.drop(columns=col)
FEATURES["y_features"].extend([col+"_L1D", col+"_L6D", col+"_L7D", col+"_L8D", col+"_L14D", col+"_RMean14D", col+"_Diff7D", col+"_Diff14D"])
data_preprocess = pd.concat([data_preprocess, new_features], axis=1)

assert len(data_preprocess.loc[:, FEATURES["y_features"]].columns) == len(FEATURES["y"])*8
custom.css
ADDED
@@ -0,0 +1,43 @@
.gr-describe-tb {
    overflow: hidden !important;
}
.row.spacing {
    border: 0px;
}
.plot-container {
    width: 100vw
}
.map * {
    text-align: -webkit-center;
}

.map-legend * {
    width: fit-content;
    max-width: 215px;
    padding: 5px;
    background: var(--border-color-primary);
    margin-top: -50px
}

.map-legend-text * {
    width: fit-content;
    padding: 0px;
    margin-bottom: 0px;
    font-size: 16px;
    margin-top: 0px;
}


.prose {
    /* font-size: 16px; */
}

.no-padding * {
    padding: 0px;
    margin: 0px;
}

.low-padding * {
    padding: 2px;
    margin: 0px;
}
data/data_final.csv
ADDED
The diff for this file is too large to render.
data/data_merged_full.csv
ADDED
The diff for this file is too large to render.
data/docs.csv
ADDED
@@ -0,0 +1,1315 @@
1 |
+
,docs
|
2 |
+
0,Request Large Bulky Item Collection
|
3 |
+
1,Sewage Odor
|
4 |
+
2,Sidewalk Violation
|
5 |
+
3,Blocked Hydrant
|
6 |
+
4,For One Address
|
7 |
+
5,Blocked Sidewalk
|
8 |
+
6,Commercial Overnight Parking
|
9 |
+
7,Noise: Construction Before/After Hours (NM1)
|
10 |
+
8,Posted Parking Sign Violation
|
11 |
+
9,Congestion/Gridlock
|
12 |
+
10,Ped Multiple Lamps
|
13 |
+
11,Building Shaking/Vibrating/Structural Stability
|
14 |
+
12,Egress - Doors Locked/Blocked/Improper/No Secondary Means
|
15 |
+
13,Working Contrary To Stop Work Order
|
16 |
+
14,E15 Illegal Postering
|
17 |
+
15,E3 Dirty Sidewalk
|
18 |
+
16,Pothole - Highway
|
19 |
+
17,Pothole
|
20 |
+
18,No Access
|
21 |
+
19,Lamppost Base Door/Cover Missing
|
22 |
+
20,Branch Cracked and Will Fall
|
23 |
+
21,Application Renewal
|
24 |
+
22,Illegal Conversion Of Residential Building/Space
|
25 |
+
23,Zoning - Non-Conforming/Illegal Vehicle Storage
|
26 |
+
24,Excessive Water In Basement (WEFB)
|
27 |
+
25,Mouse Sighting
|
28 |
+
26,Administration
|
29 |
+
27,No Receipt
|
30 |
+
28,Site Conditions Endangering Workers
|
31 |
+
29,15I Street Condition Ice-Non Storm
|
32 |
+
30,Food Contaminated
|
33 |
+
31,1 Missed Collection
|
34 |
+
32,E3A Dirty Area/Alleyway
|
35 |
+
33,Street Light Out
|
36 |
+
34,Controller
|
37 |
+
35,Electrical Wiring Defective/Exposed
|
38 |
+
36,With License Plate
|
39 |
+
37,Driver Complaint
|
40 |
+
38,Engine Idling
|
41 |
+
39,Loud Music/Party
|
42 |
+
40,Banging/Pounding
|
43 |
+
41,15S Re-Plow/Spread/Ice-Snow Cond.
|
44 |
+
42,Loud Talking
|
45 |
+
43,Noise: air condition/ventilation equipment (NV1)
|
46 |
+
44,Other (complaint details)
|
47 |
+
45,"Air: Odor/Fumes, Vehicle Idling (AD3)"
|
48 |
+
46,Lead Kit Request (Residential) (L10)
|
49 |
+
47,LOW WATER PRESSURE - WLWP
|
50 |
+
48,Use Indoor
|
51 |
+
49,Cave-in
|
52 |
+
50,Noise: Construction Equipment (NC1)
|
53 |
+
51,15 Street Cond/Dump-Out/Drop-Off
|
54 |
+
52,Sewer Backup (Use Comments) (SA)
|
55 |
+
53,Trees and Sidewalks Program
|
56 |
+
54,Hitting Building
|
57 |
+
55,Water Meter Broken/Leaking - Other (CMO)
|
58 |
+
56,Water Meter Broken/Leaking - Private Residence (CMR)
|
59 |
+
57,Exchange/Refund/Return
|
60 |
+
58,Car/Truck Horn
|
61 |
+
59,St Name - Attached to Pole
|
62 |
+
60,Graffiti
|
63 |
+
61,Dirty Water (WE)
|
64 |
+
62,Hydrant Defective (WC2)
|
65 |
+
63,Other/Unknown
|
66 |
+
64,Hydrant Running (WC3)
|
67 |
+
65,Cons - Contrary/Beyond Approved Plans/Permits
|
68 |
+
66,Sidewalk Shed/Pipe Scafford - Inadequate Defective/None
|
69 |
+
67,Leak (Use Comments) (WA2)
|
70 |
+
68,Possible Water Main Break (Use Comments) (WA1)
|
71 |
+
69,Curb Cut/Driveway/Carport - Illegal
|
72 |
+
70,Plate Condition - Shifted
|
73 |
+
71,1R Missed Recycling-All Materials
|
74 |
+
72,Planted Less Than 2 Years Ago
|
75 |
+
73,Fence - None/Inadequate
|
76 |
+
74,Recycling Electronics
|
77 |
+
75,LED Lense
|
78 |
+
76,Other
|
79 |
+
77,Parking Permit Improper Use
|
80 |
+
78,Other Housing Options
|
81 |
+
79,No Certificate Of Occupancy/Illegal/Contrary To CO
|
82 |
+
80,10 Litter Basket / Request
|
83 |
+
81,Special Events
|
84 |
+
82,E2 Receptacle Violation
|
85 |
+
83,E5 Loose Rubbish
|
86 |
+
84,Boiler - Defective/Inoperative/No Permit
|
87 |
+
85,SRO - Illegal Work/No Permit/Change In Occupancy/Use
|
88 |
+
86,Flood Light Lamp Out
|
89 |
+
87,Entire Tree Has Fallen Down
|
90 |
+
88,Coin or Card Did Not Register
|
91 |
+
89,Tree Leaning/Uprooted
|
92 |
+
90,Line/Marking - Faded
|
93 |
+
91,Plumbing
|
94 |
+
92,"No Parking, Standing, Stopping"
|
95 |
+
93,Branch or Limb Has Fallen Down
|
96 |
+
94,Food Worker Illness
|
97 |
+
95,Use Outside
|
98 |
+
96,Pedestrian Signal
|
99 |
+
97,Plumbing-Defective/Leaking/Not Maintained
|
100 |
+
98,Inadequate or No Heat
|
101 |
+
99,Out of Order
|
102 |
+
100,New Bus Stop Shelter Placement
|
103 |
+
101,Food Spoiled
|
104 |
+
102,Failed Street Repair
|
105 |
+
103,Unclean Condition
|
106 |
+
104,Price Not Posted
|
107 |
+
105,Painted Line/Marking
|
108 |
+
106,Rodents/Insects/Garbage
|
109 |
+
107,Illegal Hotel Rooms In Residential Building
|
110 |
+
108,Air: Other Air Problem (Use Comments) (AZZ)
|
111 |
+
109,"Noise, Barking Dog (NR5)"
|
112 |
+
110,Planted More Than 2 Years Ago
|
113 |
+
111,Catch Basin Clogged/Flooding (Use Comments) (SC)
|
114 |
+
112,Manhole Sunken/Damaged/Raised (SB1)
|
115 |
+
113,Wear & Tear
|
116 |
+
114,E9 Snow / Icy Sidewalk
|
117 |
+
115,No Water (WNW)
|
118 |
+
116,Driver Report
|
119 |
+
117,Double Parked Blocking Vehicle
|
120 |
+
118,"Rough, Pitted or Cracked Roads"
|
121 |
+
119,1RG Missed Recycling Paper
|
122 |
+
120,Traffic Signal Light
|
123 |
+
121,Glassware Broken
|
124 |
+
122,Insurance Information Requested
|
125 |
+
123,Demolition - Unsafe
|
126 |
+
124,Photocell (PEC) Missing
|
127 |
+
125,"Air: Smoke, Chimney or vent (AS1)"
|
128 |
+
126,Assisted Living
|
129 |
+
127,1RO Missed Recycling Organics
|
130 |
+
128,Broken Sidewalk
|
131 |
+
129,Partial Access
|
132 |
+
130,Blocked Bike Lane
|
133 |
+
131,Tattoo Artist Unlicensed
|
134 |
+
132,1 or 2
|
135 |
+
133,E14 ASP/Restricted Parking
|
136 |
+
134,Wood Pole Missing
|
137 |
+
135,Derelict Vehicles
|
138 |
+
136,Veh Signal Head
|
139 |
+
137,Loud Television
|
140 |
+
138,1RB Missed Recycling - M/G/Pl
|
141 |
+
139,LED Pedestrian Unit
|
142 |
+
140,Ped Flasher
|
143 |
+
141,Rat Sighting
|
144 |
+
142,14B Derelict Bicycle
|
145 |
+
143,Street Flooding (SJ)
|
146 |
+
144,Vehicle Signal
|
147 |
+
145,Hydrant Leaking (WC1)
|
148 |
+
146,Hydrant Running Full (WA4)
|
149 |
+
147,Double Parked Blocking Traffic
|
150 |
+
148,2 Bulk-Missed Collection
|
151 |
+
149,Overnight Commercial Storage
|
152 |
+
150,Trunk Damaged
|
153 |
+
151,2R Bulk-Missed Recy Collection
|
154 |
+
152,E1 Improper Disposal
|
155 |
+
153,E8 Canine Violation
|
156 |
+
154,E11 Litter Surveillance
|
157 |
+
155,Snow/Ice
|
158 |
+
156,12 Dead Animals
|
159 |
+
157,E12 Illegal Dumping Surveillance
|
160 |
+
158,Pigeon Waste
|
161 |
+
159,Neglected
|
162 |
+
160,Timer Defect - Fast/Fail
|
163 |
+
161,Blocked - Construction
|
164 |
+
162,Plumbing Work - Illegal/No Permit/Standpipe/Sprinkler
|
165 |
+
163,Glassware Missing
|
166 |
+
164,Lamppost Damaged
|
167 |
+
165,Ventilation
|
168 |
+
166,Street Light Cycling
|
169 |
+
167,Veh Signal Lamp
|
170 |
+
168,Structure - Indoors
|
171 |
+
169,Other School Condition
|
172 |
+
170,Chemical Vapors/Gases/Odors
|
173 |
+
171,Property Refunds and Credits
|
174 |
+
172,Car/Truck Music
|
175 |
+
173,"Air: Odor/Fumes, Restaurant (AD2)"
|
176 |
+
174,Chemical Odor (HD1)
|
177 |
+
175,Manhole Cover Broken/Making Noise (SB)
|
178 |
+
176,Cloudy Or Milky Water (QB1)
|
179 |
+
177,Failure To Maintain
|
180 |
+
178,Litter
|
181 |
+
179,Defective Hardware
|
182 |
+
180,Street Light Lamp Dim
|
183 |
+
181,Gas Hook-Up/Piping - Illegal Or Defective
|
184 |
+
182,Defacement
|
185 |
+
183,Plumbing Problem
|
186 |
+
184,E10 Street Obstruction
|
187 |
+
185,Dead Animal
|
188 |
+
186,Noise: Alarms (NR3)
|
189 |
+
187,E3B Sidewalk Obstruction
|
190 |
+
188,Detached Trailer
|
191 |
+
189,Non-Delivery Goods/Services
|
192 |
+
190,Interest Dispute
|
193 |
+
191,Tree Alive - in Poor Condition
|
194 |
+
192,Condition Attracting Rodents
|
195 |
+
193,Aided/Injury
|
196 |
+
194,One Way
|
197 |
+
195,Property - Other Billing Issue
|
198 |
+
196,Pesticide
|
199 |
+
197,Heating Problem
|
200 |
+
198,Trespassing
|
201 |
+
199,Fixture/Luminaire Out Of Position
|
202 |
+
200,Lamppost Base Door/Cover Open
|
203 |
+
201,Unlicensed
|
204 |
+
202,Broken Curb
|
205 |
+
203,Illegal Tow
|
206 |
+
204,Failure To Retain Water/Improper Drainage- (LL103/89)
|
207 |
+
205,Structural Stability Impacted - New Building Under Construction
|
208 |
+
206,Hitting Power/Phone Lines
|
209 |
+
207,Lamppost Knocked Down
|
210 |
+
208,Vehicle Complaint
|
211 |
+
209,Lamppost Wire Exposed
|
212 |
+
210,Dishwashing/Utensils
|
213 |
+
211,Other (Explain Below)
|
214 |
+
212,Blocking Street
|
215 |
+
213,Canopy Complaint
|
216 |
+
214,Manhole Overflow (Use Comments) (SA1)
|
217 |
+
215,Other Water Problem (Use Comments) (WZZ)
|
218 |
+
216,15R Street Cond/Ref.W Door
|
219 |
+
217,C1 Request Xmas Trees Collection
|
220 |
+
218,Rent Discrepancy
|
221 |
+
219,Food Contains Foreign Object
|
222 |
+
220,Unauthorized Bus Layover
|
223 |
+
221,Veh Signal Sec Door
|
224 |
+
222,Post
|
225 |
+
223,Fixture/Luminaire Door Open
|
226 |
+
224,Chronic Speeding
|
227 |
+
225,Truck Route Violation
|
228 |
+
226,Fixture/Luminaire Hanging
|
229 |
+
227,Suspended (Hanging) Scaffolds - No Pmt/Lic/Dangerous/Accident
|
230 |
+
228,Street Cleaning - ASP
|
231 |
+
229,Illegal. Commercial Use In Resident Zone
|
232 |
+
230,"Building - Vacant, Open And Unguarded"
|
233 |
+
231,Bare Hands in Contact w/ Food
|
234 |
+
232,Adult Establishment
|
235 |
+
233,Sign/Awning/Marquee - Illegal/No Permit
|
236 |
+
234,Privately Owned Public Space/Non-Compliance
|
237 |
+
235,Wall/Retaining Wall - Bulging/Cracked
|
238 |
+
236,Property Value Dispute
|
239 |
+
237,Stop
|
240 |
+
238,Nursing Home
|
241 |
+
239,Electronics/Phones
|
242 |
+
240,False Advertising
|
243 |
+
241,Flashing Hazard
|
244 |
+
242,Unsafe Worksite
|
245 |
+
243,Labor violation
|
246 |
+
244,Public Complaint - Comm Location
|
247 |
+
245,"Unsafe Chemical, Abandoned (HC2)"
|
248 |
+
246,Cable
|
249 |
+
247,Chained
|
250 |
+
248,Tortured
|
251 |
+
249,"Oil Spill On Street, Large (HQL)"
|
252 |
+
250,Noise: Private Carting Noise (NQ1)
|
253 |
+
251,22 Weeds
|
254 |
+
252,Dust from Construction
|
255 |
+
253,Multiple Street Lights Out
|
256 |
+
254,Smoking Ban - Smoking on Construction Site
|
257 |
+
255,After Hours - Licensed Est
|
258 |
+
256,Lamppost Missing
|
259 |
+
257,Pet/Animal
|
260 |
+
258,Toxic Chemical/Material
|
261 |
+
259,Tree Trunk Split
|
262 |
+
260,Metal Protruding - Sign Stump
|
263 |
+
261,Vent/Exhaust - Illegal/Improper
|
264 |
+
262,Sprinkler System - Inadequate
|
265 |
+
263,No Shelter
|
266 |
+
264,Bicycle Chained to Tree
|
267 |
+
265,Bus Stop
|
268 |
+
266,In Car
|
269 |
+
267,Sidewalk Grating - Defective
|
270 |
+
268,General Maintenance
|
271 |
+
269,Rooster
|
272 |
+
270,Damaged/Defective Goods
|
273 |
+
271,Overcharge
|
274 |
+
272,E2A Storage Of Receptacles
|
275 |
+
273,Food Worker Hygiene
|
276 |
+
274,Base Door
|
277 |
+
275,Hydrant Knocked Over/Missing (WC)
|
278 |
+
276,News Gathering
|
279 |
+
277,Sewage Leak
|
280 |
+
278,Dog
|
281 |
+
279,Chronic Stoplight Violation
|
282 |
+
280,Asbestos
|
283 |
+
281,Copy of Approval Order
|
284 |
+
282,Fixture/Luminaire Damaged
|
285 |
+
283,Billing Dispute
|
286 |
+
284,Personal SCHE Exemption
|
287 |
+
285,Safety Netting/Guard Rails - Damaged/Inadequate/None (6 Stories/75 Feet Or Less)
|
288 |
+
286,Excavation Undermining Adjacent Building
|
289 |
+
287,Plate Condition - Anti-Skid
|
290 |
+
288,Plate Condition - Noisy
|
291 |
+
289,Plate Condition - Open
|
292 |
+
290,Car Service Company Complaint
|
293 |
+
291,Damaged Vehicle
|
294 |
+
292,Demand for Cash
|
295 |
+
293,Manhole Cover Missing (Emergency) (SA3)
|
296 |
+
294,Support Bracket
|
297 |
+
295,"Cloudy Or Milky, Other (Use Comments) (QBZ)"
|
298 |
+
296,Affecting Sewer or Foundation
|
299 |
+
297,Signs of Rodents
|
300 |
+
298,Miscellaneous
|
301 |
+
299,Illegal Use Of Hose - Other (CCO)
|
302 |
+
300,Odor In Sewer/Catch Basin (ICB)
|
303 |
+
301,Street Light Dayburning
|
304 |
+
302,Veh Sgnl Mult Lamps
|
305 |
+
303,Odor
|
306 |
+
304,Maintenance Cover
|
307 |
+
305,Dumpster - Construction Waste
|
308 |
+
306,Contract Dispute
|
309 |
+
307,Real Property Tax Assessment/Correction
|
310 |
+
308,E6 Commercial Waste Disposal
|
311 |
+
309,Obstructing Public Use
|
312 |
+
310,Temporary
|
313 |
+
311,Veh Signal Visor
|
314 |
+
312,Plumbing Work - Unlicensed/Illegal/Improper Work In Progress
|
315 |
+
313,Failure to Comply with Vacate Order
|
316 |
+
314,Street Light Feed
|
317 |
+
315,Unleashed Dog in Public
|
318 |
+
316,ID Requirement Not Posted
|
319 |
+
317,Safety Netting/Guard Rails - Damaged/Inadequate/None (Over 6 Stories/75 Feet)
|
320 |
+
318,Sidewalk Staircase
|
321 |
+
319,Debris - Falling Or In Danger Of Falling
|
322 |
+
320,Smoking Violation
|
323 |
+
321,Guard Rail - Street
|
324 |
+
322,Illegal Conversion Of Commercial Bldg/Space To Other Uses
|
325 |
+
323,Lights From Parking Lot Shining On Building
|
326 |
+
324,"Air: Dust, Construction/Demolition (AE4)"
|
327 |
+
325,Asbestos Complaint (B1)
|
328 |
+
326,Car Service Company Report
|
329 |
+
327,Enclosure Cap
|
330 |
+
328,Foreign Attachment On Lamppost
|
331 |
+
329,unknown odor/taste in drinking water (QA6)
|
332 |
+
330,3A Sweeping/Missed
|
333 |
+
331,Dead Branches in Tree
|
334 |
+
332,Sidewalk Collapsed
|
335 |
+
333,Underground
|
336 |
+
334,Over Capacity
|
337 |
+
335,Noise: Jack Hammering (NC2)
|
338 |
+
336,Catch Basin Sunken/Damaged/Raised (SC1)
|
339 |
+
337,21 Collection Truck Noise
|
340 |
+
338,Curb Defect-Metal Protruding
|
341 |
+
339,Too Few on Duty
|
342 |
+
340,Update Tenant Information
|
343 |
+
341,Defective Street Cut (WZZ1)
|
344 |
+
342,Snow or Ice
|
345 |
+
343,Boiler - Fumes/Smoke/Carbon Monoxide
|
346 |
+
344,Damaged/Defective Parts
|
347 |
+
345,Illness Caused by Drinking Water
|
348 |
+
346,Structure - Outdoors
|
349 |
+
347,"Taste/Odor, Chlorine (QA1)"
|
350 |
+
348,Turn Signal
|
351 |
+
349,E1A Litter Basket / Improper Use
|
352 |
+
350,Contact Sign Not Posted
|
353 |
+
351,Graffiti - Bridge
|
354 |
+
352,8 Request to Clean Vacant Lot
|
355 |
+
353,Personal Other Exemption
|
356 |
+
354,Letter Grading
|
357 |
+
355,Food Temperature
|
358 |
+
356,Pedestrian Ramp Defective
|
359 |
+
357,Food Protection
|
360 |
+
358,School Crossing
|
361 |
+
359,Cars Parked on Sidewalk/Street
|
362 |
+
360,Mast Arm
|
363 |
+
361,TAC Report
|
364 |
+
362,ER2 Resident Recyc. (Tenant)
|
365 |
+
363,E13 Throw-Out
|
366 |
+
364,In Prohibited Area
|
367 |
+
365,In Public
|
368 |
+
366,Ped Lamp
|
369 |
+
367,Credit Card Limitations Not Posted
|
370 |
+
368,ELECTRIC/GAS RANGE
|
371 |
+
369,APARTMENT ONLY
|
372 |
+
370,WINDOW GUARD BROKEN/MISSING
|
373 |
+
371,RADIATOR
|
374 |
+
372,STEAM PIPE/RISER
|
375 |
+
373,TOILET
|
376 |
+
374,DOOR
|
377 |
+
375,BASIN/SINK
|
378 |
+
376,FLOOR
|
379 |
+
377,REFRIGERATOR
|
380 |
+
378,Other Animal
|
381 |
+
379,Line/Marking - After Repaving
|
382 |
+
380,Push Button
|
383 |
+
381,DOOR FRAME
|
384 |
+
382,WINDOW FRAME
|
385 |
+
383,WINDOW PANE
|
386 |
+
384,LIGHTING
|
387 |
+
385,NO LIGHTING
|
388 |
+
386,OUTLET/SWITCH
|
389 |
+
387,WIRING
|
390 |
+
388,POWER OUTAGE
|
391 |
+
389,MAINTENANCE
|
392 |
+
390,BELL/BUZZER/INTERCOM
|
393 |
+
391,STAIRS
|
394 |
+
392,CABINET
|
395 |
+
393,COOKING GAS
|
396 |
+
394,JANITOR/SUPER
|
397 |
+
395,MAILBOX
|
398 |
+
396,ENTIRE BUILDING
|
399 |
+
397,Wood Pole Wires Exposed
|
400 |
+
398,Vehicle
|
401 |
+
399,Damaged Telephone
|
402 |
+
400,Open Excavation (WZZ2)
|
403 |
+
401,Sewer Odor (SA2)
|
404 |
+
402,CEILING
|
405 |
+
403,ROOFING
|
406 |
+
404,WALL
|
407 |
+
405,WINDOW/FRAME
|
408 |
+
406,BATHTUB/SHOWER
|
409 |
+
407,Rodents/Mice
|
410 |
+
408,WATER SUPPLY
|
411 |
+
409,CARBON MONOXIDE DETECTOR
|
412 |
+
410,SMOKE DETECTOR
|
413 |
+
411,GARBAGE/RECYCLING STORAGE
|
414 |
+
412,FIRE ESCAPE
|
415 |
+
413,MOLD
|
416 |
+
414,Fire Alarm Lamp Out
|
417 |
+
415,PESTS
|
418 |
+
416,HEAVY FLOW
|
419 |
+
417,SEP - Professional Certification Compliance Audit
|
420 |
+
418,DAMP SPOT
|
421 |
+
419,SLOW LEAK
|
422 |
+
420,St Name - Over Intersection
|
423 |
+
421,BOILER
|
424 |
+
422,Highway Fence
|
425 |
+
423,Bag/Wallet
|
426 |
+
424,Installation/Work Quality
|
427 |
+
425,Veh Signal Lens
|
428 |
+
426,Noise: lawn care equipment (NCL)
|
429 |
+
427,SEWAGE
|
430 |
+
428,PAVEMENT
|
431 |
+
429,DOOR/FRAME
|
432 |
+
430,ROOF DOOR/HATCH
|
433 |
+
431,Lamppost Base Door/Cover Damaged
|
434 |
+
432,Facility Maintenance
|
435 |
+
433,Permit/License/Certificate
|
436 |
+
434,Rodent Sighting
|
437 |
+
435,Allergy Information
|
438 |
+
436,Receipt Incomplete/Not Given
|
439 |
+
437,Street Light Lamp Missing
|
440 |
+
438,Illegal Conversion Of Manufacturing/Industrial Space
|
441 |
+
439,Facility Construction
|
442 |
+
440,VENTILATION SYSTEM
|
443 |
+
441,Clothing Damage
|
444 |
+
442,3B Sweeping/Inadequate
|
445 |
+
443,Unauthorized Tree Pruning
|
446 |
+
444,Concrete In Catch Basin (IEA)
|
447 |
+
445,"Taste/Odor, Chemical (QA2)"
|
448 |
+
446,Flood Light Lamp Cycling
|
449 |
+
447,"\E4 18\""\"" Law\"""""
|
450 |
+
448,Fixture/Luminaire Missing
|
451 |
+
449,3 or More
|
452 |
+
450,Toilet Facility
|
453 |
+
451,GUTTER/LEADER
|
454 |
+
452,Hydrant Locking Device Request (Use Comments) (WC5)
|
455 |
+
453,Water Meter Stolen/Missing - Private Residence (CLR)
|
456 |
+
454,New Con Ed Service Request
|
457 |
+
455,Graffiti or Vandalism
|
458 |
+
456,Illegal Use Of A Hydrant (CIN)
|
459 |
+
457,Cigarette Sale to Minor
|
460 |
+
458,SIGNAGE MISSING
|
461 |
+
459,Fire Globe Missing
|
462 |
+
460,Locker Break-in/Incident
|
463 |
+
461,APS
|
464 |
+
462,Roots Damaged
|
465 |
+
463,ER1 Resident Recyc. (Owner/Manager
|
466 |
+
464,Do Not Enter
|
467 |
+
465,Branches Damaged
|
468 |
+
466,Junction Box
|
469 |
+
467,Food Preparation Location
|
470 |
+
468,Underage - Licensed Est
|
471 |
+
469,Sidewalk Blocked
|
472 |
+
470,Human Capital
|
473 |
+
471,Police Report Requested
|
474 |
+
472,Car Not Available
|
475 |
+
473,Warning Buzzer
|
476 |
+
474,"Education Support, Policy, and Practice"
|
477 |
+
475,Lamppost Leaning
|
478 |
+
476,WiFi/Internet Not Working/Slow
|
479 |
+
477,"Air: Smoke, Vehicular (AA4)"
|
480 |
+
478,Credit Card Stuck in Meter
|
481 |
+
479,PORCH/BALCONY
|
482 |
+
480,Kitchen/Food Prep Area
|
483 |
+
481,RAIN GARDEN DEBRIS (SRGDBR)
|
484 |
+
482,Defective/Missing Curb Piece (SC4)
|
485 |
+
483,Food Worker Activity
|
486 |
+
484,Wastewater Into Catch Basin (IEB)
|
487 |
+
485,SPRINKLER
|
488 |
+
486,Plants- Odor Related Problems (PO1)
|
489 |
+
487,SKYLIGHT
|
490 |
+
488,Yield
|
491 |
+
489,Beach/Pool Water
|
492 |
+
490,Dogs or Cats Sold
|
493 |
+
491,Garbage or Litter
|
494 |
+
492,Plate Missing/Moved-Exposing Hole (WF4)
|
495 |
+
493,Plants- Noise Related Problems (PN1)
|
496 |
+
494,NYPD
|
497 |
+
495,On Messenger
|
498 |
+
496,Dirty/Graffiti
|
499 |
+
497,Playing in Unsuitable Place
|
500 |
+
498,Building
|
501 |
+
499,Injured Wildlife
|
502 |
+
500,6 Overflowing Litter Baskets
|
503 |
+
501,Improper Sale of Items
|
504 |
+
502,Advertising Sign/Billboard/Posters/Flexible Fabric - Illegal
|
505 |
+
503,DOOR TO DUMBWAITER
|
506 |
+
504,Natural Gas In Sewer/Catch Basin (IFB)
|
507 |
+
505,No Permit or License
|
508 |
+
506,Noise: Manufacturing Noise (NK1)
|
509 |
+
507,Broken Glass
|
510 |
+
508,Illegal/Unfair Booting
|
511 |
+
509,Cat
|
512 |
+
510,Speed Limit
|
513 |
+
511,Unauthorized Tree Removal
|
514 |
+
512,Plate Missing/Moved-Exposing Hole (SB4)
|
515 |
+
513,"Air: Odor, Sweet From Unknown Source (AZ1)"
|
516 |
+
514,EEO
|
517 |
+
515,Lighting
|
518 |
+
516,FOIL Request - Request for Records
|
519 |
+
517,Scale Inaccurate/Broken
|
520 |
+
518,"Unsafe Chemical, Storage (HC1)"
|
521 |
+
519,Hours of Operation
|
522 |
+
520,Unsecured Facility
|
523 |
+
521,Safety Equipment/Signs
|
524 |
+
522,Posted Notice Or Order Removed/Tampered With
|
525 |
+
523,"Wasting Faucets,Sinks,Flushometer,Urinal,Etc. - Other (CWO)"
|
526 |
+
524,House/Property Damaged
|
527 |
+
525,Cellar Door Defective
|
528 |
+
526,Multiple St Lts Dayburning
|
529 |
+
527,Crane/Suspension Scaffold - No Permit/License/Cert./Unsafe/Illegal
|
530 |
+
528,Noise: Other Noise Sources (Use Comments) (NZZ)
|
531 |
+
529,Damaged Other
|
532 |
+
530,No Consent Form
|
533 |
+
531,Debt Not Owed
|
534 |
+
532,High Water Pressure (WHP)
|
535 |
+
533,Closed without Notice
|
536 |
+
534,Property
|
537 |
+
535,Real Estate Services
|
538 |
+
536,SEWER
|
539 |
+
537,Hyd Valve Box Cover Missing (WV2)
|
540 |
+
538,Office of Preventive Technical Assistance/OPTA
|
541 |
+
539,Traffic Sign or Signal Blocked
|
542 |
+
540,Dissatisfaction with Provider
|
543 |
+
541,Grass/Weeds
|
544 |
+
542,Catch Basin Grating Missing (SA4)
|
545 |
+
543,Bracket Arm Loose
|
546 |
+
544,Graffiti - Highway
|
547 |
+
545,Initial Application
|
548 |
+
546,Snow Removal Requested
|
549 |
+
547,Non-Compliance w/TTPN 1/00 - Vertical Enlargements
|
550 |
+
548,Landmark Bldg - Illegal Work
|
551 |
+
549,Damaged/Defective
|
552 |
+
550,"Dirt, Debris, Litter Complaint"
|
553 |
+
551,Illegal Tree Removal/Topo. Change in SNAD
|
554 |
+
552,Relocation of Bus Stop Shelter
|
555 |
+
553,Controller Flasher
|
556 |
+
554,Annual Report
|
557 |
+
555,Facilities Management
|
558 |
+
556,Culvert Blocked/Needs Cleaning (SE)
|
559 |
+
557,Sign - In Danger Of Falling
|
560 |
+
558,Loose Plate
|
561 |
+
559,Commercial ICP or ICAP Exemption
|
562 |
+
560,Time Switch
|
563 |
+
561,Fiscal and Business Management
|
564 |
+
562,Crash Cushion Defect
|
565 |
+
563,Glassware Hanging
|
566 |
+
564,"Noise, Other Animals (NR6)"
|
567 |
+
565,Missing/Stump
|
568 |
+
566,ECR Commercial Routing Sticker
|
569 |
+
567,Water Meter Stolen/Missing - Other (CLO)
|
570 |
+
568,Clear Street Light
|
571 |
+
569,Rates Not Posted
|
572 |
+
570,"No Sampling Required, Requested Information (QG2)"
|
573 |
+
571,Damaged Leg or Pole Bent
|
574 |
+
572,Rooftank Leak Or Overflow (CKO)
|
575 |
+
573,"Wasting Faucets,Sinks,Flushometer,Urinal,Etc. - Private Residence (CWR)"
|
576 |
+
574,Smoking
|
577 |
+
575,E30 Transfer Station
|
578 |
+
576,Equipment Not Safe
|
579 |
+
577,Domestic Strays
|
580 |
+
578,Weather Head
|
581 |
+
579,Broken Lock
|
582 |
+
580,Dog Off Leash
|
583 |
+
581,Oil Spill Into Basin/Sewer - Large (IABL)
|
584 |
+
582,Plate Noisy/Sunken/Raised (SB5)
|
585 |
+
583,Personal STAR Exemption
|
586 |
+
584,Handwashing
|
587 |
+
585,"Taste/Odor, Musty/Stale (QA4)"
|
588 |
+
586,Citywide Procurement
|
589 |
+
587,"Taste/Odor, Bitter/Metallic (QA3)"
|
590 |
+
588,Animal Waste
|
591 |
+
589,Parking Card Stuck in Meter
|
592 |
+
590,Door Open with Air Conditioning On
|
593 |
+
591,Inadequate Support Shoring
|
594 |
+
592,Non-Delivery of Papers
|
595 |
+
593,1C Uncollected Xmas Trees
|
596 |
+
594,Personal Exemptions
|
597 |
+
595,Controller Cabinet
|
598 |
+
596,Executive
|
599 |
+
597,Chemical Spill/Release (HA1)
|
600 |
+
598,Tax Commission Rules
|
601 |
+
599,Co-op or Condo Abatement
|
602 |
+
600,Guard Rail - Bridge
|
603 |
+
601,Chemical Spill (IAC)
|
604 |
+
602,Door
|
605 |
+
603,In-Line Fuse Missing
|
606 |
+
604,"Oil Spill On Street, Small (HQS)"
|
607 |
+
605,Drag Racing
|
608 |
+
606,Cellar Door Open/Unprotected
|
609 |
+
607,Bag
|
610 |
+
608,Disclosure Not Provided
|
611 |
+
609,Contrary To LL 58/87(Handicapped Access)
|
612 |
+
610,Lack of Supplies
|
613 |
+
611,Vehicle Report
|
614 |
+
612,Accident - Elevator
|
615 |
+
613,Unauthorized Film Shoot
|
616 |
+
614,Clear Water With Other Particles (Use Comments) (QEZ)
|
617 |
+
615,Catch Basin Search (SC2)
|
618 |
+
616,Sidewalk Grating - Missing
|
619 |
+
617,Dirty/Inadequate Equip./Facility
|
620 |
+
618,Removing Flowers/Plants
|
621 |
+
619,Foundation
|
622 |
+
620,Time Clock Maladjusted
|
623 |
+
621,Unlicensed Day Care
|
624 |
+
622,Graffiti/Litter on Phone
|
625 |
+
623,Sway Bar
|
626 |
+
624,Unlicensed Vendors
|
627 |
+
625,Turtle Under 4 inches Long
|
628 |
+
626,Biking/Rollerblading off Path
|
629 |
+
627,Mandated Reporters
|
630 |
+
628,Waterway-Sewage (IHA)
|
631 |
+
629,Layaway Terms Not Provided
|
632 |
+
630,Grease In Sewer/Catch Basin (IDG)
|
633 |
+
631,ER5 Comm. Recyc. (Bldg Mgmt)
|
634 |
+
632,Police Report Not Requested
|
635 |
+
633,Failure to Post Calorie Information
|
636 |
+
634,FDNY Referral - Pilot
|
637 |
+
635,Wildlife Sighting
|
638 |
+
636,Oil Spill Into Basin/Sewer - Small (IABS)
|
639 |
+
637,Flood Light Lamp Missing
|
640 |
+
638,Noise: Loud Music/Nighttime(Mark Date And Time) (NP1)
|
641 |
+
639,Removing Wildlife
|
642 |
+
640,Detour
|
643 |
+
641,Foreign Attachment On Wood Pole
|
644 |
+
642,Mandatory Tip
|
645 |
+
643,10A Adopt-A-Basket
|
646 |
+
644,Bike Rack Repair
|
647 |
+
645,Honorary
|
648 |
+
646,Rough Pavement
|
649 |
+
647,Warning Signal Lamp
|
650 |
+
648,Equipment Complaint
|
651 |
+
649,Harassment
|
652 |
+
650,Other Sewer Problem (Use Comments) (SZZ)
|
653 |
+
651,"Air: Smoke, Other (Use Comments) (AA5)"
|
654 |
+
652,Damaged Bench
|
655 |
+
653,Snow on Overpass
|
656 |
+
654,Exposure Unnecessary
|
657 |
+
655,Bracket Arm Broken
|
658 |
+
656,Door Lock
|
659 |
+
657,Exit/Route
|
660 |
+
658,Returns Not Filed
|
661 |
+
659,Remove Hydrant Locking Device (WC6)
|
662 |
+
660,Reflector/Louvre
|
663 |
+
661,Illegal Activity by Phone
|
664 |
+
662,Fallen Debris from Bridge
|
665 |
+
663,"Air: Odor, Nail Salon (AD8)"
|
666 |
+
664,Sign Missing or Defective
|
667 |
+
665,General Counsel
|
668 |
+
666,Unauthorized Posting of Signs
|
669 |
+
667,Wood Pole Knocked Down
|
670 |
+
668,Conduit
|
671 |
+
669,"Taste/Odor, Sewer (QA5)"
|
672 |
+
670,Sign
|
673 |
+
671,Warning Signal
|
674 |
+
672,Prohibited Item Sale to Minor
|
675 |
+
673,Investigative Inspection
|
676 |
+
674,Hummock
|
677 |
+
675,Dogs or Cats Not Sold
|
678 |
+
676,Other Water Problem (Use Comments) (QZZ)
|
679 |
+
677,Sidewalk Café
|
680 |
+
678,Newspaper Box Complaint
|
681 |
+
679,Leaky Roof
|
682 |
+
680,Concrete Barrier
|
683 |
+
681,Illness/Injury
|
684 |
+
682,Pigeon Odor
|
685 |
+
683,Ticket Scalping
|
686 |
+
684,Bent/Loose
|
687 |
+
685,Ped Visor
|
688 |
+
686,Milk Not Pasteurized
|
689 |
+
687,Sewage
|
690 |
+
688,Projects
|
691 |
+
689,ER6 Comm. Recyc. (Comm. Tenant)
|
692 |
+
690,Material Storage - Unsafe
|
693 |
+
691,TAL 2 Wheelchair
|
694 |
+
692,Noise: Loud Music/Daytime (Mark Date And Time) (NN1)
|
695 |
+
693,Community Outreach
|
696 |
+
694,Bracket Arm Missing
|
697 |
+
695,Relocation of Parking Meter
|
698 |
+
696,Dry Cleaning Vapors (PERC)
|
699 |
+
697,Gasoline Spill (IAA)
|
700 |
+
698,MCI Abatement
|
701 |
+
699,Required Signage Not Posted
|
702 |
+
700,Advice Request
|
703 |
+
701,Beach/Pool/Sauna Unpermitted
|
704 |
+
702,Illegal Use Of Hose - Private Residence (CCR)
|
705 |
+
703,Flood Light Lamp Dayburning
|
706 |
+
704,Control Panel Damaged
|
707 |
+
705,installation of hydrant side post (WHFP)
|
708 |
+
706,Non-Disclosure of Fees
|
709 |
+
707,Flood Light Lamp Dim
|
710 |
+
708,"Noise, Ice Cream Truck (NR4)"
|
711 |
+
709,"Air: Odor/Fumes, Dry Cleaners (AD1)"
|
712 |
+
710,"Clear Water With Organisms (Insects, Worms) (QE2)"
|
713 |
+
711,"Oil, Grease In Water (QD1)"
|
714 |
+
712,Nuisance/Truant
|
715 |
+
713,Air Conditioning Problem
|
716 |
+
714,Gender Pricing
|
717 |
+
715,American Flag
|
718 |
+
716,Water
|
719 |
+
717,Traffic Camera
|
720 |
+
718,Highway Flooding (SH)
|
721 |
+
719,Elevator - Multiple Devices On Property
|
722 |
+
720,Misleading Appraisal
|
723 |
+
721,Animal Odor
|
724 |
+
722,Information Technology
|
725 |
+
723,Media Inquiries
|
726 |
+
724,About NYC Opportunity
|
727 |
+
725,9 Spill/Oil etc
|
728 |
+
726,Plate Noisy/Sunken/Raised (WF5)
|
729 |
+
727,Touchscreen/Button Not Working
|
730 |
+
728,Broken/Defective
|
731 |
+
729,Street Con Game
|
732 |
+
730,Fleet
|
733 |
+
731,Paid in Advance
|
734 |
+
732,Jewelry
|
735 |
+
733,Sewer Break (SBR)
|
736 |
+
734,Broken Water Fountain
|
737 |
+
735,Sidewalk Pull Box Co
|
738 |
+
736,Stalled Construction Site
|
739 |
+
737,Large Number of Mosquitoes
|
740 |
+
738,Telco Connection Blk
|
741 |
+
739,In Post Base
|
742 |
+
740,Personal DHE Exemption
|
743 |
+
741,Construction
|
744 |
+
742,Blocking Sidewalk
|
745 |
+
743,ER4 City Agency (Inst. Recycling)
|
746 |
+
744,No Dial Tone
|
747 |
+
745,Lane Control Signal
|
748 |
+
746,Unrequested Services Provided
|
749 |
+
747,Tenant Refusal
|
750 |
+
748,Unsafe Use of Playground
|
751 |
+
749,MICROWAVE
|
752 |
+
750,"Dirt, Litter, Debris - Lot"
|
753 |
+
751,"Noise: Boat(Engine,Music,Etc) (NR10)"
|
754 |
+
752,Lost/Missing Person
|
755 |
+
753,Clothing/Glasses
|
756 |
+
754,High Pressure to Take on Loan/Debt
|
757 |
+
755,Time Insufficient
|
758 |
+
756,Blocked - ATM
|
759 |
+
757,Fire Alarm Lamp Missing
|
760 |
+
758,Bees/Wasps - Not a beekeper
|
761 |
+
759,CMU Communication
|
762 |
+
760,Sodium Warning
|
763 |
+
761,Facility General
|
764 |
+
762,Lost Property
|
765 |
+
763,Capital Construction
|
766 |
+
764,Business Tax
|
767 |
+
765,Non-Compliance w/Lightweight Materials
|
768 |
+
766,Dead End Signal
|
769 |
+
767,Apply Payment or Credit
|
770 |
+
768,Payment Not Posted
|
771 |
+
769,Refund/Credit Info or Status
|
772 |
+
770,EFT or Online Payment Problem
|
773 |
+
771,DAMAGE STRUCTURE/RAILING (SRGDM)
|
774 |
+
772,Request To Open A Hydrant (WC4)
|
775 |
+
773,Amount Owed Dispute
|
776 |
+
774,Payment Misapplied
|
777 |
+
775,Other Agency Charge
|
778 |
+
776,Bill Received in Error
|
779 |
+
777,"\Smoking Signs - \""\""No Smoking\""\"" Signs Not Observed on Construction Site\"""""
|
780 |
+
778,Application Appeal
|
781 |
+
779,Speed Board Sign
|
782 |
+
780,OUTLET COVER
|
783 |
+
781,Material Stored Improperly
|
784 |
+
782,Other Health Matters
|
785 |
+
783,Damaged or Missing Ad Box
|
786 |
+
784,Lack of Safety Equipment
|
787 |
+
785,Wrong Amount Paid or Withdrawn
|
788 |
+
786,Missing Complaint Sign
|
789 |
+
787,Unsanitary Condition
|
790 |
+
788,RAIN GARDEN FLOODING (SRGFLD)
|
791 |
+
789,Property Value
|
792 |
+
790,Commercial Rent Tax- Refund
|
793 |
+
791,Accident - Cranes/Derricks/Suspension Scaffold
|
794 |
+
792,Ferret
|
795 |
+
793,Hangers
|
796 |
+
794,In Conduit
|
797 |
+
795,Farm Animal
|
798 |
+
796,No or Defective Headphones
|
799 |
+
797,Phone Blocking Sidewalk
|
800 |
+
798,Equipment Malfunction
|
801 |
+
799,Beekeeping - Honeybees
|
802 |
+
800,Damaged Door
|
803 |
+
801,Guard Rail - Highway
|
804 |
+
802,Illegal Dumping
|
805 |
+
803,6R Overflowing Recycling Baskets
|
806 |
+
804,Ewaste appointment
|
807 |
+
805,Graffiti/Dirty Condition
|
808 |
+
806,Application Portability
|
809 |
+
807,Public Event Seating
|
810 |
+
808,Inattentive
|
811 |
+
809,Equipment Maintenance
|
812 |
+
810,Snake
|
813 |
+
811,Defective Water Sampling Station (QSS)
|
814 |
+
812,Fence
|
815 |
+
813,BBQ Outside Authorized Area
|
816 |
+
814,Bracket Arm Bent
|
817 |
+
815,Not Received - Vending Machine
|
818 |
+
816,E7 Private Carter Spillage
|
819 |
+
817,Lane Station
|
820 |
+
818,Injury/Safety
|
821 |
+
819,Condulet Cover
|
822 |
+
820,Absent
|
823 |
+
821,Labor Violation
|
824 |
+
822,Swimming Pool - Unmaintained
|
825 |
+
823,Tie Rods
|
826 |
+
824,Other - Explain Below
|
827 |
+
825,Elevator - Dangerous Condition/Shaft Open/Unguarded
|
828 |
+
826,Puddle on Driveway
|
829 |
+
827,Puddle on Roof
|
830 |
+
828,Basement
|
831 |
+
829,Container - Over 5 Gallons
|
832 |
+
830,Commercial Other Exemption
|
833 |
+
831,Puddle in Ground
|
834 |
+
832,Elevator - Single Device On Property/No Alternate Service
|
835 |
+
833,Lien Sale
|
836 |
+
834,Sewer or Drain
|
837 |
+
835,Flooded
|
838 |
+
836,Box Cover
|
839 |
+
837,Container - Under 5 Gallons
|
840 |
+
838,Special Agency Projects/Initiatives
|
841 |
+
839,Bird Bath
|
842 |
+
840,Swimming Pool Cover
|
843 |
+
841,Lighting - Garage
|
844 |
+
842,Advance Fee
|
845 |
+
843,Roof Gutters
|
846 |
+
844,Building Foundation
|
847 |
+
845,Lost Coin
|
848 |
+
846,Puddle on Sidewalk
|
849 |
+
847,Shisha
|
850 |
+
848,Transducer-Loop
|
851 |
+
849,Decorative Necklace Lighting
|
852 |
+
850,Monkey
|
853 |
+
851,Property Misclassified
|
854 |
+
852,Flavored Tobacco
|
855 |
+
853,Taste
|
856 |
+
854,"Dirt, Litter, Debris - Garage"
|
857 |
+
855,Fountain - Over 5 Gallons
|
858 |
+
856,Inaccurate Meter
|
859 |
+
857,Stop Temporary
|
860 |
+
858,Language Access Coordinator
|
861 |
+
859,Ped Lens
|
862 |
+
860,Tires
|
863 |
+
861,Damaged Toilet/Sink
|
864 |
+
862,Broken Fence
|
865 |
+
863,Seizure of Funds
|
866 |
+
864,Flower Planters
|
867 |
+
865,Scale Inaccurate
|
868 |
+
866,High Grass
|
869 |
+
867,Minor Received Tattoo
|
870 |
+
868,Wood Pole Leaning
|
871 |
+
869,Waterway-Color (IHD)
|
872 |
+
870,User Unlicensed
|
873 |
+
871,Supervisory
|
874 |
+
872,Unauthorized Climbing
|
875 |
+
873,Complaint
|
876 |
+
874,Red Lt Camera Feed
|
877 |
+
875,No Idling
|
878 |
+
876,RPIE - Filing and Technical Issues
|
879 |
+
877,General Business Tax - Other
|
880 |
+
878,Fountain - Under 5 Gallons
|
881 |
+
879,No Bill of Rights
|
882 |
+
880,Poison Ivy
|
883 |
+
881,Spanish Transaction
|
884 |
+
882,Book/Stationery
|
885 |
+
883,SCRIE Miscellaneous
|
886 |
+
884,New Automatic Public Toilet Request
|
887 |
+
885,Personal Clergy Exemption
|
888 |
+
886,Color
|
889 |
+
887,Fire Hydrant Emergency (FHE)
|
890 |
+
888,Documents/Paperwork Missing
|
891 |
+
889,City Planning Commission
|
892 |
+
890,BBS Failure
|
893 |
+
891,Exposure from Nearby Facility
|
894 |
+
892,Information on Contracts and Contractors
|
895 |
+
893,Deck Inspection
|
896 |
+
894,Gas Utility Referral
|
897 |
+
895,Sports Equipment
|
898 |
+
896,Appeals Division
|
899 |
+
897,Marine Lamp
|
900 |
+
898,Safety Inspection-Retaining Walls (May 2005)
|
901 |
+
899,Integrity Complaint Referral
|
902 |
+
900,Property Misclassification
|
903 |
+
901,Non-Public Schools
|
904 |
+
902,On Structure
|
905 |
+
903,421A Exemption
|
906 |
+
904,Zoning and Land Use Questions/Information
|
907 |
+
905,Contamination Risk
|
908 |
+
906,Energy
|
909 |
+
907,FENCING
|
910 |
+
908,Cellar Door New
|
911 |
+
909,Use of Newly Seeded Lawn
|
912 |
+
910,Electronic Sign - Overhead
|
913 |
+
911,ULURP Project Status Questions
|
914 |
+
912,Pedestrian Sign
|
915 |
+
913,1RE Recycling Electronics
|
916 |
+
914,Bikes in Buildings
|
917 |
+
915,Blank Out Matrix Sgn
|
918 |
+
916,Broken Window
|
919 |
+
917,High Pressure Sales
|
920 |
+
918,Building Permit - None
|
921 |
+
919,Do Not Block the Box
|
922 |
+
920,12P Dead Deer
|
923 |
+
921,Ver Message Sign
|
924 |
+
922,1RE missed collection for E-waste
|
925 |
+
923,Illegal Use Of Hose - Private Residence
|
926 |
+
924,Budget
|
927 |
+
925,Enforcement Work Order (DOB)
|
928 |
+
926,Sign Defect - Garage
|
929 |
+
927,1L Missed Recycling Leaves
|
930 |
+
928,Debris - Excessive
|
931 |
+
929,Adjacent Buildings Not Protected
|
932 |
+
930,After Hours Work - Illegal
|
933 |
+
931,Accident - Construction/Plumbing
|
934 |
+
932,Construction - Change Grade/Watercourse
|
935 |
+
933,Landlord Inquiries
|
936 |
+
934,Det-Sens Amplifier
|
937 |
+
935,Wood Pole Damaged
|
938 |
+
936,Contractor Responsibility/VENDEX
|
939 |
+
937,Unlicensed/Illegal/Improper Work In Progress
|
940 |
+
938,Commercial Exemptions
|
941 |
+
939,General Business Tax - Refund
|
942 |
+
940,Damaged or Leaking Roof
|
943 |
+
941,General Bad Condition
|
944 |
+
942,Detector Sensor
|
945 |
+
943,Accessibility Accommodations
|
946 |
+
944,DRY WEATHER DISCHARGE - DWD
|
947 |
+
945,General Business Tax- Audit
|
948 |
+
946,Commercial Not For Profit Exemption
|
949 |
+
947,Restroom Non-Complaince With Local Law 79/16
|
950 |
+
948,Best - DM Tracking Complaint
|
951 |
+
949,Best - High-Rise Tracking Complaint
|
952 |
+
950,SST Tracking Complaint
|
953 |
+
951,M.A.R.C.H. Program (Interagency)
|
954 |
+
952,Facade (LL11/98)- Unsafe Notification
|
955 |
+
953,Inspection Work Order (DOB)
|
956 |
+
954,Plumbing Enforcement Work Order (DOB)
|
957 |
+
955,Illegal Conversion No Access Follow - UP
|
958 |
+
956,Best - Low-Rise Tracking Complaint
|
959 |
+
957,Construction Enforcement Work Order (DOB)
|
960 |
+
958,Illegal Activity
|
961 |
+
959,Excavation Tracking Complaint
|
962 |
+
960,Sustainability Enforcement Work Order
|
963 |
+
961,Interior Demo Tracking Complaint
|
964 |
+
962,Electrical Enforcement Work Order (DOB)
|
965 |
+
963,Sandy: Building Destroyed
|
966 |
+
964,Amusement Ride Accident/Incident
|
967 |
+
965,Complaince Inspection
|
968 |
+
966,Demolition Notification Received
|
969 |
+
967,V.E.S.T. Program (DOB & NYPD)
|
970 |
+
968,Personal Veteran Exemption
|
971 |
+
969,Depression Maintenance
|
972 |
+
970,Driver Complaint - Passenger
|
973 |
+
971,Elevator - Defective/Not Working
|
974 |
+
972,DRIE Exemption
|
975 |
+
973,Mailed - Not Reflected
|
976 |
+
974,The ABCs of Housing
|
977 |
+
975,Full Term Mobile Food Vendor License
|
978 |
+
976,Medicaid
|
979 |
+
977,Food Stamp
|
980 |
+
978,Cash Assistance
|
981 |
+
979,Billing Name Incorrect
|
982 |
+
980,Waive Penalty for Late Payment
|
983 |
+
981,The ABCs of Housing - Chinese
|
984 |
+
982,Lost and Found
|
985 |
+
983,Heat Bulletin
|
986 |
+
984,The ABCs of Housing - Spanish
|
987 |
+
985,Homeless Issue
|
988 |
+
986,Electronic Fund Transfer (EFT) Problem
|
989 |
+
987,Copy of Account Information
|
990 |
+
988,Condo or Co-op Abatement
|
991 |
+
989,Copy of Statement
|
992 |
+
990,Property Address Incorrect
|
993 |
+
991,Other Billing Issue
|
994 |
+
992,Card - DOF Confirmation Number Issued
|
995 |
+
993,Mitchell-Lama Housing List
|
996 |
+
994,Billing Address Incorrect
|
997 |
+
995,Waterway-Oil/Gasoline (IHB)
|
998 |
+
996,Status of Payment Adjustment
|
999 |
+
997,Cleanliness
|
1000 |
+
998,Barbershop License
|
1001 |
+
999,Food Service Establishment License
|
1002 |
+
1000,Debt Collection Agency License
|
1003 |
+
1001,Housing Information Guide For Tenants and Owners Notice
|
1004 |
+
1002,The ABCs of Housing - Arabic
|
1005 |
+
1003,Applied to Wrong Ticket
|
1006 |
+
1004,Tax Exemption
|
1007 |
+
1005,Commercial ICIP or ICAP Exemption
|
1008 |
+
1006,Misapplied Payment
|
1009 |
+
1007,Remove Mortgage
|
1010 |
+
1008,Frozen Dessert Manufacturer License
|
1011 |
+
1009,General Inquiry
|
1012 |
+
1010,Image of Ticket
|
1013 |
+
1011,Incorrect Amount Paid
|
1014 |
+
1012,Status of Appeal
|
1015 |
+
1013,Card - No DOF Confirmation Number Issued
|
1016 |
+
1014,Status of PV Refund
|
1017 |
+
1015,Filing and Technical Issues
|
1018 |
+
1016,Full Term Mobile Food Unit Permit
|
1019 |
+
1017,General Street Vendor License
|
1020 |
+
1018,Ready NY - English - Full Size
|
1021 |
+
1019,Condo Billing Issue
|
1022 |
+
1020,Locksmith License
|
1023 |
+
1021,Status of Hearing
|
1024 |
+
1022,General Complaint
|
1025 |
+
1023,Home Ownership Kit
|
1026 |
+
1024,Registration Clearance Request
|
1027 |
+
1025,Commercial Green Roof or Solar Panel Exemption
|
1028 |
+
1026,Driver Compliment
|
1029 |
+
1027,Commercial 421A Exemption
|
1030 |
+
1028,HomeFirst Down Payment Information
|
1031 |
+
1029,Ready NY - Businesses - English
|
1032 |
+
1030,Copy of Notice of Property Value
|
1033 |
+
1031,Seasonal Mobile Food Vendor License
|
1034 |
+
1032,Ready NY Guide - Pocket Sized - English
|
1035 |
+
1033,Newsstand License
|
1036 |
+
1034,ACRIS Incorrect
|
1037 |
+
1035,Secondhand Dealer Firearms License
|
1038 |
+
1036,Catering Establishment License
|
1039 |
+
1037,Cigarette Retail Dealer License
|
1040 |
+
1038,Housing Quality Standards (HQS) Inspections FAQs - English
|
1041 |
+
1039,Commercial CEP or CRP Exemption
|
1042 |
+
1040,Finance Business Center - Not Reflected
|
1043 |
+
1041,Street Fair Vendor License
|
1044 |
+
1042,Stoop Line Stand License
|
1045 |
+
1043,Home Improvement Contractor License
|
1046 |
+
1044,Individual Process Server License
|
1047 |
+
1045,Decision and Order
|
1048 |
+
1046,Card - Charged Twice
|
1049 |
+
1047,Status of Request to file Paper RPIE
|
1050 |
+
1048,Full Term Tattoo License
|
1051 |
+
1049,List of Outstanding Tickets
|
1052 |
+
1050,Disruptive Passenger
|
1053 |
+
1051,Interruption of Essential Services Notice
|
1054 |
+
1052,Commercial J51 Exemption
|
1055 |
+
1053,Secondhand Dealer Auto License
|
1056 |
+
1054,Ready NY My Emergency Plan - English
|
1057 |
+
1055,Winter Health and Safety Tips Guide
|
1058 |
+
1056,Sightseeing Guide License
|
1059 |
+
1057,Home Improvement Salesperson License
|
1060 |
+
1058,The ABCs of Housing - Russian
|
1061 |
+
1059,Delays
|
1062 |
+
1060,General Vendor Distributor License
|
1063 |
+
1061,Senior Citizen Home Assistance Program (SCHAP) Loan
|
1064 |
+
1062,Performance
|
1065 |
+
1063,Employment Agency License
|
1066 |
+
1064,Secondhand Dealer General License
|
1067 |
+
1065,Ready NY - English - Pocket Size
|
1068 |
+
1066,Commercial UDAAP Exemption
|
1069 |
+
1067,Death Certificate Before 1949 Order Form
|
1070 |
+
1068,Birth Certificate Before 1910 Order Form
|
1071 |
+
1069,Garage or Parking Lot License
|
1072 |
+
1070,Marriage Certificate Order Form
|
1073 |
+
1071,Elevator Not Inspected/Illegal/No Permit
|
1074 |
+
1072,Certificate of No Harassment or Exemption - SRO
|
1075 |
+
1073,Dead/Dying Tree
|
1076 |
+
1074,Certificate of No Harassment - Zoning
|
1077 |
+
1075,Hurricane Preparedness - English
|
1078 |
+
1076,Tow Truck Driver License
|
1079 |
+
1077,Temporary Food Service Establishment Permit
|
1080 |
+
1078,Settlement Reduction Not Shown
|
1081 |
+
1079,Ready NY - Arabic - Full Size
|
1082 |
+
1080,Electronic Sign - Portable
|
1083 |
+
1081,Ready NY - Kids - Middle and High School Students
|
1084 |
+
1082,Ready NY - French - Full Size
|
1085 |
+
1083,City Rebate
|
1086 |
+
1084,Restrooms
|
1087 |
+
1085,Non Retail Food Processing Establishment License
|
1088 |
+
1086,Homestead
|
1089 |
+
1087,Ready NY - Kids - Elementary School Students
|
1090 |
+
1088,Ready NY - Flooding
|
1091 |
+
1089,Fallen Debris from Tunnel
|
1092 |
+
1090,Ready NY - Reference Card
|
1093 |
+
1091,Genealogy Research Application
|
1094 |
+
1092,Emergency Notice
|
1095 |
+
1093,Dealer in Devices for Disabled License
|
1096 |
+
1094,Ready NY My Emergency Plan - Spanish
|
1097 |
+
1095,Hurricane Preparedness - Spanish
|
1098 |
+
1096,Hurricane Preparedness - Haitian Creole
|
1099 |
+
1097,Sightseeing Bus License
|
1100 |
+
1098,Hurricane Preparedness - Arabic
|
1101 |
+
1099,Process Server Organization License
|
1102 |
+
1100,Licensed Home Improvement Contractor Bumper Sticker
|
1103 |
+
1101,Ready NY Beat the Heat - English
|
1104 |
+
1102,Temporary Amusement Device License
|
1105 |
+
1103,Ready NY- Pandemic Flu
|
1106 |
+
1104,Pothole - Tunnel
|
1107 |
+
1105,Shelter for Homeless Animals License
|
1108 |
+
1106,Auctioneer License
|
1109 |
+
1107,Marshal - Not Reflected
|
1110 |
+
1108,Ready NY My Emergency Plan - Traditional Chinese
|
1111 |
+
1109,Sidewalk Cafe License
|
1112 |
+
1110,Cabaret License
|
1113 |
+
1111,Locksmith Apprentice License
|
1114 |
+
1112,New Lead Law Rights and Requirements
|
1115 |
+
1113,Ready NY - Spanish - Full Size
|
1116 |
+
1114,Scrap Metal Processor License
|
1117 |
+
1115,Announcements
|
1118 |
+
1116,3 Sweeping/Missed-Inadequate
|
1119 |
+
1117,Electronics and Home Appliance Service Dealer License
|
1120 |
+
1118,Ready NY - Pets - English
|
1121 |
+
1119,Pedicab Driver
|
1122 |
+
1120,Ready NY - Chinese Traditional - Full Size
|
1123 |
+
1121,Summer Heat - English
|
1124 |
+
1122,Laundry License
|
1125 |
+
1123,Summer Heat - Russian
|
1126 |
+
1124,Ready NY My Emergency Plan - Russian
|
1127 |
+
1125,The ABCs of Housing - Korean
|
1128 |
+
1126,Ready NY - Russian - Pocket Size
|
1129 |
+
1127,Strip Paving
|
1130 |
+
1128,Sign Blocked by Tree
|
1131 |
+
1129,Ready NY - Haitian Creole - Full Size
|
1132 |
+
1130,Beach/Pool Closure
|
1133 |
+
1131,Conflict Monitor
|
1134 |
+
1132,Dead End Sign
|
1135 |
+
1133,Seasonal Food Cart Vendor Permit
|
1136 |
+
1134,Amusement Arcade License
|
1137 |
+
1135,Tow Truck Company License
|
1138 |
+
1136,Commercial DAMP Exemption
|
1139 |
+
1137,Waterway-Floatables (IHC)
|
1140 |
+
1138,Pet Store - New License
|
1141 |
+
1139,Ready NY - Chinese Traditional - Pocket Size
|
1142 |
+
1140,The ABCs of Housing - Haitian Creole
|
1143 |
+
1141,Ready NY - Spanish - Pocket Size
|
1144 |
+
1142,Ready NY - Small and Mid-Sized Companies
|
1145 |
+
1143,Commercial 421B Exemption
|
1146 |
+
1144,Booting Company License
|
1147 |
+
1145,Animal Grooming License
|
1148 |
+
1146,Temporary Tattoo License
|
1149 |
+
1147,Commercial 421G Exemption
|
1150 |
+
1148,Scale Dealer or Repairer License
|
1151 |
+
1149,General Vendor Waiting List Application
|
1152 |
+
1150,Ready NY - Russian - Full Size
|
1153 |
+
1151,Ready NY My Emergency Plan - Haitian Creole
|
1154 |
+
1152,Going Out of Business Sale License
|
1155 |
+
1153,Graffiti - Tunnel
|
1156 |
+
1154,Compressed Air License
|
1157 |
+
1155,Det-Sens Cabinet
|
1158 |
+
1156,Ready NY - Chinese Simplified - Full Size
|
1159 |
+
1157,Laundry Jobber License
|
1160 |
+
1158,NO WATER - WNW
|
1161 |
+
1159,Animal Boarding License
|
1162 |
+
1160,Electronics Store License
|
1163 |
+
1161,Auction House License
|
1164 |
+
1162,Hydrotest
|
1165 |
+
1163,Pawn Broker License
|
1166 |
+
1164,Smoke/Odor
|
1167 |
+
1165,Food Source/Protection
|
1168 |
+
1166,Garbage
|
1169 |
+
1167,Equipment
|
1170 |
+
1168,Pool or Billiard Hall License
|
1171 |
+
1169,EXPY Sign Fixt Cover
|
1172 |
+
1170,Personal Crime Victim or Good Samaritan Exemption
|
1173 |
+
1171,Dust Cover
|
1174 |
+
1172,Ready NY My Emergency Plan - Italian
|
1175 |
+
1173,Gaming Cafe License
|
1176 |
+
1174,Portable Amusement Ride License
|
1177 |
+
1175,Curb Violation
|
1178 |
+
1176,Status Call
|
1179 |
+
1177,No Status Call
|
1180 |
+
1178,Summer Heat - Spanish
|
1181 |
+
1179,Electrical - Unlicensed/Illegal/Improper Work In Progress
|
1182 |
+
1180,Messenger
|
1183 |
+
1181,Hurricane Preparedness - Chinese
|
1184 |
+
1182,ABANDONED APARTMENT UNIT
|
1185 |
+
1183,Hurricane Preparedness - Russian
|
1186 |
+
1184,License Violation
|
1187 |
+
1185,Placement
|
1188 |
+
1186,Insects / Pests
|
1189 |
+
1187,Driver Complaint - Non Passenger
|
1190 |
+
1188,Retail Store
|
1191 |
+
1189,Initial
|
1192 |
+
1190,To FDNY Approved System
|
1193 |
+
1191,Ready NY - Korean - Full Size
|
1194 |
+
1192,For Violation
|
1195 |
+
1193,Licensee Complaint
|
1196 |
+
1194,Bodega/Deli/Supermarket
|
1197 |
+
1195,Multi Agency Joint Inspection
|
1198 |
+
1196,Inhalation Therapy Supervising Technician License
|
1199 |
+
1197,Horse Drawn Carriage Driver License
|
1200 |
+
1198,Bowstring Truss Tracking Complaint
|
1201 |
+
1199,Retail Laundry License Application
|
1202 |
+
1200,14 Derelict Vehicles
|
1203 |
+
1201,Retaining Wall Tracking Complaint
|
1204 |
+
1202,Ready NY - Businesses - Spanish
|
1205 |
+
1203,Notice of Housing Code Enforcement Issues
|
1206 |
+
1204,Pathogens License
|
1207 |
+
1205,Tobacco Retail Dealer License Application
|
1208 |
+
1206,Sample Suspected Gas Leak Notice
|
1209 |
+
1207,Street Cave-In *Dep Internal Use Only* (SG1)
|
1210 |
+
1208,Housing Quality Standards (HQS) Inspections FAQs - Spanish
|
1211 |
+
1209,HOUSING QUALITY STANDARDS
|
1212 |
+
1210,For Letter of Defect
|
1213 |
+
1211,New Building
|
1214 |
+
1212,Milk/Dairy Products
|
1215 |
+
1213,Voluntary
|
1216 |
+
1214,Business Opportunities/RFPs
|
1217 |
+
1215,Reinspection
|
1218 |
+
1216,Construction Safety Compliance Action
|
1219 |
+
1217,Amusement Ride
|
1220 |
+
1218,Structurally Compromised Building (LL33/08)
|
1221 |
+
1219,Non-med Compressed Gas - New
|
1222 |
+
1220,Other Hazmats
|
1223 |
+
1221,Cell Phone Store
|
1224 |
+
1222,Existing Building
|
1225 |
+
1223,Re-inspection
|
1226 |
+
1224,Iguana
|
1227 |
+
1225,Because of Violation
|
1228 |
+
1226,Disabled Device Dealer
|
1229 |
+
1227,Debt Collection Agency
|
1230 |
+
1228,Semi-Annual Homeless Shelter Inspection: Electrical
|
1231 |
+
1229,Semi-Annual Homeless Shelter Inspection: Construction
|
1232 |
+
1230,DCP/BSA Compliance Inspection
|
1233 |
+
1231,Semi-Annual Homeless Shelter Inspection: Plumbing
|
1234 |
+
1232,Illegal Commercial Or Manufacturing Use In a C1 Or C2 Zone
|
1235 |
+
1233,Car Dealer - Used
|
1236 |
+
1234,Permission to Publish Contract
|
1237 |
+
1235,Ticket Seller Business License Application
|
1238 |
+
1236,Certificate of No Harassment (CONH) Application
|
1239 |
+
1237,Certificate of No Harassment (CONH) Exemption
|
1240 |
+
1238,Commercial Government Exemption
|
1241 |
+
1239,Tow Truck Exemption License
|
1242 |
+
1240,EXPY Fixture
|
1243 |
+
1241,Industrial Laundry Delivery License Application
|
1244 |
+
1242,Guide Rail
|
1245 |
+
1243,Wireless Antenna
|
1246 |
+
1244,Ready NY My Emergency Plan - Korean
|
1247 |
+
1245,Ready NY My Emergency Plan - Polish
|
1248 |
+
1246,Accident/Explosion - Boiler
|
1249 |
+
1247,Summer Heat - Chinese
|
1250 |
+
1248,Ready NY - Polish - Full Size
|
1251 |
+
1249,Sidewalk Consultation
|
1252 |
+
1250,Noise: Vehicle (NR2)
|
1253 |
+
1251,Con Edison Referral
|
1254 |
+
1252,Documents Not Returned
|
1255 |
+
1253,Initial - Construction
|
1256 |
+
1254,Snow Removal
|
1257 |
+
1255,Snow Emergency
|
1258 |
+
1256,Relocation of Muni Meter
|
1259 |
+
1257,Elevator In (Fdny) Readiness - None
|
1260 |
+
1258,Suspected Street Cut
|
1261 |
+
1259,Overexposure During Treatment
|
1262 |
+
1260,SCRIE Application Denial
|
1263 |
+
1261,Unincorporated Business Tax - Other
|
1264 |
+
1262,"Air: Smoke, Residential (AA1)"
|
1265 |
+
1263,"BUILDING COLLAPSE/FIRE, (ASBESTOS RELATED) *FOR DEP INTERNAL USE ONLY* (HH2)"
|
1266 |
+
1264,Unincorporated Business Tax - Return Filing
|
1267 |
+
1265,No Statement of Job Conditions
|
1268 |
+
1266,Excise Taxes-Refund
|
1269 |
+
1267,Child or Minor Tanning
|
1270 |
+
1268,Injury or Illness from Tanning
|
1271 |
+
1269,Loan Offer
|
1272 |
+
1270,Defective Streetlight
|
1273 |
+
1271,EXPY Sign Reflector
|
1274 |
+
1272,Commercial Rent Tax-Other
|
1275 |
+
1273,Sediment
|
1276 |
+
1274,Workplace - 10 or Less Staff
|
1277 |
+
1275,Failure to Comply with Annual Crane Inspection
|
1278 |
+
1276,Facility Unregistered
|
1279 |
+
1277,"Air: Open Fire, Construction/Demolition (AC4)"
|
1280 |
+
1278,Extra Parts
|
1281 |
+
1279,Unincorporated Business Tax - Refund
|
1282 |
+
1280,Musical Instrument
|
1283 |
+
1281,Green Roof or Solar Panel Exemption
|
1284 |
+
1282,Sign Defect - Lot
|
1285 |
+
1283,Crack Sealing
|
1286 |
+
1284,Cigarette Vending Machine
|
1287 |
+
1285,Marine Globe
|
1288 |
+
1286,Mssg Sign Multi Lamp
|
1289 |
+
1287,Marine Flasher
|
1290 |
+
1288,Technician Unlicensed
|
1291 |
+
1289,Dumpster - Causing Damage
|
1292 |
+
1290,Minor Access
|
1293 |
+
1291,Excise Taxes-Audit
|
1294 |
+
1292,Not Certified
|
1295 |
+
1293,Toy Gun Sale
|
1296 |
+
1294,Fire Alarm Lamp Cycling
|
1297 |
+
1295,High Interest Loan
|
1298 |
+
1296,SCRIE Application Appeal
|
1299 |
+
1297,Cable Television
|
1300 |
+
1298,Mapping Information
|
1301 |
+
1299,NYC.gov Web Site
|
1302 |
+
1300,EZ PASS READER
|
1303 |
+
1301,RPIE
|
1304 |
+
1302,RTMS
|
1305 |
+
1303,Excise Taxes-Other
|
1306 |
+
1304,Facility Complaint
|
1307 |
+
1305,Inspection Requests/Complaints
|
1308 |
+
1306,Building Information/Construction History
|
1309 |
+
1307,Birth/Death Certificates
|
1310 |
+
1308,Communications/Intergovernmental
|
1311 |
+
1309,Ethernet Cable
|
1312 |
+
1310,Mental Health
|
1313 |
+
1311,ALJ Division
|
1314 |
+
1312,Fixture(S)
|
1315 |
+
1313,General Business Tax - Return filing
|
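The rows above complete `data/docs.csv`, the complaint-descriptor lookup table uploaded with the Space. As a quick sanity check, the file can be read back with pandas; this is an illustrative sketch, not code from the repo, and the column layout (an integer code plus a descriptor string) is inferred from the rows shown above.

```python
# Illustrative only: load the descriptor table and peek at the last rows.
# Quoted fields such as "Air: Odor/Fumes, Restaurant (AD2)" follow standard CSV quoting.
import pandas as pd

docs = pd.read_csv("data/docs.csv")
print(len(docs))    # roughly 1,300 rows; the last code shown above is 1313
print(docs.tail(3))
```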
data/drop_vars.xlsx
ADDED
Binary file (10.8 kB). View file
data/weather_aggregated_2010-2018.csv
ADDED
The diff for this file is too large to render.
See raw diff
figures/bounded_map.html
ADDED
@@ -0,0 +1,95 @@
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
<head>
|
4 |
+
|
5 |
+
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
|
6 |
+
|
7 |
+
<script>
|
8 |
+
L_NO_TOUCH = false;
|
9 |
+
L_DISABLE_3D = false;
|
10 |
+
</script>
|
11 |
+
|
12 |
+
<style>html, body {width: 100%;height: 100%;margin: 0;padding: 0;}</style>
|
13 |
+
<style>#map {position:absolute;top:0;bottom:0;right:0;left:0;}</style>
|
14 |
+
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/leaflet.js"></script>
|
15 |
+
<script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
|
16 |
+
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
|
17 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.js"></script>
|
18 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/leaflet.css"/>
|
19 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css"/>
|
20 |
+
<link rel="stylesheet" href="https://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.min.css"/>
|
21 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/[email protected]/css/all.min.css"/>
|
22 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.css"/>
|
23 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/python-visualization/folium/folium/templates/leaflet.awesome.rotate.min.css"/>
|
24 |
+
|
25 |
+
<meta name="viewport" content="width=device-width,
|
26 |
+
initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
|
27 |
+
<style>
|
28 |
+
#map_1bca46dd8c0ecb99e8cf98a8490d26c6 {
|
29 |
+
position: relative;
|
30 |
+
width: 100.0%;
|
31 |
+
height: 100.0%;
|
32 |
+
left: 0.0%;
|
33 |
+
top: 0.0%;
|
34 |
+
}
|
35 |
+
.leaflet-container { font-size: 1rem; }
|
36 |
+
</style>
|
37 |
+
|
38 |
+
</head>
|
39 |
+
<body>
|
40 |
+
|
41 |
+
|
42 |
+
<div class="folium-map" id="map_1bca46dd8c0ecb99e8cf98a8490d26c6" ></div>
|
43 |
+
|
44 |
+
</body>
|
45 |
+
<script>
|
46 |
+
|
47 |
+
|
48 |
+
var map_1bca46dd8c0ecb99e8cf98a8490d26c6 = L.map(
|
49 |
+
"map_1bca46dd8c0ecb99e8cf98a8490d26c6",
|
50 |
+
{
|
51 |
+
center: [40.7128, -74.006],
|
52 |
+
crs: L.CRS.EPSG3857,
|
53 |
+
zoom: 10,
|
54 |
+
zoomControl: false,
|
55 |
+
preferCanvas: false,
|
56 |
+
scrollWheelZoom: false,
|
57 |
+
dragging: false,
|
58 |
+
}
|
59 |
+
);
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
var tile_layer_5610f1ba4421bfdd6b11b0d3a8230311 = L.tileLayer(
|
66 |
+
"https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}{r}.png",
|
67 |
+
{"attribution": "\u0026copy; \u003ca href=\"https://www.openstreetmap.org/copyright\"\u003eOpenStreetMap\u003c/a\u003e contributors \u0026copy; \u003ca href=\"https://carto.com/attributions\"\u003eCARTO\u003c/a\u003e", "detectRetina": false, "maxNativeZoom": 20, "maxZoom": 20, "minZoom": 0, "noWrap": false, "opacity": 1, "subdomains": "abcd", "tms": false}
|
68 |
+
);
|
69 |
+
|
70 |
+
|
71 |
+
tile_layer_5610f1ba4421bfdd6b11b0d3a8230311.addTo(map_1bca46dd8c0ecb99e8cf98a8490d26c6);
|
72 |
+
|
73 |
+
|
74 |
+
var rectangle_7a26a5f5f0553f8e9c5a706c1184bf75 = L.rectangle(
|
75 |
+
[[40.49804421521046, -74.25521082506387], [40.91294056699566, -73.70038354802529]],
|
76 |
+
{"bubblingMouseEvents": true, "color": "#F1807E", "dashArray": "5 5", "dashOffset": null, "fill": true, "fillColor": "blue", "fillOpacity": 0.2, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "noClip": false, "opacity": 1.0, "smoothFactor": 1.0, "stroke": true, "weight": 3}
|
77 |
+
).addTo(map_1bca46dd8c0ecb99e8cf98a8490d26c6);
|
78 |
+
|
79 |
+
|
80 |
+
var popup_c20294d340dae6e3dee1251d70105f4e = L.popup({"maxWidth": "100%"});
|
81 |
+
|
82 |
+
|
83 |
+
|
84 |
+
var html_e2caf4fa03251f2359325a8b2c62d96d = $(`<div id="html_e2caf4fa03251f2359325a8b2c62d96d" style="width: 100.0%; height: 100.0%;">Service Data Coverage Zone</div>`)[0];
|
85 |
+
popup_c20294d340dae6e3dee1251d70105f4e.setContent(html_e2caf4fa03251f2359325a8b2c62d96d);
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
rectangle_7a26a5f5f0553f8e9c5a706c1184bf75.bindPopup(popup_c20294d340dae6e3dee1251d70105f4e)
|
90 |
+
;
|
91 |
+
|
92 |
+
|
93 |
+
|
94 |
+
</script>
|
95 |
+
</html>
|
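`figures/bounded_map.html` above is folium-generated Leaflet output: a CARTO `light_all` basemap centred on NYC with zoom and dragging disabled, plus a dashed rectangle whose popup reads "Service Data Coverage Zone". The sketch below is an assumption about how such a figure could be produced with folium; the coordinates and style options are copied from the generated HTML, but it is not code taken from the repo.

```python
# Rebuild a static coverage-zone map similar to figures/bounded_map.html (illustrative sketch).
import folium

m = folium.Map(
    location=[40.7128, -74.006],   # NYC centre used in the generated HTML
    zoom_start=10,
    tiles="CartoDB positron",      # the light_all basemap referenced above
    zoom_control=False,
    scroll_wheel_zoom=False,
    dragging=False,
)

folium.Rectangle(
    bounds=[[40.49804421521046, -74.25521082506387],
            [40.91294056699566, -73.70038354802529]],
    color="#F1807E",
    dash_array="5 5",
    weight=3,
    fill=True,
    fill_color="blue",
    fill_opacity=0.2,
    popup="Service Data Coverage Zone",
).add_to(m)

m.save("figures/bounded_map.html")
```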
figures/final_map.html
ADDED
The diff for this file is too large to render.
See raw diff
figures/map1.html
ADDED
The diff for this file is too large to render.
See raw diff
figures/map2.html
ADDED
The diff for this file is too large to render.
See raw diff
figures/model_performance.png
ADDED
models/BERTopic/config.json
ADDED
@@ -0,0 +1,17 @@
{
  "calculate_probabilities": false,
  "language": null,
  "low_memory": false,
  "min_topic_size": 10,
  "n_gram_range": [
    1,
    1
  ],
  "nr_topics": 8,
  "seed_topic_list": null,
  "top_n_words": 5,
  "verbose": true,
  "zeroshot_min_similarity": 0.7,
  "zeroshot_topic_list": null,
  "embedding_model": "sentence-transformers/all-MiniLM-L6-v2"
}
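This `config.json` is the metadata BERTopic writes alongside its safetensors serialization: the model was saved with `nr_topics=8`, `top_n_words=5`, and the `sentence-transformers/all-MiniLM-L6-v2` embedding model recorded by name. Below is a minimal sketch, assuming the standard BERTopic loading API, of restoring the model from this directory; passing `embedding_model` explicitly mirrors the name stored above.

```python
# Illustrative sketch: reload the serialized topic model from models/BERTopic.
from bertopic import BERTopic

topic_model = BERTopic.load(
    "models/BERTopic",
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",  # name recorded in config.json
)
print(topic_model.get_topic_info().head())  # one row per topic (typically including the -1 outlier topic)
```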
models/BERTopic/ctfidf.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0e5537b25ef16e60f33d219dbc53128240bdd3ef0677273cbcc337157562112
size 14020
models/BERTopic/ctfidf_config.json
ADDED
@@ -0,0 +1,408 @@
1 |
+
{
|
2 |
+
"ctfidf_model": {
|
3 |
+
"bm25_weighting": false,
|
4 |
+
"reduce_frequent_words": false
|
5 |
+
},
|
6 |
+
"vectorizer_model": {
|
7 |
+
"params": {
|
8 |
+
"analyzer": "word",
|
9 |
+
"binary": false,
|
10 |
+
"decode_error": "strict",
|
11 |
+
"encoding": "utf-8",
|
12 |
+
"input": "content",
|
13 |
+
"lowercase": true,
|
14 |
+
"max_df": 1.0,
|
15 |
+
"max_features": null,
|
16 |
+
"min_df": 2,
|
17 |
+
"ngram_range": [
|
18 |
+
1,
|
19 |
+
2
|
20 |
+
],
|
21 |
+
"stop_words": "english",
|
22 |
+
"strip_accents": null,
|
23 |
+
"token_pattern": "(?u)\\b\\w\\w+\\b",
|
24 |
+
"vocabulary": null
|
25 |
+
},
|
26 |
+
"vocab": {
|
27 |
+
"request": 285,
|
28 |
+
"large": 197,
|
29 |
+
"collection": 63,
|
30 |
+
"posted": 261,
|
31 |
+
"parking": 245,
|
32 |
+
"sign": 312,
|
33 |
+
"violation": 365,
|
34 |
+
"working": 376,
|
35 |
+
"contrary": 83,
|
36 |
+
"stop": 328,
|
37 |
+
"work": 374,
|
38 |
+
"order": 241,
|
39 |
+
"dirty": 110,
|
40 |
+
"sidewalk": 311,
|
41 |
+
"access": 3,
|
42 |
+
"receipt": 273,
|
43 |
+
"site": 316,
|
44 |
+
"street": 331,
|
45 |
+
"condition": 76,
|
46 |
+
"ice": 169,
|
47 |
+
"non": 232,
|
48 |
+
"missed": 224,
|
49 |
+
"area": 20,
|
50 |
+
"license": 203,
|
51 |
+
"engine": 126,
|
52 |
+
"idling": 170,
|
53 |
+
"cond": 75,
|
54 |
+
"lead": 200,
|
55 |
+
"residential": 290,
|
56 |
+
"sewer": 309,
|
57 |
+
"use": 358,
|
58 |
+
"comments": 66,
|
59 |
+
"water": 370,
|
60 |
+
"meter": 221,
|
61 |
+
"broken": 34,
|
62 |
+
"leaking": 202,
|
63 |
+
"private": 263,
|
64 |
+
"residence": 288,
|
65 |
+
"refund": 277,
|
66 |
+
"return": 295,
|
67 |
+
"permit": 249,
|
68 |
+
"improper": 175,
|
69 |
+
"certificate": 50,
|
70 |
+
"occupancy": 236,
|
71 |
+
"illegal": 171,
|
72 |
+
"plumbing": 256,
|
73 |
+
"pedestrian": 248,
|
74 |
+
"signal": 313,
|
75 |
+
"defective": 101,
|
76 |
+
"inadequate": 179,
|
77 |
+
"heat": 159,
|
78 |
+
"new": 230,
|
79 |
+
"bus": 36,
|
80 |
+
"placement": 254,
|
81 |
+
"repair": 284,
|
82 |
+
"building": 35,
|
83 |
+
"damaged": 93,
|
84 |
+
"cracked": 88,
|
85 |
+
"bicycle": 28,
|
86 |
+
"flooding": 140,
|
87 |
+
"overnight": 242,
|
88 |
+
"commercial": 67,
|
89 |
+
"storage": 329,
|
90 |
+
"surveillance": 337,
|
91 |
+
"waste": 369,
|
92 |
+
"blocked": 30,
|
93 |
+
"construction": 79,
|
94 |
+
"school": 306,
|
95 |
+
"property": 270,
|
96 |
+
"cover": 86,
|
97 |
+
"noise": 231,
|
98 |
+
"gas": 147,
|
99 |
+
"problem": 265,
|
100 |
+
"delivery": 103,
|
101 |
+
"goods": 150,
|
102 |
+
"curb": 90,
|
103 |
+
"hitting": 162,
|
104 |
+
"phone": 252,
|
105 |
+
"c1": 40,
|
106 |
+
"trees": 351,
|
107 |
+
"rent": 283,
|
108 |
+
"unauthorized": 354,
|
109 |
+
"chronic": 55,
|
110 |
+
"hanging": 157,
|
111 |
+
"accident": 4,
|
112 |
+
"cleaning": 58,
|
113 |
+
"asp": 22,
|
114 |
+
"establishment": 129,
|
115 |
+
"public": 272,
|
116 |
+
"space": 320,
|
117 |
+
"dispute": 111,
|
118 |
+
"home": 163,
|
119 |
+
"electronics": 123,
|
120 |
+
"chemical": 53,
|
121 |
+
"chained": 51,
|
122 |
+
"smoking": 319,
|
123 |
+
"car": 42,
|
124 |
+
"general": 149,
|
125 |
+
"maintenance": 214,
|
126 |
+
"asbestos": 21,
|
127 |
+
"open": 239,
|
128 |
+
"missing": 226,
|
129 |
+
"emergency": 124,
|
130 |
+
"odor": 237,
|
131 |
+
"catch": 48,
|
132 |
+
"basin": 26,
|
133 |
+
"tax": 342,
|
134 |
+
"temporary": 345,
|
135 |
+
"failure": 134,
|
136 |
+
"debris": 98,
|
137 |
+
"falling": 136,
|
138 |
+
"danger": 95,
|
139 |
+
"air": 9,
|
140 |
+
"defect": 100,
|
141 |
+
"metal": 219,
|
142 |
+
"protruding": 271,
|
143 |
+
"information": 183,
|
144 |
+
"cut": 91,
|
145 |
+
"vacant": 360,
|
146 |
+
"lot": 211,
|
147 |
+
"resident": 289,
|
148 |
+
"pipe": 253,
|
149 |
+
"toilet": 349,
|
150 |
+
"button": 38,
|
151 |
+
"wiring": 373,
|
152 |
+
"buzzer": 39,
|
153 |
+
"vehicle": 361,
|
154 |
+
"carbon": 43,
|
155 |
+
"monoxide": 227,
|
156 |
+
"smoke": 318,
|
157 |
+
"audit": 24,
|
158 |
+
"damp": 94,
|
159 |
+
"leak": 201,
|
160 |
+
"st": 324,
|
161 |
+
"facility": 133,
|
162 |
+
"law": 198,
|
163 |
+
"cigarette": 56,
|
164 |
+
"sale": 303,
|
165 |
+
"minor": 222,
|
166 |
+
"pool": 258,
|
167 |
+
"graffiti": 151,
|
168 |
+
"speed": 322,
|
169 |
+
"scale": 305,
|
170 |
+
"hours": 165,
|
171 |
+
"safety": 301,
|
172 |
+
"equipment": 128,
|
173 |
+
"signs": 314,
|
174 |
+
"notice": 233,
|
175 |
+
"box": 31,
|
176 |
+
"weeds": 372,
|
177 |
+
"grating": 152,
|
178 |
+
"removal": 280,
|
179 |
+
"requested": 286,
|
180 |
+
"controller": 85,
|
181 |
+
"flasher": 139,
|
182 |
+
"loose": 210,
|
183 |
+
"time": 347,
|
184 |
+
"switch": 340,
|
185 |
+
"stump": 335,
|
186 |
+
"sampling": 304,
|
187 |
+
"required": 287,
|
188 |
+
"head": 158,
|
189 |
+
"card": 45,
|
190 |
+
"stuck": 333,
|
191 |
+
"commission": 68,
|
192 |
+
"lack": 195,
|
193 |
+
"litter": 207,
|
194 |
+
"comm": 65,
|
195 |
+
"bldg": 29,
|
196 |
+
"basket": 27,
|
197 |
+
"fallen": 135,
|
198 |
+
"bridge": 33,
|
199 |
+
"warning": 367,
|
200 |
+
"prohibited": 269,
|
201 |
+
"inspection": 187,
|
202 |
+
"roof": 298,
|
203 |
+
"illness": 174,
|
204 |
+
"injury": 185,
|
205 |
+
"ticket": 346,
|
206 |
+
"clear": 59,
|
207 |
+
"insects": 186,
|
208 |
+
"highway": 161,
|
209 |
+
"multiple": 229,
|
210 |
+
"devices": 107,
|
211 |
+
"animal": 13,
|
212 |
+
"lane": 196,
|
213 |
+
"control": 84,
|
214 |
+
"dirt": 108,
|
215 |
+
"clothing": 61,
|
216 |
+
"high": 160,
|
217 |
+
"pressure": 262,
|
218 |
+
"debt": 99,
|
219 |
+
"materials": 218,
|
220 |
+
"agency": 8,
|
221 |
+
"application": 17,
|
222 |
+
"station": 325,
|
223 |
+
"unguarded": 355,
|
224 |
+
"driveway": 117,
|
225 |
+
"gallons": 144,
|
226 |
+
"device": 106,
|
227 |
+
"service": 307,
|
228 |
+
"swimming": 338,
|
229 |
+
"coin": 62,
|
230 |
+
"tobacco": 348,
|
231 |
+
"taste": 341,
|
232 |
+
"filing": 138,
|
233 |
+
"technical": 343,
|
234 |
+
"issues": 192,
|
235 |
+
"rights": 297,
|
236 |
+
"miscellaneous": 223,
|
237 |
+
"color": 64,
|
238 |
+
"division": 112,
|
239 |
+
"retaining": 293,
|
240 |
+
"zoning": 378,
|
241 |
+
"lawn": 199,
|
242 |
+
"status": 326,
|
243 |
+
"enforcement": 125,
|
244 |
+
"excessive": 131,
|
245 |
+
"contractor": 82,
|
246 |
+
"dry": 118,
|
247 |
+
"complaince": 71,
|
248 |
+
"electrical": 121,
|
249 |
+
"amusement": 11,
|
250 |
+
"ride": 296,
|
251 |
+
"incident": 180,
|
252 |
+
"received": 274,
|
253 |
+
"program": 268,
|
254 |
+
"nypd": 235,
|
255 |
+
"issue": 191,
|
256 |
+
"electronic": 122,
|
257 |
+
"transfer": 350,
|
258 |
+
"eft": 120,
|
259 |
+
"address": 6,
|
260 |
+
"incorrect": 181,
|
261 |
+
"wrong": 377,
|
262 |
+
"paper": 244,
|
263 |
+
"list": 206,
|
264 |
+
"passenger": 247,
|
265 |
+
"guide": 154,
|
266 |
+
"assistance": 23,
|
267 |
+
"exemption": 132,
|
268 |
+
"sro": 323,
|
269 |
+
"truck": 352,
|
270 |
+
"driver": 115,
|
271 |
+
"city": 57,
|
272 |
+
"tunnel": 353,
|
273 |
+
"licensed": 204,
|
274 |
+
"improvement": 177,
|
275 |
+
"sticker": 327,
|
276 |
+
"animals": 14,
|
277 |
+
"company": 69,
|
278 |
+
"waterway": 371,
|
279 |
+
"abcs": 1,
|
280 |
+
"housing": 167,
|
281 |
+
"haitian": 155,
|
282 |
+
"creole": 89,
|
283 |
+
"apartment": 16,
|
284 |
+
"unit": 356,
|
285 |
+
"retail": 292,
|
286 |
+
"store": 330,
|
287 |
+
"initial": 184,
|
288 |
+
"fdny": 137,
|
289 |
+
"approved": 18,
|
290 |
+
"multi": 228,
|
291 |
+
"business": 37,
|
292 |
+
"annual": 15,
|
293 |
+
"related": 278,
|
294 |
+
"dep": 104,
|
295 |
+
"internal": 189,
|
296 |
+
"vending": 362,
|
297 |
+
"machine": 213,
|
298 |
+
"marine": 217,
|
299 |
+
"dumpster": 119,
|
300 |
+
"damage": 92,
|
301 |
+
"cable": 41,
|
302 |
+
"missed collection": 225,
|
303 |
+
"street cond": 332,
|
304 |
+
"use comments": 359,
|
305 |
+
"private residence": 264,
|
306 |
+
"improper use": 176,
|
307 |
+
"residential building": 291,
|
308 |
+
"plumbing work": 257,
|
309 |
+
"work illegal": 375,
|
310 |
+
"construction site": 80,
|
311 |
+
"cover missing": 87,
|
312 |
+
"odor sewer": 238,
|
313 |
+
"sewer catch": 310,
|
314 |
+
"catch basin": 49,
|
315 |
+
"danger falling": 96,
|
316 |
+
"metal protruding": 220,
|
317 |
+
"defective street": 102,
|
318 |
+
"carbon monoxide": 44,
|
319 |
+
"safety equipment": 302,
|
320 |
+
"permit license": 250,
|
321 |
+
"grating missing": 153,
|
322 |
+
"card stuck": 46,
|
323 |
+
"stuck meter": 334,
|
324 |
+
"warning signal": 368,
|
325 |
+
"clear water": 60,
|
326 |
+
"dirt litter": 109,
|
327 |
+
"litter debris": 208,
|
328 |
+
"open unguarded": 240,
|
329 |
+
"swimming pool": 339,
|
330 |
+
"amusement ride": 12,
|
331 |
+
"address incorrect": 7,
|
332 |
+
"incorrect status": 182,
|
333 |
+
"driver license": 116,
|
334 |
+
"home improvement": 164,
|
335 |
+
"improvement contractor": 178,
|
336 |
+
"company license": 70,
|
337 |
+
"abcs housing": 2,
|
338 |
+
"haitian creole": 156,
|
339 |
+
"dep internal": 105,
|
340 |
+
"internal use": 190,
|
341 |
+
"vending machine": 363,
|
342 |
+
"unknown": 357,
|
343 |
+
"line": 205,
|
344 |
+
"knocked": 193,
|
345 |
+
"post": 260,
|
346 |
+
"wall": 366,
|
347 |
+
"excavation": 130,
|
348 |
+
"support": 336,
|
349 |
+
"foreign": 142,
|
350 |
+
"dead": 97,
|
351 |
+
"contact": 81,
|
352 |
+
"installation": 188,
|
353 |
+
"break": 32,
|
354 |
+
"house": 166,
|
355 |
+
"change": 52,
|
356 |
+
"management": 215,
|
357 |
+
"conditioning": 77,
|
358 |
+
"condo": 78,
|
359 |
+
"foundation": 143,
|
360 |
+
"referral": 275,
|
361 |
+
"route": 299,
|
362 |
+
"concrete": 74,
|
363 |
+
"panel": 243,
|
364 |
+
"complaint": 72,
|
365 |
+
"basement": 25,
|
366 |
+
"garage": 145,
|
367 |
+
"sink": 315,
|
368 |
+
"reflected": 276,
|
369 |
+
"chinese": 54,
|
370 |
+
"spanish": 321,
|
371 |
+
"arabic": 19,
|
372 |
+
"hqs": 168,
|
373 |
+
"english": 127,
|
374 |
+
"russian": 300,
|
375 |
+
"portable": 259,
|
376 |
+
"korean": 194,
|
377 |
+
"10": 0,
|
378 |
+
"television": 344,
|
379 |
+
"retaining wall": 294,
|
380 |
+
"parking lot": 246,
|
381 |
+
"air conditioning": 10,
|
382 |
+
"location": 209,
|
383 |
+
"manufacturing": 216,
|
384 |
+
"care": 47,
|
385 |
+
"activity": 5,
|
386 |
+
"low": 212,
|
387 |
+
"food": 141,
|
388 |
+
"number": 234,
|
389 |
+
"remove": 281,
|
390 |
+
"pet": 251,
|
391 |
+
"compressed": 73,
|
392 |
+
"illegal use": 173,
|
393 |
+
"illegal improper": 172,
|
394 |
+
"sewage": 308,
|
395 |
+
"drinking": 113,
|
396 |
+
"garbage": 146,
|
397 |
+
"small": 317,
|
398 |
+
"removing": 282,
|
399 |
+
"plants": 255,
|
400 |
+
"problem use": 266,
|
401 |
+
"drinking water": 114,
|
402 |
+
"gas sewer": 148,
|
403 |
+
"ventilation": 364,
|
404 |
+
"problems": 267,
|
405 |
+
"related problems": 279
|
406 |
+
}
|
407 |
+
}
|
408 |
+
}
|
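The block above is the tail of the vectorizer vocabulary saved in models/BERTopic/ctfidf_config.json: each term (unigrams and bigrams) maps to a column index of the c-TF-IDF matrix. As a minimal sketch of how such a mapping can be reused, scikit-learn's CountVectorizer accepts a term-to-index dictionary directly; the three-term vocabulary and sample sentences below are illustrative only, not taken from the repository.

```python
# Minimal sketch: reuse a saved term -> column-index mapping.
# The tiny vocabulary here is hypothetical; the real one is the full
# dictionary stored in ctfidf_config.json above.
from sklearn.feature_extraction.text import CountVectorizer

vocab = {"requested": 0, "missed collection": 1, "catch basin": 2}
vectorizer = CountVectorizer(vocabulary=vocab, ngram_range=(1, 2))

X = vectorizer.transform(["missed collection reported", "catch basin cover requested"])
print(X.toarray())  # columns follow the fixed term -> index mapping
```

Because the vocabulary is fixed, `transform()` can be called without refitting, which is what keeps the column order consistent with the saved c-TF-IDF weights.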
models/BERTopic/topic_embeddings.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:94f9c82186355ce319ce3da6352c0a285b91e216bdb680ec4e453d2df2f3c3d1
size 12376
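topic_embeddings.safetensors is stored as a Git LFS pointer (shown above), so the tensor itself is only available after `git lfs pull`. Below is a minimal sketch for inspecting it directly, assuming the `safetensors` package is available (BERTopic 0.16.x uses this format for serialization); the tensor names inside the file are printed rather than assumed.

```python
# Minimal sketch: inspect the raw topic-embedding tensors without going through BERTopic.
from safetensors.numpy import load_file

tensors = load_file("models/BERTopic/topic_embeddings.safetensors")
for name, array in tensors.items():
    print(name, array.shape, array.dtype)  # names/shapes depend on how the model was saved
```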
models/BERTopic/topics.json
ADDED
@@ -0,0 +1,1671 @@
{
  "topic_representations": {
    "-1": [["order", 0.05415367852300953], ["property", 0.05110874633317529], ["inspection", 0.047957198774650545], ["condition", 0.04684968413874401], ["construction", 0.040871670084454234]],
    "0": [["damaged", 0.12203031954457103], ["sign", 0.10565370415490198], ["sidewalk", 0.09204086251770861], ["missing", 0.08904351211067452], ["housing", 0.08425536080287954]],
    "1": [["license", 0.2485641290752132], ["complaint", 0.14648917413895213], ["illegal", 0.10854741509204496], ["violation", 0.06196592365898547], ["permit", 0.054220183201612294]],
    "2": [["water", 0.20043627364808767], ["basin", 0.1360096285478291], ["litter", 0.12766055935466478], ["missed", 0.12411889941590681], ["sewer", 0.11794480155381776]],
    "3": [["noise", 0.7067969405376407], ["animal", 0.23151186956043018], ["truck", 0.18520949564834413], ["dead", 0.1440316275215734], ["equipment", 0.1267727574626285]],
    "4": [["odor", 0.40165153174580426], ["food", 0.30714528898208565], ["air", 0.29978554690340886], ["smoke", 0.19547149449356388], ["taste", 0.19547149449356388]],
    "5": [["english", 0.4504386781775388], ["emergency", 0.379178358375766], ["spanish", 0.3611251470424905], ["chinese", 0.3317092027769569], ["heat", 0.3317092027769569]],
    "6": [["exemption", 0.693831167446274], ["commercial", 0.49112096865161], ["tax", 0.40939072124701686], ["business", 0.33495604465665013], ["refund", 0.17799030392909884]]
  },
  "topics": [-1, 4, 0, 2, 0, 0, 0, 3, -1, 0, 0, 0, 0, -1, 1, -1, 0, 0, -1, 0,
             ... 1,294 more per-document topic assignments (values -1 through 6; per-topic totals are given in "topic_sizes" below) ...],
  "topic_sizes": {"-1": 458, "4": 53, "0": 366, "2": 94, "3": 62, "1": 191, "6": 40, "5": 50},
  "topic_mapper": [
    [-1, -1, -1, -1], [0, 0, 3, 5], [1, 1, 2, 3], [2, 2, 1, 0], [3, 3, 4, 4],
    [4, 4, 4, 4], [5, 5, 2, 3], [6, 6, 0, 2], [7, 7, 0, 2], [8, 8, 0, 2],
    [9, 9, 0, 2], [10, 10, 1, 0], [11, 11, 1, 0], [12, 12, 1, 0], [13, 13, 1, 0],
    [14, 14, 1, 0], [15, 15, 5, 1], [16, 16, 1, 0], [17, 17, 1, 0], [18, 18, 1, 0],
    [19, 19, 5, 1], [20, 20, 5, 1], [21, 21, 5, 1], [22, 22, 6, 6], [23, 23, 6, 6]
  ],
  "topic_labels": {
    "-1": "-1_order_property_inspection_condition",
    "0": "0_damaged_sign_sidewalk_missing",
    "1": "1_license_complaint_illegal_violation",
    "2": "2_water_basin_litter_missed",
    "3": "3_noise_animal_truck_dead",
    "4": "4_odor_food_air_smoke",
    "5": "5_english_emergency_spanish_chinese",
    "6": "6_exemption_commercial_tax_business"
  },
  "custom_labels": null,
  "_outliers": 1,
  "topic_aspects": {}
}
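topics.json above holds the fitted topic state: the top words per topic with their c-TF-IDF weights, the per-document topic assignments, topic sizes, and the generated labels. A minimal sketch for reading this state back through BERTopic rather than parsing the JSON by hand, assuming bertopic==0.16.1 as pinned in requirements.txt (depending on how the model was saved, an embedding model may also need to be passed to `load`):

```python
from bertopic import BERTopic

# Load the saved model directory from this commit.
topic_model = BERTopic.load("models/BERTopic")

print(topic_model.get_topic_info())  # per-topic sizes and labels, e.g. "3_noise_animal_truck_dead"
print(topic_model.get_topic(3))      # top words and c-TF-IDF weights for topic 3
```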
models/final_model.json
ADDED
The diff for this file is too large to render.
See raw diff
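A minimal sketch for loading models/final_model.json, assuming it was written by XGBoost's `save_model()` (xgboost==2.0.3 is pinned in requirements.txt and imported in utils.py); if the file was produced by a different library, this loader does not apply.

```python
import xgboost as xgb

# Load the serialized booster saved in this commit (assumed XGBoost JSON format).
booster = xgb.Booster()
booster.load_model("models/final_model.json")
print(booster.num_features(), "input features")
```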
reports/311_data_1.html
ADDED
The diff for this file is too large to render.
See raw diff
reports/weather_data_after2016_ts.html
ADDED
The diff for this file is too large to render.
See raw diff
reports/weather_data_ts.html
ADDED
The diff for this file is too large to render.
See raw diff
requirements.txt
ADDED
@@ -0,0 +1,26 @@
beautifulsoup4==4.12.3
bertopic==0.16.1
bs4==0.0.2
bokeh==3.4.1
darts==0.29.0
folium==0.16.0
gradio==4.27.0
ipykernel==6.29.4
ipywidgets==8.1.2
jupyterlab==4.1.8
matplotlib==3.8.4
nbformat==5.10.4
nltk==3.8.1
numpy==1.26.4
openpyxl==3.1.2
pandas==2.2.2
plotly==5.21.0
polars==0.20.21
prophet==1.1.5
pyarrow==16.0.0
scikit-learn==1.4.2
scipy==1.13.0
seaborn==0.13.2
--extra-index-url https://download.pytorch.org/whl/cu121
torch==2.2.2
xgboost==2.0.3
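The `--extra-index-url` line makes pip pull the CUDA 12.1 build of torch from the PyTorch wheel index. An optional sanity check (not part of the repository) to confirm an installed environment matches these pins:

```python
# Hypothetical helper: print the installed versions of the key pinned packages.
import bertopic, darts, gradio, polars, torch, xgboost

for pkg in (bertopic, darts, gradio, polars, torch, xgboost):
    print(f"{pkg.__name__}=={pkg.__version__}")
```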
utils.py
ADDED
@@ -0,0 +1,1028 @@
import pandas as pd
import polars as pl
import numpy as np
import json
import gc
import folium
import html
from matplotlib import pyplot as plt
import seaborn as sns
import xgboost as xgb
from xgboost import plot_importance
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import plotly.io as pio
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.tsa.stattools import kpss, adfuller
from bertopic import BERTopic
from collections import defaultdict
import gradio as gr  # needed for gr.update() in plot_timeseries

color_pal = sns.color_palette("tab10")

impute_cols = [
    'MeanTemp', 'MinTemp', 'MaxTemp', 'DewPoint',
    'Percipitation', 'WindSpeed', 'MaxSustainedWind',
    'Gust', 'Rain', 'SnowDepth', 'SnowIce',
]


def convert_schema_to_polars(schema):
    # Map a dict of dtype names (strings) to the corresponding polars dtypes
    pl_schema = {}
    for k, v in schema.items():
        if v == "String":
            pl_schema[k] = pl.String
        elif v == "Float64":
            pl_schema[k] = pl.Float64
        elif v == "Int64":
            pl_schema[k] = pl.Int64
    return pl_schema


def create_datetime(data, dt_col, format="%m/%d/%Y %I:%M:%S %p"):
    # df type is either pandas or polars
    df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
    if "datetime" in str(data[dt_col].dtype).lower():
        return data

    if df_type == "pandas":
        data[dt_col] = pd.to_datetime(data[dt_col], format=format)
    elif df_type == "polars":
        data = data.with_columns(
            pl.col(dt_col).str.strptime(pl.Date, format=format).cast(pl.Datetime)
        )

    return data


def create_seasons(data, dt_col="Datetime", out_col="Season", prefix=""):
    # Label each date with its meteorological season (cutoffs taken from 2018 equinox/solstice dates)
    df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
    out_col = prefix + out_col
    spring_start = pd.to_datetime("2018-3-20", format="%Y-%m-%d").dayofyear
    summer_start = pd.to_datetime("2018-6-21", format="%Y-%m-%d").dayofyear
    autumn_start = pd.to_datetime("2018-9-22", format="%Y-%m-%d").dayofyear
    winter_start = pd.to_datetime("2018-12-21", format="%Y-%m-%d").dayofyear

    if df_type == "pandas":
        def map_season(date):
            if date.dayofyear < spring_start or date.dayofyear >= winter_start:
                return "Winter"
            elif date.dayofyear >= spring_start and date.dayofyear < summer_start:
                return "Spring"
            elif date.dayofyear >= summer_start and date.dayofyear < autumn_start:
                return "Summer"
            elif date.dayofyear >= autumn_start and date.dayofyear < winter_start:
                return "Autumn"
        data[out_col] = data[dt_col].apply(map_season)
        return data

    elif df_type == "polars":

        def map_season(date):
            if date.timetuple().tm_yday < spring_start or date.timetuple().tm_yday >= winter_start:
                return "Winter"
            elif date.timetuple().tm_yday >= spring_start and date.timetuple().tm_yday < summer_start:
                return "Spring"
            elif date.timetuple().tm_yday >= summer_start and date.timetuple().tm_yday < autumn_start:
                return "Summer"
            elif date.timetuple().tm_yday >= autumn_start and date.timetuple().tm_yday < winter_start:
                return "Autumn"

        data = data.with_columns(
            pl.col(dt_col).map_elements(map_season, return_dtype=pl.String).alias(out_col)
        )
        return data


def create_weekend(data, dt_col="Datetime", out_col="is_weekend", prefix=""):
    # Flag Saturdays/Sundays (pandas weekdays are 0-6, polars weekdays are 1-7)
    df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
    out_col = prefix + out_col

    if df_type == "pandas":
        data[out_col] = (data[dt_col].dt.weekday.isin([5, 6])).astype(np.int8)

    elif df_type == "polars":
        data = data.with_columns(
            pl.col(dt_col).dt.weekday().is_in([6, 7]).cast(pl.Int8).alias(out_col)
        )

    return data


def create_holidays(data, dt_col="Datetime", out_col="is_holiday", prefix=""):
    df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
    out_col = prefix + out_col

    # The only holiday not included is New Year's, as I expect a potential effect
    HOLIDAYS = [
        pd.to_datetime("2016-01-18"), pd.to_datetime("2016-02-15"),
        pd.to_datetime("2016-05-30"), pd.to_datetime("2016-07-04"), pd.to_datetime("2016-09-05"),
        pd.to_datetime("2016-10-10"), pd.to_datetime("2016-11-11"), pd.to_datetime("2016-11-24"),
        # Christmas is variable (depends on what day is actually the holiday vs. what day is XMAS)
        pd.to_datetime("2016-12-24"), pd.to_datetime("2016-12-25"), pd.to_datetime("2016-12-26"),

        pd.to_datetime("2017-01-16"), pd.to_datetime("2017-02-20"),
        pd.to_datetime("2017-05-29"), pd.to_datetime("2017-07-04"), pd.to_datetime("2017-09-04"),
        pd.to_datetime("2017-10-09"), pd.to_datetime("2017-11-10"), pd.to_datetime("2017-11-23"),
        pd.to_datetime("2017-12-24"), pd.to_datetime("2017-12-25"),

        pd.to_datetime("2018-01-15"), pd.to_datetime("2018-02-19"),
        pd.to_datetime("2018-05-28"), pd.to_datetime("2018-07-04"), pd.to_datetime("2018-09-03"),
        pd.to_datetime("2018-10-08"), pd.to_datetime("2018-11-12"), pd.to_datetime("2018-11-22"),
        pd.to_datetime("2018-12-24"), pd.to_datetime("2018-12-25"),
    ]

    if df_type == "pandas":
        data[out_col] = (data[dt_col].isin(HOLIDAYS)).astype(np.int8)

    elif df_type == "polars":
        data = data.with_columns(
            pl.col(dt_col).dt.datetime().is_in(HOLIDAYS).cast(pl.Int8).alias(out_col)
        )
    return data


def build_temporal_features(data, dt_col, prefix=""):
    # Add calendar features (year, month, day, season, weekend, holiday) to the frame
    df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
    if df_type == "pandas" and data.index.name == dt_col:
        data = data.reset_index()

    if df_type == "pandas":
        data[prefix + "Year"] = data[dt_col].dt.year.astype(np.int16)
        data[prefix + "Month"] = data[dt_col].dt.month.astype(np.int8)
        data[prefix + "Day"] = data[dt_col].dt.day.astype(np.int8)
        data[prefix + "DayOfYear"] = data[dt_col].dt.dayofyear.astype(np.int16)
        data[prefix + "DayOfWeek"] = data[dt_col].dt.dayofweek.astype(np.int8)
    else:
        data = data.with_columns(**{
            prefix + "Year": pl.col(dt_col).dt.year().cast(pl.Int16),
            prefix + "Month": pl.col(dt_col).dt.month().cast(pl.Int8),
            prefix + "Day": pl.col(dt_col).dt.day().cast(pl.Int8),
            prefix + "DayOfYear": pl.col(dt_col).dt.ordinal_day().cast(pl.Int16),
            prefix + "DayOfWeek": pl.col(dt_col).dt.weekday().cast(pl.Int8)
        })

    data = create_seasons(data, dt_col, prefix=prefix)
    data = create_weekend(data, dt_col, prefix=prefix)
    data = create_holidays(data, dt_col, prefix=prefix)
    return data


def agg_and_merge_historical(curr_df, hist_df, col, agg_cols=[], ops=["mean", "max", "min"]):
    # Aggregate historical statistics per group and merge them onto the current frame
    merge_dict = {}
    for agg_col in agg_cols:
        describe_tb = hist_df.groupby(col)[agg_col].describe().reset_index()
        if col not in merge_dict:
            merge_dict[col] = describe_tb[col].values
        for op in ops:
            merge_col_name = "historical_" + col + "_" + op + "_" + agg_col
            if op == "mean":
                merge_dict[merge_col_name] = describe_tb["mean"].values
            elif op == "max":
                merge_dict[merge_col_name] = describe_tb["max"].values
            elif op == "min":
                merge_dict[merge_col_name] = describe_tb["min"].values
            elif op == "median":
                merge_dict[merge_col_name] = describe_tb["50%"].values
            elif op == "std":
                merge_dict[merge_col_name] = describe_tb["std"].values

    merge_df = pd.merge(curr_df, pd.DataFrame(merge_dict), on=col, how="left")
    return merge_df


def map_vals(data, cols=["Latitude", "Longitude"], label_cols=[], color="red", submap=None, weight=3, radius=1, sample_size=10000, start_loc=[42.1657, -74.9481], zoom_start=6):
    # Plot a sample of coordinate points on a folium map, optionally on top of an existing map
    df_type = "pandas" if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series) else "polars"
    fig = folium.Figure(height=500, width=750)

    if submap is None:
        map_nyc = folium.Map(
            location=start_loc,
            zoom_start=zoom_start,
            tiles='cartodbpositron',
            zoom_control=False,
            scrollWheelZoom=False,
            dragging=False
        )
    else:
        map_nyc = submap

    cols.extend(label_cols)
    if df_type == "pandas":
        for idx, row in data.loc[:, cols].dropna().sample(sample_size).iterrows():
            label = ""
            lat, long = row.iloc[0], row.iloc[1]
            for i, label_col in enumerate(label_cols):
                label += label_col + ": " + str(row.iloc[2 + i]) + "\n"

            label_params = {"popup": label, "tooltip": label} if len(label_cols) > 0 else {}
            folium.CircleMarker(location=[lat, long], radius=radius, weight=weight, color=color, fill_color=color, fill_opacity=0.7, **label_params).add_to(map_nyc)
    else:
        for row in data[:, cols].drop_nulls().sample(sample_size).rows():
            label = ""
            lat, long = row[0], row[1]
            for i, label_col in enumerate(label_cols):
                label += label_col + ": " + str(row[2 + i]) + "\n"

            label_params = {"popup": label, "tooltip": label} if len(label_cols) > 0 else {}
            folium.CircleMarker(location=[lat, long], radius=radius, weight=weight, color=color, fill_color=color, fill_opacity=0.7, **label_params).add_to(map_nyc)

    fig.add_child(map_nyc)
    return fig, map_nyc


def find_variable_data(soup, curr_var="Created Date"):
    # Extract a single variable's section from a profiling-report HTML page and wrap it in an iframe
    src = "<!doctype html>"
    # HTML and head start
    src += "<html lang=\"en\">"
    src += str(soup.find("head"))

    # Body -> content -> container -> row -> variable
    src += "<body style=\"background-color: var(--table-odd-background-fill); padding-top: 20px;\">"
    src += "<div class=\"content\" style=\"padding-left: 150px; padding-right: 150px; border: 0px !important; \">"
    # src += "<div class=\"container\">"
    src += "<div class=\"section-items\" style=\"background-color: white;\">"
    # src += "<div class=\"row spacing\">"
    variables_html = soup.find_all("div", class_="variable")
    for var_html in variables_html:
        if var_html.text[:len(curr_var)] == curr_var:
            parent = var_html.parent
            parent['style'] = "border: 0px"
            src += str(parent)
            break

    src += "</div></div>"

    # Scripts
    for script in soup.find_all("script"):
        src += str(script)

    # End
    src += "</body>"
    src += "</html>"

    # src = BeautifulSoup(src, 'html.parser').prettify()
    src_doc = html.escape(src)
    iframe = f'<iframe width="100%" height="1200px" srcdoc="{src_doc}" frameborder="0"></iframe>'
    return iframe, src_doc


def plot_autocorr(data, col, apply=None):
    # ACF and PACF plots for one column, optionally transformed first
    time_series = data.loc[:, col].to_frame().copy()
    if apply:
        time_series[col] = time_series[col].apply(apply)
    fig, ax = plt.subplots(2, 1, figsize=(12, 8))
    _ = plot_acf(time_series[col], lags=30, ax=ax[0])
    _ = plot_pacf(time_series[col], lags=30, method="ols-adjusted", ax=ax[1])
    _ = plt.suptitle(f"{col}", y=0.95)
    return fig


def adf_test(timeseries):
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', 'Lags Used', 'Number of Observations Used'])
    dfoutput['Number of Observations Used'] = dfoutput['Number of Observations Used'].astype(np.int64)
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    return dfoutput


def kpss_test(timeseries):
    kpsstest = kpss(timeseries, regression='ct')
    kpss_output = pd.Series(kpsstest[0:3], index=['Test Statistic', 'p-value', 'Lags Used'])
    for key, value in kpsstest[3].items():
        kpss_output['Critical Value (%s)' % key] = value
    return kpss_output


def test_stationary(data, var):
    # Run ADF and KPSS side by side and color-code whether each test passes its hypothesis
    adf_df = adf_test(data[var].dropna())
    kpss_df = kpss_test(data[var].dropna())
    result_df = adf_df.to_frame(name="Augmented-Dickey-Fuller")
    result_df["KPSS Test"] = kpss_df

    def pass_hypothesis(col):
        test_stat, p_val = col.iloc[0], col.iloc[1]
        one_p, five_p, ten_p = col.iloc[4], col.iloc[5], col.iloc[6]
        if col.name == "KPSS Test":
            if test_stat < one_p and p_val < 0.01:
                color_fmt = ["background-color: #fc5749; font-weight: bold; color: black"]
            elif test_stat < five_p and p_val < 0.05:
                color_fmt = ["background-color: #F88379; font-weight: bold; color: black"]
            elif test_stat < ten_p and p_val < 0.1:
                color_fmt = ["background-color: #ff9f96; font-weight: bold; color: black"]
            else:
                color_fmt = ["background-color: green; font-weight: bold; color: black"]
        else:
            if test_stat < one_p and p_val < 0.01:
                color_fmt = ["background-color: green; font-weight: bold; color: black"]
            elif test_stat < five_p and p_val < 0.05:
                color_fmt = ["background-color: greenyellow; font-weight: bold; color: black"]
            elif test_stat < ten_p and p_val < 0.1:
                color_fmt = ["background-color: lightgreen; font-weight: bold; color: black"]
            else:
                color_fmt = ["background-color: #fc5749; font-weight: bold; color: black"]

        color_fmt.extend(['' for _ in col[1:]])
        return color_fmt

    result_df.loc["Lags Used", :] = result_df.loc["Lags Used", :].astype(np.int32)
    return result_df.style.apply(pass_hypothesis)


def plot_timeseries(data, var, data_name="My", all_vars=[], height=800, width=600, start_date="2017-12-31", end_date="2018-12-31"):
    if var == "":
        return gr.update()

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=data.index,
            y=data[var],
            name=var,
            customdata=np.dstack((data["Season"].to_numpy(), data.reset_index()["Datetime"].dt.day_name().to_numpy(), data["is_holiday"].astype(bool).to_numpy()))[0],
            hovertemplate='<br>value:%{y:.3f} <br>Season: %{customdata[0]} <br>Weekday: %{customdata[1]} <br>Is Holiday: %{customdata[2]}',
        )
    )
    fig.update_layout(
        autosize=True,
        title=f"{data_name} Time Series by {var}",
        xaxis_title='Date',
        yaxis_title=var,
        hovermode='x unified',
    )

    fig.update_layout(
        autosize=True,
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=7, label="1w", step="day", stepmode="backward"),
                    dict(count=21, label="3w", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True,
            ),
            type="date",
            range=(start_date, end_date),
        ),
    )
    return fig


def plot_bivariate(data, x, y, subset=None, trendline=True):
    title = f"Scatterplot of {x} vs. {y}"

    if subset == "None" or subset is None:
        subset = None
        height = 450
    else:
        subset_title = subset.replace(" String", "")
        title += f" By {subset_title}"
        if subset_title in ["Season", "Year"]:
            height = 450
        else:
            height = 800

    if trendline:
        trendline = "ols"
    else:
        trendline = None

    # Special case to view categorical features
    if x in ["Agency", "Borough", "Descriptor"]:
        if x == "Agency":
            prefix = 'AG'
        elif x == "Borough":
            prefix = "Borough"
        else:
            prefix = "DG"

        categories = [col for col in data.columns if prefix in col]
        melt_df = pd.melt(data, id_vars=["Target"], value_vars=categories)
        fig = px.scatter(
            melt_df,
            x="value",
            y="Target",
            trendline=trendline,
            facet_col="variable",
            facet_col_wrap=4,
            facet_col_spacing=0.05,
            title=title
        )
        height = 800

    else:
        fig = px.scatter(
            data,
            x=x, y=y,
            trendline=trendline,
            facet_col=subset,
            facet_col_wrap=4,
            facet_col_spacing=0.05,
            title=title
        )

    fig.update_layout(
        autosize=True,
        height=height,
    )

    return fig


def plot_seasonality(data, x, y, show_box=True, show_outliers=False):
    title = f"{y} by {x}"

    if show_box:
        if show_outliers:
            points = "outliers"
        else:
            points = "all"
        fig = px.box(data, x=x, y=y, points=points, title=title, facet_col_wrap=4, facet_col_spacing=0.05,)
    else:
        fig = px.strip(data, x=x, y=y, title=title, facet_col_wrap=4, facet_col_spacing=0.05,)

    fig.update_layout(
        autosize=True,
        height=600,
    )
    return fig


def build_service_data(filename):
    # Loading data directly with polars leads to errors
    # Some rows end up missing for an unknown reason
    # FIX: Load in pandas then convert to polars
    service_data_pd = pd.read_csv(filename)

    # Quick test to assure the unique key is in fact unique
    assert service_data_pd["Unique Key"].nunique() == len(service_data_pd)

    # Load from pandas Dataframe
    service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
    service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
    service_data = pl.DataFrame(service_data_pd)

    # Clear some ram
    del service_data_pd
    gc.collect()

    drop_cols = [
        "Unique Key", "Agency Name", "Location Type", "Incident Zip",
        "Incident Address", "Street Name", "Cross Street 1",
        "Cross Street 2", "Intersection Street 1", "Intersection Street 2",
        "Address Type", "City", "Landmark", "Facility Type",
        "Status", "Due Date", "Resolution Description",
        "Resolution Action Updated Date", "Community Board",
        "BBL", "X Coordinate (State Plane)", "Y Coordinate (State Plane)",
        "Open Data Channel Type", "Park Facility Name", "Park Borough",
        "Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
        "Bridge Highway Name", "Bridge Highway Direction", "Road Ramp",
        "Bridge Highway Segment", "Location", "Created Year"
    ]

    # Drop columns and create the date variable
    service_data = service_data.drop(drop_cols)
    service_data = create_datetime(service_data, "Created Date")
    service_data = create_datetime(service_data, "Closed Date")

    # Group by date to get the number of Created tickets (as target)
    sd_grouped = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
        pl.len().alias("Target"),
    ).sort(by="Datetime")

    # Calculate the number of closed tickets
    # Mean diff used to filter service data
    # mean_diff = service_data.with_columns(
    #     diff_created_closed = pl.col("Closed Date") - pl.col("Created Date")
    # ).filter((pl.col("Closed Date").dt.year() >= 2016) & (pl.col("Closed Date").dt.year() < 2020))["diff_created_closed"].mean().days
392 |
+
if subset_title in ["Season", "Year"]:
|
393 |
+
height = 450
|
394 |
+
else:
|
395 |
+
height = 800
|
396 |
+
|
397 |
+
if trendline:
|
398 |
+
trendline = "ols"
|
399 |
+
else:
|
400 |
+
trendline = None
|
401 |
+
|
402 |
+
# Special case to view categorical features
|
403 |
+
if x in ["Agency", "Borough", "Descriptor"]:
|
404 |
+
if x == "Agency":
|
405 |
+
prefix = 'AG'
|
406 |
+
elif x == "Borough":
|
407 |
+
prefix = "Borough"
|
408 |
+
else:
|
409 |
+
prefix="DG"
|
410 |
+
|
411 |
+
categories = [col for col in data.columns if prefix in col]
|
412 |
+
melt_df = pd.melt(data, id_vars=["Target"], value_vars=categories)
|
413 |
+
fig = px.scatter(
|
414 |
+
melt_df,
|
415 |
+
x="value",
|
416 |
+
y="Target",
|
417 |
+
trendline=trendline,
|
418 |
+
facet_col="variable",
|
419 |
+
facet_col_wrap=4,
|
420 |
+
facet_col_spacing=0.05,
|
421 |
+
title=title
|
422 |
+
)
|
423 |
+
height = 800
|
424 |
+
|
425 |
+
else:
|
426 |
+
fig = px.scatter(
|
427 |
+
data,
|
428 |
+
x=x, y=y,
|
429 |
+
trendline=trendline,
|
430 |
+
facet_col=subset,
|
431 |
+
facet_col_wrap=4,
|
432 |
+
facet_col_spacing=0.05,
|
433 |
+
title=title
|
434 |
+
)
|
435 |
+
|
436 |
+
fig.update_layout(
|
437 |
+
autosize=True,
|
438 |
+
height=height,
|
439 |
+
)
|
440 |
+
|
441 |
+
return fig
|
442 |
+
|
443 |
+
|
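# Usage sketch (illustrative only): `merged_df` is a placeholder for a dataframe
# that contains "MeanTemp", "Target" and a "Season String" column; the column
# names are assumptions for the example, not guaranteed by this module.
def _example_plot_bivariate(merged_df):
    fig = plot_bivariate(merged_df, x="MeanTemp", y="Target", subset="Season String", trendline=True)
    fig.show()
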
def plot_seasonality(data, x, y, show_box=True, show_outliers=False):
    title = f"{y} by {x}"

    if show_box:
        if show_outliers:
            points = "outliers"
        else:
            points = "all"
        fig = px.box(data, x=x, y=y, points=points, title=title, facet_col_wrap=4, facet_col_spacing=0.05)
    else:
        fig = px.strip(data, x=x, y=y, title=title, facet_col_wrap=4, facet_col_spacing=0.05)

    fig.update_layout(
        autosize=True,
        height=600,
    )
    return fig

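# Usage sketch (illustrative only; assumes `merged_df` carries "Season" and "Target").
def _example_plot_seasonality(merged_df):
    fig = plot_seasonality(merged_df, x="Season", y="Target", show_box=True, show_outliers=False)
    fig.show()
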
def build_service_data(filename):
    # Loading the data directly with polars leads to errors:
    # some rows end up missing for an unknown reason.
    # FIX: load with pandas, then convert to polars.
    service_data_pd = pd.read_csv(filename)

    # Quick check to ensure the unique key is in fact unique
    assert service_data_pd["Unique Key"].nunique() == len(service_data_pd)

    # Cast the string-like columns, then load into a polars DataFrame
    service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
    service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
    service_data = pl.DataFrame(service_data_pd)

    # Clear some RAM
    del service_data_pd
    gc.collect()

    drop_cols = [
        "Unique Key", "Agency Name", "Location Type", "Incident Zip",
        "Incident Address", "Street Name", "Cross Street 1",
        "Cross Street 2", "Intersection Street 1", "Intersection Street 2",
        "Address Type", "City", "Landmark", "Facility Type",
        "Status", "Due Date", "Resolution Description",
        "Resolution Action Updated Date", "Community Board",
        "BBL", "X Coordinate (State Plane)", "Y Coordinate (State Plane)",
        "Open Data Channel Type", "Park Facility Name", "Park Borough",
        "Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
        "Bridge Highway Name", "Bridge Highway Direction", "Road Ramp",
        "Bridge Highway Segment", "Location", "Created Year"
    ]

    # Drop unused columns and parse the date variables
    service_data = service_data.drop(drop_cols)
    service_data = create_datetime(service_data, "Created Date")
    service_data = create_datetime(service_data, "Closed Date")

    # Group by date to get the number of created tickets per day (the target)
    sd_grouped = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
        pl.len().alias("Target"),
    ).sort(by="Datetime")

    # Calculate the number of closed tickets.
    # The mean created-to-closed gap was precomputed with:
    # mean_diff = service_data.with_columns(
    #     diff_created_closed = pl.col("Closed Date") - pl.col("Created Date")
    # ).filter((pl.col("Closed Date").dt.year() >= 2016) & (pl.col("Closed Date").dt.year() < 2020))["diff_created_closed"].mean().days
    # and is hard-coded here to avoid recomputing it on every run.
    mean_diff = 13

    # Create a corrected closed date, filling inconsistent or null values with the mean gap above
    service_data = service_data.with_columns(
        Closed_Date_New = pl.when(pl.col("Created Date") - pl.col("Closed Date") > pl.duration(days=1))
        .then(pl.col("Created Date") + pl.duration(days=mean_diff))
        .otherwise(pl.col("Closed Date")).fill_null(pl.col("Created Date") + pl.duration(days=mean_diff))
    )

    # Only count tickets whose created date is on or before the (corrected) closed date,
    # so that future information is not accidentally leaked into earlier points of our data
    closed_tickets = service_data.group_by(["Closed_Date_New", "Created Date"]) \
        .agg((pl.when(pl.col("Created Date") <= pl.col("Closed_Date_New")).then(1).otherwise(0)).sum().alias("count")) \
        .sort("Closed_Date_New") \
        .filter((pl.col("Closed_Date_New").dt.year() >= 2016) & (pl.col("Closed_Date_New").dt.year() < 2019)) \
        .group_by("Closed_Date_New").agg(pl.col("count").sum().alias("num_closed_tickets"))

    # Keep the daily closed-ticket counts
    ct_df = closed_tickets.with_columns(
        pl.col("num_closed_tickets")
    )

    # Concat the new column onto our daily data
    sd_df = pl.concat([sd_grouped, ct_df.drop("Closed_Date_New")], how="horizontal")

    assert len(sd_grouped) == len(ct_df)

    # CATEGORICAL FEATURE MAPPING
    # Mapping for Borough
    Borough_Map = {
        "Unspecified": "OTHER",
        "2017": "OTHER",
        None: "OTHER",
        "2016": "OTHER"
    }
    service_data = service_data.with_columns(
        pl.col("Borough").replace(Borough_Map)
    )

    # Mapping for Agency (this grouping was done manually)
    Agency_Map = {
        "NYPD": "Security", "HPD": "Buildings", "DOT": "Transportation",
        "DSNY": "Environment & Sanitation", "DEP": "Environment & Sanitation",
        "DOB": "Buildings", "DOE": "Buildings", "DPR": "Parks",
        "DOHMH": "Health", "DOF": "Other", "DHS": "Security",
        "TLC": "Transportation", "HRA": "Other", "DCA": "Other",
        "DFTA": "Other", "EDC": "Other", "DOITT": "Other", "OMB": "Other",
        "DCAS": "Other", "NYCEM": "Other", "ACS": "Other", "3-1-1": "Other",
        "TAX": "Other", "DCP": "Other", "DORIS": "Other", "FDNY": "Other",
        "TAT": "Other", "COIB": "Other", "CEO": "Other", "MOC": "Other",
    }

    service_data = service_data.with_columns(
        pl.col("Agency").replace(Agency_Map).alias("AG")  # AG is shorthand for Agency Groups
    )

    # Mapping for Descriptor using BERTopic.
    # Store the unique descriptors as a numpy array (BERTopic does not accept polars input);
    # only the unique values matter here.
    descriptor_docs = service_data["Descriptor"].unique().to_numpy()

    # Build our topic mapping using the pretrained BERTopic model:
    # load the model and get predictions
    topic_model = BERTopic.load("models/BERTopic")
    topics, probs = topic_model.transform(descriptor_docs)

    # Visualize if wanted
    # topic_model.visualize_barchart(list(range(-1, 6, 1)))

    # Create a topic-ID-to-name map
    topic_df = topic_model.get_topic_info()
    topic_id_map = {row["Topic"]: row["Name"][2:] for _, row in topic_df.iterrows()}
    topic_id_map[-1] = topic_id_map[-1][1:]  # Fix for the -1 (outlier) topic case

    # For each document (descriptor string) get its topic name
    doc_to_topic_map = defaultdict(str)
    for topic_id, doc in zip(topics, descriptor_docs):
        topic = topic_id_map[topic_id]
        doc_to_topic_map[doc] = topic

    service_data = service_data.with_columns(
        pl.col("Descriptor").replace(doc_to_topic_map).alias("DG")  # DG is shorthand for Descriptor Groups
    )

    # One-hot encode the categorical features
    cat_features = ["AG", "Borough", "DG"]
    service_data = service_data.to_dummies(columns=cat_features)

    # Group by date and create our category feature vector
    cat_df = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
        # Daily sum of each categorical dummy column
        pl.col('^AG_.*$').sum(),
        pl.col('^Borough_.*$').sum(),
        pl.col('^DG_.*$').sum(),
    ).sort(by="Datetime")

    # Concat our category features to our current dataframe
    sd_df = pl.concat([sd_df, cat_df.drop("Datetime")], how="horizontal")

    # Now that the dataframe is significantly reduced in size,
    # convert back to pandas, which is usable across more python packages
    sd_df = sd_df.to_pandas()

    # Set index to datetime
    sd_df = sd_df.set_index("Datetime")

    # NOTE: 7 extra rows were added to the weather dataframe; those 7 rows form the
    # final prediction set, so their Target stays null (i.e. still to be predicted).
    # Add the matching rows to the service dataframe.
    preds_df = pd.DataFrame({'Datetime': pd.date_range(start=sd_df.index[-1], periods=8, freq='D')})[1:]
    sd_df = pd.concat([sd_df, preds_df.set_index("Datetime")], axis=0)

    return sd_df

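# Usage sketch (illustrative only): the CSV path below is a placeholder for the raw
# 311 service-request export this function expects, not necessarily a file in this repo.
def _example_build_service_data():
    sd_df = build_service_data("data/311_service_requests.csv")  # placeholder path
    print(sd_df[["Target", "num_closed_tickets"]].tail(10))
    return sd_df
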
# Build all weather data from file
def build_weather_data(filename):
    # Use pandas to read the file
    weather_data = pd.read_csv(filename)

    # Aggregate the Year, Month, Day columns into a datetime object,
    # because the 311 data is keyed by datetime
    weather_data["Datetime"] = weather_data["Year"].astype("str") + "-" + weather_data["Month"].astype("str") + "-" + weather_data["Day"].astype("str")
    weather_data = create_datetime(weather_data, "Datetime", format="%Y-%m-%d")

    # LOCALIZE
    # Pre-recorded min/max coordinates from the service data (so we don't need it again)
    lat_min = 40.49804421521046
    lat_max = 40.91294056699566
    long_min = -74.25521082506387
    long_max = -73.70038354802529

    # Create the conditions for location matching
    mincon_lat = weather_data["Latitude"] >= lat_min
    maxcon_lat = weather_data["Latitude"] <= lat_max
    mincon_long = weather_data["Longitude"] >= long_min
    maxcon_long = weather_data["Longitude"] <= long_max

    # Localize our data to match the service data
    wd_localized = weather_data.loc[mincon_lat & maxcon_lat & mincon_long & maxcon_long]
    drop_cols = [
        "USAF",
        "WBAN",
        "StationName",
        "State",
        "Latitude",
        "Longitude"
    ]
    wd_localized = wd_localized.drop(columns=drop_cols)

    # AGGREGATE
    # Map columns to their aggregation method
    # (note: "Percipitation" keeps the source data's spelling)
    mean_cols = [
        'MeanTemp',
        'DewPoint',
        'Percipitation',
        'WindSpeed',
        'Gust',
        'SnowDepth',
    ]
    min_cols = [
        'MinTemp'
    ]
    max_cols = [
        'MaxTemp',
        'MaxSustainedWind'
    ]
    round_cols = [
        'Rain',
        'SnowIce'
    ]

    # Perform the aggregation
    mean_df = wd_localized.groupby("Datetime")[mean_cols].mean()
    min_df = wd_localized.groupby("Datetime")[min_cols].min()
    max_df = wd_localized.groupby("Datetime")[max_cols].max()
    round_df = wd_localized.groupby("Datetime")[round_cols].mean().round().astype(np.int8)
    wd_full = pd.concat([mean_df, min_df, max_df, round_df], axis=1)

    # Add seasonal features
    wd_full = build_temporal_features(wd_full, "Datetime")
    wd_full["Season"] = wd_full["Season"].astype("category")
    wd_full = wd_full.set_index("Datetime")

    # We impute the 7 days after 12/31/2018 along with the 49 missing days;
    # this acts as our "weather forecast"
    time_steps = 49 + 7

    # Columns to impute
    impute_cols = [
        'MeanTemp', 'MinTemp', 'MaxTemp', 'DewPoint',
        'Percipitation', 'WindSpeed', 'MaxSustainedWind',
        'Gust', 'Rain', 'SnowDepth', 'SnowIce',
    ]

    # Variables grouped by imputation strategy
    mean_vars = ["WindSpeed", "MaxSustainedWind", "Gust", "SnowDepth"]
    min_vars = ["SnowIce", "MeanTemp", "MinTemp", "MaxTemp", "DewPoint", "Percipitation"]
    max_vars = ["Rain"]

    # Use the naive imputer (defined below) to create the imputed data
    preds_mean = impute_missing_weather(wd_full, strategy="mean", time_steps=time_steps, impute_cols=mean_vars)
    preds_min = impute_missing_weather(wd_full, strategy="min", time_steps=time_steps, impute_cols=min_vars)
    preds_max = impute_missing_weather(wd_full, strategy="max", time_steps=time_steps, impute_cols=max_vars)
    all_preds = pd.concat([preds_mean, preds_min, preds_max], axis=1)
    all_preds = build_temporal_features(all_preds.loc[:, impute_cols], "Datetime")
    all_preds = all_preds.set_index("Datetime")

    wd_curr = wd_full.loc[wd_full["Year"] >= 2016]  # post-2016 subset (not used further in this function)
    # Append the imputed "forecast" rows to the observed weather
    wd_df = pd.concat([wd_full, all_preds], axis=0, join="outer")

    # NOTE: as written, this drop is a no-op because the result is not assigned back,
    # so the temporal helper columns remain in the returned frame.
    time_vars = ["Year", "Month", "Day", "DayOfWeek", "DayOfYear", "is_weekend", "is_holiday", "Season"]
    wd_df.drop(columns=time_vars)

    return wd_df

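# Usage sketch (illustrative only): the path below is a placeholder for the raw
# station-level weather export this function expects (Year/Month/Day, Latitude,
# Longitude, temperature and wind columns).
def _example_build_weather_data():
    wd_df = build_weather_data("data/weather_raw.csv")  # placeholder path
    print(wd_df[["MeanTemp", "Percipitation"]].describe())
    return wd_df
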
class MyNaiveImputer():
    """Naively impute future values by aggregating historical values that share a seasonal key (DayOfYear by default)."""

    def __init__(self, data, time_steps=49, freq="D"):
        self.data = data.reset_index().copy()
        start_date = self.data["Datetime"].max() + pd.Timedelta(days=1)
        end_date = start_date + pd.Timedelta(days=time_steps - 1)
        missing_range = pd.date_range(start_date, end_date, freq="D")
        self.missing_df = pd.DataFrame(missing_range, columns=["Datetime"])
        self.missing_df = build_temporal_features(self.missing_df, "Datetime")

    def impute(self, col, by="DayOfYear", strategy="mean"):
        # For each missing date, look up the historical mean/median/max/min of `col`
        # for the matching value of the grouping key
        def naive_impute_by(val, impute_X, data, by=by, strategy=strategy):
            if strategy.lower() == "mean":
                func = pd.core.groupby.DataFrameGroupBy.mean
            elif strategy.lower() == "median":
                func = pd.core.groupby.DataFrameGroupBy.median
            elif strategy.lower() == "max":
                func = pd.core.groupby.DataFrameGroupBy.max
            elif strategy.lower() == "min":
                func = pd.core.groupby.DataFrameGroupBy.min
            grouped = func(data.groupby(by)[impute_X])
            return grouped[val]

        return self.missing_df["DayOfYear"].apply(naive_impute_by, args=(col, self.data, by, strategy))

    def impute_all(self, cols, by="DayOfYear", strategy="mean"):
        output_df = self.missing_df.copy()
        for col in cols:
            output_df[col] = self.impute(col, by, strategy)
        return output_df


def impute_missing_weather(data, strategy="mean", time_steps=7, impute_cols=impute_cols):
    final_imputer = MyNaiveImputer(data, time_steps=time_steps)
    preds = final_imputer.impute_all(impute_cols, strategy=strategy).set_index("Datetime")
    return preds

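# Usage sketch (illustrative only): impute 7 extra days of MeanTemp by averaging
# historical values with the same DayOfYear. Assumes `wd_full` is the aggregated
# weather frame built above, so build_temporal_features has already added DayOfYear.
def _example_naive_impute(wd_full):
    imputer = MyNaiveImputer(wd_full, time_steps=7)
    future_temps = imputer.impute_all(["MeanTemp"], by="DayOfYear", strategy="mean")
    return future_temps
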
def get_feature_importance(data, target, split_date="01-01-2016", print_score=False):
    import torch
    device = "cuda" if torch.cuda.is_available() else "cpu"

    train = data.loc[data.index <= pd.to_datetime(split_date)]
    test = data.loc[data.index > pd.to_datetime(split_date)]

    if type(target) == str:
        X_train, X_test = train.drop(columns=target), test.drop(columns=target)
        y_train, y_test = train[target], test[target]
    else:
        X_train, X_test = train, test
        y_train, y_test = target.loc[train.index], target.loc[test.index]
        target = str(target.name)

    if 'int' in y_train.dtype.name:
        # Use binary Classifier
        metric = "logloss"
        model = xgb.XGBClassifier(
            base_score=0.25,
            n_estimators=500,
            early_stopping_rounds=50,
            objective='binary:logistic',
            device=device,
            max_depth=3,
            learning_rate=0.01,
            enable_categorical=True,
            eval_metric="logloss",
            importance_type="gain",
            random_state=22,
        )
    else:
        metric = "MAPE"
        model = xgb.XGBRegressor(
            n_estimators=500,
            early_stopping_rounds=50,
            objective='reg:squarederror',
            device=device,
            max_depth=3,
            learning_rate=0.01,
            enable_categorical=True,
            eval_metric="mape",
            importance_type="gain",
            random_state=22,
        )

    _ = model.fit(
        X_train, y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        verbose=False
    )

    fig, ax = plt.subplots()
    ax = plot_importance(model, title=f"Feature Importance for {target}", ax=ax)
    if print_score:
        best_score = str(round(100 * model.best_score, 2)) + "%"
        print(f"Best {metric}: {best_score}")
    return fig, model

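# Usage sketch (illustrative only; assumes `merged_df` is indexed by date and
# contains the "Target" column alongside the engineered features).
def _example_feature_importance(merged_df):
    fig, model = get_feature_importance(merged_df, target="Target", split_date="01-01-2018", print_score=True)
    return fig, model
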
def corr_with_lag(data, target_col, covar, lags=[1], method="pearson"):
    data_lagged = pd.DataFrame()
    data_lagged["Target"] = data[target_col]
    for lag in lags:
        new_col = f"lag_{lag}D"
        data_lagged[new_col] = data[covar].shift(lag)
    return data_lagged.dropna().corr(method=method)


def plot_correlations(data, target, covar, lags=[0, 1, 2, 3, 4, 5, 6, 7, 10, 14, 18, 21], method="pearson"):
    df_corr = corr_with_lag(data, target, covar, lags, method)
    mask = np.triu(np.ones_like(df_corr, dtype=bool))
    z_dim, x_dim = len(df_corr.to_numpy()), len(df_corr.columns)
    y_dim = x_dim
    fig = ff.create_annotated_heatmap(
        z=df_corr.mask(mask).to_numpy(),
        x=df_corr.columns.tolist(),
        y=df_corr.columns.tolist(),
        colorscale=px.colors.diverging.RdBu,
        zmin=-1,
        zmax=1,
        ygap=2,
        xgap=2,
        name="",
        customdata=np.full((x_dim, y_dim, z_dim), covar),
        hovertemplate='%{customdata[0]}<br>%{x} to %{y}<br>Correlation: %{z:.4f}',
        showscale=True
    )

    fig.update_layout(
        title_text=f"Correlation Heatmap of Lagged {covar}",
        title_x=0.5,
        height=600,
        xaxis_showgrid=False,
        yaxis_showgrid=False,
        xaxis_zeroline=False,
        yaxis_zeroline=False,
        yaxis_autorange='reversed',
        template='plotly_white'
    )

    # fig.update_annotations(font=dict(color="black"))

    for i in range(len(fig.layout.annotations)):
        if fig.layout.annotations[i].text == 'nan':
            fig.layout.annotations[i].text = ""
        else:
            corr_i = round(float(fig.layout.annotations[i].text), 3)
            fig.layout.annotations[i].text = corr_i
            if (corr_i > 0.2 and corr_i < 0.5) or (corr_i < -0.2 and corr_i > -0.5):
                fig.layout.annotations[i].font.color = "white"

    return fig

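# Usage sketch (illustrative only): correlation of the target with MeanTemp at a
# handful of lags, using the two helpers defined above.
def _example_lag_correlations(merged_df):
    fig = plot_correlations(merged_df, target="Target", covar="MeanTemp", lags=[0, 1, 7, 14])
    fig.show()
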
def plot_all_correlations(data, data_name="weather", method="pearson", width=1392, height=600):
    if data_name == "weather":
        covars = ["MeanTemp", "MinTemp", "MaxTemp", 'DewPoint', 'Percipitation', 'WindSpeed', 'Gust', 'MaxSustainedWind', "SnowDepth", "SnowIce", "Rain", "Target"]
    elif data_name == "service":
        covars = [
            "num_closed_tickets",
            # Agency Group Counts
            'AG_Buildings', 'AG_Environment & Sanitation', 'AG_Health',
            'AG_Parks', 'AG_Security', 'AG_Transportation',
            'AG_Other',
            # Borough Counts
            'Borough_BRONX', 'Borough_BROOKLYN', 'Borough_MANHATTAN',
            'Borough_QUEENS', 'Borough_STATEN ISLAND',
            'Borough_OTHER',
            # Descriptor Group Counts
            'DG_damaged_sign_sidewalk_missing',
            'DG_english_emergency_spanish_chinese',
            'DG_exemption_commercial_tax_business',
            'DG_license_complaint_illegal_violation', 'DG_noise_animal_truck_dead',
            'DG_odor_food_air_smoke', 'DG_order_property_inspection_condition',
            'DG_water_basin_litter_missed', "Target"
        ]

    df_corr = data.loc[:, covars].corr(method=method)

    mask = np.triu(np.ones_like(df_corr, dtype=bool))
    fig = ff.create_annotated_heatmap(
        z=df_corr.mask(mask).to_numpy(),
        x=df_corr.columns.tolist(),
        y=df_corr.columns.tolist(),
        colorscale=px.colors.diverging.RdBu,
        zmin=-1,
        zmax=1,
        ygap=2,
        xgap=2,
        name="",
        hovertemplate='%{x}-%{y} <br>Correlation: %{z:.4f}',
        showscale=True
    )

    fig.update_layout(
        title_text=f"Correlation Heatmap of {data_name.title()} Variables & Target",
        title_x=0.5,
        height=height,
        width=width,
        xaxis_showgrid=False,
        yaxis_showgrid=False,
        xaxis_zeroline=False,
        yaxis_zeroline=False,
        yaxis_autorange='reversed',
        template='plotly_white'
    )

    fig.update_annotations(font=dict(color="black"))

    for i in range(len(fig.layout.annotations)):
        if fig.layout.annotations[i].text == 'nan':
            fig.layout.annotations[i].text = ""
        else:
            corr_i = round(float(fig.layout.annotations[i].text), 3)
            fig.layout.annotations[i].text = corr_i
            if corr_i > 0.5 or corr_i < -0.5:
                fig.layout.annotations[i].font.color = "white"

    return fig

def plot_gust_interpolation(data):
    fig, ax = plt.subplots(2, 2, figsize=(15, 12))
    data["Gust_lin"].plot(ax=ax[0][0], color=color_pal[0], title="linear")
    data["Gust_spline3"].plot(ax=ax[0][1], color=color_pal[1], title="spline3")
    data["Gust_spline5"].plot(ax=ax[1][0], color=color_pal[2], title="spline5")
    data["Gust_quad"].plot(ax=ax[1][1], color=color_pal[3], title="quadratic")
    curr_fig = plt.gcf()
    plt.close()
    return curr_fig


def plot_train_split(train, val):
    fig, ax = plt.subplots(figsize=(15, 5))
    train["Target"].plot(ax=ax, label="Training Set")
    val["Target"].plot(ax=ax, label="Validation Set")
    ax.axvline('2018-04-01', color='black', ls='--')
    ax.legend()
    ax.set_title("Train Test Split (2018-04-01)")
    curr_fig = plt.gcf()
    plt.close()
    return curr_fig


def plot_predictions(train, val, preds):
    fig, ax = plt.subplots(figsize=(16, 5))
    train["Target"].plot(ax=ax, label="Training Set")
    val["Target"].plot(ax=ax, label="Validation Set")
    val["Prediction"] = preds
    val["Prediction"].plot(ax=ax, label="Prediction")
    ax.axvline('2018-04-01', color='black', ls='--')
    ax.legend()
    ax.set_title("Model Prediction for 311 Call Volume")

    curr_fig = plt.gcf()
    plt.close()
    return curr_fig


def plot_final_feature_importance(model):
    fig, ax = plt.subplots(figsize=(12, 6))
    ax = plot_importance(model, max_num_features=20, title="Feature Importance for 311 Service Calls", ax=ax)

    curr_fig = plt.gcf()
    plt.close()

    return curr_fig

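# Usage sketch (illustrative only): visualize a chronological train/validation split
# of the merged dataframe around the 2018-04-01 cutoff used by the plots above.
def _example_split_plots(merged_df):
    train = merged_df.loc[merged_df.index < "2018-04-01"]
    val = merged_df.loc[merged_df.index >= "2018-04-01"]
    return plot_train_split(train, val)
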
def predict_recurse(dataset, test, model, features_to_impute=['Target_L1D', 'Target_Diff7D', 'Target_Diff14D'], last_feature='Target_L6D'):
    n_steps = len(test)
    merged_data = pd.concat([dataset[-14:], test], axis=0)
    all_index = merged_data.index
    X_test = test.drop(columns="Target")
    sd = -6  # Starting point for filling the next value

    # For each step, get the predictions
    for i in range(n_steps - 1):
        pred = model.predict(X_test)[i]
        # For the three features needed, compute the new value
        X_test.loc[all_index[sd + i], features_to_impute[0]] = pred
        X_test.loc[all_index[sd + i], features_to_impute[1]] = pred - merged_data.loc[all_index[sd + i - 7], features_to_impute[1]]
        X_test.loc[all_index[sd + i], features_to_impute[2]] = pred - merged_data.loc[all_index[sd + i - 14], features_to_impute[2]]

        # In the last iteration compute the Lag6D value
        if i == 5:
            X_test.loc[all_index[sd + i], last_feature] = pred - merged_data.loc[all_index[sd + i - 6], last_feature]

    final_preds = model.predict(X_test)
    return final_preds

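# Usage sketch (illustrative only; assumes `model` was trained on the same feature
# columns present in `test`, as in the recursive-forecast step of this app).
def _example_recursive_forecast(dataset, test, model):
    preds = predict_recurse(dataset, test, model)
    return pd.Series(preds, index=test.index, name="Prediction")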