Spaces: Running on T4
added git lfs
Browse files
- Dockerfile +5 -2
- app.py +6 -68
- helper/utils.py +69 -0
Dockerfile
CHANGED
@@ -5,12 +5,15 @@ ENV PYTHONUNBUFFERED=1
|
|
5 |
|
6 |
RUN apt-get update && apt-get install --no-install-recommends -y \
|
7 |
build-essential \
|
8 |
-
# python3.9 \ # Commented out as it might be pre-installed
|
9 |
python3-pip \
|
10 |
git \
|
11 |
ffmpeg \
|
12 |
libsm6 \
|
13 |
libxext6 \
|
|
|
|
|
|
|
|
|
14 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
15 |
|
16 |
WORKDIR /code
|
@@ -48,4 +51,4 @@ WORKDIR $HOME/app
|
|
48 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
49 |
COPY --chown=user . $HOME/app
|
50 |
|
51 |
-
CMD ["python3", "app.py"]
|
|
|
5 |
|
6 |
# Install build/runtime system packages plus git-lfs from the packagecloud repository.
# The repository setup script is downloaded to a file first (curl -f fails on HTTP
# errors) so that a failed download stops the build instead of piping nothing to bash.
# `git lfs install --system` writes the LFS filters to the system-wide git config so
# they apply to every account in the image, not only the one running this build step.
# NOTE(review): later steps copy files with --chown=user, which suggests the app runs
# as a non-root user - confirm against the full Dockerfile.
RUN apt-get update && apt-get install --no-install-recommends -y \
    build-essential \
    python3-pip \
    git \
    ffmpeg \
    libsm6 \
    libxext6 \
    curl \
    && curl -fsSL https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh -o /tmp/git-lfs-repo.sh \
    && bash /tmp/git-lfs-repo.sh \
    && rm -f /tmp/git-lfs-repo.sh \
    && apt-get install --no-install-recommends -y git-lfs \
    && git lfs install --system \
    && apt-get clean && rm -rf /var/lib/apt/lists/*
|
18 |
|
19 |
WORKDIR /code
|
|
|
51 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
52 |
COPY --chown=user . $HOME/app
|
53 |
|
54 |
+
CMD ["python3", "app.py"]
|
app.py
CHANGED
@@ -1,13 +1,6 @@
|
|
1 |
-
import hashlib
|
2 |
import os
|
3 |
-
import shutil
|
4 |
-
import sqlite3
|
5 |
-
from datetime import datetime
|
6 |
|
7 |
import gradio as gr
|
8 |
-
import huggingface_hub
|
9 |
-
import pandas as pd
|
10 |
-
import pytz
|
11 |
from apscheduler.schedulers.background import BackgroundScheduler
|
12 |
|
13 |
from helper.gradio_config import css, theme
|
@@ -15,71 +8,16 @@ from helper.text.text_about import TextAbout
|
|
15 |
from helper.text.text_app import TextApp
|
16 |
from helper.text.text_howto import TextHowTo
|
17 |
from helper.text.text_roadmap import TextRoadmap
|
|
|
18 |
from tabs.htr_tool import htr_tool_tab
|
19 |
from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
|
20 |
|
21 |
-
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
)
|
27 |
-
repo.git_pull()
|
28 |
-
|
29 |
-
# Set db to latest
|
30 |
-
shutil.copyfile("./data/traffic_data.db", DB_FILE)
|
31 |
-
|
32 |
-
|
33 |
-
def hash_ip(ip_address):
|
34 |
-
return hashlib.sha256(ip_address.encode()).hexdigest()
|
35 |
-
|
36 |
-
|
37 |
-
# Create table if it doesn't already exist
|
38 |
-
db = sqlite3.connect(DB_FILE)
|
39 |
-
try:
|
40 |
-
db.execute("SELECT * FROM ip_data").fetchall()
|
41 |
-
db.close()
|
42 |
-
except sqlite3.OperationalError:
|
43 |
-
db.execute(
|
44 |
-
"""
|
45 |
-
CREATE TABLE ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
46 |
-
current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
|
47 |
-
hashed_ip TEXT)
|
48 |
-
"""
|
49 |
-
)
|
50 |
-
db.commit()
|
51 |
-
db.close()
|
52 |
-
|
53 |
-
|
54 |
-
def current_time_sw():
|
55 |
-
swedish_tz = pytz.timezone("Europe/Stockholm")
|
56 |
-
return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")
|
57 |
-
|
58 |
-
|
59 |
-
def add_ip_data(request: gr.Request):
|
60 |
-
host = request.client.host
|
61 |
-
hashed_ip = hash_ip(host)
|
62 |
-
|
63 |
-
db = sqlite3.connect(DB_FILE)
|
64 |
-
cursor = db.cursor()
|
65 |
-
cursor.execute("INSERT INTO ip_data(current_time, hashed_ip) VALUES(?,?)", [current_time_sw(), hashed_ip])
|
66 |
-
db.commit()
|
67 |
-
db.close()
|
68 |
-
|
69 |
-
|
70 |
-
def backup_db():
|
71 |
-
shutil.copyfile(DB_FILE, "./data/traffic_data.db")
|
72 |
-
db = sqlite3.connect(DB_FILE)
|
73 |
-
ip_data = db.execute("SELECT * FROM ip_data").fetchall()
|
74 |
-
pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip"]).to_csv("./data/ip_data.csv", index=False)
|
75 |
-
|
76 |
-
print("updating traffic_data")
|
77 |
-
repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")
|
78 |
-
|
79 |
-
|
80 |
-
scheduler = BackgroundScheduler()
|
81 |
-
scheduler.add_job(func=backup_db, trigger="interval", seconds=60)
|
82 |
-
scheduler.start()
|
83 |
|
84 |
|
85 |
with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
|
|
|
1 |
import os
|
|
|
|
|
|
|
2 |
|
3 |
import gradio as gr
|
|
|
|
|
|
|
4 |
from apscheduler.schedulers.background import BackgroundScheduler
|
5 |
|
6 |
from helper.gradio_config import css, theme
|
|
|
8 |
from helper.text.text_app import TextApp
|
9 |
from helper.text.text_howto import TextHowTo
|
10 |
from helper.text.text_roadmap import TextRoadmap
|
11 |
+
from helper.utils import add_ip_data, backup_db
|
12 |
from tabs.htr_tool import htr_tool_tab
|
13 |
from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
|
14 |
|
15 |
+
def env_flag_enabled(raw):
    """Return True when an environment-style flag value means "enabled".

    Unset/empty values and the case-insensitive strings "0", "false", "no"
    and "off" count as disabled; every other non-empty value counts as enabled.
    """
    if not raw:
        return False
    return str(raw).strip().lower() not in {"", "0", "false", "no", "off"}


# Raw value of the flag; kept under its original name in case other code reads it.
SECRET_KEY = os.environ.get("AM_I_IN_A_DOCKER_CONTAINER", False)

# Start the periodic backup job only when the flag is really switched on.
# A plain truthiness test would also treat "0" or "false" as enabled.
if env_flag_enabled(SECRET_KEY):
    scheduler = BackgroundScheduler()
    scheduler.add_job(func=backup_db, trigger="interval", seconds=60)
    scheduler.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
helper/utils.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hashlib
|
2 |
+
import os
|
3 |
+
import shutil
|
4 |
+
import sqlite3
|
5 |
+
from datetime import datetime
|
6 |
+
|
7 |
+
import gradio as gr
|
8 |
+
import huggingface_hub
|
9 |
+
import pandas as pd
|
10 |
+
import pytz
|
11 |
+
|
12 |
+
|
13 |
+
def hash_ip(ip_address):
    """Return the hexadecimal SHA-256 digest of the given address string."""
    digest = hashlib.sha256()
    digest.update(ip_address.encode())
    return digest.hexdigest()
|
15 |
+
|
16 |
+
|
17 |
+
def current_time_sw():
    """Return the current time in the Europe/Stockholm zone as 'YYYY-MM-DD HH:MM:SS'."""
    stockholm = pytz.timezone("Europe/Stockholm")
    now = datetime.now(tz=stockholm)
    return now.strftime("%Y-%m-%d %H:%M:%S")
|
20 |
+
|
21 |
+
|
22 |
+
def add_ip_data(request: gr.Request):
    """Record one visit in the ``ip_data`` table of the database at ``DB_FILE``.

    The client host is read from ``request.client.host``, hashed with
    ``hash_ip`` and inserted together with the timestamp returned by
    ``current_time_sw()``.

    Args:
        request: The Gradio request whose client host is recorded.
    """
    hashed_ip = hash_ip(request.client.host)

    db = sqlite3.connect(DB_FILE)
    try:
        db.execute(
            "INSERT INTO ip_data(current_time, hashed_ip) VALUES(?,?)",
            [current_time_sw(), hashed_ip],
        )
        db.commit()
    finally:
        # Close even when the insert or commit raises, so a failed write
        # does not leave the connection open.
        db.close()
|
31 |
+
|
32 |
+
|
33 |
+
def backup_db():
    """Copy the traffic database into ``./data`` and push it to the hub.

    Steps:
      1. Copy ``DB_FILE`` to ``./data/traffic_data.db``.
      2. Dump every row of ``ip_data`` to ``./data/ip_data.csv``.
      3. Push the ``repo`` working copy with a non-blocking ``push_to_hub``.
    """
    # NOTE(review): this copies the database file as-is; if another thread is
    # writing at the same moment the copy may not be a consistent snapshot -
    # confirm whether that matters here.
    shutil.copyfile(DB_FILE, "./data/traffic_data.db")

    db = sqlite3.connect(DB_FILE)
    try:
        ip_data = db.execute("SELECT * FROM ip_data").fetchall()
    finally:
        # Each call opens its own connection, so it must be released here
        # rather than left for garbage collection.
        db.close()

    pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip"]).to_csv("./data/ip_data.csv", index=False)

    print("updating traffic_data")
    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")
|
41 |
+
|
42 |
+
|
43 |
+
# Path of the working SQLite database used while the app is running.
DB_FILE = "./traffic_data.db"

# Clone the dataset repository that stores the traffic data into ./data and pull it.
TOKEN = os.environ.get("HUB_TOKEN")
repo = huggingface_hub.Repository(
    local_dir="data", repo_type="dataset", clone_from="Riksarkivet/traffic_demo_data", use_auth_token=TOKEN
)
repo.git_pull()

# Set db to latest
shutil.copyfile("./data/traffic_data.db", DB_FILE)

# Create table if it doesn't already exist.
# IF NOT EXISTS lets SQLite do the existence check, so the whole table is not
# read into memory at import time just to find out whether it is there.
db = sqlite3.connect(DB_FILE)
try:
    db.execute(
        """
        CREATE TABLE IF NOT EXISTS ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                                            current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
                                            hashed_ip TEXT)
        """
    )
    db.commit()
finally:
    # Release the setup connection whether or not table creation succeeded.
    db.close()
|