|
from fasthtml.common import * |
|
from fasthtml.components import * |
|
import json |
|
import string |
|
import random |
|
import jsonlines |
|
|
|
|
|
def gen_random_id() -> str:
    """Return a random identifier made of eight lowercase ASCII letters."""
    alphabet = string.ascii_lowercase
    letters = random.choices(alphabet, k=8)
    return "".join(letters)
|
|
|
|
|
def view_data(
    before,
    after,
    doc_id,
    data_source: str = None,
    data_sources=None,
    target: str = "colcontent",
    max_doc_id: int = 9,
):
    """Build a side-by-side "Raw format" / "Extracted format" JSON viewer.

    The returned ``Div`` carries ``id=target`` and contains a form with a
    range slider (and, optionally, a data-source drop-down). Both controls
    issue an HTMX GET to ``/curated/{target}`` on change and swap the
    response into ``#{target}``.

    Args:
        before: JSON-serializable object shown in the left column.
        after: JSON-serializable object shown in the right column.
        doc_id: Current slider value; also printed next to the slider.
        data_source: Entry of ``data_sources`` to mark as selected.
        data_sources: Iterable of data-source names for the drop-down.
            When ``None`` the drop-down is omitted entirely.
        target: Element id of the returned container; also used to build
            the form field names and the HTMX endpoint.
        max_doc_id: Upper bound of the slider. Defaults to 9, the value
            that used to be hard-coded.

    Returns:
        A ``Div`` wrapping the form and the two-column data display.
    """
    # HTMX attributes shared by the drop-down and the slider.
    htmx_attrs = dict(
        hx_get=f"/curated/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
    )

    # The drop-down is optional; a None child is passed to Form when absent,
    # exactly as before.
    source_picker = None
    if data_sources is not None:
        drop_down = Select(
            *[
                Option(ds, value=ds, selected=(ds == data_source))
                for ds in data_sources
            ],
            name=f"data_source_{target}",
            **htmx_attrs,
        )
        source_picker = Div(Label("Data source: ", drop_down))

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        **htmx_attrs,
        # Send the currently selected data source along with the slider value.
        hx_include=f'[name="data_source_{target}"]',
    )

    form = Form(
        source_picker,
        Div(
            Label("Data sample: ", slider, f"{doc_id}", cls="plotly_slider"),
        ),
        cls="plotly_input_container",
    )

    pre_style = "white-space: pre-wrap; word-break: break-all;"

    # ensure_ascii=False keeps non-ASCII text readable instead of emitting
    # \uXXXX escapes, consistent with DVS, DV and DV2.
    col1 = Div(
        H3("Raw format"),
        Pre(json.dumps(before, indent=4, ensure_ascii=False), style=pre_style),
        style="width: 48%; float: left; overflow-x: auto;",
    )
    col2 = Div(
        H3("Extracted format"),
        Pre(json.dumps(after, indent=4, ensure_ascii=False), style=pre_style),
        style="width: 48%; float: right; overflow-x: auto;",
    )

    data_display = Div(
        col1,
        col2,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
|
|
|
|
|
|
|
def DVS(
    left,
    header,
):
    """Render one JSON-serializable value in a scrollable box under a heading.

    Args:
        left: Object to pretty-print as JSON (non-ASCII characters are kept
            as-is rather than escaped).
        header: Text for the ``H3`` heading placed above the box.

    Returns:
        A ``Div`` containing the heading followed by the 200px-high display.
    """
    pretty_json = json.dumps(left, indent=4, ensure_ascii=False)
    json_block = Pre(
        pretty_json,
        style="white-space: pre-wrap; word-break: break-all;",
    )
    column = Div(json_block, style="float: left; overflow-x: auto;")
    viewer = Div(
        column,
        style="overflow: auto; clear: both; height: 200px; border: 1px solid #ccc; padding: 20px;",
    )
    heading = H3(header)
    return Div(heading, viewer, style="margin-top: 10px;")
|
|
|
|
|
def DV(
    left_file,
    doc_id,
    header,
    target: str = None,
):
    """Render one record of a JSON / JSON Lines file with a slider to browse it.

    The file is loaded in full on every call. Paths ending in ``jsonl`` are
    read with ``jsonlines``; anything else is parsed with ``json.load``. The
    slider issues an HTMX GET to ``/update/{target}`` on change, passing the
    file path and header through ``hx_vals`` so the ``update`` handler can
    re-render the viewer.

    Args:
        left_file: Path of the data file to display.
        doc_id: Index of the record to show; also the slider's current value.
        header: Heading text shown above the slider.
        target: Element id of the returned container. A random 8-letter id
            is generated when omitted.

    Returns:
        A ``Div`` with ``id=target`` holding the slider form and the record.

    Raises:
        IndexError: If ``doc_id`` is outside the loaded data.
    """
    if target is None:
        target = gen_random_id()

    # Use context managers so the file handle is closed as soon as the data
    # is loaded (previously both handles were left open).
    if left_file.endswith("jsonl"):
        with jsonlines.open(left_file) as reader:
            left = list(reader)
    else:
        with open(left_file, encoding="utf-8") as fh:
            left = json.load(fh)
    max_doc_id = len(left) - 1

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=f"/update/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
        hx_vals=json.dumps({"left_file": f"{left_file}", "header": f"{header}"}),
    )

    form = Div(
        H3(header),
        Label(
            "Data sample: ", slider, f"{doc_id} of {max_doc_id}", cls="plotly_slider"
        ),
        cls="plotly_input_container",
        style="padding: 20px;",
    )

    col1 = Div(
        Pre(
            json.dumps(left[doc_id], indent=4, ensure_ascii=False),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="float: left; overflow-x: auto;",
    )

    data_display = Div(
        col1,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
|
|
|
|
|
def DV2(
    left_file,
    right_file,
    doc_id,
    target: str = None,
):
    """Render matching records from two JSON files side by side.

    Both files are loaded in full with ``json.load`` on every call and
    indexed with the same ``doc_id``. The slider issues an HTMX GET to
    ``/update/{target}`` on change, passing both file paths through
    ``hx_vals`` so the ``update`` handler can re-render the viewer.

    Args:
        left_file: Path of the JSON file shown under "Raw format".
        right_file: Path of the JSON file shown under "Extracted format".
        doc_id: Index of the record to show; also the slider's current value.
        target: Element id of the returned container. A random 8-letter id
            is generated when omitted.

    Returns:
        A ``Div`` with ``id=target`` holding the slider form and both columns.

    Raises:
        IndexError: If ``doc_id`` is outside either loaded file.
    """
    if target is None:
        target = gen_random_id()

    # Context managers close the handles once parsing is done (previously
    # both files were left open).
    with open(left_file, encoding="utf-8") as fh:
        left = json.load(fh)
    with open(right_file, encoding="utf-8") as fh:
        right = json.load(fh)

    # Both files are indexed with the same doc_id, so bound the slider by the
    # shorter one; otherwise sliding past the end of the shorter file raises
    # IndexError on the server.
    max_doc_id = min(len(left), len(right)) - 1

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=f"/update/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
        hx_vals=json.dumps(
            {"left_file": f"{left_file}", "right_file": f"{right_file}"}
        ),
    )

    form = Div(
        Label(
            "Data sample: ", slider, f"{doc_id} of {max_doc_id}", cls="plotly_slider"
        ),
        cls="plotly_input_container",
        style="padding: 20px;",
    )

    pre_style = "white-space: pre-wrap; word-break: break-all;"

    col1 = Div(
        H3("Raw format", style="margin-top: 0px;"),
        Pre(json.dumps(left[doc_id], indent=4, ensure_ascii=False), style=pre_style),
        style="width: 48%; float: left; overflow-x: auto;",
    )
    col2 = Div(
        H3("Extracted format", style="margin-top: 0px;"),
        Pre(json.dumps(right[doc_id], indent=4, ensure_ascii=False), style=pre_style),
        style="width: 48%; float: right; overflow-x: auto;",
    )

    data_display = Div(
        col1,
        col2,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
|
|
|
|
|
def update(target: str, request):
    """Re-render the viewer identified by ``target`` from the request's query.

    Reads ``doc_id_{target}`` (default 3), ``left_file``, ``right_file`` and
    ``header`` from ``request.query_params``. When both file paths are
    present the two-column ``DV2`` viewer is returned (wrapped in a 1-tuple);
    otherwise the single-file ``DV`` viewer is returned.

    Args:
        target: Element id of the viewer being refreshed.
        request: Request object exposing ``query_params`` with a ``get`` method.

    Returns:
        ``(DV2(...),)`` for the two-file case, otherwise the ``DV(...)`` result.
    """
    # NOTE(review): left_file / right_file come straight from the query string
    # and are opened by DV / DV2 — confirm this route is never reachable by
    # untrusted clients.
    query = request.query_params
    doc_id = int(query.get(f"doc_id_{target}", 3))
    left_file = query.get("left_file")
    right_file = query.get("right_file")

    # Single-file view unless both paths were supplied.
    if not (left_file and right_file):
        return DV(left_file, doc_id, query.get("header"), target)

    # The trailing comma is deliberate here: the original returns a 1-tuple.
    return (DV2(left_file, right_file, doc_id, target),)
|
|