import copy
def preprocess(example):
def _add_adjusted_col_offsets(table):
    """Add adjusted column offsets to take into account multi-column cells.

    Every cell is deep-copied and the copy gains two keys:

    * ``"adjusted_col_start"``: the sum of the ``"column_span"`` values of
      the cells that precede it in the same row (0 for the first cell).
    * ``"adjusted_col_end"``: ``adjusted_col_start`` plus the cell's own
      ``"column_span"``.

    The input table and its cells are left untouched.

    Args:
        table: Iterable of rows; each row is an iterable of cell dicts that
            carry a numeric ``"column_span"`` entry.

    Returns:
        A new list of rows (lists) holding the annotated cell copies, in the
        same order as the input.

    Raises:
        KeyError: If a cell has no ``"column_span"`` entry.
    """
    adjusted_table = []
    for row in table:
        # The running offset restarts at the left edge for every row.
        real_col_index = 0
        adjusted_row = []
        for cell in row:
            # Work on a copy so the caller's table is never mutated.
            adjusted_cell = copy.deepcopy(cell)
            col_end = real_col_index + adjusted_cell["column_span"]
            adjusted_cell["adjusted_col_start"] = real_col_index
            adjusted_cell["adjusted_col_end"] = col_end
            real_col_index = col_end
            adjusted_row.append(adjusted_cell)
        adjusted_table.append(adjusted_row)
    return adjusted_table
def _get_heuristic_row_headers(adjusted_table, row_index, col_index):
    """Heuristic to find row headers.

    A cell counts as a row header for the target cell when it sits in the
    same row, at a list position before ``col_index``, and its
    ``"is_header"`` entry is truthy.

    Args:
        adjusted_table: Sequence of rows, each a sequence of cell dicts that
            carry an ``"is_header"`` entry.
        row_index: Index of the target cell's row in ``adjusted_table``.
        col_index: Position of the target cell within that row's cell list.

    Returns:
        List of the header cells found to the left of the target cell, in
        left-to-right order; empty when there are none.

    Raises:
        IndexError: If ``row_index`` is out of range, or ``col_index``
            exceeds the number of cells in the row.
        KeyError: If an inspected cell has no ``"is_header"`` entry.
    """
    row = adjusted_table[row_index]
    # Index-based on purpose: an out-of-range col_index must raise rather
    # than be silently clipped the way a slice would.
    return [row[i] for i in range(col_index) if row[i]["is_header"]]
def _get_heuristic_col_headers(adjusted_table, row_index, col_index):
    """Heuristic to find column headers.

    A cell counts as a column header for the target cell when it lies in a
    row above ``row_index``, its ``"is_header"`` entry is truthy, and its
    ``[adjusted_col_start, adjusted_col_end)`` range overlaps the target
    cell's range.

    Args:
        adjusted_table: Sequence of rows, each a sequence of cell dicts that
            carry ``"adjusted_col_start"``, ``"adjusted_col_end"`` and
            ``"is_header"`` entries.
        row_index: Index of the target cell's row in ``adjusted_table``.
        col_index: Position of the target cell within that row's cell list
            (a list index, not an adjusted column offset).

    Returns:
        List of matching header cells, ordered top-to-bottom and then
        left-to-right; empty when there are none.

    Raises:
        IndexError: If ``row_index`` or ``col_index`` is out of range.
        KeyError: If an inspected cell lacks one of the required entries.
    """
    adjusted_cell = adjusted_table[row_index][col_index]
    adjusted_col_start = adjusted_cell["adjusted_col_start"]
    adjusted_col_end = adjusted_cell["adjusted_col_end"]

    col_headers = []
    for r in range(row_index):
        for cell in adjusted_table[r]:
            # Strict comparisons: spans that merely touch at a boundary do
            # not count as overlapping.
            overlaps = (cell["adjusted_col_start"] < adjusted_col_end and
                        cell["adjusted_col_end"] > adjusted_col_start)
            if overlaps and cell["is_header"]:
                col_headers.append(cell)
    return col_headers
table = example['table']
cell_indices = example["highlighted_cells"]
table_str = ""
if example['table_page_title']:
table_str += "