# Spaces: Sleeping  (page-status banner captured together with the script; not part of the program)
import json
import os
from pathlib import Path

import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import Repository
# Declaring the variables for later use to talk to the dataset repo.
# The token is saved as a secret key-value pair in the environment and is read
# below; when the variable is unset or empty the value falls back to True
# (it is later passed to Repository as `use_auth_token` by read_dataset).
auth_token = os.environ.get("space_to_dataset") or True
DATASET_REPO_URL = 'ppsingh/annotation_data'  # path to dataset repo
DATA_FILENAME = "paralist.json"
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Data file name (kept as a separate name because read_dataset refers to it;
# it is the same file as DATA_FILENAME)
file_name = DATA_FILENAME
# reading the json
def read_dataset():
    """Clone the dataset repo into ``data/`` and load the annotation JSON.

    The repository is cloned from ``DATASET_REPO_URL`` (repo type
    ``"dataset"``) into the local ``data`` directory using ``auth_token``,
    then ``data/<file_name>`` is parsed as UTF-8 JSON.

    Returns:
        tuple: ``(repo, paraList)`` -- the ``Repository`` handle for the
        local clone and the object parsed from the JSON file.
    """
    repo = Repository(
        local_dir="data",
        clone_from=DATASET_REPO_URL,
        repo_type="dataset",
        use_auth_token=auth_token,
    )
    with open('data/{}'.format(file_name), 'r', encoding="utf8") as json_file:
        paraList = json.load(json_file)
    return repo, paraList
st.sidebar.markdown("""
# Data Annotation Demo
This app is demo how to use the space to provide user interface for the data annotation/tagging. The data resides in repo_type 'dataset'.
""")
# sidebar with info and drop down to select from the keys
repo, paraList = read_dataset()
# getting outer level keys in json; materialised as a list so the select box
# receives a plain indexable sequence
keys = list(paraList.keys())
# A dict's key view is never None, so the former `keys is not None` guard was
# always true; the select box is simply always rendered.
# NOTE(review): with no keys the select box is expected to return None,
# which the code below treats as "no topic" -- confirm against Streamlit docs.
topic = st.sidebar.selectbox(label="Choose dataset topic to load", options=keys)
#with st.container():
# Annotation form: show one randomly drawn text segment with its tag and let
# the user rate how well the tag fits (0-5).
# The form is only opened when there is something to show, so it always
# contains its submit button.
if topic is not None:
    subtopics = list(paraList[topic].keys())
    #st.write(subtopics)
    # Only tags that hold at least one text segment can be sampled.
    candidates = [name for name in subtopics if paraList[topic][name]]
    if not candidates:
        st.warning("No text segments available for this topic.")
    else:
        with st.form("annotation_form"):
            # np.random.randint excludes the upper bound, so the full length
            # is passed: every tag is reachable and a topic with a single tag
            # no longer fails with low >= high.
            val = np.random.randint(0, len(candidates))
            tag = candidates[val]
            segments = paraList[topic][tag]
            # The original drew the index from the first three segments;
            # keep that, but never index past the end of a shorter list.
            idx = np.random.randint(0, min(3, len(segments)))
            # Markdown headings need a space after '#'.
            st.markdown("# Text")
            st.write(segments[idx]['textsegment'])
            st.markdown("# Tag")
            st.write(tag)
            feedback = st.selectbox(
                '0 If Tag is not a good keyword for text, 5 for prefect match',
                (0, 1, 2, 3, 4, 5),
            )
            submitted = st.form_submit_button("Submit")
            if submitted:
                # NOTE(review): Streamlit re-executes the script when the form
                # is submitted, so `tag`/`idx` above are presumably re-drawn
                # before this line runs and the rating may be attached to a
                # different segment than the one the user saw -- confirm and
                # keep the shown sample across reruns if so.
                # NOTE(review): the rating is only added to the in-memory
                # object; nothing here writes it back to the JSON file or
                # pushes `repo`.
                segments[idx].setdefault('annotation', []).append(feedback)
                # debug output kept from the original
                st.write(type(paraList))
#c1, c2, c3 = st.columns([3, 1, 1])
#with c1:
# st.header('Text')
# st.write(paraList[topic][tag][idx]['textsegment'])
#with c2:
# st.header('Tag')
# st.text(tag)
#with c3:
# st.header('Feedback')
# feedback = None
# feedback = st.selectbox('0 If Tag is not a good keyword for text, 5 for prefect match',(0,1,2,3,4,5))
#if feedback:
# st.write(feedback)
# if st.button('Submit'):
# paraList[topic][choice][idx]['annotation'].append(feedback)
# with open('data/{}'.format(file_name), 'r', encoding="utf8") as json_file:
# json.dump(paraList,json_file, ensure_ascii = True)
# repo.push_to_hub('added new annotation')
#st.write(paraList)
#new_row = title
# data = data.append(new_row, ignore_index=True)
# st.write(data)
# st.write(os.getcwd())
# data.to_csv('test.csv', index= False)
#st.write(df)
# st.write('data/test.csv')
# iterate over files in
# that directory
#directory = os.getcwd()
#files = Path(directory).glob('*')
#for file in files:
# st.write(file)
#with open(DATA_FILE, "a") as csvfile:
# writer = csv.DictWriter(csvfile, fieldnames=["Sentences"])
# writer.writerow({'Sentences': new_row})
# repo.push_to_hub('adding new line')
# st.write('Succcess')