"""Streamlit demo: annotate text segments stored in a Hugging Face dataset repo.

The app clones the dataset repo, shows one randomly chosen text segment
together with its tag, lets the user rate the match (0-5) and pushes the
updated JSON file back to the hub.
"""
import json
import os
from pathlib import Path

import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import Repository

# Declaring the variables for later use to talk to dataset.
# The token is saved as a secret key-value pair in the environment; when it
# is absent, ``True`` is passed as ``use_auth_token`` instead.
auth_token = os.environ.get("space_to_dataset") or True
DATASET_REPO_URL = 'ppsingh/annotation_data'  # path to dataset repo
DATA_FILENAME = "paralist.json"
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Data file name (kept as a separate module-level name for compatibility).
file_name = DATA_FILENAME

# Keys used in ``st.session_state``.
_SAMPLE_KEY = "annotation_sample"
_FEEDBACK_KEY = "annotation_feedback"


@st.cache(allow_output_mutation=True)
def read_dataset():
    """Clone the dataset repo into ``data/`` and load the annotation JSON.

    Returns:
        A ``(repo, paraList)`` tuple: the ``Repository`` handle used later to
        push changes, and the parsed JSON content. The result is cached with
        ``allow_output_mutation=True`` so the script can mutate ``paraList``
        in place.
    """
    repo = Repository(
        local_dir="data",
        clone_from=DATASET_REPO_URL,
        repo_type="dataset",
        use_auth_token=auth_token,
    )
    with open(DATA_FILE, 'r', encoding="utf8") as json_file:
        paraList = json.load(json_file)
    return repo, paraList


def _draw_sample(para_list, topic):
    """Pick a random (tag, segment index) pair inside *topic*.

    Returns a dict with keys ``topic``, ``tag`` and ``idx``, or ``None`` when
    the topic has no tag with at least one segment.
    """
    # Assumes each tag maps to a list of segment dicts (the script indexes it
    # with an integer); tags with an empty list are skipped.
    tags = [tag for tag, segments in para_list[topic].items() if segments]
    if not tags:
        return None
    # ``randint``'s upper bound is exclusive, so passing the length covers
    # every element and also works when there is exactly one.
    tag = tags[np.random.randint(len(tags))]
    idx = int(np.random.randint(len(para_list[topic][tag])))
    return {"topic": topic, "tag": tag, "idx": idx}


def _current_sample(para_list, topic):
    """Return the sample being annotated, drawing a new one only if needed.

    Streamlit re-executes the whole script on every interaction; keeping the
    sample in ``st.session_state`` guarantees that the segment the user rated
    is the segment that receives the annotation.
    """
    sample = st.session_state.get(_SAMPLE_KEY)
    if sample is None or sample["topic"] != topic:
        sample = _draw_sample(para_list, topic)
        st.session_state[_SAMPLE_KEY] = sample
    return sample


def _save_annotation(repo, para_list, sample):
    """Form-submit callback: store the rating, persist the file and push it.

    Runs before the script is re-executed, so *sample* is still the one that
    was displayed when the user pressed "Submit".
    """
    if sample is None:
        return
    feedback = st.session_state.get(_FEEDBACK_KEY)
    entry = para_list[sample["topic"]][sample["tag"]][sample["idx"]]
    entry.setdefault('annotation', []).append(feedback)
    # Write with the same encoding that is used for reading.
    with open(DATA_FILE, "w", encoding="utf8") as outfile:
        json.dump(para_list, outfile)
    # Forget the sample so the upcoming rerun draws a fresh one.
    st.session_state.pop(_SAMPLE_KEY, None)
    repo.push_to_hub('added new annotation')


st.sidebar.markdown("""
# Data Annotation Demo
This app is demo how to use the space to provide user interface for the data annotation/tagging.
The data resides in repo_type 'dataset'.
""")

# sidebar with info and drop down to select from the keys
topic = None
repo, paraList = read_dataset()

# getting outer level keys in json
keys = list(paraList)
if keys:
    topic = st.sidebar.selectbox(label="Choose dataset topic to load", options=keys)

sample = _current_sample(paraList, topic) if topic is not None else None

with st.form("annotation_form"):
    if sample is not None:
        tag = sample["tag"]
        idx = sample["idx"]
        st.markdown("**Text**")
        st.write(paraList[topic][tag][idx]['textsegment'])
        st.markdown("**Tag**")
        st.write(tag)
        feedback = st.selectbox(
            '0 If Tag is not a good keyword for text, 5 for prefect match',
            (0, 1, 2, 3, 4, 5),
            key=_FEEDBACK_KEY,
        )
    submitted = st.form_submit_button(
        "Submit",
        on_click=_save_annotation,
        args=(repo, paraList, sample),
    )