Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from huggingface_hub import Repository | |
import os | |
from pathlib import Path | |
import json | |
import numpy as np | |
# --- Dataset connection settings -------------------------------------------
# The access token is read from the "space_to_dataset" environment variable
# (stored as a secret key-value pair on the Space). When the variable is unset
# or empty, True is passed instead.
# NOTE(review): per the huggingface_hub docs, use_auth_token=True means "use
# the locally cached login token" - confirm for the installed version.
auth_token = os.environ.get("space_to_dataset") or True
DATASET_REPO_URL = 'ppsingh/annotation_data'  # path to dataset repo
DATA_DIR = "data"  # local directory the dataset repo is cloned into
DATA_FILENAME = "paralist.json"
# Single source of truth for the annotation file's location on disk.
DATA_FILE = os.path.join(DATA_DIR, DATA_FILENAME)

# Clone the dataset repo into DATA_DIR.
repo = Repository(
    local_dir=DATA_DIR,
    clone_from=DATASET_REPO_URL,
    repo_type="dataset",
    use_auth_token=auth_token,
)

# Alias kept because code further down the script builds the data path from
# `file_name`; it must always equal DATA_FILENAME.
file_name = DATA_FILENAME

# Load the annotation data from the cloned repo.
with open(DATA_FILE, 'r', encoding="utf8") as json_file:
    paraList = json.load(json_file)

# Outer-level keys of the JSON, offered to the user as selectable topics.
# Materialised as a list so the widget receives a real sequence rather than
# a live dict view.
keys = list(paraList.keys())
#data = pd.read_csv("test.csv") | |
# Sidebar: short description of the app, rendered as markdown. The topic
# drop-down is added to the same sidebar further down the script.
_sidebar_intro = """
# Data Annotation Demo
This app is demo how to use the space to provide user interface for the data annotation/tagging. The data resides in repo_type 'dataset'.
"""
st.sidebar.markdown(_sidebar_intro)
# --- Topic selection and annotation UI --------------------------------------
# Lets the user pick a topic, shows one randomly drawn text segment together
# with a tag, collects a 0-5 rating for that (segment, tag) pair and, on
# Submit, appends the rating to the segment's 'annotation' list, rewrites the
# JSON file and pushes the dataset repo.

# Tags a segment can be rated against; used as the second-level keys of
# paraList (paraList[topic][tag] is indexed as a list of segment dicts).
# NOTE(review): assumes every topic contains both keys, spelled exactly as
# below - confirm against the dataset.
TAG_CHOICES = ('Gender', 'Women Empowernment')
# Number of leading segments per tag that are sampled from.
# NOTE(review): carried over from the original hard-coded randint(0, 3);
# confirm every tag list holds at least this many entries.
SEGMENTS_PER_TAG = 3
# st.session_state slot remembering which segment is currently on screen.
SAMPLE_STATE_KEY = 'annotation_sample'

topic = None
if keys is not None:
    topic = st.sidebar.selectbox(
        label="Choose dataset topic to load", options=keys)

if topic is not None:
    c1, c2, c3 = st.columns([3, 1, 1])

    with c3:
        st.header('Feedback')
        feedback = st.selectbox(
            '0 If Tag is not a good keyword for text, 5 for prefect match',
            (0, 1, 2, 3, 4, 5))

    # Streamlit re-executes the whole script on every widget interaction,
    # including the click on Submit. The sample that was on screen when the
    # user clicked is therefore read back from session state; drawing a new
    # random one first would attach the rating to a segment the user never
    # saw.
    shown = st.session_state.get(SAMPLE_STATE_KEY)

    # The button is created after the columns, so it still renders below
    # them even though columns c1/c2 are filled in afterwards.
    if st.button('Submit'):
        if feedback is not None and shown is not None:
            record = paraList[shown['topic']][shown['choice']][shown['idx']]
            record['annotation'].append(feedback)
            # Mode 'w' is required: json.dump on a handle opened with 'r'
            # raises io.UnsupportedOperation and nothing would be saved.
            with open('data/{}'.format(file_name), 'w',
                      encoding="utf8") as json_file:
                json.dump(paraList, json_file, ensure_ascii=True)
            repo.push_to_hub('added new annotation')
        # Rating handled: forget the old sample so a fresh one is drawn below.
        shown = None

    # Draw a new sample on first load, after a submit, or when the user
    # switched topic; otherwise keep showing the pinned one.
    if shown is None or shown['topic'] != topic:
        # randint's upper bound is exclusive: randint(0, 1) is always 0, which
        # made the second tag unreachable. Use the number of tags instead.
        choice = TAG_CHOICES[np.random.randint(0, len(TAG_CHOICES))]
        idx = int(np.random.randint(0, SEGMENTS_PER_TAG))
        shown = {'topic': topic, 'choice': choice, 'idx': idx}
        st.session_state[SAMPLE_STATE_KEY] = shown

    choice = shown['choice']
    idx = shown['idx']

    with c1:
        st.header('Text')
        st.write(paraList[topic][choice][idx]['textsegment'])
    with c2:
        st.header('Tag')
        st.text(choice)
#new_row = title | |
# data = data.append(new_row, ignore_index=True) | |
# st.write(data) | |
# st.write(os.getcwd()) | |
# data.to_csv('test.csv', index= False) | |
#st.write(df) | |
# st.write('data/test.csv') | |
# iterate over files in | |
# that directory | |
#directory = os.getcwd() | |
#files = Path(directory).glob('*') | |
#for file in files: | |
# st.write(file) | |
#with open(DATA_FILE, "a") as csvfile: | |
# writer = csv.DictWriter(csvfile, fieldnames=["Sentences"]) | |
# writer.writerow({'Sentences': new_row}) | |
# repo.push_to_hub('adding new line') | |
# st.write('Succcess') | |