annotation_dev / app.py
ppsingh's picture
Update app.py
7bb6ae5
raw
history blame
2.82 kB
import streamlit as st
import pandas as pd
from huggingface_hub import Repository
import os
from pathlib import Path
import json
import numpy as np
# Configuration for talking to the dataset repo that stores the annotations.
# The access token is stored as a Space secret and read from the environment;
# when it is absent we pass `True`, which huggingface_hub documents as "use the
# locally stored token" (NOTE(review): confirm for the pinned hub version).
auth_token = os.environ.get("space_to_dataset") or True
DATASET_REPO_URL = 'ppsingh/annotation_data'  # path to dataset repo
LOCAL_DIR = "data"  # local clone of the dataset repo
DATA_FILENAME = "paralist.json"
DATA_FILE = os.path.join(LOCAL_DIR, DATA_FILENAME)

# Tags a text segment can be shown with. The spelling is kept exactly as in
# the original script because these strings are used as keys into the JSON.
TAGS = ('Gender', 'Women Empowernment')

SIDEBAR_TEXT = """
# Data Annotation Demo
This app is demo how to use the space to provide user interface for the data annotation/tagging. The data resides in repo_type 'dataset'.
"""


def load_paragraphs(path=DATA_FILE):
    """Read and return the annotation JSON stored at *path*."""
    with open(path, 'r', encoding="utf8") as json_file:
        return json.load(json_file)


def save_paragraphs(paragraphs, path=DATA_FILE):
    """Write *paragraphs* back to *path* as JSON.

    The file is opened for writing; the previous code opened it read-only,
    so ``json.dump`` raised and no annotation was ever persisted.
    """
    with open(path, 'w', encoding="utf8") as json_file:
        json.dump(paragraphs, json_file, ensure_ascii=True)


def pick_sample(paragraphs, topic, tags=TAGS):
    """Randomly choose a ``(tag, index)`` pair to present for *topic*.

    Assumes ``paragraphs[topic]`` is a dict mapping tag -> list of segment
    dicts (that is how the script indexes it). Only tags that actually have
    at least one segment are considered.

    Returns:
        ``(tag, index)`` or ``None`` when the topic has no usable segments.
    """
    by_tag = paragraphs[topic]
    available = [tag for tag in tags if by_tag.get(tag)]
    if not available:
        return None
    # np.random.randint excludes the upper bound, so use the lengths directly;
    # the old `randint(0, 1)` could only ever yield 0 (always the first tag).
    choice = available[np.random.randint(0, len(available))]
    idx = int(np.random.randint(0, len(by_tag[choice])))
    return choice, idx


def record_feedback(paragraphs, topic, choice, idx, feedback):
    """Append *feedback* to the chosen segment's ``annotation`` list in place.

    The list is created on first use so segments without prior annotations
    do not raise ``KeyError``.
    """
    segment = paragraphs[topic][choice][idx]
    segment.setdefault('annotation', []).append(feedback)


def main():
    """Render the annotation UI and push submitted feedback to the dataset."""
    # cloning the dataset repo
    repo = Repository(
        local_dir=LOCAL_DIR,
        clone_from=DATASET_REPO_URL,
        repo_type="dataset",
        use_auth_token=auth_token,
    )
    paragraphs = load_paragraphs()

    # sidebar with info and drop down to select from the outer-level keys
    st.sidebar.markdown(SIDEBAR_TEXT)
    topic = st.sidebar.selectbox(
        label="Choose dataset topic to load", options=list(paragraphs)
    )
    if topic is None:
        return

    # NOTE(review): Streamlit re-executes this script on every widget
    # interaction, so a new random sample is drawn before the Submit click is
    # handled; the rating can therefore be stored against a different segment
    # than the one the user rated. Pinning the sample in st.session_state
    # would fix this; left for a follow-up once the Streamlit version is known.
    sample = pick_sample(paragraphs, topic)
    if sample is None:
        st.warning("No text segments available for this topic.")
        return
    choice, idx = sample

    c1, c2, c3 = st.columns([3, 1, 1])
    with c1:
        st.header('Text')
        st.write(paragraphs[topic][choice][idx]['textsegment'])
    with c2:
        st.header('Tag')
        st.text(choice)
    with c3:
        st.header('Feedback')
        feedback = st.selectbox(
            '0 If Tag is not a good keyword for text, 5 for prefect match',
            (0, 1, 2, 3, 4, 5),
        )
        if st.button('Submit'):
            record_feedback(paragraphs, topic, choice, idx, feedback)
            save_paragraphs(paragraphs)
            repo.push_to_hub('added new annotation')


# `streamlit run` executes the script as `__main__`, so the app still starts
# normally while the helpers above stay importable without side effects.
if __name__ == "__main__":
    main()