# annotation_dev / app.py
import streamlit as st
import pandas as pd
from huggingface_hub import Repository
import os
from pathlib import Path
import json
import numpy as np
# Declare the variables used later to talk to the dataset repo.
# The token is saved as a secret key-value pair ("space_to_dataset") in the Space
# environment and can be accessed as shown below; falling back to True tells
# huggingface_hub to use the locally cached credentials instead.
auth_token = os.environ.get("space_to_dataset") or True
DATASET_REPO_URL = 'ppsingh/annotation_data' # path to dataset repo
DATA_FILENAME = "paralist.json"
DATA_FILE = os.path.join("data", DATA_FILENAME)
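# Assumed layout of the dataset repo, inferred from the constants above: paralist.json
# sits at the repo root, so after cloning into "data/" it is reachable as DATA_FILE:
#   ppsingh/annotation_data   (repo_type "dataset")
#   └── paralist.json         -> cloned to data/paralist.json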
# Data file name (duplicates DATA_FILENAME; kept for the format() calls below)
file_name = 'paralist.json'

# clone the dataset repo and read the json
@st.cache(allow_output_mutation=True)
def read_dataset():
    repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL,
                      repo_type="dataset", use_auth_token=auth_token)
    with open('data/{}'.format(file_name), 'r', encoding="utf8") as json_file:
        paraList = json.load(json_file)
    return repo, paraList
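# Assumed shape of paralist.json, inferred from how it is read and updated below
# (field values are illustrative only):
# {
#   "<topic>": {
#     "<subtopic / tag>": [
#       {"textsegment": "a paragraph of text ...", "annotation": [0, 5]},
#       ...
#     ],
#     ...
#   },
#   ...
# }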
st.sidebar.markdown("""
# Data Annotation Demo
This app demonstrates how a Space can provide a user interface for data annotation/tagging. The data resides in a repo of type 'dataset'.
""")
# sidebar with info and drop down to select from the keys
topic = None
repo, paraList = read_dataset()
# the outer-level keys in the json are the topics
keys = list(paraList.keys())
if keys:
    topic = st.sidebar.selectbox(label="Choose dataset topic to load", options=keys)
#with st.container():
with st.form("annotation_form"):
    if topic is not None:
        subtopics = list(paraList[topic].keys())
        #st.write(subtopics)

        # pick a random subtopic (tag) and a random paragraph under it;
        # np.random.randint excludes the upper bound, so pass len() directly
        val = np.random.randint(0, len(subtopics))
        tag = subtopics[val]
        idx = np.random.randint(0, len(paraList[topic][tag]))

        st.markdown("**Text**")
        st.write(paraList[topic][tag][idx]['textsegment'])
        st.markdown("**Tag**")
        st.write(tag)

        feedback = st.selectbox('0 if the tag is not a good keyword for the text, 5 for a perfect match', (0, 1, 2, 3, 4, 5))
        submitted = st.form_submit_button("Submit")
        if submitted:
            # record the feedback and push the updated json back to the dataset repo
            paraList[topic][tag][idx]['annotation'].append(feedback)
            with open("data/{}".format(file_name), "w") as outfile:
                json.dump(paraList, outfile)
            repo.push_to_hub('added new annotation')
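# Effect of one submission (illustrative values): the chosen score is appended to the
# current paragraph's 'annotation' list, e.g. {"textsegment": "...", "annotation": [5]}
# becomes {"textsegment": "...", "annotation": [5, 3]}; the rewritten data/paralist.json
# is then committed back to ppsingh/annotation_data with the message
# 'added new annotation'.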
# st.write(type(paraList))

# Earlier column-based layout and other scratch code, kept commented out for reference:
#c1, c2, c3 = st.columns([3, 1, 1])
#with c1:
# st.header('Text')
# st.write(paraList[topic][tag][idx]['textsegment'])
#with c2:
# st.header('Tag')
# st.text(tag)
#with c3:
# st.header('Feedback')
# feedback = None
#    feedback = st.selectbox('0 if the tag is not a good keyword for the text, 5 for a perfect match',(0,1,2,3,4,5))
#if feedback:
# st.write(feedback)
# if st.button('Submit'):
# paraList[topic][choice][idx]['annotation'].append(feedback)
# with open('data/{}'.format(file_name), 'r', encoding="utf8") as json_file:
# json.dump(paraList,json_file, ensure_ascii = True)
# repo.push_to_hub('added new annotation')
#st.write(paraList)
#new_row = title
# data = data.append(new_row, ignore_index=True)
# st.write(data)
# st.write(os.getcwd())
# data.to_csv('test.csv', index= False)
#st.write(df)
# st.write('data/test.csv')
# iterate over files in that directory
#directory = os.getcwd()
#files = Path(directory).glob('*')
#for file in files:
# st.write(file)
#with open(DATA_FILE, "a") as csvfile:
# writer = csv.DictWriter(csvfile, fieldnames=["Sentences"])
# writer.writerow({'Sentences': new_row})
# repo.push_to_hub('adding new line')
#     st.write('Success')