import streamlit as st
import os
import json
import numpy as np
from huggingface_hub import Repository

 
# Variables used to talk to the dataset repo.

# The auth token is stored as a secret key-value pair in the Space
# environment; falling back to True tells huggingface_hub to use the
# locally cached token instead.
auth_token = os.environ.get("space_to_dataset") or True

DATASET_REPO_URL = 'ppsingh/annotation_data'   # path to dataset repo on the Hub
DATA_FILENAME = 'paralist.json'
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Clone the dataset repo into the local 'data' directory.
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL,
                  repo_type="dataset", use_auth_token=auth_token)

# Read the annotation data from the cloned repo.
with open(DATA_FILE, 'r', encoding="utf8") as json_file:
  paraList = json.load(json_file)
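
# A minimal sketch of the nested structure this app assumes for paralist.json,
# inferred from the lookups further down (not a documented schema):
#
#   {
#     "<topic>": {
#       "<subtopic / tag>": [
#         {"textsegment": "some paragraph of text ...", "annotation": [0, 5]},
#         ...
#       ]
#     }
#   }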

# The outer-level keys of the json are the available topics.
keys = paraList.keys()

# Sidebar with info and a dropdown to select one of the topics.
st.sidebar.markdown(
    """
# Data Annotation Demo
This app demonstrates how to use a Space as the user interface for data
annotation/tagging. The data itself lives in a repo of type 'dataset'.
"""
)
topic = None
if keys:
  topic = st.sidebar.selectbox(
    label="Choose dataset topic to load", options=list(keys))

# Display the raw json so the demo shows what is being annotated.
st.write(paraList)
if topic is not None:
  c1, c2, c3 = st.columns([3, 1, 1])

  # Pick a random subtopic (the candidate tag) and a random text segment
  # under it, instead of a hard-coded index range.
  subtopics = list(paraList[topic].keys())
  choice = subtopics[np.random.randint(0, len(subtopics))]
  idx = np.random.randint(0, len(paraList[topic][choice]))

  with c1:
    st.header('Text')
    st.write(paraList[topic][choice][idx]['textsegment'])

  with c2:
    st.header('Tag')
    st.text(choice)

  with c3:
    st.header('Feedback')
    feedback = st.selectbox(
      '0 if Tag is not a good keyword for the text, 5 for a perfect match',
      (0, 1, 2, 3, 4, 5))
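
# Note (assumption, not in the original app): Streamlit reruns this whole
# script on every widget interaction, so the random topic/choice/idx above
# are re-rolled before the Submit click is processed, and the feedback can
# land on a different segment than the one the user saw. A sketch of one
# way to pin the pick across reruns with st.session_state:
#
#   if 'pick' not in st.session_state:
#     st.session_state.pick = (choice, idx)
#   choice, idx = st.session_state.pick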
            
if st.button('Submit'):
  if topic is not None and feedback is not None:
    # Record the feedback, then write the updated json back to the clone.
    # (The file must be opened in 'w' mode and closed before pushing, so the
    # commit picks up the flushed contents.)
    paraList[topic][choice][idx]['annotation'].append(feedback)
    with open(DATA_FILE, 'w', encoding="utf8") as json_file:
      json.dump(paraList, json_file, ensure_ascii=True)
    # Commit and push the new annotation back to the dataset repo.
    repo.push_to_hub('added new annotation')
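
# Note (assumption, not in the original app): if several annotators submit
# at the same time, the local clone can fall behind the remote and the push
# will fail. A simple mitigation is to refresh the clone right before
# re-reading and writing the file, e.g.:
#
#   repo.git_pull()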
        
      