File size: 3,638 Bytes
46af628
14ff9c4
b62e42f
6169f27
02d9582
e2393a1
776f615
2fc2e11
02d9582
e2393a1
d70c8d8
e2393a1
85fa59b
e2393a1
85fa59b
 
 
e2393a1
 
85fa59b
44e0cdf
 
85fa59b
44e0cdf
 
9ed128a
85fa59b
 
 
 
 
 
44e0cdf
85fa59b
 
 
 
26fc0ba
 
 
 
 
 
 
 
 
 
 
f807a9a
85fa59b
f807a9a
e1aecfb
f807a9a
dbcde10
b0e183d
f807a9a
 
ae013ce
f807a9a
 
eb99439
f807a9a
 
eb99439
f807a9a
 
 
 
 
7025ca1
eb99439
 
 
 
f807a9a
49b7338
f807a9a
 
 
 
cd42a41
f807a9a
 
 
cd42a41
f807a9a
 
 
 
 
 
6ca2a04
 
c516297
 
 
7bb6ae5
efd14d3
043fe71
da7e215
 
 
 
 
e2393a1
e8ad8b1
c475583
da7e215
 
44e0cdf
 
 
 
 
 
 
 
 
 
da7e215
85fa59b
5b25195
da7e215
 
b3d7164
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import streamlit as st
import pandas as pd
from huggingface_hub import Repository
import os 
from pathlib import Path
import json
import numpy as np

 
# Settings the rest of the script uses to talk to the dataset repository.

# The access token is stored as a secret in the environment under the name
# "space_to_dataset". When that variable is missing or empty the value falls
# back to True.
# NOTE(review): True presumably makes huggingface_hub use locally stored
# credentials -- confirm against the installed library version.
_env_token = os.environ.get("space_to_dataset")
auth_token = _env_token if _env_token else True

# Identifier of the dataset repo that holds the annotation data.
DATASET_REPO_URL = "ppsingh/annotation_data"

# Name of the JSON data file and its location inside the local "data" folder.
# The clone into that folder is performed by read_dataset().
DATA_FILENAME = "paralist.json"
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Data file name under the alias used by the read and write code.
file_name = DATA_FILENAME

# reading the json
@st.cache(allow_output_mutation=True)
def read_dataset():
    """Clone the dataset repo into ``data`` and load its JSON data file.

    The function is wrapped in ``st.cache`` with
    ``allow_output_mutation=True``; the form code further down appends
    annotations to the returned structure in place.

    Returns:
        A ``(repo, paraList)`` tuple: the ``Repository`` object created for
        the local ``data`` folder and the parsed content of the JSON file.
    """
    dataset_repo = Repository(
        local_dir="data",
        clone_from=DATASET_REPO_URL,
        repo_type="dataset",
        use_auth_token=auth_token,
    )

    json_path = f"data/{file_name}"
    with open(json_path, "r", encoding="utf8") as handle:
        annotations = json.load(handle)

    return dataset_repo, annotations

st.sidebar.markdown(""" 
   # Data Annotation Demo 
This app is demo how to use the space to provide user interface for the data annotation/tagging. The data resides in repo_type 'dataset'.
""")

# Sidebar: short description above, plus a drop-down to choose a topic.

# Clone/load the dataset; `repo` is needed later to push new annotations.
repo, paraList = read_dataset()

# The outer-level keys of the JSON are the selectable topics.
keys = list(paraList)

# The previous `keys is not None` guard was always true (dict.keys() never
# returns None), so the selectbox is simply created unconditionally.
# NOTE(review): with an empty option list the selectbox is expected to return
# None, which leaves `topic` as None exactly as before -- confirm for the
# installed Streamlit version.
topic = st.sidebar.selectbox(label="Choose dataset topic to load", options=keys)

 
#with st.container():

      
# Session-state keys: the sample currently shown to the user, and the value of
# the rating widget inside the form.
_SAMPLE_STATE_KEY = "annotation_sample"
_FEEDBACK_STATE_KEY = "annotation_feedback"


def _draw_sample(topic_name):
    """Pick a random text segment to annotate under ``topic_name``.

    Returns a ``(topic, tag, idx)`` tuple, or ``None`` when the topic has no
    tag with at least one text segment.
    """
    # Only tags that actually have segments can be sampled.
    candidate_tags = [
        name for name, segments in paraList[topic_name].items() if segments
    ]
    if not candidate_tags:
        return None
    # np.random.randint's upper bound is exclusive, so passing the length
    # covers every entry (the old `len(...) - 1` never picked the last tag
    # and raised ValueError for a single-tag topic).
    chosen_tag = candidate_tags[int(np.random.randint(len(candidate_tags)))]
    # Sample over the real number of segments instead of a hard-coded 3.
    segment_idx = int(np.random.randint(len(paraList[topic_name][chosen_tag])))
    return topic_name, chosen_tag, segment_idx


def _record_feedback():
    """Submit callback: store the rating for the sample that was displayed.

    Appends the rating to the sample's 'annotation' list, rewrites the JSON
    file in the local clone, pushes it to the hub and clears the stored
    sample so that the following rerun draws a new one.
    """
    if _SAMPLE_STATE_KEY not in st.session_state:
        return
    sample_topic, sample_tag, sample_idx = st.session_state[_SAMPLE_STATE_KEY]
    rating = st.session_state[_FEEDBACK_STATE_KEY]

    entry = paraList[sample_topic][sample_tag][sample_idx]
    # Tolerate entries that do not have an 'annotation' list yet.
    entry.setdefault('annotation', []).append(rating)

    with open("data/{}".format(file_name), "w", encoding="utf8") as outfile:
        json.dump(paraList, outfile)
    repo.push_to_hub('added new annotation')

    del st.session_state[_SAMPLE_STATE_KEY]


# Streamlit reruns the whole script on every interaction, including the form
# submit. Drawing the sample afresh on each run would attach the rating to a
# different segment than the one the user was looking at, so the displayed
# sample is kept in session state until it has been rated or the topic changes.
if topic is None:
    st.warning("No dataset topic is available to annotate.")
else:
    if (
        _SAMPLE_STATE_KEY not in st.session_state
        or st.session_state[_SAMPLE_STATE_KEY][0] != topic
    ):
        new_sample = _draw_sample(topic)
        if new_sample is None:
            # Nothing to show for this topic; drop any stale sample.
            if _SAMPLE_STATE_KEY in st.session_state:
                del st.session_state[_SAMPLE_STATE_KEY]
        else:
            st.session_state[_SAMPLE_STATE_KEY] = new_sample

    if _SAMPLE_STATE_KEY not in st.session_state:
        st.warning("The selected topic has no text segments to annotate.")
    else:
        _, tag, idx = st.session_state[_SAMPLE_STATE_KEY]

        with st.form("annotation_form"):
            st.markdown("**Text**")
            st.write(paraList[topic][tag][idx]['textsegment'])

            st.markdown("**Tag**")
            st.write(tag)

            # The widget key makes the chosen rating readable from the
            # submit callback via st.session_state.
            st.selectbox(
                '0 If Tag is not a good keyword for text, 5 for prefect match',
                (0, 1, 2, 3, 4, 5),
                key=_FEEDBACK_STATE_KEY,
            )
            st.form_submit_button("Submit", on_click=_record_feedback)
              # st.write(type(paraList))
    
      
      #c1, c2, c3 = st.columns([3, 1, 1])
      #with c1:
       #   st.header('Text')
        #  st.write(paraList[topic][tag][idx]['textsegment'])
  
      #with c2:
       #   st.header('Tag')
        #  st.text(tag)
  
      #with c3:
       #   st.header('Feedback')
        #  feedback = None
         # feedback = st.selectbox('0 If Tag is not a good keyword for text, 5 for prefect match',(0,1,2,3,4,5)) 
          #if feedback:
           #   st.write(feedback)
#      if st.button('Submit'):
#        paraList[topic][choice][idx]['annotation'].append(feedback)
#      with open('data/{}'.format(file_name), 'r', encoding="utf8") as json_file:
 #       json.dump(paraList,json_file, ensure_ascii = True)
  #      repo.push_to_hub('added new annotation')
        
#st.write(paraList)      
    #new_row  = title
#  data = data.append(new_row, ignore_index=True)
#  st.write(data)
#  st.write(os.getcwd())
#  data.to_csv('test.csv', index= False)


#st.write(df)
#   st.write('data/test.csv')
# iterate over files in
# that directory        
#directory = os.getcwd()
#files = Path(directory).glob('*')
#for file in files:
#    st.write(file)

#with open(DATA_FILE, "a") as csvfile:
#  writer = csv.DictWriter(csvfile, fieldnames=["Sentences"])
#  writer.writerow({'Sentences': new_row})
#  repo.push_to_hub('adding new line')
#  st.write('Succcess')