liujch1998 committed on
Commit
c24faa7
•
1 Parent(s): 0cfd419

Add disclaimers

Browse files
Files changed (2) hide show
  1. app.py +8 -5
  2. constants.py +1 -1
app.py CHANGED
@@ -91,15 +91,18 @@ def creativity(index_desc, query):
91
  with gr.Blocks() as demo:
92
  with gr.Column():
93
  gr.HTML(
94
- '''<h1 text-align="center">Creativity Index</h1>
95
 
96
  <p style='font-size: 16px;'>Compute the <a href="">Creativity Index</a> of a piece of text.</p>
97
- <p style='font-size: 16px;'>The computed Creativity Index is based on verbatim match and is supported by <a href="https://infini-gram.io">infini-gram</a>.</p>
 
 
 
98
  '''
99
  )
100
  with gr.Row():
101
  with gr.Column(scale=1, min_width=240):
102
- index_desc = gr.Radio(choices=INDEX_DESCS, label='Corpus', value=INDEX_DESCS[0])
103
 
104
  with gr.Column(scale=3):
105
  creativity_query = gr.Textbox(placeholder='Enter a piece of text here', label='Input', interactive=True, lines=10)
@@ -112,8 +115,8 @@ with gr.Blocks() as demo:
112
  creativity_ci = gr.Label(value='', label='Creativity Index')
113
  creativity_htmls = []
114
  for n in range(NGRAM_LEN_MIN, NGRAM_LEN_MAX + 1):
115
- with gr.Tab(label=f'n={n}'):
116
- creativity_htmls.append(gr.HTML(value='', label=f'n={n}'))
117
 
118
  creativity_clear.add([creativity_query, creativity_latency, creativity_ci] + creativity_htmls)
119
  creativity_submit.click(creativity, inputs=[index_desc, creativity_query], outputs=[creativity_latency, creativity_ci] + creativity_htmls, api_name=False)
 
91
  with gr.Blocks() as demo:
92
  with gr.Column():
93
  gr.HTML(
94
+ f'''<h1 text-align="center">Creativity Index</h1>
95
 
96
  <p style='font-size: 16px;'>Compute the <a href="">Creativity Index</a> of a piece of text.</p>
97
+ <p style='font-size: 16px;'>The Creativity Index is computed based on verbatim matching against massive text corpora and is powered by <a href="https://infini-gram.io">infini-gram</a>. It is defined as the ratio of tokens not covered by n-grams (n >= L) that can be found in the corpus, averaged across {NGRAM_LEN_MIN} <= L <= {NGRAM_LEN_MAX}. You can view the covered tokens (highlighted in red background) for each value of L.</p>
98
+ <p style='font-size: 16px;'><b>Note:</b> The input text is limited to {MAX_QUERY_CHARS} characters. Each query has a timeout of {MAX_TIMEOUT_IN_SECONDS} seconds. If you have waited 30 seconds and receive an error, you can try submitting the same query and it's more likely to work on the second try.</p>
99
+ <p style='font-size: 16px;'><b>Disclaimer 1:</b> The Creativity Index of text that appears exactly in the corpora may be deflated. In our paper, we remove exact duplicates (including quotations and citations) from the corpus before computing the Creativity Index. However, deduplication is not applied in this demo.</p>
100
+ <p style='font-size: 16px;'><b>Disclaimer 2:</b> The Creativity Index of text generated by the latest models (e.g., GPT-4) may be inflated. This is because we don't have all the data that these models are trained on, and our supported corpora have an earlier cutoff date (Dolma-v1.7 is Oct 2023, RedPajama is Mar 2023, Pile is 2020).</p>
101
  '''
102
  )
103
  with gr.Row():
104
  with gr.Column(scale=1, min_width=240):
105
+ index_desc = gr.Radio(choices=INDEX_DESCS, label='Corpus', value=INDEX_DESCS[2])
106
 
107
  with gr.Column(scale=3):
108
  creativity_query = gr.Textbox(placeholder='Enter a piece of text here', label='Input', interactive=True, lines=10)
 
115
  creativity_ci = gr.Label(value='', label='Creativity Index')
116
  creativity_htmls = []
117
  for n in range(NGRAM_LEN_MIN, NGRAM_LEN_MAX + 1):
118
+ with gr.Tab(label=f'L={n}'):
119
+ creativity_htmls.append(gr.HTML(value='', label=f'L={n}'))
120
 
121
  creativity_clear.add([creativity_query, creativity_latency, creativity_ci] + creativity_htmls)
122
  creativity_submit.click(creativity, inputs=[index_desc, creativity_query], outputs=[creativity_latency, creativity_ci] + creativity_htmls, api_name=False)
constants.py CHANGED
@@ -15,7 +15,7 @@ MAX_QUERY_CHARS = int(os.environ.get('MAX_QUERY_CHARS', 1000))
15
  NGRAM_LEN_MIN = int(os.environ.get('NGRAM_LEN_MIN', 5))
16
  NGRAM_LEN_MAX = int(os.environ.get('NGRAM_LEN_MAX', 11))
17
  MAX_DISP_CHARS_PER_LINE = int(os.environ.get('MAX_DISP_CHARS_PER_LINE', 90))
18
- MAX_TIMEOUT_IN_SECONDS = int(os.environ.get('MAX_TIMEOUT_IN_SECONDS', 60))
19
 
20
  # HF demo
21
  API_URL = os.environ.get('API_URL', None)
 
15
  NGRAM_LEN_MIN = int(os.environ.get('NGRAM_LEN_MIN', 5))
16
  NGRAM_LEN_MAX = int(os.environ.get('NGRAM_LEN_MAX', 11))
17
  MAX_DISP_CHARS_PER_LINE = int(os.environ.get('MAX_DISP_CHARS_PER_LINE', 90))
18
+ MAX_TIMEOUT_IN_SECONDS = int(os.environ.get('MAX_TIMEOUT_IN_SECONDS', 30))
19
 
20
  # HF demo
21
  API_URL = os.environ.get('API_URL', None)