Spaces:

facebook
/

incoder-demo

Sleeping

App Files Community

Daniel Fried committed on Apr 13, 2022

Commit

fb51e42

•

1 Parent(s): 5900055

update UI

Browse files

Files changed (3) hide show

.gitignore +3 -0
modules/app.py +6 -4
static/index.html +56 -50

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+use_normal_tokenizers
+__pycache__
+incoder-6B

modules/app.py CHANGED Viewed

@@ -5,12 +5,13 @@ import os
 # needs to be imported *before* transformers
 if os.path.exists('use_normal_tokenizers'):
     import tokenizers
 else:
     import tokenizers_patch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import json
 # from flask import Flask, request, render_template
 # from flask_cors import CORS
 # app = Flask(__name__, static_folder='static')
@@ -24,8 +25,6 @@ import json
 PORT = 7860
 VERBOSE = False
-BIG_MODEL = False
 if BIG_MODEL:
     CUDA = True
     model_name = "facebook/incoder-6B"
@@ -61,7 +60,10 @@ def generate(input, length_limit=None, temperature=None):
     input_ids = tokenizer(input, return_tensors="pt").input_ids
     if CUDA:
         input_ids = input_ids.cuda()
-    output = model.generate(input_ids=input_ids, do_sample=True, top_p=0.95, temperature=temperature, max_length=length_limit)
     detok_hypo_str = tokenizer.decode(output.flatten())
     if detok_hypo_str.startswith(BOS):
         detok_hypo_str = detok_hypo_str[len(BOS):]

 # needs to be imported *before* transformers
 if os.path.exists('use_normal_tokenizers'):
     import tokenizers
+    BIG_MODEL = False
 else:
     import tokenizers_patch
+    BIG_MODEL = True
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import json
 # from flask import Flask, request, render_template
 # from flask_cors import CORS
 # app = Flask(__name__, static_folder='static')
 PORT = 7860
 VERBOSE = False
 if BIG_MODEL:
     CUDA = True
     model_name = "facebook/incoder-6B"
     input_ids = tokenizer(input, return_tensors="pt").input_ids
     if CUDA:
         input_ids = input_ids.cuda()
+    max_length = length_limit + input_ids.flatten().size(0)
+    if max_length > 256:
+        max_length = 256
+    output = model.generate(input_ids=input_ids, do_sample=True, top_p=0.95, temperature=temperature, max_length=max_length)
     detok_hypo_str = tokenizer.decode(output.flatten())
     if detok_hypo_str.startswith(BOS):
         detok_hypo_str = detok_hypo_str[len(BOS):]

static/index.html CHANGED Viewed

@@ -124,41 +124,9 @@ label {
     <span><a href='javascript:select_example("metadata-prediction");'>Metadata Prediction</a></span>
     <span><a href='javascript:select_example("humaneval");'>Docstring->Code</a></span>
 </div>
-<div>
-Syntax:
-<select name="mode" id="mode">
-    <option value="text">Text</option>
-    <option value="c_cpp">C/C++</option>
-    <option value="csharp">C#</option>
-    <option value="clojure">Clojure</option>
-    <option value="coffee">CoffeeScript</option>
-    <option value="golang">Go</option>
-    <option value="haskell">Haskell</option>
-    <option value="java">Java</option>
-    <option value="javascript">JavaScript</option>
-    <option value="lua">Lua</option>
-    <option value="objectivec">Objective C</option>
-    <option value="perl">Perl</option>
-    <option value="php">PHP</option>
-    <option value="python">Python</option>
-    <option value="ruby">Ruby</option>
-    <option value="rust">Rust</option>
-    <option value="scala">Scala</option>
-    <option value="sh">Shell</option>
-    <option value="swift">Swift</option>
-    <option value="typescript">Typescript</option>
-</select>
 </div>
 <div class="request">
 <form id="generate-form">
-    <div class="leftside">
-        <!--
-        <textarea name="prompt" rows="12" cols="100" id="textbox"></textarea>
-        <textarea name="prefix" rows="12" cols="100" id="textbox"></textarea>
-        <textarea name="suffix" rows="12" cols="100" id="textbox"></textarea>
-        -->
-        <div id="editor"></div>
-    </div>
     <div class="rightside">
     <div>
     <label>Response Length:</label>
@@ -174,15 +142,7 @@ Syntax:
         >
     <output>0.6</output>
     </div>
-    <!--
     <div>
-    <label>Top-k:</label>
-    <input type="range" value="2" min="1" max="8" step="1" class="slider"
-        oninput="this.nextElementSibling.value = this.value" name="topk">
-    <output>2</output>
-    </div>
-    -->
-    <div class="submit-holder">
         <!-- <input type="submit" value="Extend" id="extend-form-button"/> -->
         <input type="button" value="Extend" id="extend-form-button"/>
         <span style='margin-left:1em'>
@@ -203,6 +163,39 @@ Syntax:
         <div>
         -->
     </div>
     <div id="error"></div>
     </div>
     </div>
@@ -212,13 +205,26 @@ Syntax:
 <div id="loader_holder">
     <div class="loader"></div>
     <div>
-        Please be patient. Your generation may take <span id="eta">X</span> seconds.
     </div>
 </div>
 </div>
-<h3 id="debug-info">Debug info</h3>
 <p>
 <script type="text/javascript">
 // these constants are only used for providing user expectations.
 var OVERHEAD = 3;
@@ -231,7 +237,7 @@ var Range = require("ace/range").Range;
 var EXAMPLES = {
     "python": {
         "prompt": "<| file ext=.py |>\nclass Person:\n" + SPLIT_TOKEN + "\np = Person('Eren', 18, 'Male')",
-        "length": 128,
         "mode": "python"
     },
     "python-infill2": {
@@ -241,7 +247,7 @@ def <infill>(file_name):
     """Count the number of occurrences of each word in the file."""
     <infill>
 `,
-        "length": 128,
         "mode": "python"
     },
@@ -259,7 +265,7 @@ def <infill>(file_name):
                     word_counts[word] = 1
     return word_counts
 `,
-        "length": 64,
         "mode": "python"
     },
     "docstring": {
@@ -277,27 +283,27 @@ def <infill>(file_name):
                     word_counts[word] = 1
     return word_counts
 `,
-        "length": 128,
         "mode": "python"
     },
     "javascript": {
         "prompt": "<| file ext=.js |>\n // is something really happening here",
-        "length": 128,
         "mode": "javascript"
     },
     "jupyter": {
         "prompt": "<| file ext=.ipynb:python |>\n<text>\nThis notebook demonstrates using scikit-learn to perform PCA.\n</text>\n<cell>",
-        "length": 128,
         "mode": "python"
     },
     "stackoverflow": {
         "prompt": "<| q tags=regex,html |>\nParsing HTML with regular expressions\nHow do I do this? Is it a good idea?\n<|/ q dscore=3 |>\n<| a dscore=4 |>",
-        "length": 128,
         "mode": "text"
     },
     "metadata-conditioning": {
         "prompt": "<| file ext=.py filename=train_model.py source=github dstars=4 |>\n",
-        "length": 256,
         "mode": "python"
     },
     "metadata-prediction": {

     <span><a href='javascript:select_example("metadata-prediction");'>Metadata Prediction</a></span>
     <span><a href='javascript:select_example("humaneval");'>Docstring->Code</a></span>
 </div>
 </div>
 <div class="request">
 <form id="generate-form">
     <div class="rightside">
     <div>
     <label>Response Length:</label>
         >
     <output>0.6</output>
     </div>
     <div>
         <!-- <input type="submit" value="Extend" id="extend-form-button"/> -->
         <input type="button" value="Extend" id="extend-form-button"/>
         <span style='margin-left:1em'>
         <div>
         -->
     </div>
+    <br>
+    <div class="leftside">
+        <!--
+        <textarea name="prompt" rows="12" cols="100" id="textbox"></textarea>
+        <textarea name="prefix" rows="12" cols="100" id="textbox"></textarea>
+        <textarea name="suffix" rows="12" cols="100" id="textbox"></textarea>
+        -->
+<div>
+Syntax:
+<select name="mode" id="mode">
+    <option value="text">Text</option>
+    <option value="c_cpp">C/C++</option>
+    <option value="csharp">C#</option>
+    <option value="clojure">Clojure</option>
+    <option value="coffee">CoffeeScript</option>
+    <option value="golang">Go</option>
+    <option value="haskell">Haskell</option>
+    <option value="java">Java</option>
+    <option value="javascript">JavaScript</option>
+    <option value="lua">Lua</option>
+    <option value="objectivec">Objective C</option>
+    <option value="perl">Perl</option>
+    <option value="php">PHP</option>
+    <option value="python">Python</option>
+    <option value="ruby">Ruby</option>
+    <option value="rust">Rust</option>
+    <option value="scala">Scala</option>
+    <option value="sh">Shell</option>
+    <option value="swift">Swift</option>
+    <option value="typescript">Typescript</option>
+</select>
+        <div id="editor"></div>
+    </div>
     <div id="error"></div>
     </div>
     </div>
 <div id="loader_holder">
     <div class="loader"></div>
     <div>
+      Generation queued, please wait...
     </div>
 </div>
 </div>
+<h3 id="debug-info">More Info</h3>
 <p>
+This is a demo interface for <a href="https://github.com/dpfried/incoder/blob/main/paper/InCoder-4-12-22.pdf">InCoder: A Generative Model for Code In-Filling and Synthesis</a>.
+</p>
+<p>
+See <a href="https://sites.google.com/view/incoder-code-models">our project site</a> for more information on these models, including a paper and examples.
+</p>
+<p>
+For instructions on setting up and using the models yourself, see <a href="https://github.com/dpfried/incoder/blob/main/README.md">our readme</a>.
+</p>
+<h3 id="debug-info">Credits</h3>
+<p>Model development: Daniel Fried*, Armen Aghajanyan*, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong, Wen-tau Yih, Luke Zettlemoyer, and Mike Lewis</p>
+<p>Thanks to Naman Goyal and Stephen Roller for writing the code this was based on. Extensions by Daniel Fried and Sida Wang.</p>
 <script type="text/javascript">
 // these constants are only used for providing user expectations.
 var OVERHEAD = 3;
 var EXAMPLES = {
     "python": {
         "prompt": "<| file ext=.py |>\nclass Person:\n" + SPLIT_TOKEN + "\np = Person('Eren', 18, 'Male')",
+        "length": 64,
         "mode": "python"
     },
     "python-infill2": {
     """Count the number of occurrences of each word in the file."""
     <infill>
 `,
+        "length": 64,
         "mode": "python"
     },
                     word_counts[word] = 1
     return word_counts
 `,
+        "length": 4,
         "mode": "python"
     },
     "docstring": {
                     word_counts[word] = 1
     return word_counts
 `,
+        "length": 32,
         "mode": "python"
     },
     "javascript": {
         "prompt": "<| file ext=.js |>\n // is something really happening here",
+        "length": 64,
         "mode": "javascript"
     },
     "jupyter": {
         "prompt": "<| file ext=.ipynb:python |>\n<text>\nThis notebook demonstrates using scikit-learn to perform PCA.\n</text>\n<cell>",
+        "length": 64,
         "mode": "python"
     },
     "stackoverflow": {
         "prompt": "<| q tags=regex,html |>\nParsing HTML with regular expressions\nHow do I do this? Is it a good idea?\n<|/ q dscore=3 |>\n<| a dscore=4 |>",
+        "length": 64,
         "mode": "text"
     },
     "metadata-conditioning": {
         "prompt": "<| file ext=.py filename=train_model.py source=github dstars=4 |>\n",
+        "length": 64,
         "mode": "python"
     },
     "metadata-prediction": {