Spaces:

facebook
/

incoder-demo

Sleeping

App Files Files Community

Daniel Fried committed on Apr 17, 2022

Commit

51676cf

•

1 Parent(s): 78ec172

add cloud logging

Browse files

Files changed (2) hide show

modules/app.py +34 -26
modules/cloud_logging.py +21 -0

modules/app.py CHANGED Viewed

@@ -3,6 +3,11 @@ from typing import List
 import traceback
 import os
 import base64
 # needs to be imported *before* transformers
 if os.path.exists('use_normal_tokenizers'):
     import tokenizers
@@ -51,11 +56,11 @@ app = FastAPI(docs_url=None, redoc_url=None)
 app.mount("/static", StaticFiles(directory="static"), name="static")
-print("loading model")
 model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
-print("loading tokenizer")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-print("loading complete")
 if CUDA:
     model = model.half().cuda()
@@ -96,7 +101,7 @@ def infill(parts: List[str], length_limit=None, temperature=None, extra_sentinel
         any_truncated = False
         retries_attempted += 1
         if VERBOSE:
-            print(f"retry {retries_attempted}")
         if len(parts) == 1:
             prompt = parts[0]
         else:
@@ -122,7 +127,7 @@ def infill(parts: List[str], length_limit=None, temperature=None, extra_sentinel
             completion = completion[len(prompt):]
             if EOM not in completion:
                 if VERBOSE:
-                    print(f"warning: {EOM} not found")
                 completion += EOM
                 # TODO: break inner loop here
                 done = False
@@ -135,18 +140,18 @@ def infill(parts: List[str], length_limit=None, temperature=None, extra_sentinel
         text = ''.join(complete)
     if VERBOSE:
-        print("generated text:")
-        print(prompt)
-        print()
-        print("parts:")
-        print(parts)
-        print()
-        print("infills:")
-        print(infills)
-        print()
-        print("restitched text:")
-        print(text)
-        print()
     return {
         'text': text,
@@ -169,17 +174,17 @@ async def generate_maybe(info: str):
     # form = await request.json()
     # info is a base64-encoded, url-escaped json string (since GET doesn't support a body, and POST leads to CORS issues)
     # fix padding, following https://stackoverflow.com/a/9956217/1319683
-    print(info)
     info = base64.urlsafe_b64decode(info + '=' * (4 - len(info) % 4)).decode('utf-8')
-    print(info)
     form = json.loads(info)
-    pprint.pprint(form)
     # print(form)
     prompt = form['prompt']
     length_limit = int(form['length'])
     temperature = float(form['temperature'])
-    if VERBOSE:
-        print(prompt)
     try:
         generation, truncated = generate(prompt, length_limit, temperature)
         if truncated:
@@ -189,6 +194,7 @@ async def generate_maybe(info: str):
         return {'result': 'success', 'type': 'generate', 'prompt': prompt, 'text': generation, 'message': message}
     except Exception as e:
         traceback.print_exception(*sys.exc_info())
         return {'result': 'error', 'type': 'generate', 'prompt': prompt, 'message': f'Error: {e}.'}
 @app.get('/infill')
@@ -198,15 +204,17 @@ async def infill_maybe(info: str):
     # form = await request.json()
     # info is a base64-encoded, url-escaped json string (since GET doesn't support a body, and POST leads to CORS issues)
     # fix padding, following https://stackoverflow.com/a/9956217/1319683
-    print(info)
     info = base64.urlsafe_b64decode(info + '=' * (4 - len(info) % 4)).decode('utf-8')
-    print(info)
     form = json.loads(info)
-    pprint.pprint(form)
     length_limit = int(form['length'])
     temperature = float(form['temperature'])
     max_retries = 1
     extra_sentinel = True
     try:
         if len(form['parts']) > 4:
             return {'result': 'error', 'text': ''.join(form['parts']), 'type': 'infill', 'message': f"error: Can't use more than 3 <infill> tokens in this demo (for efficiency)."}
@@ -221,7 +229,7 @@ async def infill_maybe(info: str):
         # return {'result': 'success', 'prefix': prefix, 'suffix': suffix,  'text': generation['text']}
     except Exception as e:
         traceback.print_exception(*sys.exc_info())
-        print(e)
         return {'result': 'error', 'type': 'infill', 'message': f'Error: {e}.'}

 import traceback
 import os
 import base64
+import logging
+logging.basicConfig(level=logging.INFO)
+import modules.cloud_logging
 # needs to be imported *before* transformers
 if os.path.exists('use_normal_tokenizers'):
     import tokenizers
 app.mount("/static", StaticFiles(directory="static"), name="static")
+logging.info("loading model")
 model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
+logging.info("loading tokenizer")
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+logging.info("loading complete")
 if CUDA:
     model = model.half().cuda()
         any_truncated = False
         retries_attempted += 1
         if VERBOSE:
+            logging.info(f"retry {retries_attempted}")
         if len(parts) == 1:
             prompt = parts[0]
         else:
             completion = completion[len(prompt):]
             if EOM not in completion:
                 if VERBOSE:
+                    logging.info(f"warning: {EOM} not found")
                 completion += EOM
                 # TODO: break inner loop here
                 done = False
         text = ''.join(complete)
     if VERBOSE:
+        logging.info("generated text:")
+        logging.info(prompt)
+        logging.info()
+        logging.info("parts:")
+        logging.info(parts)
+        logging.info()
+        logging.info("infills:")
+        logging.info(infills)
+        logging.info()
+        logging.info("restitched text:")
+        logging.info(text)
+        logging.info()
     return {
         'text': text,
     # form = await request.json()
     # info is a base64-encoded, url-escaped json string (since GET doesn't support a body, and POST leads to CORS issues)
     # fix padding, following https://stackoverflow.com/a/9956217/1319683
     info = base64.urlsafe_b64decode(info + '=' * (4 - len(info) % 4)).decode('utf-8')
     form = json.loads(info)
     # print(form)
     prompt = form['prompt']
     length_limit = int(form['length'])
     temperature = float(form['temperature'])
+    logging.info(json.dumps({
+        'length': length_limit,
+        'temperature': temperature,
+        'prompt': prompt,
+    }))
     try:
         generation, truncated = generate(prompt, length_limit, temperature)
         if truncated:
         return {'result': 'success', 'type': 'generate', 'prompt': prompt, 'text': generation, 'message': message}
     except Exception as e:
         traceback.print_exception(*sys.exc_info())
+        logging.error(e)
         return {'result': 'error', 'type': 'generate', 'prompt': prompt, 'message': f'Error: {e}.'}
 @app.get('/infill')
     # form = await request.json()
     # info is a base64-encoded, url-escaped json string (since GET doesn't support a body, and POST leads to CORS issues)
     # fix padding, following https://stackoverflow.com/a/9956217/1319683
     info = base64.urlsafe_b64decode(info + '=' * (4 - len(info) % 4)).decode('utf-8')
     form = json.loads(info)
     length_limit = int(form['length'])
     temperature = float(form['temperature'])
     max_retries = 1
     extra_sentinel = True
+    logging.info(json.dumps({
+        'length': length_limit,
+        'temperature': temperature,
+        'parts_joined': '<infill>'.join(form['parts']),
+    }))
     try:
         if len(form['parts']) > 4:
             return {'result': 'error', 'text': ''.join(form['parts']), 'type': 'infill', 'message': f"error: Can't use more than 3 <infill> tokens in this demo (for efficiency)."}
         # return {'result': 'success', 'prefix': prefix, 'suffix': suffix,  'text': generation['text']}
     except Exception as e:
         traceback.print_exception(*sys.exc_info())
+        logging.error(e)
         return {'result': 'error', 'type': 'infill', 'message': f'Error: {e}.'}

modules/cloud_logging.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+def make_logging_client():
+    cred_filename = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
+    if not cred_filename:
+        return None
+    print("cred filename:", cred_filename)
+    cred_string = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_STRING')
+    print("cred string:", bool(cred_string))
+    if not os.path.exists(cred_filename):
+        if cred_string:
+            print(f"writing cred string to {cred_filename}")
+            with open(cred_filename, 'w') as f:
+                f.write(cred_string)
+        else:
+            return None
+    from google.cloud import logging
+    logging_client = logging.Client()
+    logging_client.setup_logging()
+    return logging_client
+logging_client = make_logging_client()