bglearning committed on
Commit
588a02c
1 Parent(s): 32fbbc2

Add/update docstring

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. tapas_visualizer.py +14 -6
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Tapas Tokenizer Viz
3
- emoji: 🌍
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: streamlit
 
1
  ---
2
  title: Tapas Tokenizer Viz
3
+ emoji: 🍽️
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: streamlit
tapas_visualizer.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import os
2
  from typing import Any, List, Dict
3
 
@@ -5,22 +10,24 @@ from collections import defaultdict
5
 
6
  import pandas as pd
7
 
 
 
8
  dirname = os.path.dirname(__file__)
9
  css_filename = os.path.join(dirname, "tapas-styles.css")
10
  with open(css_filename) as f:
11
  css = f.read()
12
 
13
 
14
- def HTMLBody(table_html: str, css_styles=css) -> str:
15
  """
16
  Generates the full html with css from a list of html spans
17
 
18
  Args:
19
- children (:obj:`List[str]`):
20
- A list of strings, assumed to be html elements
21
 
22
- css_styles (:obj:`str`, `optional`):
23
- Optional alternative implementation of the css
24
 
25
  Returns:
26
  :obj:`str`: An HTML string with style markup
@@ -42,10 +49,11 @@ def HTMLBody(table_html: str, css_styles=css) -> str:
42
 
43
 
44
  class TapasVisualizer:
45
- def __init__(self, tokenizer) -> None:
46
  self.tokenizer = tokenizer
47
 
48
  def normalize_token_str(self, token_str: str) -> str:
 
49
  return token_str.replace("##", "")
50
 
51
  def style_span(self, span_text: str, css_classes: List[str]) -> str:
 
1
+ """Visualizer for TAPAS
2
+
3
+ Implementation heavily based on
4
+ `EncodingVisualizer` from `tokenizers.tools`.
5
+ """
6
  import os
7
  from typing import Any, List, Dict
8
 
 
10
 
11
  import pandas as pd
12
 
13
+ from transformers import TapasTokenizer
14
+
15
  dirname = os.path.dirname(__file__)
16
  css_filename = os.path.join(dirname, "tapas-styles.css")
17
  with open(css_filename) as f:
18
  css = f.read()
19
 
20
 
21
+ def HTMLBody(table_html: str, css_styles: str = css) -> str:
22
  """
23
  Generates the full html with css from a list of html spans
24
 
25
  Args:
26
+ table_html (str):
27
+ The html string of the table
28
 
29
+ css_styles (str):
30
+ CSS styling to be embedded inline
31
 
32
  Returns:
33
  :obj:`str`: An HTML string with style markup
 
49
 
50
 
51
  class TapasVisualizer:
52
+ def __init__(self, tokenizer: TapasTokenizer) -> None:
53
  self.tokenizer = tokenizer
54
 
55
  def normalize_token_str(self, token_str: str) -> str:
56
+ # Normalize subword tokens to the original subword string
57
  return token_str.replace("##", "")
58
 
59
  def style_span(self, span_text: str, css_classes: List[str]) -> str: