victormiller
committed on
Commit
•
465a4f0
1
Parent(s):
c5f6903
Update web.py
Browse files
web.py
CHANGED
@@ -953,7 +953,7 @@ def web_data():
|
|
953 |
P("""We decided to use the RedPajama V2 implementation but skip the 1st occurrence of the duplicate n-gram.
|
954 |
"""),
|
955 |
Details(
|
956 |
-
Summary("TxT360 Implementation")
|
957 |
D_code("""
|
958 |
def get_dup_ngram_frac(n, doc_n_grams, text):
|
959 |
# fetch the ngrams from the document if they exist, otherwise compute them
|
@@ -1043,11 +1043,11 @@ def web_data():
|
|
1043 |
),
|
1044 |
Details(
|
1045 |
Summary("Bullet Point Identification Implemetations"),
|
1046 |
-
P("Dolma: ")
|
1047 |
D_code("""
|
1048 |
BULLET_POINTS = ("*", "-"
|
1049 |
""", block="block", language="python"),
|
1050 |
-
P("RedPajamaV2: ")
|
1051 |
D_code("""
|
1052 |
BULLET_POINT_SYMBOLS = (
|
1053 |
"•", # bullet point
|
|
|
953 |
P("""We decided to use the RedPajama V2 implementation but skip the 1st occurrence of the duplicate n-gram.
|
954 |
"""),
|
955 |
Details(
|
956 |
+
Summary("TxT360 Implementation"),
|
957 |
D_code("""
|
958 |
def get_dup_ngram_frac(n, doc_n_grams, text):
|
959 |
# fetch the ngrams from the document if they exist, otherwise compute them
|
|
|
1043 |
),
|
1044 |
Details(
|
1045 |
Summary("Bullet Point Identification Implemetations"),
|
1046 |
+
P("Dolma: "),
|
1047 |
D_code("""
|
1048 |
BULLET_POINTS = ("*", "-"
|
1049 |
""", block="block", language="python"),
|
1050 |
+
P("RedPajamaV2: "),
|
1051 |
D_code("""
|
1052 |
BULLET_POINT_SYMBOLS = (
|
1053 |
"•", # bullet point
|