Spaces:
Sleeping
Sleeping
import re | |
from string import punctuation | |
def escape_tags_and_content(text):
    """Remove tags whose content is not natural language, content included.

    For each of the tags ``code`` and ``noformat``, every span of the form
    ``{tag...}...{tag}`` (opening tag with optional attributes, the enclosed
    content, and the closing tag) is replaced by the empty string. Matching
    is non-greedy and, because of ``re.DOTALL``, may span line breaks. An
    opening tag without a matching closing tag is left untouched.

    Args:
        text: The markup text to clean.

    Returns:
        ``text`` with all matching spans removed.
    """
    NO_TEXT_TAGS = "code", "noformat"
    for tag in NO_TEXT_TAGS:
        # Raw string: "\{" and "\}" are invalid escape sequences in a normal
        # string literal and trigger a SyntaxWarning on recent Python versions.
        regex_matching_tag = re.compile(r"\{%s(.*?)\}(.*?)\{%s\}" % (tag, tag), re.DOTALL)
        text = regex_matching_tag.sub("", text)
    return text
def escape_tags(text):
    """Remove markup tags but keep the text they enclose.

    Every ``{tag...}`` occurrence for the tags ``color``, ``quote``,
    ``anchor`` and ``panel`` is replaced by the empty string. The attribute
    part after the tag name may be empty, so closing tags such as ``{color}``
    are removed by the same pattern. ``.`` does not match a line break here,
    so a tag must open and close on the same line.

    Args:
        text: The markup text to clean.

    Returns:
        ``text`` with the tags removed and their content retained.
    """
    ESCAPE_TAGS = "color", "quote", "anchor", "panel"
    for tag in ESCAPE_TAGS:
        # Raw string avoids the invalid "\{" / "\}" escape sequences.
        text = re.sub(r"\{%s(.*?)\}" % tag, "", text)
    return text
def escape_strings(text):
    """Blank out escaped control sequences, quotes, backslash pairs and headings.

    Each of the following substrings is replaced by a single space, one
    substring after the other in the listed order:

    * the two-character sequences backslash + ``r``/``n``/``t``/``f``/``v``
      (the literal text, not the control characters themselves),
    * the double quote character,
    * two consecutive backslashes,
    * the JIRA heading prefixes ``"h1. "`` through ``"h6. "``.
    """
    escaped_controls = ("\\r", "\\n", "\\t", "\\f", "\\v")
    quote_and_backslashes = ("\"", "\\\\")
    heading_prefixes = ("h1. ", "h2. ", "h3. ", "h4. ", "h5. ", "h6. ")

    cleaned = text
    for needle in escaped_controls + quote_and_backslashes + heading_prefixes:
        # Splitting on the needle and re-joining with a space is the same as
        # replacing every occurrence of the needle with a space.
        cleaned = " ".join(cleaned.split(needle))
    return cleaned
def escape_links(text):
    """Remove JIRA-markup links and bare ``http://`` / ``https://`` URLs.

    Bracketed links of the form ``[target]`` or ``[alias|target]`` are
    removed entirely when the target starts with one of: ``#`` (anchor),
    ``^`` (attachment), ``http://``, ``https://``, ``mailto:``, ``file:`` or
    ``~`` (user mention). Afterwards every remaining bare URL, i.e. a run of
    non-whitespace characters starting with ``http://`` or ``https://`` at a
    word boundary, is removed as well.

    Args:
        text: The markup text to clean.

    Returns:
        ``text`` with the recognised links replaced by the empty string.
    """
    LINK_STARTERS = r"\#", r"\^", r"http\:\/\/", r"https\:\/\/", r"mailto\:", r"file\:", r"\~"
    for link_starter in LINK_STARTERS:
        # Raw string avoids the invalid "\[" / "\]" escape sequences.
        text = re.sub(r"\[(.*?\|)?%s(.*?)\]" % link_starter, "", text)
    # Bare URLs are handled only after all bracketed forms are gone;
    # otherwise "\S+" would swallow the URL and the closing "]" of an
    # "[alias|http://...]" link and leave "[alias|" behind.
    text = re.sub(r"\bhttps?://\S+", "", text)
    return text
def escape_hex_character_codes(text):
    """Remove textual hex escapes such as ``\\xc3`` from ``text``.

    A match is a literal backslash, the letter ``x`` and exactly two
    hexadecimal digits. Only genuine hex digits are accepted, so ordinary
    text that merely contains a backslash followed by ``x`` and two other
    letters (for example a path segment) is left intact.

    Args:
        text: Text in which non-latin characters appear as ``\\xNN`` codes.

    Returns:
        ``text`` with every such code replaced by the empty string.
    """
    return re.sub(r"\\x[0-9a-fA-F]{2}", "", text)
def escape_punctuation_boundaries(text):
    """Strip punctuation from the start and end of every word.

    ``text`` is split on whitespace. From each word all leading punctuation
    (periods included) is removed, and all trailing punctuation up to, but
    not including, the last trailing period is removed. The words are then
    joined with single spaces. A word consisting only of punctuation becomes
    an empty string, so the result may contain consecutive spaces.

    Args:
        text: The text to clean.

    Returns:
        The cleaned words joined by single spaces.
    """
    # Computed once instead of once per word.
    non_period_punctuation = punctuation.replace(".", "")
    # Leading periods and other punctuation are stripped together, so mixed
    # prefixes such as '."' are removed completely.
    return " ".join(
        word.lstrip(punctuation).rstrip(non_period_punctuation)
        for word in text.split()
    )
def escape_odd_spaces(text):
    """Collapse every whitespace run into one space and trim both ends."""
    return re.sub(r"\s+", " ", text).strip()