giseldo's picture
ultima versao
a6e7f6b
import re
from string import punctuation
def escape_tags_and_content(text):
    """Remove brace-delimited markup blocks together with everything inside them.

    Targets the ``code`` and ``noformat`` tags (e.g. ``{code:java}...{code}``),
    whose content is not natural language (code snippets, raw output), so
    both the tags and the enclosed text are dropped.

    Args:
        text: String that may contain ``{tag...}...{tag}`` blocks.

    Returns:
        ``text`` with every matching block replaced by an empty string.
    """
    NO_TEXT_TAGS = "code", "noformat"
    for tag in NO_TEXT_TAGS:
        # Raw string: "\{" and "\}" are invalid escape sequences in a normal
        # literal (SyntaxWarning on Python 3.12+). DOTALL lets a block span
        # several lines; the lazy groups stop at the nearest closing tag.
        regex_matching_tag = re.compile(r"\{%s(.*?)\}(.*?)\{%s\}" % (tag, tag), re.DOTALL)
        text = regex_matching_tag.sub("", text)
    return text
def escape_tags(text):
    """Remove selected markup tags while keeping the text they wrap.

    Every ``{tag}`` / ``{tag:options}`` marker for the ``color``, ``quote``,
    ``anchor`` and ``panel`` tags is deleted; the surrounding content is
    left untouched.

    Args:
        text: String that may contain brace-delimited markup tags.

    Returns:
        ``text`` with the listed tag markers removed.
    """
    ESCAPE_TAGS = "color", "quote", "anchor", "panel"
    for tag in ESCAPE_TAGS:
        # Raw string: "\{" and "\}" are invalid escape sequences in a normal
        # literal (SyntaxWarning on Python 3.12+).
        text = re.sub(r"\{%s(.*?)\}" % tag, "", text)
    return text
def escape_strings(text):
    """Replace escaped control sequences, quotes, backslashes and heading markers with a space.

    The control sequences are the *literal* two-character forms (a backslash
    followed by ``r``, ``n``, ``t``, ``f`` or ``v``), not real control
    characters. Heading markers are ``h1. `` through ``h6. ``.

    Args:
        text: String to clean.

    Returns:
        ``text`` with each occurrence of the listed substrings replaced by a
        single space.
    """
    control_escapes = ("\\r", "\\n", "\\t", "\\f", "\\v")
    quote_and_backslash = ("\"", "\\\\")
    heading_markers = tuple("h%d. " % level for level in range(1, 7))
    # Order matters: the control escapes must be consumed before the
    # double-backslash replacement runs.
    for needle in control_escapes + quote_and_backslash + heading_markers:
        text = text.replace(needle, " ")
    return text
def escape_links(text):
    """Remove bracketed markup links and bare HTTP(S) URLs.

    A bracketed link is ``[target]`` or ``[alias|target]`` where ``target``
    starts with one of the recognized link starters (``#``, ``^``,
    ``http://``, ``https://``, ``mailto:``, ``file:``, ``~``). After those
    are removed, any remaining bare ``http://`` / ``https://`` URL is
    removed up to the next whitespace.

    Args:
        text: String that may contain links.

    Returns:
        ``text`` with every recognized link replaced by an empty string.
    """
    # "mailto" was previously misspelled "malto", so mail links were never matched.
    LINK_STARTERS = r"\#", r"\^", r"http\:\/\/", r"https\:\/\/", r"mailto\:", r"file\:", r"\~"
    for link_starter in LINK_STARTERS:
        # Raw string avoids the invalid "\[" / "\]" escapes of a normal literal.
        # The optional first group swallows an "alias|" prefix inside the brackets.
        text = re.sub(r"\[(.*?\|)?%s(.*?)\]" % link_starter, "", text)
    # Bare URLs that were not wrapped in brackets.
    text = re.sub(r"\bhttps?://\S+", "", text)
    return text
def escape_hex_character_codes(text):
    """Strip literal ``\\xNN``-style escape codes from the text.

    Matches a backslash, the letter ``x`` and the two word characters that
    follow it, i.e. the textual hex representation of a non-latin character.

    Args:
        text: String that may contain literal hex escape codes.

    Returns:
        ``text`` with every such code removed.
    """
    hex_code = re.compile(r"\\x\w\w")
    return hex_code.sub("", text)
def escape_punctuation_boundaries(text):
    """Strip punctuation from the edges of every whitespace-separated word.

    All leading punctuation is removed. Trailing punctuation is removed up to
    (but not including) a trailing period, which is kept.

    Words consisting only of punctuation become empty strings, so the result
    may contain consecutive spaces.

    Args:
        text: String to clean.

    Returns:
        The cleaned words joined by single spaces.
    """
    # Build the trailing strip set once instead of once per word.
    trailing_punctuation = punctuation.replace(".", "")
    # Strip *all* punctuation on the left in one pass: the previous
    # strip-then-lstrip(".") sequence left characters behind when a period
    # preceded other punctuation (".(word" became "(word").
    return " ".join(
        [word.lstrip(punctuation).rstrip(trailing_punctuation) for word in text.split()]
    )
def escape_odd_spaces(text):
    """Collapse whitespace runs and trim the ends.

    Every run of one or more whitespace characters becomes a single space,
    then leading and trailing whitespace is stripped.

    Args:
        text: String to normalize.

    Returns:
        The whitespace-normalized string.
    """
    collapsed = re.compile(r"\s+").sub(" ", text)
    return collapsed.strip()