"""
    pygments.scanner
    ~~~~~~~~~~~~~~~~

    This library implements a regex based scanner. Some languages
    like Pascal are easy to parse but have some keywords that
    depend on the context. Because of this it's impossible to lex
    that just by using a regular expression lexer like the
    `RegexLexer`.

    Have a look at the `DelphiLexer` to get an idea of how to use
    this scanner.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re
class EndOfText(RuntimeError):
    """
    Raised if the end of text is reached and the user
    tried to call a match function.

    The scanner's matching methods raise it when they are called
    while the scanner's ``eos`` property is true.
    """
class Scanner:
    """
    Simple regex based scanner over a fixed text.

    All method patterns are regular expression strings (not
    compiled expressions!). Every pattern is compiled once with the
    scanner's default flags and cached on the instance.
    """

    def __init__(self, text, flags=0):
        """
        :param text: The text which should be scanned
        :param flags: default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        # Start of the most recent successful match.
        self.start_pos = 0
        # Current read position; every match is anchored here.
        self.pos = 0
        self.flags = flags
        # Value of ``match`` as it was before the most recent `scan` call.
        self.last = None
        # Text matched by the most recent successful `scan` call.
        self.match = None
        # Maps pattern string -> compiled regular expression.
        self._re_cache = {}

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """Return the compiled regex for `pattern`, compiling it on first use."""
        try:
            return self._re_cache[pattern]
        except KeyError:
            compiled = re.compile(pattern, self.flags)
            self._re_cache[pattern] = compiled
            return compiled

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object. (Doesn't touch pos). Use this for
        lookahead.

        :param pattern: regular expression string
        :return: the match object, or `None` if the pattern does
                 not match at the current position
        :raises EndOfText: if the scanner is already at the end
                           of the text
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end
                           of the text
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        :param pattern: regular expression string
        :return: `True` if the pattern matched at the current
                 position, `False` otherwise
        :raises EndOfText: if the scanner is already at the end
                           of the text
        """
        if self.eos:
            raise EndOfText()
        regex = self._compile(pattern)
        # ``last`` is updated before the match is attempted, so after a
        # failed scan ``last`` equals the (unchanged) ``match`` value.
        self.last = self.match
        m = regex.match(self.data, self.pos)
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char.

        The character is stored in ``match``; nothing is returned.
        The pattern used is ``'.'``, so a newline is only consumed
        when the scanner's flags include ``re.DOTALL``.

        :raises EndOfText: if the scanner is already at the end
                           of the text
        """
        self.scan('.')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )