from operator import attrgetter
from typing import Dict, Iterable, List, Match, MutableSequence, Union
from regex import MULTILINE, escape, fullmatch
from ._wikitext import EXTERNAL_LINK_FINDITER, SubWikiText
# See includes/parser/BlockLevelPass.php for how MW parses list blocks.
# NOTE: the three byte patterns below use syntax of the third-party `regex`
# module (imported above): atomic groups `(?>...)`, possessive quantifiers
# (`*+`, `++`, `?+`), the subroutine call `(?&pattern)`, and the same group
# name (`item`, `fullitem`) used for more than one group.

# Zero or more consecutive lines, each of which starts with whatever the
# `pattern` group matches (re-applied through `(?&pattern)`), followed by one
# extra list-marker character (`:`, `;`, `#` or `*`) and the rest of the line
# up to a newline or the end of the text.
SUBLIST_PATTERN = ( # noqa
rb'(?>^' rb'(?&pattern)' rb'[:;#*].*+' rb'(?>\n|\Z)' rb')*+'
)
# A line remainder that itself begins with a list-marker character (to be used
# right after the start pattern has been consumed), followed by zero or more
# further lines of the form `<pattern><marker>...`.
SUBLIST_WITH_SECOND_PATTERN = ( # noqa
rb'[*#;:].*+(?>\n|\Z)' rb'(?>' rb'(?&pattern)[*#;:].*+(?>\n|\Z)' rb')*+'
)
# Template for the regex that matches a whole list. The literal `{pattern}`
# placeholder is filled in by WikiList with `bytes.replace` (not `str.format`).
# One or more `fullitem` repetitions are matched; each one is a line starting
# with the `pattern` group followed by one of these alternatives:
#   * if the text just before the current position is `;` plus optional
#     whitespace (conditional look-behind): an `item` running up to the first
#     `:` or newline, optionally followed by an inline `:`-introduced part
#     captured as a nested `fullitem`/`item`, then any sub-list lines;
#   * otherwise, either an empty `item` whose line continues directly with a
#     sub-list marker, or an `item` holding the rest of the line followed by
#     any sub-list lines.
LIST_PATTERN_FORMAT = ( # noqa
rb'(?<fullitem>^'
rb'(?<pattern>{pattern})'
rb'(?>'
rb'(?(?<=;\s*+)'
# mark inline definition as an item
rb'(?<item>[^:\n]*+)(?<fullitem>:(?<item>.*+))?+'
rb'(?>\n|\Z)' + SUBLIST_PATTERN + rb'|'
# non-definition
rb'(?>'
rb'(?<item>)'
+ SUBLIST_WITH_SECOND_PATTERN
+ rb'|(?<item>.*+)(?>\n|\Z)'
+ SUBLIST_PATTERN
+ rb')'
rb')'
rb'))++'
)
class WikiList(SubWikiText):
    """Class to represent ordered, unordered, and definition lists.

    The list is located by matching ``LIST_PATTERN_FORMAT`` (with the
    instance's ``pattern`` substituted in) against a shadow of the text.
    The resulting match object is cached together with the string it was
    computed for and is recomputed whenever the string has changed.
    """

    __slots__ = 'pattern', '_match_cache'

    def __init__(
        self,
        string: Union[str, MutableSequence[str]],
        pattern: str,
        _match: Union[Match, None] = None,
        _type_to_spans: Union[Dict[str, List[List[int]]], None] = None,
        _span: Union[List[int], None] = None,
        _type: Union[str, None] = None,
    ) -> None:
        """Initialize the list object.

        :param string: The text (or shared text container) of the list;
            forwarded to ``SubWikiText.__init__``.
        :param pattern: Regex source of the start pattern of the list items,
            e.g. ``r'\\#'``. It is encoded and substituted for the
            ``{pattern}`` placeholder of ``LIST_PATTERN_FORMAT``.
        :param _match: An already computed match object for this list. When
            given (truthy), it is cached as-is instead of running the regex.
        :param _type_to_spans: Forwarded to ``SubWikiText.__init__``.
        :param _span: Forwarded to ``SubWikiText.__init__``.
        :param _type: Forwarded to ``SubWikiText.__init__``.
        """
        super().__init__(string, _type_to_spans, _span, _type)
        # `pattern` must be set before the match is computed because both
        # `_list_shadow` and `_compute_list_match` read it.
        self.pattern = pattern
        if _match:
            self._match_cache = _match, self.string
        else:
            self._match_cache = self._compute_list_match(), self.string

    def _compute_list_match(self):
        """Match the list regex for ``self.pattern`` against the shadow.

        Return whatever ``regex.fullmatch`` returns for the current
        ``_list_shadow``.
        """
        list_regex = LIST_PATTERN_FORMAT.replace(
            b'{pattern}', self.pattern.encode(), 1
        )
        return fullmatch(list_regex, self._list_shadow, MULTILINE)

    @property
    def _list_shadow(self):
        """Return a copy of the shadow that is suitable for list matching.

        If the start pattern contains ``:``, every span found by
        ``EXTERNAL_LINK_FINDITER`` is overwritten with underscores of the
        same length, so all offsets stay unchanged.
        """
        shadow_copy = self._shadow[:]
        if ':' in self.pattern:
            # Presumably this keeps colons inside external links (URLs) from
            # being treated as definition separators -- TODO confirm.
            for link_match in EXTERNAL_LINK_FINDITER(shadow_copy):
                start, end = link_match.span()
                shadow_copy[start:end] = b'_' * (end - start)
        return shadow_copy

    @property
    def _match(self):
        """Return the match object for the current list.

        The cached match is reused as long as ``self.string`` equals the
        string it was computed for; otherwise the match is recomputed and
        the cache is refreshed.
        """
        cached_match, cached_string = self._match_cache
        current_string = self.string
        if cached_string == current_string:
            return cached_match
        fresh_match = self._compute_list_match()
        self._match_cache = fresh_match, current_string
        return fresh_match

    @property
    def items(self) -> List[str]:
        """Return items as a list of strings.

        Do not include sub-items and the start pattern.
        """
        string = self.string
        match = self._match
        # Spans are relative to the matched shadow; shift them by the start
        # of the match to index into `string`.
        ms = match.start()
        return [string[s - ms : e - ms] for s, e in match.spans('item')]

    @property
    def fullitems(self) -> List[str]:
        """Return list of item strings. Includes their start and sub-items."""
        string = self.string
        match = self._match
        ms = match.start()
        # Sort because "fullitem" can be flipped compared to "items" in case
        # of a definition list with the LIST_PATTERN_FORMAT regex.
        return [
            string[s - ms : e - ms] for s, e in sorted(match.spans('fullitem'))
        ]

    @property
    def level(self) -> int:
        """Return level of nesting for the current list.

        Level is a one-based index, for example the level for `* a` will be 1.
        It is computed as the length of the text matched by the start
        pattern.
        """
        return len(self._match['pattern'])

    def sublists(
        self,
        i: Union[int, None] = None,
        pattern: Union[str, Iterable[str]] = (r'\#', r'\*', '[:;]'),
    ) -> List['WikiList']:
        """Return the Lists inside the item with the given index.

        :param i: The index of the item which its sub-lists are desired.
            If None, sub-lists of all items are returned.
        :param pattern: The starting symbol for the desired sub-lists.
            The `pattern` of the current list will be automatically added
            as prefix. Either a single pattern or an iterable of patterns.
        :return: The matching sub-lists, sorted by their ``_span_data``.
        :raises IndexError: If `i` is out of range of the matched items.
        """
        patterns = (pattern,) if isinstance(pattern, str) else pattern
        self_pattern = self.pattern
        # Use the parent implementation: `get_lists` is overridden below to
        # delegate to this method.
        get_lists = super().get_lists

        if i is None:
            # Any sublist is acceptable
            sublists = [
                lst
                for sub_pattern in patterns
                for lst in get_lists(self_pattern + sub_pattern)
            ]
        else:
            # Only return sub-lists that are within the given item
            match = self._match
            # NOTE(review): unlike `fullitems`, the spans are not sorted
            # here, so for definition lists with an inline `:` part index `i`
            # may not refer to the same span as `fullitems[i]` -- confirm
            # whether this is intended.
            fullitem_spans = match.spans('fullitem')
            # Translate match-relative offsets into the coordinates used by
            # `_span_data`.
            offset = match.start() - self._span_data[0]
            s, e = fullitem_spans[i]
            s -= offset
            e -= offset
            sublists = []
            for sub_pattern in patterns:
                for lst in get_lists(self_pattern + sub_pattern):
                    # noinspection PyProtectedMember
                    ls, le, _, _ = lst._span_data
                    if s <= ls and le <= e:
                        sublists.append(lst)

        sublists.sort(key=attrgetter('_span_data'))
        return sublists

    def convert(self, newstart: str) -> None:
        """Convert to another list type by replacing starting pattern.

        :param newstart: The literal text that replaces the start of every
            item. Its regex-escaped form becomes the new ``pattern``.
        """
        match = self._match
        ms = match.start()
        # Replace from the last occurrence to the first so that a change in
        # length does not shift the spans that are still to be replaced.
        for s, e in reversed(match.spans('pattern')):
            self[s - ms : e - ms] = newstart
        self.pattern = escape(newstart)

    def get_lists(
        self, pattern: Union[str, Iterable[str]] = (r'\#', r'\*', '[:;]')
    ) -> List['WikiList']:
        """Return the sub-lists of this list for the given pattern(s).

        Equivalent to ``self.sublists(pattern=pattern)``, i.e. without
        restricting the result to a particular item.
        """
        return self.sublists(pattern=pattern)