from typing import Dict, Iterable, List, Optional, Tuple, TypeVar
from regex import REVERSE
from ._argument import Argument
from ._comment_bold_italic import COMMENT_PATTERN
from ._parser_function import SubWikiTextWithArgs
from ._wikitext import WS, rc
# NOTE(review): `rc` is imported from `._wikitext`; presumably a
# regex-compile helper -- confirm there.
# Substitution callable of the pattern built from COMMENT_PATTERN; used by
# `Template.normal_name` to strip comments from the template name.
COMMENT_SUB = rc(COMMENT_PATTERN).sub
# Full-match callable over bytes: a name part containing no `|` or `}`,
# followed by zero or more `arg` captures that each start with `|`.
TL_NAME_ARGS_FULLMATCH = rc(rb'[^|}]*+(?#name)(?<arg>\|[^|]*+)*+').fullmatch
# Matches the (possibly empty) run of whitespace at the start of a string.
STARTING_WS_MATCH = rc(r'\s*+').match
# Built with the REVERSE flag: matches a (possibly empty) run of
# "newline followed by spaces/tabs" groups. Presumably the match is taken
# from the end of the string backwards -- confirm against the `regex` docs.
ENDING_WS_MATCH = rc(r'(?>\n[ \t]*)*+', REVERSE).match
# Finds the leftmost (possibly empty) whitespace run directly followed by `|`.
SPACE_AFTER_SEARCH = rc(r'\s*+(?=\|)').search
# Generic item type used in the signature of `mode`.
T = TypeVar('T')
[docs]
class Template(SubWikiTextWithArgs):
    """Convert strings to Template objects.

    The string should start with {{ and end with }}.
    """

    __slots__ = ()
    _name_args_matcher = TL_NAME_ARGS_FULLMATCH
    # 124 is the code point / byte value of '|'.
    _first_arg_sep = 124

    @property
    def _content_span(self) -> Tuple[int, int]:
        # Offsets that skip the two-character `{{` prefix and `}}` suffix.
        return 2, -2

    def normal_name(
        self,
        rm_namespaces=('Template',),
        *,
        code: Optional[str] = None,
        capitalize=False,
    ) -> str:
        """Return normal form of self.name.

        - Remove comments.
        - Remove language code.
        - Remove namespace ("template:" or any of `rm_namespaces`).
        - Use space instead of underscore.
        - Remove consecutive spaces.
        - Use uppercase for the first letter if `capitalize`.
        - Remove #anchor.

        A language code or namespace is only removed when it is actually
        followed by a colon; a bare name such as ``Template`` is kept.

        :param rm_namespaces: is used to provide additional localized
            namespaces for the template namespace. They will be removed from
            the result. Default is ('Template',).
        :param capitalize: If True, convert the first letter of the
            template's name to a capital letter. See
            [[mw:Manual:$wgCapitalLinks]] for more info.
        :param code: is the language code.
        :return: the normalized template name.

        Example:
            >>> Template(
            ...     '{{ eN : tEmPlAtE : <!-- c --> t_1 # b | a }}'
            ... ).normal_name(code='en', capitalize=True)
            'T 1'
        """
        # Remove comments
        name = COMMENT_SUB('', self.name).strip(WS)
        # Remove code
        if code:
            head, sep, tail = name.partition(':')
            if not head and sep:
                # The name starts with a colon; look at what follows it.
                name = tail.strip(' ')
                head, sep, tail = name.partition(':')
            # Require the colon: without it `head` is the whole name and
            # stripping would leave an empty string.
            if sep and code.lower() == head.strip(' ').lower():
                name = tail.strip(' ')
        # Remove namespace
        head, sep, tail = name.partition(':')
        if not head and sep:
            name = tail.strip(' ')
            head, sep, tail = name.partition(':')
        if head and sep:
            ns = head.strip(' ').lower()
            if any(ns == namespace.lower() for namespace in rm_namespaces):
                name = tail.strip(' ')
        # Use space instead of underscore
        name = name.replace('_', ' ')
        if capitalize:
            # Use uppercase for the first letter
            name = name[:1].upper() + name[1:]
        # Remove #anchor
        name = name.partition('#')[0]
        # Collapse runs of whitespace and trim both ends.
        return ' '.join(name.split())

    def rm_first_of_dup_args(self) -> None:
        """Eliminate duplicate arguments by removing the first occurrences.

        Remove the first occurrences of duplicate arguments, regardless of
        their value. Result of the rendered wikitext should remain the same.
        Warning: Some meaningful data may be removed from wikitext.

        Also see `rm_dup_args_safe` function.
        """
        seen_names: set = set()
        # Walk backwards so the last occurrence of each name is the one kept.
        for a in reversed(self.arguments):
            name = a.name.strip(WS)
            if name in seen_names:
                del a[: len(a.string)]
            else:
                seen_names.add(name)

    def rm_dup_args_safe(self, tag: Optional[str] = None) -> None:
        """Remove duplicate arguments in a safe manner.

        Remove the duplicate arguments only in the following situations:
            1. Both arguments have the same name AND value. (Remove one of
                them.)
            2. Arguments have the same name and one of them is empty. (Remove
                the empty one.)

        Warning: Although this is considered to be safe and no meaningful data
            is removed from wikitext, but the result of the rendered wikitext
            may actually change if the second arg is empty and removed but
            the first had had a value.

        If `tag` is defined, it should be a string that will be appended to
        the value of the remaining duplicate arguments.

        Also see `rm_first_of_dup_args` function.
        """
        name_to_lastarg_vals: Dict[str, Tuple[Argument, List[str]]] = {}
        # Removing positional args affects their name. By reversing the list
        # we avoid encountering those kind of args.
        for arg in reversed(self.arguments):
            name = arg.name.strip(WS)
            if arg.positional:
                # It's not OK to strip whitespace in positional arguments.
                val = arg.value
            else:
                # Value of keyword arguments is automatically stripped by MW.
                val = arg.value.strip(WS)
            if name not in name_to_lastarg_vals:
                name_to_lastarg_vals[name] = (arg, [val])
                continue
            # This is a duplicate argument.
            if not val:
                # This duplicate argument is empty. It's safe to remove it.
                del arg[0 : len(arg.string)]
                continue
            # Try to remove any of the detected duplicates of this
            # that are empty or their value equals to this one.
            lastarg, dup_vals = name_to_lastarg_vals[name]
            if val in dup_vals:
                del arg[0 : len(arg.string)]
            elif '' in dup_vals:
                # This happens only if the last occurrence of name has
                # been an empty string; other empty values will
                # be removed as they are seen. So dup_vals is exactly [''].
                del lastarg[0 : len(lastarg.string)]
                # The current argument is now the last surviving occurrence;
                # record it so that earlier arguments with the same value
                # are still recognised as duplicates.
                name_to_lastarg_vals[name] = (arg, [val])
            else:
                # It was not possible to remove any of the duplicates.
                dup_vals.append(val)
                if tag:
                    arg.value += tag

    def set_arg(
        self,
        name: str,
        value: str,
        positional: Optional[bool] = None,
        before: Optional[str] = None,
        after: Optional[str] = None,
        preserve_spacing=False,
    ) -> None:
        """Set the value for `name` argument. Add it if it doesn't exist.

        - Use `positional`, `before` and `after` keyword arguments only when
          adding a new argument.
        - If `before` is given, ignore `after`.
        - If neither `before` nor `after` are given and it's needed to add a
          new argument, then append the new argument to the end.
        - If `positional` is True, try to add the given value as a positional
          argument. Ignore `preserve_spacing` if positional is True.
          If it's None, do what seems more appropriate.

        Note: if `before`/`after` names an argument that does not exist, the
        lookup yields None and the insertion fails with AttributeError.
        """
        # Reversed so that lookups find the last occurrence of a name and
        # args[0] is the last argument of the template.
        args = tuple(reversed(self.arguments))
        arg = get_arg(name, args)
        # Updating an existing argument.
        if arg:
            if positional:
                arg.positional = positional
            if preserve_spacing:
                val = arg.value
                # Swap only the stripped core so that the surrounding
                # whitespace of the old value is kept.
                arg.value = val.replace(val.strip(WS), value, 1)
            else:
                arg.value = value
            return
        # Adding a new argument
        if not name and positional is None:
            positional = True
        # Calculate the whitespace needed before arg-name and after arg-value.
        if not positional and preserve_spacing and args:
            before_names = []
            name_lengths = []
            before_values = []
            after_values = []
            for existing in args:
                aname = existing.name
                name_lengths.append(len(aname))
                before_names.append(STARTING_WS_MATCH(aname)[0])
                arg_value = existing.value
                before_values.append(STARTING_WS_MATCH(arg_value)[0])
                after_values.append(ENDING_WS_MATCH(arg_value)[0])
            # Use the most common spacing among the existing arguments.
            pre_name_ws_mode = mode(before_names)
            name_length_mode = mode(name_lengths)
            # after_values[0] belongs to the last argument (the whitespace
            # before the closing braces), so it is replaced here by the
            # whitespace found in front of a `|` in the template string.
            post_value_ws_mode = mode(
                [SPACE_AFTER_SEARCH(self.string)[0]] + after_values[1:]
            )
            pre_value_ws_mode = mode(before_values)
        else:
            preserve_spacing = False
        # Calculate the string that needs to be added to the Template.
        if positional:
            # Ignore preserve_spacing for positional args.
            addstring = '|' + value
        elif preserve_spacing:
            # noinspection PyUnboundLocalVariable
            addstring = (
                '|'
                + (pre_name_ws_mode + name.strip(WS)).ljust(name_length_mode)
                + '='
                + pre_value_ws_mode
                + value
                + post_value_ws_mode
            )
        else:
            addstring = '|' + name + '=' + value
        # Place the addstring in the right position.
        if before:
            arg = get_arg(before, args)
            arg.insert(0, addstring)
        elif after:
            arg = get_arg(after, args)
            arg.insert(len(arg.string), addstring)
        elif args and not positional:
            arg = args[0]
            arg_string = arg.string
            if preserve_spacing:
                # Insert after the last argument.
                # The addstring needs to be recalculated because we don't
                # want to change the whitespace before final braces.
                # noinspection PyUnboundLocalVariable
                arg[0 : len(arg_string)] = (
                    arg.string.rstrip(WS)
                    + post_value_ws_mode
                    + addstring.rstrip(WS)
                    + after_values[0]
                )
            else:
                arg.insert(len(arg_string), addstring)
        else:
            # The template has no arguments or the new arg is
            # positional AND is to be added at the end of the template.
            self.insert(-2, addstring)

    def get_arg(self, name: str) -> Optional[Argument]:
        """Return the last argument with the given name.

        Return None if no argument with that name is found.
        """
        return get_arg(name, reversed(self.arguments))

    def has_arg(self, name: str, value: Optional[str] = None) -> bool:
        """Return true if there is an arg named `name`.

        Also check equality of values if `value` is provided (that is, it
        is not None; an empty string is compared like any other value).
        Only the last argument with the given name is considered. Values of
        positional arguments are compared verbatim, others are compared
        after stripping whitespace.

        Note: If you just need to get an argument and you want to LBYL, it's
            better to get_arg directly and then check if the returned value
            is None.
        """
        wanted = name.strip(WS)
        for arg in reversed(self.arguments):
            if arg.name.strip(WS) != wanted:
                continue
            if value is None:
                return True
            if arg.positional:
                return arg.value == value
            return arg.value.strip(WS) == value.strip(WS)
        return False

    def del_arg(self, name: str) -> None:
        """Delete all arguments with the given name."""
        wanted = name.strip(WS)
        for arg in reversed(self.arguments):
            if arg.name.strip(WS) == wanted:
                del arg[:]

    @property
    def templates(self) -> List['Template']:
        # Drop the first item of the parent's list.
        # NOTE(review): presumably that first item is this template itself
        # -- confirm against the base class.
        return super().templates[1:]
def mode(list_: List[T]) -> T:
    """Return the most common item in the list.

    Return the first one (in list order) if there are more than one most
    common items. Items must be hashable.

    :raises ValueError: if the list is empty.

    Example:
        >>> mode([1, 1, 2, 2])
        1
        >>> mode([2, 2, 1, 1])
        2
        >>> mode([1, 2, 2])
        2
    """
    # A dict keeps insertion order, so keys are ordered by first appearance.
    counts: Dict[T, int] = {}
    for item in list_:
        counts[item] = counts.get(item, 0) + 1
    # max() returns the first maximal key it meets, which makes ties resolve
    # to the earliest item; it raises ValueError for an empty mapping.
    return max(counts, key=counts.__getitem__)
def get_arg(name: str, args: Iterable[Argument]) -> Optional[Argument]:
    """Return the first argument in the args that has the given name.

    Names are compared after stripping whitespace from both sides.
    Return None if no such argument is found.

    As the computation of self.arguments is a little costly, this
    function was created so that other methods that have already computed
    the arguments use it instead of calling self.get_arg directly.

    :param name: the argument name to look for.
    :param args: the arguments to search, in the order they should be tried.
    """
    # Strip the requested name once instead of on every iteration.
    wanted = name.strip(WS)
    for arg in args:
        if arg.name.strip(WS) == wanted:
            return arg
    return None