2323# This module file contains functions to read MediaWiki markup tables, templates, lists, ...
2424
2525import wikitextparser
26+ from typing import Union , Optional
2627
2728# Get a list of lists containing all cells of a table.
2829# Parameters:
29- # wikitext (str) - the text of a wikipedia page
30- # tab_index (int) - the index of the table (if there's multiple tables on the wiki)
31- # keep_markup (bool) - if False, everything (except Templates) will be converted to plain text
32- # skip_headers (bool) - if True, header rows are removed. Assumes all headers are on top
30+ # wikitext - the text of a wikipedia page
31+ # tab_index - the index of the table (if there are multiple tables on the wiki)
32+ # keep_markup - if False, everything (except Templates) will be converted to plain text
33+ # skip_headers - if True, header rows are removed. Assumes all headers are on top
3334# Returns:
3435# The cell contents, specified as a list of lists.
3536# The outer list holds the rows; each inner list holds the cells of that row
3637# Throws:
3738# If the table at the specified index isn't found
38- def read_wiki_table (wikitext , tab_index = 0 , keep_markup = False , skip_headers = True ):
39+ def read_wiki_table (wikitext : str , tab_index : int = 0 , keep_markup : bool = False , skip_headers : bool = True ) -> list [ list [ Optional [ str ]]] :
3940 # Drops all markup, such as italics, hyperlinks, ...
4041 if not keep_markup :
4142 wikitext = wikitextparser .remove_markup (wikitext , replace_tables = False , replace_templates = False )
@@ -54,14 +55,14 @@ def read_wiki_table(wikitext, tab_index = 0, keep_markup = False, skip_headers =
5455
5556# Get all instances of a certain wiki template within wikitext
5657# Parameters:
57- # wikitext (str) - the text of a wikipedia page
58- # template_name (str or list of str) - the name of the template to locate, e.g. 'Deprecated features/item'
59- # keep_markup (bool) - if False, everything (except Templates) will be converted to plain text
58+ # wikitext - the text of a wikipedia page
59+ # template_name - the name or names of the template to locate, e.g. 'Deprecated features/item'
60+ # keep_markup - if False, everything (except Templates) will be converted to plain text
6061# Returns:
6162# A list containing lists of strings with values [template_string, template_name, argument1, argument2, argument3, ...]
6263# Example: ["{{Tag | key | value}}", "Tag", "key", "value"]
6364# (Note that the template_string is affected by the markup removal, so for string replace purposes, use keep_markup=True)
64- def read_wiki_templates (wikitext , template_name , keep_markup = False ):
65+ def read_wiki_templates (wikitext : str , template_name : Union [ str , list [ str ]], keep_markup : bool = False ) -> list [ list [ str ]] :
6566 if isinstance (template_name , str ):
6667 template_name = [template_name ]
6768 template_name = list (map (str .lower , template_name ))
@@ -78,16 +79,16 @@ def read_wiki_templates(wikitext, template_name, keep_markup = False):
7879
7980# Get all entries in a list within wikitext
8081# Parameters:
81- # wikitext (str) - the text of a wikipedia page
82- # list_index (int) - the index of the list (if there's multiple lists on the wiki)
83- # keep_markup (bool) - if False, everything (except Templates) will be converted to plain text
84- # include_sublists (bool) - if true, include subitems. If false, only include the highest level items
82+ # wikitext - the text of a wikipedia page
83+ # list_index - the index of the list (if there are multiple lists on the wiki)
84+ # keep_markup - if False, everything (except Templates) will be converted to plain text
85+ # include_sublists - if true, include subitems. If false, only include the highest level items
8586# When true, the list item symbol (*, **, #, ##, :, ...) will also be included in the output
8687# Returns:
8788# A list with all list items
8889# Throws:
8990# If the list at index list_index doesn't exist
90- def read_wiki_list (wikitext , list_index = 0 , keep_markup = False , include_sublists = False ):
91+ def read_wiki_list (wikitext : str , list_index : int = 0 , keep_markup : bool = False , include_sublists : bool = False ) -> list [ str ] :
9192 if not keep_markup :
9293 wikitext = wikitextparser .remove_markup (wikitext , replace_templates = False )
9394
@@ -100,7 +101,7 @@ def read_wiki_list(wikitext, list_index = 0, keep_markup = False, include_sublis
100101
101102# Get all list entries within wikitext
102103# See read_wiki_list for details (excluding list_index)
103- def read_all_wiki_lists (wikitext , keep_markup = False , include_sublists = False ):
104+ def read_all_wiki_lists (wikitext : str , keep_markup : bool = False , include_sublists : bool = False ) -> list [ str ] :
104105 res = []
105106 if not keep_markup :
106107 wikitext = wikitextparser .remove_markup (wikitext , replace_templates = False )
@@ -110,18 +111,18 @@ def read_all_wiki_lists(wikitext, keep_markup = False, include_sublists = False)
110111 while True :
111112 res .extend (read_wiki_list (wikitext , list_index = list_index , keep_markup = True , include_sublists = include_sublists ))
112113 list_index += 1
113- except :
114+ except IndexError :
114115 return res
115116
116117
117118# Convert all instances of Tag-templates to textual tags, e.g. {{Tag|oneway|yes}} -> "oneway=yes"
118119# Parameters:
119- # wikitext (str) - the text of a wikipedia page
120- # quote (bool) - whether the tag should be wrapped in ``
121- # star_value (bool) - whether empty tag values should be represented by *
120+ # wikitext - the text of a wikipedia page
121+ # quote - whether the tag should be wrapped in ``
122+ # star_value - whether empty tag values should be represented by *
122123# Returns:
123124# The wikitext with {{Tag|*}} replaced by the textual tag
124- def wikitag2text (wikitext , quote = False , star_value = True ):
125+ def wikitag2text (wikitext : str , quote : bool = False , star_value : bool = True ) -> str :
125126 tag_templates = read_wiki_templates (wikitext , ["Tag" , "Key" ], keep_markup = True )
126127 for t in tag_templates :
127128 k = t [2 ]
0 commit comments