diff --git a/pm_attr_list.py b/pm_attr_list.py deleted file mode 100644 index 3b84068..0000000 --- a/pm_attr_list.py +++ /dev/null @@ -1,214 +0,0 @@ -""" -Attribute List Extension for Python-Markdown -============================================ - -Adds attribute list syntax. Inspired by -[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s -feature of the same name. - -See -for documentation. - -Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). - -All changes Copyright 2011-2014 The Python Markdown Project - -License: [BSD](https://opensource.org/licenses/bsd-license.php) - -Additional changes to allow list-wide and table-wide attributes -by Paul Melis, 2022 -""" - -import xml.etree.ElementTree as ET -from markdown import Extension -from markdown.treeprocessors import Treeprocessor -import re - - -def _handle_double_quote(s, t): - k, v = t.split('=', 1) - return k, v.strip('"') - - -def _handle_single_quote(s, t): - k, v = t.split('=', 1) - return k, v.strip("'") - - -def _handle_key_value(s, t): - return t.split('=', 1) - - -def _handle_word(s, t): - if t.startswith('.'): - return '.', t[1:] - if t.startswith('#'): - return 'id', t[1:] - return t, t - - -_scanner = re.Scanner([ - (r'[^ =]+=".*?"', _handle_double_quote), - (r"[^ =]+='.*?'", _handle_single_quote), - (r'[^ =]+=[^ =]+', _handle_key_value), - (r'[^ =]+', _handle_word), - (r' ', None) -]) - - -def get_attrs(str): - """ Parse attribute list and return a list of attribute tuples. """ - return _scanner.scan(str)[0] - - -def isheader(elem): - return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] - -def _dump_tree(level, elem): - extra = [] - if elem.text: - extra.append('text=[%s]' % repr(elem.text)) - if elem.tail: - extra.append('tail=[%s]' % repr(elem.tail)) - e = '' - if len(extra) > 0: - e = '{' + ', '.join(extra) + '}' - - print('%s%s %s %s' % (' '*(level*4), elem.tag, e, elem)) - - if len(elem): - for child in elem: - _dump_tree(level+1, child) - -def dump_tree(doc): - for elem in doc.findall('.'): - _dump_tree(0, elem) - -def _gather_parents(parents, parent, elem): - #print(parent, elem, elem.tag, 'text='+repr(elem.text), 'tail='+repr(elem.tail)) - if parent is not None: - parents[elem] = parent - tag = elem.tag - if tag in ['ol', 'ul', 'dl', 'table']: - parent = elem - for child in elem: - _gather_parents(parents, parent, child) - return parents - -def gather_parents(doc): - parents = {} - for elem in doc.findall('.'): - _gather_parents(parents, None, elem) - return parents - - -class AttrListTreeprocessor(Treeprocessor): - - BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}' - HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE)) - BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE)) - INLINE_RE = re.compile(r'^{}'.format(BASE_RE)) - NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff' - r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d' - r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff' - r'\uf900-\ufdcf\ufdf0-\ufffd' - r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') - - def run(self, doc): - #dump_tree(doc) - parents = gather_parents(doc) - for elem in doc.iter(): - parent = parents[elem] if elem in parents else None - if self.md.is_block_level(elem.tag): - # Block level: check for attrs on last line of text - RE = self.BLOCK_RE - if isheader(elem) or elem.tag in ['dt', 'td', 'th']: - # header, def-term, or table cell: check for attrs at end of element - RE = self.HEADER_RE - if len(elem) and elem.tag == 'li': - # special case list items. children may include a ul or ol. - pos = None - # find the ul or ol position - for i, child in enumerate(elem): - if child.tag in ['ul', 'ol']: - pos = i - break - if pos is None and elem[-1].tail: - # use tail of last child. no ul or ol. - m = RE.search(elem[-1].tail) - if m: - self.assign_attrs(elem, m.group(1), parent) - elem[-1].tail = elem[-1].tail[:m.start()] - elif pos is not None and pos > 0 and elem[pos-1].tail: - # use tail of last child before ul or ol - m = RE.search(elem[pos-1].tail) - if m: - self.assign_attrs(elem, m.group(1), parent) - elem[pos-1].tail = elem[pos-1].tail[:m.start()] - elif elem.text: - # use text. ul is first child. - m = RE.search(elem.text) - if m: - self.assign_attrs(elem, m.group(1), parent) - elem.text = elem.text[:m.start()] - elif len(elem) and elem[-1].tail: - # has children. Get from tail of last child - m = RE.search(elem[-1].tail) - if m: - self.assign_attrs(elem, m.group(1), parent) - elem[-1].tail = elem[-1].tail[:m.start()] - if isheader(elem): - # clean up trailing #s - elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() - elif elem.text: - # no children. Get from text. - m = RE.search(elem.text) - if m: - self.assign_attrs(elem, m.group(1), parent) - elem.text = elem.text[:m.start()] - if isheader(elem): - # clean up trailing #s - elem.text = elem.text.rstrip('#').rstrip() - else: - # inline: check for attrs at start of tail - if elem.tail: - m = self.INLINE_RE.match(elem.tail) - if m: - self.assign_attrs(elem, m.group(1), parent) - elem.tail = elem.tail[m.end():] - - def assign_attrs(self, elem, attrs, parent=None): - """ Assign attrs to element. """ - #print('assign_attrs(%s, %s, %s)' % (elem, attrs, parent)) - apply_elem = elem - for k, v in get_attrs(attrs): - if k == '.': - # add to class - cls = apply_elem.get('class') - if cls: - apply_elem.set('class', '{} {}'.format(cls, v)) - else: - apply_elem.set('class', v) - elif k == '^' and parent is not None: - # set remaining attributes on parent - apply_elem = parent - else: - # assign attr k with v - apply_elem.set(self.sanitize_name(k), v) - - def sanitize_name(self, name): - """ - Sanitize name as 'an XML Name, minus the ":"' - See https://www.w3.org/TR/REC-xml-names/#NT-NCName - """ - return self.NAME_RE.sub('_', name) - - -class PMAttrListExtension(Extension): - def extendMarkdown(self, md): - md.treeprocessors.register(AttrListTreeprocessor(md), 'pm_attr_list', 8) - md.registerExtension(self) - - -def makeExtension(**kwargs): # pragma: no cover - return PMAttrListExtension(**kwargs)