diff options
Diffstat (limited to 'env/lib/python3.10/site-packages/lxml/doctestcompare.py')
-rw-r--r-- | env/lib/python3.10/site-packages/lxml/doctestcompare.py | 507 |
1 files changed, 0 insertions, 507 deletions
diff --git a/env/lib/python3.10/site-packages/lxml/doctestcompare.py b/env/lib/python3.10/site-packages/lxml/doctestcompare.py deleted file mode 100644 index 1b0daa4..0000000 --- a/env/lib/python3.10/site-packages/lxml/doctestcompare.py +++ /dev/null @@ -1,507 +0,0 @@ -""" -lxml-based doctest output comparison. - -Note: normally, you should just import the `lxml.usedoctest` and -`lxml.html.usedoctest` modules from within a doctest, instead of this -one:: - - >>> import lxml.usedoctest # for XML output - - >>> import lxml.html.usedoctest # for HTML output - -To use this module directly, you must call ``lxmldoctest.install()``, -which will cause doctest to use this in all subsequent calls. - -This changes the way output is checked and comparisons are made for -XML or HTML-like content. - -XML or HTML content is noticed because the example starts with ``<`` -(it's HTML if it starts with ``<html``). You can also use the -``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing. - -Some rough wildcard-like things are allowed. Whitespace is generally -ignored (except in attributes). In text (attributes and text in the -body) you can use ``...`` as a wildcard. In an example it also -matches any trailing tags in the element, though it does not match -leading tags. You may create a tag ``<any>`` or include an ``any`` -attribute in the tag. An ``any`` tag matches any tag, while the -attribute matches any and all attributes. - -When a match fails, the reformatted example and gotten text is -displayed (indented), and a rough diff-like output is given. Anything -marked with ``+`` is in the output but wasn't supposed to be, and -similarly ``-`` means its in the example but wasn't in the output. - -You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP`` -""" - -from lxml import etree -import sys -import re -import doctest -try: - from html import escape as html_escape -except ImportError: - from cgi import escape as html_escape - -__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker', - 'LHTMLOutputChecker', 'install', 'temp_install'] - -try: - _basestring = basestring -except NameError: - _basestring = (str, bytes) - -_IS_PYTHON_3 = sys.version_info[0] >= 3 - -PARSE_HTML = doctest.register_optionflag('PARSE_HTML') -PARSE_XML = doctest.register_optionflag('PARSE_XML') -NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP') - -OutputChecker = doctest.OutputChecker - -def strip(v): - if v is None: - return None - else: - return v.strip() - -def norm_whitespace(v): - return _norm_whitespace_re.sub(' ', v) - -_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True) - -def html_fromstring(html): - return etree.fromstring(html, _html_parser) - -# We use this to distinguish repr()s from elements: -_repr_re = re.compile(r'^<[^>]+ (at|object) ') -_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+') - -class LXMLOutputChecker(OutputChecker): - - empty_tags = ( - 'param', 'img', 'area', 'br', 'basefont', 'input', - 'base', 'meta', 'link', 'col') - - def get_default_parser(self): - return etree.XML - - def check_output(self, want, got, optionflags): - alt_self = getattr(self, '_temp_override_self', None) - if alt_self is not None: - super_method = self._temp_call_super_check_output - self = alt_self - else: - super_method = OutputChecker.check_output - parser = self.get_parser(want, got, optionflags) - if not parser: - return super_method( - self, want, got, optionflags) - try: - want_doc = parser(want) - except etree.XMLSyntaxError: - return False - try: - got_doc = parser(got) - except etree.XMLSyntaxError: - return False - return self.compare_docs(want_doc, got_doc) - - def get_parser(self, want, got, optionflags): - parser = None - if NOPARSE_MARKUP & optionflags: - return None - if PARSE_HTML & optionflags: - parser = html_fromstring - elif PARSE_XML & optionflags: - parser = etree.XML - elif (want.strip().lower().startswith('<html') - and got.strip().startswith('<html')): - parser = html_fromstring - elif (self._looks_like_markup(want) - and self._looks_like_markup(got)): - parser = self.get_default_parser() - return parser - - def _looks_like_markup(self, s): - s = s.strip() - return (s.startswith('<') - and not _repr_re.search(s)) - - def compare_docs(self, want, got): - if not self.tag_compare(want.tag, got.tag): - return False - if not self.text_compare(want.text, got.text, True): - return False - if not self.text_compare(want.tail, got.tail, True): - return False - if 'any' not in want.attrib: - want_keys = sorted(want.attrib.keys()) - got_keys = sorted(got.attrib.keys()) - if want_keys != got_keys: - return False - for key in want_keys: - if not self.text_compare(want.attrib[key], got.attrib[key], False): - return False - if want.text != '...' or len(want): - want_children = list(want) - got_children = list(got) - while want_children or got_children: - if not want_children or not got_children: - return False - want_first = want_children.pop(0) - got_first = got_children.pop(0) - if not self.compare_docs(want_first, got_first): - return False - if not got_children and want_first.tail == '...': - break - return True - - def text_compare(self, want, got, strip): - want = want or '' - got = got or '' - if strip: - want = norm_whitespace(want).strip() - got = norm_whitespace(got).strip() - want = '^%s$' % re.escape(want) - want = want.replace(r'\.\.\.', '.*') - if re.search(want, got): - return True - else: - return False - - def tag_compare(self, want, got): - if want == 'any': - return True - if (not isinstance(want, _basestring) - or not isinstance(got, _basestring)): - return want == got - want = want or '' - got = got or '' - if want.startswith('{...}'): - # Ellipsis on the namespace - return want.split('}')[-1] == got.split('}')[-1] - else: - return want == got - - def output_difference(self, example, got, optionflags): - want = example.want - parser = self.get_parser(want, got, optionflags) - errors = [] - if parser is not None: - try: - want_doc = parser(want) - except etree.XMLSyntaxError: - e = sys.exc_info()[1] - errors.append('In example: %s' % e) - try: - got_doc = parser(got) - except etree.XMLSyntaxError: - e = sys.exc_info()[1] - errors.append('In actual output: %s' % e) - if parser is None or errors: - value = OutputChecker.output_difference( - self, example, got, optionflags) - if errors: - errors.append(value) - return '\n'.join(errors) - else: - return value - html = parser is html_fromstring - diff_parts = ['Expected:', - self.format_doc(want_doc, html, 2), - 'Got:', - self.format_doc(got_doc, html, 2), - 'Diff:', - self.collect_diff(want_doc, got_doc, html, 2)] - return '\n'.join(diff_parts) - - def html_empty_tag(self, el, html=True): - if not html: - return False - if el.tag not in self.empty_tags: - return False - if el.text or len(el): - # This shouldn't happen (contents in an empty tag) - return False - return True - - def format_doc(self, doc, html, indent, prefix=''): - parts = [] - if not len(doc): - # No children... - parts.append(' '*indent) - parts.append(prefix) - parts.append(self.format_tag(doc)) - if not self.html_empty_tag(doc, html): - if strip(doc.text): - parts.append(self.format_text(doc.text)) - parts.append(self.format_end_tag(doc)) - if strip(doc.tail): - parts.append(self.format_text(doc.tail)) - parts.append('\n') - return ''.join(parts) - parts.append(' '*indent) - parts.append(prefix) - parts.append(self.format_tag(doc)) - if not self.html_empty_tag(doc, html): - parts.append('\n') - if strip(doc.text): - parts.append(' '*indent) - parts.append(self.format_text(doc.text)) - parts.append('\n') - for el in doc: - parts.append(self.format_doc(el, html, indent+2)) - parts.append(' '*indent) - parts.append(self.format_end_tag(doc)) - parts.append('\n') - if strip(doc.tail): - parts.append(' '*indent) - parts.append(self.format_text(doc.tail)) - parts.append('\n') - return ''.join(parts) - - def format_text(self, text, strip=True): - if text is None: - return '' - if strip: - text = text.strip() - return html_escape(text, 1) - - def format_tag(self, el): - attrs = [] - if isinstance(el, etree.CommentBase): - # FIXME: probably PIs should be handled specially too? - return '<!--' - for name, value in sorted(el.attrib.items()): - attrs.append('%s="%s"' % (name, self.format_text(value, False))) - if not attrs: - return '<%s>' % el.tag - return '<%s %s>' % (el.tag, ' '.join(attrs)) - - def format_end_tag(self, el): - if isinstance(el, etree.CommentBase): - # FIXME: probably PIs should be handled specially too? - return '-->' - return '</%s>' % el.tag - - def collect_diff(self, want, got, html, indent): - parts = [] - if not len(want) and not len(got): - parts.append(' '*indent) - parts.append(self.collect_diff_tag(want, got)) - if not self.html_empty_tag(got, html): - parts.append(self.collect_diff_text(want.text, got.text)) - parts.append(self.collect_diff_end_tag(want, got)) - parts.append(self.collect_diff_text(want.tail, got.tail)) - parts.append('\n') - return ''.join(parts) - parts.append(' '*indent) - parts.append(self.collect_diff_tag(want, got)) - parts.append('\n') - if strip(want.text) or strip(got.text): - parts.append(' '*indent) - parts.append(self.collect_diff_text(want.text, got.text)) - parts.append('\n') - want_children = list(want) - got_children = list(got) - while want_children or got_children: - if not want_children: - parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+')) - continue - if not got_children: - parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-')) - continue - parts.append(self.collect_diff( - want_children.pop(0), got_children.pop(0), html, indent+2)) - parts.append(' '*indent) - parts.append(self.collect_diff_end_tag(want, got)) - parts.append('\n') - if strip(want.tail) or strip(got.tail): - parts.append(' '*indent) - parts.append(self.collect_diff_text(want.tail, got.tail)) - parts.append('\n') - return ''.join(parts) - - def collect_diff_tag(self, want, got): - if not self.tag_compare(want.tag, got.tag): - tag = '%s (got: %s)' % (want.tag, got.tag) - else: - tag = got.tag - attrs = [] - any = want.tag == 'any' or 'any' in want.attrib - for name, value in sorted(got.attrib.items()): - if name not in want.attrib and not any: - attrs.append('+%s="%s"' % (name, self.format_text(value, False))) - else: - if name in want.attrib: - text = self.collect_diff_text(want.attrib[name], value, False) - else: - text = self.format_text(value, False) - attrs.append('%s="%s"' % (name, text)) - if not any: - for name, value in sorted(want.attrib.items()): - if name in got.attrib: - continue - attrs.append('-%s="%s"' % (name, self.format_text(value, False))) - if attrs: - tag = '<%s %s>' % (tag, ' '.join(attrs)) - else: - tag = '<%s>' % tag - return tag - - def collect_diff_end_tag(self, want, got): - if want.tag != got.tag: - tag = '%s (got: %s)' % (want.tag, got.tag) - else: - tag = got.tag - return '</%s>' % tag - - def collect_diff_text(self, want, got, strip=True): - if self.text_compare(want, got, strip): - if not got: - return '' - return self.format_text(got, strip) - text = '%s (got: %s)' % (want, got) - return self.format_text(text, strip) - -class LHTMLOutputChecker(LXMLOutputChecker): - def get_default_parser(self): - return html_fromstring - -def install(html=False): - """ - Install doctestcompare for all future doctests. - - If html is true, then by default the HTML parser will be used; - otherwise the XML parser is used. - """ - if html: - doctest.OutputChecker = LHTMLOutputChecker - else: - doctest.OutputChecker = LXMLOutputChecker - -def temp_install(html=False, del_module=None): - """ - Use this *inside* a doctest to enable this checker for this - doctest only. - - If html is true, then by default the HTML parser will be used; - otherwise the XML parser is used. - """ - if html: - Checker = LHTMLOutputChecker - else: - Checker = LXMLOutputChecker - frame = _find_doctest_frame() - dt_self = frame.f_locals['self'] - checker = Checker() - old_checker = dt_self._checker - dt_self._checker = checker - # The unfortunate thing is that there is a local variable 'check' - # in the function that runs the doctests, that is a bound method - # into the output checker. We have to update that. We can't - # modify the frame, so we have to modify the object in place. The - # only way to do this is to actually change the func_code - # attribute of the method. We change it, and then wait for - # __record_outcome to be run, which signals the end of the __run - # method, at which point we restore the previous check_output - # implementation. - if _IS_PYTHON_3: - check_func = frame.f_locals['check'].__func__ - checker_check_func = checker.check_output.__func__ - else: - check_func = frame.f_locals['check'].im_func - checker_check_func = checker.check_output.im_func - # Because we can't patch up func_globals, this is the only global - # in check_output that we care about: - doctest.etree = etree - _RestoreChecker(dt_self, old_checker, checker, - check_func, checker_check_func, - del_module) - -class _RestoreChecker(object): - def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func, - del_module): - self.dt_self = dt_self - self.checker = old_checker - self.checker._temp_call_super_check_output = self.call_super - self.checker._temp_override_self = new_checker - self.check_func = check_func - self.clone_func = clone_func - self.del_module = del_module - self.install_clone() - self.install_dt_self() - def install_clone(self): - if _IS_PYTHON_3: - self.func_code = self.check_func.__code__ - self.func_globals = self.check_func.__globals__ - self.check_func.__code__ = self.clone_func.__code__ - else: - self.func_code = self.check_func.func_code - self.func_globals = self.check_func.func_globals - self.check_func.func_code = self.clone_func.func_code - def uninstall_clone(self): - if _IS_PYTHON_3: - self.check_func.__code__ = self.func_code - else: - self.check_func.func_code = self.func_code - def install_dt_self(self): - self.prev_func = self.dt_self._DocTestRunner__record_outcome - self.dt_self._DocTestRunner__record_outcome = self - def uninstall_dt_self(self): - self.dt_self._DocTestRunner__record_outcome = self.prev_func - def uninstall_module(self): - if self.del_module: - import sys - del sys.modules[self.del_module] - if '.' in self.del_module: - package, module = self.del_module.rsplit('.', 1) - package_mod = sys.modules[package] - delattr(package_mod, module) - def __call__(self, *args, **kw): - self.uninstall_clone() - self.uninstall_dt_self() - del self.checker._temp_override_self - del self.checker._temp_call_super_check_output - result = self.prev_func(*args, **kw) - self.uninstall_module() - return result - def call_super(self, *args, **kw): - self.uninstall_clone() - try: - return self.check_func(*args, **kw) - finally: - self.install_clone() - -def _find_doctest_frame(): - import sys - frame = sys._getframe(1) - while frame: - l = frame.f_locals - if 'BOOM' in l: - # Sign of doctest - return frame - frame = frame.f_back - raise LookupError( - "Could not find doctest (only use this function *inside* a doctest)") - -__test__ = { - 'basic': ''' - >>> temp_install() - >>> print """<xml a="1" b="2">stuff</xml>""" - <xml b="2" a="1">...</xml> - >>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>""" - <xml xmlns="..."> - <tag attr="..." /> - </xml> - >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS - <xml>...foo /></xml> - '''} - -if __name__ == '__main__': - import doctest - doctest.testmod() - - |