Coverage for src/xml.py: 67%
21 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-09-28 07:19 -0500
« prev ^ index » next coverage.py v7.3.0, created at 2023-09-28 07:19 -0500
1import xml.etree.ElementTree
# Register the prefixes used when serialising feeds, so that output carries
# readable names instead of auto-generated ``ns0``/``ns1`` prefixes.
# Atom is the default (prefix-less) namespace; Media RSS is emitted as "media".
xml.etree.ElementTree.register_namespace(
    prefix="",
    uri="http://www.w3.org/2005/Atom",
)
xml.etree.ElementTree.register_namespace(
    prefix="media",
    uri="http://search.yahoo.com/mrss/",
)
class ParseError(Exception):
    """Raised when the XML input cannot be parsed.

    Derives from `Exception` (not `BaseException`) so that ordinary
    ``except Exception`` handlers see it; `BaseException` is reserved for
    interpreter-level signals such as `KeyboardInterrupt` and `SystemExit`.
    """
def prettify(content: str) -> str:
    """Prettify an XML string.

    If the first line is an ``<?xml ...?>`` declaration or a
    ``<!DOCTYPE ...>`` line, it is set aside before parsing and reattached
    verbatim to the result. Bare ampersands (ones that do not begin an
    entity or character reference) are escaped so they do not break parsing.

    Raises a `ParseError` if the input is invalid (including empty input).

    >>> print(prettify('<some><xml></xml></some>'))
    <some>
      <xml />
    </some>
    """
    import re

    # If there is a doctype, trim it off. The check is case-insensitive, but
    # the line itself is kept as written: lowercasing it would corrupt
    # case-sensitive parts such as public identifiers or the encoding name.
    lines = content.splitlines()
    first_line = lines[0] if lines else ''
    if first_line.lower().startswith(('<!doctype', '<?xml')):
        doctype = first_line
        content = '\n'.join(lines[1:])
    else:
        doctype = None

    # Escape bare ampersands only; existing references such as ``&amp;`` or
    # ``&#38;`` are left alone so they are not double-escaped.
    content = re.sub(
        r'&(?!(?:[A-Za-z_][\w.-]*|#[0-9]+|#x[0-9A-Fa-f]+);)',
        '&amp;',
        content,
    )

    # Convert to a tree. Empty input fails here too, so it surfaces as a
    # ParseError like any other unparsable input.
    try:
        tree = xml.etree.ElementTree.fromstring(content)
    except xml.etree.ElementTree.ParseError as e:
        raise ParseError(
            f'error parsing the following ({e.args})\n{content[:800]}') from e

    # Add indenting (in place).
    xml.etree.ElementTree.indent(tree)

    # Serialise straight to ``str``; with encoding='unicode' no XML
    # declaration is emitted, so there is nothing to trim afterwards.
    content = xml.etree.ElementTree.tostring(tree, encoding='unicode')

    # Reattach the original doctype, if there was one.
    if doctype is not None:
        content = doctype + '\n' + content

    return content