Coverage for src/xml.py: 100%
20 statements
« prev ^ index » next coverage.py v7.3.0, created at 2024-12-08 12:26 +0000
« prev ^ index » next coverage.py v7.3.0, created at 2024-12-08 12:26 +0000
1import xml.etree.ElementTree
# Register the prefixes ElementTree should use when serializing these
# namespaces: Atom becomes the default (unprefixed) namespace and the
# Yahoo MRSS namespace is written with the "media" prefix.
for _prefix, _uri in (
    ("", "http://www.w3.org/2005/Atom"),
    ("media", "http://search.yahoo.com/mrss/"),
):
    xml.etree.ElementTree.register_namespace(_prefix, _uri)
del _prefix, _uri  # keep the loop variables out of the module namespace
def prettify(content: str) -> str:
    """Prettify an XML string.

    If the first line is an XML declaration (``<?xml ...``) or a doctype
    (``<!DOCTYPE ...``), it is set aside before parsing and reattached,
    exactly as it was given, in front of the indented output.

    The text is parsed as-is first.  Only if that fails, ampersands that do
    not start an entity or character reference are escaped as ``&amp;`` and
    the parse is retried, so input that is already well-formed is never
    altered by the escaping step.

    Raises a `ValueError` if the input is empty or cannot be parsed.

    >>> print(prettify('<some><xml></xml></some>'))
    <some>
      <xml />
    </some>
    """
    # Function-scope import: the module itself only imports ElementTree.
    import re

    lines = content.splitlines()
    if not lines:
        raise ValueError('cannot prettify empty content')

    # If there is a declaration/doctype line, trim it off.  Only the
    # comparison is case-insensitive; the line itself is kept verbatim so
    # that the original casing is restored on output.
    first_line, *rest = lines
    if first_line.lower().startswith(('<!doctype', '<?xml')):
        prolog = first_line
        content = '\n'.join(rest)
    else:
        prolog = None

    # Convert to a tree.
    try:
        tree = xml.etree.ElementTree.fromstring(content)
    except xml.etree.ElementTree.ParseError:
        # Fallback: escape bare ampersands (e.g. "a & b", "?x=1&y=2") and
        # try again.  Anything that already looks like "&name;", "&#10;" or
        # "&#x0A;" is left alone so it is not double-escaped.
        content = re.sub(
            r'&(?!(?:[A-Za-z_:][\w.:-]*|#[0-9]+|#[xX][0-9A-Fa-f]+);)',
            '&amp;',
            content,
        )
        try:
            tree = xml.etree.ElementTree.fromstring(content)
        except xml.etree.ElementTree.ParseError as e:
            raise ValueError(
                f'error parsing the following ({e.args})\n{content[:800]}'
            ) from e

    # Add indenting (modifies the tree in place).
    xml.etree.ElementTree.indent(tree)

    # Serialize straight to str; with encoding='unicode' ElementTree emits
    # no XML declaration, so there is nothing to strip afterwards.
    content = xml.etree.ElementTree.tostring(tree, encoding='unicode')

    # Reattach the original declaration/doctype line, if there was one.
    if prolog is not None:
        content = prolog + '\n' + content

    return content