Coverage for src/xml.py: 100%

import re
import xml.etree.ElementTree

# Register the prefixes used when serialising: Atom is bound to the empty
# prefix and Media RSS to "media".
for _prefix, _uri in (
    ("", "http://www.w3.org/2005/Atom"),
    ("media", "http://search.yahoo.com/mrss/"),
):
    xml.etree.ElementTree.register_namespace(_prefix, _uri)
# Do not leave the loop variables behind as module globals.
del _prefix, _uri

# Matches either a complete CDATA section (captured in group 1 so it can be
# put back untouched) or an ampersand that does not start a well-formed
# entity or character reference such as ``&amp;``, ``&#38;`` or ``&#x26;``.
_BARE_AMPERSAND = re.compile(
    r'(<!\[CDATA\[.*?\]\]>)'
    r'|&(?!(?:[A-Za-z_:][\w.:-]*|#[0-9]+|#x[0-9A-Fa-f]+);)',
    re.DOTALL,
)


def _escape_bare_ampersands(text: str) -> str:
    """Replace stray ``&`` with ``&amp;``, keeping references and CDATA as-is."""
    return _BARE_AMPERSAND.sub(lambda m: m.group(1) or '&amp;', text)


def prettify(content: str) -> str:
    """Prettify an XML string.

    If the first line is an XML declaration (``<?xml ...``) or a doctype
    (``<!doctype ...``, matched case-insensitively), it is set aside before
    parsing and put back, exactly as given, in front of the result.  Stray
    ampersands that are not part of an entity or character reference are
    escaped before parsing; existing references and CDATA sections are left
    alone.

    NOTE: the prolog is only recognised when it sits on a line of its own as
    the very first line of ``content``.

    Args:
        content: The XML document to re-indent.

    Returns:
        The document serialised with indentation added by
        ``xml.etree.ElementTree.indent``.

    Raises:
        ValueError: If ``content`` is empty or cannot be parsed as XML.

    >>> print(prettify('<some><xml></xml></some>'))
    <some>
      <xml />
    </some>
    """
    lines = content.splitlines()
    if not lines:
        # Previously this surfaced as an unpacking error with an unrelated
        # message; keep the exception type but say what is wrong.
        raise ValueError('cannot prettify an empty XML document')

    # If there is a prolog line, trim it off.  Only the comparison is
    # case-insensitive: the original text is kept so that it can be
    # re-attached unchanged ("<!DOCTYPE" must stay upper-case to be valid).
    first_line, *rest = lines
    if first_line.lower().startswith(('<!doctype', '<?xml')):
        prolog = first_line
        content = '\n'.join(rest)
    else:
        prolog = None

    # Escape special characters that would otherwise make the parser fail.
    content = _escape_bare_ampersands(content)

    # Convert to a tree.
    try:
        tree = xml.etree.ElementTree.fromstring(content)
    except xml.etree.ElementTree.ParseError as e:
        raise ValueError(
            f'error parsing the following ({e.args})\n{content[:800]}'
        ) from e

    # Add indenting (modifies the tree in place).
    xml.etree.ElementTree.indent(tree)

    # Serialise straight to ``str``.  With ``encoding='unicode'`` no XML
    # declaration is emitted, so there is nothing to trim off afterwards and
    # the text content is not passed through ``splitlines()`` again.
    content = xml.etree.ElementTree.tostring(tree, encoding='unicode')

    # Re-attach the original prolog, if there was one.
    if prolog:
        content = prolog + '\n' + content

    return content