Coverage for src/xml.py: 100%

20 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2024-10-23 12:26 +0000

1import xml.etree.ElementTree 

2 

3 

# Register serialisation prefixes: Atom becomes the default (un-prefixed)
# namespace and Media RSS is written with the "media" prefix.
for _prefix, _uri in {
    "": "http://www.w3.org/2005/Atom",
    "media": "http://search.yahoo.com/mrss/",
}.items():
    xml.etree.ElementTree.register_namespace(_prefix, _uri)

6 

7 

def prettify(content: str) -> str:
    """Prettify an XML string.

    If the first line starts with ``<?xml`` or ``<!doctype`` (compared
    case-insensitively) it is set aside before parsing and put back,
    exactly as it was given, in front of the re-indented document.

    Every ``&`` in the remaining text is escaped to ``&amp;`` before
    parsing, so entity references in the input come back as literal text
    (``&amp;`` in the input is emitted as ``&amp;amp;``).

    Raises a `ValueError` if the input is invalid; the underlying
    `xml.etree.ElementTree.ParseError` is chained as its cause.

    >>> print(prettify('<some><xml></xml></some>'))
    <some>
      <xml />
    </some>
    """
    # If there is a doctype / XML declaration on the first line, trim it off.
    # Only the comparison is case-folded; the line itself is kept verbatim so
    # that it can be reattached unchanged.
    lines = content.splitlines()
    prolog = None
    if lines and lines[0].lower().startswith(('<!doctype', '<?xml')):
        prolog = lines[0]
        content = '\n'.join(lines[1:])

    # Escape special characters so that bare ampersands and entities unknown
    # to the parser do not abort parsing.
    content = content.replace('&', '&amp;')

    # Convert to a tree. Empty input also ends up here and is reported as a
    # parse error like any other invalid document.
    try:
        tree = xml.etree.ElementTree.fromstring(content)
    except xml.etree.ElementTree.ParseError as e:
        raise ValueError(
            f'error parsing the following ({e.args})\n{content[:800]}'
        ) from e

    # Add indenting (modifies the tree in place).
    xml.etree.ElementTree.indent(tree)

    # Serialise straight to ``str``; the "unicode" encoding emits no XML
    # declaration, so there is no generated first line to strip.
    content = xml.etree.ElementTree.tostring(tree, encoding='unicode')

    # Reattach the original first line, if there was one.
    if prolog:
        content = prolog + '\n' + content

    return content