src
This is the source code from my blog. To execute the build, just run the package like this:
./venv/bin/python -m src
# flake8: noqa

"""
This is the source code from my blog. To execute the build, just run
the package like this:

```
./venv/bin/python -m src
```
"""

# functions
from .args import load_args
from .docs import write_api_docs
from .logging import load_logger
from .models.feed import load_feed
from .models.image import load_images
from .models.page import load_entries, load_pages
from .models.site import load_site
from .template import render_template
from .testing import run_unit_tests
from .utils import pave_webroot
from .validate import validate_html_references

# Models
from .models import (
    Site,
    Page,
    Image,
    Feed,
)


# NOTE: __all__ entries must be *strings*; with bare objects,
# `from src import *` raises TypeError and doc tools misread the API.
__all__ = [
    'load_args',
    'load_entries',
    'load_feed',
    'load_images',
    'load_logger',
    'load_pages',
    'load_site',
    'pave_webroot',
    'run_unit_tests',
    'validate_html_references',
    'write_api_docs',
    'Site',
    'Page',
    'Image',
    'Feed',
]
def load_args() -> argparse.Namespace:
    """
    Load the system args against the standard website parser.

    ```python
    args = src.load_args()
    logger.debug('called with args = %s', vars(args))
    ```
    """
    # delegate to the shared module-level parser
    args = parser.parse_args()
    logger.info('loading system args %s', vars(args))
    return args
Load the system args against the standard website parser.
args = src.load_args()
logger.debug('called with args = %s', vars(args))
def load_entries(entries_dir='./entries'):
    """
    Load a list of journal entries as `Page` objects. Order the
    list starting with the latest entry first.

    ```python
    entries = src.load_entries()
    ```
    """
    # entries live as dated html files, sorted so pagination is stable
    paths = sorted(pathlib.Path(entries_dir).glob('*.html'))

    # get pagination map
    links = paginate_entries(paths)

    pages = [
        Page(
            p,
            next_page=links[p.name].next,
            previous_page=links[p.name].previous,
        )
        for p in paths
    ]

    # sort latest first
    return sorted(pages, key=lambda page: page.date, reverse=True)
Load a list of journal entries as Page
objects. Order the
list starting with the latest entry first.
entries = src.load_entries()
def load_feed(site, entries=None, images=None) -> Feed:  # noqa: E501
    """
    Load an RSS feed object.

    `entries` and `images` are optional lists of entry pages and
    images to include as feed items.

    ```python
    feed = load_feed(site)
    ```
    """
    # avoid mutable default arguments (shared lists across calls)
    entries = entries if entries is not None else []
    images = images if images is not None else []

    items = []

    def convert_timestamp(date):
        # render the date as a midnight UTC atom timestamp
        slug = date.strftime("%Y-%m-%d")
        return f'{slug}T00:00:00+00:00'

    # add all journal entries
    for entry in entries:
        kwargs = {}
        kwargs['title'] = entry.title
        kwargs['path'] = entry.filename

        if entry.banner:
            kwargs['image'] = f'images/banners/{entry.banner}'
        else:
            kwargs['image'] = None

        kwargs['timestamp'] = convert_timestamp(entry.date)
        items.append(Item(**kwargs))

    # add all other images that aren't a banner
    for image in images:
        if image.is_banner:
            continue

        kwargs = {
            'title': image.title,
            'path': f'images/{image.filename}',
            'image': f'images/{image.filename}',
            'timestamp': convert_timestamp(image.date),
        }
        items.append(Item(**kwargs))

    # sort by descending timestamp
    items = sorted(items, key=lambda i: i.timestamp, reverse=True)

    return Feed(site=site, items=items)
Load an RSS feed object.
feed = load_feed(site)
def load_images(entries=None, images_dir='./www/images/') -> list[Image]:
    """
    Loads complete set of images for website as a list of `Image` objects.

    Requires a list of entries so it can associate the entry where it
    is referenced.

    ```python
    images = src.load_images()
    ```
    """
    # avoid a mutable default argument (shared list across calls)
    entries = entries if entries is not None else []

    images = []

    def is_image(p):
        return p.suffix.lower() in (
            '.jpg',
            '.jpeg',
            '.png',
        )

    images_dir = pathlib.Path(images_dir)
    image_files = filter(is_image, images_dir.glob('**/*.*'))

    # build a k/v map of image paths to entries
    ref_map = {}
    for entry in entries:
        for path in entry.extract_links():
            ref_map[str(path)] = entry

    # build the list of images
    for path in image_files:
        images.append(Image(path, ref_map.get(str(path))))

    # finally, sort them by name
    return sorted(images, key=lambda i: i.path.name, reverse=True)
Loads complete set of images for website as a list of Image
objects.
Requires a list of entries so it can associate the entry where it is referenced.
images = src.load_images()
def load_logger(verbose=False, logfile='./www/build.txt', truncate_logfile=True) -> logging.Logger:  # noqa: E501
    """
    Load a logger with some sensible defaults.

    ```python
    logger = src.load_logger()
    logger.info('starting program')
    ```

    Those defaults being:

    1. Log to stderr with a simple formatter
    2. Write last build log to a file in the webroot.

    Extra options:

    - `verbose` log at DEBUG level instead of INFO
    - `logfile` path to the logfile
    - `truncate_logfile` whether to truncate the logfile or not
    """

    logger = logging.getLogger('blog')

    # formatter
    formatter = logging.Formatter(fmt='blog: %(message)s')

    # level
    if verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logger.setLevel(level)

    # truncate log file first, before the file handler opens it.
    # (previously this flag was accepted but ignored and the file was
    # always truncated)
    if truncate_logfile:
        with pathlib.Path(logfile).open('w') as f:
            f.write('')

    # add stderr handler
    stderr_handler = logging.StreamHandler(stream=sys.stderr)
    stderr_handler.setFormatter(formatter)
    stderr_handler.setLevel(level)
    logger.addHandler(stderr_handler)

    # add logfile handler (opens in append mode, so it respects the
    # truncation decision above)
    file_handler = logging.FileHandler(str(logfile))
    file_handler.setFormatter(formatter)
    file_handler.setLevel(level)
    logger.addHandler(file_handler)

    return logger
Load a logger with some sensible defaults.
logger = src.load_logger()
logger.info('starting program')
Those defaults being:
- Log to stderr with a simple formatter
- Write last build log to a file in the webroot.
Extra options:
`logfile`: path to the logfile
`truncate_logfile`: whether to truncate the logfile or not
def load_pages(pages_dir='./pages'):
    """
    Fetches a list of website pages as `Page` objects.

    ```python
    pages = src.load_pages()
    ```
    """
    # wrap every page source file in a Page model, ordered by filename
    sources = pathlib.Path(pages_dir).glob('*.*')
    return sorted((Page(source) for source in sources),
                  key=lambda p: p.filename)
Fetches a list of website pages as Page
objects.
pages = src.load_pages()
def load_site(args: argparse.Namespace) -> Site:
    """
    Creates a `Site` from the results of `parser.parse_args()`.

    ```python
    args = parser.parse_args()
    site = src.load_site(args)
    ```

    Note: the timezone is hard-coded to `"America/Chicago"`
    (because nobody ever brags about the beef sandwich they had in
    Greenwich).
    """
    # set timestamp
    os.environ['TZ'] = 'America/Chicago'
    timestamp = datetime.datetime.now()

    # collect every 'site_*' arg, stripping the prefix off the key
    prefix = 'site_'
    site_kwargs = {}
    for key, value in vars(args).items():
        if key.startswith(prefix):
            site_kwargs[key[len(prefix):]] = value

    return Site(timestamp=timestamp, **site_kwargs)
Creates a Site
from the results of parser.parse_args()
.
args = parser.parse_args()
site = src.load_site(args)
Note: the timezone is hard-coded to "America/Chicago"
(because nobody ever brags about the beef sandwich they had in
Greenwich).
def pave_webroot(webroot='./www') -> int:
    """
    Delete all old generated files from webroot

    Returns the number of old files detected and destroyed. This is
    so you have something interesting to log.

    `webroot` is the directory to clean (defaults to `./www`).

    ```python
    logger.info('paved %d old file(s) from webroot!', src.pave_webroot())
    ```
    """

    webroot = pathlib.Path(webroot)

    # generated files live at the top level (html/xml) and under api/
    patterns = ('*.html', '*.xml', 'api/*.html', 'api/*.js')

    old_files = []
    for pattern in patterns:
        old_files += list(webroot.glob(pattern))

    for target in old_files:
        target.unlink()

    return len(old_files)
Delete all old generated files from webroot
Returns the number of old files detected and destroyed. This is so you have something interesting to log.
logger.info('paved %d old file(s) from webroot!', src.pave_webroot())
def run_unit_tests() -> int:
    '''
    Runs the whole suite of unit tests.

    Returns the number of executed tests so you have something
    interesting to log.

    If any tests fail, the names and stack traces are printed to
    stderr and the whole process is exited.

    >>> logger.info('ran %d test(s)', run_unit_tests())
    '''

    # start unit test coverage recording
    cov = coverage.Coverage()
    cov.start()

    # discover and run all unit tests
    suite = unittest.TestLoader().discover('src', pattern='test_*.py')
    outcome = suite.run(unittest.TestResult())

    # stop coverage
    cov.stop()
    cov.save()

    # report errors, then failures, then bail out of the process
    problems = [('ERROR', p) for p in outcome.errors]
    problems += [('FAILURE', p) for p in outcome.failures]
    if problems:
        logger.error('some unit tests failed!')
        print('', file=sys.stderr)
        for label, (test, trace) in problems:
            print(f'=> {label}: {test}', file=sys.stderr)
            print(trace, file=sys.stderr)
        sys.exit(1)

    # write html report
    cov.html_report(directory='./www/coverage')

    # return number of tests
    return suite.countTestCases()
Runs the whole suite of unit tests.
Returns the number of executed tests so you have something interesting to log.
If any tests fail, the names and stack traces are printed to stderr and the whole process is exited.
>>> logger.info('ran %d test(s)', run_unit_tests())
def validate_html_references(path: str) -> int:
    """
    Validate an HTML file by checking its references.

    Every href/src reference found in the file is resolved relative
    to the file's parent directory; references whose target is not an
    existing file are logged as warnings.

    Returns the number of references that were checked.
    """

    path = pathlib.Path(path)

    with path.open('r') as f:
        content = f.read()

    # check refs
    checker = ReferenceParser(parent=path.parent)
    checker.feed(content)
    for reference in checker.references:
        if not reference.path.is_file():
            # logger.warn() is a deprecated alias of logger.warning()
            logger.warning('%s: %s reference not found: %s',
                           path.name, reference.attr, reference.value)
    return len(checker.references)
Validate an HTML file
def write_api_docs() -> int:
    """
    Generate the website API documentation.

    Returns the total number of generated files so you have something
    interesting to log.

    >>> logger.info('wrote docs - %d file(s)', write_api_docs())
    """

    output_directory = pathlib.Path('./www/api/')

    pdoc.pdoc('src', output_directory=output_directory)

    # count every generated file, skipping hidden ones
    generated = [
        p for p in output_directory.glob('**/*')
        if p.is_file() and not p.name.startswith('.')
    ]
    return len(generated)
Generate the website API documentation.
Returns the total number of generated files so you have something interesting to log.
>>> logger.info('wrote docs - %d file(s)', write_api_docs())
class Site:
    """
    Website model.
    """

    def __init__(self, timestamp=None, entries=None, **kwargs):  # noqa: E501
        """
        Build a Site model.

        `entries` is accepted for backward compatibility but is not
        stored on the model.

        Customize with the following kwargs:

        - `title`
        - `description`
        - `author`
        - `email`
        - `domain`
        - `protocol`
        """

        fields = ['title', 'description', 'author',
                  'email', 'domain', 'protocol']

        # only store the recognized kwargs that carry a truthy value
        for key in fields:
            if value := kwargs.get(key):
                setattr(self, '_' + key, value)

        # timestamp
        self._timestamp = timestamp

    @property
    def title(self) -> str:
        """
        Website title (ex `"Blog"`)
        """
        return self._title

    @property
    def description(self) -> str:
        """
        Website description (ex `"A Place for my Thoughts"`)
        """
        return self._description

    @property
    def author(self) -> str:
        """
        Website maintainer's full name.
        """
        return self._author

    @property
    def email(self) -> str:
        """
        Website maintainer's email.
        """
        return self._email

    @property
    def url(self) -> str:
        """
        Full website URL (ex. `"https://www.alexrecker.com"`)
        """
        return f'{self._protocol}://{self._domain}'

    @property
    def timestamp(self) -> datetime.datetime:
        """
        Website build timestamp.
        """
        return self._timestamp

    @property
    def python_version(self) -> str:
        """
        The python version used to build the website. (ex. `"v3.11.0"`)
        """
        return f'v{platform.python_version()}'

    @property
    def python_executable(self) -> str:
        """
        Path to `python` executable used to build the site
        (ex. `"/usr/bin/python"`)
        """
        return sys.executable
Website model.
def __init__(self, timestamp=None, entries=None, **kwargs):  # noqa: E501
    """
    Build a Site model.

    `entries` is accepted for backward compatibility but is not used.

    Customize with the following kwargs:

    - `title`
    - `description`
    - `author`
    - `email`
    - `domain`
    - `protocol`
    """

    fields = ['title', 'description', 'author',
              'email', 'domain', 'protocol']

    # only store the recognized kwargs that carry a truthy value
    for key in fields:
        if value := kwargs.get(key):
            setattr(self, '_' + key, value)

    # timestamp
    self._timestamp = timestamp
Build a Site model.
Customize with the following kwargs:
title
description
author
email
domain
protocol
class Page:
    """
    A website page. Can be either a normal page, or a journal entry.
    """

    def __init__(self, path: pathlib.Path, next_page=None, previous_page=None):
        """
        `path` should be a pathlib Path.

        `next_page` and `previous_page` can be filenames, if
        pagination should be enabled.
        """

        self.path = pathlib.Path(path)

        self._next = next_page
        self._previous = previous_page

    @property
    def filename(self):
        """
        Page filename, e.g. `index.html`.

        The file extension will always be `.html`, so even if the
        source page is rendered from a template, this suffix will be
        removed.
        """
        if self.path.suffix == '.j2':
            return self.path.name[:-3]
        return self.path.name

    @property
    def is_entry(self) -> bool:
        """
        `True` if the page is a journal entry, False if it's just a
        normal Page.
        """
        entry_dir = pathlib.Path('./entries')
        return entry_dir in self.path.parents

    @property
    def date(self) -> datetime.datetime:
        """
        Page date, as parsed from the filename.
        """
        return datetime.datetime.strptime(self.path.stem, '%Y-%m-%d')

    @functools.cached_property
    def metadata(self) -> dict:
        """
        Metadata embedded in the page. This is read from special HTML
        comments.

        A page with this header:

        ```html
        <!-- meta:title a walk in the park -->
        <!-- meta:description I take a nice walk in the park -->
        ```

        Will yield this metadata:

        ```python
        {
            'title': 'a walk in the park',
            'description': 'I take a nice walk in the park.',
        }
        ```

        For performance, this information is only read once, then
        cached in memory during website build.
        """
        with self.path.open('r') as f:
            return parse_metadata(f.read())

    @property
    def title(self):
        """
        Page title: the formatted date for entries, otherwise the
        `title` metadata value.
        """
        if self.is_entry:
            return self.date.strftime('%A, %B %-d %Y')
        else:
            # BUG FIX: was `self.get('title')` — Page has no `get`
            # method, which raised AttributeError for normal pages.
            return self.metadata.get('title')

    @property
    def description(self):
        """
        Page description, pulled from metadata.
        """
        if self.is_entry:
            return self.metadata['title'].replace("'", '')
        else:
            return self.metadata.get('description')

    @property
    def banner(self):
        """Banner image filename from metadata, if any."""
        return self.metadata.get('banner')

    @property
    def next(self):
        """Next `Page` object, if paginated."""
        return self._next

    @property
    def previous(self):
        """Previous `Page` object, if paginated."""
        return self._previous

    @property
    def href(self):
        """
        The `href` html value that points to the image.

        Can be used in templates like so:

        ```html
        <a href="{{ page.href }}">...</a>
        ```
        """
        return f'./{self.filename}'

    def render(self, context: dict) -> str:
        """
        Render the complete content for a page.
        """
        # add current page to context
        context['page'] = self

        # build inner content
        if self.path.name.endswith('.j2'):
            # page is a template, so render it
            with self.path.open('r') as f:
                tmpl = template_env.from_string(f.read())
            content = tmpl.render(**context)
        else:
            # page isn't a template, so just read it
            with self.path.open('r') as f:
                content = f.read()

        # now, wrap that content in the base template
        context['content'] = content.strip()
        content = render_template('base.html.j2', context=context).strip()

        # prettify the markup
        try:
            return xml.prettify(content)
        except xml.ParseError as e:
            logger.error('cannot parse %s: %s', self.filename, e)
            return content

    def write(self, context: dict):
        """
        Write the page to the www directory.
        """
        target = pathlib.Path(f'./www/{self.filename}')
        content = self.render(context)
        with target.open('w') as f:
            f.write(content)

    def extract_links(self) -> list[pathlib.Path]:
        """
        Returns a list of href or src values.
        """
        results = []

        # make a ReferenceParser
        parser = validate.ReferenceParser(parent='./www/')

        # feed content to parser
        with self.path.open('r') as f:
            parser.feed(f.read())

        # collect all the links
        for reference in parser.references:
            results.append(reference.path)

        return results
A website page. Can be either a normal page, or a journal entry.
def __init__(self, path: pathlib.Path, next_page=None, previous_page=None):
    """
    `path` should be a pathlib Path.

    `next_page` and `previous_page` can be filenames, if
    pagination should be enabled.
    """
    # normalize whatever we were given into a real Path object
    self.path = pathlib.Path(path)

    # optional pagination links
    self._next = next_page
    self._previous = previous_page
path
should be a pathlib Path.
next_page
and previous_page
can be filenames, if
pagination should be enabled.
Page filename, e.g. `index.html`.
The file extension will always be .html
, so even if the
source page is rendered from a template, this suffix will be
removed.
Metadata embedded in the page. This is read from special HTML comments.
A page with this header:
<!-- meta:title a walk in the park -->
<!-- meta:description I take a nice walk in the park -->
Will yield this metadata:
{
'title': 'a walk in the park',
'description': 'I take a nice walk in the park.',
}
For performance, this information is only read once, then cached in memory during website build.
The href
html value that points to the image.
Can be used in templates like so:
<a href="{{ page.href }}">...</a>
def render(self, context: dict) -> str:
    """
    Render the complete content for a page.
    """
    # make the page itself available to templates
    context['page'] = self

    # read the raw page source once
    with self.path.open('r') as f:
        source = f.read()

    # templates get rendered, plain pages pass straight through
    if self.path.name.endswith('.j2'):
        content = template_env.from_string(source).render(**context)
    else:
        content = source

    # wrap the inner content in the base template
    context['content'] = content.strip()
    content = render_template('base.html.j2', context=context).strip()

    # prettify the markup, falling back to raw markup on a parse error
    try:
        return xml.prettify(content)
    except xml.ParseError as e:
        logger.error('cannot parse %s: %s', self.filename, e)
        return content
Render the complete content for a page.
def write(self, context: dict):
    """
    Write the page to the www directory.
    """
    # render before opening, so a render failure never clobbers output
    output = pathlib.Path(f'./www/{self.filename}')
    rendered = self.render(context)
    with output.open('w') as f:
        f.write(rendered)
Write the page to the www directory.
def extract_links(self) -> list[pathlib.Path]:
    """
    Returns a list of href or src values.
    """
    # parse the page source with a ReferenceParser rooted at the webroot
    ref_parser = validate.ReferenceParser(parent='./www/')
    with self.path.open('r') as f:
        ref_parser.feed(f.read())

    # collect the path of every reference found
    return [reference.path for reference in ref_parser.references]
Returns a list of href or src values.
11class Image: 12 """ 13 A website image. 14 """ 15 16 def __init__(self, path: str | pathlib.Path, entry=None): 17 self._path = pathlib.Path(path) 18 self._entry = entry 19 20 @property 21 def path(self) -> pathlib.Path: 22 """ 23 Image as a `pathlib.Path` object. 24 """ 25 return self._path 26 27 @property 28 def filename(self): 29 """ 30 Name of the file, ex `test.jpg` 31 """ 32 return self.path.name 33 34 @property 35 def date(self) -> datetime.datetime: 36 """ 37 Date, according to the image file's YYY-MM-DD date slug. 38 """ 39 40 if match := r_filename.search(self.path.stem): 41 return datetime.datetime( 42 year=int(match.group('year')), 43 month=int(match.group('month')), 44 day=int(match.group('day')), 45 ) 46 raise ValueError(f'could not parse date from {self.filename}') 47 48 @property 49 def date_slug(self): 50 """ 51 Parses the YYYY-MM-DD date slug from the file name. 52 """ 53 return self.date.strftime('%Y-%m-%d') 54 55 @property 56 def slug(self): 57 """ 58 The portion of the filename without the extension or the date slug. 59 60 If the full filename is `2023-01-01-fish-soup.png`, the slug 61 would be `fish-soup`. 62 """ 63 if match := r_filename.search(self.path.stem): 64 return match.group('slug') 65 66 # otherwise just return the stem 67 return self.path.stem 68 69 @property 70 def title(self): 71 """ 72 Human readable name for the image, based on the date slug. 73 74 For example, `test-image.jpg`, becomes `Test Image` 75 """ 76 return self.slug.replace('-', ' ').title() 77 78 @property 79 def href(self): 80 """ 81 The `href` html value that points to the image. 82 83 Can be used in templates like so: 84 85 ```html 86 <a href="{{ img.href }}">...</a> 87 ``` 88 """ 89 www_dir = pathlib.Path('./www') 90 relpath = self.path.relative_to(www_dir) 91 return f'./{relpath}' 92 93 @property 94 def is_banner(self): 95 """ 96 True if the image lives in the banners directory. 
97 """ 98 99 banner_dir = pathlib.Path('./www/images/banners/') 100 return banner_dir in self.path.parents 101 102 @property 103 def entry(self): 104 """ 105 The entry where the image is referenced. 106 """ 107 return self._entry
A website image.
The portion of the filename without the extension or the date slug.
If the full filename is `2023-01-01-fish-soup.png`, the slug would be `fish-soup`.
Human readable name for the image, based on the date slug.
For example, test-image.jpg
, becomes Test Image
The href
html value that points to the image.
Can be used in templates like so:
<a href="{{ img.href }}">...</a>
class Feed:
    """
    Website RSS feed.

    An atom feed that you can build from site information and write
    locally as a valid atom RSS feed.
    """

    # output filename under the webroot
    filename = 'feed.xml'

    def __init__(self, site=None, items: list[Item] | None = None):  # noqa: E501
        """
        Build a feed object.

        Takes a list of `Item` objects, which is just this named
        tuple:

        ```python
        Item = collections.namedtuple('Item', [
            'title',
            'timestamp',
            'path',  # ex. 2020-01-01.html
            'image',  # ex. banners/2021-01-01.jpg
        ])
        ```
        """
        self.site = site
        # avoid a shared mutable default list across Feed instances
        self.items = items if items is not None else []

    def render(self):
        """Render the feed template and prettify the resulting XML."""
        content = render_template('feed.xml.j2', context={
            'filename': self.filename,
            'site': self.site,
            'items': self.items,
        })
        return xml.prettify(content)

    def write(self):
        """Write the rendered feed into the webroot."""
        with open(f'./www/{self.filename}', 'w') as f:
            f.write(self.render())

    def __repr__(self):
        return f'<Feed {self.filename}>'
Website RSS feed.
An atom feed that you can build from site information and write locally as a valid atom RSS feed.
def __init__(self, site=None, items: list[Item] | None = None):  # noqa: E501
    """
    Build a feed object.

    Takes a list of `Item` objects, which is just this named
    tuple:

    ```python
    Item = collections.namedtuple('Item', [
        'title',
        'timestamp',
        'path',  # ex. 2020-01-01.html
        'image',  # ex. banners/2021-01-01.jpg
    ])
    ```
    """
    self.site = site
    # avoid a shared mutable default list across instances
    self.items = items if items is not None else []
Build a feed object.
Takes a list of Item
objects, which is just this named
tuple:
Item = collections.namedtuple('Item', [
'title',
'timestamp',
'path', # ex. 2020-01-01.html
'image', # ex. banners/2021-01-01.jpg
])