#!/usr/bin/env python3
import os
import sys
import markdown
from mdx_gfm import GithubFlavoredMarkdownExtension
import weasyprint
import re
import logging
# Send "LEVEL: message" lines to stderr at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stderr,
    format='%(levelname)s: %(message)s',
)
# Logger used by this script for its progress messages.
LOGGER = logging.getLogger('preprocess')
# sort the file order
def sort_func(x):
    """Return the sort key that decides where file path ``x`` is placed.

    * A path that does not contain a ``doc`` directory component
      (``<sep>doc<sep>``) gets the key ``'z'``.
    * A path containing ``README.md`` gets everything before that name
      followed by ``'0'``.
    * Any other path is its own key.
    """
    doc_component = '%sdoc%s' % (os.path.sep, os.path.sep)
    if doc_component not in x:
        return 'z'

    readme_at = x.find('README.md')
    if readme_at == -1:
        return x
    return x[:readme_at] + '0'
# make the links work in-pdf
def fix_links(match):
    """Rewrite one markdown link; intended as a ``re.sub`` callback.

    ``match`` must expose the link text as group 1 and the link target as
    group 2. The image and relative-path branches read the module-level
    name ``my_file`` (the path of the markdown file being processed).

    Returns the replacement markdown link:

    * ``.png`` target: the target is joined onto the directory of
      ``my_file``.
    * target under the repository's ``tree/master/`` URL: becomes an
      in-document anchor built from the path after that prefix, with
      ``/`` replaced by ``__``. A path without an extension gets
      ``/README.md`` appended first.
    * any other target starting with ``http``: the link is returned
      unchanged.
    * everything else: resolved against the directory of ``my_file``,
      normalised relative to the current working directory, and turned
      into an anchor with path separators replaced by ``__``.
    """
    BASE_URL = 'https://github.com/tildearrow/furnace/tree/master/'
    target = match.group(2)

    # images
    if os.path.splitext(target)[1] == '.png':
        image_path = os.path.join(os.path.dirname(my_file), target)
        return '[%s](%s)' % (match.group(1), image_path)

    # urls to other files
    if target.startswith(BASE_URL):
        file_path = target.split(BASE_URL)[-1]
        if os.path.splitext(file_path)[1] == '':
            file_path += '/README.md'
        anchor = file_path.replace('/', '__')
        return '[%s](#%s)' % (match.group(1), anchor)

    # preserve external urls
    if target.startswith('http'):
        return match.group(0)

    # fix paths
    joined = os.path.dirname(my_file) + '/' + target
    act_path = os.path.relpath(os.path.abspath(joined))
    anchor = act_path.replace(os.path.sep, '__')
    return '[%s](#%s)' % (match.group(1), anchor)
def fix_headings(match):
    """Return the matched run of ``#`` characters (group 1) plus one more.

    Intended as a ``re.sub`` callback. Only group 1 is returned, so any
    part of the match outside that group is dropped from the result.
    """
    hashes = match.group(1)
    return '%s#' % (hashes,)
if __name__ == "__main__":
#-- first, prepare the file list --#
file_list = []
for i in os.walk('../../doc'):
base_dir, subfolders, files = i
for file_ in filter(lambda x: x.lower().endswith('.md'), files):
file_list.append(os.path.join(base_dir, file_))
#-- then, create the document --#
html = ''
# perform sort
file_list.sort(key=sort_func)
for my_file in file_list:
with open(my_file, 'r') as md:
LOGGER.info("processing file %s" % my_file)
data = md.read()
# perform link fixing
data = re.sub(r'\[(.+?)\]\((.+?)\)', fix_links, data)
data = re.sub(r'^\s*(#+)', fix_headings, data, flags=re.MULTILINE)
# each file is its own section
html +='