#!/usr/bin/env python3
"""Convert the Markdown files under ``../../doc`` into HTML pages under ``htmldoc``.

Every ``*.md`` file found below ``DOC_ROOT`` is rendered to an ``.html`` file
at the same relative location below ``OUTPUT_ROOT`` (``README.md`` becomes
``index.html``).  Links between Markdown files are rewritten to point at the
generated pages and every heading is pushed one level down.

Both roots are relative to the current working directory.
"""
import os
import sys
import markdown
from mdx_gfm import GithubFlavoredMarkdownExtension
import re
import logging

logging.basicConfig(format='%(levelname)s: %(message)s',
                    stream=sys.stderr, level=logging.INFO)
LOGGER = logging.getLogger('preprocess')

# Set to True when the first command-line argument is "hosted".
# Nothing in this file reads it after that.
hosted = False

# Directory that is scanned for Markdown sources.
DOC_ROOT = '../../doc'
# Directory that receives the generated HTML pages.
OUTPUT_ROOT = 'htmldoc'

# ".md" only counts as a file suffix when it is followed by the end of the
# target, a fragment ("#..."), a query ("?...") or whitespace (link title).
_MD_LINK_SUFFIX = re.compile(r'\.md(?=$|[#?\s])', re.IGNORECASE)


# sort the file order
def sort_func(x):
    """Return the sort key for the file path *x*.

    * Paths that do not contain a ``doc`` directory component
      (``<sep>doc<sep>``) get the key ``'z'``.
    * Paths containing ``README.md`` get everything before that name plus
      ``'0'``, which places readmes at the start of their section.
    * Every other path is its own key.
    """
    doc_marker = os.path.sep + 'doc' + os.path.sep
    if doc_marker not in x:
        return 'z'
    readme_at = x.find('README.md')
    if readme_at == -1:
        return x
    return x[:readme_at] + '0'


def _html_target(target):
    """Map a Markdown link target to the name of the generated HTML page.

    Mirrors the naming used for the output files: the ``.md`` suffix becomes
    ``.html`` and ``README.html`` becomes ``index.html``.  Fragments, queries
    and link titles after the suffix are left in place.
    """
    return _MD_LINK_SUFFIX.sub('.html', target).replace('README.html',
                                                        'index.html')


# make the links work in the generated pages
def fix_links(match):
    """Rewrite one Markdown link so it points at the generated HTML page.

    *match* must expose the link text as group 1 and the link target as
    group 2 (it is used as the replacement callback for the
    ``[text](target)`` pattern in ``main``).

    PNG images and external ``http://`` / ``https://`` URLs are returned
    unchanged; for every other target the ``.md`` suffix is replaced as
    described in ``_html_target``.
    """
    text, target = match.group(1), match.group(2)
    # images
    if os.path.splitext(target)[-1] == '.png':
        return match.group(0)
    # preserve external urls; require the full scheme so that a relative
    # target such as "httpd.md" is still rewritten
    if target.startswith(('http://', 'https://')):
        return match.group(0)
    # fix paths
    return '[%s](%s)' % (text, _html_target(target))


def fix_headings(match):
    """Return the matched run of ``#`` characters (group 1) with one more ``#``."""
    return '%s#' % (match.group(1))


def _collect_markdown_files(root):
    """Return the paths of all files below *root* whose name ends in ``.md``
    (compared case-insensitively)."""
    return [
        os.path.join(base_dir, name)
        for base_dir, _subfolders, names in os.walk(root)
        for name in names
        if name.lower().endswith('.md')
    ]


def _page_title(data):
    """Return the first line of *data* without its leading ``#`` markers.

    Only the heading prefix is removed, so a ``"# "`` sequence inside the
    title itself (e.g. ``# C# Guide``) is kept.
    """
    first_line = data.partition('\n')[0]
    return first_line.strip().lstrip('#').strip()


def main(argv=None):
    """Render every Markdown file below ``DOC_ROOT`` into ``OUTPUT_ROOT``.

    *argv* is the list of command-line arguments without the program name;
    it defaults to ``sys.argv[1:]``.
    """
    global hosted

    # check whether hosted mode is on
    args = sys.argv[1:] if argv is None else list(argv)
    if args and args[0] == 'hosted':
        hosted = True

    # -- first, prepare the file list -- #
    file_list = _collect_markdown_files(DOC_ROOT)
    file_list.sort(key=sort_func)

    # -- then, create the documents -- #
    for my_file in file_list:
        with open(my_file, 'r', encoding='utf-8') as md:
            LOGGER.info("processing file %s", my_file)
            data = md.read()

        # retrieve path: same location relative to DOC_ROOT, below OUTPUT_ROOT
        page_path = os.path.join(OUTPUT_ROOT,
                                 os.path.relpath(my_file, DOC_ROOT))
        page_path_html = re.sub(r'\.md$', '.html', page_path,
                                flags=re.IGNORECASE)
        page_path_html = page_path_html.replace("README.html", "index.html")
        LOGGER.info("path: %s", page_path_html)
        os.makedirs(os.path.dirname(page_path), exist_ok=True)

        # retrieve title (taken before the headings are rewritten below)
        page_title = _page_title(data)

        # perform link fixing
        data = re.sub(r'\[(.+?)\]\((.+?)\)', fix_links, data)
        # NOTE(review): "\s*" also matches newlines and indentation, so blank
        # lines and leading whitespace in front of a "#" run are dropped from
        # the output -- confirm this is intended.
        data = re.sub(r'^\s*(#+)', fix_headings, data, flags=re.MULTILINE)

        # convert
        # NOTE(review): the template below holds only the two placeholders
        # separated by spaces; confirm whether wrapper markup is expected.
        html = ''' %s %s ''' % (
            page_title,
            markdown.markdown(data, extensions=[
                'nl2br',
                'mdx_breakless_lists',
                GithubFlavoredMarkdownExtension(),
            ]),
        )

        with open(page_path_html, 'w', encoding='utf-8') as ht:
            ht.write(html)


if __name__ == "__main__":
    main()