mirror of
https://github.com/tildearrow/furnace.git
synced 2024-11-26 22:43:01 +00:00
150 lines
3.1 KiB
Python
150 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import sys
|
|
|
|
import markdown
|
|
from mdx_gfm import GithubFlavoredMarkdownExtension
|
|
|
|
import re
|
|
|
|
import logging
|
|
# Log to stderr, prefixing every message with its level name.
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    stream=sys.stderr,
    level=logging.INFO,
)
LOGGER = logging.getLogger('preprocess')

# Switched to True when the script is started with "hosted" as its first
# command-line argument.
hosted = False
|
|
|
|
# sort the file order
|
|
def sort_func(x):
    """Return the key used to sort the list of documentation files.

    Args:
        x: path of a Markdown file (a string).

    Returns:
        * ``'z'`` when the path has no ``doc`` component enclosed by path
          separators, which pushes such files towards the end of the list
          (like an appendix);
        * the path up to ``README.md`` followed by ``'0'`` for readmes, so
          that a readme sorts before the other files of its section;
        * the unchanged path otherwise.
    """
    doc_component = '%sdoc%s' % (os.path.sep, os.path.sep)
    if doc_component not in x:
        return 'z'

    # place readmes at the start of each section
    readme_at = x.find('README.md')
    if readme_at < 0:
        return x
    return x[:readme_at] + '0'
|
|
|
|
# make the links work in the generated HTML pages
|
|
def fix_links(match):
    """Rewrite one Markdown link so that it points at the generated HTML.

    Meant to be used as the replacement callback of ``re.sub`` with a
    pattern whose group 1 is the link text and group 2 the link target.

    Args:
        match: the regular expression match of a single link.

    Returns:
        The link as Markdown text:

        * targets ending in ``.png`` are kept as they are;
        * targets starting with a URL scheme (``https:``, ``mailto:``, ...)
          are returned untouched;
        * in every other target ``README.md`` becomes ``index.html`` and a
          remaining ``.md`` extension becomes ``.html``.
    """
    text = match.group(1)
    target = match.group(2)

    # images
    if os.path.splitext(target)[-1] == '.png':
        return '[%s](%s)' % (text, target)

    # preserve external urls
    if re.match(r'[A-Za-z][A-Za-z0-9+.-]*:', target):
        return match.group(0)

    # Only rewrite ".md" where the file name ends: at the end of the target
    # or right before a "#fragment", a "?query" or a quoted link title.
    # A plain str.replace would also hit ".md" inside directory names and
    # would turn "README.md#section" into "README.html#section", a page
    # that is never written.
    file_end = r'(?=$|[#?\s])'
    target = re.sub(r'README\.md' + file_end, 'index.html', target)
    target = re.sub(r'\.md' + file_end, '.html', target)
    return '[%s](%s)' % (text, target)
|
|
|
|
def fix_headings(match):
    """Return the matched run of ``#`` characters with one more appended.

    Meant to be used as the replacement callback of ``re.sub`` with a
    pattern whose group 1 holds the heading marker. Only group 1 is kept,
    so anything else the pattern matched (such as whitespace in front of
    the marker) is dropped from the result.

    Args:
        match: the regular expression match of a heading marker.

    Returns:
        Group 1 of the match followed by ``#``.
    """
    return '{}#'.format(match.group(1))
|
|
|
|
# Skeleton of every generated page. It is filled in with (title, body)
# through %-formatting, which is why the literal percent sign in the CSS is
# doubled.
PAGE_TEMPLATE = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<style>
body {
  background-color: #222;
  color: #eee;
  font-family: sans-serif;
}

a {
  color: #3df;
}

a:visited {
  color: #fd3;
}

b {
  color: #fff;
}

h1 {
  text-align: center;
}

img {
  max-width: 100%%;
}
</style>
<title>%s</title>
</head>
<body>
%s
</body>
</html>
'''

if __name__ == "__main__":
    # Imported here under its own name so that it cannot be confused with a
    # variable called "html".
    from html import escape

    # check whether hosted mode is on
    if len(sys.argv) > 1 and sys.argv[1] == 'hosted':
        hosted = True

    #-- first, prepare the file list --#
    doc_root = '../../doc'
    file_list = []
    for base_dir, _subfolders, files in os.walk(doc_root):
        file_list.extend(
            os.path.join(base_dir, name)
            for name in files
            if name.lower().endswith('.md')
        )

    # perform sort
    file_list.sort(key=sort_func)

    #-- then, create one HTML page per Markdown file --#
    for my_file in file_list:
        # The page declares charset utf-8, so read and write UTF-8
        # explicitly instead of relying on the platform default encoding.
        with open(my_file, 'r', encoding='utf-8') as md:
            LOGGER.info("processing file %s", my_file)
            data = md.read()

        # retrieve path: mirror the layout of the doc folder under htmldoc/
        page_path = os.path.join('htmldoc', os.path.relpath(my_file, doc_root))
        page_path_html = re.sub(r'\.md$', '.html', page_path).replace("README.html", "index.html")
        LOGGER.info("path: %s", page_path_html)

        os.makedirs(os.path.dirname(page_path), exist_ok=True)

        # retrieve title: the first line without its leading heading marker
        page_title = data.partition('\n')[0].lstrip('#').strip()

        # perform link fixing
        data = re.sub(r'\[(.+?)\]\((.+?)\)', fix_links, data)
        # NOTE(review): "\s*" also matches newlines, so blank lines in front
        # of a heading are consumed, and lines starting with "#" inside code
        # blocks get an extra "#" as well - confirm this is acceptable.
        data = re.sub(r'^\s*(#+)', fix_headings, data, flags=re.MULTILINE)

        # convert
        page = PAGE_TEMPLATE % (
            # the title is plain text, so escape it before it goes into HTML
            escape(page_title, quote=False),
            markdown.markdown(data, extensions=['nl2br', 'mdx_breakless_lists', GithubFlavoredMarkdownExtension()])
        )

        with open(page_path_html, 'w', encoding='utf-8') as ht:
            ht.write(page)
|