mirror of
https://github.com/catchorg/Catch2.git
synced 2025-05-25 22:19:25 +00:00
Rename to updateDocumentToC.py and adapt for use with Catch
adding missing GPL 3.0 license (thanks for noting @horenmar).
This commit is contained in:
parent
7e9b53e40c
commit
61280e6d0a
1 changed files with 182 additions and 136 deletions
446
scripts/updateDocumentToC.py
Normal file
446
scripts/updateDocumentToC.py
Normal file
|
@ -0,0 +1,446 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# updateDocumentToC.py
|
||||
#
|
||||
# Insert table of contents at top of Catch markdown documents.
|
||||
#
|
||||
# This script is distributed under the GNU General Public License v3.0
|
||||
#
|
||||
# It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
|
||||
# https://github.com/rasbt/markdown-toclify
|
||||
#
|
||||
|
||||
from __future__ import print_function
|
||||
from scriptCommon import catchPath
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Configuration:

# Default minimum number of collected headings needed before a table of
# contents is generated (default of the --min-toc-entries option).
minTocEntries = 4

# Heading levels left out of the table of contents.
# NOTE(review): level 6 is not listed in either exclusion list -- confirm that is intended.
headingExcludeDefault = [1,3,4,5] # use level 2 headers by default
headingExcludeRelease = [2,3,4,5] # use level 1 headers for release-notes.md

# Glob pattern of the documents processed when no paths are given on the command line.
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
# Base name of the file that uses headingExcludeRelease instead of headingExcludeDefault.
releaseNotesName = 'release-notes.md'

# First line of a generated table of contents; also used to recognize an existing one.
contentTitle = '**Contents** '
# 1-based line number at which the table of contents is placed ...
contentLineNo = 4
# ... and the corresponding 0-based list index.
contentLineNdx = contentLineNo - 1

# End configuration

# Characters kept unchanged when an anchor id is derived from a heading;
# every other character is replaced by '-'.
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
|
||||
|
||||
def readLines(in_file):
    """Read the markdown file in_file and return its content split on '\\n'.

    A file ending with a newline therefore yields a trailing empty string.
    """
    with open(in_file, 'r') as source:
        text = source.read()
    return text.split('\n')
|
||||
|
||||
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
    """Return a new list without the lines that start with a prefix in remove.

    With the default prefixes this drops existing [back to top] links and
    <a class="mk-toclify"> anchor tags. If remove is empty, an unfiltered
    copy of lines is returned.
    """
    if not remove:
        return lines[:]

    return [entry for entry in lines if not entry.startswith(remove)]
|
||||
|
||||
def removeToC(lines):
    """Return a copy of lines with an existing table of contents removed.

    A table of contents is recognized by the line at index contentLineNdx
    starting with contentTitle. The title line, the following lines that
    start with '[' (the entries) and the single line after the last entry
    are removed.

    Documents that are too short to hold a table of contents, or that do not
    have one at the expected position, are returned as an unchanged copy.
    """
    # A document with no line at the ToC position cannot contain a ToC;
    # without this guard indexing would raise IndexError.
    if len(lines) <= contentLineNdx:
        return lines[:]
    if not lines[contentLineNdx].startswith(contentTitle):
        return lines[:]

    # Skip the ToC entries; stop at the end of the document if the entries
    # run up to the very last line.
    pos = contentLineNdx + 1
    while pos < len(lines) and lines[pos].startswith('['):
        pos += 1

    # lines[pos] is the separator line written after the entries: drop it too.
    return lines[:contentLineNdx] + lines[pos + 1:]
|
||||
|
||||
def dashifyHeadline(line):
    """
    Takes a header line from a Markdown document and
    returns a list of
        the '#'-stripped version of the head line,
        a string version for <a id=''></a> anchor tags,
        and the level of the headline as integer.

    A list (not a tuple) is returned because the level is adjusted in place
    by positioningHeadlines.

    E.g.,
    >>> dashifyHeadline('### some header lvl3')
    ['some header lvl3', 'some-header-lvl3', 3]

    """
    # Remove surrounding whitespace first: otherwise an indented heading is
    # reported as level 0 and whitespace after a closing '##' sequence keeps
    # the closing hashes in the title.
    text = line.strip()

    without_closing = text.rstrip('#')
    without_hashes = without_closing.lstrip('#')
    level = len(without_closing) - len(without_hashes)
    title = without_hashes.strip()

    # character replacements: '.' and '/' are dropped, any other character
    # outside VALIDS becomes a dash
    anchor = title.replace('.', '').replace('/', '')
    anchor = ''.join([c if c in VALIDS else '-' for c in anchor])

    anchor = anchor.lower()
    anchor = re.sub(r'(-)\1+', r'\1', anchor)  # remove duplicate dashes
    anchor = anchor.strip('-')  # strip dashes from start and end

    # exception '&' (double-dash in github)
    anchor = anchor.replace('-&-', '--')

    return [title, anchor, level]
|
||||
|
||||
def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
    """
    Gets headlines from the markdown document and creates anchor tags.

    Keyword arguments:
        lines: a list of strings, one per line of the Markdown document.
        id_tag: if true, inserts the <a id> tags (not req. by GitHub)
        back_links: if true, adds "back to top" links below each headline
        exclude_h: header levels to exclude. E.g., [2, 3]
            excludes level 2 and 3 headings.

    Returns a tuple of 2 lists:
        1st list:
            A copy of the input lines where
            <a class="mk-toclify" id="some-header"></a> anchor tags were
            inserted above the collected header lines (if id_tag is true)
            and back-to-top links below every header line (if back_links
            is true). Every input line is kept.

        2nd list:
            A list of 3-value sublists, where the first value
            represents the heading, the second value the string
            that is used as ID in the anchor tags,
            and the third value is an integer that represents the headline level.
            E.g.,
            [['some header lvl3', 'some-header-lvl3', 3], ...]

    """
    heading_prefixes = ('# ', '## ', '### ', '#### ', '##### ', '###### ')
    # headers can be indented by at most 3 spaces (a tab counts as a code block)
    allowed_indents = ('', ' ', '  ', '   ')

    out_contents = []
    headlines = []
    for l in lines:
        l_stripped = l.lstrip()
        indent = l[:len(l) - len(l_stripped)]

        # A headline is one to six '#' followed by a space, indented by at
        # most three spaces and not empty. Lines failing one of the checks
        # are ordinary content: they must still be copied to the output
        # (skipping them would delete them from the document).
        saw_headline = (
            l_stripped.startswith(heading_prefixes)
            and indent in allowed_indents
            and bool(set(l) - {'#', ' '}))

        if saw_headline:
            # pass the unindented text so that the level is counted correctly
            dashified = dashifyHeadline(l_stripped)

            if not exclude_h or dashified[-1] not in exclude_h:
                if id_tag:
                    anchor = '<a class="mk-toclify" id="%s"></a>' % (dashified[1])
                    out_contents.append(anchor)
                headlines.append(dashified)

        out_contents.append(l)
        if back_links and saw_headline:
            out_contents.append('[[back to top](#table-of-contents)]')
    return out_contents, headlines
|
||||
|
||||
def positioningHeadlines(headlines):
    """
    Left-align the headline levels: if no headline has level 1, the level
    (last item) of every headline is decreased by one.

    The rows are modified in place and the same list is returned.
    """
    has_top_level = any(entry[-1] == 1 for entry in headlines)
    if has_top_level:
        return headlines

    for entry in headlines:
        entry[-1] -= 1
    return headlines
|
||||
|
||||
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
    """
    Creates the lines of the table of contents from the headline list
    that was returned by the tagAndCollect function.

    Keyword Arguments:
        headlines: list of lists
            e.g., ['Some header lvl3', 'some-header-lvl3', 3]
        hyperlink: if True, every entry is a Markdown link built from the
            heading text and its anchor id; otherwise every entry is a
            plain '- heading' list item indented according to its level.
        top_link: if True, add an id tag for linking the table
            of contents itself (for the back-to-top-links);
            only used when the TOC header is written.
        no_toc_header: suppresses TOC header (contentTitle) if True.

    Returns the list of table of contents lines, terminated by a '\\n' item.

    """
    toc = []
    if not no_toc_header:
        if top_link:
            toc.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
        toc.append(contentTitle)

    for entry in headlines:
        if hyperlink:
            toc.append('[%s](#%s) ' % (entry[0], entry[1]))
        else:
            indent = (entry[2]-1)*' '
            toc.append('%s- %s' % (indent, entry[0]))

    toc.append('\n')
    return toc
|
||||
|
||||
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
    """
    Returns a string with the Markdown output contents incl.
    the table of contents.

    Keyword arguments:
        toc_headlines: lines for the table of contents
            as created by the createToc function.
        body: contents of the Markdown file including
            ID-anchor tags as returned by the
            tagAndCollect function.
        spacer: Adds vertical space after the table
            of contents. Height in pixels.
        placeholder: If a placeholder string is provided, the placeholder
            will be replaced by the TOC instead of inserting the TOC
            before the line at index contentLineNdx of the document.

    If there is nothing to insert (no table of contents lines and no spacer)
    and no placeholder is given, the body is returned unchanged.

    """
    toc_lines = list(toc_headlines)
    if spacer:
        toc_lines.append('\n<div style="height:%spx;"></div>\n' % (spacer))
    toc_markdown = "\n".join(toc_lines)

    if placeholder:
        # also executed for an empty TOC so that the placeholder is removed
        return "\n".join(body).replace(placeholder, toc_markdown)

    if not toc_markdown:
        # Splitting and re-joining the body would append an extra newline to
        # documents shorter than the insertion position on every run.
        return "\n".join(body)

    body_markdown_p1 = "\n".join(body[:contentLineNdx]) + '\n'
    body_markdown_p2 = "\n".join(body[contentLineNdx:])
    return body_markdown_p1 + toc_markdown + body_markdown_p2
|
||||
|
||||
def outputMarkdown(markdown_cont, output_file):
    """
    Writes markdown_cont to output_file; does nothing if output_file is
    empty or None.

    """
    if not output_file:
        return

    with open(output_file, 'w') as target:
        target.write(markdown_cont)
|
||||
|
||||
def markdownToclify(
        input_file,
        output_file=None,
        min_toc_len=2,
        github=False,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=0,
        placeholder=None,
        exclude_h=None):
    """ Function to add table of contents to markdown files.

    An existing table of contents, back-to-top links and anchor tags are
    removed from the input first, then the table of contents is rebuilt.

    Parameters
    -----------
      input_file: str
        Path to the markdown input file.

      output_file: str (default: None)
        Path to the markdown output file. Nothing is written if None.

      min_toc_len: int (default: 2)
        Minimum number of entries to create a table of contents for.

      github: bool (default: False)
        If True, no <a id> anchor tags are inserted above the headings and
        no anchor for the table of contents itself is written.

      back_to_top: bool (default: False)
        Inserts back-to-top links below headings if True.

      nolink: bool (default: False)
        Creates the table of contents without internal links if True.

      no_toc_header: bool (default: False)
        Suppresses the Table of Contents header if True

      spacer: int (default: 0)
        Inserts vertical space (in pixels) after the table of contents.

      placeholder: str (default: None)
        Inserts the TOC at the placeholder string instead
        of inserting the TOC at the top of the document.

      exclude_h: list (default None)
        Excludes header levels, e.g., if [2, 3], ignores header
        levels 2 and 3 in the TOC.

    Returns
    -----------
    changed: Boolean
      True if the generated content differs from the content of
      input_file, False otherwise.

    """
    original_lines = readLines(input_file)

    cleaned_contents = removeLines(
        removeToC(original_lines),
        remove=('[[back to top]', '<a class="mk-toclify"'))

    processed_contents, raw_headlines = tagAndCollect(
        cleaned_contents,
        id_tag=not github,
        back_links=back_to_top,
        exclude_h=exclude_h)

    # add table of contents?
    if len(raw_headlines) < min_toc_len:
        processed_headlines = []
    else:
        leftjustified_headlines = positioningHeadlines(raw_headlines)

        processed_headlines = createToc(
            leftjustified_headlines,
            hyperlink=not nolink,
            top_link=not nolink and not github,
            no_toc_header=no_toc_header)

    # without links the anchors/back links are pointless: use the plain body
    if nolink:
        processed_contents = cleaned_contents

    cont = buildMarkdown(
        toc_headlines=processed_headlines,
        body=processed_contents,
        spacer=spacer,
        placeholder=placeholder)

    if output_file:
        outputMarkdown(cont, output_file)

    # readLines split the file content on '\n', so joining restores it exactly
    return cont != "\n".join(original_lines)
|
||||
|
||||
def isReleaseNotes(f):
    """Return True if the file name part of path f equals releaseNotesName."""
    _, file_name = os.path.split(f)
    return file_name == releaseNotesName
|
||||
|
||||
def excludeHeadingsFor(f):
    """Return the heading levels to exclude from the table of contents of file f."""
    if isReleaseNotes(f):
        return headingExcludeRelease
    return headingExcludeDefault
|
||||
|
||||
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
    """Add or update table of contents in specified file. Return 1 if file changed, 0 otherwise.

    The new content is first written to '<input_file>.tmp'. The input file
    is only replaced when that content differs from the current one;
    otherwise the temporary file is removed and the input file is left
    untouched.
    """
    if verbose:
        print( 'file: {}'.format(input_file))

    output_file = input_file + '.tmp'

    markdownToclify(
        input_file=input_file,
        output_file=output_file,
        min_toc_len=min_toc_len,
        github=True,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=False,
        placeholder=False,
        exclude_h=excludeHeadingsFor(input_file))

    # Compare old and new content so that the documented return value is
    # honoured and unchanged documents are not rewritten.
    with open(input_file, 'r') as before:
        old_content = before.read()
    with open(output_file, 'r') as after:
        new_content = after.read()

    if old_content == new_content:
        os.remove(output_file)
        return 0

    # prevent race-condition (Python 3.3):
    if sys.version_info >= (3, 3):
        os.replace(output_file, input_file)
    else:
        os.remove(input_file)
        os.rename(output_file, input_file)

    return 1
|
||||
|
||||
def updateDocumentToC(paths, min_toc_len, verbose):
    """Add or update table of contents to specified paths. Return number of changed files

    Every item of paths is expanded as a glob pattern; matches that are not
    regular files are skipped.
    """
    changed = 0
    for pattern in paths:
        for candidate in glob.glob(pattern):
            if not os.path.isfile(candidate):
                continue
            changed += updateSingleDocumentToC(
                input_file=candidate, min_toc_len=min_toc_len, verbose=verbose)
    return changed
|
||||
|
||||
def updateDocumentToCMain():
    """Add or update table of contents to specified paths.

    Parses the command line, processes the given files (documentsDefault if
    none are given) and prints a summary of the number of processed files.
    """

    parser = argparse.ArgumentParser(
        description='Add or update table of contents in markdown documents.',
        epilog="""""",
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        'Input',
        metavar='file',
        type=str,
        nargs=argparse.REMAINDER,
        help='files to process, at default: docs/*.md')

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='report the name of the file being processed')

    parser.add_argument(
        '--min-toc-entries',
        dest='minTocEntries',
        default=minTocEntries,
        type=int,
        metavar='N',
        help='the minimum number of entries to create a table of contents for [{deflt}]'.format(deflt=minTocEntries))

    # Removal works by demanding more entries than any document can have:
    # the existing table is stripped and no new one is generated.
    # sys.maxsize (instead of a small magic number) makes this hold for
    # documents of any size.
    parser.add_argument(
        '--remove-toc',
        action='store_const',
        dest='minTocEntries',
        const=sys.maxsize,
        help='remove all tables of contents')

    args = parser.parse_args()

    paths = args.Input if args.Input else [documentsDefault]

    changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)

    if changedFiles > 0:
        print( "Processed table of contents in " + str(changedFiles) + " file(s)" )
    else:
        print( "No table of contents added or updated" )
|
||||
|
||||
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    updateDocumentToCMain()
|
||||
|
||||
# end of file
|
Loading…
Add table
Add a link
Reference in a new issue