#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
#
# Number Headings of a HTML Page
# ==============================
#
# number-html-headings.py in_file.html out_file.html
#
# Example:
#
# <h1>First</h1>
#  <h2>Sub-Item</h2>
#  <h2>Sub-Item</h2>
#
# <h1>Second</h1>
#
# Result:
#
# 1 First
# 1.1 Sub-Item
# 1.2 Sub-Item
#
# 2 Second
#
# A table of contents is inserted if there is "<!-- TOC -->" in the
# html source.
#
# (c) September 2002 Thomas Guettler
#
# This script is in the public domain
#
# Feedback welcome
#
# TODO: Use ElementTree
import os
import re
import sys
import time
import getopt
import StringIO
from types import *
def usage():
    """Print the command-line help text to standard output.

    The program name shown in the synopsis is the base name of
    ``sys.argv[0]``.  Nothing is returned and the process is not
    terminated; callers decide whether to exit afterwards.
    """
    program = os.path.basename(sys.argv[0])
    # A parenthesised single argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("""Usage: %s [--max-depth N] [--no-toc-links] [--hide] file.html file-created.html
  --max-depth N: The depth of the table of content should be N.
  --no-toc-links: Don't insert links to the TOC after each heading.
  --hide:
       Hide all headings and the content "below" which is marked with class="hide"
       Example: <h2 class="hide">Ignore Me</h2>

Number headings in a HTML file. The headings get numbered by
the <hN> tags in your file. You can insert a TOC (Table of Content)
with <!-- TOC -->.

Example:
 <h1>First</h1>
  <h2>Sub-Item</h2>
  <h2>Sub-Item</h2>
 <h1>Second</h1>

Result:
 1 First
  1.1 Sub-Item
  1.2 Sub-Item
 2 Second
""" % program)
def main():
# Command-line entry point: parse the options, read the input HTML file,
# number every line that matches the heading pattern, collect table-of-content
# rows, and write the result to the output file.
#
# NOTE(review): this copy of the function appears to have lost its indentation
# and most of the HTML markup inside its string literals, regular expressions
# and "<" comparisons (see the NOTE(review) markers below).  It is not
# runnable as it stands; restore those spans from a pristine copy before
# changing any logic.
#
# Only long options are accepted.  An unknown option prints the usage text
# and exits with status 1.
try:
opts, args = getopt.getopt(sys.argv[1:], "", ["max-depth=",
"no-toc-links",
"hide"])
except getopt.GetoptError:
usage()
sys.exit(1)
# Exactly two positional arguments are required: input file, output file.
if len(args)!=2:
usage()
sys.exit(1)
# Defaults: TOC depth 3, "[toc]" links after headings enabled, hiding off.
max_depth=3
no_toc_links=0
do_hide=False
for o, a in opts:
if o=="--no-toc-links":
no_toc_links=1
elif o=="--max-depth":
try:
max_depth=int(a)
except ValueError:
sys.stderr.write("%s must be a number\n" % a)
sys.exit(1)
elif o=="--hide":
do_hide=True
else:
raise Exception("Internal Error %s %s not processed" % (
o, a))
filename_in=args[0]
filename_out=args[1]
# Read the whole input file into a list of lines.
fd=open(filename_in)
file=fd.readlines()
fd.close()
# NOTE(review): the opening-tag part of this pattern looks truncated
# ("]*)>" has no matching "(" or "[").  The code below reads groups 1-7, so
# the intact pattern must define seven groups.
re_heading=re.compile(r'^(.*)]*)>(.*?)<(/)?h(\d)>(.*)$')
# stack: one counter per currently open heading level.
# last_level: level of the previous heading (0 before the first one).
# out: buffer currently written to, either out_orig (the real output) or
#      hide_out while a hidden section is active.
# hide: level of the heading that started the hidden section, or None.
# headings: collected table-of-content rows.
stack=[]
last_level=0
out=StringIO.StringIO()
out_orig=out
hide_out=StringIO.StringIO()
hide=None
headings=[]
for line in file:
match=re_heading.match(line)
if not match:
# Non-heading lines are copied to the current buffer unchanged.
out.write(line)
else:
heading=match.group(4)
before_heading=match.group(1)
attributes=match.group(3)
after_heading=match.group(7)
# The level digit of the opening tag (group 2) must equal the one of the
# closing tag (group 6), and the closing tag must contain the slash (group 5).
if match.group(2)!=match.group(6):
raise Exception("Parse Error: does not match %s %s: '%s' " % (
match.group(2), match.group(6), line))
if match.group(5)!="/":
raise Exception("Parse Error: Missing slash in end-tag: '%s'" % line.strip())
level=int(match.group(2))
assert(level>0 and level<10)
# Update the section counters: one level deeper opens a new counter, the
# same level increments the last counter.
if level==last_level+1:
stack.append(1)
elif level==last_level:
stack[-1]+=1
# NOTE(review): garbled line.  "levellen" is not defined anywhere in view and
# "diff" is used below without being assigned.  Presumably this was
# "elif level<last_level:" followed by computing diff and checking it against
# len(stack) -- confirm against a pristine copy.
elif levellen(stack):
raise Exception("Parse Error: diff: %s stack: %s last_level: %s"
" level: %s" % (
diff, stack, last_level, level))
for i in range(diff):
stack.pop()
stack[-1]+=1
else:
raise Exception("Strange sequence of Stack: %s "
" level:%s last_level:%s line: '%s'" %
(stack, level, last_level, line))
# A heading whose level number is <= the hidden heading's level ends the
# hidden section: output goes to the real buffer again.
if hide and level<=hide:
out=out_orig
hide=None
last_level=level
# Build the dotted section number (e.g. "1.2") from the counters, and one
# ' ' entry in space per level below the first.
number=[]
space=[]
number_td=[]
for i in range(len(stack)):
number.append(str(stack[i]))
if i!=0:
space.append(' ')
space=''.join(space)
number='.'.join(number)
match=re.match(r'^(.*?)\s*id="(.+?)"\s*(.*)$', attributes)
if match:
# If the heading has already an id, then take this name
# and don't use link_N
id=match.group(2)
attributes="%s %s" % (match.group(1), match.group(3))
else:
id="link_%s" % number
if do_hide:
match=re.match(r'^(.*?)\s*class="hide"\s*(.*)$', attributes)
if match:
# hide this heading and all stuff below
assert hide==None
hide=level
attributes="%s %s" % (match.group(1), match.group(2))
out=hide_out
# Create column for each number in heading (for alignment)
#print stack, heading, number
# size shrinks with nesting depth; it is passed to the number_td format
# string below.
if len(stack)==1:
size="font-size: 100%"
elif len(stack)==2:
size="font-size: 80%"
else:
size="font-size: 60%"
for i in range(max_depth):
if i==len(stack)-1:
point=" "
value=stack[i]
# NOTE(review): garbled line.  A branch condition and the tail of a format
# string are fused here; the remaining branches that set point/value and the
# call that appends to number_td appear to be missing.
elif i%s%s' % (
size, value, point))
number_td=''.join(number_td)
if no_toc_links:
toc=""
else:
# NOTE(review): '[toc]' contains no placeholder, so "% (number)" raises
# TypeError as written; the link markup around "[toc]" appears to be missing.
toc='[toc]' % (
number)
# Text before the heading goes to the current buffer.  The heading itself is
# always written to the real output, even when it starts a hidden section.
out.write(before_heading)
# NOTE(review): the template below has three placeholders but is given seven
# arguments; the surrounding markup appears to be missing.
out_orig.write('''
%s %s
%s
''' % (level, attributes,
id, number, heading, toc,
level))
# A heading that starts a hidden section is followed by "..." in the real
# output; otherwise the text after the heading goes to the current buffer.
if hide==level:
out_orig.write("...")
else:
out.write(after_heading)
assert(type(level)==IntType)
# create heading
# Only headings up to max_depth get a table-of-content row.
# NOTE(review): the template below has three placeholders but is given five
# arguments; the surrounding markup appears to be missing.
if level<=max_depth:
headings.append('''
%s
%s%s |
''' % (number_td, number,
id, space, heading))
# NOTE(review): the template below has no placeholder, so the "%" operation
# raises TypeError as written; the wrapping markup appears to be missing.
headings='''
''' % ''.join(headings)
# NOTE(review): if the input ends while a hidden section is still active,
# out is hide_out here, so the hidden text (not out_orig) would be written.
out=out.getvalue()
# NOTE(review): the patterns of the three re.sub calls below are empty or
# nearly empty (r'' and r'\s*') and would match at every position.  The
# original markers (presumably HTML comments such as the TOC marker named in
# the file header) appear to be missing -- confirm against a pristine copy.
out=re.sub(r'', ''.join(headings), out)
# The second substitution inserts the current local time and the input
# file name into its replacement text.
out=re.sub(r'',
'''
''' %
(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), filename_in),
out)
# NOTE(review): re.DOTALL is passed as the fourth positional argument, which
# re.sub interprets as count, not as flags.
out=re.sub(r'\s*', r'', out, re.DOTALL)
# Write the result to the output file.
fd=open(filename_out, "wt")
fd.write(out)
fd.close()
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()