#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
#
# Number Headings of a HTML Page
# ==============================
#
# number-html-headings.py in_file.html out_file.html
#
# Example:
# <h1>First</h1>
#   <h2>Sub-Item</h2>
#   <h2>Sub-Item</h2>
#
# <h1>Second</h1>
#
# Result:
# <h1>1 First</h1>
#   <h2>1.1 Sub-Item</h2>
#   <h2>1.2 Sub-Item</h2>
#
# <h1>2 Second</h1>
#
# A table of content is inserted if there is "<!-- TOC -->" in the
# html source.
#
# (c) September 2002 Thomas Guettler
#
# This script is in the public domain
#
# Feedback welcome
#
# TODO: Use ElementTree
#
# NOTE(review): an earlier copy of this file had every "<...>" sequence
# stripped out of it (comments, string literals and regular expressions
# alike).  The markup below was reconstructed from what survived: the
# regex group numbers used by the code, the number of arguments passed to
# each format string, and the option descriptions.  The exact HTML emitted
# by the templates should be confirmed against a pristine copy.

import os
import re
import sys
import time
import getopt

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

from types import *  # noqa: F401,F403  (kept from the original; unused now)


class ParseError(Exception):
    """Raised when the heading structure of the input cannot be processed."""


# Groups: 1 text before the heading, 2 opening level, 3 attributes,
# 4 heading text, 5 slash of the end tag, 6 closing level, 7 text after.
# A heading must start and end on the same line to be recognized.
_RE_HEADING = re.compile(r'^(.*)<h(\d)([^>]*)>(.*?)<(/)?h(\d)>(.*)$')
_RE_ID = re.compile(r'^(.*?)\s*id="(.+?)"\s*(.*)$')
_RE_HIDE = re.compile(r'^(.*?)\s*class="hide"\s*(.*)$')
_RE_TOC = re.compile(r'<!--\s*TOC\s*-->')
# NOTE(review): the marker that triggers the "created" stamp was lost.
# A dedicated comment marker is assumed here -- confirm.
_RE_CREATED = re.compile(r'<!--\s*CREATED\s*-->')

_USAGE = """Usage: %s [--max-depth N] [--no-toc-links] [--hide] file.html file-created.html

  --max-depth N: The depth of the table of content should be N.

  --no-toc-links: Don't insert links to the TOC after each heading.

  --hide: Hide all headings and the content "below" which
          is marked with class="hide"
          Example: <h2 class="hide">Ignore Me</h2>

Number headings in a HTML file. The headings get numbered by the
<hN> tags in your file. You can insert a TOC (Table of Content)
with <!-- TOC -->.

Example:
<h1>First</h1>
  <h2>Sub-Item</h2>
  <h2>Sub-Item</h2>
<h1>Second</h1>

Result:
<h1>1 First</h1>
  <h2>1.1 Sub-Item</h2>
  <h2>1.2 Sub-Item</h2>
<h1>2 Second</h1>
"""


def usage():
    """Print the command line help to stdout."""
    print(_USAGE % os.path.basename(sys.argv[0]))


def _advance_counters(stack, level, last_level, line):
    """Update the per-level counters in *stack* for a heading of *level*.

    *stack* holds one counter per open heading level, so its length equals
    *last_level*.  Going one level deeper opens a new counter, staying on
    the same level increments the last one, and going up closes the deeper
    counters first.  Skipping a level on the way down (e.g. h1 directly
    followed by h3) raises ParseError.
    """
    if level == last_level + 1:
        stack.append(1)
    elif level == last_level:
        stack[-1] += 1
    elif level < last_level:
        diff = last_level - level
        if diff >= len(stack):
            raise ParseError(
                "Parse Error: diff: %s stack: %s last_level: %s"
                " level: %s" % (diff, stack, last_level, level))
        del stack[len(stack) - diff:]
        stack[-1] += 1
    else:
        raise ParseError(
            "Strange sequence of Stack: %s "
            " level:%s last_level:%s line: '%s'" % (
                stack, level, last_level, line))


def _toc_number_cells(stack, max_depth):
    """Return the <td> cells holding the heading number, one per TOC column.

    One cell is produced for each of the *max_depth* columns so that the
    numbers line up; columns deeper than the heading stay empty.
    """
    depth = len(stack)
    if depth == 1:
        size = "font-size: 100%"
    elif depth == 2:
        size = "font-size: 80%"
    else:
        size = "font-size: 60%"
    cells = []
    for i in range(max_depth):
        if i == depth - 1:
            # Last component of the number: no trailing dot.
            point, value = "&nbsp;", stack[i]
        elif i < depth:
            point, value = ".", stack[i]
        else:
            point, value = "", ""
        cells.append('<td style="%s">%s%s</td>' % (size, value, point))
    return ''.join(cells)


def _number_headings(lines, max_depth, no_toc_links, do_hide, filename_in):
    """Return the HTML of *lines* with numbered headings and a filled-in TOC.

    lines        -- the input document as a list of lines
    max_depth    -- deepest heading level that gets a TOC entry
    no_toc_links -- if true, no "[toc]" back link is added to the headings
    do_hide      -- if true, content below headings with class="hide" is
                    dropped from the output
    filename_in  -- name of the input file, used in the "created" stamp

    Raises ParseError for malformed or out-of-sequence headings.
    """
    stack = []            # current counter for every open heading level
    last_level = 0
    out_orig = StringIO()  # the document that is returned
    hide_out = StringIO()  # sink for hidden content; never read back
    out = out_orig         # where non-heading content currently goes
    hide = None            # level of the heading that started hiding
    toc_rows = []

    for line in lines:
        match = _RE_HEADING.match(line)
        if not match:
            out.write(line)
            continue

        (before_heading, open_level, attributes, heading,
         slash, close_level, after_heading) = match.groups()
        # The pattern stops in front of the line terminator; keep it so
        # that trailing text is not glued to the following line.
        eol = line[match.end():]

        if open_level != close_level:
            raise ParseError(
                "Parse Error: <h%s> does not match </h%s>: '%s'" % (
                    open_level, close_level, line.strip()))
        if slash != "/":
            raise ParseError(
                "Parse Error: Missing slash in end-tag: '%s'" % line.strip())
        level = int(open_level)
        if level < 1:
            raise ParseError(
                "Parse Error: invalid heading level %s: '%s'" % (
                    level, line.strip()))

        _advance_counters(stack, level, last_level, line)

        if hide is not None and level <= hide:
            # A heading on the same or a higher level ends the hidden part.
            out = out_orig
            hide = None
        last_level = level

        number = '.'.join([str(count) for count in stack])
        space = '&nbsp;&nbsp;' * (len(stack) - 1)

        id_match = _RE_ID.match(attributes)
        if id_match:
            # If the heading has already an id, then take this name
            # and don't use link_N
            anchor = id_match.group(2)
            attributes = "%s %s" % (id_match.group(1), id_match.group(3))
        else:
            anchor = "link_%s" % number

        if do_hide:
            hide_match = _RE_HIDE.match(attributes)
            if hide_match:
                # hide this heading's content and all stuff below
                attributes = "%s %s" % (
                    hide_match.group(1), hide_match.group(2))
                if hide is None:
                    # Inside an already hidden section the outer heading
                    # stays in charge; only the marker is removed.
                    hide = level
                    out = hide_out

        attributes = attributes.strip()
        if attributes:
            attributes = " " + attributes

        if no_toc_links:
            toc = ""
        else:
            toc = '<a href="#toc_%s">[toc]</a>' % number

        out.write(before_heading)
        # NOTE(review): as in the original, the heading itself always goes
        # to the real output, even while content is being hidden.
        out_orig.write('\n<h%s%s id="%s">%s %s %s</h%s>\n' % (
            level, attributes, anchor, number, heading, toc, level))
        if hide == level:
            out_orig.write("...")
        elif after_heading:
            out.write(after_heading + eol)

        if level <= max_depth:
            toc_rows.append(
                '\n<tr>\n %s\n'
                ' <td><a id="toc_%s" href="#%s">%s%s</a></td>\n'
                '</tr>\n' % (
                    _toc_number_cells(stack, max_depth),
                    number, anchor, space, heading))

    toc_html = '\n<table>\n%s\n</table>\n' % ''.join(toc_rows)
    created = '\n<!-- Created %s from %s -->\n' % (
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), filename_in)

    # Always take the real document, even if the input ended inside a
    # hidden section (then ``out`` still points at the discard buffer).
    html = out_orig.getvalue()
    # Callables are used as replacement so that backslashes in heading
    # texts or file names are inserted literally.
    html = _RE_TOC.sub(lambda m: toc_html, html)
    html = _RE_CREATED.sub(lambda m: created, html)
    # NOTE(review): the original ended with one more substitution that
    # removed the whitespace following some tag; the tag itself was lost
    # and the call passed re.DOTALL as ``count`` instead of ``flags``.
    # It is left out here until the tag can be confirmed.
    return html


def main():
    """Command line entry point: number the headings of a HTML file.

    Reads the options and the two file names from sys.argv, numbers the
    headings of the input file and writes the result to the output file.
    Exits with status 1 after printing the usage text when the command
    line is invalid.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "", ["max-depth=", "no-toc-links", "hide"])
    except getopt.GetoptError:
        usage()
        sys.exit(1)
    if len(args) != 2:
        usage()
        sys.exit(1)

    max_depth = 3
    no_toc_links = False
    do_hide = False
    for o, a in opts:
        if o == "--no-toc-links":
            no_toc_links = True
        elif o == "--max-depth":
            try:
                max_depth = int(a)
            except ValueError:
                sys.stderr.write("%s must be a number\n" % a)
                sys.exit(1)
        elif o == "--hide":
            do_hide = True
        else:
            raise Exception("Internal Error %s %s not processed" % (o, a))

    filename_in, filename_out = args
    with open(filename_in) as fd:
        lines = fd.readlines()

    html = _number_headings(
        lines, max_depth, no_toc_links, do_hide, filename_in)

    with open(filename_out, "wt") as fd:
        fd.write(html)


if __name__ == "__main__":
    main()