#! /usr/bin/env python

import sys, os, stat, string, formatter, time

def cleanhtmls(str, index):
    """Rewrite local ``.html`` link targets in the tail of one line to ``.htm``.

    ``str`` is a single line of HTML whose last character is a terminator
    (the newline) that is always dropped.  ``index`` is the offset of a
    ``<a href=`` occurrence in that line; the character immediately after
    ``<a href=`` is taken to be the quote character.

    Returns the text that follows the opening quote (terminator removed).
    A link target is rewritten only when it does not contain ``http``
    anywhere and its path ends in ``.html``: the path is then lower-cased
    and given a ``.htm`` extension, while a ``#fragment`` is kept verbatim
    inside the quotes.  Further ``<a href=`` occurrences later in the line
    are handled by recursion.
    """
    if index + 8 >= len(str):
        # Nothing follows "<a href=", so there is no quote character to read.
        return str[index + 9:-1]

    quote = str[index + 8]
    tail = str[index + 9:-1]
    closing = tail.find(quote)
    if closing < 0:
        # Unterminated attribute value: leave the remainder untouched.
        return tail

    url = tail[:closing]
    after = tail[closing + 1:]
    result = tail

    if url.find("http") < 0:
        fragment = ""
        pound = url.find("#")
        if pound > 0:
            # Keep the whole fragment; it is re-attached before the closing
            # quote so the anchor stays part of the attribute value.
            fragment = url[pound:]
            url = url[:pound]
        base, ext = os.path.splitext(url)
        if ext == '.html':
            result = (base + '.htm').lower() + fragment + quote + after

    following = result.lower().find("<a href=")
    if following > 0:
        # The recursive call drops the last character, so add a newline
        # for it to strip.
        head = result[:following + 9]
        result = head + cleanhtmls(result + '\n', following)
    return result

def html2htm(oldfile, newfile):
    """Copy ``oldfile`` to ``newfile``, rewriting ``.html`` links via cleanhtmls.

    The input is processed line by line.  Lines that contain ``<a href=``
    (matched case-insensitively) have everything after the opening quote
    replaced by the result of ``cleanhtmls``; all other lines are copied
    unchanged.  A final line without a trailing newline is written back
    without one.

    I/O errors are reported on stderr instead of being raised.  If the
    output file had already been created when the error occurred, it is
    removed so that no truncated copy is left behind.

    Returns True when the copy completed, False when an error was reported.
    """
    created = False
    try:
        with open(oldfile) as source:
            with open(newfile, 'w') as target:
                created = True
                for line in source:
                    index = line.lower().find("<a href=")
                    if index >= 0:
                        # cleanhtmls always drops the last character of its
                        # input, so split off the real newline (if any) and
                        # hand it a line that is guaranteed to end in one.
                        newline = ''
                        body = line
                        if line.endswith('\n'):
                            newline = '\n'
                            body = line[:-1]
                        head = body[:index + 9]
                        line = head + cleanhtmls(body + '\n', index) + newline
                    target.write(line)
    except EnvironmentError as exc:
        # EnvironmentError covers both IOError and os.error.
        sys.stderr.write("can't convert %s to %s reason: %s\n"
                         % (oldfile, newfile, exc))
        if created:
            try:
                os.unlink(newfile)
            except OSError:
                # Best effort only: the original error was already reported.
                pass
        return False
    return True
        
def parseHTMLFiles(arg, dir, names):
    """Directory-walk callback: convert the HTML files of one directory.

    ``arg`` is unused.  ``dir`` is the directory being visited and ``names``
    the entries found in it.  Nothing is done for directories whose own name
    is ``thumbs``, ``code`` or ``private``.

    Every regular file named ``*.htm`` or ``*.html`` (exact, case-sensitive
    extensions) is run through ``html2htm`` into the scratch file ``tmp_tmp``
    in the same directory.  Only when that scratch file exists afterwards is
    the original removed and the scratch file renamed to the lower-cased
    ``.htm`` name; otherwise the original is kept and a message is written
    to stderr.
    """
    if os.path.split(dir)[1] in ('thumbs', 'code', 'private'):
        return

    scratch = os.path.join(dir, 'tmp_tmp')
    for filename in names:
        stem, ext = os.path.splitext(filename)
        if ext not in ('.htm', '.html'):
            continue
        source = os.path.join(dir, filename)
        if not os.path.isfile(source):
            # e.g. a sub-directory that happens to be called "x.htm"
            continue

        if ext == '.htm':
            print("(HTML) %12s   <> %s" % (filename, dir))
            target = os.path.join(dir, filename.lower())
        else:
            newname = (stem + '.htm').lower()
            print(" HTML with .html extension " + filename)
            print(" cleaning to " + newname)
            target = os.path.join(dir, newname)

        # A scratch file left over from an interrupted run must not be
        # mistaken for the result of this conversion.
        if os.path.isfile(scratch):
            os.unlink(scratch)
        html2htm(source, scratch)
        if not os.path.isfile(scratch):
            # html2htm reports errors on stderr rather than raising, so the
            # missing output is the only sign of failure; keep the original.
            sys.stderr.write("skipping %s: conversion failed\n" % source)
            continue

        os.unlink(source)
        if os.path.exists(target):
            # os.rename refuses to overwrite an existing file on Windows.
            os.unlink(target)
        os.rename(scratch, target)

# main ***************************************************************
######################################################################
    
def main():
    """Convert every directory tree named on the command line.

    Prints a UTC timestamp banner and the list of input directories, then
    walks each directory top-down and hands every visited directory to
    ``parseHTMLFiles``.
    """
    inputdirs = sys.argv[1:]

    print(time.strftime("html 2 htm = %a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
    print(inputdirs)

    for top in inputdirs:
        # os.walk replaces os.path.walk, which no longer exists in Python 3.
        # The callback still receives (arg, directory, entry names).
        for dirpath, dirnames, filenames in os.walk(top):
            parseHTMLFiles(0, dirpath, dirnames + filenames)

if __name__ == '__main__':
    main()