#!/usr/bin/env python

"""
Small utility to convert MSIE favourites to an object structure.

Originally written by Fredrik Lundh.
Modified by Lars Marius Garshol
2-17-2002 T. B. Passin. Now closes folder when its traverse is done.  Also works
    with current IE shortcut format, which can differ from the format
    that was assumed here.
11-6-2002 T. B. Passin. Fixed input and output encoding issues.  Note that the
    folder titles are file system names, and will be in the same encoding as
    the file system names.  In the US, this would normally be cp-1252.  Now
    those titles (and the bookmark titles as well) are decoded into Unicode.  If
    bookmark titles are not also in cp-1252, there could be a problem, but
    normally they would be.

    Change the input encoding if your system uses another code page.
    
    Requires coordinated changes in bookmark.py so that the output encoding
    can get passed as a parameter. This is required so that when bookmark.py
    creates the xml declaration, it will know to use the same encoding as
    the output writer.  (The only way to avoid passing the encoding would be
    for this program to assume that bookmark.py will always use utf-8
    encoding, which would not be good coding practice.)

    Occurrences of "&amp;" in URLs are unescaped back to "&".  This is done
    so that later, on conversion to XML again, we will not end up with
    "&amp;amp;". 
11-30-2002 T.B. Passin. Now reads through entire file looking for "[InternetShortcut]".  Saves
    "[DEFAULT]" if found but only returns it if "[InternetShortcut]" is not
    found.  Previously, returned "[DEFAULT]" value if found, since it would
    come first.  This is needed because when a url value is changed in IE's
    "Organize Favorites" tool, the default value is not changed.
    
"""

import codecs
import os
import string
from xml.sax import saxutils

import bookmark

# Name of the favourites folder on a Norwegian-language Windows.
# NOTE(review): not referenced anywhere in this part of the file.
DIR = "Favoritter" # Norwegian version

# Profile directory of the current user.  USERPROFILE is only set on
# NT-family Windows, so fall back to the home directory instead of letting
# a KeyError abort the import of this module on other systems.
USRDIR = os.environ.get("USERPROFILE") or os.path.expanduser("~") # NT version
#USRDIR = r"c:\windows" # 95 version

def unescape(data,entities={}):
    """Return ``data`` with every "&amp;" turned back into a plain "&".

    URL strings sometimes arrive with their ampersands escaped and
    sometimes not; this brings them to the unescaped form so that a later
    conversion to XML does not escape them twice.  No other entity is
    touched.

    ``entities`` is accepted but ignored; it is kept only for possible
    future use.
    """
    escaped_amp, plain_amp = '&amp;', '&'
    return data.replace(escaped_amp, plain_amp)

class MSIE:
    """Import Internet Explorer favourites into a bookmark collection.

    Creating an instance immediately walks the directory tree rooted at
    ``path``.  Every sub-directory is reported to the bookmark collection
    as a folder and every file from which a URL can be read is reported as
    a bookmark.
    """

    def __init__(self,bookmarks, path, encoding):
        """Walk ``path`` and feed what is found into ``bookmarks``.

        bookmarks -- object receiving add_folder(), leave_folder() and
                     add_bookmark() calls; kept as ``self.bms``.
        path      -- root directory of the favourites tree.
        encoding  -- name of the codec used to decode byte-string file
                     and folder names into Unicode titles.

        Raises LookupError if ``encoding`` is not a known codec.
        """
        self.bms = bookmarks
        self.root = None
        self.path = path

        self.decoder = codecs.getdecoder(encoding)
        self.encoder = codecs.getencoder(encoding)
        self.__walk()

    def __decode(self, name):
        """Return the file system name ``name`` as a Unicode title."""
        # Only byte strings need decoding; a name that is already text
        # (for example from a listing made with a text path) is kept as is.
        if isinstance(name, bytes):
            return self.decoder(name, 'replace')[0]
        return name

    def __walk(self, subpath=None):
        """Recursively traverse the favourites folder.

        subpath -- list of directory names leading from ``self.path`` to
                   the directory being listed; None means the root itself.
        """
        if subpath is None:
            subpath = []
        folder = os.path.join(self.path, *subpath)
        for entry in os.listdir(folder):
            fullname = os.path.join(folder, entry)
            if os.path.isdir(fullname):
                self.bms.add_folder(self.__decode(entry), None)
                self.__walk(subpath + [entry])
                # Close the folder once its traversal is done.
                self.bms.leave_folder()
                continue
            url = self.__geturl(fullname)
            if url:
                # The bookmark title is the file name without extension.
                title = self.__decode(os.path.splitext(entry)[0])
                self.bms.add_bookmark(title, None,
                                      None, None, url)

    def __geturl(self, file):
        """Return the URL stored in the shortcut ``file``, or '' if none.

        The whole file is scanned.  A "URL=" entry of the
        "[InternetShortcut]" section wins; the "BASEURL=" entry of the
        "[DEFAULT]" section is remembered and returned only when no such
        "URL=" entry exists, because IE does not update the default value
        when a URL is edited in its "Organize Favorites" tool.

        Files that cannot be read yield ''.
        """
        default = ''
        section = None
        try:
            with open(file) as fp:
                for raw in fp:
                    # Strip the line ending explicitly so that a final
                    # line without a newline keeps its last character.
                    line = raw.rstrip("\r\n")
                    if line.startswith("[") and line.endswith("]"):
                        section = line
                    elif section == "[InternetShortcut]" and line[:4] == "URL=":
                        return unescape(line[4:])
                    elif section == "[DEFAULT]" and line[:8] == "BASEURL=":
                        default = unescape(line[8:])
        except (IOError, UnicodeError):
            # Unreadable file, or (on Python 3) a non-text file found in
            # the favourites tree: treat it as not being a shortcut.
            return ''
        return default

# --- Testprogram

if __name__ == '__main__':
    # Convert a favourites folder to XBEL on standard output.  The folder
    # is taken from the first command line argument, or looked up in the
    # Windows registry when no argument is given.
    import sys

    if len(sys.argv)>1:
        path = sys.argv[1]
    else:
        try:
            import win32api, win32con
        except ImportError:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("The win32api module is not available on this system")
            print("so we can't automatically find your favorites folder.")
            print("Please re-run this program specifiying the location of your")
            print("favorites folder on the command line.")
            sys.exit(1)
        # The per-user shell folder locations live under "Shell Folders"
        # (with a space) in the registry.
        keyname = r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
        hkey = win32api.RegOpenKey(win32con.HKEY_CURRENT_USER, keyname)
        path, pathtype = win32api.RegQueryValueEx(hkey, "Favorites")
        assert pathtype == win32con.REG_SZ

    import codecs
    INPUT_ENCODING='cp1252'  # Change to suit encoding in use
    msie=MSIE(bookmark.Bookmarks(), path, INPUT_ENCODING)

    OUTPUT_ENCODING='utf-8'
    #OUTPUT_ENCODING='iso-8859-1'  # Can't handle some cp1252 characters but bookmark.py
    # does not create character references for them, so do not use this encoding
    # when the input is cp1252 input data.

    (encoder,decoder,reader,writer) = codecs.lookup(OUTPUT_ENCODING)
    # The codec writer emits bytes: on Python 3 it must wrap the underlying
    # byte stream; on Python 2 sys.stdout has no "buffer" and is used as is.
    out = writer(getattr(sys.stdout, "buffer", sys.stdout))
    msie.bms.dump_xbel(out,OUTPUT_ENCODING)


