# This file is part of ZTM.
#
# ZTM is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# ZTM is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ZTM; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
""" This module contains a ContentHandler for XML TopicMaps documents.

    DESIGN:
        The sax parser runs through all topicmap elements, constructing an 
        in-memory representation of topics, names, occurrences, and associations
        in lists and dictionaries.
        
        From this data it then extends the topicmap with in-memory objects.
        Once all elements are in the topicmap, they are persisted to ZODB and
        indexed.
        
        The rationale for this triple-run design was abysmal performance
        importing very large XTM documents.
"""
# CURRENT ISSUES:
#   No support for multiple topicMap elements in a document.
#   No support for mergemap
#   Untested support for xml:base
#   Some issues with xtm1.1 and xtm1.0 handling
#   Not all ZTM-specific handling has been fully integrated
#   Issues with source-locators
#   Issues with maintaining reification relationships.

from time import time
from urlparse import urlparse
from xml.sax import make_parser, ContentHandler, ErrorHandler
from Globals import package_home

from zLOG import *
from Acquisition import aq_base
from DateTime import DateTime

from Products.ZTM2.Locator import Locator
from Products.ZTM2.Variant import Variant
from Products.ZTM2.Utils import generateRandomId
from Products.ZTM2.ZTopic import ZTopic as Topic
from Products.ZTM2.ZTopicMap import ZTopicMap as TopicMap
from Products.ZTM2.ZBaseName import ZBaseName as BaseName
from Products.ZTM2.ZOccurrence import ZOccurrence as Occurrence
from Products.ZTM2.ZAssociation import ZAssociation as Association
from Products.ZTM2.ZAssociationRole import ZAssociationRole as AssociationRole

# Locator scheme prefixes. PLACEFULPROTOCOL has the same text as the
# "x-zope-path:" prefix XTMHandler.startElement puts in front of the
# topicmap's physical path.
HTTPPROTOCOL = u'http://'
HTTPSPROTOCOL = u'https://'
PLACEFULPROTOCOL = u'x-zope-path:'

# Module-level tool references. XTMHandler.__init__ rebinds both from the
# topicmap it is given.
portal_workflow = None
portal_membership = None

class XTMParseError(StandardError): pass
class XTMVersionError(XTMParseError):
    """ The topicMap version attribute is missing or not supported. """

# Messages carried by XTMVersionError.
# The first message used to read "element.The"; the missing space after the
# period is restored here.
NOVERSIONATTRIBUTEERRORTEXT = (
    "No version attribute on topicMap element. "
    "The document is assumed to be XTM 1.0."
)
# Takes one %s argument: the unsupported version value.
WRONGVERSIONATTRIBUTEVALUEERRORTEXT = (
    "The version attribute specified a "
    "value (%s) that this XTM Parser does not currently support."
)

# Values of XTMHandler.accepting_characters. characters() only collects
# content while the flag equals ALLOWED_CONTENT.
ALLOWED_CONTENT = True
DISALLOWED_CONTENT = False

class XTMErrorHandler(ErrorHandler):
    """ SAX error handler for XTM parsing; adds nothing to ErrorHandler. """

class XTMEntityResolver:
    """ Resolves the public identifiers of the XTM DTDs to packaged files. """

    def resolveEntity(self, pubid, sysid):
        """ Return the path of the DTD file that matches ``pubid``.

            ``sysid`` is not consulted.  An AssertionError is raised for
            any public identifier other than the XTM 1.1 and 1.0 DTDs.
        """
        if pubid == "-//TopicMaps.Org//DTD XML Topic Map (XTM) 1.1//EN":
            dtdname = '/xtm11.dtd'
        elif pubid == "-//TopicMaps.Org//DTD XML Topic Map (XTM) 1.0//EN":
            dtdname = '/xtm1.dtd'
        else:
            raise AssertionError("No matching public identifier for the Topic Maps document.")
        # The DTD files are looked up next to this module.
        return package_home(globals()) + dtdname

class XTMHandler(ContentHandler):
    """ A XML Topic Maps (XTM) SAX ContentHandler. """
    
    def __init__( self
                , topicmap
                , importfolder = None
                , sourcelocator = None
                , foldermapping = None
                , typemapping = None):
        """ Prepare the handler for importing an XTM document into topicmap.

            topicmap      -- the topic map to extend.  Its portal_workflow
                             and portal_membership attributes are copied to
                             the module-level globals of the same names.
            importfolder  -- folder kept for the import; defaults to the
                             aq_inner of the topicmap's 'import' attribute.
            sourcelocator -- first entry of the source locator stack;
                             defaults to topicmap.absolute_url().
            foldermapping -- ZTM construct that allows imported topics to be
                             created in a folder of our choice; defaults to
                             a new empty dict.
            typemapping   -- stored on the handler; defaults to a new empty
                             dict.
        """
        global portal_workflow
        global portal_membership

        # Generate a source locator for this topicmap if none is given.
        if sourcelocator is None:
            sourcelocator = topicmap.absolute_url()
        self._slstack = [sourcelocator]

        # Store a reference to the topicmap for later use.
        self._topicmap = topicmap

        # Retrieve the default importfolder if none is given.
        if importfolder is None:
            importfolder = getattr(self._topicmap, 'import').aq_inner
        self._importfolder = importfolder

        portal_workflow = topicmap.portal_workflow
        portal_membership = topicmap.portal_membership

        # The defaults are None rather than {} so that handlers created
        # without explicit mappings never share one dict object.
        if foldermapping is None:
            foldermapping = {}
        if typemapping is None:
            typemapping = {}
        self._foldermapping = foldermapping
        self._typemapping = typemapping

        # Parser state.
        self._elementstack = []
        self._endxmlbase = []
        self._topics = {}
        self._assocs = []
        self.accepting_characters = DISALLOWED_CONTENT

        # Counters used for the progress output.
        self._start_association = 0
        self._start_topic = 0
    
    def characters(self, content):
        """ Collect PCDATA while a text-bearing element is open. """
        if self.accepting_characters != ALLOWED_CONTENT:
            # Text outside the elements we collect from is ignored.
            return
        self._data.append(content)
    
    def startElement(self, name, attrs):
        """ Dispach the new element to the correct handler if any. """
        self._elementstack.append(name)
        
        if name=='topicMap' and not attrs.has_key('xml:base'):
            # There is no given xml:base, so we attempt to use the topicmaps 
            # base locator if any
            bl = self._topicmap.getBaseLocator()
            if bl:
                self._slstack.append( bl.getAddress() )
            else:
                addr = "x-zope-path:%s"%('/'.join( self._topicmap.getPhysicalPath() ))
                self._slstack.append( addr )
        elif attrs.has_key('xml:base'):
            #TODO: Verify that the xml:base attribute is valid
            self._slstack.append(attrs['xml:base'])
            exitmethod = "end_%s"%'_'.join(self._elementstack[1:])
            self._endxmlbase.append(exitmethod)

        method = getattr(self,"start_%s"%'_'.join(self._elementstack[1:]), None)
        if not method is None:
            method(attrs)
    
    def endElement(self, name):
        """ Dispatch the end of an element to the correct handler if any. """
        methodname = "end_%s"%'_'.join(self._elementstack[1:])
        method = getattr( self, methodname, None)
        if not method is None:
            method()
        if self._endxmlbase and methodname==self._endxmlbase[-1]:
            self._slstack.pop(-1)
        self._elementstack.pop(-1)
    
    def start_(self, attrs):
        """ <topicMap> """
        print "Started parsing", DateTime()
        # Verify that this is a XTM 1.1 document.
        version = attrs.get('version', None)
        if version is None:
            #XXX: To conform, ZTM must also handle 1.0 documents.
            #     For now we simply throw the error, and expect the higher ups
            #     to handle it.
            #
            #     To deal with it we may have to change validator during a run,
            #     which we do not yet know if is possible.
            raise XTMVersionError(NOVERSIONATTRIBUTEERRORTEXT)
        elif version != "1.1":
            raise XTMVersionError(WRONGVERSIONATTRIBUTEVALUEERRORTEXT%version)
        
        topicmapelementid = attrs.get('id', '')
        # Retrieve the id or any base attributes for constructing the
        # sourcelocator.
        sourcelocator = Locator(self._slstack[-1] + '#' + topicmapelementid)
        self._topicmap.addSourceLocator(sourcelocator)
        
        # If the topicmaps [base locator] property is null property, we set it
        # to the base of the imported topic.
        
        
    def end_(self):
        """ </topicMap> """
        print "Finnished parsing", DateTime()
        self.constructTopicMap()
        del self._topicmap
        del self._importfolder
        del self.accepting_characters
    
    def start_mergeMap(self, attrs):
        """ <mergeMap>: not supported yet, always raises NotImplementedError. """
        message = "ZTM does not yet support mergeMap directives."
        raise NotImplementedError(message)
    
    def start_topic(self, attrs):
        """ <topic> """
        sl = "%s#%s"%(self._slstack[-1], attrs['id'])
        
        # We build a datastructure containing the topics properties, and delay
        # constructing until we reach the end of the topics data. 
        topicdata = { 'instanceOf':[]
                    , 'subjectIdentity':[('topicRef', sl)]
                    , 'sourcelocator':''
                    , 'occurrences':[]
                    , 'topicnames':[]
                    , 'topicid': attrs['id']
                    }
        
        self._topicdata = topicdata
        self._start_topic += 1
        if not self._start_topic%100:
            print "Parsed", self._start_topic, "Topics", DateTime()
   
    def end_topic(self):
        """ </topic> """
        topicid = self._topicdata['topicid']
        self._topics[topicid] = self._topicdata
        del self._topicdata
    
    def start_topic_instanceOf(self, attrs):
        """ <topic><instanceOf>: nothing to record on the element itself. """
    def start_topic_instanceOf_topicRef(self, attrs):
        self._topicdata['instanceOf'].append(
                      ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'],))
                  )
   
    def start_topic_instanceOf_subjectIndicatorRef(self, attrs):
        self._topicdata['instanceOf'].append(
                      ('subjectIndicatorRef', attrs['xlink:href'],)
                  )
    def start_topic_instanceOf_resourceRef(self, attrs):
        # XTM 1.1 specific
        self._topicdata['instanceOf'].append(
                      ('resourceRef', attrs['xlink:href'],)
                  )
                  
    
    def start_topic_subjectIdentity(self, attrs):
        """ <topic><subjectIdentity>: nothing to record on the element itself. """
    def start_topic_subjectIdentity_resourceRef(self, attrs):
        self._topicdata['subjectIdentity'].append(
                      ('resourceRef', attrs['xlink:href'],)
                  )
    
    def start_topic_subjectIdentity_topicRef(self, attrs):
        self._topicdata['subjectIdentity'].append(
                      ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'],))
                  )
    
    def start_topic_subjectIdentity_subjectIndicatorRef(self, attrs):
        self._topicdata['subjectIdentity'].append(
                      ('subjectIndicatorRef', attrs['xlink:href'],)
                  )
    #
    # BaseNames
    #
    def start_topic_baseName(self, attrs):
        basename = {'variants':[], 'id':attrs.get('id', None), 'scope':[],
                    'type':None, 'string':None}
        if attrs.has_key('id'):
            sourcelocator = "%s#%s"%(self._slstack[-1], attrs['id'])
            basename['sourcelocator'] = sourcelocator
        else:
            basenameid = basename['id']
            if basenameid is None:
                basenameid = generateRandomId()
        
        self._basename = basename
        self._variantstack = []
    
    def end_topic_baseName(self):
        self._topicdata['topicnames'].append(self._basename)
    
    def start_topic_baseName_instanceOf(self, attrs):
        """ <baseName><instanceOf>: nothing to record on the element itself. """
    def start_topic_baseName_instanceOf_topicRef(self, attrs):
        # XTM 1.1 Specific
        self._basename['type'] = ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'],))
    def start_topic_baseName_instanceOf_resourceRef(self, attrs):
        # XTM 1.1 Specific        
        self._basename['type'] = ('resourceRef', attrs['xlink:href'],)        
    def start_topic_baseName_instanceOf_subjectIndicatorRef(self, attrs):
        # XTM 1.1 Specific
        self._basename['type'] = ('subjectIndicatorRef', attrs['xlink:href'],)
        
    def start_topic_baseName_scope(self, attrs):
        self._basename['scope'] = []
    def start_topic_baseName_scope_topicRef(self, attrs):
        self._basename['scope'].append(('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'],)))
    def start_topic_baseName_scope_resourceRef(self, attrs):
        self._basename['scope'].append(('resourceRef', attrs['xlink:href'],))
    def start_topic_baseName_scope_subjectIndicatorRef(self, attrs):
        self._basename['scope'].append(
                    ('subjectIndicatorRef', attrs['xlink:href'],)
                )
    
    def start_topic_baseName_baseNameString(self, attrs):
        """ <baseNameString>: start collecting the text of the name. """
        self._data = []
        self.accepting_characters = ALLOWED_CONTENT
    def end_topic_baseName_baseNameString(self):
        """ </baseNameString>: stop collecting and store the text on the name. """
        self.accepting_characters = DISALLOWED_CONTENT
        collected = ''.join( self._data )
        self._basename['string'] = collected
        del self._data
    
    #
    # Variants
    #
    def start_topic_baseName_variant(self, attrs):
        self._variant = {'parameters':[], 'variantName':None}
        self._variantstack = []
        self._parentstack = []
    
    def end_topic_baseName_variant(self):
        if self._variantstack:
            # We are inside a nested variant
            self._elementstack.append('variant')
            self._variantstack.pop(-1)
            self._basename['variants'].append(self._variant)
            parentvariant = self._parentstack.pop(-1)
            self._variant = parentvariant
        else:
            self._basename['variants'].append(self._variant)
    
    def start_topic_baseName_variant_parameters(self, attrs):
        """ <variant><parameters>: nothing to record on the element itself. """
    def start_topic_baseName_variant_parameters_topicRef(self, attrs):
        self._variant['parameters'].append( ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'],) ))
    
    def start_topic_baseName_variant_parameters_subjectIndicatorRef(self,attrs):
        parameters = self._variant['parameters']
        parameters.append( ('subjectIndicatorRef', attrs['xlink:href'],) )
    def start_topic_baseName_variant_parameters_resourceRef(self,attrs):
        # XTM 1.1 specific
        parameters = self._variant['parameters']
        parameters.append( ('resourceRef', attrs['xlink:href'],) )
    
    def start_topic_baseName_variant_variantName(self, attrs):
        """ <variant><variantName>: nothing to record on the element itself. """
    def start_topic_baseName_variant_variantName_resourceRef(self, attrs):
        if self._variant['variantName'] is None:
            self._variant['variantName'] = ('resourceRef', attrs['xlink:href'])
        else:
            raise AssertionError, "variantName can only contain on "\
                                  "instance resourceRef or resourceData"
        
    def start_topic_baseName_variant_variantName_resourceData(self, attrs):
        if self._variant['variantName'] is None:
            self.accepting_characters = ALLOWED_CONTENT
            self._data = []
        else:
            raise AssertionError, "variantName can only contain on "\
                                  "instance resourceRef or resourceData"

    def end_topic_baseName_variant_variantName_resourceData(self):
        """ </resourceData> in a variantName: store the collected text as the variant name. """
        self.accepting_characters = DISALLOWED_CONTENT
        collected = ''.join( self._data )
        self._variant['variantName'] = collected
        del self._data
    
    def start_topic_baseName_variant_variant(self, attrs):
        # We need to carefully manage the stack here.
        self._variantstack.append( self.elementstack.pop(-1) )
        self._parentvariantstack.append(self._variant)
        self._variant = { 'parameters'  : self._variant['parameters']
                        , 'variantName' : ''
                        }
    
    #
    # Occurrences
    #    
    def start_topic_occurrence(self, attrs):
        occurrence = { 'locator' : None
                     , 'data'    : None
                     , 'id'      : attrs.get('id', None)
                     , 'scope'   : []
                     , 'type'    : None
                     } 
        if attrs.has_key('id'):
            sourcelocator = "%s#%s"%(self._slstack[-1], attrs['id'])
            occurrence['sourcelocator'] = sourcelocator
        else:
            #TODO: Is it a problem that occurrence doesn't have a sourcelocator?
            pass
        self._occurrence = occurrence
        
    def end_topic_occurrence(self):
        self._topicdata['occurrences'].append(self._occurrence)
        del self._occurrence
        
    def start_topic_occurrence_instanceOf(self, attrs):
        """ <occurrence><instanceOf>: nothing to record on the element itself. """
    def start_topic_occurrence_instanceOf_topicRef(self, attrs):
        self._occurrence['type'] = ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'],))
    def start_topic_occurrence_instanceOf_resourceRef(self, attrs):
        # XTM 1.1 specific
        self._occurrence['type'] = ('resourceRef', attrs['xlink:href'],)
    def start_topic_occurrence_instanceOf_subjectIndicatorRef(self, attrs):
        self._occurrence['type'] = ('subjectIndicatorRef', attrs['xlink:href'],)

        
    def start_topic_occurrence_scope(self, attrs):
        """ <occurrence><scope>: nothing to record on the element itself. """
    def start_topic_occurrence_scope_topicRef(self, attrs):
        self._occurrence['scope'].append(('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'],)))
    
    def start_topic_occurrence_scope_resourceRef(self, attrs):
        self._occurrence['scope'].append(('resourceRef', attrs['xlink:href'],))
    
    def start_topic_occurrence_scope_subjectIndicatorRef(self, attrs):
        theme = ('subjectIndicatorRef', attrs['xlink:href'],)
        self._occurrence['scope'].append(theme)
    
    def start_topic_occurrence_resourceRef(self, attrs):
        self._occurrence['locator'] = attrs['xlink:href']
    
    def start_topic_occurrence_resourceData(self, attrs):
        """ <occurrence><resourceData>: start collecting the occurrence's text. """
        self._data = []
        self.accepting_characters = ALLOWED_CONTENT
    
    def end_topic_occurrence_resourceData(self):
        """ </resourceData> in an occurrence: store the collected text as its data. """
        self.accepting_characters = DISALLOWED_CONTENT
        collected = ''.join(self._data)
        self._occurrence['data'] = collected
        del self._data
    
    #
    # Associations
    #
    def start_association(self, attrs):
        associd = attrs.get('id', None)
        self._assoc = {'roles': [], 'id': associd, 'scope':[]}
        if associd:
            sourcelocator = "%s#%s"%(self._slstack[-1], attrs['id'])
            self._assoc['sourcelocator'] = sourcelocator
        else:
            #XXX: Is it an error to not have a source locator?
            pass
        self._start_association += 1
        if not self._start_association%100:
            print "Parsed", self._start_association, "Associations", DateTime()

    def end_association(self):
        self._assocs.append(self._assoc)
        del self._assoc
    
    def start_association_instanceOf(self, attrs):
        """ <association><instanceOf>: nothing to record on the element itself. """
    def start_association_instanceOf_topicRef(self, attrs):
        self._assoc['assoctype'] = ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href']))
    def start_association_instanceOf_resourceRef(self, attrs):
        self._assoc['assoctype'] = ('resourceRef', attrs['xlink:href'])
    def start_association_instanceOf_subjectIndicatorRef(self, attrs):
        self._assoc['assoctype'] = ('subjectIndicatorRef', attrs['xlink:href'])

            
    def start_association_scope(self, attrs):
        self._assoc['scope'] = []
    def start_association_scope_topicRef(self, attrs):
        self._assoc['scope'].append( ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'])))
    def start_association_scope_resourceRef(self, attrs):
        self._assoc['scope'].append( ('resourceRef', attrs['xlink:href']))
    def start_association_scope_subjectIndicatorRef(self, attrs):
        self._assoc['scope'].append( ('subjectIndicatorRef', attrs['xlink:href']))
    
    def start_association_member(self, attrs):
        self._rolespec = None
        self._players = []
    
    def end_association_member(self):
        for player in self._players:
            self._assoc['roles'].append((player, self._rolespec))
        del self._rolespec
        del self._players
    
    def start_association_member_roleSpec(self, attrs):
        """ <member><roleSpec>: nothing to record on the element itself. """
    def start_association_member_roleSpec_topicRef(self, attrs):
        self._rolespec = ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href']))
    def start_association_member_roleSpec_subjectIndicatorRef(self, attrs):
        self._rolespec = ('subjectIndicatorRef', attrs['xlink:href'])
    def start_association_member_roleSpec_resourceRef(self, attrs):
        # XTM 1.1 specific
        self._rolespec = ('resourceRef', attrs['xlink:href'])
      
    def start_association_member_topicRef(self, attrs):
        self._players.append( ('topicRef', "%s%s"%(self._slstack[-1], attrs['xlink:href'],) ))
    def start_association_member_resourceRef(self, attrs):
        self._players.append( ('resourceRef', attrs['xlink:href'],) )
    def start_association_member_subjectIndicatorRef(self, attrs):
        self._players.append( ('subjectIndicatorRef', attrs['xlink:href'],) )
    
    def constructTopicMap(self):
        """ Builds the topicmap from the constructed data. """
        print "Constructing topicmap:", DateTime()
        treatoccs = { "http://psi.forskning.no/workflow/#ztmdefault":workflow_state
                    , "http://psi.forskning.no/workflow/#history":workflow_history
                    , "http://purl.org/dc/elements/1.1/effectivedate":effectivedate
                    , "http://purl.org/dc/elements/1.1/expirationdate":expirationdate
                    , "http://purl.org/dc/elements/1.1/creationdate":creationdate
                    , "http://purl.org/dc/elements/1.1/creator":creator
                    , "http://purl.org/dc/elements/1.1/modificationdate":modificationdate
                    , "http://purl.org/dc/elements/1.1/description":description
                    , u"http://psi.forskning.no/workflow/#ztmdefault":workflow_state
                    , u"http://psi.forskning.no/workflow/#history":workflow_history
                    , u"http://purl.org/dc/elements/1.1/effectivedate":effectivedate
                    , u"http://purl.org/dc/elements/1.1/expirationdate":expirationdate
                    , u"http://purl.org/dc/elements/1.1/creationdate":creationdate
                    , u"http://purl.org/dc/elements/1.1/creator":creator
                    , u"http://purl.org/dc/elements/1.1/modificationdate":modificationdate
                    , u"http://purl.org/dc/elements/1.1/description":description
                    }
        
        topicmap = self._topicmap
        
        newtopics = []
        counter = 0
        print "Constructing %d topics."%len(self._topics)
        for topicid, topicdata in self._topics.iteritems():
            counter += 1
            if not counter%100:
                print "Constructed", counter, "topics by", DateTime()
            # First we check to see if there are existing topics this should
            # be "merged" with
            topics = []
            
            #XXX: Do we need to handle more than one sourcelocator?
            #topic = self._topicmap.getTopicBySourceLocator(topicdata['sourcelocator'], wrap=False)
            #if topic is not None:
            #    topics.append(topic)
            
            # Are there any topics with identical subject identity
            for reftype, ref in topicdata['subjectIdentity']:
                if reftype == 'topicRef':
                    topic = self._topicmap.getTopicBySourceLocator(ref, wrap=False)
                    if topic is not None:
                        topics.append(topic)
                elif reftype == 'subjectIndicatorRef':
                    topic = self._topicmap.getTopicBySubjectIdentifier(ref, wrap=False)
                    if topic is not None:
                        topics.append(topic)
                elif reftype == 'resourceRef':
                    topic = self._topicmap.getTopicBySubjectLocator(ref, wrap=False)
                    if topic is not None:
                        topics.append(topic)
                else:
                    raise AssertionError, "Unknown reftype: %s"%reftype
            
            # If we haven't been able to retrieve an existing topic, we
            # construct a new topic.
            if len(topics)==0:
                topic = Topic(topicid.encode('latin-1'), topicmap)
                newtopics.append(topic)
            elif len(topics)>1:
                # There are existing topics to be merged.
                while len(topics)>1:
                    topic1 = topics[0]
                    topic2 = topics.pop(-1)
                    if not aq_base(topic1) is aq_base(topic2):
                        # Attempt to keep the on already in ZODB.
                        topic1.mergeIn(topic2)
                topic = topics[0]
            else:
                # We found an existing topic
                topic = topics[0]
            
            # Add types
            for reftype, value in topicdata['instanceOf']:
                if reftype == 'topicRef':
                    topictype = topicRef(self, value, newtopics)
                    topic.addType(topictype)
                    assert topictype in topic.getTypes(wrap=False)
                elif reftype == 'subjectIndicatorRef':
                    topictype = subjectIndicatorRef(self, value, newtopics)
                    #XXX: This is a nasty workaround that temporarily disables
                    # reindexing of the current topic. addType calls
                    # reindexObjext.
                    def reindexObject(self, idxs=[]): pass
                    Topic.reindexObject = reindexObject
                    topic.addType( topictype )
                    del Topic.reindexObject
                    assert topictype in topic.getTypes(wrap=False)
                else:
                    raise AssertionError, "%s not valid in instanceOf"%reftype
            
            # Register identity
            for reftype, value in topicdata['subjectIdentity']:
                if reftype == 'topicRef':
                    topic.addSourceLocator( Locator(value) )
                elif reftype == 'subjectIndicatorRef':
                    topic.addSubjectIdentifier( Locator(value) )
                elif reftype == 'resourceRef':
                    topic.setSubjectLocator( Locator(value) )
                else:
                    raise AssertionError, "%s not valid in instanceOf"%reftype
            
            # Register basenames
            for basenamedata in topicdata['topicnames']:
                baseid = basenamedata['id']
                while baseid is None or not topic.checkIdAvailable(baseid):
                    baseid = generateRandomId()
                
                # Won't this be an issue on in-memory objects?
                #topic.invokeFactory(type_name="BaseName", id=baseid)
                #basename = getattr(topic, baseid)
                basename = BaseName(baseid, topic)
                basename.setData(basenamedata['string'])
                topic.addBaseName(basename)
                
                # Set type
                if basenamedata['type']:
                    reftype, value = basenamedata['type']
                    if reftype == 'topicRef':
                        basename.setType( topicRef(self, value, newtopics) )
                    elif reftype == 'subjectIndicatorRef':
                        basename.setType( subjectIndicatorRef(self, value, newtopics) )
                    elif reftype == 'resourceRef':
                        basename.setType(resourceRef(self, value, newtopics))
                    else:
                        raise AssertionError, "%s not valid in instanceOf"%reftype
                
                # Set scope
                for reftype, value in basenamedata['scope']:
                    if reftype == 'topicRef':
                        theme = topicRef(self, value, newtopics)
                    elif reftype == 'subjectIndicatorRef':
                        theme = subjectIndicatorRef(self, value, newtopics)
                    elif reftype == 'resourceRef':
                        theme = resourceRef(self, value, newtopics)
                    else:
                        raise AssertionError, "%s not valid in instanceOf"%reftype
                    
                    basename.addScopingTopic(theme)
                    assert theme in basename.getScope(wrap=False)
                
                # Add variants.
                #scopeset = [aq_base(theme) for theme in basename.getScope()]
                for variantdata in basenamedata['variants']:
                    themes = []
                    for reftype, value in variantdata['parameters']:
                        if reftype == 'topicRef':
                            theme = topicRef(self, value, newtopics)
                        elif reftype == 'subjectIndicatorRef':
                            theme = subjectIndicatorRef(self, value, newtopics)
                        else:
                            raise AssertionError, "%s not valid in instanceOf %s"%reftype
                        themes.append(theme)
                    
                    variant = Variant(self._topicmap, basename, variantdata['variantName'], scope=themes)
                    basename.addVariant(variant)
            
            topic.updateTitle()
            
            # Register occurrences
            for occurrencedata in topicdata['occurrences']:
                #XXX: This method only supports binary associations
                #TODO: Set the sourcelocator
                occid = occurrencedata['id']
                while occid is None or not topic.checkIdAvailable(occid):
                    occid = generateRandomId()
                
                #occ = Occurrence(occid, topic)
                
                # Type
                occtype = occurrencedata['type']
                if occtype:
                    reftype, value = occtype
                    if reftype == 'topicRef':
                        occtype = topicRef(self, value, newtopics)
                    elif reftype == 'subjectIndicatorRef':
                        if value in treatoccs:
                            if occurrencedata['data'] is None:
                                treatoccs[value](topic, occurrencedata['locator'])
                            else:
                                treatoccs[value](topic, occurrencedata['data'])
                            continue
                        occtype = subjectIndicatorRef(self, value, newtopics)
                    elif reftype == 'resourceRef':
                        occtype = resourceRef(self, value, newtopics)
                    else:
                        raise AssertionError, "%s not valid in instanceOf"%reftype
                    
                    for si in [loc.getAddress for loc in occtype.getSubjectIdentifiers()]:
                        if si in treatoccs:
                            treatoccs[si](topic, occurrencedata['data'] or occurrencedata['locator'])
                            continue
                                
                occ = Occurrence(occid, topic)                    
                occ.setType(occtype)
                assert aq_base(occtype) is aq_base(occ.getType(wrap=False))
                                        
                # Scope
                for themeRef, value in occurrencedata['scope']:
                    if reftype == 'topicRef':
                        theme = topicRef(self, value, newtopics)
                    elif reftype == 'subjectIndicatorRef':
                        theme = subjectIndicatorRef(self, value, newtopics)
                    elif reftype == 'resourceRef':
                        theme = resourceRef(self, value, newtopics)
                    else:
                        raise AssertionError, "%s not valid in instanceOf"%reftype
                    
                    occ.addScopingTopic(theme)
                    assert theme in occ.getScope()
                
                # Data
                if occurrencedata['locator'] and occurrencedata['data']:
                    raise AssertionError, "An occurrence can not be both a " \
                           "locator and contain data."
                elif occurrencedata['locator'] is not None:
                    occ.setLocator(Locator(occurrencedata['locator']))
                elif occurrencedata['data'] is not None:
                    occ.setData(occurrencedata['data'])
                else:
                    if occurrencedata['type']:
                        typetext = occurrencedata['type'][1]
                    else:
                        typetext = 'None'
                    errtext = "Empty occurrence. Neither locator nor data in " \
                              "occurrence of type %s in <topic id='%s'>."
                    raise AssertionError, errtext%(typetext, topicdata['topicid'])

                topic.addOccurrence(occ)
        
        print "Constructed topics:", DateTime()
        
        
        newassocs = []
        print "Constructing %d assocs."%len(self._assocs)
        for counter, assocdata in enumerate(self._assocs):
            #TODO: Set the sourcelocator
            if not counter%100:
                print "Constructed", counter, "assocs by", DateTime()
            
            # Determine the id of the assoc.
            associd = assocdata['id']
            assoc = Association(associd.encode('ascii'), topicmap)
            topicmap.addAssociation(assoc)
            newassocs.append(assoc)
            
            # Add Scope
            for retype, value in assocdata['scope']:
                if reftype == 'topicRef':
                    theme = topicRef(self, value, newtopics)
                elif reftype == 'subjectIndicatorRef':
                    theme = subjectIndicatorRef(self, value, newtopics)
                elif reftype == 'resourceRef':
                    theme = resourceRef(self, value, newtopics)
                else:
                    raise AssertionError, "%s not valid in instanceOf"%reftype
                assoc.addScopingTopic(theme)
            
            # Add type
            if assocdata['assoctype']:
                reftype, value = assocdata['assoctype']
                if reftype == 'topicRef':
                    assoctype = topicRef(self, value, newtopics)
                elif reftype == 'subjectIndicatorRef':
                    assoctype = subjectIndicatorRef(self, value, newtopics)
                elif reftype == 'resourceRef':
                    assoctype = resourceRef(self, value, newtopics)
                else:
                    raise AssertionError, "%s not valid in instanceOf"%reftype
                assoc.setType(assoctype)
            
            # Add roles
            for counter, roledata in enumerate(assocdata['roles']):
                roleid = 'role'+str(counter)
                role = AssociationRole(roleid, assoc)
                assoc.addRole(role)
                player, rolespec = roledata
                # Role player
                if player:
                    reftype, ref = player
                    if reftype == 'topicRef':
                        player = topicRef(self, ref, newtopics)
                    elif reftype == 'subjectIndicatorRef':
                        player = subjectIndicatorRef(self, ref, newtopics)
                    elif reftype == 'resourceRef':
                        player = resourceRef(self, ref, newtopics)
                    else:
                        raise AssertionError, "%s not valid in instanceOf"%reftype
                    role.setPlayer(player)
                
                # Rolespec
                if rolespec:
                    reftype, ref = rolespec
                    if reftype == 'topicRef':
                        rolespec = topicRef(self, ref, newtopics)
                    elif reftype == 'subjectIndicatorRef':
                        rolespec = subjectIndicatorRef(self, ref, newtopics)
                    elif reftype == 'resourceRef':
                        rolespec = resourceRef(self, ref, newtopics)
                    else:
                        raise AssertionError, "%s not valid in instanceOf"%reftype
                    role.setRoleSpec(rolespec)
        
        print "Constructed associations:", DateTime()
        
        portal = self._topicmap.portal_url.getPortalObject()
        default = getattr(portal, 'import')
        foldermapping = self._foldermapping
        typemapping = self._typemapping
        counter = 0
        for topic in newtopics:
            counter += 1
            if not counter%100:
                print "Inserted", counter, "topics by", DateTime()
            # Retrieve id & path
            topicid, path = pathidfromName(topic)
            if topicid and topicid.find(u'http')!=-1:
                raise AssertionError
            if not topicid:
                topicid = topic.getId()
                if isinstance(topicid, unicode):
                    topicid = topicid.encode('latin-1')
            if not topicid == topic.getId() :
                # This is a CMF 1.4/1.5 issue. The notifyModified also calls
                # addCreator, which fails because there is no acquisition
                # context yet. We avoid this by setting
                topic._setId(topicid.encode('latin-1'), notifyModified=False)
                
            # Retrieve correct folder
            folder = selectFolder(portal, topicid.encode('latin-1'), path, default, topic.getType(wrap=False), foldermapping, typemapping)
            # Stick topic in ZODB, and reindex
            #TODO: Make sure the id matches the topic name of id-type
            if isinstance(topicid, unicode):
                topicid = topicid.encode('ascii')
            
            folder._setObject(topicid, topic)
            # Stick all occurrences and basenames in ZODB
            for occurrence in topic.getOccurrences(wrap=False):
                occid = occurrence.getId()
                while not topic.checkIdAvailable(occid):
                    occid = generateRandomId()
                topic._setObject(occid, aq_base(occurrence))
            
            for topicname in topic.getTopicNames():
                nameid = topicname.getId()
                while not topic.checkIdAvailable(nameid):
                    nameid = generateRandomId()
                topic._setObject(nameid, aq_base(topicname))

        print "Inserted topics:", DateTime()
        
        assocfolder = getattr(self._topicmap.portal_url.getPortalObject(), 'associations')
        counter = 0
        for assoc in newassocs:
            counter += 1
            if not counter%100:
                print "Inserted", counter, "assocs by", DateTime()
            # Stick assoc in ZODB and reindex
            associd = assoc.getId()
            while not assocfolder.checkIdAvailable(associd):
                associd = generateRandomId()
            assocfolder._setObject(associd, assoc)
            
            # Stick all roles in ZODB
            for counter, role in enumerate(assoc.getRoles()):
                roleid = role.getId()
                assoc._setObject(roleid, role)
        
        print "Inserted assocs:", DateTime()
        #TODO: Fix any topics whose id/path is not matching their current location.
    
#
# Utility functions
#
def pathidfromName(topic):
    """ Return the (id, path) pair recorded in the x-zope-id name of a topic.

        The topic names are scanned in order. The first name whose type
        carries the subject identifier
        'http://psi.emnekart.no/ztm/core/#x-zope-id' supplies the result:
        the data of the name itself and the data of its first variant.
        (None, None) is returned when no such name exists.
    """
    zope_id_psi = 'http://psi.emnekart.no/ztm/core/#x-zope-id'
    for name in topic.getTopicNames(wrap=False):
        nametype = name.getType(wrap=False)
        if not nametype:
            # Untyped names can never be the x-zope-id name.
            continue
        addresses = [loc.getAddress() for loc in nametype.getSubjectIdentifiers()]
        if zope_id_psi not in addresses:
            continue
        zopeid = name.getData()
        zopepath = name.getVariants()[0].getData()
        return zopeid, zopepath

    return None, None

def topicRef(self, value, newtopics):
    """ Resolve a topicRef to a topic, creating a new topic when unknown.

        The topicmap is asked for the topic whose source locator is `value`.
        If there is none, a new Topic is built, given `value` as source
        locator, appended to `newtopics` and returned. Its id is the URI
        fragment of `value`, or the last path segment when the URI has no
        fragment.
    """
    found = self._topicmap.getTopicBySourceLocator(value, wrap=False)
    # urlparse yields (scheme, netloc, path, params, query, fragment).
    urlparts = urlparse(value)
    if found is not None:
        return found

    newid = urlparts[5] or urlparts[2].split('/')[-1]
    created = Topic(newid.encode('ascii'), self._topicmap)
    created.addSourceLocator(Locator(value))
    newtopics.append(created)
    return created

def resourceRef(self, value, newtopics):
    """ Resolve a resourceRef to a topic, creating a new topic when unknown.

        The topicmap is asked for the topic whose subject locator is `value`.
        If there is none, a new Topic is built, given `value` as subject
        locator, appended to `newtopics` and returned.

        The id of a new topic is derived from the URI: when the URI has no
        path, the last three dot-separated labels of its network location
        are used; otherwise a random id is generated.
    """
    topic = self._topicmap.getTopicBySubjectLocator(value, wrap=False)
    scheme, netloc, path, params, query, fragment = urlparse(value)
    # NOTE(review): topicRef and subjectIndicatorRef test `is None` here;
    # this truthiness test is kept as-is because the not-found return value
    # of getTopicBySubjectLocator is not visible from this module - confirm.
    if not topic:
        # Attempt to create a nice id by converting the domain
        if not path:
            topicid = '.'.join(netloc.split('.')[-3:])
        else:
            topicid = generateRandomId()
        # The id and the topicmap are two separate constructor arguments.
        # A '.' used to stand in place of the ',' below, which made this
        # branch fail with an AttributeError on the encoded id.
        topic = Topic(topicid.encode('ascii'), self._topicmap)
        topic.setSubjectLocator(Locator(value))
        newtopics.append(topic)
    return topic

def subjectIndicatorRef(self, value, newtopics):
    """ Resolve a subjectIndicatorRef to a topic, creating one when unknown.

        The topicmap is asked for the topic that has `value` as a subject
        identifier. If there is none, a new Topic is built, given `value` as
        subject identifier, appended to `newtopics` and returned. Its id is
        the URI fragment of `value`, or the last path segment when the URI
        has no fragment.
    """
    match = self._topicmap.getTopicBySubjectIdentifier(value, wrap=False)
    parsed = urlparse(value)
    uripath = parsed[2]
    urifragment = parsed[5]
    if match is None:
        # Try hard to retrieve a usable id
        if not urifragment:
            candidate = uripath.split('/')[-1]
        else:
            candidate = urifragment
        match = Topic(candidate.encode('ascii'), self._topicmap)
        match.addSubjectIdentifier(Locator(value))
        newtopics.append(match)
    return match

def selectFolder( portal  # Root of the import hierarchy
                , topicid
                , path    # Full path to the topic
                , default # Default import folder if path cannot be constructed
                , topictype
                , foldermapping = {}
                , typemapping = {}
                , folder_type_name="CMF BTree Folder"
                ):
    """ Return the folder a topic belongs in, creating folders where needed.

        The target path is chosen as follows:
          * if the id of `topictype` is a key of `typemapping`, the mapped
            folder path plus `topicid`;
          * otherwise, if `path` is given, `path` with the first matching
            prefix from `foldermapping` replaced by its mapped value;
          * otherwise the physical path of `default` plus `topicid`.

        The first two components and the last component of that path are
        dropped; the remaining ones are walked from `portal`, and every
        missing one is created with `invokeFactory` using `folder_type_name`.
        `default` is returned when nothing remains to walk.
    """
    if topictype and topictype.getId() in typemapping:
        path = typemapping[topictype.getId()] + '/' + topicid
    elif path:
        # Check to see if the path should be rewritten.
        for prefix in foldermapping:
            if path.startswith(prefix):
                remainder = path[len(prefix):]
                path = foldermapping[prefix] + remainder
                break
    else:
        # No path
        defaultpath = '/'.join( default.getPhysicalPath() )
        path = "%s/%s"%(defaultpath, topicid)

    foldernames = path.split('/')[2:-1] #Cut portal, root and self
    if not foldernames:
        return default

    # We need to make sure the path that is to be used exists.
    container = portal
    for foldername in foldernames:
        if getattr(aq_base(container), foldername, None) is None:
            container.invokeFactory(type_name=folder_type_name, id=foldername)
        container = getattr(container, foldername)
    return container

        
#################################################
###        
### Dublin Core handlers
###
def description(topic, data):
    """ Dublin Core handler: pass `data` to `topic.setDescription`. """
    topic.setDescription(data)
        
def modificationdate(topic, data):
    """ Dublin Core handler: pass `data` to `topic.setModificationDate`. """
    # `data` is handed over unconverted; any date parsing is left to the topic.
    topic.setModificationDate(data)

def creationdate(topic, data):
    """ Dublin Core handler: store DateTime(data) as `topic.creation_date`. """
    # The attribute is assigned directly; no setter is called for this field.
    created = DateTime(data)
    topic.creation_date = created

        
def effectivedate(topic, data):
    """ Dublin Core handler: pass `data` to `topic.setEffectiveDate`. """
    topic.setEffectiveDate(data)

def expirationdate(topic, data):
    """ Dublin Core handler: pass `data` to `topic.setExpirationDate`. """
    topic.setExpirationDate(data)
        
def creator(topic, data):
    """ Dublin Core handler: make the user named by `data` owner of `topic`.

        `data` is encoded to latin-1 and looked up, in order, as a portal
        member and as a user of the acl_users folder at the physical root.
        When neither exists, the root user with the hard-coded id 'arnarl'
        is used instead. Ownership of the topic is changed recursively to
        that user, and the local roles of the topic are replaced by a single
        'Owner' entry for the id in `data`.

        Relies on the module-level `portal_membership` tool.
    """
    #TODO: This must be rewired so that non-existent users are replaced by the current user.
    #TODO: Retrieve user
    global portal_membership
    data = data.encode('latin-1')
    member = portal_membership.getMemberById(data)
    if member is not None:
        user = member.getUser()
    else:
        rootusers = portal_membership.getPhysicalRoot().acl_users
        user = rootusers.getUserById(data)
        if user is None:
            #XXX: Default to current user taking ownership...
            user = rootusers.getUserById('arnarl')
        # Users fetched from the root folder are wrapped in its context.
        user = user.__of__(rootusers.aq_inner)
    topic.changeOwnership(user, recursive=True)
    # Set existing local roles
    topic.__ac_local_roles__ = {data: ['Owner']}
            
def workflow_state(topic, data):
    """ Handler: set the 'ztmdefault_workflow' status of `topic` from `data`.

        `data` is encoded to latin-1 and handed to
        `portal_workflow.setStatusOf`. The `workflow_history` attribute the
        topic had before that call (an empty dict when it had none) is
        assigned back afterwards.

        Relies on the module-level `portal_workflow` tool.
    """
    # TODO: Make sure that the correct workflow is exported and imported
    global portal_workflow
    previous = getattr(topic, 'workflow_history', {})
    state = data.encode('latin-1')
    portal_workflow.setStatusOf('ztmdefault_workflow', topic, state)
    topic.workflow_history = previous
        
def workflow_history(topic, data):
    """ Handler: append one imported history entry to `topic`.

        `data` holds five fields joined by ':-:' in the order
        time, action, review_state, actor, comments. It is encoded to
        latin-1 and split at most four times, so the trailing comments field
        may itself contain ':-:'. A ValueError is raised when fewer than five
        fields are present.

        The entry is appended to the existing 'ztmdefault_workflow' entries
        of the topic and `topic.workflow_history` is replaced by a new dict
        holding only that workflow id; history stored under any other
        workflow id is not carried over.
    """
    # TODO: Make sure that the correct workflow history is set
    history = getattr( topic, 'workflow_history', {'ztmdefault_workflow': ()})
    entries = list( history.get('ztmdefault_workflow',[]) )

    # `timestamp` rather than `time`, which is imported at module level.
    fields = data.encode('latin-1').split(':-:', 4)
    timestamp, action, review_state, actor, comments = fields

    entries.append({ 'action': action
                   , 'review_state': review_state
                   , 'actor': actor
                   , 'comments': comments
                   , 'time': DateTime(timestamp)
                   })

    topic.workflow_history = {'ztmdefault_workflow': tuple(entries)}
