mò
Ø€;c           @   s†  d  Z  d k Z d k Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z	 e i d ƒ Z
 e i d	 ƒ Z e i d
 ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e i d e i ƒ Z e i d ƒ Z e i d	 ƒ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z d e f d „  ƒ  YZ d f  d „  ƒ  YZ d S(   s   A parser for HTML and XHTML.Ns   [&<]s   <(/|\Z)s"   &([a-zA-Z][-.a-zA-Z0-9]*|#[0-9]*)?s   &([a-zA-Z][-.a-zA-Z0-9]*);s   &#(?:[0-9]+|[xX][0-9a-fA-F]+);s	   <[a-zA-Z]s   <\?t   >s   </s   <!s	   <![^<>]*>s   <!--s   --\s*>s   [a-zA-Z][-.a-zA-Z0-9:_]*s]   \s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?sê  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
s   \s*/?>s#   </\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>s   [a-zA-Z][-_.a-zA-Z0-9]*\s*s   (\'[^\']*\'|"[^"]*")\s*t   HTMLParseErrorc           B   s)   t  Z d  Z e e f d „ Z d „  Z RS(   s&   Exception raised for all parse errors.c         C   s5   | p t ‚ | |  _  | d |  _ | d |  _ d  S(   Ni    i   (   t   msgt   AssertionErrort   selft   positiont   linenot   offset(   R   R   R   (    (    tN   /home2/kennisonb/webapps/zope/Zope/Products/Squishdot/stripogram/HTMLParser.pyt   __init__E   s    	c         C   s[   |  i } |  i d  j	 o | d |  i } n |  i d  j	 o | d |  i d } n | S(   Ns   , at line %ds   , column %di   (   R   R   t   resultR   t   NoneR   (   R   R
   (    (    R   t   __str__K   s    	(   t   __name__t
   __module__t   __doc__R   R	   R   (    (    (    R   R   B   s    t
   HTMLParserc           B   s  t  Z d  Z d Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 e
 Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   sÇ  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  Entity references are
    passed by calling self.handle_entityref() with the entity
    reference as the argument.  Numeric character references are
    passed to self.handle_charref() with the string containing the
    reference as the argument.
    t   scriptt   stylec         C   s   |  i ƒ  d S(   s#   Initialize and reset this instance.N(   R   t   reset(   R   (    (    R   R	   k   s     c         C   s:   d |  _ g  |  _ d |  _ d |  _ d |  _ t |  _ d S(   s1   Reset this instance.  Loses all unprocessed data.t    s   ???i   i    N(   R   t   rawdatat   stackt   lasttagR   R   t   interesting_normalt   interesting(   R   (    (    R   R   o   s     					c         C   s!   |  i | |  _ |  i d ƒ d S(   s   Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '
').
        i    N(   R   R   t   datat   goahead(   R   R   (    (    R   t   feedx   s     c         C   s   |  i d ƒ d S(   s   Handle any buffered data.i   N(   R   R   (   R   (    (    R   t   close   s     c         C   s’   | | j o | Sn |  i } t i | d | | ƒ } | o= |  i | |  _ t i | d | | ƒ } | | d |  _
 n |  i
 | | |  _
 | S(   Ns   
i   (   t   it   jR   R   t   stringt   countt   nlinesR   t   rindext   posR   (   R   R   R   R$   R   R"   (    (    R   t	   updatepos‰   s    	c         C   s   |  i |  i f S(   s&   Return current line number and offset.N(   R   R   R   (   R   (    (    R   t   getpos–   s     c         C   s   |  i S(   s)   Return full source of start tag: '<...>'.N(   R   t   _HTMLParser__starttag_text(   R   (    (    R   t   get_starttag_textœ   s     c         C   s   t  |  _ d  S(   N(   t   interesting_cdataR   R   (   R   (    (    R   t   set_cdata_mode    s    c         C   s   t  |  _ d  S(   N(   R   R   R   (   R   (    (    R   t   clear_cdata_mode£   s    c   	      C   sš  |  i } d } t | ƒ } x-| | j  o|  i i | | ƒ } | o | i ƒ  } n | } | | j  o |  i
 | | | !ƒ n |  i | | ƒ } | | j o Pn | | d j oat i | | ƒ o |  i | ƒ } nô t i | | ƒ o. |  i | ƒ } | d j o |  i ƒ  qÐn³ t i | | ƒ o |  i | ƒ } n t i | | ƒ o |  i | ƒ } ng t i | | ƒ o |  i | ƒ } nA | | d j  o) t d | | | d !|  i ƒ  ƒ ‚ n d } | d j  o% | o t d |  i ƒ  ƒ ‚ n Pn |  i | | ƒ } q | | d j ot i | | ƒ } | oE | i ƒ  d d !} |  i | ƒ | i ƒ  } |  i | | ƒ } q n t i | | ƒ } | oA | i d ƒ } |  i  | ƒ | i ƒ  } |  i | | ƒ } q n t! i | | ƒ o( | o t d	 |  i ƒ  ƒ ‚ n d Sn t d
 |  i ƒ  ƒ ‚ q d p
 t" d ‚ q W| o7 | | j  o* |  i
 | | | !ƒ |  i | | ƒ } n | | |  _ d  S(   Ni    t   <i   s   invalid '<' construct: %si   iÿÿÿÿs   EOF in middle of constructt   &s#   EOF in middle of entity or char refs"   '&' not part of entity or char refs   interesting.search() lied(#   R   R   R   t   lent   nR   t   searcht   matcht   startR   t   handle_dataR%   t   starttagopent   parse_starttagt   kt
   endtagopent   parse_endtagR+   t   commentopent   parse_commentt   piopent   parse_pit   declopent   parse_declarationR   R&   t   endt   charreft   groupt   namet   handle_charreft	   entityreft   handle_entityreft
   incompleteR   (	   R   R?   RB   R   R   R/   R   R6   R1   (    (    R   R   ©   s€    	   c         C   s„   |  i } | | | d !d j p
 t d ‚ t i | | d ƒ } | p d Sn | i ƒ  } |  i	 | | d | !ƒ | i
 ƒ  } | S(   Ni   s   <!--s"   unexpected call to parse_comment()iÿÿÿÿ(   R   R   R   R   t   commentcloseR0   R1   R2   R   t   handle_commentR?   (   R   R   R   R   R1   (    (    R   R:   ï   s    	"c         C   sK  |  i } | d } | | | !d j p
 t d ‚ | | | d !d j o d Sn t | ƒ } xç | | j  oÙ | | } | d j o$ |  i | | d | !ƒ | d Sn | d	 j o1 t	 i
 | | ƒ } | p d Sn | i ƒ  } q` | d
 j o1 t i
 | | ƒ } | p d Sn | i ƒ  } q` t d | | |  i ƒ  ƒ ‚ q` Wd S(   Ni   s   <!s$   unexpected call to parse_declarationi   t   -R   iÿÿÿÿR    s   "'t4   abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZs"   unexpected char in declaration: %s(   RI   R   (   R   R   R   R   R   R.   R/   t   ct   handle_declt   declstringlitR1   t   mR?   t   declnameR   R&   (   R   R   RK   R   RN   R/   R   (    (    R   R>   û   s2    	
 
c         C   s„   |  i } | | | d !d j p
 t d ‚ t i | | d ƒ } | p d Sn | i ƒ  } |  i	 | | d | !ƒ | i
 ƒ  } | S(   Ni   s   <?s   unexpected call to parse_pi()iÿÿÿÿ(   R   R   R   R   t   picloseR0   R1   R2   R   t	   handle_piR?   (   R   R   R   R   R1   (    (    R   R<     s    	"c         C   s­  d  |  _ |  i | ƒ } | d j  o | Sn |  i } | | | !|  _ g  } t i	 | | d ƒ } | p
 t
 d ‚ | i ƒ  } t i | | d | !ƒ |  _ } xó | | j  oå t i	 | | ƒ } | p Pn | i d d d ƒ \ } }
 } |
 p
 d  } nm | d  d j o | d j n p& | d  d j o | d j n o  | d d !} |  i | ƒ } n | i t i | ƒ | f ƒ | i ƒ  } q£ Wt i | | | !ƒ } | d j o |  i ƒ  \ } }	 d |  i j o? | t i |  i d ƒ } t |  i ƒ t i |  i d ƒ }	 n |	 t |  i ƒ }	 t  d | | | !d  | |	 f ƒ ‚ n | d d
 j o |  i! | | ƒ n/ |  i" | | ƒ | |  i# j o |  i$ ƒ  n | S(   Ni    i   s#   unexpected call to parse_starttag()i   i   s   'iÿÿÿÿt   "R    s   />s   
s    junk characters in start tag: %si   iþÿÿÿ(   R    s   />(%   R   R   R'   t   check_for_whole_start_tagR   t   endposR   t   attrst   tagfindR1   R   R?   R6   R    t   lowerR   t   tagt   attrfindRN   RA   t   attrnamet   restt	   attrvaluet   unescapet   appendt   stripR&   R   R   R!   R.   t   rfindR   t   handle_startendtagt   handle_starttagt   CDATA_CONTENT_ELEMENTSR*   (   R   R   R?   R\   RN   RZ   R   RX   R   R   R[   R1   R6   RT   RU   (    (    R   R5   +  sN    		! 
L&c         C   s4  |  i } t i | | ƒ } | o| i ƒ  } | | | d !} | d j o | d Sn | d j ol | | | d !} | d j o | d Sn | d j o d Sn |  i
 | | d ƒ t d |  i ƒ  ƒ ‚ n | d j o d Sn | d	 j o d Sn |  i
 | | ƒ t d
 |  i ƒ  ƒ ‚ n t d ƒ ‚ d  S(   Ni   R    t   /i   s   />iÿÿÿÿs   malformed empty start tagR   s6   abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZs   malformed start tags   we should not gt here!(   R   R   t   locatestarttagendR1   R   RN   R?   R   t   nextt   sR%   R   R&   R   (   R   R   R   RN   Rf   Rg   R   (    (    R   RS   _  s.    	c         C   sÃ   |  i } | | | d !d j p
 t d ‚ t i | | d ƒ } | p d Sn | i ƒ  } t	 i | | ƒ } | p% t
 d | | | !|  i ƒ  ƒ ‚ n | i d ƒ } |  i t i | ƒ ƒ | S(   Ni   s   </s   unexpected call to parse_endtagi   iÿÿÿÿs   bad end tag: %s(   R   R   R   R   t	   endendtagR0   R1   R?   R   t
   endtagfindR   R&   RA   RX   t   handle_endtagR    RW   (   R   R   R   RX   R   R1   (    (    R   R8     s    	"c         C   s!   |  i | | ƒ |  i | ƒ d  S(   N(   R   Rb   RX   RU   Rj   (   R   RX   RU   (    (    R   Ra     s    c         C   s   d  S(   N(    (   R   RX   RU   (    (    R   Rb   ”  s    c         C   s   d  S(   N(    (   R   RX   (    (    R   Rj   ˜  s    c         C   s   d  S(   N(    (   R   RB   (    (    R   RC   œ  s    c         C   s   d  S(   N(    (   R   RB   (    (    R   RE      s    c         C   s   d  S(   N(    (   R   R   (    (    R   R3   ¤  s    c         C   s   d  S(   N(    (   R   R   (    (    R   RH   ¨  s    c         C   s   d  S(   N(    (   R   t   decl(    (    R   RL   ¬  s    c         C   s   d  S(   N(    (   R   R   (    (    R   RQ   °  s    c         C   s‚   d | j o | Sn t i | d d ƒ } t i | d d ƒ } t i | d d ƒ } t i | d d	 ƒ } t i | d
 d ƒ } | S(   NR-   s   &lt;R,   s   &gt;R    s   &apos;t   's   &quot;RR   s   &amp;(   Rg   R    t   replace(   R   Rg   (    (    R   R]   ´  s    (   s   scripts   style(    R   R   R   Rc   R	   R   R   R   R%   R&   R   R'   R(   R*   R+   R   R:   R>   R<   R5   RS   R8   Ra   Rb   Rj   RC   RE   R3   RH   RL   RQ   R]   (    (    (    R   R   T   s:    												F		$		4	 										(   R   t   reR    t   compileR   R)   RF   RD   R@   R4   R;   RP   R7   R=   t   specialR9   RG   RV   RY   t   VERBOSERe   t   endstarttagRh   Ri   RO   RM   t	   ExceptionR   R   (   R4   Re   RO   R@   Rh   R)   R   RV   Ri   R;   RP   Rp   R=   Rn   RY   R   R    R7   R9   RG   RM   Rr   R   RD   RF   (    (    R   t   ?	   s2   
		