zfc@sdZddlZddlZejdZejdZejdZejdZejdZejdZ ejd Z ejd Z ejd Z ejd Z ejd ejZejdZejdZdefdYZdejfdYZdS(sA parser for HTML and XHTML.iNs[&<]s &[a-zA-Z#]s%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s <[a-zA-Z]t>s--\s*>s$([a-zA-Z][^ />]*)(?:\s|/(?!>))*s[a-zA-Z][^ />]*s]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*s <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace s#tHTMLParseErrorcBs#eZdZddZdZRS(s&Exception raised for all parse errors.cCs'||_|d|_|d|_dS(Nii(tmsgtlinenotoffset(tselfRtposition((s"/usr/lib64/python2.7/HTMLParser.pyt__init__<s  cCsW|j}|jdk r,|d|j}n|jdk rS|d|jd}n|S(Ns , at line %ds , column %di(RRtNoneR(Rtresult((s"/usr/lib64/python2.7/HTMLParser.pyt__str__Bs  N(NN(t__name__t __module__t__doc__RRR (((s"/usr/lib64/python2.7/HTMLParser.pyR9s t HTMLParsercBs eZdZdZdZdZdZdZdZdZ dZ d Z d Z d Zd Zd dZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ RS( sFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). Entity references are passed by calling self.handle_entityref() with the entity reference as the argument. Numeric character references are passed to self.handle_charref() with the string containing the reference as the argument. tscripttstylecCs|jdS(s#Initialize and reset this instance.N(treset(R((s"/usr/lib64/python2.7/HTMLParser.pyRbscCs8d|_d|_t|_d|_tjj|dS(s1Reset this instance. Loses all unprocessed data.ts???N( trawdatatlasttagtinteresting_normalt interestingRt cdata_elemt markupbaset ParserBaseR(R((s"/usr/lib64/python2.7/HTMLParser.pyRfs     cCs!|j||_|jddS(sFeed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). iN(Rtgoahead(Rtdata((s"/usr/lib64/python2.7/HTMLParser.pytfeednscCs|jddS(sHandle any buffered data.iN(R(R((s"/usr/lib64/python2.7/HTMLParser.pytclosewscCst||jdS(N(Rtgetpos(Rtmessage((s"/usr/lib64/python2.7/HTMLParser.pyterror{scCs|jS(s)Return full source of start tag: '<...>'.(t_HTMLParser__starttag_text(R((s"/usr/lib64/python2.7/HTMLParser.pytget_starttag_textscCs2|j|_tjd|jtj|_dS(Ns (tlowerRtretcompiletIR(Rtelem((s"/usr/lib64/python2.7/HTMLParser.pytset_cdata_modescCst|_d|_dS(N(RRRR(R((s"/usr/lib64/python2.7/HTMLParser.pytclear_cdata_modes c Csj|j}d}t|}x||kr|jj||}|rT|j}n|jraPn|}||kr|j|||!n|j||}||krPn|j}|d|r7t j ||r|j |}n|d|r |j |}n|d|r*|j |}nm|d|rK|j|}nL|d|rl|j|}n+|d|kr|jd|d}nP|dkr"|sPn|jd|d}|dkr|jd|d}|dkr |d}q n |d7}|j|||!n|j||}q|d |rtj ||}|r|jd d !} |j| |j}|d |ds|d}n|j||}qqd ||kr|j|||d !|j||d }nPq|d |rtj ||}|r|jd} |j| |j}|d |dsv|d}n|j||}qntj ||}|r|r|j||kr|jdnPq|d|kr |jd |j||d}qPqqW|rY||krY|j rY|j|||!|j||}n|||_dS(NitRtitnR4tjR2tktname((s"/usr/lib64/python2.7/HTMLParser.pyRs                   cCs|j}|||d!dkr0|jdn|||d!dkrT|j|S|||d!dkrx|j|S|||d!jd kr|jd |d}|d krd S|j||d|!|d S|j|SdS( Nis(RRBRR4RD((s"/usr/lib64/python2.7/HTMLParser.pyR8s   cCsnd|_|j|}|dkr(|S|j}|||!|_g}tj||d}|j}|jdj|_ }x||krut j||}|sPn|jddd\} } } | sd} nX| d dko| dkns%| d dko | dknr5| dd!} n| rM|j | } n|j | j| f|j}qW|||!j } | d kr|j\} }d |jkr| |jjd } t|j|jjd }n|t|j}|j|||!|S| jd r;|j||n/|j||||jkrj|j|n|S( Niiiis'it"Rs/>s (Rs/>(RR!tcheck_for_whole_start_tagRttagfindR4R>R<R#RtattrfindtunescapetappendtstripRtcountR-trfindR0tendswiththandle_startendtagthandle_starttagtCDATA_CONTENT_ELEMENTSR((RRBtendposRtattrsR4REttagtmtattrnametrestt attrvalueR>RR((s"/usr/lib64/python2.7/HTMLParser.pyR5sP     $$  cCs|j}tj||}|r|j}|||d!}|dkrR|dS|dkr|jd|rx|dS|jd|rdS|j||d|jdn|dkrdS|d krdS||kr|S|dSntd dS( NiRt/s/>iismalformed empty start tagRs6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZswe should not get here!(RtlocatestarttagendR4R>R2R1R tAssertionError(RRBRR`RDtnext((s"/usr/lib64/python2.7/HTMLParser.pyRQNs,        cCsj|j}tj||d}|s)dS|j}tj||}|s|jdk rt|j|||!|St j||d}|s|||d!dkr|dS|j |Sn|j dj }|j d|j}|j||dS|j dj }|jdk rO||jkrO|j|||!|Sn|j||j|S(NiiiisR(Rt endendtagR.R>t endtagfindR4RRR0RRRIR<R#R:t handle_endtagR)(RRBRR4RJt namematchttagnameR'((s"/usr/lib64/python2.7/HTMLParser.pyR6ns6     cCs!|j|||j|dS(N(R[Rj(RR_R^((s"/usr/lib64/python2.7/HTMLParser.pyRZscCsdS(N((RR_R^((s"/usr/lib64/python2.7/HTMLParser.pyR[scCsdS(N((RR_((s"/usr/lib64/python2.7/HTMLParser.pyRjscCsdS(N((RRF((s"/usr/lib64/python2.7/HTMLParser.pyR=scCsdS(N((RRF((s"/usr/lib64/python2.7/HTMLParser.pyR@scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyR0scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyRKscCsdS(N((Rtdecl((s"/usr/lib64/python2.7/HTMLParser.pyRHscCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyROscCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyt unknown_declscs2d|kr|Sfd}tjd||S(NR,cs|jd}yZ|ddkri|d}|dd krSt|dd}n t|}t|SWntk rd|dSXtjdkrd dl}id d 6}x-|jj D]\}}t|||s&