usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyc000064400000066206147205604750021632 0ustar00 abc!@`sddlmZmZmZddlmZmZddlmZm Z ddl Z ddl Z ddl m Z ddlmZmZmZmZddlmZdd lmZdd lmZydd lmZWnek reZnXegeD]Zejd ^qZegeD]Zejd ^q"ZegeD]Zejd ^qJZeed dgBZ dZ!ej"re!ddkre!j#ddkst$e j%e!d e&ddZ'ne j%e!Z'e(dddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2g Z)e j%d3Z*iZ+d4e,fd5YZ-d6Z.d7e,fd8YZ/d9e/fd:YZ0d;e1fd<YZ2d=e,fd>YZ3d?e,fd@YZ4dAZ5dS(Bi(tabsolute_importtdivisiontunicode_literals(t text_typet binary_type(t http_clientturllibN(t webencodingsi(tEOFtspaceCharacterst asciiLetterstasciiUppercase(tReparseException(t_utils(tStringIO(tBytesIOuasciit>tt|j||krd|t|j|8}|d7}q'W||g|_dS(Nii(t_bufferedBytestAssertionErrorRRR(RRtoffsetti((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pytseekLscC`sp|js|j|S|jdt|jkr_|jdt|jdkr_|j|S|j|SdS(Niii(Rt _readStreamRRt_readFromBuffer(Rtbytes((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pytreadUs     cC`s&tg|jD]}t|^q S(N(tsumRR(Rtitem((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyR^scC`sL|jj|}|jj||jdcd7 Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) u􏿿iiuutf-8ucertainN( R tsupports_lone_surrogatestNonetreportCharacterErrorsRtcharacterErrorsUCS4tcharacterErrorsUCS2tnewLinestlookupEncodingt charEncodingt openStreamt dataStreamtreset(RR?((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRs    cC`sCd|_d|_d|_g|_d|_d|_d|_dS(Nui(Rt chunkSizet chunkOffsetterrorst prevNumLinest prevNumColsRFt_bufferedCharacter(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyROs      cC`s(t|dr|}n t|}|S(uvProduces a file object from source. source can be either a file object, local filename or a string. uread(R:R(RR?R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRMs  cC`st|j}|jdd|}|j|}|jdd|}|dkr\|j|}n||d}||fS(Nu iii(RtcountRStrfindRT(RRRtnLinest positionLinet lastLinePostpositionColumn((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyt _positions   cC`s&|j|j\}}|d|fS(u:Returns (line, col) of the current position in the stream.i(R\RQ(Rtlinetcol((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRscC`sL|j|jkr%|js%tSn|j}|j|}|d|_|S(uo Read one character from the stream or queue if available. Return EOF when EOF is reached. i(RQRPt readChunkRR(RRQtchar((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyR`s    cC`sO|dkr|j}n|j|j\|_|_d|_d|_d|_|jj |}|j r|j |}d|_ n |st St |dkrt |d}|dksd|kodknr|d|_ |d }qn|jr|j|n|jdd }|jd d }||_t ||_tS( Nuiiii iiu u u (RFt_defaultChunkSizeR\RPRSRTRRQRNR$RUR9RtordRGtreplacetTrue(RRPR(tlastv((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyR_s0        (   cC`s:x3tttj|D]}|jjdqWdS(Nuinvalid-codepoint(trangeRtinvalid_unicode_retfindallRRR'(RR(t_((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRH%s"cC`st}xtj|D]}|r(qnt|j}|j}tj|||d!rtj|||d!}|t kr|j j dnt }q|dkr|dkr|t |dkr|j j dqt}|j j dqWdS(Niuinvalid-codepointiii(R9RgtfinditerRbtgrouptstartR tisSurrogatePairtsurrogatePairToCodepointtnon_bmp_invalid_codepointsRRR'RdR(RR(tskiptmatcht codepointRtchar_val((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRI)s    c C`s}yt||f}Wntk rx&|D]}t|dks+tq+Wdjg|D]}dt|^qZ}|sd|}ntjd|}t||fcB`sbeZdZd d d d dedZdZdZedZdZ dZ dZ RS( uProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. u windows-1252cC`s|j||_tj||jd|_d|_||_||_||_||_ ||_ |j ||_ |j ddk st|jdS(uInitialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) iidiN(RMt rawStreamR=Rt numBytesMetatnumBytesChardettoverride_encodingttransport_encodingtsame_origin_parent_encodingtlikely_encodingtdefault_encodingtdetermineEncodingRLRFRRO(RR?RRRRRt useChardet((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRs       cC`s3|jdjj|jd|_tj|dS(Niureplace(RLt codec_infot streamreaderRRNR=RO(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyROs"cC`sUt|dr|}n t|}y|j|jWnt|}nX|S(uvProduces a file object from source. source can be either a file object, local filename or a string. uread(R:RR RR(RR?R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRMs  cC`s!|jdf}|ddk r&|St|jdf}|ddk rO|St|jdf}|ddk rx|S|jdf}|ddk r|St|jdf}|ddk r|djjd r|St|j df}|ddk r|S|ryddl m }Wnt k r4qXg}|}x[|j s|jj|j}t|tszt|sPn|j||j|qGW|jt|jd}|jjd|dk r|dfSnt|jdf}|ddk r|StddfS(Nucertainiu tentativeuutf-16(tUniversalDetectoruencodingu windows-1252(t detectBOMRFRKRRtdetectEncodingMetaRtnamet startswithRtchardet.universaldetectorRt ImportErrortdoneRR$RR4R#RR'tfeedtclosetresultR R(RtchardetRLRtbufferstdetectorRtencoding((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRsR'       cC`s|jddkstt|}|dkr5dS|jdkretd}|dk stnr||jdkr|jddf|_nF|jjd|df|_|jtd|jd|fdS( Niucertainuutf-16beuutf-16leuutf-8iuEncoding changed from %s to %s(uutf-16beuutf-16le( RLRRKRFRRR ROR (Rt newEncoding((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pytchangeEncodings    cC`sidtj6dtj6dtj6dtj6dtj6}|jjd}t|t s_t |j |d }d}|s|j |}d}|s|j |d }d}qn|r|jj |t |S|jj d d Sd S( uAttempts to detect at BOM at the start of the stream. If an encoding can be determined from the BOM return the name of the encoding otherwise return Noneuutf-8uutf-16leuutf-16beuutf-32leuutf-32beiiiiN(tcodecstBOM_UTF8t BOM_UTF16_LEt BOM_UTF16_BEt BOM_UTF32_LEt BOM_UTF32_BERR$R4R#RtgetR RKRF(RtbomDicttstringRR ((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRs&   cC`s|jj|j}t|ts*tt|}|jjd|j}|dk r||j dkr|t d}n|S(u9Report the encoding declared by the meta element iuutf-16beuutf-16leuutf-8N(uutf-16beuutf-16le( RR$RR4R#RtEncodingParserR t getEncodingRFRRK(RRtparserR((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyR9s  N( R1R2R3RFRdRRORMRRRR(((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyR>s(   >  "t EncodingBytescB`seZdZdZdZdZdZdZdZdZ dZ e e e Z d Z e e Zed Zd Zd Zd ZRS(uString-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raisedcC`s+t|tsttj||jS(N(R4R#Rt__new__tlower(Rtvalue((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRLscC`s d|_dS(Ni(R\(RR((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRPscC`s|S(N((R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyt__iter__TscC`sS|jd}|_|t|kr/tn|dkrDtn|||d!S(Nii(R\Rt StopIterationR<(Rtp((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyt__next__Ws    cC`s |jS(N(R(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pytnext_scC`sY|j}|t|kr$tn|dkr9tn|d|_}|||d!S(Nii(R\RRR<(RR((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pytpreviouscs    cC`s+|jt|krtn||_dS(N(R\RR(RR((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyt setPositionls cC`s<|jt|krtn|jdkr4|jSdSdS(Ni(R\RRRF(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyt getPositionqs  cC`s||j|jd!S(Ni(R(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pytgetCurrentByte{scC`sc|j}xJ|t|krU|||d!}||krH||_|S|d7}q W||_dS(uSkip past a list of charactersiN(RRR\RF(RR{RR|((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRps    cC`sc|j}xJ|t|krU|||d!}||krH||_|S|d7}q W||_dS(Ni(RRR\RF(RR{RR|((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyt skipUntils    cC`sQ|j}|||t|!}|j|}|rM|jt|7_n|S(uLook for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. Otherwise return False and leave the position alone(RRR(RR#RR(R,((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyt matchBytess  cC`sh||jj|}|dkr^|jdkr=d|_n|j|t|d7_tStdS(uLook for the next sequence of bytes matching a given sequence. If a match is found advance the position to the last byte of the matchiiiN(RtfindR\RRdR(RR#t newPosition((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pytjumpTos  (R1R2R3RRRRRRRRtpropertyRRt currentBytetspaceCharactersBytesRpRRR(((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRHs           RcB`s_eZdZdZdZdZdZdZdZdZ dZ d Z RS( u?Mini parser for detecting character encoding from meta elementscC`st||_d|_dS(u3string - the data to work on for encoding detectionN(RR(RFR(RR(((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRscC`sd|jfd|jfd|jfd|jfd|jfd|jff}xv|jD]k}t}xR|D]J\}}|jj|rky|}PWqtk rt }PqXqkqkW|sXPqXqXW|j S(Ns(R(R(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRscC`sK|jjtkrtSt}d}x"trF|j}|dkrGtS|ddkr|ddk}|rC|dk rC||_tSq%|ddkr|d}t|}|dk rC||_tSq%|ddkr%t t |d}|j }|dk rCt|}|dk r@|r4||_tS|}q@qCq%q%WdS(Nis http-equivis content-typetcharsettcontent( R(RRRdR9RFt getAttributeRRKtContentAttrParserRtparse(Rt hasPragmatpendingEncodingtattrttentativeEncodingtcodect contentParser((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRs:             cC`s |jtS(N(thandlePossibleTagR9(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRscC`st|j|jtS(N(RR(RRd(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRs cC`s|j}|jtkr9|r5|j|jntS|jt}|dkra|jn+|j}x|dk r|j}qpWtS(NR( R(RtasciiLettersBytesRRRdRtspacesAngleBracketsRRF(RtendTagR(R|R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRs      cC`s|jjdS(NR(R(R(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRscC`s|j}|jttdgB}|dksIt|dksIt|d krYdSg}g}xtr |dkr|rPnz|tkr|j}Pn^|d krdj|dfS|t kr|j |j n|dkrdS|j |t |}qhW|dkr7|j dj|dfSt ||j}|d kr|}xtrt |}||krt |dj|dj|fS|t kr|j |j qb|j |qbWn^|dkrdj|dfS|t kr|j |j n|dkr-dS|j |x}trt |}|tkrwdj|dj|fS|t kr|j |j q=|dkrdS|j |q=WdS( u_Return a name,value pair for the next attribute in the stream, if one is found, or Nonet/iRt=R)t't"N(RN(RR(RR(R(RpRt frozensetRFRRRdR*tasciiUppercaseBytesR'RRRR(RR(R|tattrNamet attrValuet quoteChar((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRsh $                           ( R1R2R3RRRRRRRRR(((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRs    $    RcB`seZdZdZRS(cC`s"t|tst||_dS(N(R4R#RR((RR(((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRfscC`s:y!|jjd|jjd7_|jj|jjdksHdS|jjd7_|jj|jjdkr|jj}|jjd7_|jj}|jj|r|j||jj!SdSnP|jj}y(|jjt|j||jj!SWntk r|j|SXWntk r5dSXdS(NRiRRR(RR( R(RRRpRRFRRR(Rt quoteMarkt oldPosition((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRjs.       (R1R2RR(((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRes cC`swt|tr:y|jd}Wq:tk r6dSXn|dk roytj|SWqstk rkdSXndSdS(u{Return the python codec name corresponding to an encoding or None if the string doesn't correspond to a valid encoding.uasciiN(R4RtdecodetUnicodeDecodeErrorRFRtlookuptAttributeError(R((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pyRKs   (6t __future__RRRtpip._vendor.sixRRtpip._vendor.six.movesRRRRvt pip._vendorRt constantsRR R R R R)R tioRRRRR&tencodeRRRRtinvalid_unicode_no_surrogateRERVRRwtevalRgtsetRotascii_punctuation_reRttobjectRRDR=R>R#RRRRK(((sE/usr/lib/python2.7/site-packages/pip/_vendor/html5lib/_inputstream.pytsR  "  ((( + J h'