__pycache__/__init__.cpython-36.opt-1.pyc000064400000001570147204715120014106 0ustar003 B;W@shdZddlmZmZmZddlmZmZmZddl m Z ddl m Z ddl mZdd d d d d gZdZdS)aM HTML parsing library based on the WHATWG "HTML5" specification. The parser is designed to be compatible with existing HTML found in the wild and implements well-defined error recovery that is largely compatible with modern desktop web browsers. Example usage: import html5lib f = open("my_document.html") tree = html5lib.parse(f) )absolute_importdivisionunicode_literals) HTMLParserparse parseFragment)getTreeBuilder) getTreeWalker) serializerrrr r r z 0.999999999N)__doc__Z __future__rrrZ html5parserrrrZ treebuildersr Z treewalkersr Z serializerr __all__ __version__rr/usr/lib/python3.6/__init__.py s   __pycache__/__init__.cpython-36.pyc000064400000001570147204715120013147 0ustar003 B;W@shdZddlmZmZmZddlmZmZmZddl m Z ddl m Z ddl mZdd d d d d gZdZdS)aM HTML parsing library based on the WHATWG "HTML5" specification. The parser is designed to be compatible with existing HTML found in the wild and implements well-defined error recovery that is largely compatible with modern desktop web browsers. 
Example usage: import html5lib f = open("my_document.html") tree = html5lib.parse(f) )absolute_importdivisionunicode_literals) HTMLParserparse parseFragment)getTreeBuilder) getTreeWalker) serializerrrr r r z 0.999999999N)__doc__Z __future__rrrZ html5parserrrrZ treebuildersr Z treewalkersr Z serializerr __all__ __version__rr/usr/lib/python3.6/__init__.py s   __pycache__/_ihatexml.cpython-36.opt-1.pyc000064400000032626147204715120014327 0ustar003 B;WAA@sddlmZmZmZddlZddlZddlmZdZdZ dZ dZ d Z d j ee gZd j ee d d d e e gZd j ed gZejdZejdZddZddZeddZddZddZddZddZejdZejdZejd ZGd!d"d"eZ dS)#)absolute_importdivisionunicode_literalsN)DataLossWarninga^ [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | 
[#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]z*[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]a [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | 
#x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309Aa  [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]z} #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | #[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]z | .-_z#x([\d|A-F]{4,4})z'\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]cCsdd|jdD}g}x|D]}d}x`ttfD]T}|j|}|dk r0|jdd|jDt|d dkr~|d d|d <d}Pq0W|s|jt|gdqWt|}|S) NcSsg|] }|jqS)strip).0itemr r /usr/lib/python3.6/_ihatexml.py hsz$charStringToList..z | FcSsg|] }t|qSr )hexToInt)r r r r rrosrTrr) 
splitreChar reCharRangematchappendgroupslenordnormaliseCharList)charsZ charRangesrvr Z foundMatchZregexprr r rcharStringToListgs   rcCst|}x |D]}qWg}d}x|t|krd}|j||xT||t|kr|||d|dddkr|||d|dd<|d7}q@W||7}q W|S)Nrrrr)sortedrr)charListr rijr r rr|s 2  rZFFFFcCsg}|ddkr*|jd|dddgxBt|ddD].\}}|j|dd||dddgqr?r@rArBrCr r r__init__szInfosetFilter.__init__NcCsL|jr |jdr tjdtdS|jr>|dkr>tjdtdS|j|SdS)Nzxmlns:z"Attributes cannot begin with xmlnszhttp://www.w3.org/2000/xmlns/z)Attributes cannot be in the xml namespace)r> startswithwarningswarnrr? toXmlName)rEname namespacer r rcoerceAttributes  zInfosetFilter.coerceAttributecCs |j|S)N)rJ)rErKr r r coerceElementszInfosetFilter.coerceElementcCsN|jrJx$d|kr*tjdt|jdd}qW|jdrJtjdt|d7}|S)Nz--z'Comments cannot contain adjacent dashesz- -rzComments cannot end in a dash )r@rHrIrr:endswith)rEdatar r r coerceComments    zInfosetFilter.coerceCommentcCs:|jr6x"t|jdD]}tjdtqW|jdd}|S)N zText cannot contain U+000CrO)rBrangecountrHrIrr:)rErQr r r rcoerceCharacterss  zInfosetFilter.coerceCharacterscCsp|}x4tj|D]&}tjdt|j|}|j||}qW|jrl|jddkrltjdt|jd|jd}|S)NzCoercing non-XML pubid'rz!Pubid cannot contain single quote) nonPubidCharRegexpfindallrHrIrgetReplacementCharacterr:rCfind)rErQZ dataOutputr< replacementr r r coercePubids   zInfosetFilter.coercePubidc Cs|d}|dd}tj|}|r:tjdt|j|}n|}|}ttj|}x.|D]&}tjdt|j|} |j || }qVW||S)NrrzCoercing non-XML name) nonXmlNameFirstBMPRegexprrHrIrrZsetnonXmlNameBMPRegexprYr:) rErK nameFirstZnameRestmZnameFirstOutputZnameRestOutputZ replaceCharsr<r\r r rrJs       zInfosetFilter.toXmlNamecCs$||jkr|j|}n |j|}|S)N)rD escapeChar)rEr<r\r r rrZs   z%InfosetFilter.getReplacementCharactercCs0x*t|jj|D]}|j||j|}qW|S)N)r_replacementRegexprYr: unescapeChar)rErKr r r r fromXmlNameszInfosetFilter.fromXmlNamecCsdt|}||j|<|S)NzU%05X)rrD)rEr<r\r r rrcs  zInfosetFilter.escapeCharcCstt|dddS)Nrr#)r)r,)rEZcharcoder r rreszInfosetFilter.unescapeChar)FFFFTF)N)__name__ __module__ __qualname__recompilerdrFrMrNrRrVr]rJrZrfrcrer r r rr=s"     r=)!Z 
__future__rrrrjrHZ constantsrZbaseCharZ ideographicZcombiningCharacterZdigitZextenderr*ZletterrKrarkrrrrr,r%r&r+rr(r`r^rXobjectr=r r r rs2 0        __pycache__/_ihatexml.cpython-36.pyc000064400000032721147204715120013364 0ustar003 B;WAA@sddlmZmZmZddlZddlZddlmZdZdZ dZ dZ d Z d j ee gZd j ee d d d e e gZd j ed gZejdZejdZddZddZeddZddZddZddZddZejdZejdZejd ZGd!d"d"eZ dS)#)absolute_importdivisionunicode_literalsN)DataLossWarninga^ [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | 
[#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]z*[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]a [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | 
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309Aa  [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]z} #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | #[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]z | .-_z#x([\d|A-F]{4,4})z'\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]cCsdd|jdD}g}x|D]}d}x`ttfD]T}|j|}|dk r0|jdd|jDt|d dkr~|d d|d <d}Pq0W|st|dkst|jt|gdqWt |}|S) NcSsg|] }|jqS)strip).0itemr r /usr/lib/python3.6/_ihatexml.py hsz$charStringToList..z | FcSsg|] }t|qSr )hexToInt)r r r r rrosrTrr) splitreChar reCharRangematchappendgroupslenAssertionErrorordnormaliseCharList)charsZ charRangesrvr Z foundMatchZregexprr r rcharStringToListgs"  rcCst|}x 
|D]}|d|dkstqWg}d}x|t|krd}|j||xT||t|kr|||d|dddkr|||d|dd<|d7}qTW||7}q4W|S)Nrrrr)sortedrrr)charListr rijr r rr|s 2  rZFFFFcCsg}|ddkr*|jd|dddgxBt|ddD].\}}|j|dd||dddgq|dkr>tjdtdS|j|SdS)Nzxmlns:z"Attributes cannot begin with xmlnszhttp://www.w3.org/2000/xmlns/z)Attributes cannot be in the xml namespace)r? startswithwarningswarnrr@ toXmlName)rFname namespacer r rcoerceAttributes  zInfosetFilter.coerceAttributecCs |j|S)N)rK)rFrLr r r coerceElementszInfosetFilter.coerceElementcCsN|jrJx$d|kr*tjdt|jdd}qW|jdrJtjdt|d7}|S)Nz--z'Comments cannot contain adjacent dashesz- -rzComments cannot end in a dash )rArIrJrr;endswith)rFdatar r r coerceComments    zInfosetFilter.coerceCommentcCs:|jr6x"t|jdD]}tjdtqW|jdd}|S)N zText cannot contain U+000CrP)rCrangecountrIrJrr;)rFrRr r r rcoerceCharacterss  zInfosetFilter.coerceCharacterscCsp|}x4tj|D]&}tjdt|j|}|j||}qW|jrl|jddkrltjdt|jd|jd}|S)NzCoercing non-XML pubid'rz!Pubid cannot contain single quote) nonPubidCharRegexpfindallrIrJrgetReplacementCharacterr;rDfind)rFrRZ dataOutputr= replacementr r r coercePubids   zInfosetFilter.coercePubidc Cs|d}|dd}tj|}|r:tjdt|j|}n|}|}ttj|}x.|D]&}tjdt|j|} |j || }qVW||S)NrrzCoercing non-XML name) nonXmlNameFirstBMPRegexprrIrJrr[setnonXmlNameBMPRegexprZr;) rFrL nameFirstZnameRestmZnameFirstOutputZnameRestOutputZ replaceCharsr=r]r r rrKs       zInfosetFilter.toXmlNamecCs$||jkr|j|}n |j|}|S)N)rE escapeChar)rFr=r]r r rr[s   z%InfosetFilter.getReplacementCharactercCs0x*t|jj|D]}|j||j|}qW|S)N)r`replacementRegexprZr; unescapeChar)rFrLr r r r fromXmlNameszInfosetFilter.fromXmlNamecCsdt|}||j|<|S)NzU%05X)rrE)rFr=r]r r rrds  zInfosetFilter.escapeCharcCstt|dddS)Nrr$)r*r-)rFZcharcoder r rrfszInfosetFilter.unescapeChar)FFFFTF)N)__name__ __module__ __qualname__recompilererGrNrOrSrWr^rKr[rgrdrfr r r rr>s"     r>)!Z __future__rrrrkrIZ constantsrZbaseCharZ ideographicZcombiningCharacterZdigitZextenderr+ZletterrLrbrlrrrrr-r&r'r,rr)rar_rYobjectr>r r r rs2 0        
__pycache__/_inputstream.cpython-36.opt-1.pyc000064400000053160147204715120015063 0ustar003 B;W~)@sddlmZmZmZddlmZmZddlmZm Z ddl Z ddl Z ddl Z ddl mZmZmZmZddl mZddlmZdd lmZydd lmZWnek reZYnXed d eDZed d eDZedd eDZeeddgBZdZejr$e j eddEe!ddZ"n e j eZ"e#ddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3g Z$e j d4Z%iZ&Gd5d6d6e'Z(d7d8Z)Gd9d:d:e'Z*Gd;d<dd>e,Z-Gd?d@d@e'Z.GdAdBdBe'Z/dCdDZ0dS)F)absolute_importdivisionunicode_literals) text_type binary_type) http_clienturllibN)EOFspaceCharacters asciiLettersasciiUppercase)ReparseException)_utils)StringIO)BytesIOcCsg|]}|jdqS)ascii)encode).0itemr"/usr/lib/python3.6/_inputstream.py srcCsg|]}|jdqS)r)r)rrrrrrscCsg|]}|jdqS)r)r)rrrrrrs>.)sumr)r!rrr_bufferedBytes^szBufferedStream._bufferedBytescCs<|jj|}|jj||jdd7<t||jd<|S)Nrr )rr-rappendr r#)r!r,datarrrr*as   zBufferedStream._readStreamcCs|}g}|jd}|jd}x|t|jkr|dkr|j|}|t||krb|}|||g|_n"t||}|t|g|_|d7}|j||||||8}d}qW|r|j|j|dj|S)Nrr )r r#rr0r*join)r!r,ZremainingBytesrvZ bufferIndexZ bufferOffsetZ bufferedDataZ bytesToReadrrrr+hs$    zBufferedStream._readFromBufferN) __name__ __module__ __qualname____doc__r"r&r)r-r/r*r+rrrrr9s  rcKst|tjs(t|tjjr.t|jtjr.d}n&t|drJt|jdt }n t|t }|rdd|D}|rvt d|t |f|St |f|SdS)NFr-rcSsg|]}|jdr|qS)Z _encoding)endswith)rxrrrrsz#HTMLInputStream..z3Cannot set an encoding with a unicode input, set %r) isinstancerZ HTTPResponserZresponseZaddbasefphasattrr-r TypeErrorHTMLUnicodeInputStreamHTMLBinaryInputStream)sourcekwargsZ isUnicodeZ encodingsrrrHTMLInputStreams     rCc@speZdZdZdZddZddZddZd d Zd d Z d dZ dddZ ddZ ddZ dddZddZdS)r?zProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. i(cCsZtjsd|_ntddkr$|j|_n|j|_dg|_tddf|_|j ||_ |j dS)aInitialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. 
The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) Nu􏿿r rzutf-8certain) rsupports_lone_surrogatesreportCharacterErrorsr#characterErrorsUCS4characterErrorsUCS2ZnewLineslookupEncoding charEncoding openStream dataStreamreset)r!rArrrr"s   zHTMLUnicodeInputStream.__init__cCs.d|_d|_d|_g|_d|_d|_d|_dS)Nr)r% chunkSize chunkOffseterrors prevNumLines prevNumCols_bufferedCharacter)r!rrrrMszHTMLUnicodeInputStream.resetcCst|dr|}nt|}|S)zvProduces a file object from source. source can be either a file object, local filename or a string. r-)r=r)r!rArrrrrKs z!HTMLUnicodeInputStream.openStreamcCsT|j}|jdd|}|j|}|jdd|}|dkr@|j|}n ||d}||fS)N rr r)r%countrRrfindrS)r!r'r%ZnLinesZ positionLineZ lastLinePosZpositionColumnrrr _positions   z HTMLUnicodeInputStream._positioncCs|j|j\}}|d|fS)z:Returns (line, col) of the current position in the stream.r )rXrP)r!linecolrrrr szHTMLUnicodeInputStream.positioncCs6|j|jkr|jstS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return EOF when EOF is reached. 
r )rPrO readChunkr r%)r!rPcharrrrr\s   zHTMLUnicodeInputStream.charNcCs|dkr|j}|j|j\|_|_d|_d|_d|_|jj|}|j rX|j |}d|_ n|s`dSt |dkrt |d }|dksd|kodknr|d |_ |dd}|j r|j ||j dd }|j d d }||_t ||_d S)NrNrFr iiz rU Trrr)_defaultChunkSizerXrOrRrSr%rPrLr-rTr#ordrFreplace)r!rOr1Zlastvrrrr[s0           z HTMLUnicodeInputStream.readChunkcCs,x&tttj|D]}|jjdqWdS)Nzinvalid-codepoint)ranger#invalid_unicode_refindallrQr0)r!r1_rrrrG%sz*HTMLUnicodeInputStream.characterErrorsUCS4cCsd}xtj|D]}|rqt|j}|j}tj|||drttj|||d}|tkrn|j j dd}q|dkr|dkr|t |dkr|j j dqd}|j j dqWdS)NFzinvalid-codepointTiir ) rcfinditerr`groupstartrZisSurrogatePairZsurrogatePairToCodepointnon_bmp_invalid_codepointsrQr0r#)r!r1skipmatchZ codepointr$Zchar_valrrrrH)s   z*HTMLUnicodeInputStream.characterErrorsUCS2Fc Csyt||f}WnNtk r^djdd|D}|s@d|}tjd|}t||f<YnXg}x||j|j|j}|dkr|j|jkrPn0|j }||jkr|j |j|j|||_P|j |j|jd|j sfPqfWdj|}|S)z Returns a string of characters from the stream up to but not including any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' method and iteration over its characters. rNcSsg|]}dt|qS)z\x%02x)r`)rcrrrrNsz5HTMLUnicodeInputStream.charsUntil..z^%sz[%s]+N) charsUntilRegExKeyErrorr3recompilerlr%rPrOendr0r[) r!Z charactersZoppositecharsZregexr4mrrrrrr charsUntil@s.    z!HTMLUnicodeInputStream.charsUntilcCs@|dk r<|jdkr.||j|_|jd7_n|jd8_dS)Nrr )rPr%rO)r!r\rrrungetos   zHTMLUnicodeInputStream.unget)N)F)r5r6r7r8r_r"rMrKrXr r\r[rGrHrvrwrrrrr?s   & /r?c@sLeZdZdZdddZddZd d Zdd d Zd dZddZ ddZ dS)r@zProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. N windows-1252TcCs\|j||_tj||jd|_d|_||_||_||_||_ ||_ |j ||_ |j dS)aInitialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. 
The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) idN)rK rawStreamr?r" numBytesMetanumBytesChardetoverride_encodingtransport_encodingsame_origin_parent_encodinglikely_encodingdefault_encodingdetermineEncodingrJrM)r!rAr}r~rrrZ useChardetrrrr"s  zHTMLBinaryInputStream.__init__cCs&|jdjj|jd|_tj|dS)Nrra)rJZ codec_info streamreaderrzrLr?rM)r!rrrrMszHTMLBinaryInputStream.resetc CsDt|dr|}nt|}y|j|jWnt|}YnX|S)zvProduces a file object from source. source can be either a file object, local filename or a string. r-)r=rr)r&r)r!rArrrrrKs z HTMLBinaryInputStream.openStreamc Cs|jdf}|ddk r|St|jdf}|ddk r:|St|jdf}|ddk rX|S|jdf}|ddk rt|St|jdf}|ddk r|djjd r|St|jdf}|ddk r|S|rdyddl m }Wnt k rYnxXg}|}x6|j s.|j j|j}|sP|j||j|qW|jt|jd}|j jd|dk rd|dfSt|jdf}|ddk r|StddfS)NrDrZ tentativezutf-16)UniversalDetectorencodingz windows-1252) detectBOMrIr}r~detectEncodingMetarname startswithrZchardet.universaldetectorr ImportErrordonerzr-r|r0Zfeedcloseresultr)r)r!ZchardetrJrZbuffersZdetectorrrrrrrsP           z'HTMLBinaryInputStream.determineEncodingcCst|}|dkrdS|jdkr(td}nT||jdkrH|jddf|_n4|jjd|df|_|jtd|jd|fdS)Nutf-16beutf-16lezutf-8rrDzEncoding changed from %s to %s)rr)rIrrJrzr)rMr)r!Z newEncodingrrrchangeEncodings   z$HTMLBinaryInputStream.changeEncodingc Cstjdtjdtjdtjdtjdi}|jjd}|j|dd}d}|sp|j|}d}|sp|j|dd }d }|r|jj |t |S|jj d dSdS) zAttempts to detect at BOM at the start of the stream. 
If an encoding can be determined from the BOM return the name of the encoding otherwise return Nonezutf-8zutf-16lezutf-16bezutf-32lezutf-32beNrfr) codecsBOM_UTF8 BOM_UTF16_LE BOM_UTF16_BE BOM_UTF32_LE BOM_UTF32_BErzr-getr)rI)r!ZbomDictstringrr)rrrrs"     zHTMLBinaryInputStream.detectBOMcCsH|jj|j}t|}|jjd|j}|dk rD|jdkrDtd}|S)z9Report the encoding declared by the meta element rNutf-16beutf-16lezutf-8)rr)rzr-r{EncodingParserr) getEncodingrrI)r!rparserrrrrr9s z(HTMLBinaryInputStream.detectEncodingMeta)NNNNrxT)T) r5r6r7r8r"rMrKrrrrrrrrr@s ( >"r@c@seZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ e e e Z ddZe eZefddZddZddZddZdS) EncodingByteszString-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raisedcCstj||jS)N)r,__new__lower)r!valuerrrrLszEncodingBytes.__new__cCs d|_dS)Nr r)rX)r!rrrrr"PszEncodingBytes.__init__cCs|S)Nr)r!rrr__iter__TszEncodingBytes.__iter__cCs>|jd}|_|t|kr"tn |dkr.t|||dS)Nr r)rXr# StopIterationr>)r!prrr__next__Ws  zEncodingBytes.__next__cCs|jS)N)r)r!rrrnext_szEncodingBytes.nextcCsB|j}|t|krtn |dkr$t|d|_}|||dS)Nrr )rXr#rr>)r!rrrrpreviouscs zEncodingBytes.previouscCs|jt|krt||_dS)N)rXr#r)r!r rrr setPositionlszEncodingBytes.setPositioncCs*|jt|krt|jdkr"|jSdSdS)Nr)rXr#r)r!rrr getPositionqs  zEncodingBytes.getPositioncCs||j|jdS)Nr )r )r!rrrgetCurrentByte{szEncodingBytes.getCurrentBytecCsL|j}x:|t|kr@|||d}||kr6||_|S|d7}qW||_dS)zSkip past a list of charactersr N)r r#rX)r!rsrrmrrrrks zEncodingBytes.skipcCsL|j}x:|t|kr@|||d}||kr6||_|S|d7}qW||_dS)Nr )r r#rX)r!rsrrmrrr skipUntils zEncodingBytes.skipUntilcCs>|j}|||t|}|j|}|r:|jt|7_|S)zLook for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. Otherwise return False and leave the position alone)r r#r)r!r,rr1r4rrr matchBytess  zEncodingBytes.matchBytescCsR||jdj|}|dkrJ|jdkr,d|_|j|t|d7_dStdS)zLook for the next sequence of bytes matching a given sequence. 
If a match is found advance the position to the last byte of the matchNr rTrr)r findrXr#r)r!r,Z newPositionrrrjumpTos zEncodingBytes.jumpToN)r5r6r7r8rr"rrrrrrpropertyr r currentBytespaceCharactersBytesrkrrrrrrrrHs      rc@sXeZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ dS)rz?Mini parser for detecting character encoding from meta elementscCst||_d|_dS)z3string - the data to work on for encoding detectionN)rr1r)r!r1rrrr"s zEncodingParser.__init__c Csd|jfd|jfd|jfd|jfd|jfd|jff}x^|jD]T}d}xD|D]<\}}|jj|rJy |}PWqJtk rd}PYqJXqJW|s)r1r)r!rrrrszEncodingParser.handleCommentcCs|jjtkrdSd}d}x|j}|dkr.dS|ddkr^|ddk}|r|dk r||_dSq|ddkr|d}t|}|dk r||_dSq|ddkrtt|d}|j}|dk rt|}|dk r|r||_dS|}qWdS) NTFrs http-equivr s content-typescharsetscontent) r1rr getAttributerrIContentAttrParserrparse)r!Z hasPragmaZpendingEncodingattrZtentativeEncodingcodecZ contentParserrrrrs:      zEncodingParser.handleMetacCs |jdS)NF)handlePossibleTag)r!rrrrsz%EncodingParser.handlePossibleStartTagcCst|j|jdS)NT)rr1r)r!rrrrs z#EncodingParser.handlePossibleEndTagcCsf|j}|jtkr(|r$|j|jdS|jt}|dkrD|jn|j}x|dk r`|j}qNWdS)NTr)r1rasciiLettersBytesrrrspacesAngleBracketsr)r!ZendTagr1rmrrrrrs     z EncodingParser.handlePossibleTagcCs |jjdS)Nr)r1r)r!rrrrszEncodingParser.handleOthercCs|j}|jttdgB}|dkr&dSg}g}xt|dkr@|r@PnX|tkrT|j}PnD|d krjdj|dfS|tkr|j|jn|dkrdS|j|t|}q0W|dkr|j dj|dfSt||j}|d kr:|}xt|}||krt|dj|dj|fS|tkr*|j|jq|j|qWnJ|dkrRdj|dfS|tkrl|j|jn|dkrzdS|j|x^t|}|t krdj|dj|fS|tkr|j|jn|dkrdS|j|qWdS) z_Return a name,value pair for the next attribute in the stream, if one is found, or None/rN=r2'")rN)rr)rr) r1rkr frozensetr3asciiUppercaseBytesr0rrrr)r!r1rmZattrNameZ attrValueZ quoteCharrrrrsf             zEncodingParser.getAttributeN) r5r6r7r8r"rrrrrrrrrrrrrs$rc@seZdZddZddZdS)rcCs ||_dS)N)r1)r!r1rrrr"fszContentAttrParser.__init__cCsy|jjd|jjd7_|jj|jjdks8dS|jjd7_|jj|jjdkr|jj}|jjd7_|jj}|jj|r|j||jjSdSnF|jj}y|jjt|j||jjStk r|j|dSXWntk rdSXdS)Nscharsetr rrr)rr)r1rr rkrrrr)r!Z quoteMarkZ oldPositionrrrrjs.       
zContentAttrParser.parseN)r5r6r7r"rrrrrresrcCs`t|tr.y|jd}Wntk r,dSX|dk rXy tj|Stk rTdSXndSdS)z{Return the python codec name corresponding to an encoding or None if the string doesn't correspond to a valid encoding.rN)r;rdecodeUnicodeDecodeError webencodingslookupAttributeError)rrrrrIs  rIr)1Z __future__rrrZsixrrZ six.movesrrrrprZ constantsr r r r rrNriorrrrrrrrZinvalid_unicode_no_surrogaterErqevalrcsetrjZascii_punctuation_rernobjectrrCr?r@r,rrrrIrrrrsV              JgIh6'__pycache__/_inputstream.cpython-36.pyc000064400000054067147204715120014133 0ustar003 B;W~)@sddlmZmZmZddlmZmZddlmZm Z ddl Z ddl Z ddl Z ddl mZmZmZmZddl mZddlmZdd lmZydd lmZWnek reZYnXed d eDZed d eDZedd eDZeeddgBZdZejrFedEdkr"ej ddks&t!e j"eddFe#ddZ$n e j"eZ$e%ddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3g Z&e j"d4Z'iZ(Gd5d6d6e)Z*d7d8Z+Gd9d:d:e)Z,Gd;d<dd>e.Z/Gd?d@d@e)Z0GdAdBdBe)Z1dCdDZ2dS)G)absolute_importdivisionunicode_literals) text_type binary_type) http_clienturllibN)EOFspaceCharacters asciiLettersasciiUppercase)ReparseException)_utils)StringIO)BytesIOcCsg|]}|jdqS)ascii)encode).0itemr"/usr/lib/python3.6/_inputstream.py srcCsg|]}|jdqS)r)r)rrrrrrscCsg|]}|jdqS)r)r)rrrrrrs>.)sumr)r!rrrr'^szBufferedStream._bufferedBytescCs<|jj|}|jj||jdd7<t||jd<|S)Nrr )rr/rappendr r#)r!r.datarrrr,as   zBufferedStream._readStreamcCs|}g}|jd}|jd}x|t|jkr|dkr|dks@t|j|}|t||krn|}|||g|_n"t||}|t|g|_|d7}|j||||||8}d}qW|r|j|j|dj|S)Nrr )r r#rr(r1r,join)r!r.ZremainingBytesrvZ bufferIndexZ bufferOffsetZ bufferedDataZ bytesToReadrrrr-hs&     zBufferedStream._readFromBufferN) __name__ __module__ __qualname____doc__r"r&r+r/r'r,r-rrrrr9s  rcKst|tjs(t|tjjr.t|jtjr.d}n&t|drJt|jdt }n t|t }|rdd|D}|rvt d|t |f|St |f|SdS)NFr/rcSsg|]}|jdr|qS)Z _encoding)endswith)rxrrrrsz#HTMLInputStream..z3Cannot set an encoding with a unicode input, set %r) isinstancerZ HTTPResponserZresponseZaddbasefphasattrr/r TypeErrorHTMLUnicodeInputStreamHTMLBinaryInputStream)sourcekwargsZ isUnicodeZ encodingsrrrHTMLInputStreams     rDc@speZdZdZdZddZddZddZd d Zd d 
Z d dZ dddZ ddZ ddZ dddZddZdS)r@zProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. i(cCsZtjsd|_ntddkr$|j|_n|j|_dg|_tddf|_|j ||_ |j dS)aInitialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) Nu􏿿r rzutf-8certain) rsupports_lone_surrogatesreportCharacterErrorsr#characterErrorsUCS4characterErrorsUCS2ZnewLineslookupEncoding charEncoding openStream dataStreamreset)r!rBrrrr"s   zHTMLUnicodeInputStream.__init__cCs.d|_d|_d|_g|_d|_d|_d|_dS)Nr)r% chunkSize chunkOffseterrors prevNumLines prevNumCols_bufferedCharacter)r!rrrrNszHTMLUnicodeInputStream.resetcCst|dr|}nt|}|S)zvProduces a file object from source. source can be either a file object, local filename or a string. r/)r>r)r!rBrrrrrLs z!HTMLUnicodeInputStream.openStreamcCsT|j}|jdd|}|j|}|jdd|}|dkr@|j|}n ||d}||fS)N rr r)r%countrSrfindrT)r!r)r%ZnLinesZ positionLineZ lastLinePosZpositionColumnrrr _positions   z HTMLUnicodeInputStream._positioncCs|j|j\}}|d|fS)z:Returns (line, col) of the current position in the stream.r )rYrQ)r!linecolrrrr szHTMLUnicodeInputStream.positioncCs6|j|jkr|jstS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return EOF when EOF is reached. 
r )rQrP readChunkr r%)r!rQcharrrrr]s   zHTMLUnicodeInputStream.charNcCs|dkr|j}|j|j\|_|_d|_d|_d|_|jj|}|j rX|j |}d|_ n|s`dSt |dkrt |d }|dksd|kodknr|d |_ |dd}|j r|j ||j dd }|j d d }||_t ||_d S)NrOrFr iiz rV Trrr)_defaultChunkSizerYrPrSrTr%rQrMr/rUr#ordrGreplace)r!rPr2Zlastvrrrr\s0           z HTMLUnicodeInputStream.readChunkcCs,x&tttj|D]}|jjdqWdS)Nzinvalid-codepoint)ranger#invalid_unicode_refindallrRr1)r!r2_rrrrH%sz*HTMLUnicodeInputStream.characterErrorsUCS4cCsd}xtj|D]}|rqt|j}|j}tj|||drttj|||d}|tkrn|j j dd}q|dkr|dkr|t |dkr|j j dqd}|j j dqWdS)NFzinvalid-codepointTiir ) rdfinditerragroupstartrZisSurrogatePairZsurrogatePairToCodepointnon_bmp_invalid_codepointsrRr1r#)r!r2skipmatchZ codepointr$Zchar_valrrrrI)s   z*HTMLUnicodeInputStream.characterErrorsUCS2Fc Csyt||f}Wnltk r|x|D]}t|dks&tq&Wdjdd|D}|s^d|}tjd|}t||f<YnXg}x||j|j|j }|dkr|j |j krPn0|j }||j kr|j |j|j |||_ P|j |j|j d|j sPqWdj|} | S)z Returns a string of characters from the stream up to but not including any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' method and iteration over its characters. rOcSsg|]}dt|qS)z\x%02x)ra)rcrrrrNsz5HTMLUnicodeInputStream.charsUntil..z^%sz[%s]+N)charsUntilRegExKeyErrorrar(r4recompilermr%rQrPendr1r\) r!Z charactersZoppositecharsroZregexr5mrtrrrr charsUntil@s2     z!HTMLUnicodeInputStream.charsUntilcCsT|dk rP|jdkr.||j|_|jd7_n"|jd8_|j|j|ksPtdS)Nrr )rQr%rPr()r!r]rrrungetos   zHTMLUnicodeInputStream.unget)N)F)r6r7r8r9r`r"rNrLrYr r]r\rHrIrxryrrrrr@s   & /r@c@sLeZdZdZdddZddZd d Zdd d Zd dZddZ ddZ dS)rAzProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. N windows-1252TcCsn|j||_tj||jd|_d|_||_||_||_||_ ||_ |j ||_ |j ddk sbt |jdS)aInitialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. 
source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) idrN)rL rawStreamr@r" numBytesMetanumBytesChardetoverride_encodingtransport_encodingsame_origin_parent_encodinglikely_encodingdefault_encodingdetermineEncodingrKr(rN)r!rBrrrrrZ useChardetrrrr"s  zHTMLBinaryInputStream.__init__cCs&|jdjj|jd|_tj|dS)Nrrb)rKZ codec_info streamreaderr|rMr@rN)r!rrrrNszHTMLBinaryInputStream.resetc CsDt|dr|}nt|}y|j|jWnt|}YnX|S)zvProduces a file object from source. source can be either a file object, local filename or a string. r/)r>rr+r&r)r!rBrrrrrLs z HTMLBinaryInputStream.openStreamc Cs|jdf}|ddk r|St|jdf}|ddk r:|St|jdf}|ddk rX|S|jdf}|ddk rt|St|jdf}|ddk r|djjd r|St|jdf}|ddk r|S|rtyddl m }Wnt k rYnXg}|}xF|j s>|j j|j}t|ts t|s(P|j||j|qW|jt|jd}|j jd|dk rt|dfSt|jdf}|ddk r|StddfS)NrErZ tentativezutf-16)UniversalDetectorencodingz windows-1252) detectBOMrJrrdetectEncodingMetarname startswithrZchardet.universaldetectorr ImportErrordoner|r/r~r<r.r(r1Zfeedcloseresultr+r)r!ZchardetrKrZbuffersZdetectorrrrrrrsR           z'HTMLBinaryInputStream.determineEncodingcCs|jddkstt|}|dkr&dS|jdkrFtd}|dk stnT||jdkrf|jddf|_n4|jjd|df|_|jtd|jd|fdS) Nr rEutf-16beutf-16lezutf-8rzEncoding changed from %s to %s)rr)rKr(rJrr|r+rNr)r!Z newEncodingrrrchangeEncodings   z$HTMLBinaryInputStream.changeEncodingc Cstjdtjdtjdtjdtjdi}|jjd}t|t s"rAc@seZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ e e e Z ddZe eZefddZddZddZddZdS) EncodingByteszString-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raisedcCst|tsttj||jS)N)r<r.r(__new__lower)r!valuerrrrLszEncodingBytes.__new__cCs d|_dS)Nr r)rY)r!rrrrr"PszEncodingBytes.__init__cCs|S)Nr)r!rrr__iter__TszEncodingBytes.__iter__cCs>|jd}|_|t|kr"tn |dkr.t|||dS)Nr r)rYr# StopIterationr?)r!prrr__next__Ws  
zEncodingBytes.__next__cCs|jS)N)r)r!rrrnext_szEncodingBytes.nextcCsB|j}|t|krtn |dkr$t|d|_}|||dS)Nrr )rYr#rr?)r!rrrrpreviouscs zEncodingBytes.previouscCs|jt|krt||_dS)N)rYr#r)r!r rrr setPositionlszEncodingBytes.setPositioncCs*|jt|krt|jdkr"|jSdSdS)Nr)rYr#r)r!rrr getPositionqs  zEncodingBytes.getPositioncCs||j|jdS)Nr )r )r!rrrgetCurrentByte{szEncodingBytes.getCurrentBytecCsL|j}x:|t|kr@|||d}||kr6||_|S|d7}qW||_dS)zSkip past a list of charactersr N)r r#rY)r!rurrorrrrls zEncodingBytes.skipcCsL|j}x:|t|kr@|||d}||kr6||_|S|d7}qW||_dS)Nr )r r#rY)r!rurrorrr skipUntils zEncodingBytes.skipUntilcCs>|j}|||t|}|j|}|r:|jt|7_|S)zLook for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. Otherwise return False and leave the position alone)r r#r)r!r.rr2r5rrr matchBytess  zEncodingBytes.matchBytescCsR||jdj|}|dkrJ|jdkr,d|_|j|t|d7_dStdS)zLook for the next sequence of bytes matching a given sequence. If a match is found advance the position to the last byte of the matchNr rTrr)r findrYr#r)r!r.Z newPositionrrrjumpTos zEncodingBytes.jumpToN)r6r7r8r9rr"rrrrrrpropertyr r currentBytespaceCharactersBytesrlrrrrrrrrHs      rc@sXeZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ dS)rz?Mini parser for detecting character encoding from meta elementscCst||_d|_dS)z3string - the data to work on for encoding detectionN)rr2r)r!r2rrrr"s zEncodingParser.__init__c Csd|jfd|jfd|jfd|jfd|jfd|jff}x^|jD]T}d}xD|D]<\}}|jj|rJy |}PWqJtk rd}PYqJXqJW|s)r2r)r!rrrrszEncodingParser.handleCommentcCs|jjtkrdSd}d}x|j}|dkr.dS|ddkr^|ddk}|r|dk r||_dSq|ddkr|d}t|}|dk r||_dSq|ddkrtt|d}|j}|dk rt|}|dk r|r||_dS|}qWdS) NTFrs http-equivr s content-typescharsetscontent) r2rr getAttributerrJContentAttrParserrparse)r!Z hasPragmaZpendingEncodingattrZtentativeEncodingcodecZ contentParserrrrrs:      zEncodingParser.handleMetacCs |jdS)NF)handlePossibleTag)r!rrrrsz%EncodingParser.handlePossibleStartTagcCst|j|jdS)NT)rr2r)r!rrrrs 
z#EncodingParser.handlePossibleEndTagcCsf|j}|jtkr(|r$|j|jdS|jt}|dkrD|jn|j}x|dk r`|j}qNWdS)NTr)r2rasciiLettersBytesrrrspacesAngleBracketsr)r!ZendTagr2rorrrrrs     z EncodingParser.handlePossibleTagcCs |jjdS)Nr)r2r)r!rrrrszEncodingParser.handleOthercCs|j}|jttdgB}|dks2t|dks2t|d kr>dSg}g}xt|dkrX|rXPnX|tkrl|j}PnD|d krdj|dfS|tkr|j|j n|dkrdS|j|t |}qHW|dkr|j dj|dfSt ||j}|d krT|}xt |}||kr(t |dj|dj|fS|tkrB|j|j n |j|qWnJ|dkrldj|dfS|tkr|j|j n|dkrdS|j|x^t |}|t krdj|dj|fS|tkr|j|j n|dkrdS|j|qWdS) z_Return a name,value pair for the next attribute in the stream, if one is found, or None/Nr r=r3'")rN)rr)rr) r2rlr frozensetr#r(r4asciiUppercaseBytesr1rrrr)r!r2roZattrNameZ attrValueZ quoteCharrrrrsh             zEncodingParser.getAttributeN) r6r7r8r9r"rrrrrrrrrrrrrs$rc@seZdZddZddZdS)rcCst|tst||_dS)N)r<r.r(r2)r!r2rrrr"fszContentAttrParser.__init__cCsy|jjd|jjd7_|jj|jjdks8dS|jjd7_|jj|jjdkr|jj}|jjd7_|jj}|jj|r|j||jjSdSnF|jj}y|jjt|j||jjStk r|j|dSXWntk rdSXdS)Nscharsetr rrr)rr)r2rr rlrrrr)r!Z quoteMarkZ oldPositionrrrrjs.       
zContentAttrParser.parseN)r6r7r8r"rrrrrresrcCs`t|tr.y|jd}Wntk r,dSX|dk rXy tj|Stk rTdSXndSdS)z{Return the python codec name corresponding to an encoding or None if the string doesn't correspond to a valid encoding.rN)r<rdecodeUnicodeDecodeError webencodingslookupAttributeError)rrrrrJs  rJrr)3Z __future__rrrZsixrrZ six.movesrrrrrrZ constantsr r r r rrOriorrrrrrrrZinvalid_unicode_no_surrogaterFrWr(rsevalrdsetrkZascii_punctuation_rerpobjectrrDr@rAr.rrrrJrrrrsX    "          JgIh6'__pycache__/_tokenizer.cpython-36.opt-1.pyc000064400000122067147204715120014525 0ustar003 B;W+@sddlmZmZmZddlmZddlmZddl m Z ddl m Z ddl m Z m Z ddl mZmZmZdd l mZmZdd l mZdd lmZdd lmZee ZGd ddeZdS))absolute_importdivisionunicode_literals)unichr)deque)spaceCharacters)entities) asciiLettersasciiUpper2Lower)digits hexDigitsEOF) tokenTypes tagTokenTypes)replacementCharacters)HTMLInputStream)TriecsdeZdZdZdfdd ZddZddZdd d Zd d ZddZ ddZ ddZ ddZ ddZ ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9Zd:d;Zdd?Z!d@dAZ"dBdCZ#dDdEZ$dFdGZ%dHdIZ&dJdKZ'dLdMZ(dNdOZ)dPdQZ*dRdSZ+dTdUZ,dVdWZ-dXdYZ.dZd[Z/d\d]Z0d^d_Z1d`daZ2dbdcZ3dddeZ4dfdgZ5dhdiZ6djdkZ7dldmZ8dndoZ9dpdqZ:drdsZ;dtduZdzd{Z?d|d}Z@d~dZAddZBddZCddZDddZEddZFddZGddZHddZIddZJddZKddZLZMS) HTMLTokenizera  This class takes care of tokenizing HTML. * self.currentToken Holds the token that is currently being processed. * self.state Holds a reference to the method to be invoked... XXX * self.stream Points to HTMLInputStream object. Nc sFt|f||_||_d|_g|_|j|_d|_d|_t t |j dS)NF) rstreamparserZ escapeFlagZ lastFourChars dataStatestateescape currentTokensuperr__init__)selfrrkwargs) __class__ /usr/lib/python3.6/_tokenizer.pyr"szHTMLTokenizer.__init__ccs\tg|_xL|jrVx&|jjr:td|jjjddVqWx|jrR|jjVq>Wq WdS)z This is where the magic happens. We do our usually processing through the states and when we have a token to return we yield the token which pauses processing until the next token is requested. 
ParseErrorr)typedataN)r tokenQueuerrerrorsrpoppopleft)rr r r!__iter__1s    zHTMLTokenizer.__iter__c %Cs(t}d}|rt}d}g}|jj}x(||krJ|tk rJ|j||jj}q$Wtdj||}|tkrt|}|j jt ddd|idnld|kod kns|d krd }|j jt ddd|idn(d |kod knsd|kodknsd|kodknsd|ko4dkns|t ddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d g#kr|j jt ddd|idy t |}Wn>t k r|d6}t d|d?Bt d7|d8@B}YnX|d9kr$|j jt dd:d;|jj||S)r"z'expected-tag-name-but-got-right-bracket)r#r$rJz<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerDT)rr6markupDeclarationOpenStatercloseTagOpenStater rr tagNameStater%r7rr=bogusCommentState)rr$r r r!r]is6               zHTMLTokenizer.tagOpenStatecCs|jj}|tkr0td|gdd|_|j|_n|dkrX|jjtddd|j |_nn|t kr|jjtddd|jjtd d d|j |_n0|jjtdd d |id |jj ||j |_dS)NrVF)r#rUr$rWrjr"z*expected-closing-tag-but-got-right-bracket)r#r$z expected-closing-tag-but-got-eofrJz|tkr|jjtdd d|j |_n|jjtd|dd S) Nr|rJ)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT) rr6r%r7r scriptDataDoubleEscapedDashStater(scriptDataDoubleEscapedLessThanSignStaterr)rr$r r r!rs$          z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs|jj}|dkr2|jjtddd|j|_n|dkrZ|jjtddd|j|_n|dkr|jjtddd|jjtddd|j|_nF|t kr|jjtdd d|j |_n|jjtd|d|j|_d S) Nr|rJ)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT) rr6r%r7r$scriptDataDoubleEscapedDashDashStaterrrrr)rr$r r r!rs(           z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|jj}|dkr*|jjtdddn|dkrR|jjtddd|j|_n|dkrz|jjtddd|j|_n|dkr|jjtddd|jjtdd d|j|_nF|t kr|jjtdd d|j |_n|jjtd|d|j|_d S) Nr|rJ)r#r$rDrjr[r"zinvalid-codepointu�zeof-in-script-in-scriptT) rr6r%r7rrrrfrrr)rr$r r r!rs,           z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|jj}|dkr8|jjtdddd|_|j|_n|jj||j |_dS)NrirJ)r#r$r,T) rr6r%r7rrrscriptDataDoubleEscapeEndStaterr=r)rr$r r r!r0s   z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs|jj}|ttdBkrR|jjtd|d|jjdkrH|j |_ q|j |_ nB|t kr|jjtd|d|j|7_n|jj ||j |_ dS)NrirjrJ)r#r$rT)rirj)rr6rr:r%r7rrrrurrrr r=)rr$r r r!r;s    
z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|jj}|tkr$|jjtdn|tkrJ|jdj|dg|j|_n|dkr\|j n|dkrn|j |_n|dkr|j jt d d d |jdj|dg|j|_n|d kr|j jt d dd |jdjddg|j|_nF|t kr|j jt d dd |j|_n|jdj|dg|j|_dS)NTr$r,rjri'"rHrDr"z#invalid-character-in-attribute-name)r#r$r[zinvalid-codepointu�z#expected-attribute-name-but-got-eof)rrrHrD)rr6rr^r rr7attributeNameStaterrZrqr%rrr)rr$r r r!rpKs6              z&HTMLTokenizer.beforeAttributeNameStatecCs|jj}d}d}|dkr&|j|_n0|tkr^|jddd||jjtd7<d}n|dkrld}n|tkr~|j|_n|dkr|j |_n|d kr|j j t d d d |jdddd 7<d}n|dkr|j j t d dd |jddd|7<d}nH|t kr8|j j t d dd |j|_n|jddd|7<d}|r|jdddjt|jddd<xP|jdddD]:\}}|jddd|kr|j j t d dd PqW|r|jdS)NTFrHr$rrrjrir[r"zinvalid-codepoint)r#r$u�rrrDz#invalid-character-in-attribute-namezeof-in-attribute-namezduplicate-attributerKrK)rrrDrKrKrKrKrKrK)rr6beforeAttributeValueStaterr rr^rafterAttributeNameStaterqr%r7rrrrXr rZ)rr$ZleavingThisStateZ emitTokenrU_r r r!risR             &  z HTMLTokenizer.attributeNameStatecCsF|jj}|tkr$|jjtdn|dkr8|j|_n |dkrJ|jn|tkrp|jdj |dg|j |_n|dkr|j |_n|dkr|j j t dd d |jdj d dg|j |_n|dkr|j j t ddd |jdj |dg|j |_nF|tkr&|j j t ddd |j|_n|jdj |dg|j |_dS)NTrHrjr$r,rir[r"zinvalid-codepoint)r#r$u�rrrDz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)rrrD)rr6rr^rrrZr rr7rrqr%rrr)rr$r r r!rs:                z%HTMLTokenizer.afterAttributeNameStatecCsj|jj}|tkr$|jjtdnB|dkr8|j|_n.|dkrX|j|_|jj|n|dkrl|j|_n|dkr|j j t ddd|j n|d kr|j j t dd d|j d dd d 7<|j|_n|dkr|j j t ddd|j d dd |7<|j|_nL|tkrD|j j t ddd|j|_n"|j d dd |7<|j|_dS)NTrrCrrjr"z.expected-attribute-value-but-got-right-bracket)r#r$r[zinvalid-codepointr$ru�rHrD`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eofrK)rHrDrrKrK)rr6rr^attributeValueDoubleQuotedStaterattributeValueUnQuotedStater=attributeValueSingleQuotedStater%r7rrZrrr)rr$r r r!rs>                 z'HTMLTokenizer.beforeAttributeValueStatecCs|jj}|dkr|j|_n|dkr0|jdn|dkrj|jjtddd|jdd dd 7<nN|t kr|jjtdd d|j |_n&|jdd d||jj d7<d 
S)NrrCr[r"zinvalid-codepoint)r#r$r$ru�z#eof-in-attribute-value-double-quoteTrKrK)rrCr[) rr6afterAttributeValueStaterrTr%r7rrrrr^)rr$r r r!rs         z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs|jj}|dkr|j|_n|dkr0|jdn|dkrj|jjtddd|jdd dd 7<nN|t kr|jjtdd d|j |_n&|jdd d||jj d7<d S)NrrCr[r"zinvalid-codepoint)r#r$r$ru�z#eof-in-attribute-value-single-quoteTrKrK)rrCr[) rr6rrrTr%r7rrrrr^)rr$r r r!rs         z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|jj}|tkr|j|_n|dkr2|jdn|dkrD|jn|dkr~|jjt dd d |j d dd |7<n|d kr|jjt ddd |j d dd d7<nV|t kr|jjt ddd |j |_n.|j d dd ||jj tdtB7<dS)NrCrjrrrHrDrr"z0unexpected-character-in-unquoted-attribute-value)r#r$r$rr[zinvalid-codepointu�z eof-in-attribute-value-no-quotesT)rrrHrDrrKrKrK)rCrjrrrHrDrr[)rr6rrprrTrZr%r7rrrrr^r:)rr$r r r!rs,           z)HTMLTokenizer.attributeValueUnQuotedStatecCs|jj}|tkr|j|_n|dkr.|jnp|dkr@|j|_n^|tkrt|jj t ddd|jj ||j |_n*|jj t ddd|jj ||j|_dS)Nrjrir"z$unexpected-EOF-after-attribute-value)r#r$z*unexpected-character-after-attribute-valueT) rr6rrprrZrqrr%r7rr=r)rr$r r r!r s"           z&HTMLTokenizer.afterAttributeValueStatecCs|jj}|dkr&d|jd<|jn^|tkrZ|jjtddd|jj||j |_ n*|jjtddd|jj||j |_ dS)NrjTrWr"z#unexpected-EOF-after-solidus-in-tag)r#r$z)unexpected-character-after-solidus-in-tag) rr6rrZrr%r7rr=rrrp)rr$r r r!rq4s          z&HTMLTokenizer.selfClosingStartTagStatecCsD|jjd}|jdd}|jjtd|d|jj|j|_dS)Nrjr[u�Comment)r#r$T) rr^replacer%r7rr6rr)rr$r r r!roFs   zHTMLTokenizer.bogusCommentStatecCs|jjg}|ddkrT|j|jj|ddkrPtddd|_|j|_dSn|ddkrd}x.d&D]&}|j|jj|d'|krjd}PqjW|rtdddddd|_|j|_dSn|d(dkrH|jdk rH|jj j rH|jj j d)j |jj j krHd}x2d*D]*}|j|jj|d+|krd}PqW|rH|j |_dS|jjtdddx|rz|jj|jq`W|j|_dS),Nrr|rr,)r#r$TdDoOr@CtTyYpPeEFZDoctype)r#rUpublicIdsystemIdcorrect[Ar"zexpected-dashes-or-doctyperKrKrK)rrrrr@rrrrrrrrr)rrrrrrrKrKrK)rrrrrrrK)rr6r7rrcommentStartStater doctypeStaterZtreeZ openElements namespaceZdefaultNamespacecdataSectionStater%r=r'ro)rr?matchedexpectedr r r!rlUsR           
z(HTMLTokenizer.markupDeclarationOpenStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd d|jj|j|j|_nP|t kr|jjtdd d|jj|j|j|_n|jd|7<|j |_d S) Nr|r[r"zinvalid-codepoint)r#r$r$u�rjzincorrect-commentzeof-in-commentT) rr6commentStartDashStaterr%r7rrrr commentState)rr$r r r!rs(          zHTMLTokenizer.commentStartStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd d|jj|j|j|_nT|t kr|jjtdd d|jj|j|j|_n|jdd|7<|j |_d S) Nr|r[r"zinvalid-codepoint)r#r$r$u-�rjzincorrect-commentzeof-in-commentT) rr6commentEndStaterr%r7rrrrr)rr$r r r!rs(          z#HTMLTokenizer.commentStartDashStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<nT|tkr|jjtddd|jj|j|j |_n|jd||jj d 7<d S) Nr|r[r"zinvalid-codepoint)r#r$r$u�zeof-in-commentT)r|r[) rr6commentEndDashStaterr%r7rrrrr^)rr$r r r!rs        zHTMLTokenizer.commentStatecCs|jj}|dkr|j|_n|dkrV|jjtddd|jdd7<|j|_nT|t kr|jjtddd|jj|j|j |_n|jdd|7<|j|_d S) Nr|r[r"zinvalid-codepoint)r#r$r$u-�zeof-in-comment-end-dashT) rr6rrr%r7rrrrr)rr$r r r!rs         z!HTMLTokenizer.commentEndDashStatecCs,|jj}|dkr*|jj|j|j|_n|dkrd|jjtddd|jdd7<|j|_n|dkr|jjtdd d|j |_n|d kr|jjtdd d|jd|7<nj|t kr|jjtdd d|jj|j|j|_n4|jjtdd d|jdd|7<|j|_dS)Nrjr[r"zinvalid-codepoint)r#r$r$u--�rhz,unexpected-bang-after-double-dash-in-commentr|z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T) rr6r%r7rrrrrcommentEndBangStater)rr$r r r!rs6               zHTMLTokenizer.commentEndStatecCs|jj}|dkr*|jj|j|j|_n|dkrN|jdd7<|j|_n|dkr|jjtddd|jdd 7<|j |_nT|t kr|jjtdd d|jj|j|j|_n|jdd|7<|j |_d S) Nrjr|r$z--!r[r"zinvalid-codepoint)r#r$u--!�zeof-in-comment-end-bang-stateT) rr6r%r7rrrrrrr)rr$r r r!rs(         z!HTMLTokenizer.commentEndBangStatecCs|jj}|tkr|j|_nj|tkr\|jjtdddd|j d<|jj|j |j |_n*|jjtddd|jj ||j|_dS)Nr"z!expected-doctype-name-but-got-eof)r#r$Frzneed-space-after-doctypeT) rr6rbeforeDoctypeNameStaterrr%r7rrrr=)rr$r r r!r s         zHTMLTokenizer.doctypeStatecCs|jj}|tkrn|dkrT|jjtdddd|jd<|jj|j|j|_n|dkr|jjtdddd |jd <|j |_nR|t kr|jjtdd dd|jd<|jj|j|j|_n||jd <|j |_d S) 
Nrjr"z+expected-doctype-name-but-got-right-bracket)r#r$Frr[zinvalid-codepointu�rUz!expected-doctype-name-but-got-eofT) rr6rr%r7rrrrdoctypeNameStater)rr$r r r!rs.              z$HTMLTokenizer.beforeDoctypeNameStatecCs|jj}|tkr2|jdjt|jd<|j|_n|dkrh|jdjt|jd<|jj |j|j |_n|dkr|jj t ddd|jdd7<|j |_nh|t kr|jj t dddd |jd <|jdjt|jd<|jj |j|j |_n|jd|7<d S) NrUrjr[r"zinvalid-codepoint)r#r$u�zeof-in-doctype-nameFrT)rr6rrrXr afterDoctypeNameStaterr%r7rrrr)rr$r r r!r6s,          zHTMLTokenizer.doctypeNameStatecCsR|jj}|tkrn8|dkr8|jj|j|j|_n|tkrd|jd<|jj ||jjt ddd|jj|j|j|_n|d!krd }x$d'D]}|jj}||krd}PqW|r|j |_d SnJ|d(krd }x(d.D] }|jj}||krd}PqW|r|j |_d S|jj ||jjt ddd|id d|jd<|j |_d S)/NrjFrr"zeof-in-doctype)r#r$rrTuUbBlLiIr@rsSrrrrrrmMz*expected-space-or-right-bracket-in-doctyper$)r#r$r.)rrrrrrrrrrr@r)rrrrr)rrrrrrrrrrrr)rrrrr)rr6rr%r7rrrrr=rafterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)rr$rrr r r!rOsT              z#HTMLTokenizer.afterDoctypeNameStatecCs|jj}|tkr|j|_n|d krP|jjtddd|jj||j|_nT|t kr|jjtdddd|j d<|jj|j |j |_n|jj||j|_d S) Nrrr"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFrT)rr) rr6r"beforeDoctypePublicIdentifierStaterr%r7rr=rrr)rr$r r r!rs"           z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs|jj}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jjt dddd |jd <|jj|j|j |_nh|t kr|jjt dd dd |jd <|jj|j|j |_n(|jjt dd dd |jd <|j |_d S)Nrr,rrrjr"zunexpected-end-of-doctype)r#r$Frzeof-in-doctypezunexpected-char-in-doctypeT) rr6rr(doctypePublicIdentifierDoubleQuotedStater(doctypePublicIdentifierSingleQuotedStater%r7rrrr)rr$r r r!rs4                z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd dd |jd <|jj|j|j|_nR|t kr|jjtdd dd |jd <|jj|j|j|_n|jd|7<d S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT) rr6!afterDoctypePublicIdentifierStaterr%r7rrrr)rr$r r r!rs*            z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd dd |jd 
<|jj|j|j|_nR|t kr|jjtdd dd |jd <|jj|j|j|_n|jd|7<d S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT) rr6rrr%r7rrrr)rr$r r r!rs*            z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs |jj}|tkr|j|_n|dkr<|jj|j|j|_n|dkrn|jjt dddd|jd<|j |_n|dkr|jjt dddd|jd<|j |_nh|t kr|jjt dd dd |jd <|jj|j|j|_n(|jjt dddd |jd <|j |_d S) Nrjrr"zunexpected-char-in-doctype)r#r$r,rrzeof-in-doctypeFrT)rr6r-betweenDoctypePublicAndSystemIdentifiersStaterr%r7rrr(doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStaterr)rr$r r r!rs6                  z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs|jj}|tkrn|dkr4|jj|j|j|_n|dkrPd|jd<|j|_n|dkrld|jd<|j |_nh|t kr|jjt dddd |jd <|jj|j|j|_n(|jjt dd dd |jd <|j |_d S) Nrjrr,rrr"zeof-in-doctype)r#r$Frzunexpected-char-in-doctypeT) rr6rr%r7rrrrrrrr)rr$r r r!rs.             z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs|jj}|tkr|j|_n|d krP|jjtddd|jj||j|_nT|t kr|jjtdddd|j d<|jj|j |j |_n|jj||j|_d S) Nrrr"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFrT)rr) rr6r"beforeDoctypeSystemIdentifierStaterr%r7rr=rrr)rr$r r r!rs"           z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs|jj}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jjt dddd |jd <|jj|j|j |_nh|t kr|jjt dd dd |jd <|jj|j|j |_n(|jjt dddd |jd <|j |_d S) Nrr,rrrjr"zunexpected-char-in-doctype)r#r$Frzeof-in-doctypeT) rr6rrrrrr%r7rrrr)rr$r r r!r/s4                z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd dd |jd <|jj|j|j|_nR|t kr|jjtdd dd |jd <|jj|j|j|_n|jd|7<d S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT) rr6!afterDoctypeSystemIdentifierStaterr%r7rrrr)rr$r r r!rLs*            z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd dd |jd <|jj|j|j|_nR|t kr|jjtdd dd |jd <|jj|j|j|_n|jd|7<d S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT) rr6rrr%r7rrrr)rr$r r 
r!rds*            z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs|jj}|tkrn~|dkr4|jj|j|j|_n^|tkrt|jjt dddd|jd<|jj|j|j|_n|jjt ddd|j |_dS) Nrjr"zeof-in-doctype)r#r$Frzunexpected-char-in-doctypeT) rr6rr%r7rrrrrr)rr$r r r!r|s         z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|jj}|dkr*|jj|j|j|_n,|tkrV|jj||jj|j|j|_ndS)NrjT) rr6r%r7rrrrr=)rr$r r r!rs    zHTMLTokenizer.bogusDoctypeStatecCsg}xt|j|jjd|j|jjd|jj}|tkr@Pq|ddddkrl|ddd|d<Pq|j|qWdj|}|jd}|dkrx&t|D]}|jjt d d d qW|j dd }|r|jjt d |d |j |_ dS)N]rjrz]]r,r[rr"zinvalid-codepoint)r#r$u�rJTrKrKrrK) r7rr^r6rr9countranger%rrrr)rr$r6Z nullCountrr r r!rs.       zHTMLTokenizer.cdataSectionState)N)NF)N__name__ __module__ __qualname____doc__rr)rBrSrTrZrr\rbr`rdrfrgr]rmrnrarsrtrcrwrxreryr{rzr}rrr~rrrrrrrrrrprrrrrrrrqrorlrrrrrrrrrrrrrrrrrrrrrrr __classcell__r r )rr!rs H P#         6 "-3rN)Z __future__rrrZsixrr; collectionsrZ constantsrr r r r r rrrrZ _inputstreamrZ_trierrLobjectrr r r r!s       __pycache__/_tokenizer.cpython-36.pyc000064400000122125147204715120013561 0ustar003 B;W+@sddlmZmZmZddlmZddlmZddl m Z ddl m Z ddl m Z m Z ddl mZmZmZdd l mZmZdd l mZdd lmZdd lmZee ZGd ddeZdS))absolute_importdivisionunicode_literals)unichr)deque)spaceCharacters)entities) asciiLettersasciiUpper2Lower)digits hexDigitsEOF) tokenTypes tagTokenTypes)replacementCharacters)HTMLInputStream)TriecsdeZdZdZdfdd ZddZddZdd d Zd d ZddZ ddZ ddZ ddZ ddZ ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9Zd:d;Zdd?Z!d@dAZ"dBdCZ#dDdEZ$dFdGZ%dHdIZ&dJdKZ'dLdMZ(dNdOZ)dPdQZ*dRdSZ+dTdUZ,dVdWZ-dXdYZ.dZd[Z/d\d]Z0d^d_Z1d`daZ2dbdcZ3dddeZ4dfdgZ5dhdiZ6djdkZ7dldmZ8dndoZ9dpdqZ:drdsZ;dtduZdzd{Z?d|d}Z@d~dZAddZBddZCddZDddZEddZFddZGddZHddZIddZJddZKddZLZMS) HTMLTokenizera  This class takes care of tokenizing HTML. * self.currentToken Holds the token that is currently being processed. * self.state Holds a reference to the method to be invoked... XXX * self.stream Points to HTMLInputStream object. 
Nc sFt|f||_||_d|_g|_|j|_d|_d|_t t |j dS)NF) rstreamparserZ escapeFlagZ lastFourChars dataStatestateescape currentTokensuperr__init__)selfrrkwargs) __class__ /usr/lib/python3.6/_tokenizer.pyr"szHTMLTokenizer.__init__ccs\tg|_xL|jrVx&|jjr:td|jjjddVqWx|jrR|jjVq>Wq WdS)z This is where the magic happens. We do our usually processing through the states and when we have a token to return we yield the token which pauses processing until the next token is requested. ParseErrorr)typedataN)r tokenQueuerrerrorsrpoppopleft)rr r r!__iter__1s    zHTMLTokenizer.__iter__c %Cs(t}d}|rt}d}g}|jj}x(||krJ|tk rJ|j||jj}q$Wtdj||}|tkrt|}|j jt ddd|idnld|kod kns|d krd }|j jt ddd|idn(d |kod knsd|kodknsd|kodknsd|ko4dkns|t ddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d g#kr|j jt ddd|idy t |}Wn>t k r|d6}t d|d?Bt d7|d8@B}YnX|d9kr$|j jt dd:d;|jj||S)r"z'expected-tag-name-but-got-right-bracket)r#r$rJz<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerDT)rr6markupDeclarationOpenStatercloseTagOpenStater rr tagNameStater%r7rr=bogusCommentState)rr$r r r!r]is6               zHTMLTokenizer.tagOpenStatecCs|jj}|tkr0td|gdd|_|j|_n|dkrX|jjtddd|j |_nn|t kr|jjtddd|jjtd d d|j |_n0|jjtdd d |id |jj ||j |_dS)NrVF)r#rUr$rWrjr"z*expected-closing-tag-but-got-right-bracket)r#r$z expected-closing-tag-but-got-eofrJz|tkr|jjtdd d|j |_n|jjtd|dd S) Nr|rJ)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT) rr6r%r7r scriptDataDoubleEscapedDashStater(scriptDataDoubleEscapedLessThanSignStaterr)rr$r r r!rs$          z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs|jj}|dkr2|jjtddd|j|_n|dkrZ|jjtddd|j|_n|dkr|jjtddd|jjtddd|j|_nF|t kr|jjtdd d|j |_n|jjtd|d|j|_d S) Nr|rJ)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT) rr6r%r7r$scriptDataDoubleEscapedDashDashStaterrrrr)rr$r r r!rs(           z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|jj}|dkr*|jjtdddn|dkrR|jjtddd|j|_n|dkrz|jjtddd|j|_n|dkr|jjtddd|jjtdd d|j|_nF|t kr|jjtdd d|j |_n|jjtd|d|j|_d S) 
Nr|rJ)r#r$rDrjr[r"zinvalid-codepointu�zeof-in-script-in-scriptT) rr6r%r7rrrrfrrr)rr$r r r!rs,           z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|jj}|dkr8|jjtdddd|_|j|_n|jj||j |_dS)NrirJ)r#r$r,T) rr6r%r7rrrscriptDataDoubleEscapeEndStaterr=r)rr$r r r!r0s   z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs|jj}|ttdBkrR|jjtd|d|jjdkrH|j |_ q|j |_ nB|t kr|jjtd|d|j|7_n|jj ||j |_ dS)NrirjrJ)r#r$rT)rirj)rr6rr:r%r7rrrrurrrr r=)rr$r r r!r;s    z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|jj}|tkr$|jjtdn|tkrJ|jdj|dg|j|_n|dkr\|j n|dkrn|j |_n|dkr|j jt d d d |jdj|dg|j|_n|d kr|j jt d dd |jdjddg|j|_nF|t kr|j jt d dd |j|_n|jdj|dg|j|_dS)NTr$r,rjri'"rHrDr"z#invalid-character-in-attribute-name)r#r$r[zinvalid-codepointu�z#expected-attribute-name-but-got-eof)rrrHrD)rr6rr^r rr7attributeNameStaterrZrqr%rrr)rr$r r r!rpKs6              z&HTMLTokenizer.beforeAttributeNameStatecCs|jj}d}d}|dkr&|j|_n0|tkr^|jddd||jjtd7<d}n|dkrld}n|tkr~|j|_n|dkr|j |_n|d kr|j j t d d d |jdddd 7<d}n|dkr|j j t d dd |jddd|7<d}nH|t kr8|j j t d dd |j|_n|jddd|7<d}|r|jdddjt|jddd<xP|jdddD]:\}}|jddd|kr|j j t d dd PqW|r|jdS)NTFrHr$rrrjrir[r"zinvalid-codepoint)r#r$u�rrrDz#invalid-character-in-attribute-namezeof-in-attribute-namezduplicate-attributerKrK)rrrDrKrKrKrKrKrK)rr6beforeAttributeValueStaterr rr^rafterAttributeNameStaterqr%r7rrrrXr rZ)rr$ZleavingThisStateZ emitTokenrU_r r r!risR             &  z HTMLTokenizer.attributeNameStatecCsF|jj}|tkr$|jjtdn|dkr8|j|_n |dkrJ|jn|tkrp|jdj |dg|j |_n|dkr|j |_n|dkr|j j t dd d |jdj d dg|j |_n|dkr|j j t ddd |jdj |dg|j |_nF|tkr&|j j t ddd |j|_n|jdj |dg|j |_dS)NTrHrjr$r,rir[r"zinvalid-codepoint)r#r$u�rrrDz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)rrrD)rr6rr^rrrZr rr7rrqr%rrr)rr$r r r!rs:                z%HTMLTokenizer.afterAttributeNameStatecCsj|jj}|tkr$|jjtdnB|dkr8|j|_n.|dkrX|j|_|jj|n|dkrl|j|_n|dkr|j j t ddd|j n|d kr|j j t dd d|j d dd d 7<|j|_n|dkr|j j t ddd|j d dd |7<|j|_nL|tkrD|j j t ddd|j|_n"|j d dd 
|7<|j|_dS)NTrrCrrjr"z.expected-attribute-value-but-got-right-bracket)r#r$r[zinvalid-codepointr$ru�rHrD`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eofrK)rHrDrrKrK)rr6rr^attributeValueDoubleQuotedStaterattributeValueUnQuotedStater=attributeValueSingleQuotedStater%r7rrZrrr)rr$r r r!rs>                 z'HTMLTokenizer.beforeAttributeValueStatecCs|jj}|dkr|j|_n|dkr0|jdn|dkrj|jjtddd|jdd dd 7<nN|t kr|jjtdd d|j |_n&|jdd d||jj d7<d S)NrrCr[r"zinvalid-codepoint)r#r$r$ru�z#eof-in-attribute-value-double-quoteTrKrK)rrCr[) rr6afterAttributeValueStaterrTr%r7rrrrr^)rr$r r r!rs         z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs|jj}|dkr|j|_n|dkr0|jdn|dkrj|jjtddd|jdd dd 7<nN|t kr|jjtdd d|j |_n&|jdd d||jj d7<d S)NrrCr[r"zinvalid-codepoint)r#r$r$ru�z#eof-in-attribute-value-single-quoteTrKrK)rrCr[) rr6rrrTr%r7rrrrr^)rr$r r r!rs         z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|jj}|tkr|j|_n|dkr2|jdn|dkrD|jn|dkr~|jjt dd d |j d dd |7<n|d kr|jjt ddd |j d dd d7<nV|t kr|jjt ddd |j |_n.|j d dd ||jj tdtB7<dS)NrCrjrrrHrDrr"z0unexpected-character-in-unquoted-attribute-value)r#r$r$rr[zinvalid-codepointu�z eof-in-attribute-value-no-quotesT)rrrHrDrrKrKrK)rCrjrrrHrDrr[)rr6rrprrTrZr%r7rrrrr^r:)rr$r r r!rs,           z)HTMLTokenizer.attributeValueUnQuotedStatecCs|jj}|tkr|j|_n|dkr.|jnp|dkr@|j|_n^|tkrt|jj t ddd|jj ||j |_n*|jj t ddd|jj ||j|_dS)Nrjrir"z$unexpected-EOF-after-attribute-value)r#r$z*unexpected-character-after-attribute-valueT) rr6rrprrZrqrr%r7rr=r)rr$r r r!r s"           z&HTMLTokenizer.afterAttributeValueStatecCs|jj}|dkr&d|jd<|jn^|tkrZ|jjtddd|jj||j |_ n*|jjtddd|jj||j |_ dS)NrjTrWr"z#unexpected-EOF-after-solidus-in-tag)r#r$z)unexpected-character-after-solidus-in-tag) rr6rrZrr%r7rr=rrrp)rr$r r r!rq4s          z&HTMLTokenizer.selfClosingStartTagStatecCsD|jjd}|jdd}|jjtd|d|jj|j|_dS)Nrjr[u�Comment)r#r$T) rr^replacer%r7rr6rr)rr$r r r!roFs   
zHTMLTokenizer.bogusCommentStatecCs|jjg}|ddkrT|j|jj|ddkrPtddd|_|j|_dSn|ddkrd}x.d&D]&}|j|jj|d'|krjd}PqjW|rtdddddd|_|j|_dSn|d(dkrH|jdk rH|jj j rH|jj j d)j |jj j krHd}x2d*D]*}|j|jj|d+|krd}PqW|rH|j |_dS|jjtdddx|rz|jj|jq`W|j|_dS),Nrr|rr,)r#r$TdDoOr@CtTyYpPeEFZDoctype)r#rUpublicIdsystemIdcorrect[Ar"zexpected-dashes-or-doctyperKrKrK)rrrrr@rrrrrrrrr)rrrrrrrKrKrK)rrrrrrrK)rr6r7rrcommentStartStater doctypeStaterZtreeZ openElements namespaceZdefaultNamespacecdataSectionStater%r=r'ro)rr?matchedexpectedr r r!rlUsR           z(HTMLTokenizer.markupDeclarationOpenStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd d|jj|j|j|_nP|t kr|jjtdd d|jj|j|j|_n|jd|7<|j |_d S) Nr|r[r"zinvalid-codepoint)r#r$r$u�rjzincorrect-commentzeof-in-commentT) rr6commentStartDashStaterr%r7rrrr commentState)rr$r r r!rs(          zHTMLTokenizer.commentStartStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd d|jj|j|j|_nT|t kr|jjtdd d|jj|j|j|_n|jdd|7<|j |_d S) Nr|r[r"zinvalid-codepoint)r#r$r$u-�rjzincorrect-commentzeof-in-commentT) rr6commentEndStaterr%r7rrrrr)rr$r r r!rs(          z#HTMLTokenizer.commentStartDashStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<nT|tkr|jjtddd|jj|j|j |_n|jd||jj d 7<d S) Nr|r[r"zinvalid-codepoint)r#r$r$u�zeof-in-commentT)r|r[) rr6commentEndDashStaterr%r7rrrrr^)rr$r r r!rs        zHTMLTokenizer.commentStatecCs|jj}|dkr|j|_n|dkrV|jjtddd|jdd7<|j|_nT|t kr|jjtddd|jj|j|j |_n|jdd|7<|j|_d S) Nr|r[r"zinvalid-codepoint)r#r$r$u-�zeof-in-comment-end-dashT) rr6rrr%r7rrrrr)rr$r r r!rs         z!HTMLTokenizer.commentEndDashStatecCs,|jj}|dkr*|jj|j|j|_n|dkrd|jjtddd|jdd7<|j|_n|dkr|jjtdd d|j |_n|d kr|jjtdd d|jd|7<nj|t kr|jjtdd d|jj|j|j|_n4|jjtdd d|jdd|7<|j|_dS)Nrjr[r"zinvalid-codepoint)r#r$r$u--�rhz,unexpected-bang-after-double-dash-in-commentr|z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T) rr6r%r7rrrrrcommentEndBangStater)rr$r r r!rs6               zHTMLTokenizer.commentEndStatecCs|jj}|dkr*|jj|j|j|_n|dkrN|jdd7<|j|_n|dkr|jjtddd|jdd 7<|j |_nT|t kr|jjtdd 
d|jj|j|j|_n|jdd|7<|j |_d S) Nrjr|r$z--!r[r"zinvalid-codepoint)r#r$u--!�zeof-in-comment-end-bang-stateT) rr6r%r7rrrrrrr)rr$r r r!rs(         z!HTMLTokenizer.commentEndBangStatecCs|jj}|tkr|j|_nj|tkr\|jjtdddd|j d<|jj|j |j |_n*|jjtddd|jj ||j|_dS)Nr"z!expected-doctype-name-but-got-eof)r#r$Frzneed-space-after-doctypeT) rr6rbeforeDoctypeNameStaterrr%r7rrrr=)rr$r r r!r s         zHTMLTokenizer.doctypeStatecCs|jj}|tkrn|dkrT|jjtdddd|jd<|jj|j|j|_n|dkr|jjtdddd |jd <|j |_nR|t kr|jjtdd dd|jd<|jj|j|j|_n||jd <|j |_d S) Nrjr"z+expected-doctype-name-but-got-right-bracket)r#r$Frr[zinvalid-codepointu�rUz!expected-doctype-name-but-got-eofT) rr6rr%r7rrrrdoctypeNameStater)rr$r r r!rs.              z$HTMLTokenizer.beforeDoctypeNameStatecCs|jj}|tkr2|jdjt|jd<|j|_n|dkrh|jdjt|jd<|jj |j|j |_n|dkr|jj t ddd|jdd7<|j |_nh|t kr|jj t dddd |jd <|jdjt|jd<|jj |j|j |_n|jd|7<d S) NrUrjr[r"zinvalid-codepoint)r#r$u�zeof-in-doctype-nameFrT)rr6rrrXr afterDoctypeNameStaterr%r7rrrr)rr$r r r!r6s,          zHTMLTokenizer.doctypeNameStatecCsR|jj}|tkrn8|dkr8|jj|j|j|_n|tkrd|jd<|jj ||jjt ddd|jj|j|j|_n|d!krd }x$d'D]}|jj}||krd}PqW|r|j |_d SnJ|d(krd }x(d.D] }|jj}||krd}PqW|r|j |_d S|jj ||jjt ddd|id d|jd<|j |_d S)/NrjFrr"zeof-in-doctype)r#r$rrTuUbBlLiIr@rsSrrrrrrmMz*expected-space-or-right-bracket-in-doctyper$)r#r$r.)rrrrrrrrrrr@r)rrrrr)rrrrrrrrrrrr)rrrrr)rr6rr%r7rrrrr=rafterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)rr$rrr r r!rOsT              z#HTMLTokenizer.afterDoctypeNameStatecCs|jj}|tkr|j|_n|d krP|jjtddd|jj||j|_nT|t kr|jjtdddd|j d<|jj|j |j |_n|jj||j|_d S) Nrrr"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFrT)rr) rr6r"beforeDoctypePublicIdentifierStaterr%r7rr=rrr)rr$r r r!rs"           z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs|jj}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jjt dddd |jd <|jj|j|j |_nh|t kr|jjt dd dd |jd <|jj|j|j |_n(|jjt dd dd |jd <|j |_d S)Nrr,rrrjr"zunexpected-end-of-doctype)r#r$Frzeof-in-doctypezunexpected-char-in-doctypeT) 
rr6rr(doctypePublicIdentifierDoubleQuotedStater(doctypePublicIdentifierSingleQuotedStater%r7rrrr)rr$r r r!rs4                z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd dd |jd <|jj|j|j|_nR|t kr|jjtdd dd |jd <|jj|j|j|_n|jd|7<d S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT) rr6!afterDoctypePublicIdentifierStaterr%r7rrrr)rr$r r r!rs*            z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd dd |jd <|jj|j|j|_nR|t kr|jjtdd dd |jd <|jj|j|j|_n|jd|7<d S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT) rr6rrr%r7rrrr)rr$r r r!rs*            z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs |jj}|tkr|j|_n|dkr<|jj|j|j|_n|dkrn|jjt dddd|jd<|j |_n|dkr|jjt dddd|jd<|j |_nh|t kr|jjt dd dd |jd <|jj|j|j|_n(|jjt dddd |jd <|j |_d S) Nrjrr"zunexpected-char-in-doctype)r#r$r,rrzeof-in-doctypeFrT)rr6r-betweenDoctypePublicAndSystemIdentifiersStaterr%r7rrr(doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStaterr)rr$r r r!rs6                  z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs|jj}|tkrn|dkr4|jj|j|j|_n|dkrPd|jd<|j|_n|dkrld|jd<|j |_nh|t kr|jjt dddd |jd <|jj|j|j|_n(|jjt dd dd |jd <|j |_d S) Nrjrr,rrr"zeof-in-doctype)r#r$Frzunexpected-char-in-doctypeT) rr6rr%r7rrrrrrrr)rr$r r r!rs.             
z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs|jj}|tkr|j|_n|d krP|jjtddd|jj||j|_nT|t kr|jjtdddd|j d<|jj|j |j |_n|jj||j|_d S) Nrrr"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFrT)rr) rr6r"beforeDoctypeSystemIdentifierStaterr%r7rr=rrr)rr$r r r!rs"           z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs|jj}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jjt dddd |jd <|jj|j|j |_nh|t kr|jjt dd dd |jd <|jj|j|j |_n(|jjt dddd |jd <|j |_d S) Nrr,rrrjr"zunexpected-char-in-doctype)r#r$Frzeof-in-doctypeT) rr6rrrrrr%r7rrrr)rr$r r r!r/s4                z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd dd |jd <|jj|j|j|_nR|t kr|jjtdd dd |jd <|jj|j|j|_n|jd|7<d S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT) rr6!afterDoctypeSystemIdentifierStaterr%r7rrrr)rr$r r r!rLs*            z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs|jj}|dkr|j|_n|dkrN|jjtddd|jdd7<n|dkr|jjtdd dd |jd <|jj|j|j|_nR|t kr|jjtdd dd |jd <|jj|j|j|_n|jd|7<d S)Nrr[r"zinvalid-codepoint)r#r$ru�rjzunexpected-end-of-doctypeFrzeof-in-doctypeT) rr6rrr%r7rrrr)rr$r r r!rds*            z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs|jj}|tkrn~|dkr4|jj|j|j|_n^|tkrt|jjt dddd|jd<|jj|j|j|_n|jjt ddd|j |_dS) Nrjr"zeof-in-doctype)r#r$Frzunexpected-char-in-doctypeT) rr6rr%r7rrrrrr)rr$r r r!r|s         z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|jj}|dkr*|jj|j|j|_n,|tkrV|jj||jj|j|j|_ndS)NrjT) rr6r%r7rrrrr=)rr$r r r!rs    zHTMLTokenizer.bogusDoctypeStatecCsg}x|j|jjd|j|jjd|jj}|tkr@Pq|dksLt|ddddkrx|ddd|d<Pq|j|qWdj|}|jd}|dkrx&t|D]}|j jt d d d qW|j dd }|r|j jt d |d |j |_ dS)N]rjrz]]r,r[rr"zinvalid-codepoint)r#r$u�rJTrKrKrrK)r7rr^r6rAssertionErrorr9countranger%rrrr)rr$r6Z nullCountrr r r!rs0        zHTMLTokenizer.cdataSectionState)N)NF)N__name__ __module__ __qualname____doc__rr)rBrSrTrZrr\rbr`rdrfrgr]rmrnrarsrtrcrwrxreryr{rzr}rrr~rrrrrrrrrrprrrrrrrrqrorlrrrrrrrrrrrrrrrrrrrrrrr __classcell__r r )rr!rs H P#         6 
"-3rN)Z __future__rrrZsixrr; collectionsrZ constantsrr r r r r rrrrZ _inputstreamrZ_trierrLobjectrr r r r!s       __pycache__/_utils.cpython-36.opt-1.pyc000064400000006220147204715120013643 0ustar003 B;W@sddlmZmZmZddlZddlmZddlmZyddl j j Z Wn e k rdddlj jZ YnXddddd d d gZejdd koejd dkZyedZeeesedZWndZYnXdZGdddeZddZddZdd ZddZdS))absolute_importdivisionunicode_literalsN) ModuleType) text_type default_etreeMethodDispatcherisSurrogatePairsurrogatePairToCodepointmoduleFactoryFactorysupports_lone_surrogatesPY27z"\uD800"z u"\uD800"FTc@s$eZdZdZffddZddZdS)rapDict with 2 special properties: On initiation, keys that are lists, sets or tuples are converted to multiple keys so accessing any one of the items in the original list-like object returns the matching value md = MethodDispatcher({("foo", "bar"):"baz"}) md["foo"] == "baz" A default value which can be set through the default attribute. cCsjg}xN|D]F\}}t|ttttfrBx*|D]}|j||fq*Wq |j||fq Wtj||d|_dS)N) isinstancelisttuple frozensetsetappenddict__init__default)selfitemsZ _dictEntriesnamevalueitemr/usr/lib/python3.6/_utils.pyr4s  zMethodDispatcher.__init__cCstj|||jS)N)rgetr)rkeyrrr __getitem__CszMethodDispatcher.__getitem__N)__name__ __module__ __qualname____doc__rr#rrrr r's  cCsLt|dkoJt|ddkoJt|ddkoJt|ddkoJt|ddkS)Nrriirii)lenord)datarrr r Js  cCs,dt|dddt|dd}|S)Niriiri)r))r*Zchar_valrrr r Pscsifdd}|S)Nc sttjtdrd|j}n d|j}t|j}y|||Stk rt|}|f||}|jj|dkri|<d|kri||<d||kri|||<||||<|SXdS)Nz _%s_factorys _%s_factoryrargskwargs) rrr$typerrKeyError__dict__update)Z baseModuler,r-rZ kwargs_tuplemodZobjs)factory moduleCacherr moduleFactory\s$      z+moduleFactoryFactory..moduleFactoryr)r3r5r)r3r4r r Yscsifdd}|S)Ncs2t|t|jf}|kr*|||<|S)N)rr)r,r-r")cachefuncrr wrappedyszmemoize..wrappedr)r7r8r)r6r7r memoizevsr9)Z __future__rrrsystypesrZsixrZxml.etree.cElementTreeZetreeZ cElementTreer ImportErrorZxml.etree.ElementTreeZ ElementTree__all__ version_infor evalZ_xrr rrr r r r9rrrr s0    # __pycache__/_utils.cpython-36.pyc000064400000006316147204715120012712 
0ustar003 B;W@s ddlmZmZmZddlZddlmZddlmZyddl j j Z Wn e k rdddlj jZ YnXddddd d d gZejdd koejd dkZy,edZeeesedZeeestWndZYnXdZGdddeZddZddZdd ZddZdS))absolute_importdivisionunicode_literalsN) ModuleType) text_type default_etreeMethodDispatcherisSurrogatePairsurrogatePairToCodepointmoduleFactoryFactorysupports_lone_surrogatesPY27z"\uD800"z u"\uD800"FTc@s$eZdZdZffddZddZdS)rapDict with 2 special properties: On initiation, keys that are lists, sets or tuples are converted to multiple keys so accessing any one of the items in the original list-like object returns the matching value md = MethodDispatcher({("foo", "bar"):"baz"}) md["foo"] == "baz" A default value which can be set through the default attribute. cCs~g}xN|D]F\}}t|ttttfrBx*|D]}|j||fq*Wq |j||fq Wtj||t|t|kstt d|_ dS)N) isinstancelisttuple frozensetsetappenddict__init__lenAssertionErrordefault)selfitemsZ _dictEntriesnamevalueitemr!/usr/lib/python3.6/_utils.pyr4s  zMethodDispatcher.__init__cCstj|||jS)N)rgetr)rkeyr!r!r" __getitem__CszMethodDispatcher.__getitem__N)__name__ __module__ __qualname____doc__rr%r!r!r!r"r's  cCsLt|dkoJt|ddkoJt|ddkoJt|ddkoJt|ddkS)Nrriirii)rord)datar!r!r"r Js  cCs,dt|dddt|dd}|S)Niriiri)r*)r+Zchar_valr!r!r"r Pscsifdd}|S)Nc sttjtdrd|j}n d|j}t|j}y|||Stk rt|}|f||}|jj|dkri|<d|kri||<d||kri|||<||||<|SXdS)Nz _%s_factorys _%s_factoryrargskwargs) rrr&typerrKeyError__dict__update)Z baseModuler-r.rZ kwargs_tuplemodZobjs)factory moduleCacher!r" moduleFactory\s$      z+moduleFactoryFactory..moduleFactoryr!)r4r6r!)r4r5r"r Yscsifdd}|S)Ncs2t|t|jf}|kr*|||<|S)N)rr)r-r.r$)cachefuncr!r"wrappedyszmemoize..wrappedr!)r8r9r!)r7r8r"memoizevsr:)Z __future__rrrsystypesrZsixrZxml.etree.cElementTreeZetreeZ cElementTreer ImportErrorZxml.etree.ElementTreeZ ElementTree__all__ version_infor evalZ_xrrr rrr r r r:r!r!r!r"s0    # __pycache__/constants.cpython-36.opt-1.pyc000064400000201300147204715120014354 0ustar003 B;WE@s-ddlmZmZmZddlZdZddddddd d d d d ddddddddddddddddddd 
d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d,d,d0d1d2d3d4d5d6d7d8d9d:d;dd?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddZdddddddZeeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgNZ eeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfgZ ddddddddddddddddddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/>Zd0d1iZd2d3ed2fd2d4ed2fd2d5ed2fd2d6ed2fd2d7ed2fd2ded2fd2d8ed2fd9ded9fd9d:ed9fd9d;ed9fddd?ejDZed@dAdBdCdDgZedddddgZeejZeejZeejZeejZeejZedEd?ejDZd|Z edddFddddddddddԐdGdHgZ!eddgZ"edddddddgZ#edIgedJgedKgedLdMgedLdMgedNdOgedPgedQdRgedSdRdTdUgedVgedWgedRdXgedRdXdYgedRdXgedRdZgedRdXd[dZdTdKgedRdXdZdQgedRdXgd\Z$d}Z%edydzd{d|d}gZ&d~d~dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐d͐dΐdϐdАdѐdҐdӐddѐdԐdՐd֐dÐdאdؐdِdڐdېdܐdݐdސdߐdddddddddddddԐddddddddddddddddddddddddddddddddddd d d d d d dddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d(d+dd,d-d.d/d0d0d1d1d2d3d4d5d5d4d6d7dڐd8d9d:d;d<d=d>d?d@dAdBdCdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdՐd֐dudvdwdxdydzd{d|d}d~ddddאdؐdِddddXdddddddddddddddddddddd"ddAddddddddddddddddddddddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐d͐dΐdϐdϐdАdѐdҐdҐdӐdӐdԐdՐd֐dאdאdؐdِdڐdېdܐdݐdސdߐdddddddddddddddddd'ddddddddddddddddddddddddddddddd dd d d d ddddddddddddddddddd dڐd!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0dߐd^d 
d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d?d@dAdBdCdDddEdFdGdHdFdIdIdJdKdLd@dMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d^d_d`dadbdcdcdddedfdgdgdhdidjdkdldmdndodpd1dqdrdsdtdudvdܐdݐdwdxdydzd{d|d}d~d~ddddddddddddddddddddddddddddddddddddddddddddddddddddddddɐdɐddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐dd͐dΐdϐdYdАdѐdҐdӐdԐdYdҐdՐdՐd֐dאdYddؐdؐdِdِddڐdېdܐdݐdސdߐdddddkddܐddddddݐdddddd:ddmddddddddddddd ddddddddddddddududdddddd d d d d dddddddddddddddd*dddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.dސdd/d/d0d1dߐdd2d3d4d5d5dd6d,d,d7d8d9d:d;d<d=d>d?d$d@dAdBdBdCdDdEdFdddGdHdHddIdJdKdKdLdMdNdOdPdQdRddSdTdUdVdPdWdXdYdZdZd[ddd\d]d^d3d^ddXd_dd`ddddadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdldmdydzd{d{dndwdydzdd|d}dԐd~dddߐddiddːdddϐdddddddddddddΐdΐdddѐdddddddddddddʐdӐddddddߐdddddddddddddddd ddddddddddddddddddddddddddddddddddddddddddÐdĐdĐdŐddddddƐdǐddȐdɐdʐdːd̐dӐdd͐dΐdΐdϐdϐdАdѐdddddҐdӐdԐdՐd֐dאdؐdِdڐdېdܐdݐdސd dߐddddddddddddddddddddddddddddddddddddddddddddddd"ddddddddd d d d dd d d̐d d dddddddddddddddd ddd֐ddd(ddgdddddddddd d!d"d#d$d%d&d'd(dd)d*dd+d+d;d,d,d-d.d/d/d֐d0d1d1d7d2d3d4d5d6d7d4d@d4d8d9d:dd;d<d=d8d9d>dd>d?d@dAdBdCdDd@dEdEdFddGdHdIdJdd<dKdLdMdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d}dՐd`dadvdbdcdddedXdfd]dgd]dhdidid^d_djdkd$dldmdndodpdqdrdsdtdudvdwdxdydzd{d|dadvd}d~dܐdddddd^doddsddgd`dddduddvdydydddddddhddudbdwdzddfddwdddsdddddddddddddddddddddbdddddldddddddidddddd`dddddddddzdddwdݐdddTdTddddddddddndddddddddddddddjdvddddddddÐdÐddאdĐdddŐd!ddƐdǐdddȐdɐddʐddːd̐d̐d͐dΐddϐdАdѐdҐddӐdԐdՐdƐd֐dאd̐dؐdِdڐd̐dېdېddddddܐdݐdސdːdߐdddddddxdxdddddddddddddddddАddddddddϐdddddddddddddddd͐ddddddddddddddddddddddddӐdddddddÐdŐdĐdd͐ddɐdʐdʐd͐ddddddddddÐdĐdddȐdǐdȐddddddddddddddd d d d d dddddddddddwdwddSdddTdUddddVdddddd d!d"d#d#d$d%dِddQd&d'dd(d)d*d+d,dd-d.d/dd0dRd1d2d2d3d3d4d4d5d6d7d8d2d9d9d:d;d;dd<d=d=d>dސd?d?dސd@dAddBdCdDdEdudFdGdHdIddJdKdߐdLddMddNdOd"dPddQdRdddSdTddUdVdWdWddXdYddddYdddZd[d\dd]dd[dZd\dd^d_d`ddddadbdcddded2dfdgdhd)didjdǐddd#dڐdkddldmdnd5dod dpdqd drdrd dsd 
dtdudvd%ddwdxdydzd{d|ddd}d~dddddddd~dddddddd$dd!dddddddddddd dyddddddzdddʐdddddddddddddddddddd%ddddddddddddddAddddCdBdddddddDdddddd ddddddddddddddddސddddddVddWdWdddddÐdĐdŐd^ddƐdǐddȐdɐdʐdːd̐d͐dΐdϐdАdѐdѐdސd7dҐd<dӐd8d9d8d9d:d;d:d;d6d6d d d dԐddȐd>dՐddŐdIdd֐dאdؐd@dِdڐdېdܐdݐdސd֐d@dאdܐdېdߐdddAddCdBddddDdEddddddddGddddHdddddddddGdHdddddddd!dddddddddddd[dddRdRddddYdVdUdYdVddd͐ddd#ddd3dddddddddJd dddnd ddd d d d dddddddddYddܐdddd1ddddddrddtdddddqddd d d!d"d#dѐdѐd$d%d1dsdqddnd&dyd&d'd(d(d)d*d+d,d-d.d dd'd/d/d0dݐd1d2dېd3dŐdWddIdLddsddd4d5d6d7ddldd8dd0d9d:d;ddd<dlddǐd=ddd>d5d4d7d6d?d@dAddBdCdDdEdCdddFd:ddmddGdؐddHdאdddIddJdddِddKddddddLdLdMdNdOdPdPdQdRdSdTdUdVdVdWdXdYdZdd[d\d]d^d_d`dadbdcZ'dddDdАdeddݐdddddOdEd,ddѐdfddgdhdݐddܐdd5ddd͐dJddddidXddj"Z(ddkdldmdndodpdqdrZ)ee)dse)dte)dugZ*edvd?ejDZ+dwe+d<Gdxdydye,Z-Gdzd{d{e.Z/dS(~)absolute_importdivisionunicode_literalsNz5Null character in input stream, replaced with U+FFFD.zInvalid codepoint in stream.z&Solidus (/) incorrectly placed in tag.z.Incorrect CR newline entity, replaced with LF.z9Entity used with illegal number (windows-1252 reference).zPNumeric entity couldn't be converted to character (codepoint U+%(charAsInt)08x).zBNumeric entity represents an illegal codepoint: U+%(charAsInt)08x.z#Numeric entity didn't end with ';'.z1Numeric entity expected. Got end of file instead.z'Numeric entity expected but none found.z!Named entity didn't end with ';'.z Named entity expected. Got none.z'End tag contains unexpected attributes.z.End tag contains unexpected self-closing flag.z#Expected tag name. Got '>' instead.zSExpected tag name. Got '?' instead. (HTML doesn't support processing instructions.)z-Expected tag name. Got something else insteadz6Expected closing tag. Got '>' instead. Ignoring ''.z-Expected closing tag. 
Unexpected end of file.z' instead.z"Unexpected = in unquoted attributez*Unexpected character in unquoted attributez*Unexpected character after attribute name.z+Unexpected character after attribute value.z.Unexpected end of file in attribute value (").z.Unexpected end of file in attribute value (').z*Unexpected end of file in attribute value.z)Unexpected end of file in tag. Expected >z/Unexpected character after / in tag. Expected >z&Expected '--' or 'DOCTYPE'. Not found.z Unexpected ! after -- in commentz$Unexpected space after -- in commentzIncorrect comment.z"Unexpected end of file in comment.z%Unexpected end of file in comment (-)z+Unexpected '-' after '--' found in comment.z'Unexpected end of file in comment (--).z&Unexpected character in comment found.z(No space after literal string 'DOCTYPE'.z.Unexpected > character. Expected DOCTYPE name.z.Unexpected end of file. Expected DOCTYPE name.z'Unexpected end of file in DOCTYPE name.z"Unexpected end of file in DOCTYPE.z%Expected space or '>'. Got '%(data)s'zUnexpected end of DOCTYPE.z Unexpected character in DOCTYPE.zXXX innerHTML EOFzUnexpected DOCTYPE. Ignored.z%html needs to be the first start tag.z)Unexpected End of file. Expected DOCTYPE.zErroneous DOCTYPE.z2Unexpected non-space characters. Expected DOCTYPE.z2Unexpected start tag (%(name)s). Expected DOCTYPE.z0Unexpected end tag (%(name)s). Expected DOCTYPE.z?Unexpected end tag (%(name)s) after the (implied) root element.z4Unexpected end of file. Expected end tag (%(name)s).z4Unexpected start tag head in existing head. Ignored.z'Unexpected end tag (%(name)s). Ignored.z;Unexpected start tag (%(name)s) that can be in head. Moved.z Unexpected start tag (%(name)s).zMissing end tag (%(name)s).zMissing end tags (%(name)s).zCUnexpected start tag (%(startName)s) implies end tag (%(endName)s).z@Unexpected start tag (%(originalName)s). Treated as %(newName)s.z,Unexpected start tag %(name)s. Don't use it!z'Unexpected start tag %(name)s. 
Ignored.zEUnexpected end tag (%(gotName)s). Missing end tag (%(expectedName)s).z:End tag (%(name)s) seen too early. Expected other end tag.zFUnexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).z+End tag (%(name)s) seen too early. Ignored.zQEnd tag (%(name)s) violates step 1, paragraph 1 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 1, paragraph 2 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 1, paragraph 3 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 4, paragraph 4 of the adoption agency algorithm.z>Unexpected end tag (%(originalName)s). Treated as %(newName)s.z'This element (%(name)s) has no end tag.z9Unexpected implied end tag (%(name)s) in the table phase.z>Unexpected implied end tag (%(name)s) in the table body phase.zDUnexpected non-space characters in table context caused voodoo mode.z3Unexpected input with type hidden in table context.z!Unexpected form in table context.zDUnexpected start tag (%(name)s) in table context caused voodoo mode.zBUnexpected end tag (%(name)s) in table context caused voodoo mode.zCUnexpected table cell start tag (%(name)s) in the table body phase.zFGot table cell end tag (%(name)s) while required end tags are missing.z?Unexpected end tag (%(name)s) in the table body phase. Ignored.z=Unexpected implied end tag (%(name)s) in the table row phase.z>Unexpected end tag (%(name)s) in the table row phase. Ignored.zJUnexpected select start tag in the select phase treated as select end tag.z/Unexpected input start tag in the select phase.zBUnexpected start tag token (%(name)s in the select phase. Ignored.z;Unexpected end tag (%(name)s) in the select phase. 
Ignored.zKUnexpected table element start tag (%(name)s) in the select in table phase.zIUnexpected table element end tag (%(name)s) in the select in table phase.z8Unexpected non-space characters in the after body phase.z>Unexpected start tag token (%(name)s) in the after body phase.zZ attributenameZ attributetypeZ basefrequencyZ baseprofileZcalcmodeZ clippathunitsZcontentscripttypeZcontentstyletypeZdiffuseconstantZedgemodeZexternalresourcesrequiredZ filterresZ filterunitsZglyphrefZgradienttransformZ gradientunitsZ kernelmatrixZkernelunitlengthZ keypointsZ keysplinesZkeytimesZ lengthadjustZlimitingconeangleZ markerheightZ markerunitsZ markerwidthZmaskcontentunitsZ maskunitsZ numoctavesZ pathlengthZpatterncontentunitsZpatterntransformZ patternunitsZ pointsatxZ pointsatyZ pointsatzZ preservealphaZpreserveaspectratioZprimitiveunitsZrefxZrefyZ repeatcountZ repeatdurZrequiredextensionsZrequiredfeaturesZspecularconstantZspecularexponentZ spreadmethodZ startoffsetZ stddeviationZ stitchtilesZ surfacescaleZsystemlanguageZ tablevaluesZtargetxZtargetyZ textlengthZviewboxZ viewtargetZxchannelselectorZychannelselectorZ zoomandpanZ definitionurlZ definitionURLrZactuateZarcroleZhrefZroleZshowtyper ZlangZspacer ) z xlink:actuatez xlink:arcrolez xlink:hrefz xlink:rolez xlink:showz xlink:titlez xlink:typezxml:basezxml:langz xml:spacer z xmlns:xlinkcCs"g|]\}\}}}||f|fqSr2).0ZqnameprefixZlocalnsr2r2/usr/lib/python3.6/constants.py sr7     cCs g|]}t|t|jfqSr2)ordlower)r3cr2r2r6r7#sz event-sourcesourceZtrackZ irrelevantZscopedZismapZautoplayZcontrolsZdeferasyncopenZmultipleZdisabledZhiddenZcheckeddefaultZnoshadeZ autosubmitreadonlyZselectedZ autofocusZrequired)r0r(ZaudioZvideor.rZdatagridrr'r+rZoptionZoptgrouprr)r/output   & ! 
0 `9 R}    "   "!a: S~xzlt;zgt;zamp;zapos;zquot;Æ&ÁuĂÂuАu𝔄ÀuΑuĀu⩓uĄu𝔸u⁡Åu𝒜u≔ÃÄu∖u⫧u⌆uБu∵uℬuΒu𝔅u𝔹u˘u≎uЧ©uĆu⋒uⅅuℭuČÇuĈu∰uĊ¸·uΧu⊙u⊖u⊕u⊗u∲u”u’u∷u⩴u≡u∯u∮uℂu∐u∳u⨯u𝒞u⋓u≍u⤑uЂuЅuЏu‡u↡u⫤uĎuДu∇uΔu𝔇´u˙u˝`u˜u⋄uⅆu𝔻¨u⃜u≐u⇓u⇐u⇔u⟸u⟺u⟹u⇒u⊨u⇑u⇕u∥u↓u⤓u⇵ȗu⥐u⥞u↽u⥖u⥟u⇁u⥗u⊤u↧u𝒟uĐuŊÐÉuĚÊuЭuĖu𝔈Èu∈uĒu◻u▫uĘu𝔼uΕu⩵u≂u⇌uℰu⩳uΗËu∃uⅇuФu𝔉u◼u▪u𝔽u∀uℱuЃ>uΓuϜuĞuĢuĜuГuĠu𝔊u⋙u𝔾u≥u⋛u≧u⪢u≷u⩾u≳u𝒢u≫uЪuˇ^uĤuℌuℋuℍu─uĦu≏uЕuIJuЁÍÎuИuİuℑÌuĪuⅈu∬u∫u⋂u⁣u⁢uĮu𝕀uΙuℐuĨuІÏuĴuЙu𝔍u𝕁u𝒥uЈuЄuХuЌuΚuĶuКu𝔎u𝕂u𝒦uЉ⃒u⧞u⤂u≤⃒u<⃒u⊴⃒u⤃u⊵⃒u∼⃒u⇖u⤣u⤧óôuоuőu⨸u⦼uœu⦿u𝔬u˛òu⧁u⦵u⦾u⦻u⧀uōuωuοu⦶u𝕠u⦷u⦹u∨u⩝uℴªºu⊶u⩖u⩗u⩛øu⊘õu⨶öu⌽¶u⫳u⫽uп%.u‰u‱u𝔭uφuϕu☎uπuϖuℎ+u⨣u⨢u⨥u⩲u⨦u⨧u⨕u𝕡£u⪳u⪷u⪹u⪵u⋨u′u⌮u⌒u⌓u⊰u𝓅uψu u𝔮u𝕢u⁗u𝓆u⨖?u⤜u⥤u∽̱uŕu⦳u⦒u⦥»u⥵u⤠u⤳u⤞u⥅u⥴u↣u↝u⤚u∶u❳}]u⦌u⦎u⦐uřuŗuрu⤷u⥩u↳u▭u⥽u𝔯u⥬uρuϱu⇉u⋌u˚u‏u⎱u⫮u⟭u⇾u⦆u𝕣u⨮u⨵)u⦔u⨒u›u𝓇u⋊u▹u⧎u⥨u℞uśu⪴u⪸ušuşuŝu⪶u⪺u⋩u⨓uсu⋅u⩦u⇘§;u⤩u✶u𝔰u♯uщuш­uσuςu⩪u⪞u⪠u⪝u⪟u≆u⨤u⥲u⨳u⧤u⌣u⪪u⪬u⪬︀uь/u⧄u⌿u𝕤u♠u⊓︀u⊔︀u𝓈u☆u⊂u⫅u⪽u⫃u⫁u⫋u⊊u⪿u⥹u⫇u⫕u⫓u♪¹²³u⫆u⪾u⫘u⫄u⟉u⫗u⥻u⫂u⫌u⊋u⫀u⫈u⫔u⫖u⇙u⤪ßu⌖uτuťuţuтu⌕u𝔱uθuϑþ×u⨱u⨰u⌶u⫱u𝕥u⫚u‴u▵u≜u◬u⨺u⨹u⧍u⨻u⏢u𝓉uцuћuŧu⥣úuўuŭûuуuűu⥾u𝔲ùu▀u⌜u⌏u◸uūuųu𝕦uυu⇈u⌝u⌎uůu◹u𝓊u⋰uũüu⦧u⫨u⫩u⦜u⊊︀u⫋︀u⊋︀u⫌︀uвu⊻u≚u⋮u𝔳u𝕧u𝓋u⦚uŵu⩟u≙u℘u𝔴u𝕨u𝓌u𝔵uξu⋻u𝕩u𝓍ýuяuŷuы¥u𝔶uїu𝕪u𝓎uюÿuźužuзużuζu𝔷uжu⇝u𝕫u𝓏u‍u‌(ZAEligzAElig;ZAMPzAMP;ZAacutezAacute;zAbreve;ZAcirczAcirc;zAcy;zAfr;ZAgravezAgrave;zAlpha;zAmacr;zAnd;zAogon;zAopf;zApplyFunction;ZAringzAring;zAscr;zAssign;ZAtildezAtilde;ZAumlzAuml;z Backslash;zBarv;zBarwed;zBcy;zBecause;z Bernoullis;zBeta;zBfr;zBopf;zBreve;zBscr;zBumpeq;zCHcy;ZCOPYzCOPY;zCacute;zCap;zCapitalDifferentialD;zCayleys;zCcaron;ZCcedilzCcedil;zCcirc;zCconint;zCdot;zCedilla;z CenterDot;zCfr;zChi;z CircleDot;z CircleMinus;z CirclePlus;z CircleTimes;zClockwiseContourIntegral;zCloseCurlyDoubleQuote;zCloseCurlyQuote;zColon;zColone;z Congruent;zConint;zContourIntegral;zCopf;z Coproduct;z CounterClockwiseContourIntegral;zCross;zCscr;zCup;zCupCap;zDD;z DDotrahd;zDJcy;zDScy;zDZcy;zDagger;zDarr;zDashv;zDcaron;zDcy;zDel;zDelta;zDfr;zDiacriticalAcute;zDiacriticalDot;zDiacriticalDoubleAcute;zDiacriticalGrave;zDiacriticalTilde;zDiamond;zDifferentialD;zDopf;zDot;zDotDot;z 
DotEqual;zDoubleContourIntegral;z DoubleDot;zDoubleDownArrow;zDoubleLeftArrow;zDoubleLeftRightArrow;zDoubleLeftTee;zDoubleLongLeftArrow;zDoubleLongLeftRightArrow;zDoubleLongRightArrow;zDoubleRightArrow;zDoubleRightTee;zDoubleUpArrow;zDoubleUpDownArrow;zDoubleVerticalBar;z DownArrow;z DownArrowBar;zDownArrowUpArrow;z DownBreve;zDownLeftRightVector;zDownLeftTeeVector;zDownLeftVector;zDownLeftVectorBar;zDownRightTeeVector;zDownRightVector;zDownRightVectorBar;zDownTee;z DownTeeArrow;z Downarrow;zDscr;zDstrok;zENG;ZETHzETH;ZEacutezEacute;zEcaron;ZEcirczEcirc;zEcy;zEdot;zEfr;ZEgravezEgrave;zElement;zEmacr;zEmptySmallSquare;zEmptyVerySmallSquare;zEogon;zEopf;zEpsilon;zEqual;z EqualTilde;z Equilibrium;zEscr;zEsim;zEta;ZEumlzEuml;zExists;z ExponentialE;zFcy;zFfr;zFilledSmallSquare;zFilledVerySmallSquare;zFopf;zForAll;z Fouriertrf;zFscr;zGJcy;ZGTzGT;zGamma;zGammad;zGbreve;zGcedil;zGcirc;zGcy;zGdot;zGfr;zGg;zGopf;z GreaterEqual;zGreaterEqualLess;zGreaterFullEqual;zGreaterGreater;z GreaterLess;zGreaterSlantEqual;z GreaterTilde;zGscr;zGt;zHARDcy;zHacek;zHat;zHcirc;zHfr;z HilbertSpace;zHopf;zHorizontalLine;zHscr;zHstrok;z HumpDownHump;z HumpEqual;zIEcy;zIJlig;zIOcy;ZIacutezIacute;ZIcirczIcirc;zIcy;zIdot;zIfr;ZIgravezIgrave;zIm;zImacr;z ImaginaryI;zImplies;zInt;z Integral;z Intersection;zInvisibleComma;zInvisibleTimes;zIogon;zIopf;zIota;zIscr;zItilde;zIukcy;ZIumlzIuml;zJcirc;zJcy;zJfr;zJopf;zJscr;zJsercy;zJukcy;zKHcy;zKJcy;zKappa;zKcedil;zKcy;zKfr;zKopf;zKscr;zLJcy;ZLTzLT;zLacute;zLambda;zLang;z Laplacetrf;zLarr;zLcaron;zLcedil;zLcy;zLeftAngleBracket;z LeftArrow;z LeftArrowBar;zLeftArrowRightArrow;z LeftCeiling;zLeftDoubleBracket;zLeftDownTeeVector;zLeftDownVector;zLeftDownVectorBar;z LeftFloor;zLeftRightArrow;zLeftRightVector;zLeftTee;z LeftTeeArrow;zLeftTeeVector;z LeftTriangle;zLeftTriangleBar;zLeftTriangleEqual;zLeftUpDownVector;zLeftUpTeeVector;z LeftUpVector;zLeftUpVectorBar;z LeftVector;zLeftVectorBar;z Leftarrow;zLeftrightarrow;zLessEqualGreater;zLessFullEqual;z 
LessGreater;z LessLess;zLessSlantEqual;z LessTilde;zLfr;zLl;z Lleftarrow;zLmidot;zLongLeftArrow;zLongLeftRightArrow;zLongRightArrow;zLongleftarrow;zLongleftrightarrow;zLongrightarrow;zLopf;zLowerLeftArrow;zLowerRightArrow;zLscr;zLsh;zLstrok;zLt;zMap;zMcy;z MediumSpace;z Mellintrf;zMfr;z MinusPlus;zMopf;zMscr;zMu;zNJcy;zNacute;zNcaron;zNcedil;zNcy;zNegativeMediumSpace;zNegativeThickSpace;zNegativeThinSpace;zNegativeVeryThinSpace;zNestedGreaterGreater;zNestedLessLess;zNewLine;zNfr;zNoBreak;zNonBreakingSpace;zNopf;zNot;z NotCongruent;z NotCupCap;zNotDoubleVerticalBar;z NotElement;z NotEqual;zNotEqualTilde;z NotExists;z NotGreater;zNotGreaterEqual;zNotGreaterFullEqual;zNotGreaterGreater;zNotGreaterLess;zNotGreaterSlantEqual;zNotGreaterTilde;zNotHumpDownHump;z NotHumpEqual;zNotLeftTriangle;zNotLeftTriangleBar;zNotLeftTriangleEqual;zNotLess;z NotLessEqual;zNotLessGreater;z NotLessLess;zNotLessSlantEqual;z NotLessTilde;zNotNestedGreaterGreater;zNotNestedLessLess;z NotPrecedes;zNotPrecedesEqual;zNotPrecedesSlantEqual;zNotReverseElement;zNotRightTriangle;zNotRightTriangleBar;zNotRightTriangleEqual;zNotSquareSubset;zNotSquareSubsetEqual;zNotSquareSuperset;zNotSquareSupersetEqual;z NotSubset;zNotSubsetEqual;z NotSucceeds;zNotSucceedsEqual;zNotSucceedsSlantEqual;zNotSucceedsTilde;z NotSuperset;zNotSupersetEqual;z NotTilde;zNotTildeEqual;zNotTildeFullEqual;zNotTildeTilde;zNotVerticalBar;zNscr;ZNtildezNtilde;zNu;zOElig;ZOacutezOacute;ZOcirczOcirc;zOcy;zOdblac;zOfr;ZOgravezOgrave;zOmacr;zOmega;zOmicron;zOopf;zOpenCurlyDoubleQuote;zOpenCurlyQuote;zOr;zOscr;ZOslashzOslash;ZOtildezOtilde;zOtimes;ZOumlzOuml;zOverBar;z OverBrace;z OverBracket;zOverParenthesis;z PartialD;zPcy;zPfr;zPhi;zPi;z PlusMinus;zPoincareplane;zPopf;zPr;z Precedes;zPrecedesEqual;zPrecedesSlantEqual;zPrecedesTilde;zPrime;zProduct;z Proportion;z 
Proportional;zPscr;zPsi;ZQUOTzQUOT;zQfr;zQopf;zQscr;zRBarr;ZREGzREG;zRacute;zRang;zRarr;zRarrtl;zRcaron;zRcedil;zRcy;zRe;zReverseElement;zReverseEquilibrium;zReverseUpEquilibrium;zRfr;zRho;zRightAngleBracket;z RightArrow;zRightArrowBar;zRightArrowLeftArrow;z RightCeiling;zRightDoubleBracket;zRightDownTeeVector;zRightDownVector;zRightDownVectorBar;z RightFloor;z RightTee;zRightTeeArrow;zRightTeeVector;zRightTriangle;zRightTriangleBar;zRightTriangleEqual;zRightUpDownVector;zRightUpTeeVector;zRightUpVector;zRightUpVectorBar;z RightVector;zRightVectorBar;z Rightarrow;zRopf;z RoundImplies;z Rrightarrow;zRscr;zRsh;z RuleDelayed;zSHCHcy;zSHcy;zSOFTcy;zSacute;zSc;zScaron;zScedil;zScirc;zScy;zSfr;zShortDownArrow;zShortLeftArrow;zShortRightArrow;z ShortUpArrow;zSigma;z SmallCircle;zSopf;zSqrt;zSquare;zSquareIntersection;z SquareSubset;zSquareSubsetEqual;zSquareSuperset;zSquareSupersetEqual;z SquareUnion;zSscr;zStar;zSub;zSubset;z SubsetEqual;z Succeeds;zSucceedsEqual;zSucceedsSlantEqual;zSucceedsTilde;z SuchThat;zSum;zSup;z Superset;zSupersetEqual;zSupset;ZTHORNzTHORN;zTRADE;zTSHcy;zTScy;zTab;zTau;zTcaron;zTcedil;zTcy;zTfr;z Therefore;zTheta;z ThickSpace;z ThinSpace;zTilde;z TildeEqual;zTildeFullEqual;z TildeTilde;zTopf;z TripleDot;zTscr;zTstrok;ZUacutezUacute;zUarr;z Uarrocir;zUbrcy;zUbreve;ZUcirczUcirc;zUcy;zUdblac;zUfr;ZUgravezUgrave;zUmacr;z UnderBar;z UnderBrace;z UnderBracket;zUnderParenthesis;zUnion;z UnionPlus;zUogon;zUopf;zUpArrow;z UpArrowBar;zUpArrowDownArrow;z UpDownArrow;zUpEquilibrium;zUpTee;z UpTeeArrow;zUparrow;z Updownarrow;zUpperLeftArrow;zUpperRightArrow;zUpsi;zUpsilon;zUring;zUscr;zUtilde;ZUumlzUuml;zVDash;zVbar;zVcy;zVdash;zVdashl;zVee;zVerbar;zVert;z VerticalBar;z 
VerticalLine;zVerticalSeparator;zVerticalTilde;zVeryThinSpace;zVfr;zVopf;zVscr;zVvdash;zWcirc;zWedge;zWfr;zWopf;zWscr;zXfr;zXi;zXopf;zXscr;zYAcy;zYIcy;zYUcy;ZYacutezYacute;zYcirc;zYcy;zYfr;zYopf;zYscr;zYuml;zZHcy;zZacute;zZcaron;zZcy;zZdot;zZeroWidthSpace;zZeta;zZfr;zZopf;zZscr;Zaacutezaacute;zabreve;zac;zacE;zacd;Zacirczacirc;Zacutezacute;zacy;Zaeligzaelig;zaf;zafr;Zagravezagrave;zalefsym;zaleph;zalpha;zamacr;zamalg;Zampzamp;zand;zandand;zandd;z andslope;zandv;zang;zange;zangle;zangmsd;z angmsdaa;z angmsdab;z angmsdac;z angmsdad;z angmsdae;z angmsdaf;z angmsdag;z angmsdah;zangrt;zangrtvb;z angrtvbd;zangsph;zangst;zangzarr;zaogon;zaopf;zap;zapE;zapacir;zape;zapid;zapos;zapprox;z approxeq;Zaringzaring;zascr;zast;zasymp;zasympeq;Zatildezatilde;Zaumlzauml;z awconint;zawint;zbNot;z backcong;z backepsilon;z backprime;zbacksim;z backsimeq;zbarvee;zbarwed;z barwedge;zbbrk;z bbrktbrk;zbcong;zbcy;zbdquo;zbecaus;zbecause;zbemptyv;zbepsi;zbernou;zbeta;zbeth;zbetween;zbfr;zbigcap;zbigcirc;zbigcup;zbigodot;z bigoplus;z bigotimes;z bigsqcup;zbigstar;zbigtriangledown;zbigtriangleup;z biguplus;zbigvee;z bigwedge;zbkarow;z blacklozenge;z blacksquare;zblacktriangle;zblacktriangledown;zblacktriangleleft;zblacktriangleright;zblank;zblk12;zblk14;zblk34;zblock;zbne;zbnequiv;zbnot;zbopf;zbot;zbottom;zbowtie;zboxDL;zboxDR;zboxDl;zboxDr;zboxH;zboxHD;zboxHU;zboxHd;zboxHu;zboxUL;zboxUR;zboxUl;zboxUr;zboxV;zboxVH;zboxVL;zboxVR;zboxVh;zboxVl;zboxVr;zboxbox;zboxdL;zboxdR;zboxdl;zboxdr;zboxh;zboxhD;zboxhU;zboxhd;zboxhu;z boxminus;zboxplus;z boxtimes;zboxuL;zboxuR;zboxul;zboxur;zboxv;zboxvH;zboxvL;zboxvR;zboxvh;zboxvl;zboxvr;zbprime;zbreve;Zbrvbarzbrvbar;zbscr;zbsemi;zbsim;zbsime;zbsol;zbsolb;z bsolhsub;zbull;zbullet;zbump;zbumpE;zbumpe;zbumpeq;zcacute;zcap;zcapand;z capbrcup;zcapcap;zcapcup;zcapdot;zcaps;zcaret;zcaron;zccaps;zccaron;Zccedilzccedil;zccirc;zccups;zccupssm;zcdot;Zcedilzcedil;zcemptyv;Zcentzcent;z centerdot;zcfr;zchcy;zcheck;z 
checkmark;zchi;zcir;zcirE;zcirc;zcirceq;zcirclearrowleft;zcirclearrowright;z circledR;z circledS;z circledast;z circledcirc;z circleddash;zcire;z cirfnint;zcirmid;zcirscir;zclubs;z clubsuit;zcolon;zcolone;zcoloneq;zcomma;zcommat;zcomp;zcompfn;z complement;z complexes;zcong;zcongdot;zconint;zcopf;zcoprod;copyzcopy;zcopysr;zcrarr;zcross;zcscr;zcsub;zcsube;zcsup;zcsupe;zctdot;zcudarrl;zcudarrr;zcuepr;zcuesc;zcularr;zcularrp;zcup;z cupbrcap;zcupcap;zcupcup;zcupdot;zcupor;zcups;zcurarr;zcurarrm;z curlyeqprec;z curlyeqsucc;z curlyvee;z curlywedge;Zcurrenzcurren;zcurvearrowleft;zcurvearrowright;zcuvee;zcuwed;z cwconint;zcwint;zcylcty;zdArr;zdHar;zdagger;zdaleth;zdarr;zdash;zdashv;zdbkarow;zdblac;zdcaron;zdcy;zdd;zddagger;zddarr;zddotseq;Zdegzdeg;zdelta;zdemptyv;zdfisht;zdfr;zdharl;zdharr;zdiam;zdiamond;z diamondsuit;zdiams;zdie;zdigamma;zdisin;zdiv;Zdividezdivide;zdivideontimes;zdivonx;zdjcy;zdlcorn;zdlcrop;zdollar;zdopf;zdot;zdoteq;z doteqdot;z dotminus;zdotplus;z dotsquare;zdoublebarwedge;z downarrow;zdowndownarrows;zdownharpoonleft;zdownharpoonright;z drbkarow;zdrcorn;zdrcrop;zdscr;zdscy;zdsol;zdstrok;zdtdot;zdtri;zdtrif;zduarr;zduhar;zdwangle;zdzcy;z dzigrarr;zeDDot;zeDot;Zeacutezeacute;zeaster;zecaron;zecir;Zecirczecirc;zecolon;zecy;zedot;zee;zefDot;zefr;zeg;Zegravezegrave;zegs;zegsdot;zel;z elinters;zell;zels;zelsdot;zemacr;zempty;z emptyset;zemptyv;zemsp13;zemsp14;zemsp;zeng;zensp;zeogon;zeopf;zepar;zeparsl;zeplus;zepsi;zepsilon;zepsiv;zeqcirc;zeqcolon;zeqsim;z eqslantgtr;z eqslantless;zequals;zequest;zequiv;zequivDD;z eqvparsl;zerDot;zerarr;zescr;zesdot;zesim;zeta;Zethzeth;Zeumlzeuml;zeuro;zexcl;zexist;z expectation;z exponentiale;zfallingdotseq;zfcy;zfemale;zffilig;zfflig;zffllig;zffr;zfilig;zfjlig;zflat;zfllig;zfltns;zfnof;zfopf;zforall;zfork;zforkv;z 
fpartint;Zfrac12zfrac12;zfrac13;Zfrac14zfrac14;zfrac15;zfrac16;zfrac18;zfrac23;zfrac25;Zfrac34zfrac34;zfrac35;zfrac38;zfrac45;zfrac56;zfrac58;zfrac78;zfrasl;zfrown;zfscr;zgE;zgEl;zgacute;zgamma;zgammad;zgap;zgbreve;zgcirc;zgcy;zgdot;zge;zgel;zgeq;zgeqq;z geqslant;zges;zgescc;zgesdot;zgesdoto;z gesdotol;zgesl;zgesles;zgfr;zgg;zggg;zgimel;zgjcy;zgl;zglE;zgla;zglj;zgnE;zgnap;z gnapprox;zgne;zgneq;zgneqq;zgnsim;zgopf;zgrave;zgscr;zgsim;zgsime;zgsiml;gtzgt;zgtcc;zgtcir;zgtdot;zgtlPar;zgtquest;z gtrapprox;zgtrarr;zgtrdot;z gtreqless;z gtreqqless;zgtrless;zgtrsim;z gvertneqq;zgvnE;zhArr;zhairsp;zhalf;zhamilt;zhardcy;zharr;zharrcir;zharrw;zhbar;zhcirc;zhearts;z heartsuit;zhellip;zhercon;zhfr;z hksearow;z hkswarow;zhoarr;zhomtht;zhookleftarrow;zhookrightarrow;zhopf;zhorbar;zhscr;zhslash;zhstrok;zhybull;zhyphen;Ziacuteziacute;zic;Zicirczicirc;zicy;ziecy;Ziexclziexcl;ziff;zifr;Zigravezigrave;zii;ziiiint;ziiint;ziinfin;ziiota;zijlig;zimacr;zimage;z imagline;z imagpart;zimath;zimof;zimped;zin;zincare;zinfin;z infintie;zinodot;zint;zintcal;z integers;z intercal;z intlarhk;zintprod;ziocy;ziogon;ziopf;ziota;ziprod;Ziquestziquest;ziscr;zisin;zisinE;zisindot;zisins;zisinsv;zisinv;zit;zitilde;ziukcy;Ziumlziuml;zjcirc;zjcy;zjfr;zjmath;zjopf;zjscr;zjsercy;zjukcy;zkappa;zkappav;zkcedil;zkcy;zkfr;zkgreen;zkhcy;zkjcy;zkopf;zkscr;zlAarr;zlArr;zlAtail;zlBarr;zlE;zlEg;zlHar;zlacute;z laemptyv;zlagran;zlambda;zlang;zlangd;zlangle;zlap;Zlaquozlaquo;zlarr;zlarrb;zlarrbfs;zlarrfs;zlarrhk;zlarrlp;zlarrpl;zlarrsim;zlarrtl;zlat;zlatail;zlate;zlates;zlbarr;zlbbrk;zlbrace;zlbrack;zlbrke;zlbrksld;zlbrkslu;zlcaron;zlcedil;zlceil;zlcub;zlcy;zldca;zldquo;zldquor;zldrdhar;z ldrushar;zldsh;zle;z leftarrow;zleftarrowtail;zleftharpoondown;zleftharpoonup;zleftleftarrows;zleftrightarrow;zleftrightarrows;zleftrightharpoons;zleftrightsquigarrow;zleftthreetimes;zleg;zleq;zleqq;z leqslant;zles;zlescc;zlesdot;zlesdoto;z lesdotor;zlesg;zlesges;z lessapprox;zlessdot;z lesseqgtr;z 
lesseqqgtr;zlessgtr;zlesssim;zlfisht;zlfloor;zlfr;zlg;zlgE;zlhard;zlharu;zlharul;zlhblk;zljcy;zll;zllarr;z llcorner;zllhard;zlltri;zlmidot;zlmoust;z lmoustache;zlnE;zlnap;z lnapprox;zlne;zlneq;zlneqq;zlnsim;zloang;zloarr;zlobrk;zlongleftarrow;zlongleftrightarrow;z longmapsto;zlongrightarrow;zlooparrowleft;zlooparrowright;zlopar;zlopf;zloplus;zlotimes;zlowast;zlowbar;zloz;zlozenge;zlozf;zlpar;zlparlt;zlrarr;z lrcorner;zlrhar;zlrhard;zlrm;zlrtri;zlsaquo;zlscr;zlsh;zlsim;zlsime;zlsimg;zlsqb;zlsquo;zlsquor;zlstrok;ltzlt;zltcc;zltcir;zltdot;zlthree;zltimes;zltlarr;zltquest;zltrPar;zltri;zltrie;zltrif;z lurdshar;zluruhar;z lvertneqq;zlvnE;zmDDot;Zmacrzmacr;zmale;zmalt;zmaltese;zmap;zmapsto;z mapstodown;z mapstoleft;z mapstoup;zmarker;zmcomma;zmcy;zmdash;zmeasuredangle;zmfr;zmho;microzmicro;zmid;zmidast;zmidcir;Zmiddotzmiddot;zminus;zminusb;zminusd;zminusdu;zmlcp;zmldr;zmnplus;zmodels;zmopf;zmp;zmscr;zmstpos;zmu;z multimap;zmumap;znGg;znGt;znGtv;z nLeftarrow;znLeftrightarrow;znLl;znLt;znLtv;z nRightarrow;znVDash;znVdash;znabla;znacute;znang;znap;znapE;znapid;znapos;znapprox;znatur;znatural;z naturals;Znbspznbsp;znbump;znbumpe;zncap;zncaron;zncedil;zncong;z ncongdot;zncup;zncy;zndash;zne;zneArr;znearhk;znearr;znearrow;znedot;znequiv;znesear;znesim;znexist;znexists;znfr;zngE;znge;zngeq;zngeqq;z ngeqslant;znges;zngsim;zngt;zngtr;znhArr;znharr;znhpar;zni;znis;znisd;zniv;znjcy;znlArr;znlE;znlarr;znldr;znle;z nleftarrow;znleftrightarrow;znleq;znleqq;z nleqslant;znles;znless;znlsim;znlt;znltri;znltrie;znmid;znopf;notznot;znotin;znotinE;z notindot;znotinva;znotinvb;znotinvc;znotni;znotniva;znotnivb;znotnivc;znpar;z nparallel;znparsl;znpart;znpolint;znpr;znprcue;znpre;znprec;znpreceq;znrArr;znrarr;znrarrc;znrarrw;z nrightarrow;znrtri;znrtrie;znsc;znsccue;znsce;znscr;z nshortmid;znshortparallel;znsim;znsime;znsimeq;znsmid;znspar;znsqsube;znsqsupe;znsub;znsubE;znsube;znsubset;z nsubseteq;z nsubseteqq;znsucc;znsucceq;znsup;znsupE;znsupe;znsupset;z nsupseteq;z 
nsupseteqq;zntgl;Zntildezntilde;zntlg;zntriangleleft;zntrianglelefteq;zntriangleright;zntrianglerighteq;znu;znum;znumero;znumsp;znvDash;znvHarr;znvap;znvdash;znvge;znvgt;znvinfin;znvlArr;znvle;znvlt;znvltrie;znvrArr;znvrtrie;znvsim;znwArr;znwarhk;znwarr;znwarrow;znwnear;zoS;Zoacutezoacute;zoast;zocir;Zocirczocirc;zocy;zodash;zodblac;zodiv;zodot;zodsold;zoelig;zofcir;zofr;zogon;Zogravezograve;zogt;zohbar;zohm;zoint;zolarr;zolcir;zolcross;zoline;zolt;zomacr;zomega;zomicron;zomid;zominus;zoopf;zopar;zoperp;zoplus;zor;zorarr;zord;zorder;zorderof;Zordfzordf;Zordmzordm;zorigof;zoror;zorslope;zorv;zoscr;Zoslashzoslash;zosol;Zotildezotilde;zotimes;z otimesas;Zoumlzouml;zovbar;zpar;Zparazpara;z parallel;zparsim;zparsl;zpart;zpcy;zpercnt;zperiod;zpermil;zperp;zpertenk;zpfr;zphi;zphiv;zphmmat;zphone;zpi;z pitchfork;zpiv;zplanck;zplanckh;zplankv;zplus;z plusacir;zplusb;zpluscir;zplusdo;zplusdu;zpluse;Zplusmnzplusmn;zplussim;zplustwo;zpm;z pointint;zpopf;Zpoundzpound;zpr;zprE;zprap;zprcue;zpre;zprec;z precapprox;z preccurlyeq;zpreceq;z precnapprox;z precneqq;z precnsim;zprecsim;zprime;zprimes;zprnE;zprnap;zprnsim;zprod;z profalar;z profline;z profsurf;zprop;zpropto;zprsim;zprurel;zpscr;zpsi;zpuncsp;zqfr;zqint;zqopf;zqprime;zqscr;z quaternions;zquatint;zquest;zquesteq;Zquotzquot;zrAarr;zrArr;zrAtail;zrBarr;zrHar;zrace;zracute;zradic;z raemptyv;zrang;zrangd;zrange;zrangle;Zraquozraquo;zrarr;zrarrap;zrarrb;zrarrbfs;zrarrc;zrarrfs;zrarrhk;zrarrlp;zrarrpl;zrarrsim;zrarrtl;zrarrw;zratail;zratio;z rationals;zrbarr;zrbbrk;zrbrace;zrbrack;zrbrke;zrbrksld;zrbrkslu;zrcaron;zrcedil;zrceil;zrcub;zrcy;zrdca;zrdldhar;zrdquo;zrdquor;zrdsh;zreal;zrealine;z realpart;zreals;zrect;Zregzreg;zrfisht;zrfloor;zrfr;zrhard;zrharu;zrharul;zrho;zrhov;z rightarrow;zrightarrowtail;zrightharpoondown;zrightharpoonup;zrightleftarrows;zrightleftharpoons;zrightrightarrows;zrightsquigarrow;zrightthreetimes;zring;z risingdotseq;zrlarr;zrlhar;zrlm;zrmoust;z 
rmoustache;zrnmid;zroang;zroarr;zrobrk;zropar;zropf;zroplus;zrotimes;zrpar;zrpargt;z rppolint;zrrarr;zrsaquo;zrscr;zrsh;zrsqb;zrsquo;zrsquor;zrthree;zrtimes;zrtri;zrtrie;zrtrif;z rtriltri;zruluhar;zrx;zsacute;zsbquo;zsc;zscE;zscap;zscaron;zsccue;zsce;zscedil;zscirc;zscnE;zscnap;zscnsim;z scpolint;zscsim;zscy;zsdot;zsdotb;zsdote;zseArr;zsearhk;zsearr;zsearrow;Zsectzsect;zsemi;zseswar;z setminus;zsetmn;zsext;zsfr;zsfrown;zsharp;zshchcy;zshcy;z shortmid;zshortparallel;Zshyzshy;zsigma;zsigmaf;zsigmav;zsim;zsimdot;zsime;zsimeq;zsimg;zsimgE;zsiml;zsimlE;zsimne;zsimplus;zsimrarr;zslarr;zsmallsetminus;zsmashp;z smeparsl;zsmid;zsmile;zsmt;zsmte;zsmtes;zsoftcy;zsol;zsolb;zsolbar;zsopf;zspades;z spadesuit;zspar;zsqcap;zsqcaps;zsqcup;zsqcups;zsqsub;zsqsube;z sqsubset;z sqsubseteq;zsqsup;zsqsupe;z sqsupset;z sqsupseteq;zsqu;zsquare;zsquarf;zsquf;zsrarr;zsscr;zssetmn;zssmile;zsstarf;zstar;zstarf;zstraightepsilon;z straightphi;zstrns;zsub;zsubE;zsubdot;zsube;zsubedot;zsubmult;zsubnE;zsubne;zsubplus;zsubrarr;zsubset;z subseteq;z subseteqq;z subsetneq;z subsetneqq;zsubsim;zsubsub;zsubsup;zsucc;z succapprox;z succcurlyeq;zsucceq;z succnapprox;z succneqq;z succnsim;zsuccsim;zsum;zsung;Zsup1zsup1;Zsup2zsup2;Zsup3zsup3;zsup;zsupE;zsupdot;zsupdsub;zsupe;zsupedot;zsuphsol;zsuphsub;zsuplarr;zsupmult;zsupnE;zsupne;zsupplus;zsupset;z supseteq;z supseteqq;z supsetneq;z supsetneqq;zsupsim;zsupsub;zsupsup;zswArr;zswarhk;zswarr;zswarrow;zswnwar;Zszligzszlig;ztarget;ztau;ztbrk;ztcaron;ztcedil;ztcy;ztdot;ztelrec;ztfr;zthere4;z therefore;ztheta;z thetasym;zthetav;z thickapprox;z thicksim;zthinsp;zthkap;zthksim;Zthornzthorn;ztilde;timesztimes;ztimesb;z timesbar;ztimesd;ztint;ztoea;ztop;ztopbot;ztopcir;ztopf;ztopfork;ztosa;ztprime;ztrade;z triangle;z triangledown;z triangleleft;ztrianglelefteq;z triangleq;ztriangleright;ztrianglerighteq;ztridot;ztrie;z triminus;ztriplus;ztrisb;ztritime;z 
trpezium;ztscr;ztscy;ztshcy;ztstrok;ztwixt;ztwoheadleftarrow;ztwoheadrightarrow;zuArr;zuHar;Zuacutezuacute;zuarr;zubrcy;zubreve;Zucirczucirc;zucy;zudarr;zudblac;zudhar;zufisht;zufr;Zugravezugrave;zuharl;zuharr;zuhblk;zulcorn;z ulcorner;zulcrop;zultri;zumacr;Zumlzuml;zuogon;zuopf;zuparrow;z updownarrow;zupharpoonleft;zupharpoonright;zuplus;zupsi;zupsih;zupsilon;z upuparrows;zurcorn;z urcorner;zurcrop;zuring;zurtri;zuscr;zutdot;zutilde;zutri;zutrif;zuuarr;Zuumlzuuml;zuwangle;zvArr;zvBar;zvBarv;zvDash;zvangrt;z varepsilon;z varkappa;z varnothing;zvarphi;zvarpi;z varpropto;zvarr;zvarrho;z varsigma;z varsubsetneq;zvarsubsetneqq;z varsupsetneq;zvarsupsetneqq;z vartheta;zvartriangleleft;zvartriangleright;zvcy;zvdash;zvee;zveebar;zveeeq;zvellip;zverbar;zvert;zvfr;zvltri;zvnsub;zvnsup;zvopf;zvprop;zvrtri;zvscr;zvsubnE;zvsubne;zvsupnE;zvsupne;zvzigzag;zwcirc;zwedbar;zwedge;zwedgeq;zweierp;zwfr;zwopf;zwp;zwr;zwreath;zwscr;zxcap;zxcirc;zxcup;zxdtri;zxfr;zxhArr;zxharr;zxi;zxlArr;zxlarr;zxmap;zxnis;zxodot;zxopf;zxoplus;zxotime;zxrArr;zxrarr;zxscr;zxsqcup;zxuplus;zxutri;zxvee;zxwedge;Zyacutezyacute;zyacy;zycirc;zycy;Zyenzyen;zyfr;zyicy;zyopf;zyscr;zyucy;Zyumlzyuml;zzacute;zzcaron;zzcy;zzdot;zzeetrf;zzeta;zzfr;zzhcy;zzigrarr;zzopf;zzscr;zzwj;zzwnj;u�)"r )ZDoctypeZ CharactersZSpaceCharactersStartTagEndTagEmptyTagCommentZ ParseErrorrrrcCsg|]\}}||fqSr2r2)r3kvr2r2r6r7x sZmathc@s eZdZdS)DataLossWarningN)__name__ __module__ __qualname__r2r2r2r6r| src@s eZdZdS)ReparseExceptionN)rrrr2r2r2r6r sr)rr r!r"r#r$) rGrHrIrJrKrLrMrNrOrPrQrRrSrHrTrHrHrUrVrWrXrYrZr[r\r]r^r_r`rHrarb)0Z __future__rrrstringZEOFEZ namespaces frozensetZscopingElementsZformattingElementsZspecialElementsZhtmlIntegrationPointElementsZ"mathmlTextIntegrationPointElementsZadjustSVGAttributesZadjustMathMLAttributesZadjustForeignAttributesdictitemsZunadjustForeignAttributesZspaceCharactersZtableInsertModeElementsZascii_lowercaseZasciiLowercaseZascii_uppercaseZasciiUppercaseZ ascii_lettersZ asciiLettersZdigitsZ hexdigitsZ 
hexDigitsZasciiUpper2LowerZheadingElementsZ voidElementsZ cdataElementsZrcdataElementsZbooleanAttributesZentitiesWindows1252Z xmlEntitiesZentitiesZreplacementCharactersZ tokenTypesZ tagTokenTypesprefixes UserWarningr Exceptionrr2r2r2r6s<                                                                                                                                       __pycache__/constants.cpython-36.pyc000064400000201300147204715120013415 0ustar003 B;WE@s-ddlmZmZmZddlZdZddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d,d,d0d1d2d3d4d5d6d7d8d9d:d;dd?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddZdddddddZeeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgNZ eeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfgZ ddddddddddddddddddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/>Zd0d1iZd2d3ed2fd2d4ed2fd2d5ed2fd2d6ed2fd2d7ed2fd2ded2fd2d8ed2fd9ded9fd9d:ed9fd9d;ed9fddd?ejDZed@dAdBdCdDgZedddddgZeejZeejZeejZeejZeejZedEd?ejDZd|Z edddFddddddddddԐdGdHgZ!eddgZ"edddddddgZ#edIgedJgedKgedLdMgedLdMgedNdOgedPgedQdRgedSdRdTdUgedVgedWgedRdXgedRdXdYgedRdXgedRdZgedRdXd[dZdTdKgedRdXdZdQgedRdXgd\Z$d}Z%edydzd{d|d}gZ&d~d~dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐d͐dΐdϐdАdѐdҐdӐddѐdԐdՐd֐dÐdאdؐdِdڐdېdܐdݐdސdߐdddddddddddddԐddddddddddddddddddddddddddddddddddd d d d d d dddddddddddddddddddd 
d!d"d#d$d%d&d'd(d)d*d(d+dd,d-d.d/d0d0d1d1d2d3d4d5d5d4d6d7dڐd8d9d:d;d<d=d>d?d@dAdBdCdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdՐd֐dudvdwdxdydzd{d|d}d~ddddאdؐdِddddXdddddddddddddddddddddd"ddAddddddddddddddddddddddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐d͐dΐdϐdϐdАdѐdҐdҐdӐdӐdԐdՐd֐dאdאdؐdِdڐdېdܐdݐdސdߐdddddddddddddddddd'ddddddddddddddddddddddddddddddd dd d d d ddddddddddddddddddd dڐd!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0dߐd^d d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d?d@dAdBdCdDddEdFdGdHdFdIdIdJdKdLd@dMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d^d_d`dadbdcdcdddedfdgdgdhdidjdkdldmdndodpd1dqdrdsdtdudvdܐdݐdwdxdydzd{d|d}d~d~ddddddddddddddddddddddddddddddddddddddddddddddddddddddddɐdɐddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐dd͐dΐdϐdYdАdѐdҐdӐdԐdYdҐdՐdՐd֐dאdYddؐdؐdِdِddڐdېdܐdݐdސdߐdddddkddܐddddddݐdddddd:ddmddddddddddddd ddddddddddddddududdddddd d d d d dddddddddddddddd*dddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.dސdd/d/d0d1dߐdd2d3d4d5d5dd6d,d,d7d8d9d:d;d<d=d>d?d$d@dAdBdBdCdDdEdFdddGdHdHddIdJdKdKdLdMdNdOdPdQdRddSdTdUdVdPdWdXdYdZdZd[ddd\d]d^d3d^ddXd_dd`ddddadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdldmdydzd{d{dndwdydzdd|d}dԐd~dddߐddiddːdddϐdddddddddddddΐdΐdddѐdddddddddddddʐdӐddddddߐdddddddddddddddd ddddddddddddddddddddddddddddddddddddddddddÐdĐdĐdŐddddddƐdǐddȐdɐdʐdːd̐dӐdd͐dΐdΐdϐdϐdАdѐdddddҐdӐdԐdՐd֐dאdؐdِdڐdېdܐdݐdސd dߐddddddddddddddddddddddddddddddddddddddddddddddd"ddddddddd d d d dd d d̐d d dddddddddddddddd ddd֐ddd(ddgdddddddddd 
d!d"d#d$d%d&d'd(dd)d*dd+d+d;d,d,d-d.d/d/d֐d0d1d1d7d2d3d4d5d6d7d4d@d4d8d9d:dd;d<d=d8d9d>dd>d?d@dAdBdCdDd@dEdEdFddGdHdIdJdd<dKdLdMdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d}dՐd`dadvdbdcdddedXdfd]dgd]dhdidid^d_djdkd$dldmdndodpdqdrdsdtdudvdwdxdydzd{d|dadvd}d~dܐdddddd^doddsddgd`dddduddvdydydddddddhddudbdwdzddfddwdddsdddddddddddddddddddddbdddddldddddddidddddd`dddddddddzdddwdݐdddTdTddddddddddndddddddddddddddjdvddddddddÐdÐddאdĐdddŐd!ddƐdǐdddȐdɐddʐddːd̐d̐d͐dΐddϐdАdѐdҐddӐdԐdՐdƐd֐dאd̐dؐdِdڐd̐dېdېddddddܐdݐdސdːdߐdddddddxdxdddddddddddddddddАddddddddϐdddddddddddddddd͐ddddddddddddddddddddddddӐdddddddÐdŐdĐdd͐ddɐdʐdʐd͐ddddddddddÐdĐdddȐdǐdȐddddddddddddddd d d d d dddddddddddwdwddSdddTdUddddVdddddd d!d"d#d#d$d%dِddQd&d'dd(d)d*d+d,dd-d.d/dd0dRd1d2d2d3d3d4d4d5d6d7d8d2d9d9d:d;d;dd<d=d=d>dސd?d?dސd@dAddBdCdDdEdudFdGdHdIddJdKdߐdLddMddNdOd"dPddQdRdddSdTddUdVdWdWddXdYddddYdddZd[d\dd]dd[dZd\dd^d_d`ddddadbdcddded2dfdgdhd)didjdǐddd#dڐdkddldmdnd5dod dpdqd drdrd dsd dtdudvd%ddwdxdydzd{d|ddd}d~dddddddd~dddddddd$dd!dddddddddddd dyddddddzdddʐdddddddddddddddddddd%ddddddddddddddAddddCdBdddddddDdddddd ddddddddddddddddސddddddVddWdWdddddÐdĐdŐd^ddƐdǐddȐdɐdʐdːd̐d͐dΐdϐdАdѐdѐdސd7dҐd<dӐd8d9d8d9d:d;d:d;d6d6d d d dԐddȐd>dՐddŐdIdd֐dאdؐd@dِdڐdېdܐdݐdސd֐d@dאdܐdېdߐdddAddCdBddddDdEddddddddGddddHdddddddddGdHdddddddd!dddddddddddd[dddRdRddddYdVdUdYdVddd͐ddd#ddd3dddddddddJd dddnd ddd d d d dddddddddYddܐdddd1ddddddrddtdddddqddd d d!d"d#dѐdѐd$d%d1dsdqddnd&dyd&d'd(d(d)d*d+d,d-d.d dd'd/d/d0dݐd1d2dېd3dŐdWddIdLddsddd4d5d6d7ddldd8dd0d9d:d;ddd<dlddǐd=ddd>d5d4d7d6d?d@dAddBdCdDdEdCdddFd:ddmddGdؐddHdאdddIddJdddِddKddddddLdLdMdNdOdPdPdQdRdSdTdUdVdVdWdXdYdZdd[d\d]d^d_d`dadbdcZ'dddDdАdeddݐdddddOdEd,ddѐdfddgdhdݐddܐdd5ddd͐dJddddidXddj"Z(ddkdldmdndodpdqdrZ)ee)dse)dte)dugZ*edvd?ejDZ+dwe+d<Gdxdydye,Z-Gdzd{d{e.Z/dS(~)absolute_importdivisionunicode_literalsNz5Null character in input stream, replaced with U+FFFD.zInvalid codepoint in stream.z&Solidus (/) incorrectly placed in tag.z.Incorrect CR newline entity, replaced with LF.z9Entity used 
with illegal number (windows-1252 reference).zPNumeric entity couldn't be converted to character (codepoint U+%(charAsInt)08x).zBNumeric entity represents an illegal codepoint: U+%(charAsInt)08x.z#Numeric entity didn't end with ';'.z1Numeric entity expected. Got end of file instead.z'Numeric entity expected but none found.z!Named entity didn't end with ';'.z Named entity expected. Got none.z'End tag contains unexpected attributes.z.End tag contains unexpected self-closing flag.z#Expected tag name. Got '>' instead.zSExpected tag name. Got '?' instead. (HTML doesn't support processing instructions.)z-Expected tag name. Got something else insteadz6Expected closing tag. Got '>' instead. Ignoring ''.z-Expected closing tag. Unexpected end of file.z' instead.z"Unexpected = in unquoted attributez*Unexpected character in unquoted attributez*Unexpected character after attribute name.z+Unexpected character after attribute value.z.Unexpected end of file in attribute value (").z.Unexpected end of file in attribute value (').z*Unexpected end of file in attribute value.z)Unexpected end of file in tag. Expected >z/Unexpected character after / in tag. Expected >z&Expected '--' or 'DOCTYPE'. Not found.z Unexpected ! after -- in commentz$Unexpected space after -- in commentzIncorrect comment.z"Unexpected end of file in comment.z%Unexpected end of file in comment (-)z+Unexpected '-' after '--' found in comment.z'Unexpected end of file in comment (--).z&Unexpected character in comment found.z(No space after literal string 'DOCTYPE'.z.Unexpected > character. Expected DOCTYPE name.z.Unexpected end of file. Expected DOCTYPE name.z'Unexpected end of file in DOCTYPE name.z"Unexpected end of file in DOCTYPE.z%Expected space or '>'. Got '%(data)s'zUnexpected end of DOCTYPE.z Unexpected character in DOCTYPE.zXXX innerHTML EOFzUnexpected DOCTYPE. Ignored.z%html needs to be the first start tag.z)Unexpected End of file. Expected DOCTYPE.zErroneous DOCTYPE.z2Unexpected non-space characters. 
Expected DOCTYPE.z2Unexpected start tag (%(name)s). Expected DOCTYPE.z0Unexpected end tag (%(name)s). Expected DOCTYPE.z?Unexpected end tag (%(name)s) after the (implied) root element.z4Unexpected end of file. Expected end tag (%(name)s).z4Unexpected start tag head in existing head. Ignored.z'Unexpected end tag (%(name)s). Ignored.z;Unexpected start tag (%(name)s) that can be in head. Moved.z Unexpected start tag (%(name)s).zMissing end tag (%(name)s).zMissing end tags (%(name)s).zCUnexpected start tag (%(startName)s) implies end tag (%(endName)s).z@Unexpected start tag (%(originalName)s). Treated as %(newName)s.z,Unexpected start tag %(name)s. Don't use it!z'Unexpected start tag %(name)s. Ignored.zEUnexpected end tag (%(gotName)s). Missing end tag (%(expectedName)s).z:End tag (%(name)s) seen too early. Expected other end tag.zFUnexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).z+End tag (%(name)s) seen too early. Ignored.zQEnd tag (%(name)s) violates step 1, paragraph 1 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 1, paragraph 2 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 1, paragraph 3 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 4, paragraph 4 of the adoption agency algorithm.z>Unexpected end tag (%(originalName)s). 
Treated as %(newName)s.z'This element (%(name)s) has no end tag.z9Unexpected implied end tag (%(name)s) in the table phase.z>Unexpected implied end tag (%(name)s) in the table body phase.zDUnexpected non-space characters in table context caused voodoo mode.z3Unexpected input with type hidden in table context.z!Unexpected form in table context.zDUnexpected start tag (%(name)s) in table context caused voodoo mode.zBUnexpected end tag (%(name)s) in table context caused voodoo mode.zCUnexpected table cell start tag (%(name)s) in the table body phase.zFGot table cell end tag (%(name)s) while required end tags are missing.z?Unexpected end tag (%(name)s) in the table body phase. Ignored.z=Unexpected implied end tag (%(name)s) in the table row phase.z>Unexpected end tag (%(name)s) in the table row phase. Ignored.zJUnexpected select start tag in the select phase treated as select end tag.z/Unexpected input start tag in the select phase.zBUnexpected start tag token (%(name)s in the select phase. Ignored.z;Unexpected end tag (%(name)s) in the select phase. 
Ignored.zKUnexpected table element start tag (%(name)s) in the select in table phase.zIUnexpected table element end tag (%(name)s) in the select in table phase.z8Unexpected non-space characters in the after body phase.z>Unexpected start tag token (%(name)s) in the after body phase.zZ attributenameZ attributetypeZ basefrequencyZ baseprofileZcalcmodeZ clippathunitsZcontentscripttypeZcontentstyletypeZdiffuseconstantZedgemodeZexternalresourcesrequiredZ filterresZ filterunitsZglyphrefZgradienttransformZ gradientunitsZ kernelmatrixZkernelunitlengthZ keypointsZ keysplinesZkeytimesZ lengthadjustZlimitingconeangleZ markerheightZ markerunitsZ markerwidthZmaskcontentunitsZ maskunitsZ numoctavesZ pathlengthZpatterncontentunitsZpatterntransformZ patternunitsZ pointsatxZ pointsatyZ pointsatzZ preservealphaZpreserveaspectratioZprimitiveunitsZrefxZrefyZ repeatcountZ repeatdurZrequiredextensionsZrequiredfeaturesZspecularconstantZspecularexponentZ spreadmethodZ startoffsetZ stddeviationZ stitchtilesZ surfacescaleZsystemlanguageZ tablevaluesZtargetxZtargetyZ textlengthZviewboxZ viewtargetZxchannelselectorZychannelselectorZ zoomandpanZ definitionurlZ definitionURLrZactuateZarcroleZhrefZroleZshowtyper ZlangZspacer ) z xlink:actuatez xlink:arcrolez xlink:hrefz xlink:rolez xlink:showz xlink:titlez xlink:typezxml:basezxml:langz xml:spacer z xmlns:xlinkcCs"g|]\}\}}}||f|fqSr2).0ZqnameprefixZlocalnsr2r2/usr/lib/python3.6/constants.py sr7     cCs g|]}t|t|jfqSr2)ordlower)r3cr2r2r6r7#sz event-sourcesourceZtrackZ irrelevantZscopedZismapZautoplayZcontrolsZdeferasyncopenZmultipleZdisabledZhiddenZcheckeddefaultZnoshadeZ autosubmitreadonlyZselectedZ autofocusZrequired)r0r(ZaudioZvideor.rZdatagridrr'r+rZoptionZoptgrouprr)r/output   & ! 
0 `9 R}    "   "!a: S~xzlt;zgt;zamp;zapos;zquot;Æ&ÁuĂÂuАu𝔄ÀuΑuĀu⩓uĄu𝔸u⁡Åu𝒜u≔ÃÄu∖u⫧u⌆uБu∵uℬuΒu𝔅u𝔹u˘u≎uЧ©uĆu⋒uⅅuℭuČÇuĈu∰uĊ¸·uΧu⊙u⊖u⊕u⊗u∲u”u’u∷u⩴u≡u∯u∮uℂu∐u∳u⨯u𝒞u⋓u≍u⤑uЂuЅuЏu‡u↡u⫤uĎuДu∇uΔu𝔇´u˙u˝`u˜u⋄uⅆu𝔻¨u⃜u≐u⇓u⇐u⇔u⟸u⟺u⟹u⇒u⊨u⇑u⇕u∥u↓u⤓u⇵ȗu⥐u⥞u↽u⥖u⥟u⇁u⥗u⊤u↧u𝒟uĐuŊÐÉuĚÊuЭuĖu𝔈Èu∈uĒu◻u▫uĘu𝔼uΕu⩵u≂u⇌uℰu⩳uΗËu∃uⅇuФu𝔉u◼u▪u𝔽u∀uℱuЃ>uΓuϜuĞuĢuĜuГuĠu𝔊u⋙u𝔾u≥u⋛u≧u⪢u≷u⩾u≳u𝒢u≫uЪuˇ^uĤuℌuℋuℍu─uĦu≏uЕuIJuЁÍÎuИuİuℑÌuĪuⅈu∬u∫u⋂u⁣u⁢uĮu𝕀uΙuℐuĨuІÏuĴuЙu𝔍u𝕁u𝒥uЈuЄuХuЌuΚuĶuКu𝔎u𝕂u𝒦uЉ⃒u⧞u⤂u≤⃒u<⃒u⊴⃒u⤃u⊵⃒u∼⃒u⇖u⤣u⤧óôuоuőu⨸u⦼uœu⦿u𝔬u˛òu⧁u⦵u⦾u⦻u⧀uōuωuοu⦶u𝕠u⦷u⦹u∨u⩝uℴªºu⊶u⩖u⩗u⩛øu⊘õu⨶öu⌽¶u⫳u⫽uп%.u‰u‱u𝔭uφuϕu☎uπuϖuℎ+u⨣u⨢u⨥u⩲u⨦u⨧u⨕u𝕡£u⪳u⪷u⪹u⪵u⋨u′u⌮u⌒u⌓u⊰u𝓅uψu u𝔮u𝕢u⁗u𝓆u⨖?u⤜u⥤u∽̱uŕu⦳u⦒u⦥»u⥵u⤠u⤳u⤞u⥅u⥴u↣u↝u⤚u∶u❳}]u⦌u⦎u⦐uřuŗuрu⤷u⥩u↳u▭u⥽u𝔯u⥬uρuϱu⇉u⋌u˚u‏u⎱u⫮u⟭u⇾u⦆u𝕣u⨮u⨵)u⦔u⨒u›u𝓇u⋊u▹u⧎u⥨u℞uśu⪴u⪸ušuşuŝu⪶u⪺u⋩u⨓uсu⋅u⩦u⇘§;u⤩u✶u𝔰u♯uщuш­uσuςu⩪u⪞u⪠u⪝u⪟u≆u⨤u⥲u⨳u⧤u⌣u⪪u⪬u⪬︀uь/u⧄u⌿u𝕤u♠u⊓︀u⊔︀u𝓈u☆u⊂u⫅u⪽u⫃u⫁u⫋u⊊u⪿u⥹u⫇u⫕u⫓u♪¹²³u⫆u⪾u⫘u⫄u⟉u⫗u⥻u⫂u⫌u⊋u⫀u⫈u⫔u⫖u⇙u⤪ßu⌖uτuťuţuтu⌕u𝔱uθuϑþ×u⨱u⨰u⌶u⫱u𝕥u⫚u‴u▵u≜u◬u⨺u⨹u⧍u⨻u⏢u𝓉uцuћuŧu⥣úuўuŭûuуuűu⥾u𝔲ùu▀u⌜u⌏u◸uūuųu𝕦uυu⇈u⌝u⌎uůu◹u𝓊u⋰uũüu⦧u⫨u⫩u⦜u⊊︀u⫋︀u⊋︀u⫌︀uвu⊻u≚u⋮u𝔳u𝕧u𝓋u⦚uŵu⩟u≙u℘u𝔴u𝕨u𝓌u𝔵uξu⋻u𝕩u𝓍ýuяuŷuы¥u𝔶uїu𝕪u𝓎uюÿuźužuзużuζu𝔷uжu⇝u𝕫u𝓏u‍u‌(ZAEligzAElig;ZAMPzAMP;ZAacutezAacute;zAbreve;ZAcirczAcirc;zAcy;zAfr;ZAgravezAgrave;zAlpha;zAmacr;zAnd;zAogon;zAopf;zApplyFunction;ZAringzAring;zAscr;zAssign;ZAtildezAtilde;ZAumlzAuml;z Backslash;zBarv;zBarwed;zBcy;zBecause;z Bernoullis;zBeta;zBfr;zBopf;zBreve;zBscr;zBumpeq;zCHcy;ZCOPYzCOPY;zCacute;zCap;zCapitalDifferentialD;zCayleys;zCcaron;ZCcedilzCcedil;zCcirc;zCconint;zCdot;zCedilla;z CenterDot;zCfr;zChi;z CircleDot;z CircleMinus;z CirclePlus;z CircleTimes;zClockwiseContourIntegral;zCloseCurlyDoubleQuote;zCloseCurlyQuote;zColon;zColone;z Congruent;zConint;zContourIntegral;zCopf;z Coproduct;z CounterClockwiseContourIntegral;zCross;zCscr;zCup;zCupCap;zDD;z DDotrahd;zDJcy;zDScy;zDZcy;zDagger;zDarr;zDashv;zDcaron;zDcy;zDel;zDelta;zDfr;zDiacriticalAcute;zDiacriticalDot;zDiacriticalDoubleAcute;zDiacriticalGrave;zDiacriticalTilde;zDiamond;zDifferentialD;zDopf;zDot;zDotDot;z 
DotEqual;zDoubleContourIntegral;z DoubleDot;zDoubleDownArrow;zDoubleLeftArrow;zDoubleLeftRightArrow;zDoubleLeftTee;zDoubleLongLeftArrow;zDoubleLongLeftRightArrow;zDoubleLongRightArrow;zDoubleRightArrow;zDoubleRightTee;zDoubleUpArrow;zDoubleUpDownArrow;zDoubleVerticalBar;z DownArrow;z DownArrowBar;zDownArrowUpArrow;z DownBreve;zDownLeftRightVector;zDownLeftTeeVector;zDownLeftVector;zDownLeftVectorBar;zDownRightTeeVector;zDownRightVector;zDownRightVectorBar;zDownTee;z DownTeeArrow;z Downarrow;zDscr;zDstrok;zENG;ZETHzETH;ZEacutezEacute;zEcaron;ZEcirczEcirc;zEcy;zEdot;zEfr;ZEgravezEgrave;zElement;zEmacr;zEmptySmallSquare;zEmptyVerySmallSquare;zEogon;zEopf;zEpsilon;zEqual;z EqualTilde;z Equilibrium;zEscr;zEsim;zEta;ZEumlzEuml;zExists;z ExponentialE;zFcy;zFfr;zFilledSmallSquare;zFilledVerySmallSquare;zFopf;zForAll;z Fouriertrf;zFscr;zGJcy;ZGTzGT;zGamma;zGammad;zGbreve;zGcedil;zGcirc;zGcy;zGdot;zGfr;zGg;zGopf;z GreaterEqual;zGreaterEqualLess;zGreaterFullEqual;zGreaterGreater;z GreaterLess;zGreaterSlantEqual;z GreaterTilde;zGscr;zGt;zHARDcy;zHacek;zHat;zHcirc;zHfr;z HilbertSpace;zHopf;zHorizontalLine;zHscr;zHstrok;z HumpDownHump;z HumpEqual;zIEcy;zIJlig;zIOcy;ZIacutezIacute;ZIcirczIcirc;zIcy;zIdot;zIfr;ZIgravezIgrave;zIm;zImacr;z ImaginaryI;zImplies;zInt;z Integral;z Intersection;zInvisibleComma;zInvisibleTimes;zIogon;zIopf;zIota;zIscr;zItilde;zIukcy;ZIumlzIuml;zJcirc;zJcy;zJfr;zJopf;zJscr;zJsercy;zJukcy;zKHcy;zKJcy;zKappa;zKcedil;zKcy;zKfr;zKopf;zKscr;zLJcy;ZLTzLT;zLacute;zLambda;zLang;z Laplacetrf;zLarr;zLcaron;zLcedil;zLcy;zLeftAngleBracket;z LeftArrow;z LeftArrowBar;zLeftArrowRightArrow;z LeftCeiling;zLeftDoubleBracket;zLeftDownTeeVector;zLeftDownVector;zLeftDownVectorBar;z LeftFloor;zLeftRightArrow;zLeftRightVector;zLeftTee;z LeftTeeArrow;zLeftTeeVector;z LeftTriangle;zLeftTriangleBar;zLeftTriangleEqual;zLeftUpDownVector;zLeftUpTeeVector;z LeftUpVector;zLeftUpVectorBar;z LeftVector;zLeftVectorBar;z Leftarrow;zLeftrightarrow;zLessEqualGreater;zLessFullEqual;z 
LessGreater;z LessLess;zLessSlantEqual;z LessTilde;zLfr;zLl;z Lleftarrow;zLmidot;zLongLeftArrow;zLongLeftRightArrow;zLongRightArrow;zLongleftarrow;zLongleftrightarrow;zLongrightarrow;zLopf;zLowerLeftArrow;zLowerRightArrow;zLscr;zLsh;zLstrok;zLt;zMap;zMcy;z MediumSpace;z Mellintrf;zMfr;z MinusPlus;zMopf;zMscr;zMu;zNJcy;zNacute;zNcaron;zNcedil;zNcy;zNegativeMediumSpace;zNegativeThickSpace;zNegativeThinSpace;zNegativeVeryThinSpace;zNestedGreaterGreater;zNestedLessLess;zNewLine;zNfr;zNoBreak;zNonBreakingSpace;zNopf;zNot;z NotCongruent;z NotCupCap;zNotDoubleVerticalBar;z NotElement;z NotEqual;zNotEqualTilde;z NotExists;z NotGreater;zNotGreaterEqual;zNotGreaterFullEqual;zNotGreaterGreater;zNotGreaterLess;zNotGreaterSlantEqual;zNotGreaterTilde;zNotHumpDownHump;z NotHumpEqual;zNotLeftTriangle;zNotLeftTriangleBar;zNotLeftTriangleEqual;zNotLess;z NotLessEqual;zNotLessGreater;z NotLessLess;zNotLessSlantEqual;z NotLessTilde;zNotNestedGreaterGreater;zNotNestedLessLess;z NotPrecedes;zNotPrecedesEqual;zNotPrecedesSlantEqual;zNotReverseElement;zNotRightTriangle;zNotRightTriangleBar;zNotRightTriangleEqual;zNotSquareSubset;zNotSquareSubsetEqual;zNotSquareSuperset;zNotSquareSupersetEqual;z NotSubset;zNotSubsetEqual;z NotSucceeds;zNotSucceedsEqual;zNotSucceedsSlantEqual;zNotSucceedsTilde;z NotSuperset;zNotSupersetEqual;z NotTilde;zNotTildeEqual;zNotTildeFullEqual;zNotTildeTilde;zNotVerticalBar;zNscr;ZNtildezNtilde;zNu;zOElig;ZOacutezOacute;ZOcirczOcirc;zOcy;zOdblac;zOfr;ZOgravezOgrave;zOmacr;zOmega;zOmicron;zOopf;zOpenCurlyDoubleQuote;zOpenCurlyQuote;zOr;zOscr;ZOslashzOslash;ZOtildezOtilde;zOtimes;ZOumlzOuml;zOverBar;z OverBrace;z OverBracket;zOverParenthesis;z PartialD;zPcy;zPfr;zPhi;zPi;z PlusMinus;zPoincareplane;zPopf;zPr;z Precedes;zPrecedesEqual;zPrecedesSlantEqual;zPrecedesTilde;zPrime;zProduct;z Proportion;z 
Proportional;zPscr;zPsi;ZQUOTzQUOT;zQfr;zQopf;zQscr;zRBarr;ZREGzREG;zRacute;zRang;zRarr;zRarrtl;zRcaron;zRcedil;zRcy;zRe;zReverseElement;zReverseEquilibrium;zReverseUpEquilibrium;zRfr;zRho;zRightAngleBracket;z RightArrow;zRightArrowBar;zRightArrowLeftArrow;z RightCeiling;zRightDoubleBracket;zRightDownTeeVector;zRightDownVector;zRightDownVectorBar;z RightFloor;z RightTee;zRightTeeArrow;zRightTeeVector;zRightTriangle;zRightTriangleBar;zRightTriangleEqual;zRightUpDownVector;zRightUpTeeVector;zRightUpVector;zRightUpVectorBar;z RightVector;zRightVectorBar;z Rightarrow;zRopf;z RoundImplies;z Rrightarrow;zRscr;zRsh;z RuleDelayed;zSHCHcy;zSHcy;zSOFTcy;zSacute;zSc;zScaron;zScedil;zScirc;zScy;zSfr;zShortDownArrow;zShortLeftArrow;zShortRightArrow;z ShortUpArrow;zSigma;z SmallCircle;zSopf;zSqrt;zSquare;zSquareIntersection;z SquareSubset;zSquareSubsetEqual;zSquareSuperset;zSquareSupersetEqual;z SquareUnion;zSscr;zStar;zSub;zSubset;z SubsetEqual;z Succeeds;zSucceedsEqual;zSucceedsSlantEqual;zSucceedsTilde;z SuchThat;zSum;zSup;z Superset;zSupersetEqual;zSupset;ZTHORNzTHORN;zTRADE;zTSHcy;zTScy;zTab;zTau;zTcaron;zTcedil;zTcy;zTfr;z Therefore;zTheta;z ThickSpace;z ThinSpace;zTilde;z TildeEqual;zTildeFullEqual;z TildeTilde;zTopf;z TripleDot;zTscr;zTstrok;ZUacutezUacute;zUarr;z Uarrocir;zUbrcy;zUbreve;ZUcirczUcirc;zUcy;zUdblac;zUfr;ZUgravezUgrave;zUmacr;z UnderBar;z UnderBrace;z UnderBracket;zUnderParenthesis;zUnion;z UnionPlus;zUogon;zUopf;zUpArrow;z UpArrowBar;zUpArrowDownArrow;z UpDownArrow;zUpEquilibrium;zUpTee;z UpTeeArrow;zUparrow;z Updownarrow;zUpperLeftArrow;zUpperRightArrow;zUpsi;zUpsilon;zUring;zUscr;zUtilde;ZUumlzUuml;zVDash;zVbar;zVcy;zVdash;zVdashl;zVee;zVerbar;zVert;z VerticalBar;z 
VerticalLine;zVerticalSeparator;zVerticalTilde;zVeryThinSpace;zVfr;zVopf;zVscr;zVvdash;zWcirc;zWedge;zWfr;zWopf;zWscr;zXfr;zXi;zXopf;zXscr;zYAcy;zYIcy;zYUcy;ZYacutezYacute;zYcirc;zYcy;zYfr;zYopf;zYscr;zYuml;zZHcy;zZacute;zZcaron;zZcy;zZdot;zZeroWidthSpace;zZeta;zZfr;zZopf;zZscr;Zaacutezaacute;zabreve;zac;zacE;zacd;Zacirczacirc;Zacutezacute;zacy;Zaeligzaelig;zaf;zafr;Zagravezagrave;zalefsym;zaleph;zalpha;zamacr;zamalg;Zampzamp;zand;zandand;zandd;z andslope;zandv;zang;zange;zangle;zangmsd;z angmsdaa;z angmsdab;z angmsdac;z angmsdad;z angmsdae;z angmsdaf;z angmsdag;z angmsdah;zangrt;zangrtvb;z angrtvbd;zangsph;zangst;zangzarr;zaogon;zaopf;zap;zapE;zapacir;zape;zapid;zapos;zapprox;z approxeq;Zaringzaring;zascr;zast;zasymp;zasympeq;Zatildezatilde;Zaumlzauml;z awconint;zawint;zbNot;z backcong;z backepsilon;z backprime;zbacksim;z backsimeq;zbarvee;zbarwed;z barwedge;zbbrk;z bbrktbrk;zbcong;zbcy;zbdquo;zbecaus;zbecause;zbemptyv;zbepsi;zbernou;zbeta;zbeth;zbetween;zbfr;zbigcap;zbigcirc;zbigcup;zbigodot;z bigoplus;z bigotimes;z bigsqcup;zbigstar;zbigtriangledown;zbigtriangleup;z biguplus;zbigvee;z bigwedge;zbkarow;z blacklozenge;z blacksquare;zblacktriangle;zblacktriangledown;zblacktriangleleft;zblacktriangleright;zblank;zblk12;zblk14;zblk34;zblock;zbne;zbnequiv;zbnot;zbopf;zbot;zbottom;zbowtie;zboxDL;zboxDR;zboxDl;zboxDr;zboxH;zboxHD;zboxHU;zboxHd;zboxHu;zboxUL;zboxUR;zboxUl;zboxUr;zboxV;zboxVH;zboxVL;zboxVR;zboxVh;zboxVl;zboxVr;zboxbox;zboxdL;zboxdR;zboxdl;zboxdr;zboxh;zboxhD;zboxhU;zboxhd;zboxhu;z boxminus;zboxplus;z boxtimes;zboxuL;zboxuR;zboxul;zboxur;zboxv;zboxvH;zboxvL;zboxvR;zboxvh;zboxvl;zboxvr;zbprime;zbreve;Zbrvbarzbrvbar;zbscr;zbsemi;zbsim;zbsime;zbsol;zbsolb;z bsolhsub;zbull;zbullet;zbump;zbumpE;zbumpe;zbumpeq;zcacute;zcap;zcapand;z capbrcup;zcapcap;zcapcup;zcapdot;zcaps;zcaret;zcaron;zccaps;zccaron;Zccedilzccedil;zccirc;zccups;zccupssm;zcdot;Zcedilzcedil;zcemptyv;Zcentzcent;z centerdot;zcfr;zchcy;zcheck;z 
checkmark;zchi;zcir;zcirE;zcirc;zcirceq;zcirclearrowleft;zcirclearrowright;z circledR;z circledS;z circledast;z circledcirc;z circleddash;zcire;z cirfnint;zcirmid;zcirscir;zclubs;z clubsuit;zcolon;zcolone;zcoloneq;zcomma;zcommat;zcomp;zcompfn;z complement;z complexes;zcong;zcongdot;zconint;zcopf;zcoprod;copyzcopy;zcopysr;zcrarr;zcross;zcscr;zcsub;zcsube;zcsup;zcsupe;zctdot;zcudarrl;zcudarrr;zcuepr;zcuesc;zcularr;zcularrp;zcup;z cupbrcap;zcupcap;zcupcup;zcupdot;zcupor;zcups;zcurarr;zcurarrm;z curlyeqprec;z curlyeqsucc;z curlyvee;z curlywedge;Zcurrenzcurren;zcurvearrowleft;zcurvearrowright;zcuvee;zcuwed;z cwconint;zcwint;zcylcty;zdArr;zdHar;zdagger;zdaleth;zdarr;zdash;zdashv;zdbkarow;zdblac;zdcaron;zdcy;zdd;zddagger;zddarr;zddotseq;Zdegzdeg;zdelta;zdemptyv;zdfisht;zdfr;zdharl;zdharr;zdiam;zdiamond;z diamondsuit;zdiams;zdie;zdigamma;zdisin;zdiv;Zdividezdivide;zdivideontimes;zdivonx;zdjcy;zdlcorn;zdlcrop;zdollar;zdopf;zdot;zdoteq;z doteqdot;z dotminus;zdotplus;z dotsquare;zdoublebarwedge;z downarrow;zdowndownarrows;zdownharpoonleft;zdownharpoonright;z drbkarow;zdrcorn;zdrcrop;zdscr;zdscy;zdsol;zdstrok;zdtdot;zdtri;zdtrif;zduarr;zduhar;zdwangle;zdzcy;z dzigrarr;zeDDot;zeDot;Zeacutezeacute;zeaster;zecaron;zecir;Zecirczecirc;zecolon;zecy;zedot;zee;zefDot;zefr;zeg;Zegravezegrave;zegs;zegsdot;zel;z elinters;zell;zels;zelsdot;zemacr;zempty;z emptyset;zemptyv;zemsp13;zemsp14;zemsp;zeng;zensp;zeogon;zeopf;zepar;zeparsl;zeplus;zepsi;zepsilon;zepsiv;zeqcirc;zeqcolon;zeqsim;z eqslantgtr;z eqslantless;zequals;zequest;zequiv;zequivDD;z eqvparsl;zerDot;zerarr;zescr;zesdot;zesim;zeta;Zethzeth;Zeumlzeuml;zeuro;zexcl;zexist;z expectation;z exponentiale;zfallingdotseq;zfcy;zfemale;zffilig;zfflig;zffllig;zffr;zfilig;zfjlig;zflat;zfllig;zfltns;zfnof;zfopf;zforall;zfork;zforkv;z 
fpartint;Zfrac12zfrac12;zfrac13;Zfrac14zfrac14;zfrac15;zfrac16;zfrac18;zfrac23;zfrac25;Zfrac34zfrac34;zfrac35;zfrac38;zfrac45;zfrac56;zfrac58;zfrac78;zfrasl;zfrown;zfscr;zgE;zgEl;zgacute;zgamma;zgammad;zgap;zgbreve;zgcirc;zgcy;zgdot;zge;zgel;zgeq;zgeqq;z geqslant;zges;zgescc;zgesdot;zgesdoto;z gesdotol;zgesl;zgesles;zgfr;zgg;zggg;zgimel;zgjcy;zgl;zglE;zgla;zglj;zgnE;zgnap;z gnapprox;zgne;zgneq;zgneqq;zgnsim;zgopf;zgrave;zgscr;zgsim;zgsime;zgsiml;gtzgt;zgtcc;zgtcir;zgtdot;zgtlPar;zgtquest;z gtrapprox;zgtrarr;zgtrdot;z gtreqless;z gtreqqless;zgtrless;zgtrsim;z gvertneqq;zgvnE;zhArr;zhairsp;zhalf;zhamilt;zhardcy;zharr;zharrcir;zharrw;zhbar;zhcirc;zhearts;z heartsuit;zhellip;zhercon;zhfr;z hksearow;z hkswarow;zhoarr;zhomtht;zhookleftarrow;zhookrightarrow;zhopf;zhorbar;zhscr;zhslash;zhstrok;zhybull;zhyphen;Ziacuteziacute;zic;Zicirczicirc;zicy;ziecy;Ziexclziexcl;ziff;zifr;Zigravezigrave;zii;ziiiint;ziiint;ziinfin;ziiota;zijlig;zimacr;zimage;z imagline;z imagpart;zimath;zimof;zimped;zin;zincare;zinfin;z infintie;zinodot;zint;zintcal;z integers;z intercal;z intlarhk;zintprod;ziocy;ziogon;ziopf;ziota;ziprod;Ziquestziquest;ziscr;zisin;zisinE;zisindot;zisins;zisinsv;zisinv;zit;zitilde;ziukcy;Ziumlziuml;zjcirc;zjcy;zjfr;zjmath;zjopf;zjscr;zjsercy;zjukcy;zkappa;zkappav;zkcedil;zkcy;zkfr;zkgreen;zkhcy;zkjcy;zkopf;zkscr;zlAarr;zlArr;zlAtail;zlBarr;zlE;zlEg;zlHar;zlacute;z laemptyv;zlagran;zlambda;zlang;zlangd;zlangle;zlap;Zlaquozlaquo;zlarr;zlarrb;zlarrbfs;zlarrfs;zlarrhk;zlarrlp;zlarrpl;zlarrsim;zlarrtl;zlat;zlatail;zlate;zlates;zlbarr;zlbbrk;zlbrace;zlbrack;zlbrke;zlbrksld;zlbrkslu;zlcaron;zlcedil;zlceil;zlcub;zlcy;zldca;zldquo;zldquor;zldrdhar;z ldrushar;zldsh;zle;z leftarrow;zleftarrowtail;zleftharpoondown;zleftharpoonup;zleftleftarrows;zleftrightarrow;zleftrightarrows;zleftrightharpoons;zleftrightsquigarrow;zleftthreetimes;zleg;zleq;zleqq;z leqslant;zles;zlescc;zlesdot;zlesdoto;z lesdotor;zlesg;zlesges;z lessapprox;zlessdot;z lesseqgtr;z 
lesseqqgtr;zlessgtr;zlesssim;zlfisht;zlfloor;zlfr;zlg;zlgE;zlhard;zlharu;zlharul;zlhblk;zljcy;zll;zllarr;z llcorner;zllhard;zlltri;zlmidot;zlmoust;z lmoustache;zlnE;zlnap;z lnapprox;zlne;zlneq;zlneqq;zlnsim;zloang;zloarr;zlobrk;zlongleftarrow;zlongleftrightarrow;z longmapsto;zlongrightarrow;zlooparrowleft;zlooparrowright;zlopar;zlopf;zloplus;zlotimes;zlowast;zlowbar;zloz;zlozenge;zlozf;zlpar;zlparlt;zlrarr;z lrcorner;zlrhar;zlrhard;zlrm;zlrtri;zlsaquo;zlscr;zlsh;zlsim;zlsime;zlsimg;zlsqb;zlsquo;zlsquor;zlstrok;ltzlt;zltcc;zltcir;zltdot;zlthree;zltimes;zltlarr;zltquest;zltrPar;zltri;zltrie;zltrif;z lurdshar;zluruhar;z lvertneqq;zlvnE;zmDDot;Zmacrzmacr;zmale;zmalt;zmaltese;zmap;zmapsto;z mapstodown;z mapstoleft;z mapstoup;zmarker;zmcomma;zmcy;zmdash;zmeasuredangle;zmfr;zmho;microzmicro;zmid;zmidast;zmidcir;Zmiddotzmiddot;zminus;zminusb;zminusd;zminusdu;zmlcp;zmldr;zmnplus;zmodels;zmopf;zmp;zmscr;zmstpos;zmu;z multimap;zmumap;znGg;znGt;znGtv;z nLeftarrow;znLeftrightarrow;znLl;znLt;znLtv;z nRightarrow;znVDash;znVdash;znabla;znacute;znang;znap;znapE;znapid;znapos;znapprox;znatur;znatural;z naturals;Znbspznbsp;znbump;znbumpe;zncap;zncaron;zncedil;zncong;z ncongdot;zncup;zncy;zndash;zne;zneArr;znearhk;znearr;znearrow;znedot;znequiv;znesear;znesim;znexist;znexists;znfr;zngE;znge;zngeq;zngeqq;z ngeqslant;znges;zngsim;zngt;zngtr;znhArr;znharr;znhpar;zni;znis;znisd;zniv;znjcy;znlArr;znlE;znlarr;znldr;znle;z nleftarrow;znleftrightarrow;znleq;znleqq;z nleqslant;znles;znless;znlsim;znlt;znltri;znltrie;znmid;znopf;notznot;znotin;znotinE;z notindot;znotinva;znotinvb;znotinvc;znotni;znotniva;znotnivb;znotnivc;znpar;z nparallel;znparsl;znpart;znpolint;znpr;znprcue;znpre;znprec;znpreceq;znrArr;znrarr;znrarrc;znrarrw;z nrightarrow;znrtri;znrtrie;znsc;znsccue;znsce;znscr;z nshortmid;znshortparallel;znsim;znsime;znsimeq;znsmid;znspar;znsqsube;znsqsupe;znsub;znsubE;znsube;znsubset;z nsubseteq;z nsubseteqq;znsucc;znsucceq;znsup;znsupE;znsupe;znsupset;z nsupseteq;z 
nsupseteqq;zntgl;Zntildezntilde;zntlg;zntriangleleft;zntrianglelefteq;zntriangleright;zntrianglerighteq;znu;znum;znumero;znumsp;znvDash;znvHarr;znvap;znvdash;znvge;znvgt;znvinfin;znvlArr;znvle;znvlt;znvltrie;znvrArr;znvrtrie;znvsim;znwArr;znwarhk;znwarr;znwarrow;znwnear;zoS;Zoacutezoacute;zoast;zocir;Zocirczocirc;zocy;zodash;zodblac;zodiv;zodot;zodsold;zoelig;zofcir;zofr;zogon;Zogravezograve;zogt;zohbar;zohm;zoint;zolarr;zolcir;zolcross;zoline;zolt;zomacr;zomega;zomicron;zomid;zominus;zoopf;zopar;zoperp;zoplus;zor;zorarr;zord;zorder;zorderof;Zordfzordf;Zordmzordm;zorigof;zoror;zorslope;zorv;zoscr;Zoslashzoslash;zosol;Zotildezotilde;zotimes;z otimesas;Zoumlzouml;zovbar;zpar;Zparazpara;z parallel;zparsim;zparsl;zpart;zpcy;zpercnt;zperiod;zpermil;zperp;zpertenk;zpfr;zphi;zphiv;zphmmat;zphone;zpi;z pitchfork;zpiv;zplanck;zplanckh;zplankv;zplus;z plusacir;zplusb;zpluscir;zplusdo;zplusdu;zpluse;Zplusmnzplusmn;zplussim;zplustwo;zpm;z pointint;zpopf;Zpoundzpound;zpr;zprE;zprap;zprcue;zpre;zprec;z precapprox;z preccurlyeq;zpreceq;z precnapprox;z precneqq;z precnsim;zprecsim;zprime;zprimes;zprnE;zprnap;zprnsim;zprod;z profalar;z profline;z profsurf;zprop;zpropto;zprsim;zprurel;zpscr;zpsi;zpuncsp;zqfr;zqint;zqopf;zqprime;zqscr;z quaternions;zquatint;zquest;zquesteq;Zquotzquot;zrAarr;zrArr;zrAtail;zrBarr;zrHar;zrace;zracute;zradic;z raemptyv;zrang;zrangd;zrange;zrangle;Zraquozraquo;zrarr;zrarrap;zrarrb;zrarrbfs;zrarrc;zrarrfs;zrarrhk;zrarrlp;zrarrpl;zrarrsim;zrarrtl;zrarrw;zratail;zratio;z rationals;zrbarr;zrbbrk;zrbrace;zrbrack;zrbrke;zrbrksld;zrbrkslu;zrcaron;zrcedil;zrceil;zrcub;zrcy;zrdca;zrdldhar;zrdquo;zrdquor;zrdsh;zreal;zrealine;z realpart;zreals;zrect;Zregzreg;zrfisht;zrfloor;zrfr;zrhard;zrharu;zrharul;zrho;zrhov;z rightarrow;zrightarrowtail;zrightharpoondown;zrightharpoonup;zrightleftarrows;zrightleftharpoons;zrightrightarrows;zrightsquigarrow;zrightthreetimes;zring;z risingdotseq;zrlarr;zrlhar;zrlm;zrmoust;z 
rmoustache;zrnmid;zroang;zroarr;zrobrk;zropar;zropf;zroplus;zrotimes;zrpar;zrpargt;z rppolint;zrrarr;zrsaquo;zrscr;zrsh;zrsqb;zrsquo;zrsquor;zrthree;zrtimes;zrtri;zrtrie;zrtrif;z rtriltri;zruluhar;zrx;zsacute;zsbquo;zsc;zscE;zscap;zscaron;zsccue;zsce;zscedil;zscirc;zscnE;zscnap;zscnsim;z scpolint;zscsim;zscy;zsdot;zsdotb;zsdote;zseArr;zsearhk;zsearr;zsearrow;Zsectzsect;zsemi;zseswar;z setminus;zsetmn;zsext;zsfr;zsfrown;zsharp;zshchcy;zshcy;z shortmid;zshortparallel;Zshyzshy;zsigma;zsigmaf;zsigmav;zsim;zsimdot;zsime;zsimeq;zsimg;zsimgE;zsiml;zsimlE;zsimne;zsimplus;zsimrarr;zslarr;zsmallsetminus;zsmashp;z smeparsl;zsmid;zsmile;zsmt;zsmte;zsmtes;zsoftcy;zsol;zsolb;zsolbar;zsopf;zspades;z spadesuit;zspar;zsqcap;zsqcaps;zsqcup;zsqcups;zsqsub;zsqsube;z sqsubset;z sqsubseteq;zsqsup;zsqsupe;z sqsupset;z sqsupseteq;zsqu;zsquare;zsquarf;zsquf;zsrarr;zsscr;zssetmn;zssmile;zsstarf;zstar;zstarf;zstraightepsilon;z straightphi;zstrns;zsub;zsubE;zsubdot;zsube;zsubedot;zsubmult;zsubnE;zsubne;zsubplus;zsubrarr;zsubset;z subseteq;z subseteqq;z subsetneq;z subsetneqq;zsubsim;zsubsub;zsubsup;zsucc;z succapprox;z succcurlyeq;zsucceq;z succnapprox;z succneqq;z succnsim;zsuccsim;zsum;zsung;Zsup1zsup1;Zsup2zsup2;Zsup3zsup3;zsup;zsupE;zsupdot;zsupdsub;zsupe;zsupedot;zsuphsol;zsuphsub;zsuplarr;zsupmult;zsupnE;zsupne;zsupplus;zsupset;z supseteq;z supseteqq;z supsetneq;z supsetneqq;zsupsim;zsupsub;zsupsup;zswArr;zswarhk;zswarr;zswarrow;zswnwar;Zszligzszlig;ztarget;ztau;ztbrk;ztcaron;ztcedil;ztcy;ztdot;ztelrec;ztfr;zthere4;z therefore;ztheta;z thetasym;zthetav;z thickapprox;z thicksim;zthinsp;zthkap;zthksim;Zthornzthorn;ztilde;timesztimes;ztimesb;z timesbar;ztimesd;ztint;ztoea;ztop;ztopbot;ztopcir;ztopf;ztopfork;ztosa;ztprime;ztrade;z triangle;z triangledown;z triangleleft;ztrianglelefteq;z triangleq;ztriangleright;ztrianglerighteq;ztridot;ztrie;z triminus;ztriplus;ztrisb;ztritime;z 
trpezium;ztscr;ztscy;ztshcy;ztstrok;ztwixt;ztwoheadleftarrow;ztwoheadrightarrow;zuArr;zuHar;Zuacutezuacute;zuarr;zubrcy;zubreve;Zucirczucirc;zucy;zudarr;zudblac;zudhar;zufisht;zufr;Zugravezugrave;zuharl;zuharr;zuhblk;zulcorn;z ulcorner;zulcrop;zultri;zumacr;Zumlzuml;zuogon;zuopf;zuparrow;z updownarrow;zupharpoonleft;zupharpoonright;zuplus;zupsi;zupsih;zupsilon;z upuparrows;zurcorn;z urcorner;zurcrop;zuring;zurtri;zuscr;zutdot;zutilde;zutri;zutrif;zuuarr;Zuumlzuuml;zuwangle;zvArr;zvBar;zvBarv;zvDash;zvangrt;z varepsilon;z varkappa;z varnothing;zvarphi;zvarpi;z varpropto;zvarr;zvarrho;z varsigma;z varsubsetneq;zvarsubsetneqq;z varsupsetneq;zvarsupsetneqq;z vartheta;zvartriangleleft;zvartriangleright;zvcy;zvdash;zvee;zveebar;zveeeq;zvellip;zverbar;zvert;zvfr;zvltri;zvnsub;zvnsup;zvopf;zvprop;zvrtri;zvscr;zvsubnE;zvsubne;zvsupnE;zvsupne;zvzigzag;zwcirc;zwedbar;zwedge;zwedgeq;zweierp;zwfr;zwopf;zwp;zwr;zwreath;zwscr;zxcap;zxcirc;zxcup;zxdtri;zxfr;zxhArr;zxharr;zxi;zxlArr;zxlarr;zxmap;zxnis;zxodot;zxopf;zxoplus;zxotime;zxrArr;zxrarr;zxscr;zxsqcup;zxuplus;zxutri;zxvee;zxwedge;Zyacutezyacute;zyacy;zycirc;zycy;Zyenzyen;zyfr;zyicy;zyopf;zyscr;zyucy;Zyumlzyuml;zzacute;zzcaron;zzcy;zzdot;zzeetrf;zzeta;zzfr;zzhcy;zzigrarr;zzopf;zzscr;zzwj;zzwnj;u�)"r )ZDoctypeZ CharactersZSpaceCharactersStartTagEndTagEmptyTagCommentZ ParseErrorrrrcCsg|]\}}||fqSr2r2)r3kvr2r2r6r7x sZmathc@s eZdZdS)DataLossWarningN)__name__ __module__ __qualname__r2r2r2r6r| src@s eZdZdS)ReparseExceptionN)rrrr2r2r2r6r sr)rr r!r"r#r$) rGrHrIrJrKrLrMrNrOrPrQrRrSrHrTrHrHrUrVrWrXrYrZr[r\r]r^r_r`rHrarb)0Z __future__rrrstringZEOFEZ namespaces frozensetZscopingElementsZformattingElementsZspecialElementsZhtmlIntegrationPointElementsZ"mathmlTextIntegrationPointElementsZadjustSVGAttributesZadjustMathMLAttributesZadjustForeignAttributesdictitemsZunadjustForeignAttributesZspaceCharactersZtableInsertModeElementsZascii_lowercaseZasciiLowercaseZascii_uppercaseZasciiUppercaseZ ascii_lettersZ asciiLettersZdigitsZ hexdigitsZ 
hexDigitsZasciiUpper2LowerZheadingElementsZ voidElementsZ cdataElementsZrcdataElementsZbooleanAttributesZentitiesWindows1252Z xmlEntitiesZentitiesZreplacementCharactersZ tokenTypesZ tagTokenTypesprefixes UserWarningr Exceptionrr2r2r2r6s<                                                                                                                                       __pycache__/html5parser.cpython-36.opt-1.pyc000064400000275565147204715120014636 0ustar003 B;W @sFddlmZmZmZddlmZmZmZddlZyddl m Z Wn e k r`ddl m Z YnXddl mZddl mZddl mZdd lmZdd l mZdd lmZmZmZmZmZmZmZmZmZmZmZm Z!m"Z"m#Z#m$Z$m%Z%d!ddZ&d"ddZ'ddZ(Gddde)Z*ej+ddZ,ddZ-d#ddZ.Gdd d e/Z0dS)$)absolute_importdivisionunicode_literals)with_metaclassviewkeysPY3N) OrderedDict) _inputstream) _tokenizer) treebuilders)Marker)_utils)spaceCharactersasciiUpper2LowerspecialElementsheadingElements cdataElementsrcdataElements tokenTypes tagTokenTypes namespaceshtmlIntegrationPointElements"mathmlTextIntegrationPointElementsadjustForeignAttributesadjustMathMLAttributesadjustSVGAttributesEReparseExceptionetreeTcKs$tj|}t||d}|j|f|S)z.Parse a string or file-like object into a tree)namespaceHTMLElements)r getTreeBuilder HTMLParserparse)doc treebuilderr kwargstbpr)!/usr/lib/python3.6/html5parser.pyr#s  r#divcKs,tj|}t||d}|j|fd|i|S)N)r container)r r!r" parseFragment)r$r,r%r r&r'r(r)r)r*r-&s  r-csGfdddt}|S)NcseZdZfddZdS)z-method_decorator_metaclass..DecoratedcsBx0|jD]$\}}t|tjr&|}|||<q Wtj||||S)N)items isinstancetypes FunctionTypetype__new__)metaZ classnamebasesZ classDictZ attributeNameZ attribute)functionr)r*r3.s   z5method_decorator_metaclass..Decorated.__new__N)__name__ __module__ __qualname__r3r))r6r)r* Decorated-sr:)r2)r6r:r))r6r*method_decorator_metaclass,sr;c@seZdZdZd+ddZd,dd Zd d Zed d ZddZ ddZ ddZ ddZ ddZ ddZd-ddZddZdd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*ZdS).r"zZHTML parser. 
Generates a tree structure from a stream of (possibly malformed) HTMLNFTcsL|_|dkrtjd}||_g_tfddt|jD_dS)a strict - raise an exception when a parse error is encountered tree - a treebuilder class controlling the type of tree that will be returned. Built in treebuilders can be accessed through html5lib.treebuilders.getTreeBuilder(treeType) Nrcs g|]\}}||jfqSr))tree).0namecls)selfr)r* Msz'HTMLParser.__init__..) strictr r!r<errorsdict getPhasesr.phases)r@r<rBr debugr))r@r*__init__<s    zHTMLParser.__init__r+c Ksh||_||_||_tj|fd|i||_|jy |jWn$tk rb|j|jYnXdS)Nparser) innerHTMLModer, scriptingr Z HTMLTokenizer tokenizerresetmainLoopr)r@stream innerHTMLr,rKr&r)r)r*_parsePs zHTMLParser._parsecCs|jjd|_g|_g|_d|_|jr|jj|_ |j t krL|j j |j _ n0|j tkrd|j j|j _ n|j dkr||j j|j _ n|jd|_|jj|jnd|_ |jd|_d|_d|_d|_dS)NFz no quirks plaintext beforeHtmlinitialT)r<rM firstStartTagrClog compatModerJr,lowerrPrrL rcdataStatestater rawtextStateplaintextStaterFphaseinsertHtmlElementresetInsertionModeZ lastPhaseZbeforeRCDataPhase framesetOK)r@r)r)r*rM^s*         zHTMLParser.resetcCst|dsdS|jjjdjS)zThe name of the character encoding that was used to decode the input stream, or :obj:`None` if that is not determined yet. 
rLNr)hasattrrLrO charEncodingr>)r@r)r)r*documentEncodings zHTMLParser.documentEncodingcCsJ|jdkr6|jtdkr6d|jko4|jdjtdkS|j|jftkSdS)Nzannotation-xmlmathmlencoding text/htmlapplication/xhtml+xml)rfrg)r> namespacer attributes translaterr)r@elementr)r)r*isHTMLIntegrationPoints   z!HTMLParser.isHTMLIntegrationPointcCs|j|jftkS)N)rhr>r)r@rkr)r)r*isMathMLTextIntegrationPointsz'HTMLParser.isMathMLTextIntegrationPointcCsjtd}td}td}td}td}td}td}x|jD]}d} |} x| dk r| } |jjrx|jjdnd} | r| jnd} | r| jnd} | d }||kr|j| d | jd id} qVt|jjd ksl| |jj ksl|j | r ||kr|d t ddgksl|||fksl| t dkrP| dkrP||krP|d dksl|j | rt||||fkrt|j}n |jd}||kr|j| } qV||kr|j| } qV||kr|j| } qV||kr|j| } qV||kr|j| } qV||krV|j| } qVW||krD| drD| d rD|jdd | d iqDWd}g}x(|rd|j|j|jj}|r>q>WdS)N CharactersZSpaceCharactersStartTagEndTagCommentZDoctype ParseErrorr r2datadatavarsrr>ZmglyphZ malignmarkrdzannotation-xmlsvginForeignContent selfClosingselfClosingAcknowledgedz&non-void-element-with-trailing-solidusT)rnormalizedTokensr< openElementsrhr> parseErrorgetlendefaultNamespacerm frozensetrrlr]rFprocessCharactersprocessSpaceCharactersprocessStartTag processEndTagprocessCommentprocessDoctypeappend processEOF)r@ZCharactersTokenZSpaceCharactersTokenZ StartTagTokenZ EndTagTokenZ CommentTokenZ DoctypeTokenZParseErrorTokentokenZ prev_token new_token currentNodeZcurrentNodeNamespaceZcurrentNodeNamer2r]Z reprocessrFr)r)r*rNsp                   zHTMLParser.mainLoopccs x|jD]}|j|VqWdS)N)rLnormalizeToken)r@rr)r)r*rzs zHTMLParser.normalizedTokenscOs |j|ddf|||jjS)aParse a HTML document into a well-formed tree stream - a filelike object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. 
If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) scripting - treat noscript elements as if javascript was turned on FN)rQr<Z getDocument)r@rOargsr&r)r)r*r#s zHTMLParser.parsecOs|j|df|||jjS)a2Parse a HTML fragment into a well-formed tree fragment container - name of the element we're setting the innerHTML property if set to None, default to 'div' stream - a filelike object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) scripting - treat noscript elements as if javascript was turned on T)rQr<Z getFragment)r@rOrr&r)r)r*r-szHTMLParser.parseFragmentXXX-undefined-errorcCs@|dkr i}|jj|jjj||f|jrrPrhrrFr])r@ZlastZnewModesnodeZnodeNameZ new_phaser)r)r*r_!s> zHTMLParser.resetInsertionModecCsF|jj||dkr"|jj|j_n |jj|j_|j|_|jd|_dS)zYGeneric RCDATA/RAWTEXT Parsing algorithm contentType - RCDATA or RAWTEXT RAWTEXTtextN) r< insertElementrLr[rZrYr] originalPhaserF)r@rZ contentTyper)r)r*parseRCDataRawtextMs   zHTMLParser.parseRCDataRawtext)NFTF)Fr+F)rN)r7r8r9__doc__rHrQrMpropertyrcrlrmrNrzr#r-r|rrrrrr_rr)r)r)r*r"8s&  "  C  ,r"cs"dd}dd}Gdddt|||Gddd}Gd d d }Gfd d d }Gfd dd}Gfddd}Gfddd}Gfddd} Gfddd} Gfddd} Gfddd} Gfddd} Gfddd}Gfdd d }Gfd!d"d"}Gfd#d$d$}Gfd%d&d&}Gfd'd(d(}Gfd)d*d*}Gfd+d,d,}Gfd-d.d.}Gfd/d0d0}Gfd1d2d2}Gfd3d4d4}||||||| | | | | ||||||||||||d5S)6Ncs(tddtjDfdd}|S)z4Logger that records which phase processes each tokencss|]\}}||fVqdS)Nr))r=keyvaluer)r)r* csz)getPhases..log..c sjjdrt|dkr|d}yd|di}WnYnX|dtkr\|d|d<|jjj|jjjj|jj j j|j jj|f|f||S|f||SdS)NZprocessrr2r>) r7 startswithr~rrIrVrrLrZr] __class__)r@rr&rinfo)r6 type_namesr)r*wrappedfs   z'getPhases..log..wrapped)rDrr.)r6rr))r6rr*rVaszgetPhases..logcSs|r t|StSdS)N)r;r2)Z use_metaclassZmetaclass_funcr)r)r* getMetaclasszszgetPhases..getMetaclassc@sXeZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ 
dS)zgetPhases..PhasezNBase class for helper object that implements each phase of processing cSs||_||_dS)N)rIr<)r@rIr<r)r)r*rHsz!getPhases..Phase.__init__cSstdS)N)NotImplementedError)r@r)r)r*rsz#getPhases..Phase.processEOFcSs|jj||jjddS)Nr ry)r< insertCommentr{)r@rr)r)r*rsz'getPhases..Phase.processCommentcSs|jjddS)Nzunexpected-doctype)rIr|)r@rr)r)r*rsz'getPhases..Phase.processDoctypecSs|jj|ddS)Nrs)r< insertText)r@rr)r)r*rsz*getPhases..Phase.processCharacterscSs|jj|ddS)Nrs)r<r)r@rr)r)r*rsz/getPhases..Phase.processSpaceCharacterscSs|j|d|S)Nr>)startTagHandler)r@rr)r)r*rsz(getPhases..Phase.processStartTagcSsl|jj r"|ddkr"|jjdx<|djD],\}}||jjdjkr0||jjdj|<q0Wd|j_dS)Nr>rz non-html-rootrsrF)rIrUr|r.r<r{ri)r@rattrrr)r)r* startTagHtmls  z%getPhases..Phase.startTagHtmlcSs|j|d|S)Nr>) endTagHandler)r@rr)r)r*rsz&getPhases..Phase.processEndTagN) r7r8r9rrHrrrrrrrrr)r)r)r*Phases rc@sLeZdZddZddZddZddZd d Zd d Zd dZ ddZ dS)zgetPhases..InitialPhasecSsdS)Nr))r@rr)r)r*rsz6getPhases..InitialPhase.processSpaceCharacterscSs|jj||jjdS)N)r<rdocument)r@rr)r)r*rsz.getPhases..InitialPhase.processCommentc8Ss|d}|d}|d}|d}|dks@|dk s@|dk rL|dkrL|jjd|dkrXd}|jj||dkrv|jt}| s|ddks|jdJs|dKks|jdLr|dks|r|jdDkrdE|j_n*|jdMs|jdNr|dk rdH|j_|jj dI|j_ dS)ONr>publicIdsystemIdcorrectrzabout:legacy-compatzunknown-doctype*+//silmaril//dtd html pro v0r11 19970101//4-//advasoft ltd//dtd html 3.0 aswedit + extensions//*-//as//dtd html 3.0 aswedit + extensions//-//ietf//dtd html 2.0 level 1//-//ietf//dtd html 2.0 level 2//&-//ietf//dtd html 2.0 strict level 1//&-//ietf//dtd html 2.0 strict level 2//-//ietf//dtd html 2.0 strict//-//ietf//dtd html 2.0//-//ietf//dtd html 2.1e//-//ietf//dtd html 3.0//-//ietf//dtd html 3.2 final//-//ietf//dtd html 3.2//-//ietf//dtd html 3//-//ietf//dtd html level 0//-//ietf//dtd html level 1//-//ietf//dtd html level 2//-//ietf//dtd html level 3//"-//ietf//dtd html strict level 0//"-//ietf//dtd html strict level 1//"-//ietf//dtd html strict level 2//"-//ietf//dtd html strict level 3//-//ietf//dtd 
html strict//-//ietf//dtd html//(-//metrius//dtd metrius presentational//5-//microsoft//dtd internet explorer 2.0 html strict//.-//microsoft//dtd internet explorer 2.0 html//0-//microsoft//dtd internet explorer 2.0 tables//5-//microsoft//dtd internet explorer 3.0 html strict//.-//microsoft//dtd internet explorer 3.0 html//0-//microsoft//dtd internet explorer 3.0 tables//#-//netscape comm. corp.//dtd html//*-//netscape comm. corp.//dtd strict html//*-//o'reilly and associates//dtd html 2.0//3-//o'reilly and associates//dtd html extended 1.0//;-//o'reilly and associates//dtd html extended relaxed 1.0//N-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//E-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//$-//spyglass//dtd html 2.0 extended//+-//sq//dtd html 2.0 hotmetal + extensions//--//sun microsystems corp.//dtd hotjava html//4-//sun microsystems corp.//dtd hotjava strict html//-//w3c//dtd html 3 1995-03-24//-//w3c//dtd html 3.2 draft//-//w3c//dtd html 3.2 final//-//w3c//dtd html 3.2//-//w3c//dtd html 3.2s draft//-//w3c//dtd html 4.0 frameset//#-//w3c//dtd html 4.0 transitional//(-//w3c//dtd html experimental 19960712//&-//w3c//dtd html experimental 970421//-//w3c//dtd w3 html//-//w3o//dtd w3 html 3.0//#-//webtechs//dtd mozilla html 2.0//-//webtechs//dtd mozilla html//$-//w3o//dtd w3 html strict 3.0//en//"-/w3c/dtd html 4.0 transitional/en -//w3c//dtd html 4.01 frameset//$-//w3c//dtd html 4.01 transitional//z:http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtdquirks -//w3c//dtd xhtml 1.0 frameset//$-//w3c//dtd xhtml 1.0 transitional//zlimited quirksrS)7rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr)rrr)rr)rr)rr) rIr|r<Z insertDoctyperjrrrXrWrFr])r@rr>rrrr)r)r*rs      z.getPhases..InitialPhase.processDoctypecSsd|j_|jjd|j_dS)NrrS)rIrWrFr])r@r)r)r* anythingElsesz,getPhases..InitialPhase.anythingElsecSs|jjd|j|S)Nzexpected-doctype-but-got-chars)rIr|r )r@rr)r)r*rs 
z1getPhases..InitialPhase.processCharacterscSs"|jjdd|di|j|S)Nz"expected-doctype-but-got-start-tagr>)rIr|r )r@rr)r)r*rsz/getPhases..InitialPhase.processStartTagcSs"|jjdd|di|j|S)Nz expected-doctype-but-got-end-tagr>)rIr|r )r@rr)r)r*rsz-getPhases..InitialPhase.processEndTagcSs|jjd|jdS)Nzexpected-doctype-but-got-eofT)rIr|r )r@r)r)r*r%s z*getPhases..InitialPhase.processEOFN) r7r8r9rrrr rrrrr)r)r)r* InitialPhases_r c@sDeZdZddZddZddZddZd d Zd d Zd dZ dS)z"getPhases..BeforeHtmlPhasecSs&|jjtdd|jjd|j_dS)Nrror)r<Z insertRootimpliedTagTokenrIrFr])r@r)r)r*r^,sz4getPhases..BeforeHtmlPhase.insertHtmlElementcSs |jdS)NT)r^)r@r)r)r*r1sz-getPhases..BeforeHtmlPhase.processEOFcSs|jj||jjdS)N)r<rr)r@rr)r)r*r5sz1getPhases..BeforeHtmlPhase.processCommentcSsdS)Nr))r@rr)r)r*r8sz9getPhases..BeforeHtmlPhase.processSpaceCharacterscSs |j|S)N)r^)r@rr)r)r*r;sz4getPhases..BeforeHtmlPhase.processCharacterscSs |ddkrd|j_|j|S)Nr>rT)rIrUr^)r@rr)r)r*r?s z2getPhases..BeforeHtmlPhase.processStartTagcSs4|ddkr$|jjdd|din |j|SdS)Nr>rrrbrzunexpected-end-tag-before-html)rrrr )rIr|r^)r@rr)r)r*rEs  z0getPhases..BeforeHtmlPhase.processEndTagN) r7r8r9r^rrrrrrr)r)r)r*BeforeHtmlPhase*sr csXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z"getPhases..BeforeHeadPhasecsVj|||tjd|jfd|jfg|_|j|j_tjd|jfg|_ |j |j _dS)Nrrrr )rrrr ) rHrMethodDispatcherr startTagHeadr startTagOtherdefaultendTagImplyHeadr endTagOther)r@rIr<)rr)r*rHNs z+getPhases..BeforeHeadPhase.__init__cSs|jtdddS)NrroT)rr )r@r)r)r*r\sz-getPhases..BeforeHeadPhase.processEOFcSsdS)Nr))r@rr)r)r*r`sz9getPhases..BeforeHeadPhase.processSpaceCharacterscSs|jtdd|S)Nrro)rr )r@rr)r)r*rcsz4getPhases..BeforeHeadPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rgsz/getPhases..BeforeHeadPhase.startTagHtmlcSs0|jj||jjd|j_|jjd|j_dS)Nr inHeadry)r<rr{ headPointerrIrFr])r@rr)r)r*rjs z/getPhases..BeforeHeadPhase.startTagHeadcSs|jtdd|S)Nrro)rr )r@rr)r)r*rosz0getPhases..BeforeHeadPhase.startTagOthercSs|jtdd|S)Nrro)rr 
)r@rr)r)r*rssz2getPhases..BeforeHeadPhase.endTagImplyHeadcSs|jjdd|didS)Nzend-tag-after-implied-rootr>)rIr|)r@rr)r)r*rwsz.getPhases..BeforeHeadPhase.endTagOtherN) r7r8r9rHrrrrrrrrr))rr)r*BeforeHeadPhaseMs rcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!S)"zgetPhases..InHeadPhasec sj|||tjd|jfd|jfd|jfd|jfd|jfd|jfd |j fd |j fg|_ |j |j _ tjd |jfd|jfg|_|j|j_ dS)Nrtitlenoframesstylenoscriptscriptbasebasefontbgsoundcommandlinkr4rr r)rr)rrrrr )r rr)rHrrr startTagTitlestartTagNoFramesStylestartTagNoscriptstartTagScriptstartTagBaseLinkCommand startTagMetarrrr endTagHeadendTagHtmlBodyBrrr)r@rIr<)rr)r*rH|s  z'getPhases..InHeadPhase.__init__cSs |jdS)NT)r )r@r)r)r*rsz)getPhases..InHeadPhase.processEOFcSs |j|S)N)r )r@rr)r)r*rsz0getPhases..InHeadPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz+getPhases..InHeadPhase.startTagHtmlcSs|jjddS)Nz!two-heads-are-not-better-than-one)rIr|)r@rr)r)r*rsz+getPhases..InHeadPhase.startTagHeadcSs$|jj||jjjd|d<dS)NTrx)r<rr{pop)r@rr)r)r*r%s  z6getPhases..InHeadPhase.startTagBaseLinkCommandcSs|jj||jjjd|d<|d}|jjjjddkrd|krZ|jjjj|dnVd|krd|kr|dj d krt j |dj d }t j |}|j}|jjjj|dS) NTrxrsr Z tentativecharsetZcontentz http-equivz content-typezutf-8)r<rr{r)rIrLrOrbZchangeEncodingrXr Z EncodingBytesencodeZContentAttrParserr#)r@rrirsrIcodecr)r)r*r&s   z+getPhases..InHeadPhase.startTagMetacSs|jj|ddS)NZRCDATA)rIr)r@rr)r)r*r!sz,getPhases..InHeadPhase.startTagTitlecSs|jj|ddS)Nr)rIr)r@rr)r)r*r"sz4getPhases..InHeadPhase.startTagNoFramesStylecSs8|jjr|jj|dn|jj||jjd|j_dS)NrinHeadNoscript)rIrKrr<rrFr])r@rr)r)r*r#s z/getPhases..InHeadPhase.startTagNoscriptcSs<|jj||jjj|jj_|jj|j_|jjd|j_dS)Nr) r<rrIrLZscriptDataStaterZr]rrF)r@rr)r)r*r$s  z-getPhases..InHeadPhase.startTagScriptcSs |j|S)N)r )r@rr)r)r*rsz,getPhases..InHeadPhase.startTagOthercSs"|jjjj}|jjd|j_dS)N afterHead)rIr<r{r)rFr])r@rrr)r)r*r'sz)getPhases..InHeadPhase.endTagHeadcSs |j|S)N)r 
)r@rr)r)r*r(sz/getPhases..InHeadPhase.endTagHtmlBodyBrcSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rsz*getPhases..InHeadPhase.endTagOthercSs|jtddS)Nr)r'r )r@r)r)r*r sz+getPhases..InHeadPhase.anythingElseN)r7r8r9rHrrrrr%r&r!r"r#r$rr'r(rr r))rr)r* InHeadPhase{s  r/csxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z&getPhases..InHeadNoscriptPhasecsfj|||tjd|jfd |jfd |jfg|_|j|j_tjd |j fd |j fg|_ |j |j _dS) Nrrrr r4rrrrr )rrr r4rr)rr) rHrrrr%startTagHeadNoscriptrrrendTagNoscriptendTagBrrr)r@rIr<)rr)r*rHs z/getPhases..InHeadNoscriptPhase.__init__cSs|jjd|jdS)Nzeof-in-head-noscriptT)rIr|r )r@r)r)r*rs z1getPhases..InHeadNoscriptPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz5getPhases..InHeadNoscriptPhase.processCommentcSs|jjd|j|S)Nzchar-in-head-noscript)rIr|r )r@rr)r)r*rs z8getPhases..InHeadNoscriptPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz=getPhases..InHeadNoscriptPhase.processSpaceCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz3getPhases..InHeadNoscriptPhase.startTagHtmlcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r%sz>getPhases..InHeadNoscriptPhase.startTagBaseLinkCommandcSs|jjdd|didS)Nzunexpected-start-tagr>)rIr|)r@rr)r)r*r0 sz;getPhases..InHeadNoscriptPhase.startTagHeadNoscriptcSs"|jjdd|di|j|S)Nzunexpected-inhead-noscript-tagr>)rIr|r )r@rr)r)r*r sz4getPhases..InHeadNoscriptPhase.startTagOthercSs"|jjjj}|jjd|j_dS)Nr)rIr<r{r)rFr])r@rrr)r)r*r1sz5getPhases..InHeadNoscriptPhase.endTagNoscriptcSs"|jjdd|di|j|S)Nzunexpected-inhead-noscript-tagr>)rIr|r )r@rr)r)r*r2sz/getPhases..InHeadNoscriptPhase.endTagBrcSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rsz2getPhases..InHeadNoscriptPhase.endTagOthercSs|jtddS)Nr)r1r )r@r)r)r*r sz3getPhases..InHeadNoscriptPhase.anythingElseN)r7r8r9rHrrrrrr%r0rr1r2rr r))rr)r*InHeadNoscriptPhases r3cspeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZdS)z!getPhases..AfterHeadPhasec snj|||tjd|jfd|jfd|jfd|jfd |jfg|_|j |j_ tjd|j fg|_ |j |j _ dS)Nrrrrrrr r4rrrrrr ) rrrr r4rrrr)rrr )rHrrr 
startTagBodystartTagFramesetstartTagFromHeadrrrrr(rr)r@rIr<)rr)r*rH#s z*getPhases..AfterHeadPhase.__init__cSs |jdS)NT)r )r@r)r)r*r4sz,getPhases..AfterHeadPhase.processEOFcSs |j|S)N)r )r@rr)r)r*r8sz3getPhases..AfterHeadPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r<sz.getPhases..AfterHeadPhase.startTagHtmlcSs(d|j_|jj||jjd|j_dS)NFr)rIr`r<rrFr])r@rr)r)r*r4?s z.getPhases..AfterHeadPhase.startTagBodycSs |jj||jjd|j_dS)Nr)r<rrIrFr])r@rr)r)r*r5Ds z2getPhases..AfterHeadPhase.startTagFramesetcSst|jjdd|di|jjj|jj|jjdj|x4|jjdddD]}|jdkrN|jjj |PqNWdS)Nz#unexpected-start-tag-out-of-my-headr>rr rry) rIr|r<r{rrrFrr>remove)r@rrr)r)r*r6Hs z2getPhases..AfterHeadPhase.startTagFromHeadcSs|jjdd|didS)Nzunexpected-start-tagr>)rIr|)r@rr)r)r*rRsz.getPhases..AfterHeadPhase.startTagHeadcSs |j|S)N)r )r@rr)r)r*rUsz/getPhases..AfterHeadPhase.startTagOthercSs |j|S)N)r )r@rr)r)r*r(Ysz2getPhases..AfterHeadPhase.endTagHtmlBodyBrcSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*r]sz-getPhases..AfterHeadPhase.endTagOthercSs.|jjtdd|jjd|j_d|j_dS)NrrorT)r<rr rIrFr]r`)r@r)r)r*r `sz.getPhases..AfterHeadPhase.anythingElseN)r7r8r9rHrrrr4r5r6rrr(rr r))rr)r*AfterHeadPhase"s  r8cseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*Zd+d,Zd-d.Zd/d0Zd1d2Zd3d4Zd5d6Zd7d8Zd9d:Zd;d<Z d=d>Z!d?d@Z"dAdBZ#dCdDZ$dEdFZ%dGdHZ&dIdJZ'dKdLZ(dMdNZ)dOdPZ*dQdRZ+dSdTZ,dUdVZ-dWdXZ.dYdZZ/d[d\Z0d]d^Z1d_d`Z2dadbZ3dcddZ4dedfZ5dgS)hzgetPhases..InBodyPhasec,sj||||j|_tjd|jfdd|jfd |jfd |jfde|j ft |j fdf|j fd&|j fdg|jfd*|jfd+|jfdh|jfd8|jfd9|jfdi|jfd=|jfd>|jfdj|jfdk|jfdH|jfdI|jfdJ|jfdK|jfdL|jfdM|jfdN|jfdl|j fdQ|j!fdm|j"fdn|j#fdV|j$fdW|j%fdo|j&fg!|_'|j(|j'_)tjd |j*fd|j+fdp|j,fd&|j-fd |j.fdq|j/ft |j0fdr|j1fds|j2fd@|j3fg |_4|j5|j4_)dS)tNrrrrrr r4rrrrraddressarticleaside blockquotecenterdetailsdirr+dlfieldset figcaptionfigurefooterheaderhgroupmainmenunavolr(sectionsummaryulprelistingformlidddtrRabbigcodeemfontissmallstrikestrongttunobrbuttonappletmarqueeobjectZxmprarear 
embedimgkeygenwbrparamsourcetrackinputhrimageisindextextareaZiframernoembedrrrprtoptionoptgroupZmathrurcolrframerrrrrrrdialog) rrrrr r4rrr)r9r:r;r<r=r>r?r+r@rArBrCrDrErFrGrHrIrJr(rKrLrM)rNrO)rQrRrS) rUrVrWrXrYrZr[r\r]r^r_r`)rcrdre)rfr rgrhrirj)rkrlrm)rsr)rtru)rvrw) rrxrryrrrrrrr)r9r:r;r<rbr=r>rzr?r+r@rArBrCrDrErFrOrGrHrIrJrNrKrLrM)rRrSrQ)rTrUrVrWrXrYrZrar[r\r]r^r_r`)rcrdre)6rHprocessSpaceCharactersNonPrerrrrstartTagProcessInHeadr4r5startTagClosePrstartTagHeadingstartTagPreListing startTagFormstartTagListItemstartTagPlaintext startTagAstartTagFormatting startTagNobrstartTagButtonstartTagAppletMarqueeObject startTagXmp startTagTablestartTagVoidFormattingstartTagParamSource startTagInput startTagHr startTagImagestartTagIsIndexstartTagTextareastartTagIFramer#startTagRawtextstartTagSelect startTagRpRt startTagOpt startTagMath startTagSvgstartTagMisplacedrrr endTagBody endTagHtml endTagBlock endTagFormendTagPendTagListItem endTagHeadingendTagFormattingendTagAppletMarqueeObjectr2rr)r@rIr<)rr)r*rHhs~ z'getPhases..InBodyPhase.__init__cSs$|j|jko"|j|jko"|j|jkS)N)r>rhri)r@Znode1Znode2r)r)r*isMatchingFormattingElements  z:getPhases..InBodyPhase.isMatchingFormattingElementcSs|jj||jjd}g}x<|jjdddD]&}|tkr@Pq0|j||r0|j|q0Wt|dkrx|jjj|d|jjj|dS)Nr ryryry) r<rr{activeFormattingElementsr rrr~r7)r@rrkZmatchingElementsrr)r)r*addFormattingElements    z3getPhases..InBodyPhase.addFormattingElementc Ss@td}x2|jjdddD]}|j|kr|jjdPqWdS)NrRrSrQr(rrrrrrrrr z expected-closing-tag-but-got-eof) rRrSrQr(rrrrrrrrry)rr<r{r>rIr|)r@Zallowed_elementsrr)r)r*rs  z)getPhases..InBodyPhase.processEOFcSsh|d}|j|_|jdrJ|jjdjdkrJ|jjd j rJ|dd}|rd|jj|jj|dS) Nrs r rNrOrrry)rNrOrrry) r{rrr<r{r>Z hasContent#reconstructActiveFormattingElementsr)r@rrsr)r)r*!processSpaceCharactersDropNewlines   z@getPhases..InBodyPhase.processSpaceCharactersDropNewlinecSsT|ddkrdS|jj|jj|d|jjrPtdd|dDrPd|j_dS)NrscSsg|] }|tkqSr))r)r=charr)r)r*rAszDgetPhases..InBodyPhase.processCharacters..F)r<rrrIr`any)r@rr)r)r*rs  
z0getPhases..InBodyPhase.processCharacterscSs|jj|jj|ddS)Nrs)r<rr)r@rr)r)r*r{s z;getPhases..InBodyPhase.processSpaceCharactersNonPrecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r|sz4getPhases..InBodyPhase.startTagProcessInHeadcSs|jjdddit|jjdks||jjdjdkr6nFd|j_x<|djD],\}}||jjdjkrL||jjdj|<qLWdS)Nzunexpected-start-tagr>rr Frs) rIr|r~r<r{r>r`r.ri)r@rrrr)r)r*r4sz+getPhases..InBodyPhase.startTagBodycSs|jjdddit|jjdks|jjdjdkr6nt|jjs@nj|jjdjrj|jjdjj|jjdx"|jjdjdkr|jjj qlW|jj ||jj d|j_ dS) Nzunexpected-start-tagr>rr rrrry) rIr|r~r<r{r>r`parent removeChildr)rrFr])r@rr)r)r*r5s" z/getPhases..InBodyPhase.startTagFramesetcSs.|jjdddr|jtd|jj|dS)Nr(rb)variant)r<elementInScoperr r)r@rr)r)r*r} sz-getPhases..InBodyPhase.startTagClosePcSs>|jjdddr|jtd|jj|d|j_|j|_dS)Nr(rb)rF) r<rrr rrIr`rr)r@rr)r)r*rs  z1getPhases..InBodyPhase.startTagPreListingcSsZ|jjr|jjdddin:|jjdddr:|jtd|jj||jjd|j_dS) Nzunexpected-start-tagr>rPr(rb)rr ry) r< formPointerrIr|rrr rr{)r@rr)r)r*rs  z+getPhases..InBodyPhase.startTagFormcSsd|j_dgddgddgd}||d}xLt|jjD]<}|j|kr^|jjjt|jdP|j t kr8|jd kr8Pq8W|jj d d d r|jjjtd d|jj |dS)NFrQrSrR)rQrSrRr>rpr9r+r(rb)r)r9r+r() rIr`reversedr<r{r>r]rr  nameTuplerrr)r@rZ stopNamesMapZ stopNamesrr)r)r*rs"      z/getPhases..InBodyPhase.startTagListItemcSs>|jjdddr|jtd|jj||jjj|jj_dS)Nr(rb)r) r<rrr rrIrLr\rZ)r@rr)r)r*r4s z0getPhases..InBodyPhase.startTagPlaintextcSsb|jjdddr|jtd|jjdjtkrR|jjdd|di|jjj |jj |dS)Nr(rb)rr zunexpected-start-tagr>ry) r<rrr r{r>rrIr|r)r)r@rr)r)r*r~:s  z.getPhases..InBodyPhase.startTagHeadingcSs~|jjd}|rf|jjdddd|jtd||jjkrL|jjj|||jjkrf|jjj||jj |j |dS)NrTz$unexpected-start-tag-implies-end-tag) startNameendName) r<!elementInActiveFormattingElementsrIr|rr r{r7rrr)r@rZ afeAElementr)r)r*rBs     z(getPhases..InBodyPhase.startTagAcSs|jj|j|dS)N)r<rr)r@rr)r)r*rOs z1getPhases..InBodyPhase.startTagFormattingcSsP|jj|jjdrB|jjdddd|jtd|jj|j|dS)Nraz$unexpected-start-tag-implies-end-tag)rr)r<rrrIr|rr r)r@rr)r)r*rSs    
z+getPhases..InBodyPhase.startTagNobrcSsT|jjdr2|jjdddd|jtd|S|jj|jj|d|j_dS)Nrbz$unexpected-start-tag-implies-end-tag)rrF) r<rrIr|rr rrr`)r@rr)r)r*r]s    z-getPhases..InBodyPhase.startTagButtoncSs0|jj|jj||jjjtd|j_dS)NF)r<rrrrr rIr`)r@rr)r)r*rhs  z:getPhases..InBodyPhase.startTagAppletMarqueeObjectcSsB|jjdddr|jtd|jjd|j_|jj|ddS)Nr(rb)rFr)r<rrr rrIr`r)r@rr)r)r*rns  z*getPhases..InBodyPhase.startTagXmpcSsR|jjdkr*|jjdddr*|jtd|jj|d|j_|jjd|j_ dS)Nrr(rb)rFr) rIrWr<rrr rr`rFr])r@rr)r)r*rus   z,getPhases..InBodyPhase.startTagTablecSs6|jj|jj||jjjd|d<d|j_dS)NTrxF)r<rrr{r)rIr`)r@rr)r)r*r}s    z5getPhases..InBodyPhase.startTagVoidFormattingcSs@|jj}|j|d|dkr<|ddjtdkr<||j_dS)Nr2rshidden)rIr`rrjr)r@rr`r)r)r*rs   z,getPhases..InBodyPhase.startTagInputcSs$|jj||jjjd|d<dS)NTrx)r<rr{r))r@rr)r)r*rs  z2getPhases..InBodyPhase.startTagParamSourcecSsJ|jjdddr|jtd|jj||jjjd|d<d|j_dS)Nr(rb)rTrxF) r<rrr rr{r)rIr`)r@rr)r)r*rs   z)getPhases..InBodyPhase.startTagHrcSs6|jjdddd|jtdd|d|dddS) Nzunexpected-start-tag-treated-asrprh) originalNamenewNamerorsrw)rirw)rIr|rr )r@rr)r)r*rs   z,getPhases..InBodyPhase.startTagImagecSs|jjdddi|jjrdSi}d|dkr>|dd|d<|jtdd|d|jtd d|jtd dd |dkr|dd }nd }|jtd |d|dj}d|kr|d=d |kr|d =d|d<|jtdd||dd|j td |jtd d|j tddS)Nzdeprecated-tagr>rqactionrsrPro)riroZlabelpromptz3This is a searchable index. 
Enter search keywords: rn)r2rsrnrw)rirw) rIr|r<rrr rrcopyr)r@rZ form_attrsrrir)r)r*rs6      z.getPhases..InBodyPhase.startTagIsIndexcSs0|jj||jjj|jj_|j|_d|j_dS)NF) r<rrIrLrYrZrrr`)r@rr)r)r*rs z/getPhases..InBodyPhase.startTagTextareacSsd|j_|j|dS)NF)rIr`r)r@rr)r)r*rsz-getPhases..InBodyPhase.startTagIFramecSs"|jjr|j|n |j|dS)N)rIrKrr)r@rr)r)r*r#s z/getPhases..InBodyPhase.startTagNoscriptcSs|jj|ddS)z8iframe, noembed noframes, noscript(if scripting enabled)rN)rIr)r@rr)r)r*rsz.getPhases..InBodyPhase.startTagRawtextcSs@|jjdjdkr$|jjjtd|jj|jjj|dS)Nr rvry) r<r{r>rIr]rr rr)r@rr)r)r*rs z*getPhases..InBodyPhase.startTagOptcSs|jj|jj|d|j_|jj|jjd|jjd|jjd|jjd|jjd|jjdfkrx|jjd|j_n|jjd |j_dS) NFrrrrrrinSelectInTabler)r<rrrIr`r]rF)r@rr)r)r*rs      z-getPhases..InBodyPhase.startTagSelectcSsB|jjdr2|jj|jjdjdkr2|jj|jj|dS)Nrubyr ry)r<rgenerateImpliedEndTagsr{r>rIr|r)r@rr)r)r*rs    z+getPhases..InBodyPhase.startTagRpRtcSsZ|jj|jj||jj|td|d<|jj||drV|jjjd|d<dS)NrdrhrwTrx) r<rrIrrrrr{r))r@rr)r)r*rs      z+getPhases..InBodyPhase.startTagMathcSsZ|jj|jj||jj|td|d<|jj||drV|jjjd|d<dS)NrurhrwTrx) r<rrIrrrrr{r))r@rr)r)r*rs      z*getPhases..InBodyPhase.startTagSvgcSs|jjdd|didS)a5 Elements that should be children of other elements that have a different insertion mode; here they are ignored "caption", "col", "colgroup", "frame", "frameset", "head", "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "noscript" zunexpected-start-tag-ignoredr>N)rIr|)r@rr)r)r*rsz0getPhases..InBodyPhase.startTagMisplacedcSs|jj|jj|dS)N)r<rr)r@rr)r)r*rs z,getPhases..InBodyPhase.startTagOthercSs|jjdddsD|jtdd|jjdddi|jtddnX|jjd|jjd j dkrt|jjdddi|jjj }x|j dkr|jjj }qWdS) Nr(rb)rrozunexpected-end-tagr>rpr ry) r<rr}r rIr|rrr{r>r))r@rrr)r)r*rs   z&getPhases..InBodyPhase.endTagPcSs|jjds|jjdS|jjdjdkrlx>|jjddD]*}|jtdkr>|jjdd|jdPq>W|jjd|j_dS)Nrr rRrSrQrwrvr(rtrurrrrrrrz$expected-one-end-tag-but-got-another)gotName expectedName afterBodyry)rRrSrQrwrvr(rtrurrrrrrrr) r<rrIr|r{r>rrFr])r@rrr)r)r*r!s  
z)getPhases..InBodyPhase.endTagBodycSs"|jjdr|jtd|SdS)Nr)r<rrr )r@rr)r)r*r3s z)getPhases..InBodyPhase.endTagHtmlcSs|ddkr|j|_|jj|d}|r2|jj|jjdj|dkr^|jjdd|di|r|jjj }x|j|dkr|jjj }qpWdS)Nr>rNr zend-tag-too-earlyry) r{rr<rrr{r>rIr|r))r@rZinScoperr)r)r*r9s   z*getPhases..InBodyPhase.endTagBlockcSsx|jj}d|j_|dks&|jj| r:|jjdddin:|jj|jjd|krf|jjdddi|jjj|dS)Nzunexpected-end-tagr>rPr zend-tag-too-early-ignoredry)r<rrrIr|rr{r7)r@rrr)r)r*rGs   z)getPhases..InBodyPhase.endTagFormcSs|ddkrd}nd}|jj|d|dsB|jjdd|dinj|jj|dd|jjd j|dkr|jjdd|di|jjj}x|j|dkr|jjj}qWdS) Nr>rQlist)rzunexpected-end-tag)excluder zend-tag-too-earlyry)r<rrIr|rr{r>r))r@rrrr)r)r*rTs  z-getPhases..InBodyPhase.endTagListItemcSsx$tD]}|jj|r|jjPqW|jjdj|dkrR|jjdd|dixBtD]:}|jj|rX|jjj}x|jtkr|jjj}qvWPqXWdS)Nr r>zend-tag-too-earlyry) rr<rrr{r>rIr|r))r@ritemr)r)r*res       z,getPhases..InBodyPhase.endTagHeadingcSs"d}x|dkr|d7}|jj|d}| sL||jjkrZ|jj|j rZ|j|dS||jjkr|jjdd|di|jjj |dS|jj|js|jjdd|didS||jjdkr|jjdd|di|jjj |}d}x,|jj|dD]}|j t kr|}PqW|dkrb|jjj }x||krN|jjj }q4W|jjj |dS|jj|d}|jjj |}|} } d} |jjj | } x| d krh| d7} | d8} |jj| } | |jjkr|jjj | q| |krP| |kr |jjj | d}| j} | |jj|jjj | <| |jj|jjj | <| } | jrV| jj| | j| | } qW| jr~| jj| |jtdkr|jj\}}|j| |n |j| |j} |j| |j| |jjj ||jjj|| |jjj ||jjj|jjj |d| qWdS)z)The much-feared adoption agency algorithmrr r>Nzadoption-agency-1.2zadoption-agency-4.4zadoption-agency-1.3rrrrrrry)rrrrr)r<rr{rr>rrIr|rr7indexrrr)Z cloneNoderrZ appendChildrZgetTableMisnestedNodePosition insertBeforeZreparentChildreninsert)r@rZouterLoopCounterZformattingElementZafeIndexZ furthestBlockrkZcommonAncestorZbookmarkZlastNoderZinnerLoopCounterrZclonerrr)r)r*rts                    z/getPhases..InBodyPhase.endTagFormattingcSs|jj|dr|jj|jjdj|dkrF|jjdd|di|jj|dr|jjj}x|j|dkr|jjj}qdW|jjdS)Nr>r zend-tag-too-earlyry) r<rrr{r>rIr|r)clearActiveFormattingElements)r@rrkr)r)r*rs  z8getPhases..InBodyPhase.endTagAppletMarqueeObjectcSs@|jjdddd|jj|jjtdd|jjjdS)Nzunexpected-end-tag-treated-asr z br element)rrro)rIr|r<rrr r{r))r@rr)r)r*r2#s   
z'getPhases..InBodyPhase.endTagBrcSsx|jjdddD]}|j|dkr~|jj|dd|jjdj|dkrd|jjdd|dix|jjj|krxqfWPq|jtkr|jjdd|diPqWdS)Nr r>)rzunexpected-end-tagryry) r<r{r>rrIr|r)rr)r@rrr)r)r*r*s z*getPhases..InBodyPhase.endTagOtherN)6r7r8r9rHrrrrrr{r|r4r5r}rrrrr~rrrrrrrrrrrrrrrr#rrrrrrrrrrrrrrrrrr2rr))rr)r* InBodyPhaseesh G             $ rcs@eZdZfddZddZddZddZd d Zd d Zd S)zgetPhases..TextPhasecsFj|||tjg|_|j|j_tjd|jfg|_|j|j_dS)Nr) rHrrrrr endTagScriptrr)r@rIr<)rr)r*rH9s   z%getPhases..TextPhase.__init__cSs|jj|ddS)Nrs)r<r)r@rr)r)r*rAsz.getPhases..TextPhase.processCharacterscSs8|jjdd|jjdji|jjj|jj|j_dS)Nz&expected-named-closing-tag-but-got-eofr>r Try)rIr|r<r{r>r)rr])r@r)r)r*rDs   z'getPhases..TextPhase.processEOFcSsdS)Nr))r@rr)r)r*rKsz*getPhases..TextPhase.startTagOthercSs|jjj}|jj|j_dS)N)r<r{r)rIrr])r@rrr)r)r*rNs z)getPhases..TextPhase.endTagScriptcSs|jjj|jj|j_dS)N)r<r{r)rIrr])r@rr)r)r*rUs z(getPhases..TextPhase.endTagOtherN) r7r8r9rHrrrrrr))rr)r* TextPhase8s  rcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!d"Zd#d$Zd%d&Zd'S)(zgetPhases..InTablePhasec sj|||tjd|jfd|jfd|jfd|jfd|jfd|jfd |j fd|j fd|j fd|j fg |_ |j|j _tjd |jfd|jfg|_|j|j_dS)NrrrrxrrrrrrrrrrnrPr)rrr)rrr)rr) rrrxrrrrrrrr)rHrrrstartTagCaptionstartTagColgroup startTagColstartTagRowGroupstartTagImplyTbodyrstartTagStyleScriptrrrrr endTagTable endTagIgnorerr)r@rIr<)rr)r*rH[s$ z(getPhases..InTablePhase.__init__cSs(x"|jjdjdkr"|jjjqWdS)Nr rrry)rr)r<r{r>r))r@r)r)r*clearStackToTableContextssz8getPhases..InTablePhase.clearStackToTableContextcSs$|jjdjdkr |jjdndS)Nr rz eof-in-tablery)r<r{r>rIr|)r@r)r)r*r|sz*getPhases..InTablePhase.processEOFcSs4|jj}|jjd|j_||jj_|jjj|dS)N inTableText)rIr]rFrr)r@rrr)r)r*rs z6getPhases..InTablePhase.processSpaceCharacterscSs4|jj}|jjd|j_||jj_|jjj|dS)Nr)rIr]rFrr)r@rrr)r)r*rs z1getPhases..InTablePhase.processCharacterscSs&d|j_|jjdj|d|j_dS)NTrF)r<insertFromTablerIrFr)r@rr)r)r*rsz*getPhases..InTablePhase.insertTextcSs6|j|jjjt|jj||jjd|j_dS)Nr) rr<rrr rrIrFr])r@rr)r)r*rs 
z/getPhases..InTablePhase.startTagCaptioncSs(|j|jj||jjd|j_dS)Nr)rr<rrIrFr])r@rr)r)r*rs z0getPhases..InTablePhase.startTagColgroupcSs|jtdd|S)Nrro)rr )r@rr)r)r*rsz+getPhases..InTablePhase.startTagColcSs(|j|jj||jjd|j_dS)Nr)rr<rrIrFr])r@rr)r)r*rs z0getPhases..InTablePhase.startTagRowGroupcSs|jtdd|S)Nrro)rr )r@rr)r)r*rsz2getPhases..InTablePhase.startTagImplyTbodycSs6|jjdddd|jjjtd|jjs2|SdS)Nz$unexpected-start-tag-implies-end-tagr)rr)rIr|r]rr rP)r@rr)r)r*rs  z-getPhases..InTablePhase.startTagTablecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz3getPhases..InTablePhase.startTagStyleScriptcSsVd|dkrH|ddjtdkrH|jjd|jj||jjjn |j|dS)Nr2rsrz unexpected-hidden-input-in-table) rjrrIr|r<rr{r)r)r@rr)r)r*rs    z-getPhases..InTablePhase.startTagInputcSsD|jjd|jjdkr@|jj||jjd|j_|jjjdS)Nzunexpected-form-in-tabler ry)rIr|r<rrr{r))r@rr)r)r*rs    z,getPhases..InTablePhase.startTagFormcSs<|jjdd|did|j_|jjdj|d|j_dS)Nz)unexpected-start-tag-implies-table-voodoor>TrF)rIr|r<rrFr)r@rr)r)r*rsz-getPhases..InTablePhase.startTagOthercSs|jjdddr|jj|jjdjdkrJ|jjdd|jjdjdx"|jjdjdkrl|jjjqLW|jjj|jjn |jjdS) Nr)rr zend-tag-too-early-named)rrryryry) r<rrr{r>rIr|r)r_)r@rr)r)r*rs   z+getPhases..InTablePhase.endTagTablecSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rsz,getPhases..InTablePhase.endTagIgnorecSs<|jjdd|did|j_|jjdj|d|j_dS)Nz'unexpected-end-tag-implies-table-voodoor>TrF)rIr|r<rrFr)r@rr)r)r*rsz+getPhases..InTablePhase.endTagOtherN)r7r8r9rHrrrrrrrrrrrrrrrrrrr))rr)r* InTablePhaseYs&   rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z#getPhases..InTableTextPhasecsj|||d|_g|_dS)N)rHrcharacterTokens)r@rIr<)rr)r*rHsz,getPhases..InTableTextPhase.__init__cSsddjdd|jD}tdd|DrJtd|d}|jjdj|n|rZ|jj|g|_dS)NrcSsg|] }|dqS)rsr))r=rr)r)r*rAszGgetPhases..InTableTextPhase.flushCharacters..cSsg|] }|tkqSr))r)r=rr)r)r*rAsrn)r2rsr)joinrrrrIrFrr<)r@rsrr)r)r*flushCharacterss z3getPhases..InTableTextPhase.flushCharacterscSs|j|j|j_|S)N)rrrIr])r@rr)r)r*rs z2getPhases..InTableTextPhase.processCommentcSs|j|j|j_dS)NT)rrrIr])r@r)r)r*rs 
z.getPhases..InTableTextPhase.processEOFcSs |ddkrdS|jj|dS)Nrsr)rr)r@rr)r)r*rs z5getPhases..InTableTextPhase.processCharacterscSs|jj|dS)N)rr)r@rr)r)r*rsz:getPhases..InTableTextPhase.processSpaceCharacterscSs|j|j|j_|S)N)rrrIr])r@rr)r)r*r s z3getPhases..InTableTextPhase.processStartTagcSs|j|j|j_|S)N)rrrIr])r@rr)r)r*rs z1getPhases..InTableTextPhase.processEndTagN) r7r8r9rHrrrrrrrr))rr)r*InTableTextPhases  rcs`eZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS)z!getPhases..InCaptionPhasec sfj|||tjd|jfd |jfg|_|j|j_tjd|jfd |j fd|j fg|_ |j |j _dS)Nrrrxrrrrrrrrr) rrxrrrrrrr) rrxrrrrrrrr) rHrrrstartTagTableElementrrr endTagCaptionrrrr)r@rIr<)rr)r*rHs z*getPhases..InCaptionPhase.__init__cSs|jjddd S)Nrr)r)r<r)r@r)r)r*ignoreEndTagCaption+sz5getPhases..InCaptionPhase.ignoreEndTagCaptioncSs|jjdjdS)Nr)rIrFr)r@r)r)r*r.sz,getPhases..InCaptionPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r1sz3getPhases..InCaptionPhase.processCharacterscSs0|jj|j}|jjjtd|s,|SdS)Nr)rIr|rr]rr )r@r ignoreEndTagr)r)r*r4s  z6getPhases..InCaptionPhase.startTagTableElementcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r<sz/getPhases..InCaptionPhase.startTagOthercSs|js|jj|jjdjdkrB|jjdd|jjdjdx"|jjdjdkrd|jjjqDW|jjj|jj|jj d|j_ n |jjdS) Nr rz$expected-one-end-tag-but-got-another)rrrryryry) rr<rr{r>rIr|r)rrFr])r@rr)r)r*r?s   z/getPhases..InCaptionPhase.endTagCaptioncSs0|jj|j}|jjjtd|s,|SdS)Nr)rIr|rr]rr )r@rrr)r)r*rQs  z-getPhases..InCaptionPhase.endTagTablecSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rXsz.getPhases..InCaptionPhase.endTagIgnorecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r[sz-getPhases..InCaptionPhase.endTagOtherN) r7r8r9rHrrrrrrrrrr))rr)r*InCaptionPhases rcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z%getPhases..InColumnGroupPhasecs^j|||tjd|jfd|jfg|_|j|j_tjd|jfd|j fg|_ |j |j _dS)Nrrxr) rHrrrrrrrendTagColgroup endTagColrr)r@rIr<)rr)r*rHas z.getPhases..InColumnGroupPhase.__init__cSs|jjdjdkS)Nr rry)r<r{r>)r@r)r)r*ignoreEndTagColgrouppsz:getPhases..InColumnGroupPhase.ignoreEndTagColgroupcSs8|jjdjdkrdS|j}|jtd|s4dSdS)Nr rrTry)r<r{r>rrr 
)r@rr)r)r*rss z0getPhases..InColumnGroupPhase.processEOFcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*r}sz7getPhases..InColumnGroupPhase.processCharacterscSs$|jj||jjjd|d<dS)NTrx)r<rr{r))r@rr)r)r*rs  z1getPhases..InColumnGroupPhase.startTagColcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*rsz3getPhases..InColumnGroupPhase.startTagOthercSs4|jr|jjn|jjj|jjd|j_dS)Nr)rrIr|r<r{r)rFr])r@rr)r)r*rs  z4getPhases..InColumnGroupPhase.endTagColgroupcSs|jjdddidS)Nz no-end-tagr>rx)rIr|)r@rr)r)r*rsz/getPhases..InColumnGroupPhase.endTagColcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*rsz1getPhases..InColumnGroupPhase.endTagOtherN) r7r8r9rHrrrrrrrrr))rr)r*InColumnGroupPhase^s   rcsxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z#getPhases..InTableBodyPhasec svj|||tjd|jfd|jfd |jfd|jfg|_|j|j_ tjd|j fd |j fd|j fg|_ |j|j _ dS)Nrrrrrrxrrrrrr)rr)rrxrrrr)rrr)rrrxrrrrr)rHrrr startTagTrstartTagTableCellstartTagTableOtherrrrendTagTableRowGrouprrrr)r@rIr<)rr)r*rHs z,getPhases..InTableBodyPhase.__init__cSs:x"|jjdjdkr"|jjjqW|jjdjdkr6dS) Nr rrrrry)rrrrry)r<r{r>r))r@r)r)r*clearStackToTableBodyContexts z@getPhases..InTableBodyPhase.clearStackToTableBodyContextcSs|jjdjdS)Nr)rIrFr)r@r)r)r*rsz.getPhases..InTableBodyPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz:getPhases..InTableBodyPhase.processSpaceCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz5getPhases..InTableBodyPhase.processCharacterscSs(|j|jj||jjd|j_dS)Nr)rr<rrIrFr])r@rr)r)r*rs z.getPhases..InTableBodyPhase.startTagTrcSs*|jjdd|di|jtdd|S)Nzunexpected-cell-in-table-bodyr>rro)rIr|rr )r@rr)r)r*rsz5getPhases..InTableBodyPhase.startTagTableCellcSsb|jjddds0|jjddds0|jjdddrT|j|jt|jjdj|S|jjdS)Nrr)rrrr ry) r<rrrr r{r>rIr|)r@rr)r)r*rsz6getPhases..InTableBodyPhase.startTagTableOthercSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz1getPhases..InTableBodyPhase.startTagOthercSsT|jj|dddr:|j|jjj|jjd|j_n|jjdd|didS)Nr>r)rrz unexpected-end-tag-in-table-body) r<rrr{r)rIrFr]r|)r@rr)r)r*rs  z7getPhases..InTableBodyPhase.endTagTableRowGroupcSsb|jjddds0|jjddds0|jjdddrT|j|jt|jjdj|S|jjdS)Nrr)rrrr ry) r<rrrr 
r{r>rIr|)r@rr)r)r*rsz/getPhases..InTableBodyPhase.endTagTablecSs|jjdd|didS)Nz unexpected-end-tag-in-table-bodyr>)rIr|)r@rr)r)r*rsz0getPhases..InTableBodyPhase.endTagIgnorecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz/getPhases..InTableBodyPhase.endTagOtherN)r7r8r9rHrrrrrrrrrrrrr))rr)r*InTableBodyPhases    rcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZdS)zgetPhases..InRowPhasec svj|||tjd|jfd |jfd|jfg|_|j|j_tjd |j fd |j fd|j fd|j fg|_ |j|j _dS)Nrrrrrxrrrrrrr)rr)rrxrrrrr)rrr)rrrxrrrr)rHrrrrrrrrendTagTrrrrrr)r@rIr<)rr)r*rHs z&getPhases..InRowPhase.__init__cSsDx>|jjdjdkr>|jjdd|jjdji|jjjqWdS) Nr rrz'unexpected-implied-end-tag-in-table-rowr>ry)rrry)r<r{r>rIr|r))r@r)r)r*clearStackToTableRowContextsz9getPhases..InRowPhase.clearStackToTableRowContextcSs|jjddd S)Nrr)r)r<r)r@r)r)r*ignoreEndTagTrsz,getPhases..InRowPhase.ignoreEndTagTrcSs|jjdjdS)Nr)rIrFr)r@r)r)r*r"sz(getPhases..InRowPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r%sz4getPhases..InRowPhase.processSpaceCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r(sz/getPhases..InRowPhase.processCharacterscSs6|j|jj||jjd|j_|jjjtdS)Nr) rr<rrIrFr]rrr )r@rr)r)r*r+s z/getPhases..InRowPhase.startTagTableCellcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*r1sz0getPhases..InRowPhase.startTagTableOthercSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r8sz+getPhases..InRowPhase.startTagOthercSs<|js.|j|jjj|jjd|j_n |jjdS)Nr) rrr<r{r)rIrFr]r|)r@rr)r)r*r;s  z&getPhases..InRowPhase.endTagTrcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*rEsz)getPhases..InRowPhase.endTagTablecSs4|jj|dddr&|jtd|S|jjdS)Nr>r)rr)r<rrr rIr|)r@rr)r)r*rMsz1getPhases..InRowPhase.endTagTableRowGroupcSs|jjdd|didS)Nzunexpected-end-tag-in-table-rowr>)rIr|)r@rr)r)r*rTsz*getPhases..InRowPhase.endTagIgnorecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rXsz)getPhases..InRowPhase.endTagOtherN)r7r8r9rHrrrrrrrrrrrrrr))rr)r* InRowPhases  rcs`eZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS)zgetPhases..InCellPhasec sfj|||tjd|jfd |jfg|_|j|j_tjd|jfd|j fd|j fg|_ |j |j _dS)Nrrrxrrrrrrrrr) rrxrrrrrrr)rr)rrrxrr)rrrrr) rHrrrrrrrendTagTableCellr 
endTagImplyrr)r@rIr<)rr)r*rH]s z'getPhases..InCellPhase.__init__cSsB|jjdddr |jtdn|jjdddr>|jtddS)Nrr)rr)r<rrr )r@r)r)r* closeCellnsz(getPhases..InCellPhase.closeCellcSs|jjdjdS)Nr)rIrFr)r@r)r)r*rusz)getPhases..InCellPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rxsz0getPhases..InCellPhase.processCharacterscSs:|jjddds |jjdddr,|j|S|jjdS)Nrr)rr)r<rrrIr|)r@rr)r)r*r{s z1getPhases..InCellPhase.startTagTableOthercSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz,getPhases..InCellPhase.startTagOthercSs|jj|dddr|jj|d|jjdj|dkrt|jjdd|dix.|jjj}|j|dkrRPqRWn |jjj|jj|jj d|j_ n|jjdd|didS) Nr>r)rr zunexpected-cell-end-tagrzunexpected-end-tagry) r<rrr{r>rIr|r)rrFr])r@rrr)r)r*rs   z.getPhases..InCellPhase.endTagTableCellcSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rsz+getPhases..InCellPhase.endTagIgnorecSs.|jj|dddr |j|S|jjdS)Nr>r)r)r<rrrIr|)r@rr)r)r*rsz*getPhases..InCellPhase.endTagImplycSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz*getPhases..InCellPhase.endTagOtherN) r7r8r9rHrrrrrrrrrr))rr)r* InCellPhase[s  rcsxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z getPhases..InSelectPhasecsj|||tjd|jfd|jfd|jfd|jfd |jfd|jfg|_ |j |j _ tjd|j fd|j fd|jfg|_|j|j_ dS) Nrrvrwrrnrirrr)rnrirr)rHrrrstartTagOptionstartTagOptgrouprrr$rrr endTagOptionendTagOptgroup endTagSelectrr)r@rIr<)rr)r*rHs z)getPhases..InSelectPhase.__init__cSs$|jjdjdkr |jjdndS)Nr rz eof-in-selectry)r<r{r>rIr|)r@r)r)r*rsz+getPhases..InSelectPhase.processEOFcSs$|ddkrdS|jj|ddS)Nrsr)r<r)r@rr)r)r*rs z2getPhases..InSelectPhase.processCharacterscSs.|jjdjdkr|jjj|jj|dS)Nr rvry)r<r{r>r)r)r@rr)r)r*rs z/getPhases..InSelectPhase.startTagOptioncSsL|jjdjdkr|jjj|jjdjdkr<|jjj|jj|dS)Nr rvrwryry)r<r{r>r)r)r@rr)r)r*rs   z1getPhases..InSelectPhase.startTagOptgroupcSs|jjd|jtddS)Nzunexpected-select-in-selectr)rIr|rr )r@rr)r)r*rs z/getPhases..InSelectPhase.startTagSelectcSs2|jjd|jjdddr.|jtd|SdS)Nzunexpected-input-in-selectr)r)rIr|r<rrr )r@rr)r)r*rs  
z.getPhases..InSelectPhase.startTagInputcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r$sz/getPhases..InSelectPhase.startTagScriptcSs|jjdd|didS)Nzunexpected-start-tag-in-selectr>)rIr|)r@rr)r)r*rsz.getPhases..InSelectPhase.startTagOthercSs6|jjdjdkr |jjjn|jjdddidS)Nr rvzunexpected-end-tag-in-selectr>ry)r<r{r>r)rIr|)r@rr)r)r*rsz-getPhases..InSelectPhase.endTagOptioncSsf|jjdjdkr0|jjdjdkr0|jjj|jjd jdkrP|jjjn|jjdddidS) Nr rvrrwzunexpected-end-tag-in-selectr>ryry)r<r{r>r)rIr|)r@rr)r)r*rs z/getPhases..InSelectPhase.endTagOptgroupcSsR|jjdddrD|jjj}x|jdkr6|jjj}qW|jjn |jjdS)Nr)r)r<rr{r)r>rIr_r|)r@rrr)r)r*rs    z-getPhases..InSelectPhase.endTagSelectcSs|jjdd|didS)Nzunexpected-end-tag-in-selectr>)rIr|)r@rr)r)r*r sz,getPhases..InSelectPhase.endTagOtherN)r7r8r9rHrrrrrrr$rrrrrr))rr)r* InSelectPhases   rcsHeZdZfddZddZddZddZd d Zd d Zd dZ dS)z'getPhases..InSelectInTablePhasec sNj|||tjd |jfg|_|j|j_tjd |jfg|_|j |j_dS) Nrrrrrrrr)rrrrrrrr)rrrrrrrr) rHrrrrrrrrr)r@rIr<)rr)r*rH s z0getPhases..InSelectInTablePhase.__init__cSs|jjdjdS)Nr)rIrFr)r@r)r)r*r sz2getPhases..InSelectInTablePhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r sz9getPhases..InSelectInTablePhase.processCharacterscSs(|jjdd|di|jtd|S)Nz5unexpected-table-element-start-tag-in-select-in-tabler>r)rIr|rr )r@rr)r)r*r! 
sz5getPhases..InSelectInTablePhase.startTagTablecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r& sz5getPhases..InSelectInTablePhase.startTagOthercSs@|jjdd|di|jj|dddr<|jtd|SdS)Nz3unexpected-table-element-end-tag-in-select-in-tabler>r)rr)rIr|r<rrr )r@rr)r)r*r) sz3getPhases..InSelectInTablePhase.endTagTablecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r/ sz3getPhases..InSelectInTablePhase.endTagOtherN) r7r8r9rHrrrrrrr))rr)r*InSelectInTablePhase s rc-seZdZeddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,g,Zfd-d.Zd/d0Zfd1d2Zd3d4Zd5d6Z d7S)8z(getPhases..InForeignContentPhaserUrVr<rr r=rWrRr+r@rSrXrgZh1Zh2Zh3Zh4Zh5Zh6rrorZrhrQrOrHr4rarJr(rNrr[r\spanr^r]subZsuprr_r`rMvarcsj|||dS)N)rH)r@rIr<)rr)r*rH< sz1getPhases..InForeignContentPhase.__init__c%Ssnddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%$}|d&|krj||d&|d&<dS)'NZaltGlyphZ altGlyphDefZ altGlyphItemZ animateColorZ animateMotionZanimateTransformZclipPathZfeBlendZ feColorMatrixZfeComponentTransferZ feCompositeZfeConvolveMatrixZfeDiffuseLightingZfeDisplacementMapZfeDistantLightZfeFloodZfeFuncAZfeFuncBZfeFuncGZfeFuncRZfeGaussianBlurZfeImageZfeMergeZ feMergeNodeZ feMorphologyZfeOffsetZ fePointLightZfeSpecularLightingZ feSpotLightZfeTileZ feTurbulenceZ foreignObjectZglyphRefZlinearGradientZradialGradientZtextPath)$ZaltglyphZ altglyphdefZ altglyphitemZ animatecolorZ animatemotionZanimatetransformZclippathZfeblendZ fecolormatrixZfecomponenttransferZ fecompositeZfeconvolvematrixZfediffuselightingZfedisplacementmapZfedistantlightZfefloodZfefuncaZfefuncbZfefuncgZfefuncrZfegaussianblurZfeimageZfemergeZ femergenodeZ femorphologyZfeoffsetZ fepointlightZfespecularlightingZ fespotlightZfetileZ feturbulenceZ foreignobjectZglyphrefZlineargradientZradialgradientZtextpathr>r))r@r replacementsr)r)r*adjustSVGTagNames? 
sL z:getPhases..InForeignContentPhase.adjustSVGTagNamescsL|ddkrd|d<n&|jjr.InForeignContentPhase.processCharacters..F)rIr`rr)r@r)rr)r*rh s   z:getPhases..InForeignContentPhase.processCharacterscSs6|jjd}|d|jksD|ddkrt|djtdddg@r|jjdd|dixR|jjdj|jjkr|jj |jjd r|jj |jjd r|jjj q\W|S|jt d kr|jj |n$|jt d kr|j||jj||jj||j|d <|jj||d r2|jjj d |d<dS)Nr r>rYrsZcolorZfacesizez*unexpected-html-element-in-foreign-contentrdrurhrwTrxryryryry)r<r{breakoutElementssetkeysrIr|rhrrlrmr)rrrrrr)r@rrr)r)r*rp s.          z8getPhases..InForeignContentPhase.processStartTagcSst|jjd}|jjd}|jjt|dkrF|jjdd|dix|jjt|dkr|jj|jj dkr|jjj |jjj |j_x|jjj |krqWd}P|d8}|jj|}|j |jjkrqHqH|jjj|}PqHW|S)Nr r>zunexpected-end-tagrry)r~r<r{r>rjrrIr|r]rFrrr)rhrr)r@rZ nodeIndexrrr)r)r*r s(   z6getPhases..InForeignContentPhase.processEndTagN) r7r8r9rrrHrrrrr))rr)r*InForeignContentPhase2 s       ) rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z!getPhases..AfterBodyPhasecsNj|||tjd|jfg|_|j|j_tjd|jfg|_|j |j_dS)Nr) rHrrrrrrrrr)r@rIr<)rr)r*rH s  z*getPhases..AfterBodyPhase.__init__cSsdS)Nr))r@r)r)r*r sz,getPhases..AfterBodyPhase.processEOFcSs|jj||jjddS)Nr)r<rr{)r@rr)r)r*r sz0getPhases..AfterBodyPhase.processCommentcSs |jjd|jjd|j_|S)Nzunexpected-char-after-bodyr)rIr|rFr])r@rr)r)r*r s z3getPhases..AfterBodyPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r sz.getPhases..AfterBodyPhase.startTagHtmlcSs*|jjdd|di|jjd|j_|S)Nzunexpected-start-tag-after-bodyr>r)rIr|rFr])r@rr)r)r*r sz/getPhases..AfterBodyPhase.startTagOthercSs*|jjr|jjdn|jjd|j_dS)Nz'unexpected-end-tag-after-body-innerhtmlafterAfterBody)rIrPr|rFr])r@r>r)r)r*r sz,getPhases..AfterBodyPhase.endTagHtmlcSs*|jjdd|di|jjd|j_|S)Nzunexpected-end-tag-after-bodyr>r)rIr|rFr])r@rr)r)r*r sz-getPhases..AfterBodyPhase.endTagOtherN) r7r8r9rHrrrrrrrr))rr)r*AfterBodyPhase s rcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z"getPhases..InFramesetPhasecsfj|||tjd|jfd|jfd|jfd|jfg|_|j|j_ tjd|j fg|_ |j |j _ dS)Nrrryr) rHrrrr5 startTagFramestartTagNoframesrrrendTagFramesetrr)r@rIr<)rr)r*rH s 
z+getPhases..InFramesetPhase.__init__cSs$|jjdjdkr |jjdndS)Nr rzeof-in-framesetry)r<r{r>rIr|)r@r)r)r*r sz-getPhases..InFramesetPhase.processEOFcSs|jjddS)Nzunexpected-char-in-frameset)rIr|)r@rr)r)r*r sz4getPhases..InFramesetPhase.processCharacterscSs|jj|dS)N)r<r)r@rr)r)r*r5 sz3getPhases..InFramesetPhase.startTagFramesetcSs|jj||jjjdS)N)r<rr{r))r@rr)r)r*r  s z0getPhases..InFramesetPhase.startTagFramecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r  sz3getPhases..InFramesetPhase.startTagNoframescSs|jjdd|didS)Nz unexpected-start-tag-in-framesetr>)rIr|)r@rr)r)r*r sz0getPhases..InFramesetPhase.startTagOthercSs\|jjdjdkr |jjdn |jjj|jj rX|jjdjdkrX|jjd|j_dS)Nr rz)unexpected-frameset-in-frameset-innerhtmlr afterFramesetryry) r<r{r>rIr|r)rPrFr])r@rr)r)r*r  s   z1getPhases..InFramesetPhase.endTagFramesetcSs|jjdd|didS)Nzunexpected-end-tag-in-framesetr>)rIr|)r@rr)r)r*r sz.getPhases..InFramesetPhase.endTagOtherN) r7r8r9rHrrr5r r rr rr))rr)r*InFramesetPhase s  r csHeZdZfddZddZddZddZd d Zd d Zd dZ dS)z%getPhases..AfterFramesetPhasecsVj|||tjd|jfd|jfg|_|j|j_tjd|jfg|_ |j |j _dS)Nrr) rHrrrr rrrrrr)r@rIr<)rr)r*rH s z.getPhases..AfterFramesetPhase.__init__cSsdS)Nr))r@r)r)r*r sz0getPhases..AfterFramesetPhase.processEOFcSs|jjddS)Nzunexpected-char-after-frameset)rIr|)r@rr)r)r*r! sz7getPhases..AfterFramesetPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r $ sz6getPhases..AfterFramesetPhase.startTagNoframescSs|jjdd|didS)Nz#unexpected-start-tag-after-framesetr>)rIr|)r@rr)r)r*r' sz3getPhases..AfterFramesetPhase.startTagOthercSs|jjd|j_dS)NafterAfterFrameset)rIrFr])r@rr)r)r*r+ sz0getPhases..AfterFramesetPhase.endTagHtmlcSs|jjdd|didS)Nz!unexpected-end-tag-after-framesetr>)rIr|)r@rr)r)r*r. 
sz1getPhases..AfterFramesetPhase.endTagOtherN) r7r8r9rHrrr rrrr))rr)r*AfterFramesetPhase s rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z&getPhases..AfterAfterBodyPhasecs0j|||tjd|jfg|_|j|j_dS)Nr)rHrrrrrr)r@rIr<)rr)r*rH3 sz/getPhases..AfterAfterBodyPhase.__init__cSsdS)Nr))r@r)r)r*r; sz1getPhases..AfterAfterBodyPhase.processEOFcSs|jj||jjdS)N)r<rr)r@rr)r)r*r> sz5getPhases..AfterAfterBodyPhase.processCommentcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rA sz=getPhases..AfterAfterBodyPhase.processSpaceCharacterscSs |jjd|jjd|j_|S)Nzexpected-eof-but-got-charr)rIr|rFr])r@rr)r)r*rD s z8getPhases..AfterAfterBodyPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rI sz3getPhases..AfterAfterBodyPhase.startTagHtmlcSs*|jjdd|di|jjd|j_|S)Nzexpected-eof-but-got-start-tagr>r)rIr|rFr])r@rr)r)r*rL sz4getPhases..AfterAfterBodyPhase.startTagOthercSs*|jjdd|di|jjd|j_|S)Nzexpected-eof-but-got-end-tagr>r)rIr|rFr])r@rr)r)r*rR sz4getPhases..AfterAfterBodyPhase.processEndTagN) r7r8r9rHrrrrrrrr))rr)r*AfterAfterBodyPhase2 s rcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z*getPhases..AfterAfterFramesetPhasecs8j|||tjd|jfd|jfg|_|j|j_dS)Nrr)rHrrrstartTagNoFramesrrr)r@rIr<)rr)r*rHY s z3getPhases..AfterAfterFramesetPhase.__init__cSsdS)Nr))r@r)r)r*rb sz5getPhases..AfterAfterFramesetPhase.processEOFcSs|jj||jjdS)N)r<rr)r@rr)r)r*re sz9getPhases..AfterAfterFramesetPhase.processCommentcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rh szAgetPhases..AfterAfterFramesetPhase.processSpaceCharacterscSs|jjddS)Nzexpected-eof-but-got-char)rIr|)r@rr)r)r*rk sz.AfterAfterFramesetPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rn sz7getPhases..AfterAfterFramesetPhase.startTagHtmlcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rq sz;getPhases..AfterAfterFramesetPhase.startTagNoFramescSs|jjdd|didS)Nzexpected-eof-but-got-start-tagr>)rIr|)r@rr)r)r*rt sz8getPhases..AfterAfterFramesetPhase.startTagOthercSs|jjdd|didS)Nzexpected-eof-but-got-end-tagr>)rIr|)r@rr)r)r*rx sz8getPhases..AfterAfterFramesetPhase.processEndTagN) r7r8r9rHrrrrrrrrr))rr)r*AfterAfterFramesetPhaseX s 
r)rTrSrrr-r.rrrrrrrrrrrrvrrr rr)r)rGrVrr r rr/r3r8rrrrrrrrrrrrrr rrrr))rr*rE_sp)#.g@CX!-GBbYLd's/9%&&rEcs^ts tjr t|dt@}nt|dt@}|rZtfdd|djD|d<dS)Nrsc3s"|]\}}j|||fVqdS)N)r})r=kv)rr)r*r sz$adjust_attributes..)rrZPY27rrrr.)rrZneeds_adjustmentr))rr*r s   rrpFcCs|dkr i}t||||dS)N)r2r>rsrw)r)r>r2rirwr)r)r*r  s r c@seZdZdZdS)rrzError in parsed documentN)r7r8r9rr)r)r)r*rr srr)rT)r+rT)rpNF)1Z __future__rrrZsixrrrr0 collectionsr ImportErrorZ ordereddictrr r r Ztreebuilders.baser rZ constantsrrrrrrrrrrrrrrrrrr#r-r;rer"ZmemoizerErr  Exceptionrrr)r)r)r*sR     H   )L  __pycache__/html5parser.cpython-36.pyc000064400000277155147204715120013674 0ustar003 B;W @sFddlmZmZmZddlmZmZmZddlZyddl m Z Wn e k r`ddl m Z YnXddl mZddl mZddl mZdd lmZdd l mZdd lmZmZmZmZmZmZmZmZmZmZmZm Z!m"Z"m#Z#m$Z$m%Z%d!ddZ&d"ddZ'ddZ(Gddde)Z*ej+ddZ,ddZ-d#ddZ.Gdd d e/Z0dS)$)absolute_importdivisionunicode_literals)with_metaclassviewkeysPY3N) OrderedDict) _inputstream) _tokenizer) treebuilders)Marker)_utils)spaceCharactersasciiUpper2LowerspecialElementsheadingElements cdataElementsrcdataElements tokenTypes tagTokenTypes namespaceshtmlIntegrationPointElements"mathmlTextIntegrationPointElementsadjustForeignAttributesadjustMathMLAttributesadjustSVGAttributesEReparseExceptionetreeTcKs$tj|}t||d}|j|f|S)z.Parse a string or file-like object into a tree)namespaceHTMLElements)r getTreeBuilder HTMLParserparse)doc treebuilderr kwargstbpr)!/usr/lib/python3.6/html5parser.pyr#s  r#divcKs,tj|}t||d}|j|fd|i|S)N)r container)r r!r" parseFragment)r$r,r%r r&r'r(r)r)r*r-&s  r-csGfdddt}|S)NcseZdZfddZdS)z-method_decorator_metaclass..DecoratedcsBx0|jD]$\}}t|tjr&|}|||<q Wtj||||S)N)items isinstancetypes FunctionTypetype__new__)metaZ classnamebasesZ classDictZ attributeNameZ attribute)functionr)r*r3.s   z5method_decorator_metaclass..Decorated.__new__N)__name__ __module__ __qualname__r3r))r6r)r* Decorated-sr:)r2)r6r:r))r6r*method_decorator_metaclass,sr;c@seZdZdZd+ddZd,dd Zd d Zed d ZddZ ddZ ddZ ddZ ddZ ddZd-ddZddZdd 
Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*ZdS).r"zZHTML parser. Generates a tree structure from a stream of (possibly malformed) HTMLNFTcsL|_|dkrtjd}||_g_tfddt|jD_dS)a strict - raise an exception when a parse error is encountered tree - a treebuilder class controlling the type of tree that will be returned. Built in treebuilders can be accessed through html5lib.treebuilders.getTreeBuilder(treeType) Nrcs g|]\}}||jfqSr))tree).0namecls)selfr)r* Msz'HTMLParser.__init__..) strictr r!r<errorsdict getPhasesr.phases)r@r<rBr debugr))r@r*__init__<s    zHTMLParser.__init__r+c Ksh||_||_||_tj|fd|i||_|jy |jWn$tk rb|j|jYnXdS)Nparser) innerHTMLModer, scriptingr Z HTMLTokenizer tokenizerresetmainLoopr)r@stream innerHTMLr,rKr&r)r)r*_parsePs zHTMLParser._parsecCs|jjd|_g|_g|_d|_|jr|jj|_ |j t krL|j j |j _ n0|j tkrd|j j|j _ n|j dkr||j j|j _ n|jd|_|jj|jnd|_ |jd|_d|_d|_d|_dS)NFz no quirks plaintext beforeHtmlinitialT)r<rM firstStartTagrClog compatModerJr,lowerrPrrL rcdataStatestater rawtextStateplaintextStaterFphaseinsertHtmlElementresetInsertionModeZ lastPhaseZbeforeRCDataPhase framesetOK)r@r)r)r*rM^s*         zHTMLParser.resetcCst|dsdS|jjjdjS)zThe name of the character encoding that was used to decode the input stream, or :obj:`None` if that is not determined yet. 
rLNr)hasattrrLrO charEncodingr>)r@r)r)r*documentEncodings zHTMLParser.documentEncodingcCsJ|jdkr6|jtdkr6d|jko4|jdjtdkS|j|jftkSdS)Nzannotation-xmlmathmlencoding text/htmlapplication/xhtml+xml)rfrg)r> namespacer attributes translaterr)r@elementr)r)r*isHTMLIntegrationPoints   z!HTMLParser.isHTMLIntegrationPointcCs|j|jftkS)N)rhr>r)r@rkr)r)r*isMathMLTextIntegrationPointsz'HTMLParser.isMathMLTextIntegrationPointcCsztd}td}td}td}td}td}td}x|jD]}d} |} x| dk r| } |jjrx|jjdnd} | r| jnd} | r| jnd} | d }||kr|j| d | jd id} qVt|jjd ksl| |jj ksl|j | r ||kr|d t ddgksl|||fksl| t dkrP| dkrP||krP|d dksl|j | rt||||fkrt|j}n |jd}||kr|j| } qV||kr|j| } qV||kr|j| } qV||kr|j| } qV||kr|j| } qV||krV|j| } qVW||krD| drD| d rD|jdd | d iqDWd}g}x8|rt|j|j|jj}|r>|j|ks>tq>WdS)N CharactersZSpaceCharactersStartTagEndTagCommentZDoctype ParseErrorr r2datadatavarsrr>ZmglyphZ malignmarkrdzannotation-xmlsvginForeignContent selfClosingselfClosingAcknowledgedz&non-void-element-with-trailing-solidusT)rnormalizedTokensr< openElementsrhr> parseErrorgetlendefaultNamespacerm frozensetrrlr]rFprocessCharactersprocessSpaceCharactersprocessStartTag processEndTagprocessCommentprocessDoctypeappend processEOFAssertionError)r@ZCharactersTokenZSpaceCharactersTokenZ StartTagTokenZ EndTagTokenZ CommentTokenZ DoctypeTokenZParseErrorTokentokenZ prev_token new_token currentNodeZcurrentNodeNamespaceZcurrentNodeNamer2r]Z reprocessrFr)r)r*rNsp                   zHTMLParser.mainLoopccs x|jD]}|j|VqWdS)N)rLnormalizeToken)r@rr)r)r*rzs zHTMLParser.normalizedTokenscOs |j|ddf|||jjS)aParse a HTML document into a well-formed tree stream - a filelike object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. 
If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) scripting - treat noscript elements as if javascript was turned on FN)rQr<Z getDocument)r@rOargsr&r)r)r*r#s zHTMLParser.parsecOs|j|df|||jjS)a2Parse a HTML fragment into a well-formed tree fragment container - name of the element we're setting the innerHTML property if set to None, default to 'div' stream - a filelike object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) scripting - treat noscript elements as if javascript was turned on T)rQr<Z getFragment)r@rOrr&r)r)r*r-szHTMLParser.parseFragmentXXX-undefined-errorcCs@|dkr i}|jj|jjj||f|jrrPrrhrrFr])r@ZlastZnewModesnodeZnodeNameZ new_phaser)r)r*r_!sB   zHTMLParser.resetInsertionModecCsR|dks t|jj||dkr.|jj|j_n |jj|j_|j|_|j d|_dS)zYGeneric RCDATA/RAWTEXT Parsing algorithm contentType - RCDATA or RAWTEXT RAWTEXTRCDATAtextN)rr) rr< insertElementrLr[rZrYr] originalPhaserF)r@rZ contentTyper)r)r*parseRCDataRawtextMs   zHTMLParser.parseRCDataRawtext)NFTF)Fr+F)rN)r7r8r9__doc__rHrQrMpropertyrcrlrmrNrzr#r-r|rrrrrr_rr)r)r)r*r"8s&  "  C  ,r"cs"dd}dd}Gdddt|||Gddd}Gd d d }Gfd d d }Gfd dd}Gfddd}Gfddd}Gfddd} Gfddd} Gfddd} Gfddd} Gfddd} Gfddd}Gfdd d }Gfd!d"d"}Gfd#d$d$}Gfd%d&d&}Gfd'd(d(}Gfd)d*d*}Gfd+d,d,}Gfd-d.d.}Gfd/d0d0}Gfd1d2d2}Gfd3d4d4}||||||| | | | | ||||||||||||d5S)6Ncs(tddtjDfdd}|S)z4Logger that records which phase processes each tokencss|]\}}||fVqdS)Nr))r=keyvaluer)r)r* csz)getPhases..log..c sjjdrt|dkr|d}yd|di}WnYnX|dtkr\|d|d<|jjj|jjjj|jj j j|j jj|f|f||S|f||SdS)NZprocessrr2r>) r7 startswithr~rrIrVrrLrZr] __class__)r@rr&rinfo)r6 type_namesr)r*wrappedfs   z'getPhases..log..wrapped)rDrr.)r6rr))r6rr*rVaszgetPhases..logcSs|r t|StSdS)N)r;r2)Z use_metaclassZmetaclass_funcr)r)r* getMetaclasszszgetPhases..getMetaclassc@sXeZdZdZddZddZddZdd Zd 
d Zd d Z ddZ ddZ ddZ dS)zgetPhases..PhasezNBase class for helper object that implements each phase of processing cSs||_||_dS)N)rIr<)r@rIr<r)r)r*rHsz!getPhases..Phase.__init__cSstdS)N)NotImplementedError)r@r)r)r*rsz#getPhases..Phase.processEOFcSs|jj||jjddS)Nr ry)r< insertCommentr{)r@rr)r)r*rsz'getPhases..Phase.processCommentcSs|jjddS)Nzunexpected-doctype)rIr|)r@rr)r)r*rsz'getPhases..Phase.processDoctypecSs|jj|ddS)Nrs)r< insertText)r@rr)r)r*rsz*getPhases..Phase.processCharacterscSs|jj|ddS)Nrs)r<r)r@rr)r)r*rsz/getPhases..Phase.processSpaceCharacterscSs|j|d|S)Nr>)startTagHandler)r@rr)r)r*rsz(getPhases..Phase.processStartTagcSsl|jj r"|ddkr"|jjdx<|djD],\}}||jjdjkr0||jjdj|<q0Wd|j_dS)Nr>rz non-html-rootrsrF)rIrUr|r.r<r{ri)r@rattrrr)r)r* startTagHtmls  z%getPhases..Phase.startTagHtmlcSs|j|d|S)Nr>) endTagHandler)r@rr)r)r*rsz&getPhases..Phase.processEndTagN) r7r8r9rrHrrrrrrrrr)r)r)r*Phases rc@sLeZdZddZddZddZddZd d Zd d Zd dZ ddZ dS)zgetPhases..InitialPhasecSsdS)Nr))r@rr)r)r*rsz6getPhases..InitialPhase.processSpaceCharacterscSs|jj||jjdS)N)r<rdocument)r@rr)r)r*rsz.getPhases..InitialPhase.processCommentc8Ss|d}|d}|d}|d}|dks@|dk s@|dk rL|dkrL|jjd|dkrXd}|jj||dkrv|jt}| s|ddks|jdJs|dKks|jdLr|dks|r|jdDkrdE|j_n*|jdMs|jdNr|dk rdH|j_|jj dI|j_ dS)ONr>publicIdsystemIdcorrectrzabout:legacy-compatzunknown-doctype*+//silmaril//dtd html pro v0r11 19970101//4-//advasoft ltd//dtd html 3.0 aswedit + extensions//*-//as//dtd html 3.0 aswedit + extensions//-//ietf//dtd html 2.0 level 1//-//ietf//dtd html 2.0 level 2//&-//ietf//dtd html 2.0 strict level 1//&-//ietf//dtd html 2.0 strict level 2//-//ietf//dtd html 2.0 strict//-//ietf//dtd html 2.0//-//ietf//dtd html 2.1e//-//ietf//dtd html 3.0//-//ietf//dtd html 3.2 final//-//ietf//dtd html 3.2//-//ietf//dtd html 3//-//ietf//dtd html level 0//-//ietf//dtd html level 1//-//ietf//dtd html level 2//-//ietf//dtd html level 3//"-//ietf//dtd html strict level 0//"-//ietf//dtd html strict level 1//"-//ietf//dtd html strict level 2//"-//ietf//dtd html strict 
level 3//-//ietf//dtd html strict//-//ietf//dtd html//(-//metrius//dtd metrius presentational//5-//microsoft//dtd internet explorer 2.0 html strict//.-//microsoft//dtd internet explorer 2.0 html//0-//microsoft//dtd internet explorer 2.0 tables//5-//microsoft//dtd internet explorer 3.0 html strict//.-//microsoft//dtd internet explorer 3.0 html//0-//microsoft//dtd internet explorer 3.0 tables//#-//netscape comm. corp.//dtd html//*-//netscape comm. corp.//dtd strict html//*-//o'reilly and associates//dtd html 2.0//3-//o'reilly and associates//dtd html extended 1.0//;-//o'reilly and associates//dtd html extended relaxed 1.0//N-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//E-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//$-//spyglass//dtd html 2.0 extended//+-//sq//dtd html 2.0 hotmetal + extensions//--//sun microsystems corp.//dtd hotjava html//4-//sun microsystems corp.//dtd hotjava strict html//-//w3c//dtd html 3 1995-03-24//-//w3c//dtd html 3.2 draft//-//w3c//dtd html 3.2 final//-//w3c//dtd html 3.2//-//w3c//dtd html 3.2s draft//-//w3c//dtd html 4.0 frameset//#-//w3c//dtd html 4.0 transitional//(-//w3c//dtd html experimental 19960712//&-//w3c//dtd html experimental 970421//-//w3c//dtd w3 html//-//w3o//dtd w3 html 3.0//#-//webtechs//dtd mozilla html 2.0//-//webtechs//dtd mozilla html//$-//w3o//dtd w3 html strict 3.0//en//"-/w3c/dtd html 4.0 transitional/en -//w3c//dtd html 4.01 frameset//$-//w3c//dtd html 4.01 transitional//z:http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtdquirks -//w3c//dtd xhtml 1.0 frameset//$-//w3c//dtd xhtml 1.0 transitional//zlimited quirksrS)7rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr)rrr)rr)r r )rr) rIr|r<Z insertDoctyperjrrrXrWrFr])r@rr>rrrr)r)r*rs      z.getPhases..InitialPhase.processDoctypecSsd|j_|jjd|j_dS)NrrS)rIrWrFr])r@r)r)r* anythingElsesz,getPhases..InitialPhase.anythingElsecSs|jjd|j|S)Nzexpected-doctype-but-got-chars)rIr|r )r@rr)r)r*rs 
z1getPhases..InitialPhase.processCharacterscSs"|jjdd|di|j|S)Nz"expected-doctype-but-got-start-tagr>)rIr|r )r@rr)r)r*rsz/getPhases..InitialPhase.processStartTagcSs"|jjdd|di|j|S)Nz expected-doctype-but-got-end-tagr>)rIr|r )r@rr)r)r*rsz-getPhases..InitialPhase.processEndTagcSs|jjd|jdS)Nzexpected-doctype-but-got-eofT)rIr|r )r@r)r)r*r%s z*getPhases..InitialPhase.processEOFN) r7r8r9rrrr rrrrr)r)r)r* InitialPhases_r c@sDeZdZddZddZddZddZd d Zd d Zd dZ dS)z"getPhases..BeforeHtmlPhasecSs&|jjtdd|jjd|j_dS)Nrror)r<Z insertRootimpliedTagTokenrIrFr])r@r)r)r*r^,sz4getPhases..BeforeHtmlPhase.insertHtmlElementcSs |jdS)NT)r^)r@r)r)r*r1sz-getPhases..BeforeHtmlPhase.processEOFcSs|jj||jjdS)N)r<rr)r@rr)r)r*r5sz1getPhases..BeforeHtmlPhase.processCommentcSsdS)Nr))r@rr)r)r*r8sz9getPhases..BeforeHtmlPhase.processSpaceCharacterscSs |j|S)N)r^)r@rr)r)r*r;sz4getPhases..BeforeHtmlPhase.processCharacterscSs |ddkrd|j_|j|S)Nr>rT)rIrUr^)r@rr)r)r*r?s z2getPhases..BeforeHtmlPhase.processStartTagcSs4|ddkr$|jjdd|din |j|SdS)Nr>rrrbrzunexpected-end-tag-before-html)rrrr)rIr|r^)r@rr)r)r*rEs  z0getPhases..BeforeHtmlPhase.processEndTagN) r7r8r9r^rrrrrrr)r)r)r*BeforeHtmlPhase*srcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z"getPhases..BeforeHeadPhasecsVj|||tjd|jfd|jfg|_|j|j_tjd|jfg|_ |j |j _dS)Nrrrr)rrrr) rHrMethodDispatcherr startTagHeadr startTagOtherdefaultendTagImplyHeadr endTagOther)r@rIr<)rr)r*rHNs z+getPhases..BeforeHeadPhase.__init__cSs|jtdddS)NrroT)rr )r@r)r)r*r\sz-getPhases..BeforeHeadPhase.processEOFcSsdS)Nr))r@rr)r)r*r`sz9getPhases..BeforeHeadPhase.processSpaceCharacterscSs|jtdd|S)Nrro)rr )r@rr)r)r*rcsz4getPhases..BeforeHeadPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rgsz/getPhases..BeforeHeadPhase.startTagHtmlcSs0|jj||jjd|j_|jjd|j_dS)Nr inHeadry)r<rr{ headPointerrIrFr])r@rr)r)r*rjs z/getPhases..BeforeHeadPhase.startTagHeadcSs|jtdd|S)Nrro)rr )r@rr)r)r*rosz0getPhases..BeforeHeadPhase.startTagOthercSs|jtdd|S)Nrro)rr 
)r@rr)r)r*rssz2getPhases..BeforeHeadPhase.endTagImplyHeadcSs|jjdd|didS)Nzend-tag-after-implied-rootr>)rIr|)r@rr)r)r*rwsz.getPhases..BeforeHeadPhase.endTagOtherN) r7r8r9rHrrrrrrrrr))rr)r*BeforeHeadPhaseMs rcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!S)"zgetPhases..InHeadPhasec sj|||tjd|jfd|jfd|jfd|jfd|jfd|jfd |j fd |j fg|_ |j |j _ tjd |jfd|jfg|_|j|j_ dS)Nrtitlenoframesstylenoscriptscriptbasebasefontbgsoundcommandlinkr4rrr)rr)rrr r!r")rrr)rHrrr startTagTitlestartTagNoFramesStylestartTagNoscriptstartTagScriptstartTagBaseLinkCommand startTagMetarrrr endTagHeadendTagHtmlBodyBrrr)r@rIr<)rr)r*rH|s  z'getPhases..InHeadPhase.__init__cSs |jdS)NT)r )r@r)r)r*rsz)getPhases..InHeadPhase.processEOFcSs |j|S)N)r )r@rr)r)r*rsz0getPhases..InHeadPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz+getPhases..InHeadPhase.startTagHtmlcSs|jjddS)Nz!two-heads-are-not-better-than-one)rIr|)r@rr)r)r*rsz+getPhases..InHeadPhase.startTagHeadcSs$|jj||jjjd|d<dS)NTrx)r<rr{pop)r@rr)r)r*r's  z6getPhases..InHeadPhase.startTagBaseLinkCommandcSs|jj||jjjd|d<|d}|jjjjddkrd|krZ|jjjj|dnVd|krd|kr|dj d krt j |dj d }t j |}|j}|jjjj|dS) NTrxrsr Z tentativecharsetZcontentz http-equivz content-typezutf-8)r<rr{r+rIrLrOrbZchangeEncodingrXr Z EncodingBytesencodeZContentAttrParserr#)r@rrirsrIcodecr)r)r*r(s   z+getPhases..InHeadPhase.startTagMetacSs|jj|ddS)Nr)rIr)r@rr)r)r*r#sz,getPhases..InHeadPhase.startTagTitlecSs|jj|ddS)Nr)rIr)r@rr)r)r*r$sz4getPhases..InHeadPhase.startTagNoFramesStylecSs8|jjr|jj|dn|jj||jjd|j_dS)NrinHeadNoscript)rIrKrr<rrFr])r@rr)r)r*r%s z/getPhases..InHeadPhase.startTagNoscriptcSs<|jj||jjj|jj_|jj|j_|jjd|j_dS)Nr) r<rrIrLZscriptDataStaterZr]rrF)r@rr)r)r*r&s  z-getPhases..InHeadPhase.startTagScriptcSs |j|S)N)r )r@rr)r)r*rsz,getPhases..InHeadPhase.startTagOthercSs:|jjjj}|jdks&td|j|jjd|j_dS)NrzExpected head got %s afterHead)rIr<r{r+r>rrFr])r@rrr)r)r*r)sz)getPhases..InHeadPhase.endTagHeadcSs |j|S)N)r 
)r@rr)r)r*r*sz/getPhases..InHeadPhase.endTagHtmlBodyBrcSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rsz*getPhases..InHeadPhase.endTagOthercSs|jtddS)Nr)r)r )r@r)r)r*r sz+getPhases..InHeadPhase.anythingElseN)r7r8r9rHrrrrr'r(r#r$r%r&rr)r*rr r))rr)r* InHeadPhase{s  r1csxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z&getPhases..InHeadNoscriptPhasecsfj|||tjd|jfd |jfd |jfg|_|j|j_tjd |j fd |j fg|_ |j |j _dS) Nrrr r"r4rrrrr)rr r"r4rr)rr) rHrrrr'startTagHeadNoscriptrrrendTagNoscriptendTagBrrr)r@rIr<)rr)r*rHs z/getPhases..InHeadNoscriptPhase.__init__cSs|jjd|jdS)Nzeof-in-head-noscriptT)rIr|r )r@r)r)r*rs z1getPhases..InHeadNoscriptPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz5getPhases..InHeadNoscriptPhase.processCommentcSs|jjd|j|S)Nzchar-in-head-noscript)rIr|r )r@rr)r)r*rs z8getPhases..InHeadNoscriptPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz=getPhases..InHeadNoscriptPhase.processSpaceCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz3getPhases..InHeadNoscriptPhase.startTagHtmlcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r'sz>getPhases..InHeadNoscriptPhase.startTagBaseLinkCommandcSs|jjdd|didS)Nzunexpected-start-tagr>)rIr|)r@rr)r)r*r2 sz;getPhases..InHeadNoscriptPhase.startTagHeadNoscriptcSs"|jjdd|di|j|S)Nzunexpected-inhead-noscript-tagr>)rIr|r )r@rr)r)r*r sz4getPhases..InHeadNoscriptPhase.startTagOthercSs:|jjjj}|jdks&td|j|jjd|j_dS)NrzExpected noscript got %sr)rIr<r{r+r>rrFr])r@rrr)r)r*r3sz5getPhases..InHeadNoscriptPhase.endTagNoscriptcSs"|jjdd|di|j|S)Nzunexpected-inhead-noscript-tagr>)rIr|r )r@rr)r)r*r4sz/getPhases..InHeadNoscriptPhase.endTagBrcSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rsz2getPhases..InHeadNoscriptPhase.endTagOthercSs|jtddS)Nr)r3r )r@r)r)r*r sz3getPhases..InHeadNoscriptPhase.anythingElseN)r7r8r9rHrrrrrr'r2rr3r4rr r))rr)r*InHeadNoscriptPhases r5cspeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZdS)z!getPhases..AfterHeadPhasec snj|||tjd|jfd|jfd|jfd|jfd |jfg|_|j |j_ tjd|j fg|_ |j |j _ dS)Nrrrrrr r"r4rrrrrr) rrr r"r4rrrr)rrr)rHrrr 
startTagBodystartTagFramesetstartTagFromHeadrrrrr*rr)r@rIr<)rr)r*rH#s z*getPhases..AfterHeadPhase.__init__cSs |jdS)NT)r )r@r)r)r*r4sz,getPhases..AfterHeadPhase.processEOFcSs |j|S)N)r )r@rr)r)r*r8sz3getPhases..AfterHeadPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r<sz.getPhases..AfterHeadPhase.startTagHtmlcSs(d|j_|jj||jjd|j_dS)NFr)rIr`r<rrFr])r@rr)r)r*r6?s z.getPhases..AfterHeadPhase.startTagBodycSs |jj||jjd|j_dS)Nr)r<rrIrFr])r@rr)r)r*r7Ds z2getPhases..AfterHeadPhase.startTagFramesetcSst|jjdd|di|jjj|jj|jjdj|x4|jjdddD]}|jdkrN|jjj |PqNWdS)Nz#unexpected-start-tag-out-of-my-headr>rr rry) rIr|r<r{rrrFrr>remove)r@rrr)r)r*r8Hs z2getPhases..AfterHeadPhase.startTagFromHeadcSs|jjdd|didS)Nzunexpected-start-tagr>)rIr|)r@rr)r)r*rRsz.getPhases..AfterHeadPhase.startTagHeadcSs |j|S)N)r )r@rr)r)r*rUsz/getPhases..AfterHeadPhase.startTagOthercSs |j|S)N)r )r@rr)r)r*r*Ysz2getPhases..AfterHeadPhase.endTagHtmlBodyBrcSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*r]sz-getPhases..AfterHeadPhase.endTagOthercSs.|jjtdd|jjd|j_d|j_dS)NrrorT)r<rr rIrFr]r`)r@r)r)r*r `sz.getPhases..AfterHeadPhase.anythingElseN)r7r8r9rHrrrr6r7r8rrr*rr r))rr)r*AfterHeadPhase"s  r:cseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*Zd+d,Zd-d.Zd/d0Zd1d2Zd3d4Zd5d6Zd7d8Zd9d:Zd;d<Z d=d>Z!d?d@Z"dAdBZ#dCdDZ$dEdFZ%dGdHZ&dIdJZ'dKdLZ(dMdNZ)dOdPZ*dQdRZ+dSdTZ,dUdVZ-dWdXZ.dYdZZ/d[d\Z0d]d^Z1d_d`Z2dadbZ3dcddZ4dedfZ5dgS)hzgetPhases..InBodyPhasec,sj||||j|_tjd|jfdd|jfd |jfd |jfde|j ft |j fdf|j fd&|j fdg|jfd*|jfd+|jfdh|jfd8|jfd9|jfdi|jfd=|jfd>|jfdj|jfdk|jfdH|jfdI|jfdJ|jfdK|jfdL|jfdM|jfdN|jfdl|j fdQ|j!fdm|j"fdn|j#fdV|j$fdW|j%fdo|j&fg!|_'|j(|j'_)tjd |j*fd|j+fdp|j,fd&|j-fd |j.fdq|j/ft |j0fdr|j1fds|j2fd@|j3fg |_4|j5|j4_)dS)tNrrrr r!r"r4rrrrraddressarticleaside blockquotecenterdetailsdirr+dlfieldset 
figcaptionfigurefooterheaderhgroupmainmenunavolr(sectionsummaryulprelistingformlidddtrRabbigcodeemfontissmallstrikestrongttunobrbuttonappletmarqueeobjectZxmprarearembedimgkeygenwbrparamsourcetrackinputhrimageisindextextareaZiframernoembedrrrprtoptionoptgroupZmathrurcolrframerrrrrrrdialog) rrr r!r"r4rrr)r;r<r=r>r?r@rAr+rBrCrDrErFrGrHrIrJrKrLr(rMrNrO)rPrQ)rSrTrU) rWrXrYrZr[r\r]r^r_r`rarb)rerfrg)rhrrirjrkrl)rmrnro)rur)rvrw)rxry) rrzrr{rrrrrrr)r;r<r=r>rdr?r@r|rAr+rBrCrDrErFrGrHrQrIrJrKrLrPrMrNrO)rTrUrS)rVrWrXrYrZr[r\rcr]r^r_r`rarb)rerfrg)6rHprocessSpaceCharactersNonPrerrrrstartTagProcessInHeadr6r7startTagClosePrstartTagHeadingstartTagPreListing startTagFormstartTagListItemstartTagPlaintext startTagAstartTagFormatting startTagNobrstartTagButtonstartTagAppletMarqueeObject startTagXmp startTagTablestartTagVoidFormattingstartTagParamSource startTagInput startTagHr startTagImagestartTagIsIndexstartTagTextareastartTagIFramer%startTagRawtextstartTagSelect startTagRpRt startTagOpt startTagMath startTagSvgstartTagMisplacedrrr endTagBody endTagHtml endTagBlock endTagFormendTagPendTagListItem endTagHeadingendTagFormattingendTagAppletMarqueeObjectr4rr)r@rIr<)rr)r*rHhs~ z'getPhases..InBodyPhase.__init__cSs$|j|jko"|j|jko"|j|jkS)N)r>rhri)r@Znode1Znode2r)r)r*isMatchingFormattingElements  z:getPhases..InBodyPhase.isMatchingFormattingElementcSs|jj||jjd}g}x<|jjdddD]&}|tkr@Pq0|j||r0|j|q0Wt|dksjtt|dkr|jjj |d|jjj|dS)Nr ryryry) r<rr{activeFormattingElementsr rrr~rr9)r@rrkZmatchingElementsrr)r)r*addFormattingElements    z3getPhases..InBodyPhase.addFormattingElementc Ss@td}x2|jjdddD]}|j|kr|jjdPqWdS)NrTrUrSr(rrrrrrrrr z expected-closing-tag-but-got-eof) rTrUrSr(rrrrrrrrry)rr<r{r>rIr|)r@Zallowed_elementsrr)r)r*rs  z)getPhases..InBodyPhase.processEOFcSsh|d}|j|_|jdrJ|jjdjdkrJ|jjd j rJ|dd}|rd|jj|jj|dS) Nrs r rPrQrtry)rPrQrtry) r}rrr<r{r>Z hasContent#reconstructActiveFormattingElementsr)r@rrsr)r)r*!processSpaceCharactersDropNewlines   
z@getPhases..InBodyPhase.processSpaceCharactersDropNewlinecSsT|ddkrdS|jj|jj|d|jjrPtdd|dDrPd|j_dS)NrscSsg|] }|tkqSr))r)r=charr)r)r*rAszDgetPhases..InBodyPhase.processCharacters..F)r<rrrIr`any)r@rr)r)r*rs  z0getPhases..InBodyPhase.processCharacterscSs|jj|jj|ddS)Nrs)r<rr)r@rr)r)r*r}s z;getPhases..InBodyPhase.processSpaceCharactersNonPrecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r~sz4getPhases..InBodyPhase.startTagProcessInHeadcSs|jjdddit|jjdks4|jjdjdkrB|jjstnFd|j_x<|dj D],\}}||jjdj krX||jjdj |<qXWdS)Nzunexpected-start-tagr>rr Frs) rIr|r~r<r{r>rPrr`r.ri)r@rrrr)r)r*r6sz+getPhases..InBodyPhase.startTagBodycSs|jjdddit|jjdks4|jjdjdkrB|jjstnt|jjsLnj|jjdj rv|jjdj j |jjdx"|jjdjdkr|jjj qxW|jj ||jj d|j_dS) Nzunexpected-start-tagr>rr rrrry)rIr|r~r<r{r>rPrr`parent removeChildr+rrFr])r@rr)r)r*r7s" z/getPhases..InBodyPhase.startTagFramesetcSs.|jjdddr|jtd|jj|dS)Nr(rd)variant)r<elementInScoperr r)r@rr)r)r*r sz-getPhases..InBodyPhase.startTagClosePcSs>|jjdddr|jtd|jj|d|j_|j|_dS)Nr(rd)rF) r<rrr rrIr`rr)r@rr)r)r*rs  z1getPhases..InBodyPhase.startTagPreListingcSsZ|jjr|jjdddin:|jjdddr:|jtd|jj||jjd|j_dS) Nzunexpected-start-tagr>rRr(rd)rr ry) r< formPointerrIr|rrr rr{)r@rr)r)r*rs  z+getPhases..InBodyPhase.startTagFormcSsd|j_dgddgddgd}||d}xLt|jjD]<}|j|kr^|jjjt|jdP|j t kr8|jd kr8Pq8W|jj d d d r|jjjtd d|jj |dS)NFrSrUrT)rSrUrTr>rpr;r+r(rd)r)r;r+r() rIr`reversedr<r{r>r]rr  nameTuplerrr)r@rZ stopNamesMapZ stopNamesrr)r)r*rs"      z/getPhases..InBodyPhase.startTagListItemcSs>|jjdddr|jtd|jj||jjj|jj_dS)Nr(rd)r) r<rrr rrIrLr\rZ)r@rr)r)r*r4s z0getPhases..InBodyPhase.startTagPlaintextcSsb|jjdddr|jtd|jjdjtkrR|jjdd|di|jjj |jj |dS)Nr(rd)rr zunexpected-start-tagr>ry) r<rrr r{r>rrIr|r+r)r@rr)r)r*r:s  z.getPhases..InBodyPhase.startTagHeadingcSs~|jjd}|rf|jjdddd|jtd||jjkrL|jjj|||jjkrf|jjj||jj |j |dS)NrVz$unexpected-start-tag-implies-end-tag) startNameendName) r<!elementInActiveFormattingElementsrIr|rr r{r9rrr)r@rZ afeAElementr)r)r*rBs     z(getPhases..InBodyPhase.startTagAcSs|jj|j|dS)N)r<rr)r@rr)r)r*rOs 
z1getPhases..InBodyPhase.startTagFormattingcSsP|jj|jjdrB|jjdddd|jtd|jj|j|dS)Nrcz$unexpected-start-tag-implies-end-tag)rr)r<rrrIr|rr r)r@rr)r)r*rSs    z+getPhases..InBodyPhase.startTagNobrcSsT|jjdr2|jjdddd|jtd|S|jj|jj|d|j_dS)Nrdz$unexpected-start-tag-implies-end-tag)rrF) r<rrIr|rr rrr`)r@rr)r)r*r]s    z-getPhases..InBodyPhase.startTagButtoncSs0|jj|jj||jjjtd|j_dS)NF)r<rrrrr rIr`)r@rr)r)r*rhs  z:getPhases..InBodyPhase.startTagAppletMarqueeObjectcSsB|jjdddr|jtd|jjd|j_|jj|ddS)Nr(rd)rFr)r<rrr rrIr`r)r@rr)r)r*rns  z*getPhases..InBodyPhase.startTagXmpcSsR|jjdkr*|jjdddr*|jtd|jj|d|j_|jjd|j_ dS)Nrr(rd)rFr) rIrWr<rrr rr`rFr])r@rr)r)r*rus   z,getPhases..InBodyPhase.startTagTablecSs6|jj|jj||jjjd|d<d|j_dS)NTrxF)r<rrr{r+rIr`)r@rr)r)r*r}s    z5getPhases..InBodyPhase.startTagVoidFormattingcSs@|jj}|j|d|dkr<|ddjtdkr<||j_dS)Nr2rshidden)rIr`rrjr)r@rr`r)r)r*rs   z,getPhases..InBodyPhase.startTagInputcSs$|jj||jjjd|d<dS)NTrx)r<rr{r+)r@rr)r)r*rs  z2getPhases..InBodyPhase.startTagParamSourcecSsJ|jjdddr|jtd|jj||jjjd|d<d|j_dS)Nr(rd)rTrxF) r<rrr rr{r+rIr`)r@rr)r)r*rs   z)getPhases..InBodyPhase.startTagHrcSs6|jjdddd|jtdd|d|dddS) Nzunexpected-start-tag-treated-asrrrj) originalNamenewNamerorsrw)rirw)rIr|rr )r@rr)r)r*rs   z,getPhases..InBodyPhase.startTagImagecSs|jjdddi|jjrdSi}d|dkr>|dd|d<|jtdd|d|jtd d|jtd dd |dkr|dd }nd }|jtd |d|dj}d|kr|d=d |kr|d =d|d<|jtdd||dd|j td |jtd d|j tddS)Nzdeprecated-tagr>rsactionrsrRro)rirqZlabelpromptz3This is a searchable index. 
Enter search keywords: rn)r2rsrprw)rirw) rIr|r<rrr rrcopyr)r@rZ form_attrsrrir)r)r*rs6      z.getPhases..InBodyPhase.startTagIsIndexcSs0|jj||jjj|jj_|j|_d|j_dS)NF) r<rrIrLrYrZrrr`)r@rr)r)r*rs z/getPhases..InBodyPhase.startTagTextareacSsd|j_|j|dS)NF)rIr`r)r@rr)r)r*rsz-getPhases..InBodyPhase.startTagIFramecSs"|jjr|j|n |j|dS)N)rIrKrr)r@rr)r)r*r%s z/getPhases..InBodyPhase.startTagNoscriptcSs|jj|ddS)z8iframe, noembed noframes, noscript(if scripting enabled)rN)rIr)r@rr)r)r*rsz.getPhases..InBodyPhase.startTagRawtextcSs@|jjdjdkr$|jjjtd|jj|jjj|dS)Nr rxry) r<r{r>rIr]rr rr)r@rr)r)r*rs z*getPhases..InBodyPhase.startTagOptcSs|jj|jj|d|j_|jj|jjd|jjd|jjd|jjd|jjd|jjdfkrx|jjd|j_n|jjd |j_dS) NFrrrrrrinSelectInTabler)r<rrrIr`r]rF)r@rr)r)r*rs      z-getPhases..InBodyPhase.startTagSelectcSsB|jjdr2|jj|jjdjdkr2|jj|jj|dS)Nrubyr ry)r<rgenerateImpliedEndTagsr{r>rIr|r)r@rr)r)r*rs    z+getPhases..InBodyPhase.startTagRpRtcSsZ|jj|jj||jj|td|d<|jj||drV|jjjd|d<dS)NrdrhrwTrx) r<rrIrrrrr{r+)r@rr)r)r*rs      z+getPhases..InBodyPhase.startTagMathcSsZ|jj|jj||jj|td|d<|jj||drV|jjjd|d<dS)NrurhrwTrx) r<rrIrrrrr{r+)r@rr)r)r*rs      z*getPhases..InBodyPhase.startTagSvgcSs|jjdd|didS)a5 Elements that should be children of other elements that have a different insertion mode; here they are ignored "caption", "col", "colgroup", "frame", "frameset", "head", "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "noscript" zunexpected-start-tag-ignoredr>N)rIr|)r@rr)r)r*rsz0getPhases..InBodyPhase.startTagMisplacedcSs|jj|jj|dS)N)r<rr)r@rr)r)r*rs z,getPhases..InBodyPhase.startTagOthercSs|jjdddsD|jtdd|jjdddi|jtddnX|jjd|jjd j dkrt|jjdddi|jjj }x|j dkr|jjj }qWdS) Nr(rd)rrozunexpected-end-tagr>rpr ry) r<rrr rIr|rrr{r>r+)r@rrr)r)r*rs   z&getPhases..InBodyPhase.endTagPcSs|jjds|jjdS|jjdjdkrlx>|jjddD]*}|jtdkr>|jjdd|jdPq>W|jjd|j_dS)Nrr rTrUrSryrxr(rvrwrrrrrrrz$expected-one-end-tag-but-got-another)gotName expectedName afterBodyry)rTrUrSryrxr(rvrwrrrrrrrr) r<rrIr|r{r>rrFr])r@rrr)r)r*r!s  
z)getPhases..InBodyPhase.endTagBodycSs"|jjdr|jtd|SdS)Nr)r<rrr )r@rr)r)r*r3s z)getPhases..InBodyPhase.endTagHtmlcSs|ddkr|j|_|jj|d}|r2|jj|jjdj|dkr^|jjdd|di|r|jjj }x|j|dkr|jjj }qpWdS)Nr>rPr zend-tag-too-earlyry) r}rr<rrr{r>rIr|r+)r@rZinScoperr)r)r*r9s   z*getPhases..InBodyPhase.endTagBlockcSsx|jj}d|j_|dks&|jj| r:|jjdddin:|jj|jjd|krf|jjdddi|jjj|dS)Nzunexpected-end-tagr>rRr zend-tag-too-early-ignoredry)r<rrrIr|rr{r9)r@rrr)r)r*rGs   z)getPhases..InBodyPhase.endTagFormcSs|ddkrd}nd}|jj|d|dsB|jjdd|dinj|jj|dd|jjd j|dkr|jjdd|di|jjj}x|j|dkr|jjj}qWdS) Nr>rSlist)rzunexpected-end-tag)excluder zend-tag-too-earlyry)r<rrIr|rr{r>r+)r@rrrr)r)r*rTs  z-getPhases..InBodyPhase.endTagListItemcSsx$tD]}|jj|r|jjPqW|jjdj|dkrR|jjdd|dixBtD]:}|jj|rX|jjj}x|jtkr|jjj}qvWPqXWdS)Nr r>zend-tag-too-earlyry) rr<rrr{r>rIr|r+)r@ritemr)r)r*res       z,getPhases..InBodyPhase.endTagHeadingcSs"d}x|dkr|d7}|jj|d}| sL||jjkrZ|jj|j rZ|j|dS||jjkr|jjdd|di|jjj |dS|jj|js|jjdd|didS||jjdkr|jjdd|di|jjj |}d}x,|jj|dD]}|j t kr|}PqW|dkrb|jjj }x||krN|jjj }q4W|jjj |dS|jj|d}|jjj |}|} } d} |jjj | } x| d krh| d7} | d8} |jj| } | |jjkr|jjj | q| |krP| |kr |jjj | d}| j} | |jj|jjj | <| |jj|jjj | <| } | jrV| jj| | j| | } qW| jr~| jj| |jtdkr|jj\}}|j| |n |j| |j} |j| |j| |jjj ||jjj|| |jjj ||jjj|jjj |d| qWdS)z)The much-feared adoption agency algorithmrr r>Nzadoption-agency-1.2zadoption-agency-4.4zadoption-agency-1.3rrrrrrry)rrrrr)r<rr{rr>rrIr|rr9indexrrr+Z cloneNoderrZ appendChildrZgetTableMisnestedNodePosition insertBeforeZreparentChildreninsert)r@rZouterLoopCounterZformattingElementZafeIndexZ furthestBlockrkZcommonAncestorZbookmarkZlastNoderZinnerLoopCounterrZclonerrr)r)r*rts                    z/getPhases..InBodyPhase.endTagFormattingcSs|jj|dr|jj|jjdj|dkrF|jjdd|di|jj|dr|jjj}x|j|dkr|jjj}qdW|jjdS)Nr>r zend-tag-too-earlyry) r<rrr{r>rIr|r+clearActiveFormattingElements)r@rrkr)r)r*rs  z8getPhases..InBodyPhase.endTagAppletMarqueeObjectcSs@|jjdddd|jj|jjtdd|jjjdS)Nzunexpected-end-tag-treated-asrz br element)rrro)rIr|r<rrr r{r+)r@rr)r)r*r4#s   
z'getPhases..InBodyPhase.endTagBrcSsx|jjdddD]}|j|dkr~|jj|dd|jjdj|dkrd|jjdd|dix|jjj|krxqfWPq|jtkr|jjdd|diPqWdS)Nr r>)rzunexpected-end-tagryry) r<r{r>rrIr|r+rr)r@rrr)r)r*r*s z*getPhases..InBodyPhase.endTagOtherN)6r7r8r9rHrrrrrr}r~r6r7rrrrrrrrrrrrrrrrrrrrrr%rrrrrrrrrrrrrrrrrr4rr))rr)r* InBodyPhaseesh G             $ rcs@eZdZfddZddZddZddZd d Zd d Zd S)zgetPhases..TextPhasecsFj|||tjg|_|j|j_tjd|jfg|_|j|j_dS)Nr) rHrrrrr endTagScriptrr)r@rIr<)rr)r*rH9s   z%getPhases..TextPhase.__init__cSs|jj|ddS)Nrs)r<r)r@rr)r)r*rAsz.getPhases..TextPhase.processCharacterscSs8|jjdd|jjdji|jjj|jj|j_dS)Nz&expected-named-closing-tag-but-got-eofr>r Try)rIr|r<r{r>r+rr])r@r)r)r*rDs   z'getPhases..TextPhase.processEOFcSsdstd|ddS)NFz4Tried to process start tag %s in RCDATA/RAWTEXT moder>)r)r@rr)r)r*rKsz*getPhases..TextPhase.startTagOthercSs*|jjj}|jdkst|jj|j_dS)Nr)r<r{r+r>rrIrr])r@rrr)r)r*rNs z)getPhases..TextPhase.endTagScriptcSs|jjj|jj|j_dS)N)r<r{r+rIrr])r@rr)r)r*rUs z(getPhases..TextPhase.endTagOtherN) r7r8r9rHrrrrrr))rr)r* TextPhase8s  rcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZddZdd Zd!d"Zd#d$Zd%d&Zd'S)(zgetPhases..InTablePhasec sj|||tjd|jfd|jfd|jfd|jfd|jfd|jfd |j fd|j fd|j fd|j fg |_ |j|j _tjd |jfd|jfg|_|j|j_dS)NrrrrzrrrrrrrrrrprRr)rrr)rrr)rr) rrrzrrrrrrrr)rHrrrstartTagCaptionstartTagColgroup startTagColstartTagRowGroupstartTagImplyTbodyrstartTagStyleScriptrrrrr endTagTable endTagIgnorerr)r@rIr<)rr)r*rH[s$ z(getPhases..InTablePhase.__init__cSs(x"|jjdjdkr"|jjjqWdS)Nr rrry)rr)r<r{r>r+)r@r)r)r*clearStackToTableContextssz8getPhases..InTablePhase.clearStackToTableContextcSs0|jjdjdkr |jjdn |jjs,tdS)Nr rz eof-in-tablery)r<r{r>rIr|rPr)r@r)r)r*r|sz*getPhases..InTablePhase.processEOFcSs4|jj}|jjd|j_||jj_|jjj|dS)N inTableText)rIr]rFrr)r@rrr)r)r*rs z6getPhases..InTablePhase.processSpaceCharacterscSs4|jj}|jjd|j_||jj_|jjj|dS)Nr)rIr]rFrr)r@rrr)r)r*rs 
z1getPhases..InTablePhase.processCharacterscSs&d|j_|jjdj|d|j_dS)NTrF)r<insertFromTablerIrFr)r@rr)r)r*rsz*getPhases..InTablePhase.insertTextcSs6|j|jjjt|jj||jjd|j_dS)Nr) rr<rrr rrIrFr])r@rr)r)r*rs z/getPhases..InTablePhase.startTagCaptioncSs(|j|jj||jjd|j_dS)Nr)rr<rrIrFr])r@rr)r)r*rs z0getPhases..InTablePhase.startTagColgroupcSs|jtdd|S)Nrro)rr )r@rr)r)r*rsz+getPhases..InTablePhase.startTagColcSs(|j|jj||jjd|j_dS)Nr)rr<rrIrFr])r@rr)r)r*rs z0getPhases..InTablePhase.startTagRowGroupcSs|jtdd|S)Nrro)rr )r@rr)r)r*rsz2getPhases..InTablePhase.startTagImplyTbodycSs6|jjdddd|jjjtd|jjs2|SdS)Nz$unexpected-start-tag-implies-end-tagr)rr)rIr|r]rr rP)r@rr)r)r*rs  z-getPhases..InTablePhase.startTagTablecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz3getPhases..InTablePhase.startTagStyleScriptcSsVd|dkrH|ddjtdkrH|jjd|jj||jjjn |j|dS)Nr2rsrz unexpected-hidden-input-in-table) rjrrIr|r<rr{r+r)r@rr)r)r*rs    z-getPhases..InTablePhase.startTagInputcSsD|jjd|jjdkr@|jj||jjd|j_|jjjdS)Nzunexpected-form-in-tabler ry)rIr|r<rrr{r+)r@rr)r)r*rs    z,getPhases..InTablePhase.startTagFormcSs<|jjdd|did|j_|jjdj|d|j_dS)Nz)unexpected-start-tag-implies-table-voodoor>TrF)rIr|r<rrFr)r@rr)r)r*rsz-getPhases..InTablePhase.startTagOthercSs|jjdddr|jj|jjdjdkrJ|jjdd|jjdjdx"|jjdjdkrl|jjjqLW|jjj|jjn|jj st |jjdS) Nr)rr zend-tag-too-early-named)rrryryry) r<rrr{r>rIr|r+r_rPr)r@rr)r)r*rs    z+getPhases..InTablePhase.endTagTablecSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rsz,getPhases..InTablePhase.endTagIgnorecSs<|jjdd|did|j_|jjdj|d|j_dS)Nz'unexpected-end-tag-implies-table-voodoor>TrF)rIr|r<rrFr)r@rr)r)r*rsz+getPhases..InTablePhase.endTagOtherN)r7r8r9rHrrrrrrrrrrrrrrrrrrr))rr)r* InTablePhaseYs&   rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z#getPhases..InTableTextPhasecsj|||d|_g|_dS)N)rHrcharacterTokens)r@rIr<)rr)r*rHsz,getPhases..InTableTextPhase.__init__cSsddjdd|jD}tdd|DrJtd|d}|jjdj|n|rZ|jj|g|_dS)NrcSsg|] }|dqS)rsr))r=rr)r)r*rAszGgetPhases..InTableTextPhase.flushCharacters..cSsg|] 
}|tkqSr))r)r=rr)r)r*rAsrn)r2rsr)joinrrrrIrFrr<)r@rsrr)r)r*flushCharacterss z3getPhases..InTableTextPhase.flushCharacterscSs|j|j|j_|S)N)rrrIr])r@rr)r)r*rs z2getPhases..InTableTextPhase.processCommentcSs|j|j|j_dS)NT)rrrIr])r@r)r)r*rs z.getPhases..InTableTextPhase.processEOFcSs |ddkrdS|jj|dS)Nrsr)rr)r@rr)r)r*rs z5getPhases..InTableTextPhase.processCharacterscSs|jj|dS)N)rr)r@rr)r)r*rsz:getPhases..InTableTextPhase.processSpaceCharacterscSs|j|j|j_|S)N)rrrIr])r@rr)r)r*r s z3getPhases..InTableTextPhase.processStartTagcSs|j|j|j_|S)N)rrrIr])r@rr)r)r*rs z1getPhases..InTableTextPhase.processEndTagN) r7r8r9rHrrrrrrrr))rr)r*InTableTextPhases  rcs`eZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS)z!getPhases..InCaptionPhasec sfj|||tjd|jfd |jfg|_|j|j_tjd|jfd |j fd|j fg|_ |j |j _dS)Nrrrzrrrrrrrrr) rrzrrrrrrr) rrzrrrrrrrr) rHrrrstartTagTableElementrrr endTagCaptionrrrr)r@rIr<)rr)r*rHs z*getPhases..InCaptionPhase.__init__cSs|jjddd S)Nrr)r)r<r)r@r)r)r*ignoreEndTagCaption+sz5getPhases..InCaptionPhase.ignoreEndTagCaptioncSs|jjdjdS)Nr)rIrFr)r@r)r)r*r.sz,getPhases..InCaptionPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r1sz3getPhases..InCaptionPhase.processCharacterscSs0|jj|j}|jjjtd|s,|SdS)Nr)rIr|rr]rr )r@r ignoreEndTagr)r)r*r4s  z6getPhases..InCaptionPhase.startTagTableElementcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r<sz/getPhases..InCaptionPhase.startTagOthercSs|js|jj|jjdjdkrB|jjdd|jjdjdx"|jjdjdkrd|jjjqDW|jjj|jj|jj d|j_ n|jj st |jjdS) Nr rz$expected-one-end-tag-but-got-another)rrrryryry) rr<rr{r>rIr|r+rrFr]rPr)r@rr)r)r*r?s    z/getPhases..InCaptionPhase.endTagCaptioncSs0|jj|j}|jjjtd|s,|SdS)Nr)rIr|rr]rr )r@rrr)r)r*rQs  z-getPhases..InCaptionPhase.endTagTablecSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rXsz.getPhases..InCaptionPhase.endTagIgnorecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r[sz-getPhases..InCaptionPhase.endTagOtherN) r7r8r9rHrrrrrrrrrr))rr)r*InCaptionPhases rcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z%getPhases..InColumnGroupPhasecs^j|||tjd|jfd|jfg|_|j|j_tjd|jfd|j fg|_ |j |j _dS)Nrrzr) 
rHrrrrrrrendTagColgroup endTagColrr)r@rIr<)rr)r*rHas z.getPhases..InColumnGroupPhase.__init__cSs|jjdjdkS)Nr rry)r<r{r>)r@r)r)r*ignoreEndTagColgrouppsz:getPhases..InColumnGroupPhase.ignoreEndTagColgroupcSsD|jjdjdkr"|jjstdS|j}|jtd|s@dSdS)Nr rrTry) r<r{r>rIrPrrrr )r@rr)r)r*rss z0getPhases..InColumnGroupPhase.processEOFcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*r}sz7getPhases..InColumnGroupPhase.processCharacterscSs$|jj||jjjd|d<dS)NTrx)r<rr{r+)r@rr)r)r*rs  z1getPhases..InColumnGroupPhase.startTagColcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*rsz3getPhases..InColumnGroupPhase.startTagOthercSs@|jr |jjst|jjn|jjj|jjd|j_ dS)Nr) rrIrPrr|r<r{r+rFr])r@rr)r)r*rs    z4getPhases..InColumnGroupPhase.endTagColgroupcSs|jjdddidS)Nz no-end-tagr>rz)rIr|)r@rr)r)r*rsz/getPhases..InColumnGroupPhase.endTagColcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*rsz1getPhases..InColumnGroupPhase.endTagOtherN) r7r8r9rHrrrrrrrrr))rr)r*InColumnGroupPhase^s   rcsxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z#getPhases..InTableBodyPhasec svj|||tjd|jfd|jfd |jfd|jfg|_|j|j_ tjd|j fd |j fd|j fg|_ |j|j _ dS)Nrrrrrrzrrrrrr)rr)rrzrrrr)rrr)rrrzrrrrr)rHrrr startTagTrstartTagTableCellstartTagTableOtherrrrendTagTableRowGrouprrrr)r@rIr<)rr)r*rHs z,getPhases..InTableBodyPhase.__init__cSsFx"|jjdjdkr"|jjjqW|jjdjdkrB|jjsBtdS) Nr rrrrry)rrrrry)r<r{r>r+rIrPr)r@r)r)r*clearStackToTableBodyContexts z@getPhases..InTableBodyPhase.clearStackToTableBodyContextcSs|jjdjdS)Nr)rIrFr)r@r)r)r*rsz.getPhases..InTableBodyPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz:getPhases..InTableBodyPhase.processSpaceCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz5getPhases..InTableBodyPhase.processCharacterscSs(|j|jj||jjd|j_dS)Nr)rr<rrIrFr])r@rr)r)r*rs z.getPhases..InTableBodyPhase.startTagTrcSs*|jjdd|di|jtdd|S)Nzunexpected-cell-in-table-bodyr>rro)rIr|rr )r@rr)r)r*rsz5getPhases..InTableBodyPhase.startTagTableCellcSsn|jjddds0|jjddds0|jjdddrT|j|jt|jjdj|S|jjs`t |jj dS)Nrr)rrrr ry) r<rrrr r{r>rIrPrr|)r@rr)r)r*rs 
z6getPhases..InTableBodyPhase.startTagTableOthercSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz1getPhases..InTableBodyPhase.startTagOthercSsT|jj|dddr:|j|jjj|jjd|j_n|jjdd|didS)Nr>r)rrz unexpected-end-tag-in-table-body) r<rrr{r+rIrFr]r|)r@rr)r)r*rs  z7getPhases..InTableBodyPhase.endTagTableRowGroupcSsn|jjddds0|jjddds0|jjdddrT|j|jt|jjdj|S|jjs`t |jj dS)Nrr)rrrr ry) r<rrrr r{r>rIrPrr|)r@rr)r)r*rs z/getPhases..InTableBodyPhase.endTagTablecSs|jjdd|didS)Nz unexpected-end-tag-in-table-bodyr>)rIr|)r@rr)r)r*rsz0getPhases..InTableBodyPhase.endTagIgnorecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz/getPhases..InTableBodyPhase.endTagOtherN)r7r8r9rHrrrrrrrrrrrrr))rr)r*InTableBodyPhases    rcseZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZddZdS)zgetPhases..InRowPhasec svj|||tjd|jfd |jfd|jfg|_|j|j_tjd |j fd |j fd|j fd|j fg|_ |j|j _dS)Nrrrrrzrrrrrrr)rr)rrzrrrrr)rrr)rrrzrrrr)rHrrrrrrrrendTagTrrrrrr)r@rIr<)rr)r*rHs z&getPhases..InRowPhase.__init__cSsDx>|jjdjdkr>|jjdd|jjdji|jjjqWdS) Nr rrz'unexpected-implied-end-tag-in-table-rowr>ry)rrry)r<r{r>rIr|r+)r@r)r)r*clearStackToTableRowContextsz9getPhases..InRowPhase.clearStackToTableRowContextcSs|jjddd S)Nrr)r)r<r)r@r)r)r*ignoreEndTagTrsz,getPhases..InRowPhase.ignoreEndTagTrcSs|jjdjdS)Nr)rIrFr)r@r)r)r*r"sz(getPhases..InRowPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r%sz4getPhases..InRowPhase.processSpaceCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r(sz/getPhases..InRowPhase.processCharacterscSs6|j|jj||jjd|j_|jjjtdS)Nr) rr<rrIrFr]rrr )r@rr)r)r*r+s z/getPhases..InRowPhase.startTagTableCellcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*r1sz0getPhases..InRowPhase.startTagTableOthercSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r8sz+getPhases..InRowPhase.startTagOthercSsH|js.|j|jjj|jjd|j_n|jjs:t |jj dS)Nr) rrr<r{r+rIrFr]rPrr|)r@rr)r)r*r;s   z&getPhases..InRowPhase.endTagTrcSs"|j}|jtd|s|SdS)Nr)rrr )r@rrr)r)r*rEsz)getPhases..InRowPhase.endTagTablecSs4|jj|dddr&|jtd|S|jjdS)Nr>r)rr)r<rrr 
rIr|)r@rr)r)r*rMsz1getPhases..InRowPhase.endTagTableRowGroupcSs|jjdd|didS)Nzunexpected-end-tag-in-table-rowr>)rIr|)r@rr)r)r*rTsz*getPhases..InRowPhase.endTagIgnorecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rXsz)getPhases..InRowPhase.endTagOtherN)r7r8r9rHrrrrrrrrrrrrrr))rr)r* InRowPhases  rcs`eZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS)zgetPhases..InCellPhasec sfj|||tjd|jfd |jfg|_|j|j_tjd|jfd|j fd|j fg|_ |j |j _dS)Nrrrzrrrrrrrrr) rrzrrrrrrr)rr)rrrzrr)rrrrr) rHrrrrrrrendTagTableCellr endTagImplyrr)r@rIr<)rr)r*rH]s z'getPhases..InCellPhase.__init__cSsB|jjdddr |jtdn|jjdddr>|jtddS)Nrr)rr)r<rrr )r@r)r)r* closeCellnsz(getPhases..InCellPhase.closeCellcSs|jjdjdS)Nr)rIrFr)r@r)r)r*rusz)getPhases..InCellPhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rxsz0getPhases..InCellPhase.processCharacterscSsF|jjddds |jjdddr,|j|S|jjs8t|jjdS)Nrr)rr)r<rrrIrPrr|)r@rr)r)r*r{s  z1getPhases..InCellPhase.startTagTableOthercSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz,getPhases..InCellPhase.startTagOthercSs|jj|dddr|jj|d|jjdj|dkrt|jjdd|dix.|jjj}|j|dkrRPqRWn |jjj|jj|jj d|j_ n|jjdd|didS) Nr>r)rr zunexpected-cell-end-tagrzunexpected-end-tagry) r<rrr{r>rIr|r+rrFr])r@rrr)r)r*rs   z.getPhases..InCellPhase.endTagTableCellcSs|jjdd|didS)Nzunexpected-end-tagr>)rIr|)r@rr)r)r*rsz+getPhases..InCellPhase.endTagIgnorecSs.|jj|dddr |j|S|jjdS)Nr>r)r)r<rrrIr|)r@rr)r)r*rsz*getPhases..InCellPhase.endTagImplycSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rsz*getPhases..InCellPhase.endTagOtherN) r7r8r9rHrrrrrrrrrr))rr)r* InCellPhase[s  rcsxeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ ddZ ddZddZdS)z getPhases..InSelectPhasecsj|||tjd|jfd|jfd|jfd|jfd |jfd|jfg|_ |j |j _ tjd|j fd|j fd|jfg|_|j|j_ dS) Nrrxryrrprkrtr)rprkrt)rHrrrstartTagOptionstartTagOptgrouprrr&rrr endTagOptionendTagOptgroup endTagSelectrr)r@rIr<)rr)r*rHs z)getPhases..InSelectPhase.__init__cSs0|jjdjdkr |jjdn |jjs,tdS)Nr rz eof-in-selectry)r<r{r>rIr|rPr)r@r)r)r*rsz+getPhases..InSelectPhase.processEOFcSs$|ddkrdS|jj|ddS)Nrsr)r<r)r@rr)r)r*rs 
z2getPhases..InSelectPhase.processCharacterscSs.|jjdjdkr|jjj|jj|dS)Nr rxry)r<r{r>r+r)r@rr)r)r*rs z/getPhases..InSelectPhase.startTagOptioncSsL|jjdjdkr|jjj|jjdjdkr<|jjj|jj|dS)Nr rxryryry)r<r{r>r+r)r@rr)r)r*rs   z1getPhases..InSelectPhase.startTagOptgroupcSs|jjd|jtddS)Nzunexpected-select-in-selectr)rIr|rr )r@rr)r)r*rs z/getPhases..InSelectPhase.startTagSelectcSs>|jjd|jjdddr.|jtd|S|jjs:tdS)Nzunexpected-input-in-selectr)r)rIr|r<rrr rPr)r@rr)r)r*rs  z.getPhases..InSelectPhase.startTagInputcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r&sz/getPhases..InSelectPhase.startTagScriptcSs|jjdd|didS)Nzunexpected-start-tag-in-selectr>)rIr|)r@rr)r)r*rsz.getPhases..InSelectPhase.startTagOthercSs6|jjdjdkr |jjjn|jjdddidS)Nr rxzunexpected-end-tag-in-selectr>ry)r<r{r>r+rIr|)r@rr)r)r*rsz-getPhases..InSelectPhase.endTagOptioncSsf|jjdjdkr0|jjdjdkr0|jjj|jjd jdkrP|jjjn|jjdddidS) Nr rxrryzunexpected-end-tag-in-selectr>ryry)r<r{r>r+rIr|)r@rr)r)r*rs z/getPhases..InSelectPhase.endTagOptgroupcSs^|jjdddrD|jjj}x|jdkr6|jjj}qW|jjn|jjsPt|jj dS)Nr)r) r<rr{r+r>rIr_rPrr|)r@rrr)r)r*rs    z-getPhases..InSelectPhase.endTagSelectcSs|jjdd|didS)Nzunexpected-end-tag-in-selectr>)rIr|)r@rr)r)r*r sz,getPhases..InSelectPhase.endTagOtherN)r7r8r9rHrrrrrrr&rrrrrr))rr)r* InSelectPhases   rcsHeZdZfddZddZddZddZd d Zd d Zd dZ dS)z'getPhases..InSelectInTablePhasec sNj|||tjd |jfg|_|j|j_tjd |jfg|_|j |j_dS) Nrrrrrrrr)rrrrrrrr)rrrrrrrr) rHrrrrrrrrr)r@rIr<)rr)r*rH s z0getPhases..InSelectInTablePhase.__init__cSs|jjdjdS)Nr)rIrFr)r@r)r)r*r sz2getPhases..InSelectInTablePhase.processEOFcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r sz9getPhases..InSelectInTablePhase.processCharacterscSs(|jjdd|di|jtd|S)Nz5unexpected-table-element-start-tag-in-select-in-tabler>r)rIr|rr )r@rr)r)r*r! 
sz5getPhases..InSelectInTablePhase.startTagTablecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r& sz5getPhases..InSelectInTablePhase.startTagOthercSs@|jjdd|di|jj|dddr<|jtd|SdS)Nz3unexpected-table-element-end-tag-in-select-in-tabler>r)rr)rIr|r<rrr )r@rr)r)r*r) sz3getPhases..InSelectInTablePhase.endTagTablecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r/ sz3getPhases..InSelectInTablePhase.endTagOtherN) r7r8r9rHrrrrrrr))rr)r*InSelectInTablePhase s rc-seZdZeddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,g,Zfd-d.Zd/d0Zfd1d2Zd3d4Zd5d6Z d7S)8z(getPhases..InForeignContentPhaserWrXr>rrr?rYrTr+rBrUrZriZh1Zh2Zh3Zh4Zh5Zh6rrqr\rjrSrQrJr4rcrLr(rPrr]r^spanr`r_subZsuprrarbrOvarcsj|||dS)N)rH)r@rIr<)rr)r*rH< sz1getPhases..InForeignContentPhase.__init__c%Ssnddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%$}|d&|krj||d&|d&<dS)'NZaltGlyphZ altGlyphDefZ altGlyphItemZ animateColorZ animateMotionZanimateTransformZclipPathZfeBlendZ feColorMatrixZfeComponentTransferZ feCompositeZfeConvolveMatrixZfeDiffuseLightingZfeDisplacementMapZfeDistantLightZfeFloodZfeFuncAZfeFuncBZfeFuncGZfeFuncRZfeGaussianBlurZfeImageZfeMergeZ feMergeNodeZ feMorphologyZfeOffsetZ fePointLightZfeSpecularLightingZ feSpotLightZfeTileZ feTurbulenceZ foreignObjectZglyphRefZlinearGradientZradialGradientZtextPath)$ZaltglyphZ altglyphdefZ altglyphitemZ animatecolorZ animatemotionZanimatetransformZclippathZfeblendZ fecolormatrixZfecomponenttransferZ fecompositeZfeconvolvematrixZfediffuselightingZfedisplacementmapZfedistantlightZfefloodZfefuncaZfefuncbZfefuncgZfefuncrZfegaussianblurZfeimageZfemergeZ femergenodeZ femorphologyZfeoffsetZ fepointlightZfespecularlightingZ fespotlightZfetileZ feturbulenceZ foreignobjectZglyphrefZlineargradientZradialgradientZtextpathr>r))r@r replacementsr)r)r*adjustSVGTagNames? 
sL z:getPhases..InForeignContentPhase.adjustSVGTagNamescsL|ddkrd|d<n&|jjr.InForeignContentPhase.processCharacters..F)rIr`rr)r@r)rr)r*rh s   z:getPhases..InForeignContentPhase.processCharacterscSs6|jjd}|d|jksD|ddkrt|djtdddg@r|jjdd|dixR|jjdj|jjkr|jj |jjd r|jj |jjd r|jjj q\W|S|jt d kr|jj |n$|jt d kr|j||jj||jj||j|d <|jj||d r2|jjj d |d<dS)Nr r>r[rsZcolorZfacesizez*unexpected-html-element-in-foreign-contentrdrurhrwTrxryryryry)r<r{breakoutElementssetkeysrIr|rhrrlrmr+rrrrrr)r@rrr)r)r*rp s.          z8getPhases..InForeignContentPhase.processStartTagcSst|jjd}|jjd}|jjt|dkrF|jjdd|dix|jjt|dkr|jj|jj dkr|jjj |jjj |j_x |jjj |kr|jjst qWd}P|d8}|jj|}|j|jjkrqHqH|jjj|}PqHW|S)Nr r>zunexpected-end-tagrry)r~r<r{r>rjrrIr|r]rFrrr+rrhrr)r@rZ nodeIndexrrr)r)r*r s(   z6getPhases..InForeignContentPhase.processEndTagN) r7r8r9rrrHrrrrr))rr)r*InForeignContentPhase2 s       ) rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z!getPhases..AfterBodyPhasecsNj|||tjd|jfg|_|j|j_tjd|jfg|_|j |j_dS)Nr) rHrrrrrrrrr)r@rIr<)rr)r*rH s  z*getPhases..AfterBodyPhase.__init__cSsdS)Nr))r@r)r)r*r sz,getPhases..AfterBodyPhase.processEOFcSs|jj||jjddS)Nr)r<rr{)r@rr)r)r*r sz0getPhases..AfterBodyPhase.processCommentcSs |jjd|jjd|j_|S)Nzunexpected-char-after-bodyr)rIr|rFr])r@rr)r)r*r s z3getPhases..AfterBodyPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r sz.getPhases..AfterBodyPhase.startTagHtmlcSs*|jjdd|di|jjd|j_|S)Nzunexpected-start-tag-after-bodyr>r)rIr|rFr])r@rr)r)r*r sz/getPhases..AfterBodyPhase.startTagOthercSs*|jjr|jjdn|jjd|j_dS)Nz'unexpected-end-tag-after-body-innerhtmlafterAfterBody)rIrPr|rFr])r@r>r)r)r*r sz,getPhases..AfterBodyPhase.endTagHtmlcSs*|jjdd|di|jjd|j_|S)Nzunexpected-end-tag-after-bodyr>r)rIr|rFr])r@rr)r)r*r sz-getPhases..AfterBodyPhase.endTagOtherN) r7r8r9rHrrrrrrrr))rr)r*AfterBodyPhase s r csXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z"getPhases..InFramesetPhasecsfj|||tjd|jfd|jfd|jfd|jfg|_|j|j_ tjd|j fg|_ |j |j _ dS)Nrrr{r) rHrrrr7 startTagFramestartTagNoframesrrrendTagFramesetrr)r@rIr<)rr)r*rH s 
z+getPhases..InFramesetPhase.__init__cSs0|jjdjdkr |jjdn |jjs,tdS)Nr rzeof-in-framesetry)r<r{r>rIr|rPr)r@r)r)r*r sz-getPhases..InFramesetPhase.processEOFcSs|jjddS)Nzunexpected-char-in-frameset)rIr|)r@rr)r)r*r sz4getPhases..InFramesetPhase.processCharacterscSs|jj|dS)N)r<r)r@rr)r)r*r7 sz3getPhases..InFramesetPhase.startTagFramesetcSs|jj||jjjdS)N)r<rr{r+)r@rr)r)r*r  s z0getPhases..InFramesetPhase.startTagFramecSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r  sz3getPhases..InFramesetPhase.startTagNoframescSs|jjdd|didS)Nz unexpected-start-tag-in-framesetr>)rIr|)r@rr)r)r*r sz0getPhases..InFramesetPhase.startTagOthercSs\|jjdjdkr |jjdn |jjj|jj rX|jjdjdkrX|jjd|j_dS)Nr rz)unexpected-frameset-in-frameset-innerhtmlr afterFramesetryry) r<r{r>rIr|r+rPrFr])r@rr)r)r*r  s   z1getPhases..InFramesetPhase.endTagFramesetcSs|jjdd|didS)Nzunexpected-end-tag-in-framesetr>)rIr|)r@rr)r)r*r sz.getPhases..InFramesetPhase.endTagOtherN) r7r8r9rHrrr7r r rr rr))rr)r*InFramesetPhase s  rcsHeZdZfddZddZddZddZd d Zd d Zd dZ dS)z%getPhases..AfterFramesetPhasecsVj|||tjd|jfd|jfg|_|j|j_tjd|jfg|_ |j |j _dS)Nrr) rHrrrr rrrrrr)r@rIr<)rr)r*rH s z.getPhases..AfterFramesetPhase.__init__cSsdS)Nr))r@r)r)r*r sz0getPhases..AfterFramesetPhase.processEOFcSs|jjddS)Nzunexpected-char-after-frameset)rIr|)r@rr)r)r*r! sz7getPhases..AfterFramesetPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*r $ sz6getPhases..AfterFramesetPhase.startTagNoframescSs|jjdd|didS)Nz#unexpected-start-tag-after-framesetr>)rIr|)r@rr)r)r*r' sz3getPhases..AfterFramesetPhase.startTagOthercSs|jjd|j_dS)NafterAfterFrameset)rIrFr])r@rr)r)r*r+ sz0getPhases..AfterFramesetPhase.endTagHtmlcSs|jjdd|didS)Nz!unexpected-end-tag-after-framesetr>)rIr|)r@rr)r)r*r. 
sz1getPhases..AfterFramesetPhase.endTagOtherN) r7r8r9rHrrr rrrr))rr)r*AfterFramesetPhase s rcsPeZdZfddZddZddZddZd d Zd d Zd dZ ddZ dS)z&getPhases..AfterAfterBodyPhasecs0j|||tjd|jfg|_|j|j_dS)Nr)rHrrrrrr)r@rIr<)rr)r*rH3 sz/getPhases..AfterAfterBodyPhase.__init__cSsdS)Nr))r@r)r)r*r; sz1getPhases..AfterAfterBodyPhase.processEOFcSs|jj||jjdS)N)r<rr)r@rr)r)r*r> sz5getPhases..AfterAfterBodyPhase.processCommentcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rA sz=getPhases..AfterAfterBodyPhase.processSpaceCharacterscSs |jjd|jjd|j_|S)Nzexpected-eof-but-got-charr)rIr|rFr])r@rr)r)r*rD s z8getPhases..AfterAfterBodyPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rI sz3getPhases..AfterAfterBodyPhase.startTagHtmlcSs*|jjdd|di|jjd|j_|S)Nzexpected-eof-but-got-start-tagr>r)rIr|rFr])r@rr)r)r*rL sz4getPhases..AfterAfterBodyPhase.startTagOthercSs*|jjdd|di|jjd|j_|S)Nzexpected-eof-but-got-end-tagr>r)rIr|rFr])r@rr)r)r*rR sz4getPhases..AfterAfterBodyPhase.processEndTagN) r7r8r9rHrrrrrrrr))rr)r*AfterAfterBodyPhase2 s rcsXeZdZfddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)z*getPhases..AfterAfterFramesetPhasecs8j|||tjd|jfd|jfg|_|j|j_dS)Nrr)rHrrrstartTagNoFramesrrr)r@rIr<)rr)r*rHY s z3getPhases..AfterAfterFramesetPhase.__init__cSsdS)Nr))r@r)r)r*rb sz5getPhases..AfterAfterFramesetPhase.processEOFcSs|jj||jjdS)N)r<rr)r@rr)r)r*re sz9getPhases..AfterAfterFramesetPhase.processCommentcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rh szAgetPhases..AfterAfterFramesetPhase.processSpaceCharacterscSs|jjddS)Nzexpected-eof-but-got-char)rIr|)r@rr)r)r*rk sz.AfterAfterFramesetPhase.processCharacterscSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rn sz7getPhases..AfterAfterFramesetPhase.startTagHtmlcSs|jjdj|S)Nr)rIrFr)r@rr)r)r*rq sz;getPhases..AfterAfterFramesetPhase.startTagNoFramescSs|jjdd|didS)Nzexpected-eof-but-got-start-tagr>)rIr|)r@rr)r)r*rt sz8getPhases..AfterAfterFramesetPhase.startTagOthercSs|jjdd|didS)Nzexpected-eof-but-got-end-tagr>)rIr|)r@rr)r)r*rx sz8getPhases..AfterAfterFramesetPhase.processEndTagN) r7r8r9rHrrrrrrrrr))rr)r*AfterAfterFramesetPhaseX s 
r)rTrSrrr/r0rrrrrrrrrrrrvrrrr r)r)rGrVrr rrr1r5r:rrrrrrrrrrrrr rrrrr))rr*rE_sp)#.g@CX!-GBbYLd's/9%&&rEcs^ts tjr t|dt@}nt|dt@}|rZtfdd|djD|d<dS)Nrsc3s"|]\}}j|||fVqdS)N)r})r=kv)rr)r*r sz$adjust_attributes..)rrZPY27rrrr.)rrZneeds_adjustmentr))rr*r s   rrpFcCs|dkr i}t||||dS)N)r2r>rsrw)r)r>r2rirwr)r)r*r  s r c@seZdZdZdS)rrzError in parsed documentN)r7r8r9rr)r)r)r*rr srr)rT)r+rT)rpNF)1Z __future__rrrZsixrrrr0 collectionsr ImportErrorZ ordereddictrr r r Ztreebuilders.baser rZ constantsrrrrrrrrrrrrrrrrrr#r-r;rgr"ZmemoizerErr  Exceptionrrr)r)r)r*sR     H   )L  __pycache__/serializer.cpython-36.opt-1.pyc000064400000021761147204715120014524 0ustar003 B;WU7@sddlmZmZmZddlmZddlZddlmZm Z ddl m Z m Z m Z ddl mZmZmZddlmZmZdd lmZd je d Zejd ed Zejd edZiZeddkZxeejD]p\Z Z!eree!dkse ree!dkrqe!dkree!dkrej"e!Z!ne#e!Z!e!eks4e j$re ee!<qWddZ%ede%dddZ&Gddde'Z(Gddde)Z*dS))absolute_importdivisionunicode_literals) text_typeN)register_errorxmlcharrefreplace_errors) voidElementsbooleanAttributesspaceCharacters)rcdataElementsentities xmlEntities) treewalkers_utils)escapez"'=<>`[]u_  /`  ᠎᠏           

   ]u􏿿&c Cs"t|ttfrg}g}d}xt|j|j|jD]n\}}|rFd}q4||j}tj|j|t |j|dgrtj |j||d}d}nt |}|j |q4Wx^|D]V}t j|} | r|j d|j | | jds|j dq|j dt|ddqWdj||jfSt|SdS)NFrTr;z&#x%s;r) isinstanceUnicodeEncodeErrorUnicodeTranslateError enumerateobjectstartendrZisSurrogatePairminsurrogatePairToCodepointordappend_encode_entity_mapgetendswithhexjoinr) excresZ codepointsskipicindexZ codepointZcper/ /usr/lib/python3.6/serializer.pyhtmlentityreplace_errors*s0  "      r1htmlentityreplaceetreecKs$tj|}tf|}|j|||S)N)rZ getTreeWalkerHTMLSerializerrender)inputZtreeencodingZserializer_optsZwalkersr/r/r0 serializeJs  r9c@s~eZdZdZdZdZdZdZdZdZ dZ dZ dZ dZ dZdZdZd!ZddZddZddZd"ddZd#ddZd$dd ZdS)%r4legacy"TFquote_attr_values quote_charuse_best_quote_charomit_optional_tagsminimize_boolean_attributesuse_trailing_solidusspace_before_trailing_solidusescape_lt_in_attrs escape_rcdataresolve_entitiesalphabetical_attributesinject_meta_charsetstrip_whitespacesanitizec Kszt|t|j}t|dkr2tdtt|d|kr@d|_x(|jD]}t|||j|t ||qHWg|_ d|_ dS)a6 Initialize HTMLSerializer. Keyword options (default given first unless specified) include: inject_meta_charset=True|False Whether it insert a meta element to define the character set of the document. quote_attr_values="legacy"|"spec"|"always" Whether to quote attribute values that don't require quoting per legacy browser behaviour, when required by the standard, or always. quote_char=u'"'|u"'" Use given quote character for attribute quoting. Default is to use double quote unless attribute value contains a double quote, in which case single quotes are used instead. escape_lt_in_attrs=False|True Whether to escape < in attribute values. escape_rcdata=False|True Whether to escape characters that need to be escaped within normal elements within rcdata elements such as style. resolve_entities=True|False Whether to resolve named character entities that appear in the source tree. The XML predefined entities < > & " ' are unaffected by this setting. 
strip_whitespace=False|True Whether to remove semantically meaningless whitespace. (This compresses all whitespace to a single space except within pre.) minimize_boolean_attributes=True|False Shortens boolean attributes to give just the attribute value, for example becomes . use_trailing_solidus=False|True Includes a close-tag slash at the end of the start tag of void elements (empty elements whose end tag is forbidden). E.g.
. space_before_trailing_solidus=True|False Places a space immediately before the closing slash in a tag using a trailing solidus. E.g.
. Requires use_trailing_solidus. sanitize=False|True Strip all unsafe or unknown constructs from output. See `html5lib user documentation`_ omit_optional_tags=True|False Omit start/end tags that are optional. alphabetical_attributes=False|True Reorder attributes to be in alphabetical order. .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation rz2__init__() got an unexpected keyword argument '%s'r=FN) frozensetoptionslen TypeErrornextiterr>setattrr$getattrerrorsstrict)selfkwargsZunexpected_argsattrr/r/r0__init__ps.  zHTMLSerializer.__init__cCs|jr|j|jdS|SdS)Nr2)r7encode)rTstringr/r/r0rXszHTMLSerializer.encodecCs|jr|j|jdS|SdS)NrS)r7rX)rTrYr/r/r0 encodeStrictszHTMLSerializer.encodeStrictNccs||_d}g|_|r0|jr0ddlm}|||}|jrJddlm}||}|jrdddlm}||}|j r~ddl m}||}|j rddl m}||}xR|D]H}|d}|dkr`d|d}|dr|d |d7}n|d r|d 7}|d rJ|d j d d kr0|d j dd kr*|jdd}nd }|d||d |f7}|d7}|j|Vq|d5kr|dksz|r|r|dj dd kr|jd|j|dVn|jt|dVq|d6kr|d} |jd| V| tkr|j rd}n|r|jdx|djD]\\} } } | } | }|jdV|j| V|j s| tj| tkr"| tjdtkr"|jdV|jdkst|d krd}n@|jd krtj|dk }n$|jd!krtj|dk }ntd"|jd#d$}|j r|jd%d&}|r|j!}|j"rTd|kr CharactersSpaceCharactersdatazCommentz--zComment contains --z ZEntityrzEntity %s not recognizedz&%s;)r`ra)rcrd))r7rRrGZfilters.inject_meta_charsetr[rFZfilters.alphabeticalattributesrHZfilters.whitespacerIZfilters.sanitizerr?Zfilters.optionaltagsfindserializeErrorrZrXrr rDitemsr@r r$tupler<rL_quoteAttributeSpecsearch_quoteAttributeLegacy ValueErrorreplacerCr=r>r rArBr rEr)rT treewalkerr7Zin_cdatar[tokenr\Zdoctyper=r]_Z attr_nameZ attr_valuekvZ quote_attrrbkeyr/r/r0r9s                                    zHTMLSerializer.serializecCs2|rdjt|j||Sdjt|j|SdS)Nr)r'listr9)rTrur7r/r/r0r5?szHTMLSerializer.renderXXX ERROR MESSAGE NEEDEDcCs|jj||jrtdS)N)rRr"rSSerializeError)rTrbr/r/r0rmEs zHTMLSerializer.serializeError)r<r=r>r?r@rArBrCrDrErFrGrHrI)N)N)r})__name__ __module__ __qualname__r<r=r>r?r@rArBrCrDrErFrGrHrIrKrWrXrZr9r5rmr/r/r/r0r4Qs68  r4c@seZdZdZdS)r~zError in serialized 
treeN)rrr__doc__r/r/r/r0r~Lsr~)r3N)+Z __future__rrrZsixrrecodecsrrZ constantsr r r r r rrrrZxml.sax.saxutilsrr'Z_quoteAttributeSpecCharscompilerprrr#rLZ_is_ucs4r|rnrxryr r!islowerr1r9rr4 Exceptionr~r/r/r/r0s:         |__pycache__/serializer.cpython-36.pyc000064400000022072147204715120013561 0ustar003 B;WU7@sddlmZmZmZddlmZddlZddlmZm Z ddl m Z m Z m Z ddl mZmZmZddlmZmZdd lmZd je d Zejd ed Zejd edZiZeddkZxeejD]p\Z Z!eree!dkse ree!dkrqe!dkree!dkrej"e!Z!ne#e!Z!e!eks4e j$re ee!<qWddZ%ede%dddZ&Gddde'Z(Gddde)Z*dS))absolute_importdivisionunicode_literals) text_typeN)register_errorxmlcharrefreplace_errors) voidElementsbooleanAttributesspaceCharacters)rcdataElementsentities xmlEntities) treewalkers_utils)escapez"'=<>`[]u_  /`  ᠎᠏           

   ]u􏿿&c Cs"t|ttfrg}g}d}xt|j|j|jD]n\}}|rFd}q4||j}tj|j|t |j|dgrtj |j||d}d}nt |}|j |q4Wx^|D]V}t j|} | r|j d|j | | jds|j dq|j dt|ddqWdj||jfSt|SdS)NFrTr;z&#x%s;r) isinstanceUnicodeEncodeErrorUnicodeTranslateError enumerateobjectstartendrZisSurrogatePairminsurrogatePairToCodepointordappend_encode_entity_mapgetendswithhexjoinr) excresZ codepointsskipicindexZ codepointZcper/ /usr/lib/python3.6/serializer.pyhtmlentityreplace_errors*s0  "      r1htmlentityreplaceetreecKs$tj|}tf|}|j|||S)N)rZ getTreeWalkerHTMLSerializerrender)inputZtreeencodingZserializer_optsZwalkersr/r/r0 serializeJs  r9c@s~eZdZdZdZdZdZdZdZdZ dZ dZ dZ dZ dZdZdZd!ZddZddZddZd"ddZd#ddZd$dd ZdS)%r4legacy"TFquote_attr_values quote_charuse_best_quote_charomit_optional_tagsminimize_boolean_attributesuse_trailing_solidusspace_before_trailing_solidusescape_lt_in_attrs escape_rcdataresolve_entitiesalphabetical_attributesinject_meta_charsetstrip_whitespacesanitizec Kszt|t|j}t|dkr2tdtt|d|kr@d|_x(|jD]}t|||j|t ||qHWg|_ d|_ dS)a6 Initialize HTMLSerializer. Keyword options (default given first unless specified) include: inject_meta_charset=True|False Whether it insert a meta element to define the character set of the document. quote_attr_values="legacy"|"spec"|"always" Whether to quote attribute values that don't require quoting per legacy browser behaviour, when required by the standard, or always. quote_char=u'"'|u"'" Use given quote character for attribute quoting. Default is to use double quote unless attribute value contains a double quote, in which case single quotes are used instead. escape_lt_in_attrs=False|True Whether to escape < in attribute values. escape_rcdata=False|True Whether to escape characters that need to be escaped within normal elements within rcdata elements such as style. resolve_entities=True|False Whether to resolve named character entities that appear in the source tree. The XML predefined entities < > & " ' are unaffected by this setting. 
strip_whitespace=False|True Whether to remove semantically meaningless whitespace. (This compresses all whitespace to a single space except within pre.) minimize_boolean_attributes=True|False Shortens boolean attributes to give just the attribute value, for example becomes . use_trailing_solidus=False|True Includes a close-tag slash at the end of the start tag of void elements (empty elements whose end tag is forbidden). E.g.
. space_before_trailing_solidus=True|False Places a space immediately before the closing slash in a tag using a trailing solidus. E.g.
. Requires use_trailing_solidus. sanitize=False|True Strip all unsafe or unknown constructs from output. See `html5lib user documentation`_ omit_optional_tags=True|False Omit start/end tags that are optional. alphabetical_attributes=False|True Reorder attributes to be in alphabetical order. .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation rz2__init__() got an unexpected keyword argument '%s'r=FN) frozensetoptionslen TypeErrornextiterr>setattrr$getattrerrorsstrict)selfkwargsZunexpected_argsattrr/r/r0__init__ps.  zHTMLSerializer.__init__cCs*t|tst|jr"|j|jdS|SdS)Nr2)rrAssertionErrorr7encode)rTstringr/r/r0rYszHTMLSerializer.encodecCs*t|tst|jr"|j|jdS|SdS)NrS)rrrXr7rY)rTrZr/r/r0 encodeStrictszHTMLSerializer.encodeStrictNccs||_d}g|_|r0|jr0ddlm}|||}|jrJddlm}||}|jrdddlm}||}|j r~ddl m}||}|j rddl m}||}xR|D]H}|d}|dkr`d|d}|dr|d |d7}n|d r|d 7}|d rJ|d j d d kr0|d j dd kr*|jdd}nd }|d||d |f7}|d7}|j|Vq|d5kr|dksz|r|r|dj dd kr|jd|j|dVn|jt|dVq|d6kr|d} |jd| V| tkr|j rd}n|r|jdx|djD]\\} } } | } | }|jdV|j| V|j s| tj| tkr"| tjdtkr"|jdV|jdkst|d krd}n@|jd krtj|dk }n$|jd!krtj|dk }ntd"|jd#d$}|j r|jd%d&}|r|j!}|j"rTd|kr CharactersSpaceCharactersdatazCommentz--zComment contains --z ZEntityrzEntity %s not recognizedz&%s;)rarb)rdre))r7rRrGZfilters.inject_meta_charsetr\rFZfilters.alphabeticalattributesrHZfilters.whitespacerIZfilters.sanitizerr?Zfilters.optionaltagsfindserializeErrorr[rYrr rDitemsr@r r$tupler<rL_quoteAttributeSpecsearch_quoteAttributeLegacy ValueErrorreplacerCr=r>r rArBr rEr)rT treewalkerr7Zin_cdatar\tokenr]Zdoctyper=r^_Z attr_nameZ attr_valuekvZ quote_attrrckeyr/r/r0r9s                                    zHTMLSerializer.serializecCs2|rdjt|j||Sdjt|j|SdS)Nr)r'listr9)rTrvr7r/r/r0r5?szHTMLSerializer.renderXXX ERROR MESSAGE NEEDEDcCs|jj||jrtdS)N)rRr"rSSerializeError)rTrcr/r/r0rnEs zHTMLSerializer.serializeError)r<r=r>r?r@rArBrCrDrErFrGrHrI)N)N)r~)__name__ __module__ __qualname__r<r=r>r?r@rArBrCrDrErFrGrHrIrKrWrYr[r9r5rnr/r/r/r0r4Qs68  
r4c@seZdZdZdS)rzError in serialized treeN)rrr__doc__r/r/r/r0rLsr)r3N)+Z __future__rrrZsixrrecodecsrrZ constantsr r r r r rrrrZxml.sax.saxutilsrr'Z_quoteAttributeSpecCharscompilerqrsr#rLZ_is_ucs4r}roryrzr r!islowerr1r9rr4 Exceptionrr/r/r/r0s:         |_trie/__pycache__/__init__.cpython-36.opt-1.pyc000064400000000524147204715120015206 0ustar003 B;W! @sRddlmZmZmZddlmZeZyddlmZWne k rHYnXeZdS))absolute_importdivisionunicode_literals)TrieN) Z __future__rrrpyrZPyTrieZdatrieZDATrie ImportErrorr r /usr/lib/python3.6/__init__.pys _trie/__pycache__/__init__.cpython-36.pyc000064400000000524147204715120014247 0ustar003 B;W! @sRddlmZmZmZddlmZeZyddlmZWne k rHYnXeZdS))absolute_importdivisionunicode_literals)TrieN) Z __future__rrrpyrZPyTrieZdatrieZDATrie ImportErrorr r /usr/lib/python3.6/__init__.pys _trie/__pycache__/_base.cpython-36.opt-1.pyc000064400000002625147204715120014524 0ustar003 B;W@s4ddlmZmZmZddlmZGdddeZdS))absolute_importdivisionunicode_literals)Mappingcs:eZdZdZd fdd ZddZddZd d ZZS) TriezAbstract base class for triesNcs4tt|j}dkrt|Stfdd|DS)Ncsg|]}|jr|qS) startswith).0x)prefixr/usr/lib/python3.6/_base.py szTrie.keys..)superrkeysset)selfr r) __class__)r r r sz Trie.keyscCs$x|jD]}|j|r dSq WdS)NTF)rr)rr keyrrr has_keys_with_prefixs zTrie.has_keys_with_prefixcCsT||kr |Sx:tdt|dD]$}|d| |kr |d| Sq Wt|dS)N)rangelenKeyError)rr irrr longest_prefixs zTrie.longest_prefixcCs|j|}|||fS)N)r)rr Zlprefixrrr longest_prefix_item$s zTrie.longest_prefix_item)N) __name__ __module__ __qualname____doc__rrrr __classcell__rr)rr rs   rN)Z __future__rrr collectionsrrrrrr s _trie/__pycache__/_base.cpython-36.pyc000064400000002625147204715120013565 0ustar003 B;W@s4ddlmZmZmZddlmZGdddeZdS))absolute_importdivisionunicode_literals)Mappingcs:eZdZdZd fdd ZddZddZd d ZZS) TriezAbstract base class for triesNcs4tt|j}dkrt|Stfdd|DS)Ncsg|]}|jr|qS) startswith).0x)prefixr/usr/lib/python3.6/_base.py szTrie.keys..)superrkeysset)selfr r) __class__)r r r sz Trie.keyscCs$x|jD]}|j|r dSq WdS)NTF)rr)rr keyrrr 
has_keys_with_prefixs zTrie.has_keys_with_prefixcCsT||kr |Sx:tdt|dD]$}|d| |kr |d| Sq Wt|dS)N)rangelenKeyError)rr irrr longest_prefixs zTrie.longest_prefixcCs|j|}|||fS)N)r)rr Zlprefixrrr longest_prefix_item$s zTrie.longest_prefix_item)N) __name__ __module__ __qualname____doc__rrrr __classcell__rr)rr rs   rN)Z __future__rrr collectionsrrrrrr s _trie/__pycache__/datrie.cpython-36.opt-1.pyc000064400000003612147204715120014720 0ustar003 B;W@sLddlmZmZmZddlmZddlmZddl mZ Gddde ZdS))absolute_importdivisionunicode_literals)Trie) text_typec@sVeZdZddZddZddZddZd d Zdd d ZddZ ddZ ddZ d S)rcCsvt}x:|jD].}t|ts&tdx|D]}|j|q,WqWtdj||_x|j D]\}}||j|<q\WdS)NzAll keys must be strings) setkeys isinstancer TypeErroraddDATriejoin_dataitems)selfdatacharskeycharvaluer/usr/lib/python3.6/datrie.py__init__ s  z Trie.__init__cCs ||jkS)N)r)rrrrr __contains__szTrie.__contains__cCs t|jS)N)lenr)rrrr__len__sz Trie.__len__cCs tdS)N)NotImplementedError)rrrr__iter__sz Trie.__iter__cCs |j|S)N)r)rrrrr __getitem__szTrie.__getitem__NcCs |jj|S)N)rr )rprefixrrrr "sz Trie.keyscCs |jj|S)N)rhas_keys_with_prefix)rr!rrrr"%szTrie.has_keys_with_prefixcCs |jj|S)N)rlongest_prefix)rr!rrrr#(szTrie.longest_prefixcCs |jj|S)N)rlongest_prefix_item)rr!rrrr$+szTrie.longest_prefix_item)N) __name__ __module__ __qualname__rrrrr r r"r#r$rrrrr s  rN) Z __future__rrrZdatrierrZsixrZ_baseZABCTrierrrrs   _trie/__pycache__/datrie.cpython-36.pyc000064400000003612147204715120013761 0ustar003 B;W@sLddlmZmZmZddlmZddlmZddl mZ Gddde ZdS))absolute_importdivisionunicode_literals)Trie) text_typec@sVeZdZddZddZddZddZd d Zdd d ZddZ ddZ ddZ d S)rcCsvt}x:|jD].}t|ts&tdx|D]}|j|q,WqWtdj||_x|j D]\}}||j|<q\WdS)NzAll keys must be strings) setkeys isinstancer TypeErroraddDATriejoin_dataitems)selfdatacharskeycharvaluer/usr/lib/python3.6/datrie.py__init__ s  z Trie.__init__cCs ||jkS)N)r)rrrrr __contains__szTrie.__contains__cCs t|jS)N)lenr)rrrr__len__sz Trie.__len__cCs tdS)N)NotImplementedError)rrrr__iter__sz Trie.__iter__cCs |j|S)N)r)rrrrr 
__getitem__szTrie.__getitem__NcCs |jj|S)N)rr )rprefixrrrr "sz Trie.keyscCs |jj|S)N)rhas_keys_with_prefix)rr!rrrr"%szTrie.has_keys_with_prefixcCs |jj|S)N)rlongest_prefix)rr!rrrr#(szTrie.longest_prefixcCs |jj|S)N)rlongest_prefix_item)rr!rrrr$+szTrie.longest_prefix_item)N) __name__ __module__ __qualname__rrrrr r r"r#r$rrrrr s  rN) Z __future__rrrZdatrierrZsixrZ_baseZABCTrierrrrs   _trie/__pycache__/py.cpython-36.opt-1.pyc000064400000004127147204715120014102 0ustar003 B;W@sLddlmZmZmZddlmZddlmZddlm Z Gddde Z dS) )absolute_importdivisionunicode_literals) text_type) bisect_left)Triec@sFeZdZddZddZddZddZd d Zdd d ZddZ d S)rcCsJtdd|jDstd||_t|j|_d|_dt|f|_dS)Ncss|]}t|tVqdS)N) isinstancer).0xr /usr/lib/python3.6/py.py sz Trie.__init__..zAll keys must be stringsr) allkeys TypeError_datasorted_keys _cachestrlen _cachepoints)selfdatar r r __init__ s z Trie.__init__cCs ||jkS)N)r)rkeyr r r __contains__szTrie.__contains__cCs t|jS)N)rr)rr r r __len__sz Trie.__len__cCs t|jS)N)iterr)rr r r __iter__sz Trie.__iter__cCs |j|S)N)r)rrr r r __getitem__szTrie.__getitem__NcCs|dks|dks|j r"t|jS|j|jrN|j\}}t|j|||}}nt|j|}}t}|t|jkrv|Sx,|j|j|r|j|j||d7}qxW||_||f|_|S)Nrr)rset startswithrrrradd)rprefixlohistartirr r r rs     z Trie.keyscCsd||jkrdS|j|jr6|j\}}t|j|||}n t|j|}|t|jkrTdS|j|j|S)NTF)rr#rrrrr)rr%r&r'r)r r r has_keys_with_prefix6s    zTrie.has_keys_with_prefix)N) __name__ __module__ __qualname__rrrr r!rr*r r r r r s  rN) Z __future__rrrZsixrZbisectrZ_baserZABCTrier r r r s   _trie/__pycache__/py.cpython-36.pyc000064400000004127147204715120013143 0ustar003 B;W@sLddlmZmZmZddlmZddlmZddlm Z Gddde Z dS) )absolute_importdivisionunicode_literals) text_type) bisect_left)Triec@sFeZdZddZddZddZddZd d Zdd d ZddZ d S)rcCsJtdd|jDstd||_t|j|_d|_dt|f|_dS)Ncss|]}t|tVqdS)N) isinstancer).0xr /usr/lib/python3.6/py.py sz Trie.__init__..zAll keys must be stringsr) allkeys TypeError_datasorted_keys _cachestrlen _cachepoints)selfdatar r r __init__ s z Trie.__init__cCs ||jkS)N)r)rkeyr r r __contains__szTrie.__contains__cCs 
t|jS)N)rr)rr r r __len__sz Trie.__len__cCs t|jS)N)iterr)rr r r __iter__sz Trie.__iter__cCs |j|S)N)r)rrr r r __getitem__szTrie.__getitem__NcCs|dks|dks|j r"t|jS|j|jrN|j\}}t|j|||}}nt|j|}}t}|t|jkrv|Sx,|j|j|r|j|j||d7}qxW||_||f|_|S)Nrr)rset startswithrrrradd)rprefixlohistartirr r r rs     z Trie.keyscCsd||jkrdS|j|jr6|j\}}t|j|||}n t|j|}|t|jkrTdS|j|j|S)NTF)rr#rrrrr)rr%r&r'r)r r r has_keys_with_prefix6s    zTrie.has_keys_with_prefix)N) __name__ __module__ __qualname__rrrr r!rr*r r r r r s  rN) Z __future__rrrZsixrZbisectrZ_baserZABCTrier r r r s   _trie/__init__.py000064400000000441147204715120007761 0ustar00from __future__ import absolute_import, division, unicode_literals from .py import Trie as PyTrie Trie = PyTrie # pylint:disable=wrong-import-position try: from .datrie import Trie as DATrie except ImportError: pass else: Trie = DATrie # pylint:enable=wrong-import-position _trie/_base.py000064400000001723147204715120007277 0ustar00from __future__ import absolute_import, division, unicode_literals from collections import Mapping class Trie(Mapping): """Abstract base class for tries""" def keys(self, prefix=None): # pylint:disable=arguments-differ keys = super(Trie, self).keys() if prefix is None: return set(keys) # Python 2.6: no set comprehensions return set([x for x in keys if x.startswith(prefix)]) def has_keys_with_prefix(self, prefix): for key in self.keys(): if key.startswith(prefix): return True return False def longest_prefix(self, prefix): if prefix in self: return prefix for i in range(1, len(prefix) + 1): if prefix[:-i] in self: return prefix[:-i] raise KeyError(prefix) def longest_prefix_item(self, prefix): lprefix = self.longest_prefix(prefix) return (lprefix, self[lprefix]) _trie/datrie.py000064400000002216147204715120007474 0ustar00from __future__ import absolute_import, division, unicode_literals from datrie import Trie as DATrie from six import text_type from ._base import Trie as ABCTrie class Trie(ABCTrie): def __init__(self, data): chars = set() for 
key in data.keys(): if not isinstance(key, text_type): raise TypeError("All keys must be strings") for char in key: chars.add(char) self._data = DATrie("".join(chars)) for key, value in data.items(): self._data[key] = value def __contains__(self, key): return key in self._data def __len__(self): return len(self._data) def __iter__(self): raise NotImplementedError() def __getitem__(self, key): return self._data[key] def keys(self, prefix=None): return self._data.keys(prefix) def has_keys_with_prefix(self, prefix): return self._data.has_keys_with_prefix(prefix) def longest_prefix(self, prefix): return self._data.longest_prefix(prefix) def longest_prefix_item(self, prefix): return self._data.longest_prefix_item(prefix) _trie/py.py000064400000003343147204715120006656 0ustar00from __future__ import absolute_import, division, unicode_literals from six import text_type from bisect import bisect_left from ._base import Trie as ABCTrie class Trie(ABCTrie): def __init__(self, data): if not all(isinstance(x, text_type) for x in data.keys()): raise TypeError("All keys must be strings") self._data = data self._keys = sorted(data.keys()) self._cachestr = "" self._cachepoints = (0, len(data)) def __contains__(self, key): return key in self._data def __len__(self): return len(self._data) def __iter__(self): return iter(self._data) def __getitem__(self, key): return self._data[key] def keys(self, prefix=None): if prefix is None or prefix == "" or not self._keys: return set(self._keys) if prefix.startswith(self._cachestr): lo, hi = self._cachepoints start = i = bisect_left(self._keys, prefix, lo, hi) else: start = i = bisect_left(self._keys, prefix) keys = set() if start == len(self._keys): return keys while self._keys[i].startswith(prefix): keys.add(self._keys[i]) i += 1 self._cachestr = prefix self._cachepoints = (start, i) return keys def has_keys_with_prefix(self, prefix): if prefix in self._data: return True if prefix.startswith(self._cachestr): lo, hi = self._cachepoints i = 
bisect_left(self._keys, prefix, lo, hi) else: i = bisect_left(self._keys, prefix) if i == len(self._keys): return False return self._keys[i].startswith(prefix) filters/__pycache__/__init__.cpython-36.opt-1.pyc000064400000000161147204715120015551 0ustar003 B;W@sdS)Nrrr/usr/lib/python3.6/__init__.pysfilters/__pycache__/__init__.cpython-36.pyc000064400000000161147204715120014612 0ustar003 B;W@sdS)Nrrr/usr/lib/python3.6/__init__.pysfilters/__pycache__/alphabeticalattributes.cpython-36.opt-1.pyc000064400000001730147204715120020535 0ustar003 B;Wm @shddlmZmZmZddlmZyddlmZWn ek rPddl mZYnXGdddej Z dS))absolute_importdivisionunicode_literals)base) OrderedDictc@seZdZddZdS)Filterccshxbtjj|D]R}|ddkrZt}x,t|djdddD]\}}|||<q>W||d<|VqWdS) NtypeStartTagEmptyTagdatacSs|dS)Nr)xr r ,/usr/lib/python3.6/alphabeticalattributes.pysz!Filter.__iter__..)key)r r )rr__iter__rsorteditems)selftokenZattrsnamevaluer r rr s  zFilter.__iter__N)__name__ __module__ __qualname__rr r r rr srN) Z __future__rrrr collectionsr ImportErrorZ ordereddictrr r r rs  filters/__pycache__/alphabeticalattributes.cpython-36.pyc000064400000001730147204715120017576 0ustar003 B;Wm @shddlmZmZmZddlmZyddlmZWn ek rPddl mZYnXGdddej Z dS))absolute_importdivisionunicode_literals)base) OrderedDictc@seZdZddZdS)Filterccshxbtjj|D]R}|ddkrZt}x,t|djdddD]\}}|||<q>W||d<|VqWdS) NtypeStartTagEmptyTagdatacSs|dS)Nr)xr r ,/usr/lib/python3.6/alphabeticalattributes.pysz!Filter.__iter__..)key)r r )rr__iter__rsorteditems)selftokenZattrsnamevaluer r rr s  zFilter.__iter__N)__name__ __module__ __qualname__rr r r rr srN) Z __future__rrrr collectionsr ImportErrorZ ordereddictrr r r rs  filters/__pycache__/base.cpython-36.opt-1.pyc000064400000001373147204715120014732 0ustar003 B;W@s(ddlmZmZmZGdddeZdS))absolute_importdivisionunicode_literalsc@s$eZdZddZddZddZdS)FiltercCs ||_dS)N)source)selfrr/usr/lib/python3.6/base.py__init__szFilter.__init__cCs t|jS)N)iterr)rrrr __iter__szFilter.__iter__cCs t|j|S)N)getattrr)rnamerrr __getattr__ 
szFilter.__getattr__N)__name__ __module__ __qualname__r r rrrrr rsrN)Z __future__rrrobjectrrrrr sfilters/__pycache__/base.cpython-36.pyc000064400000001373147204715120013773 0ustar003 B;W@s(ddlmZmZmZGdddeZdS))absolute_importdivisionunicode_literalsc@s$eZdZddZddZddZdS)FiltercCs ||_dS)N)source)selfrr/usr/lib/python3.6/base.py__init__szFilter.__init__cCs t|jS)N)iterr)rrrr __iter__szFilter.__iter__cCs t|j|S)N)getattrr)rnamerrr __getattr__ szFilter.__getattr__N)__name__ __module__ __qualname__r r rrrrr rsrN)Z __future__rrrobjectrrrrr sfilters/__pycache__/inject_meta_charset.cpython-36.opt-1.pyc000064400000003116147204715120020010 0ustar003 B;W @s6ddlmZmZmZddlmZGdddejZdS))absolute_importdivisionunicode_literals)basec@seZdZddZddZdS)FiltercCstjj||||_dS)N)rr__init__encoding)selfsourcer r )/usr/lib/python3.6/inject_meta_charset.pyrszFilter.__init__c csd}|jdk}g}xtjj|D]}|d}|dkrP|djdkrLd}np|dkrV|djdkrd }x|d jD]V\\}}} |dk rq~q~|jd kr|j|d ||f<d }Pq~|d kr~| jdkr~d }q~W|od|d krTd|j|d d<d }nR|djdkr| rdd|d dVddd|jidVdddVd }q"nj|dkr|djdkr|r|jdV|sddd|jidVx|r|jdVqWd }d}|dkr|j|q"|Vq"WdS)NZpre_headtypeZStartTagnameheadZin_headZEmptyTagmetaFdatacharsetTz http-equivz content-typecontentztext/html; charset=%s)rrrZEndTag)rrrZ post_head)Nr)Nr)Nr)Nr)r rr__iter__loweritemspopappend) r stateZ meta_foundpendingtokenrZhas_http_equiv_content_type namespacervaluer r r r sX        zFilter.__iter__N)__name__ __module__ __qualname__rrr r r r rsrN)Z __future__rrrrrr r r r s filters/__pycache__/inject_meta_charset.cpython-36.pyc000064400000003116147204715120017051 0ustar003 B;W @s6ddlmZmZmZddlmZGdddejZdS))absolute_importdivisionunicode_literals)basec@seZdZddZddZdS)FiltercCstjj||||_dS)N)rr__init__encoding)selfsourcer r )/usr/lib/python3.6/inject_meta_charset.pyrszFilter.__init__c csd}|jdk}g}xtjj|D]}|d}|dkrP|djdkrLd}np|dkrV|djdkrd }x|d jD]V\\}}} |dk rq~q~|jd kr|j|d ||f<d }Pq~|d kr~| jdkr~d }q~W|od|d krTd|j|d d<d }nR|djdkr| rdd|d dVddd|jidVdddVd }q"nj|dkr|djdkr|r|jdV|sddd|jidVx|r|jdVqWd 
}d}|dkr|j|q"|Vq"WdS)NZpre_headtypeZStartTagnameheadZin_headZEmptyTagmetaFdatacharsetTz http-equivz content-typecontentztext/html; charset=%s)rrrZEndTag)rrrZ post_head)Nr)Nr)Nr)Nr)r rr__iter__loweritemspopappend) r stateZ meta_foundpendingtokenrZhas_http_equiv_content_type namespacervaluer r r r sX        zFilter.__iter__N)__name__ __module__ __qualname__rrr r r r rsrN)Z __future__rrrrrr r r r s filters/__pycache__/lint.cpython-36.opt-1.pyc000064400000003011147204715120014755 0ustar003 B;W @shddlmZmZmZddlmZddlmZddlm Z m Z ddlm Z dj e Z Gd d d ej Z d S) )absolute_importdivisionunicode_literals) text_type)base) namespaces voidElements)spaceCharacterscs&eZdZdfdd ZddZZS)FilterTcstt|j|||_dS)N)superr __init__require_matching_tags)selfsourcer) __class__/usr/lib/python3.6/lint.pyr szFilter.__init__c cs@g}x4tjj|D]"}|d}|dkr|d}|d}| sL|tdkrV|tkrVn|dkrr|jrr|j||fx|djD] \\}}}qWn|dkr|d}|d}| s|tdkr|tkrn|jr|j}n\|d kr|d}nJ|dkr|d}|d kr2n,|d kr|d}n|d kr&n |dkr2n|VqWdS)NtypeStartTagEmptyTag namespacenameZhtmldataZEndTagComment CharactersSpaceCharactersZDoctypeZEntityZSerializerError)rr)rr) rr __iter__r r rappenditemspop) rZ open_elementstokenrrrvaluestartrrrrrsF     zFilter.__iter__)T)__name__ __module__ __qualname__rr __classcell__rr)rrr sr N)Z __future__rrrZsixrr rZ constantsr r r joinr rrrrs     filters/__pycache__/lint.cpython-36.pyc000064400000004341147204715120014025 0ustar003 B;W @shddlmZmZmZddlmZddlmZddlm Z m Z ddlm Z dj e Z Gd d d ej Z d S) )absolute_importdivisionunicode_literals) text_type)base) namespaces voidElements)spaceCharacterscs&eZdZdfdd ZddZZS)FilterTcstt|j|||_dS)N)superr __init__require_matching_tags)selfsourcer) __class__/usr/lib/python3.6/lint.pyr szFilter.__init__c csRg}xFtjj|D]4}|d}|dkrP|d}|d}|dksRt|tsRt|dks^tt|tslt|dksxtt|dtst| s|tdkr|tkr|dkstn |dkst|dkr|j r|j ||fxp|dj D]`\\}}}|dkst|tst|dkstt|ts,t|dks:tt|tstqWn|d kr|d}|d}|dkst|tst|dkstt|tst|dkst| s|tdkr|tkrd s td d |in"|j rD|j }|||fksDtn6|d kr4|d}t|tsDtn|dkr|d}t|tsVt|dksdt|dkrD|j 
tdksDtn|dkr|d}|dkst|tst|ddkst|tst|ddksDt|tsDtnV|dkrt|dtsDtn6|dkr.t|dtsDtnd sDtdd|i|VqWdS)NtypeStartTagEmptyTag namespacenamer dataZhtmlZEndTagFz.Void element reported as EndTag token: %(tag)stagComment CharactersSpaceCharactersZDoctypeZpublicIdZsystemIdZEntityZSerializerErrorzUnknown token type: %(type)s)rr)rr)rr __iter__ isinstancerAssertionErrordictr r rappenditemspopstripr ) rZ open_elementstokenrrrvaluestartrrrrr sl             zFilter.__iter__)T)__name__ __module__ __qualname__rr __classcell__rr)rrr sr N)Z __future__rrrZsixrr rZ constantsr r r joinr rrrrs     filters/__pycache__/optionaltags.cpython-36.opt-1.pyc000064400000005615147204715120016527 0ustar003 B;W&)@s6ddlmZmZmZddlmZGdddejZdS))absolute_importdivisionunicode_literals)basec@s,eZdZddZddZddZddZd S) FilterccsLd}}x*|jD] }|dk r(|||fV|}|}qW|dk rH||dfVdS)N)source)selfZ previous1Z previous2tokenr "/usr/lib/python3.6/optionaltags.pysliders  z Filter.sliderccsvxp|jD]d\}}}|d}|dkrH|ds@|j|d|| rn|Vq |dkrh|j|d|sn|Vq |Vq WdS)NtypeStartTagdatanameEndTag)r is_optional_startis_optional_end)r previousr nextrr r r __iter__szFilter.__iter__cCs|r |dpd}|dkr |dkS|dkrJ|dkr4dS|d krH|d dkSn|d krx|dkr^d S|dkrr|d dkSdSnd|dkr|dkr|d dkSd SnB|dkr|dkr|r|dd kr|d dkrd S|d dkSd Sd S)NrhtmlCommentSpaceCharactersheadrEmptyTagTrrbodyFscriptstylecolgroupcoltbodytheadtfoottr)rr)rr)rr)rr)rr)r"r#r$r )r tagnamerrrr r r rs4     zFilter.is_optional_startcCs |r |dpd}|d7kr |d8kS|d9krP|d kr<|d |kS|d kpJ|dkSn|d:kr|d krl|d d;kS|dkr|d kp|dkSdSn||dkr|dkS|d kp|dkSn|d?kr,|d kr|d d@kS|d kp(|dkSn|d0kr`|dAkrDdS|d krZ|d d0kSd1Sn|dBkr|d kr|d dCkS|d3kr|d kp|dkSdSnf|d4kr|d kr|d d3kS|d kp|dkSn2|dDkr|d kr|d dEkS|d kp|dkSdS)FNrrrrrrlioptgroupr%rrrdtddFpraddressarticleaside blockquotedatagriddialogdirdivdlfieldsetfooterformh1h2h3h4h5h6headerhrmenunavolpresectiontableuloptionrtrpr Tr#r"r$tdth)rrr)rr)r'r(r%)r)r*)r)r*)rr)r,r-r.r/r0r1r2r3r4r5r6r7r8r9r:r;r<r=r>r?r@rArBr+rCrDrErF)rGr()rHrI)rHrI)rr)r#r")r"r$)rJrK)rJrKr )r r&rrr r r rWsf                        
zFilter.is_optional_endN)__name__ __module__ __qualname__r rrrr r r r rs  9rN)Z __future__rrrrrr r r r s filters/__pycache__/optionaltags.cpython-36.pyc000064400000005615147204715120015570 0ustar003 B;W&)@s6ddlmZmZmZddlmZGdddejZdS))absolute_importdivisionunicode_literals)basec@s,eZdZddZddZddZddZd S) FilterccsLd}}x*|jD] }|dk r(|||fV|}|}qW|dk rH||dfVdS)N)source)selfZ previous1Z previous2tokenr "/usr/lib/python3.6/optionaltags.pysliders  z Filter.sliderccsvxp|jD]d\}}}|d}|dkrH|ds@|j|d|| rn|Vq |dkrh|j|d|sn|Vq |Vq WdS)NtypeStartTagdatanameEndTag)r is_optional_startis_optional_end)r previousr nextrr r r __iter__szFilter.__iter__cCs|r |dpd}|dkr |dkS|dkrJ|dkr4dS|d krH|d dkSn|d krx|dkr^d S|dkrr|d dkSdSnd|dkr|dkr|d dkSd SnB|dkr|dkr|r|dd kr|d dkrd S|d dkSd Sd S)NrhtmlCommentSpaceCharactersheadrEmptyTagTrrbodyFscriptstylecolgroupcoltbodytheadtfoottr)rr)rr)rr)rr)rr)r"r#r$r )r tagnamerrrr r r rs4     zFilter.is_optional_startcCs |r |dpd}|d7kr |d8kS|d9krP|d kr<|d |kS|d kpJ|dkSn|d:kr|d krl|d d;kS|dkr|d kp|dkSdSn||dkr|dkS|d kp|dkSn|d?kr,|d kr|d d@kS|d kp(|dkSn|d0kr`|dAkrDdS|d krZ|d d0kSd1Sn|dBkr|d kr|d dCkS|d3kr|d kp|dkSdSnf|d4kr|d kr|d d3kS|d kp|dkSn2|dDkr|d kr|d dEkS|d kp|dkSdS)FNrrrrrrlioptgroupr%rrrdtddFpraddressarticleaside blockquotedatagriddialogdirdivdlfieldsetfooterformh1h2h3h4h5h6headerhrmenunavolpresectiontableuloptionrtrpr Tr#r"r$tdth)rrr)rr)r'r(r%)r)r*)r)r*)rr)r,r-r.r/r0r1r2r3r4r5r6r7r8r9r:r;r<r=r>r?r@rArBr+rCrDrErF)rGr()rHrI)rHrI)rr)r#r")r"r$)rJrK)rJrKr )r r&rrr r r rWsf                        zFilter.is_optional_endN)__name__ __module__ __qualname__r rrrr r r r rs  9rN)Z __future__rrrrrr r r r s filters/__pycache__/sanitizer.cpython-36.opt-1.pyc000064400000042715147204715120016035 0ustar003 B;W bE@s ddlmZmZmZddlZddlmZmZddlm Z ddl m Z ddl mZmZd gZeed d fed d fed d fed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed d fed d!fed d"fed d#fed d$fed d%fed d&fed d'fed d(fed d)fed d*fed d+fed d,fed d-fed d.fed d/fed d0fed d1fed d2fed 
d3fed d4fed d5fed d6fed d7fed d8fed d9fed d:fed d;fed dfed d?fed d@fed dAfed dBfed dCfed dDfed dEfed dFfed dGfed dHfed dIfed dJfed dKfed dLfed dMfed dNfed dOfed dPfed dQfed dRfed dSfed dTfed dUfed dVfed dWfed dXfed dYfed dZfed d[fed d\fed d]fed d^fed d_fed d`fed dafed dbfed dcfed ddfed defed dffed dgfed dhfed difed djfed dkfed dlfed dmfedndofedndpfedndqfedndrfedndsfedndtfedndufedndvfedndwfedndxfedndyfedndzfednd{fednd|fednd}fednd~fedndfedndfedndfedndfedndfedndfedndfedndfedndfedndfedndfedd feddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddffZed3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddded1dfdddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐d͐dΐdϐdАdѐdҐdӐdԐdՐd֐dאdؐdِdڐdېdܐdݐdސdߐdddddddedOdfedOdPfedOd%fddddddddddddddddddddddddddddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcedOdfedOdfedOdfedOdfedOdPfedOdfedOd%fed1dfed1dfed1dfdddedfdgfCZedhdidjdkdldmdndodpdqdredOdfed1dff Zed~ZedZedZedZedZedZedZejd1ejZGd2d d e jZdS()absolute_importdivisionunicode_literalsN)escapeunescape) urllib_parse)base) namespacesprefixesFilterhtmlaabbrZacronymZaddressZareaZarticleZasideZaudiobZbigZ blockquotebrZbuttonZcanvasZcaptioncentercitecodecolZcolgroupZcommandZdatagridZdatalistZdddelZdetailsZdfnZdialogdirZdivZdlZdtZemz event-sourceZfieldsetZ figcaptionZfigureZfooterfontformheaderZh1Zh2Zh3Zh4Zh5Zh6ZhriZimginputZinsZkeygenZkbdlabelZlegendZlimmapZmenuZmeterZmulticolZnavZnextidZoloutputZoptgroupZoptionpZpreZprogressqsZsampZsectionZselectZsmallZsoundsourceZspacerspanZstrikeZstrongsubZsuptableZtbodyZtdZtextareaZtimeZtfootZthZtheadZtrZttuZulvarZvideoZmathmlZmactionZmathZmerrorZmfracZmiZ mmultiscriptsZmnmoZmoverZmpaddedZmphantomZ 
mprescriptsZmrootZmrowZmspaceZmsqrtZmstyleZmsubZmsubsupZmsupZmtableZmtdZmtextZmtrZmunderZ munderovernoneZsvganimate animateColor animateMotionanimateTransformZclipPathZcircleZdefsZdescZellipsez font-facezfont-face-namez font-face-srcgZglyphZhkernlinearGradientlinemarkerZmetadataz missing-glyphZmpathpathZpolygonZpolylineradialGradientZrectsetstopZswitchtexttitleZtspanuseacceptaccept-charset accesskeyactionalignalt autocomplete autofocusaxis backgroundbalancebgcolor bgpropertiesborder bordercolorbordercolordarkbordercolorlight bottompadding cellpadding cellspacingch challengecharcharoffchoffcharsetcheckedclassclearcolorcolscolspancompactcontenteditablecontrolscoordsdatadatafld datapagesizedatasrcdatetimedefaultdelaydisabled draggabledynsrcenctypeendfaceforframe galleryimggutterheadersheight hidefocushiddenhighhrefhreflanghspaceiconid inputmodeismapkeytype leftspacinglanglistlongdescloop loopcountloopend loopstartlowlowsrcmax maxlengthmediamethodminmultiplenamenohrefnoshadenowrapopenoptimumpatternping point-sizeposterpqgpreloadprompt radiogroupreadonlyrel repeat-max repeat-minreplacerequiredrev rightspacingrowsrowspanrulesscopeselectedshapesizesrcstartstepstylesummarysuppresstabindextargettemplate toppaddingtype unselectableusemapurnvalignvaluevariablevolumevspacevrmlwidthwrapZxml actiontype columnalign columnlines columnspacing columnspandepthdisplay displaystyle equalcolumns equalrowsfence fontstyle fontweight linethicknesslspacemathbackground mathcolor mathvariantmaxsizeminsizeotherrowalignrowlines rowspacingrspace scriptlevel selection separatorstretchyxlinkZshow accent-height accumulateadditive alphabetic arabic-formascent attributeName attributeType baseProfilebboxbeginbycalcMode cap-height clip-pathcolor-renderingcontentcxcyddxdydescentdurfill fill-opacity fill-rule font-family font-size font-stretch font-style font-variant font-weightfromfxfyg1g2 glyph-name gradientUnitshanging horiz-adv-xhoriz-origin-x ideographick keyPoints keySplineskeyTimes marker-end 
marker-mid marker-start markerHeight markerUnits markerWidth mathematicaloffsetopacityorientoriginoverline-positionoverline-thicknesspanose-1 pathLengthpointspreserveAspectRatiorrefXrefY repeatCount repeatDurrequiredExtensionsrequiredFeaturesrestartrotaterxryslopestemhstemv stop-color stop-opacitystrikethrough-positionstrikethrough-thicknessstrokestroke-dasharraystroke-dashoffsetstroke-linecapstroke-linejoinstroke-miterlimitstroke-opacity stroke-widthsystemLanguage text-anchorto transformu1u2underline-positionunderline-thicknessunicode unicode-range units-per-emvaluesversionviewBox visibilitywidthsxx-heightx1x2ZactuateZarcroleZroler Zspaceyy1y2 zoomAndPan color-profilecursorfiltermaskaltGlyphfeImagetextpathtrefazimuthbackground-colorborder-bottom-colorborder-collapse border-colorborder-left-colorborder-right-colorborder-top-color direction elevationfloatletter-spacing line-heightoverflowpause pause-after pause-beforepitch pitch-rangerichnessspeak speak-header speak-numeralspeak-punctuation speech-ratestress text-aligntext-decoration text-indent unicode-bidivertical-align voice-family white-spaceautoaquablackblockblueboldbothbottombrowncollapsedasheddottedfuchsiagraygreen !importantitalicleftlimemaroonmediumnavynormalolivepointerpurpleredrightsolidsilvertealtop transparent underlinewhiteyellowed2kftphttphttpsircmailtonewsgophernntptelnetwebcalxmppcalltofeedaimrsynctagsshsftprtspafs image/png image/jpeg image/gif image/webp image/bmp text/plainaL ^ # Match a content type / (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) # Match any character set and encoding (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) 
# Assume the rest is data ,.* $ c s^eZdZdZeeeeee e e e e f fdd ZddZddZdd Zd d Zd d ZZS)r zA sanitization of XHTML+MathML+SVG and of inline style attributes.c sPtt|j|||_||_||_||_||_||_||_ | |_ | |_ | |_ dS)N) superr __init__allowed_elementsallowed_attributesallowed_css_propertiesallowed_css_keywordsallowed_svg_propertiesallowed_protocolsallowed_content_typesattr_val_is_urisvg_attr_val_allows_refsvg_allow_local_href) selfr%rrrrrrrrrr) __class__/usr/lib/python3.6/sanitizer.pyrs zFilter.__init__ccs.x(tjj|D]}|j|}|r|VqWdS)N)r r __iter__sanitize_token)rtokenrrrrs zFilter.__iter__cCsp|d}|d kr^|d}|d}||f|jksH|dkrRtd|f|jkrR|j|S|j|Sn|dkrhn|SdS) NrStartTagEndTagEmptyTagr namespacerComment)rrr)rr allowed_tokendisallowed_token)rr token_typerrrrrrs  zFilter.sanitize_tokenc Csd|kr|d}t|j}x&||jD]}|d|=|j|q*Wx||j@D]}tjddt||j}|j dd}yt j |}Wnt k rd}||=YnX|o|j rR|j |j kr||=|j dkrRtj|j}|s||=qR|jd|jkrR||=qRWx4|jD]*}||kr tjddt||||<q W|d|jkrtd d f|krtjd |td d fr|td d f=d |kr|j|d|d<||d<|S)Nr`u [`- - \s]+u�Z content_typezurl\s*\(\s*[^#\s][^)]+?\) rrrvz ^\s*[^#\s].*r)Nr)Nr)Nr)r7keysrremoverrer'rlowerrurlparse ValueErrorschemerdata_content_typematchr5grouprrrr search sanitize_css) rrattrsZ attr_namesZ to_removeattrZ val_unescapedZurirrrrrsJ             zFilter.allowed_tokencCs|d}|dkr"d|d|d<n|drg}xJ|djD]:\\}}}|jd|dkrZ|ndt||ft|fqrr`z %s="%s"z%s:%sz<%s%s>rz<%s>Z selfClosingrz/>Z Characters)itemsappendr rjoinget)rrrrnsrvrrrr2s2 zFilter.disallowed_tokencCstjdjd|}tjd|s"dStjd|s2dSg}xtjd|D]\}}|sRqD|j|jkrx|j|d|dqD|jd d jdkrxf|jD]}||j krtjd| rPqW|j|d|dqD|j|j krD|j|d|dqDWdj |S)Nzurl\s*\(\s*[^\s)]+?\s*\)\s*rz@^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$rz ^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$z([-\w]+)\s*:\s*([^:;]*)z: ;-rrErImarginpaddingz\^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$)rErIrr) rcompiler'rfindallrrrsplitrrr)rrZcleanZproprkeywordrrrrFs*   zFilter.sanitize_css)__name__ __module__ __qualname____doc__rrrrrrrrrrrrrrrr __classcell__rr)rrr 
s  2)Nr)Nr<)Nr=)Nr>)Nr?)Nr@)NrA)NrB)NrC)NrD)NrE)NrF)NrG)NrH)NrI)NrJ)NrK)NrL)NrM)NrN)NrO)NrP)NrQ)NrR)NrS)NrT)NrU)NrV)Nr)NrW)NrX)NrY)NrZ)Nr[)Nr\)Nr])Nr^)Nr_)Nr`)Nra)Nrb)Nrc)Nrd)Nre)Nrf)Nr)Nrg)Nrh)Nri)Nrj)Nrk)Nrl)Nrm)Nr)Nrn)Nro)Nrp)Nrq)Nrr)Nrs)Nrt)Nru)Nrv)Nrw)Nrx)Nry)Nrz)Nr{)Nr|)Nr})Nr)Nr~)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr&)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr:)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr@)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nrn)Nrr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)NrW)Nr)NrY)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nrk)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nrr)Nr)Nr )Nrz)Nr )Nr )Nr )Nr )Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr5)Nr)Nr)Nr)Nr )Nr!)Nr")Nr#)Nr$)Nr%)Nr&)Nr')Nr()Nr))Nr*)Nr+)Nr,)Nr-)Nr.)Nr/)Nr0)Nr1)Nr2)Nr3)Nr4)Nr5)Nr6)Nr7)Nr8)Nr9)Nr:)Nr)Nr;)Nr<)Nr=)Nr)Nr>)Nr?)Nr@)NrA)NrB)NrC)NrD)NrE)NrF)NrG)NrH)Nr)NrI)NrJ)NrK)NrL)NrM)NrN)NrO)NrP)NrQ)Nrv)Nr)Nr)Nr?)Nr)Nr)NrE)Nrc)Nri)Nr)NrNrNrRNrSNrNrTNr4NrNrNrNrUNr2) rrrrrrrrrrrNrVNr-Nr.Nr/Nr0NrSNrWNrTNr2NrNr6NrXNrYNr7Nr;)rrrrrr r r r r rrrrr).rZr[r\r]r^r_r`rarXrYrSrbrrcrdrrrrrrrrrerfrgrhrirjrkrlrmrnrorprqrrrsrtrurvrwrxryrrzr)'r{r|r}r~rrrrrrrrrrrrrrrrrrr,rrrrrrrrrrrrrrrr)rrrr2r9r5r6r8)rrrrrrrrrrrrrrrrrrrrrrr`)rrrrrr) Z __future__rrrrZxml.sax.saxutilsrrZ six.movesrrrr Z constantsr r __all__ frozensetrrrrrrrrrrrVERBOSErr rrrrs2                                                                                                                                                                          filters/__pycache__/sanitizer.cpython-36.pyc000064400000043024147204715120015070 0ustar003 B;W bE@s ddlmZmZmZddlZddlmZmZddlm Z ddl m Z ddl mZmZd gZeed d fed d fed d fed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed d fed d!fed d"fed d#fed d$fed d%fed d&fed d'fed d(fed d)fed d*fed d+fed 
d,fed d-fed d.fed d/fed d0fed d1fed d2fed d3fed d4fed d5fed d6fed d7fed d8fed d9fed d:fed d;fed dfed d?fed d@fed dAfed dBfed dCfed dDfed dEfed dFfed dGfed dHfed dIfed dJfed dKfed dLfed dMfed dNfed dOfed dPfed dQfed dRfed dSfed dTfed dUfed dVfed dWfed dXfed dYfed dZfed d[fed d\fed d]fed d^fed d_fed d`fed dafed dbfed dcfed ddfed defed dffed dgfed dhfed difed djfed dkfed dlfed dmfedndofedndpfedndqfedndrfedndsfedndtfedndufedndvfedndwfedndxfedndyfedndzfednd{fednd|fednd}fednd~fedndfedndfedndfedndfedndfedndfedndfedndfedndfedndfedndfedd feddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddffZed3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddded1dfdddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐d͐dΐdϐdАdѐdҐdӐdԐdՐd֐dאdؐdِdڐdېdܐdݐdސdߐdddddddedOdfedOdPfedOd%fddddddddddddddddddddddddddddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcedOdfedOdfedOdfedOdfedOdPfedOdfedOd%fed1dfed1dfed1dfdddedfdgfCZedhdidjdkdldmdndodpdqdredOdfed1dff Zed~ZedZedZedZedZedZedZejd1ejZGd2d d e jZdS()absolute_importdivisionunicode_literalsN)escapeunescape) urllib_parse)base) namespacesprefixesFilterhtmlaabbrZacronymZaddressZareaZarticleZasideZaudiobZbigZ blockquotebrZbuttonZcanvasZcaptioncentercitecodecolZcolgroupZcommandZdatagridZdatalistZdddelZdetailsZdfnZdialogdirZdivZdlZdtZemz event-sourceZfieldsetZ figcaptionZfigureZfooterfontformheaderZh1Zh2Zh3Zh4Zh5Zh6ZhriZimginputZinsZkeygenZkbdlabelZlegendZlimmapZmenuZmeterZmulticolZnavZnextidZoloutputZoptgroupZoptionpZpreZprogressqsZsampZsectionZselectZsmallZsoundsourceZspacerspanZstrikeZstrongsubZsuptableZtbodyZtdZtextareaZtimeZtfootZthZtheadZtrZttuZulvarZvideoZmathmlZmactionZmathZmerrorZmfracZmiZ 
mmultiscriptsZmnmoZmoverZmpaddedZmphantomZ mprescriptsZmrootZmrowZmspaceZmsqrtZmstyleZmsubZmsubsupZmsupZmtableZmtdZmtextZmtrZmunderZ munderovernoneZsvganimate animateColor animateMotionanimateTransformZclipPathZcircleZdefsZdescZellipsez font-facezfont-face-namez font-face-srcgZglyphZhkernlinearGradientlinemarkerZmetadataz missing-glyphZmpathpathZpolygonZpolylineradialGradientZrectsetstopZswitchtexttitleZtspanuseacceptaccept-charset accesskeyactionalignalt autocomplete autofocusaxis backgroundbalancebgcolor bgpropertiesborder bordercolorbordercolordarkbordercolorlight bottompadding cellpadding cellspacingch challengecharcharoffchoffcharsetcheckedclassclearcolorcolscolspancompactcontenteditablecontrolscoordsdatadatafld datapagesizedatasrcdatetimedefaultdelaydisabled draggabledynsrcenctypeendfaceforframe galleryimggutterheadersheight hidefocushiddenhighhrefhreflanghspaceiconid inputmodeismapkeytype leftspacinglanglistlongdescloop loopcountloopend loopstartlowlowsrcmax maxlengthmediamethodminmultiplenamenohrefnoshadenowrapopenoptimumpatternping point-sizeposterpqgpreloadprompt radiogroupreadonlyrel repeat-max repeat-minreplacerequiredrev rightspacingrowsrowspanrulesscopeselectedshapesizesrcstartstepstylesummarysuppresstabindextargettemplate toppaddingtype unselectableusemapurnvalignvaluevariablevolumevspacevrmlwidthwrapZxml actiontype columnalign columnlines columnspacing columnspandepthdisplay displaystyle equalcolumns equalrowsfence fontstyle fontweight linethicknesslspacemathbackground mathcolor mathvariantmaxsizeminsizeotherrowalignrowlines rowspacingrspace scriptlevel selection separatorstretchyxlinkZshow accent-height accumulateadditive alphabetic arabic-formascent attributeName attributeType baseProfilebboxbeginbycalcMode cap-height clip-pathcolor-renderingcontentcxcyddxdydescentdurfill fill-opacity fill-rule font-family font-size font-stretch font-style font-variant font-weightfromfxfyg1g2 glyph-name gradientUnitshanging horiz-adv-xhoriz-origin-x ideographick 
keyPoints keySplineskeyTimes marker-end marker-mid marker-start markerHeight markerUnits markerWidth mathematicaloffsetopacityorientoriginoverline-positionoverline-thicknesspanose-1 pathLengthpointspreserveAspectRatiorrefXrefY repeatCount repeatDurrequiredExtensionsrequiredFeaturesrestartrotaterxryslopestemhstemv stop-color stop-opacitystrikethrough-positionstrikethrough-thicknessstrokestroke-dasharraystroke-dashoffsetstroke-linecapstroke-linejoinstroke-miterlimitstroke-opacity stroke-widthsystemLanguage text-anchorto transformu1u2underline-positionunderline-thicknessunicode unicode-range units-per-emvaluesversionviewBox visibilitywidthsxx-heightx1x2ZactuateZarcroleZroler Zspaceyy1y2 zoomAndPan color-profilecursorfiltermaskaltGlyphfeImagetextpathtrefazimuthbackground-colorborder-bottom-colorborder-collapse border-colorborder-left-colorborder-right-colorborder-top-color direction elevationfloatletter-spacing line-heightoverflowpause pause-after pause-beforepitch pitch-rangerichnessspeak speak-header speak-numeralspeak-punctuation speech-ratestress text-aligntext-decoration text-indent unicode-bidivertical-align voice-family white-spaceautoaquablackblockblueboldbothbottombrowncollapsedasheddottedfuchsiagraygreen !importantitalicleftlimemaroonmediumnavynormalolivepointerpurpleredrightsolidsilvertealtop transparent underlinewhiteyellowed2kftphttphttpsircmailtonewsgophernntptelnetwebcalxmppcalltofeedaimrsynctagsshsftprtspafs image/png image/jpeg image/gif image/webp image/bmp text/plainaL ^ # Match a content type / (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) # Match any character set and encoding (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) 
# Assume the rest is data ,.* $ c s^eZdZdZeeeeee e e e e f fdd ZddZddZdd Zd d Zd d ZZS)r zA sanitization of XHTML+MathML+SVG and of inline style attributes.c sPtt|j|||_||_||_||_||_||_||_ | |_ | |_ | |_ dS)N) superr __init__allowed_elementsallowed_attributesallowed_css_propertiesallowed_css_keywordsallowed_svg_propertiesallowed_protocolsallowed_content_typesattr_val_is_urisvg_attr_val_allows_refsvg_allow_local_href) selfr%rrrrrrrrrr) __class__/usr/lib/python3.6/sanitizer.pyrs zFilter.__init__ccs.x(tjj|D]}|j|}|r|VqWdS)N)r r __iter__sanitize_token)rtokenrrrrs zFilter.__iter__cCsp|d}|d kr^|d}|d}||f|jksH|dkrRtd|f|jkrR|j|S|j|Sn|dkrhn|SdS) NrStartTagEndTagEmptyTagr namespacerComment)rrr)rr allowed_tokendisallowed_token)rr token_typerrrrrrs  zFilter.sanitize_tokenc Csd|kr|d}t|j}x&||jD]}|d|=|j|q*Wx||j@D]}||ksbttjddt||j }|j dd}yt j |}Wnt k rd}||=YnX|o|j rR|j |jkr||=|j dkrRtj|j}|s||=qR|jd|jkrR||=qRWx4|jD]*}||krtjddt||||<qW|d|jkrtd d f|krtjd |td d fr|td d f=d |kr|j|d|d<||d<|S)Nr`u [`- - \s]+u�Z content_typezurl\s*\(\s*[^#\s][^)]+?\) rrrvz ^\s*[^#\s].*r)Nr)Nr)Nr)r7keysrremoverAssertionErrorrer'rlowerrurlparse ValueErrorschemerdata_content_typematchr5grouprrrr search sanitize_css) rrattrsZ attr_namesZ to_removeattrZ val_unescapedZurirrrrrsL              zFilter.allowed_tokencCs|d}|dkr"d|d|d<n|dr|dks6tg}xJ|djD]:\\}}}|jd|dkrf|nd t||ft|fqHWd |dd j|f|d<nd |d|d<|jd r|dddd|d<d|d<|d=|S)Nrrzrr`rrz %s="%s"z%s:%sz<%s%s>rz<%s>Z selfClosingrz/>Z Characters)rr)ritemsappendr rjoinget)rrrrnsrvrrrr2s 2 zFilter.disallowed_tokencCstjdjd|}tjd|s"dStjd|s2dSg}xtjd|D]\}}|sRqD|j|jkrx|j|d|dqD|jd d jdkrxf|jD]}||j krtjd| rPqW|j|d|dqD|j|j krD|j|d|dqDWdj |S)Nzurl\s*\(\s*[^\s)]+?\s*\)\s*rz@^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$rz ^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$z([-\w]+)\s*:\s*([^:;]*)z: ;-rrErImarginpaddingz\^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$)rErIrr) rcompiler'rfindallrrrsplitrrr)rrZcleanZproprkeywordrrrrFs*   
zFilter.sanitize_css)__name__ __module__ __qualname____doc__rrrrrrrrrrrrrrrr __classcell__rr)rrr s  2)Nr)Nr<)Nr=)Nr>)Nr?)Nr@)NrA)NrB)NrC)NrD)NrE)NrF)NrG)NrH)NrI)NrJ)NrK)NrL)NrM)NrN)NrO)NrP)NrQ)NrR)NrS)NrT)NrU)NrV)Nr)NrW)NrX)NrY)NrZ)Nr[)Nr\)Nr])Nr^)Nr_)Nr`)Nra)Nrb)Nrc)Nrd)Nre)Nrf)Nr)Nrg)Nrh)Nri)Nrj)Nrk)Nrl)Nrm)Nr)Nrn)Nro)Nrp)Nrq)Nrr)Nrs)Nrt)Nru)Nrv)Nrw)Nrx)Nry)Nrz)Nr{)Nr|)Nr})Nr)Nr~)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr&)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr:)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr@)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nrn)Nrr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)NrW)Nr)NrY)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nrk)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nrr)Nr)Nr )Nrz)Nr )Nr )Nr )Nr )Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr)Nr5)Nr)Nr)Nr)Nr )Nr!)Nr")Nr#)Nr$)Nr%)Nr&)Nr')Nr()Nr))Nr*)Nr+)Nr,)Nr-)Nr.)Nr/)Nr0)Nr1)Nr2)Nr3)Nr4)Nr5)Nr6)Nr7)Nr8)Nr9)Nr:)Nr)Nr;)Nr<)Nr=)Nr)Nr>)Nr?)Nr@)NrA)NrB)NrC)NrD)NrE)NrF)NrG)NrH)Nr)NrI)NrJ)NrK)NrL)NrM)NrN)NrO)NrP)NrQ)Nrv)Nr)Nr)Nr?)Nr)Nr)NrE)Nrc)Nri)Nr)NrNrNrRNrSNrNrTNr4NrNrNrNrUNr2) rrrrrrrrrrrNrVNr-Nr.Nr/Nr0NrSNrWNrTNr2NrNr6NrXNrYNr7Nr;)rrrrr r r r r rrrrrr).rZr[r\r]r^r_r`rarXrYrSrbrrcrdrrrrrrrrrerfrgrhrirjrkrlrmrnrorprqrrrsrtrurvrwrxryrrzr)'r{r|r}r~rrrrrrrrrrrrrrrrrrr,rrrrrrrrrrrrrrrr)rrrr2r9r5r6r8)rrrrrrrrrrrrrrrrrrrrrrr`)rrrrrr) Z __future__rrrrZxml.sax.saxutilsrrZ six.movesrrrr Z constantsr r __all__ frozensetrrrrrrrrrrrVERBOSErr rrrrs2                                                                                                                                                                          filters/__pycache__/whitespace.cpython-36.opt-1.pyc000064400000002245147204715120016153 0ustar003 B;Ws@snddlmZmZmZddlZddlmZddlmZm Z dj e Z ej de Z Gd d d ej Z d d ZdS) 
)absolute_importdivisionunicode_literalsN)base)rcdataElementsspaceCharactersz[%s]+c@s(eZdZeddgeeZddZdS)FilterZpreZtextareaccsd}xtjj|D]}|d}|dkrB|s8|d|jkrB|d7}nT|dkrX|rX|d8}n>| rx|dkrx|drxd |d<n| r|d krt|d|d<|VqWdS) NrtypeZStartTagnamerZEndTagZSpaceCharactersdata Z Characters)rr __iter__spacePreserveElementscollapse_spaces)selfZpreservetokenr r /usr/lib/python3.6/whitespace.pyrs    zFilter.__iter__N)__name__ __module__ __qualname__ frozensetlistrrrrrrrr sr cCs tjd|S)Nr) SPACES_REGEXsub)textrrrr%sr)Z __future__rrrrer rZ constantsrr joincompilerr rrrrrs  filters/__pycache__/whitespace.cpython-36.pyc000064400000002245147204715120015214 0ustar003 B;Ws@snddlmZmZmZddlZddlmZddlmZm Z dj e Z ej de Z Gd d d ej Z d d ZdS) )absolute_importdivisionunicode_literalsN)base)rcdataElementsspaceCharactersz[%s]+c@s(eZdZeddgeeZddZdS)FilterZpreZtextareaccsd}xtjj|D]}|d}|dkrB|s8|d|jkrB|d7}nT|dkrX|rX|d8}n>| rx|dkrx|drxd |d<n| r|d krt|d|d<|VqWdS) NrtypeZStartTagnamerZEndTagZSpaceCharactersdata Z Characters)rr __iter__spacePreserveElementscollapse_spaces)selfZpreservetokenr r /usr/lib/python3.6/whitespace.pyrs    zFilter.__iter__N)__name__ __module__ __qualname__ frozensetlistrrrrrrrr sr cCs tjd|S)Nr) SPACES_REGEXsub)textrrrr%sr)Z __future__rrrrer rZ constantsrr joincompilerr rrrrrs  filters/__init__.py000064400000000000147204715120010316 0ustar00filters/alphabeticalattributes.py000064400000001155147204715120013313 0ustar00from __future__ import absolute_import, division, unicode_literals from . 
import base try: from collections import OrderedDict except ImportError: from ordereddict import OrderedDict class Filter(base.Filter): def __iter__(self): for token in base.Filter.__iter__(self): if token["type"] in ("StartTag", "EmptyTag"): attrs = OrderedDict() for name, value in sorted(token["data"].items(), key=lambda x: x[0]): attrs[name] = value token["data"] = attrs yield token filters/base.py000064400000000436147204715120007506 0ustar00from __future__ import absolute_import, division, unicode_literals class Filter(object): def __init__(self, source): self.source = source def __iter__(self): return iter(self.source) def __getattr__(self, name): return getattr(self.source, name) filters/inject_meta_charset.py000064400000005266147204715120012575 0ustar00from __future__ import absolute_import, division, unicode_literals from . import base class Filter(base.Filter): def __init__(self, source, encoding): base.Filter.__init__(self, source) self.encoding = encoding def __iter__(self): state = "pre_head" meta_found = (self.encoding is None) pending = [] for token in base.Filter.__iter__(self): type = token["type"] if type == "StartTag": if token["name"].lower() == "head": state = "in_head" elif type == "EmptyTag": if token["name"].lower() == "meta": # replace charset with actual encoding has_http_equiv_content_type = False for (namespace, name), value in token["data"].items(): if namespace is not None: continue elif name.lower() == 'charset': token["data"][(namespace, name)] = self.encoding meta_found = True break elif name == 'http-equiv' and value.lower() == 'content-type': has_http_equiv_content_type = True else: if has_http_equiv_content_type and (None, "content") in token["data"]: token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding meta_found = True elif token["name"].lower() == "head" and not meta_found: # insert meta into empty head yield {"type": "StartTag", "name": "head", "data": token["data"]} yield {"type": "EmptyTag", "name": 
"meta", "data": {(None, "charset"): self.encoding}} yield {"type": "EndTag", "name": "head"} meta_found = True continue elif type == "EndTag": if token["name"].lower() == "head" and pending: # insert meta into head (if necessary) and flush pending queue yield pending.pop(0) if not meta_found: yield {"type": "EmptyTag", "name": "meta", "data": {(None, "charset"): self.encoding}} while pending: yield pending.pop(0) meta_found = True state = "post_head" if state == "in_head": pending.append(token) else: yield token filters/lint.py000064400000006431147204715120007543 0ustar00from __future__ import absolute_import, division, unicode_literals from six import text_type from . import base from ..constants import namespaces, voidElements from ..constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) class Filter(base.Filter): def __init__(self, source, require_matching_tags=True): super(Filter, self).__init__(source) self.require_matching_tags = require_matching_tags def __iter__(self): open_elements = [] for token in base.Filter.__iter__(self): type = token["type"] if type in ("StartTag", "EmptyTag"): namespace = token["namespace"] name = token["name"] assert namespace is None or isinstance(namespace, text_type) assert namespace != "" assert isinstance(name, text_type) assert name != "" assert isinstance(token["data"], dict) if (not namespace or namespace == namespaces["html"]) and name in voidElements: assert type == "EmptyTag" else: assert type == "StartTag" if type == "StartTag" and self.require_matching_tags: open_elements.append((namespace, name)) for (namespace, name), value in token["data"].items(): assert namespace is None or isinstance(namespace, text_type) assert namespace != "" assert isinstance(name, text_type) assert name != "" assert isinstance(value, text_type) elif type == "EndTag": namespace = token["namespace"] name = token["name"] assert namespace is None or isinstance(namespace, text_type) assert namespace != "" assert 
isinstance(name, text_type) assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} elif self.require_matching_tags: start = open_elements.pop() assert start == (namespace, name) elif type == "Comment": data = token["data"] assert isinstance(data, text_type) elif type in ("Characters", "SpaceCharacters"): data = token["data"] assert isinstance(data, text_type) assert data != "" if type == "SpaceCharacters": assert data.strip(spaceCharacters) == "" elif type == "Doctype": name = token["name"] assert name is None or isinstance(name, text_type) assert token["publicId"] is None or isinstance(name, text_type) assert token["systemId"] is None or isinstance(name, text_type) elif type == "Entity": assert isinstance(token["name"], text_type) elif type == "SerializerError": assert isinstance(token["data"], text_type) else: assert False, "Unknown token type: %(type)s" % {"type": type} yield token filters/optionaltags.py000064400000024446147204715120011307 0ustar00from __future__ import absolute_import, division, unicode_literals from . import base class Filter(base.Filter): def slider(self): previous1 = previous2 = None for token in self.source: if previous1 is not None: yield previous2, previous1, token previous2 = previous1 previous1 = token if previous1 is not None: yield previous2, previous1, None def __iter__(self): for previous, token, next in self.slider(): type = token["type"] if type == "StartTag": if (token["data"] or not self.is_optional_start(token["name"], previous, next)): yield token elif type == "EndTag": if not self.is_optional_end(token["name"], next): yield token else: yield token def is_optional_start(self, tagname, previous, next): type = next and next["type"] or None if tagname in 'html': # An html element's start tag may be omitted if the first thing # inside the html element is not a space character or a comment. 
return type not in ("Comment", "SpaceCharacters") elif tagname == 'head': # A head element's start tag may be omitted if the first thing # inside the head element is an element. # XXX: we also omit the start tag if the head element is empty if type in ("StartTag", "EmptyTag"): return True elif type == "EndTag": return next["name"] == "head" elif tagname == 'body': # A body element's start tag may be omitted if the first thing # inside the body element is not a space character or a comment, # except if the first thing inside the body element is a script # or style element and the node immediately preceding the body # element is a head element whose end tag has been omitted. if type in ("Comment", "SpaceCharacters"): return False elif type == "StartTag": # XXX: we do not look at the preceding event, so we never omit # the body element's start tag if it's followed by a script or # a style element. return next["name"] not in ('script', 'style') else: return True elif tagname == 'colgroup': # A colgroup element's start tag may be omitted if the first thing # inside the colgroup element is a col element, and if the element # is not immediately preceded by another colgroup element whose # end tag has been omitted. if type in ("StartTag", "EmptyTag"): # XXX: we do not look at the preceding event, so instead we never # omit the colgroup element's end tag when it is immediately # followed by another colgroup element. See is_optional_end. return next["name"] == "col" else: return False elif tagname == 'tbody': # A tbody element's start tag may be omitted if the first thing # inside the tbody element is a tr element, and if the element is # not immediately preceded by a tbody, thead, or tfoot element # whose end tag has been omitted. if type == "StartTag": # omit the thead and tfoot elements' end tag when they are # immediately followed by a tbody element. See is_optional_end. 
if previous and previous['type'] == 'EndTag' and \ previous['name'] in ('tbody', 'thead', 'tfoot'): return False return next["name"] == 'tr' else: return False return False def is_optional_end(self, tagname, next): type = next and next["type"] or None if tagname in ('html', 'head', 'body'): # An html element's end tag may be omitted if the html element # is not immediately followed by a space character or a comment. return type not in ("Comment", "SpaceCharacters") elif tagname in ('li', 'optgroup', 'tr'): # A li element's end tag may be omitted if the li element is # immediately followed by another li element or if there is # no more content in the parent element. # An optgroup element's end tag may be omitted if the optgroup # element is immediately followed by another optgroup element, # or if there is no more content in the parent element. # A tr element's end tag may be omitted if the tr element is # immediately followed by another tr element, or if there is # no more content in the parent element. if type == "StartTag": return next["name"] == tagname else: return type == "EndTag" or type is None elif tagname in ('dt', 'dd'): # A dt element's end tag may be omitted if the dt element is # immediately followed by another dt element or a dd element. # A dd element's end tag may be omitted if the dd element is # immediately followed by another dd element or a dt element, # or if there is no more content in the parent element. if type == "StartTag": return next["name"] in ('dt', 'dd') elif tagname == 'dd': return type == "EndTag" or type is None else: return False elif tagname == 'p': # A p element's end tag may be omitted if the p element is # immediately followed by an address, article, aside, # blockquote, datagrid, dialog, dir, div, dl, fieldset, # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, # nav, ol, p, pre, section, table, or ul, element, or if # there is no more content in the parent element. 
if type in ("StartTag", "EmptyTag"): return next["name"] in ('address', 'article', 'aside', 'blockquote', 'datagrid', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul') else: return type == "EndTag" or type is None elif tagname == 'option': # An option element's end tag may be omitted if the option # element is immediately followed by another option element, # or if it is immediately followed by an optgroup # element, or if there is no more content in the parent # element. if type == "StartTag": return next["name"] in ('option', 'optgroup') else: return type == "EndTag" or type is None elif tagname in ('rt', 'rp'): # An rt element's end tag may be omitted if the rt element is # immediately followed by an rt or rp element, or if there is # no more content in the parent element. # An rp element's end tag may be omitted if the rp element is # immediately followed by an rt or rp element, or if there is # no more content in the parent element. if type == "StartTag": return next["name"] in ('rt', 'rp') else: return type == "EndTag" or type is None elif tagname == 'colgroup': # A colgroup element's end tag may be omitted if the colgroup # element is not immediately followed by a space character or # a comment. if type in ("Comment", "SpaceCharacters"): return False elif type == "StartTag": # XXX: we also look for an immediately following colgroup # element. See is_optional_start. return next["name"] != 'colgroup' else: return True elif tagname in ('thead', 'tbody'): # A thead element's end tag may be omitted if the thead element # is immediately followed by a tbody or tfoot element. # A tbody element's end tag may be omitted if the tbody element # is immediately followed by a tbody or tfoot element, or if # there is no more content in the parent element. 
# A tfoot element's end tag may be omitted if the tfoot element # is immediately followed by a tbody element, or if there is no # more content in the parent element. # XXX: we never omit the end tag when the following element is # a tbody. See is_optional_start. if type == "StartTag": return next["name"] in ['tbody', 'tfoot'] elif tagname == 'tbody': return type == "EndTag" or type is None else: return False elif tagname == 'tfoot': # A tfoot element's end tag may be omitted if the tfoot element # is immediately followed by a tbody element, or if there is no # more content in the parent element. # XXX: we never omit the end tag when the following element is # a tbody. See is_optional_start. if type == "StartTag": return next["name"] == 'tbody' else: return type == "EndTag" or type is None elif tagname in ('td', 'th'): # A td element's end tag may be omitted if the td element is # immediately followed by a td or th element, or if there is # no more content in the parent element. # A th element's end tag may be omitted if the th element is # immediately followed by a td or th element, or if there is # no more content in the parent element. if type == "StartTag": return next["name"] in ('td', 'th') else: return type == "EndTag" or type is None return False filters/sanitizer.py000064400000061014147204715120010603 0ustar00from __future__ import absolute_import, division, unicode_literals import re from xml.sax.saxutils import escape, unescape from six.moves import urllib_parse as urlparse from . 
import base from ..constants import namespaces, prefixes __all__ = ["Filter"] allowed_elements = frozenset(( (namespaces['html'], 'a'), (namespaces['html'], 'abbr'), (namespaces['html'], 'acronym'), (namespaces['html'], 'address'), (namespaces['html'], 'area'), (namespaces['html'], 'article'), (namespaces['html'], 'aside'), (namespaces['html'], 'audio'), (namespaces['html'], 'b'), (namespaces['html'], 'big'), (namespaces['html'], 'blockquote'), (namespaces['html'], 'br'), (namespaces['html'], 'button'), (namespaces['html'], 'canvas'), (namespaces['html'], 'caption'), (namespaces['html'], 'center'), (namespaces['html'], 'cite'), (namespaces['html'], 'code'), (namespaces['html'], 'col'), (namespaces['html'], 'colgroup'), (namespaces['html'], 'command'), (namespaces['html'], 'datagrid'), (namespaces['html'], 'datalist'), (namespaces['html'], 'dd'), (namespaces['html'], 'del'), (namespaces['html'], 'details'), (namespaces['html'], 'dfn'), (namespaces['html'], 'dialog'), (namespaces['html'], 'dir'), (namespaces['html'], 'div'), (namespaces['html'], 'dl'), (namespaces['html'], 'dt'), (namespaces['html'], 'em'), (namespaces['html'], 'event-source'), (namespaces['html'], 'fieldset'), (namespaces['html'], 'figcaption'), (namespaces['html'], 'figure'), (namespaces['html'], 'footer'), (namespaces['html'], 'font'), (namespaces['html'], 'form'), (namespaces['html'], 'header'), (namespaces['html'], 'h1'), (namespaces['html'], 'h2'), (namespaces['html'], 'h3'), (namespaces['html'], 'h4'), (namespaces['html'], 'h5'), (namespaces['html'], 'h6'), (namespaces['html'], 'hr'), (namespaces['html'], 'i'), (namespaces['html'], 'img'), (namespaces['html'], 'input'), (namespaces['html'], 'ins'), (namespaces['html'], 'keygen'), (namespaces['html'], 'kbd'), (namespaces['html'], 'label'), (namespaces['html'], 'legend'), (namespaces['html'], 'li'), (namespaces['html'], 'm'), (namespaces['html'], 'map'), (namespaces['html'], 'menu'), (namespaces['html'], 'meter'), (namespaces['html'], 
'multicol'), (namespaces['html'], 'nav'), (namespaces['html'], 'nextid'), (namespaces['html'], 'ol'), (namespaces['html'], 'output'), (namespaces['html'], 'optgroup'), (namespaces['html'], 'option'), (namespaces['html'], 'p'), (namespaces['html'], 'pre'), (namespaces['html'], 'progress'), (namespaces['html'], 'q'), (namespaces['html'], 's'), (namespaces['html'], 'samp'), (namespaces['html'], 'section'), (namespaces['html'], 'select'), (namespaces['html'], 'small'), (namespaces['html'], 'sound'), (namespaces['html'], 'source'), (namespaces['html'], 'spacer'), (namespaces['html'], 'span'), (namespaces['html'], 'strike'), (namespaces['html'], 'strong'), (namespaces['html'], 'sub'), (namespaces['html'], 'sup'), (namespaces['html'], 'table'), (namespaces['html'], 'tbody'), (namespaces['html'], 'td'), (namespaces['html'], 'textarea'), (namespaces['html'], 'time'), (namespaces['html'], 'tfoot'), (namespaces['html'], 'th'), (namespaces['html'], 'thead'), (namespaces['html'], 'tr'), (namespaces['html'], 'tt'), (namespaces['html'], 'u'), (namespaces['html'], 'ul'), (namespaces['html'], 'var'), (namespaces['html'], 'video'), (namespaces['mathml'], 'maction'), (namespaces['mathml'], 'math'), (namespaces['mathml'], 'merror'), (namespaces['mathml'], 'mfrac'), (namespaces['mathml'], 'mi'), (namespaces['mathml'], 'mmultiscripts'), (namespaces['mathml'], 'mn'), (namespaces['mathml'], 'mo'), (namespaces['mathml'], 'mover'), (namespaces['mathml'], 'mpadded'), (namespaces['mathml'], 'mphantom'), (namespaces['mathml'], 'mprescripts'), (namespaces['mathml'], 'mroot'), (namespaces['mathml'], 'mrow'), (namespaces['mathml'], 'mspace'), (namespaces['mathml'], 'msqrt'), (namespaces['mathml'], 'mstyle'), (namespaces['mathml'], 'msub'), (namespaces['mathml'], 'msubsup'), (namespaces['mathml'], 'msup'), (namespaces['mathml'], 'mtable'), (namespaces['mathml'], 'mtd'), (namespaces['mathml'], 'mtext'), (namespaces['mathml'], 'mtr'), (namespaces['mathml'], 'munder'), (namespaces['mathml'], 
'munderover'), (namespaces['mathml'], 'none'), (namespaces['svg'], 'a'), (namespaces['svg'], 'animate'), (namespaces['svg'], 'animateColor'), (namespaces['svg'], 'animateMotion'), (namespaces['svg'], 'animateTransform'), (namespaces['svg'], 'clipPath'), (namespaces['svg'], 'circle'), (namespaces['svg'], 'defs'), (namespaces['svg'], 'desc'), (namespaces['svg'], 'ellipse'), (namespaces['svg'], 'font-face'), (namespaces['svg'], 'font-face-name'), (namespaces['svg'], 'font-face-src'), (namespaces['svg'], 'g'), (namespaces['svg'], 'glyph'), (namespaces['svg'], 'hkern'), (namespaces['svg'], 'linearGradient'), (namespaces['svg'], 'line'), (namespaces['svg'], 'marker'), (namespaces['svg'], 'metadata'), (namespaces['svg'], 'missing-glyph'), (namespaces['svg'], 'mpath'), (namespaces['svg'], 'path'), (namespaces['svg'], 'polygon'), (namespaces['svg'], 'polyline'), (namespaces['svg'], 'radialGradient'), (namespaces['svg'], 'rect'), (namespaces['svg'], 'set'), (namespaces['svg'], 'stop'), (namespaces['svg'], 'svg'), (namespaces['svg'], 'switch'), (namespaces['svg'], 'text'), (namespaces['svg'], 'title'), (namespaces['svg'], 'tspan'), (namespaces['svg'], 'use'), )) allowed_attributes = frozenset(( # HTML attributes (None, 'abbr'), (None, 'accept'), (None, 'accept-charset'), (None, 'accesskey'), (None, 'action'), (None, 'align'), (None, 'alt'), (None, 'autocomplete'), (None, 'autofocus'), (None, 'axis'), (None, 'background'), (None, 'balance'), (None, 'bgcolor'), (None, 'bgproperties'), (None, 'border'), (None, 'bordercolor'), (None, 'bordercolordark'), (None, 'bordercolorlight'), (None, 'bottompadding'), (None, 'cellpadding'), (None, 'cellspacing'), (None, 'ch'), (None, 'challenge'), (None, 'char'), (None, 'charoff'), (None, 'choff'), (None, 'charset'), (None, 'checked'), (None, 'cite'), (None, 'class'), (None, 'clear'), (None, 'color'), (None, 'cols'), (None, 'colspan'), (None, 'compact'), (None, 'contenteditable'), (None, 'controls'), (None, 'coords'), (None, 'data'), (None, 
'datafld'), (None, 'datapagesize'), (None, 'datasrc'), (None, 'datetime'), (None, 'default'), (None, 'delay'), (None, 'dir'), (None, 'disabled'), (None, 'draggable'), (None, 'dynsrc'), (None, 'enctype'), (None, 'end'), (None, 'face'), (None, 'for'), (None, 'form'), (None, 'frame'), (None, 'galleryimg'), (None, 'gutter'), (None, 'headers'), (None, 'height'), (None, 'hidefocus'), (None, 'hidden'), (None, 'high'), (None, 'href'), (None, 'hreflang'), (None, 'hspace'), (None, 'icon'), (None, 'id'), (None, 'inputmode'), (None, 'ismap'), (None, 'keytype'), (None, 'label'), (None, 'leftspacing'), (None, 'lang'), (None, 'list'), (None, 'longdesc'), (None, 'loop'), (None, 'loopcount'), (None, 'loopend'), (None, 'loopstart'), (None, 'low'), (None, 'lowsrc'), (None, 'max'), (None, 'maxlength'), (None, 'media'), (None, 'method'), (None, 'min'), (None, 'multiple'), (None, 'name'), (None, 'nohref'), (None, 'noshade'), (None, 'nowrap'), (None, 'open'), (None, 'optimum'), (None, 'pattern'), (None, 'ping'), (None, 'point-size'), (None, 'poster'), (None, 'pqg'), (None, 'preload'), (None, 'prompt'), (None, 'radiogroup'), (None, 'readonly'), (None, 'rel'), (None, 'repeat-max'), (None, 'repeat-min'), (None, 'replace'), (None, 'required'), (None, 'rev'), (None, 'rightspacing'), (None, 'rows'), (None, 'rowspan'), (None, 'rules'), (None, 'scope'), (None, 'selected'), (None, 'shape'), (None, 'size'), (None, 'span'), (None, 'src'), (None, 'start'), (None, 'step'), (None, 'style'), (None, 'summary'), (None, 'suppress'), (None, 'tabindex'), (None, 'target'), (None, 'template'), (None, 'title'), (None, 'toppadding'), (None, 'type'), (None, 'unselectable'), (None, 'usemap'), (None, 'urn'), (None, 'valign'), (None, 'value'), (None, 'variable'), (None, 'volume'), (None, 'vspace'), (None, 'vrml'), (None, 'width'), (None, 'wrap'), (namespaces['xml'], 'lang'), # MathML attributes (None, 'actiontype'), (None, 'align'), (None, 'columnalign'), (None, 'columnalign'), (None, 'columnalign'), (None, 
'columnlines'), (None, 'columnspacing'), (None, 'columnspan'), (None, 'depth'), (None, 'display'), (None, 'displaystyle'), (None, 'equalcolumns'), (None, 'equalrows'), (None, 'fence'), (None, 'fontstyle'), (None, 'fontweight'), (None, 'frame'), (None, 'height'), (None, 'linethickness'), (None, 'lspace'), (None, 'mathbackground'), (None, 'mathcolor'), (None, 'mathvariant'), (None, 'mathvariant'), (None, 'maxsize'), (None, 'minsize'), (None, 'other'), (None, 'rowalign'), (None, 'rowalign'), (None, 'rowalign'), (None, 'rowlines'), (None, 'rowspacing'), (None, 'rowspan'), (None, 'rspace'), (None, 'scriptlevel'), (None, 'selection'), (None, 'separator'), (None, 'stretchy'), (None, 'width'), (None, 'width'), (namespaces['xlink'], 'href'), (namespaces['xlink'], 'show'), (namespaces['xlink'], 'type'), # SVG attributes (None, 'accent-height'), (None, 'accumulate'), (None, 'additive'), (None, 'alphabetic'), (None, 'arabic-form'), (None, 'ascent'), (None, 'attributeName'), (None, 'attributeType'), (None, 'baseProfile'), (None, 'bbox'), (None, 'begin'), (None, 'by'), (None, 'calcMode'), (None, 'cap-height'), (None, 'class'), (None, 'clip-path'), (None, 'color'), (None, 'color-rendering'), (None, 'content'), (None, 'cx'), (None, 'cy'), (None, 'd'), (None, 'dx'), (None, 'dy'), (None, 'descent'), (None, 'display'), (None, 'dur'), (None, 'end'), (None, 'fill'), (None, 'fill-opacity'), (None, 'fill-rule'), (None, 'font-family'), (None, 'font-size'), (None, 'font-stretch'), (None, 'font-style'), (None, 'font-variant'), (None, 'font-weight'), (None, 'from'), (None, 'fx'), (None, 'fy'), (None, 'g1'), (None, 'g2'), (None, 'glyph-name'), (None, 'gradientUnits'), (None, 'hanging'), (None, 'height'), (None, 'horiz-adv-x'), (None, 'horiz-origin-x'), (None, 'id'), (None, 'ideographic'), (None, 'k'), (None, 'keyPoints'), (None, 'keySplines'), (None, 'keyTimes'), (None, 'lang'), (None, 'marker-end'), (None, 'marker-mid'), (None, 'marker-start'), (None, 'markerHeight'), (None, 'markerUnits'), 
(None, 'markerWidth'), (None, 'mathematical'), (None, 'max'), (None, 'min'), (None, 'name'), (None, 'offset'), (None, 'opacity'), (None, 'orient'), (None, 'origin'), (None, 'overline-position'), (None, 'overline-thickness'), (None, 'panose-1'), (None, 'path'), (None, 'pathLength'), (None, 'points'), (None, 'preserveAspectRatio'), (None, 'r'), (None, 'refX'), (None, 'refY'), (None, 'repeatCount'), (None, 'repeatDur'), (None, 'requiredExtensions'), (None, 'requiredFeatures'), (None, 'restart'), (None, 'rotate'), (None, 'rx'), (None, 'ry'), (None, 'slope'), (None, 'stemh'), (None, 'stemv'), (None, 'stop-color'), (None, 'stop-opacity'), (None, 'strikethrough-position'), (None, 'strikethrough-thickness'), (None, 'stroke'), (None, 'stroke-dasharray'), (None, 'stroke-dashoffset'), (None, 'stroke-linecap'), (None, 'stroke-linejoin'), (None, 'stroke-miterlimit'), (None, 'stroke-opacity'), (None, 'stroke-width'), (None, 'systemLanguage'), (None, 'target'), (None, 'text-anchor'), (None, 'to'), (None, 'transform'), (None, 'type'), (None, 'u1'), (None, 'u2'), (None, 'underline-position'), (None, 'underline-thickness'), (None, 'unicode'), (None, 'unicode-range'), (None, 'units-per-em'), (None, 'values'), (None, 'version'), (None, 'viewBox'), (None, 'visibility'), (None, 'width'), (None, 'widths'), (None, 'x'), (None, 'x-height'), (None, 'x1'), (None, 'x2'), (namespaces['xlink'], 'actuate'), (namespaces['xlink'], 'arcrole'), (namespaces['xlink'], 'href'), (namespaces['xlink'], 'role'), (namespaces['xlink'], 'show'), (namespaces['xlink'], 'title'), (namespaces['xlink'], 'type'), (namespaces['xml'], 'base'), (namespaces['xml'], 'lang'), (namespaces['xml'], 'space'), (None, 'y'), (None, 'y1'), (None, 'y2'), (None, 'zoomAndPan'), )) attr_val_is_uri = frozenset(( (None, 'href'), (None, 'src'), (None, 'cite'), (None, 'action'), (None, 'longdesc'), (None, 'poster'), (None, 'background'), (None, 'datasrc'), (None, 'dynsrc'), (None, 'lowsrc'), (None, 'ping'), (namespaces['xlink'], 
'href'), (namespaces['xml'], 'base'), )) svg_attr_val_allows_ref = frozenset(( (None, 'clip-path'), (None, 'color-profile'), (None, 'cursor'), (None, 'fill'), (None, 'filter'), (None, 'marker'), (None, 'marker-start'), (None, 'marker-mid'), (None, 'marker-end'), (None, 'mask'), (None, 'stroke'), )) svg_allow_local_href = frozenset(( (None, 'altGlyph'), (None, 'animate'), (None, 'animateColor'), (None, 'animateMotion'), (None, 'animateTransform'), (None, 'cursor'), (None, 'feImage'), (None, 'filter'), (None, 'linearGradient'), (None, 'pattern'), (None, 'radialGradient'), (None, 'textpath'), (None, 'tref'), (None, 'set'), (None, 'use') )) allowed_css_properties = frozenset(( 'azimuth', 'background-color', 'border-bottom-color', 'border-collapse', 'border-color', 'border-left-color', 'border-right-color', 'border-top-color', 'clear', 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', 'white-space', 'width', )) allowed_css_keywords = frozenset(( 'auto', 'aqua', 'black', 'block', 'blue', 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', 'transparent', 'underline', 'white', 'yellow', )) allowed_svg_properties = frozenset(( 'fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', 'stroke-opacity', )) allowed_protocols = frozenset(( 'ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 
'gopher', 'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', 'ssh', 'sftp', 'rtsp', 'afs', 'data', )) allowed_content_types = frozenset(( 'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain', )) data_content_type = re.compile(r''' ^ # Match a content type / (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) # Match any character set and encoding (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) # Assume the rest is data ,.* $ ''', re.VERBOSE) class Filter(base.Filter): """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" def __init__(self, source, allowed_elements=allowed_elements, allowed_attributes=allowed_attributes, allowed_css_properties=allowed_css_properties, allowed_css_keywords=allowed_css_keywords, allowed_svg_properties=allowed_svg_properties, allowed_protocols=allowed_protocols, allowed_content_types=allowed_content_types, attr_val_is_uri=attr_val_is_uri, svg_attr_val_allows_ref=svg_attr_val_allows_ref, svg_allow_local_href=svg_allow_local_href): super(Filter, self).__init__(source) self.allowed_elements = allowed_elements self.allowed_attributes = allowed_attributes self.allowed_css_properties = allowed_css_properties self.allowed_css_keywords = allowed_css_keywords self.allowed_svg_properties = allowed_svg_properties self.allowed_protocols = allowed_protocols self.allowed_content_types = allowed_content_types self.attr_val_is_uri = attr_val_is_uri self.svg_attr_val_allows_ref = svg_attr_val_allows_ref self.svg_allow_local_href = svg_allow_local_href def __iter__(self): for token in base.Filter.__iter__(self): token = self.sanitize_token(token) if token: yield token # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style # attributes are parsed, and a restricted set, # specified by # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through. 
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified # in ALLOWED_PROTOCOLS are allowed. # # sanitize_html('') # => <script> do_nasty_stuff() </script> # sanitize_html('Click here for $100') # => Click here for $100 def sanitize_token(self, token): # accommodate filters which use token_type differently token_type = token["type"] if token_type in ("StartTag", "EndTag", "EmptyTag"): name = token["name"] namespace = token["namespace"] if ((namespace, name) in self.allowed_elements or (namespace is None and (namespaces["html"], name) in self.allowed_elements)): return self.allowed_token(token) else: return self.disallowed_token(token) elif token_type == "Comment": pass else: return token def allowed_token(self, token): if "data" in token: attrs = token["data"] attr_names = set(attrs.keys()) # Remove forbidden attributes for to_remove in (attr_names - self.allowed_attributes): del token["data"][to_remove] attr_names.remove(to_remove) # Remove attributes with disallowed URL values for attr in (attr_names & self.attr_val_is_uri): assert attr in attrs # I don't have a clue where this regexp comes from or why it matches those # characters, nor why we call unescape. I just know it's always been here. # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all # this will do is remove *more* than it otherwise would. 
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\s]+", '', unescape(attrs[attr])).lower() # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") try: uri = urlparse.urlparse(val_unescaped) except ValueError: uri = None del attrs[attr] if uri and uri.scheme: if uri.scheme not in self.allowed_protocols: del attrs[attr] if uri.scheme == 'data': m = data_content_type.match(uri.path) if not m: del attrs[attr] elif m.group('content_type') not in self.allowed_content_types: del attrs[attr] for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr])) if (token["name"] in self.svg_allow_local_href and (namespaces['xlink'], 'href') in attrs and re.search('^\s*[^#\s].*', attrs[(namespaces['xlink'], 'href')])): del attrs[(namespaces['xlink'], 'href')] if (None, 'style') in attrs: attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) token["data"] = attrs return token def disallowed_token(self, token): token_type = token["type"] if token_type == "EndTag": token["data"] = "" % token["name"] elif token["data"]: assert token_type in ("StartTag", "EmptyTag") attrs = [] for (ns, name), v in token["data"].items(): attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) else: token["data"] = "<%s>" % token["name"] if token.get("selfClosing"): token["data"] = token["data"][:-1] + "/>" token["type"] = "Characters" del token["name"] return token def sanitize_css(self, style): # disallow urls style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) # gauntlet if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return '' clean = [] for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): if not value: continue if prop.lower() 
in self.allowed_css_properties: clean.append(prop + ': ' + value + ';') elif prop.split('-')[0].lower() in ['background', 'border', 'margin', 'padding']: for keyword in value.split(): if keyword not in self.allowed_css_keywords and \ not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa break else: clean.append(prop + ': ' + value + ';') elif prop.lower() in self.allowed_svg_properties: clean.append(prop + ': ' + value + ';') return ' '.join(clean) filters/whitespace.py000064400000002163147204715120010727 0ustar00from __future__ import absolute_import, division, unicode_literals import re from . import base from ..constants import rcdataElements, spaceCharacters spaceCharacters = "".join(spaceCharacters) SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) class Filter(base.Filter): spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) def __iter__(self): preserve = 0 for token in base.Filter.__iter__(self): type = token["type"] if type == "StartTag" \ and (preserve or token["name"] in self.spacePreserveElements): preserve += 1 elif type == "EndTag" and preserve: preserve -= 1 elif not preserve and type == "SpaceCharacters" and token["data"]: # Test on token["data"] above to not introduce spaces where there were not token["data"] = " " elif not preserve and type == "Characters": token["data"] = collapse_spaces(token["data"]) yield token def collapse_spaces(text): return SPACES_REGEX.sub(' ', text) treeadapters/__pycache__/__init__.cpython-36.opt-1.pyc000064400000000553147204715120016571 0ustar003 B;W @sZddlmZmZmZddlmZdgZyddlmZWnek rJYn Xej ddS))absolute_importdivisionunicode_literals)saxr)genshirN) Z __future__rrrr__all__r ImportErrorappendr r /usr/lib/python3.6/__init__.pys treeadapters/__pycache__/__init__.cpython-36.pyc000064400000000553147204715120015632 0ustar003 B;W @sZddlmZmZmZddlmZdgZyddlmZWnek rJYn Xej ddS))absolute_importdivisionunicode_literals)saxr)genshirN) 
Z __future__rrrr__all__r ImportErrorappendr r /usr/lib/python3.6/__init__.pys treeadapters/__pycache__/genshi.cpython-36.opt-1.pyc000064400000002640147204715120016306 0ustar003 B;W@sLddlmZmZmZddlmZmZddlmZmZm Z m Z m Z ddZ dS))absolute_importdivisionunicode_literals)QNameAttrs)STARTENDTEXTCOMMENTDOCTYPEccsZg}x6|D],}|d}|dkr2|j|dn|rLtdj|dfVg}|dkr|d rrd |d |d f}n|d }td d |djD}tt||fdfV|dkrd}|dkr|d rd |d |d f}n|d }tt|dfVq |dkrt|dd fVq |dkr t |d |d|dfd#fVq q W|rVtdj|d&fVdS)'Ntype CharactersSpaceCharactersdataStartTagEmptyTag namespacez{%s}%snamecSs4g|],\}}t|ddk r"d|n|d|fqS)rNz{%s}%sr)r).0attrvaluer/usr/lib/python3.6/genshi.py szto_genshi..ZEndTagCommentZDoctypeZpublicIdZsystemId)r rr)Nrr)rrrr)Nrrrr)Nrrrr)Nrrrr)Nrrrr)Nrr) appendr joinritemsrrrr r )Zwalkertexttokenr rZattrsrrr to_genshis<  r#N) Z __future__rrrZ genshi.corerrrrr r r r#rrrrstreeadapters/__pycache__/genshi.cpython-36.pyc000064400000002640147204715120015347 0ustar003 B;W@sLddlmZmZmZddlmZmZddlmZmZm Z m Z m Z ddZ dS))absolute_importdivisionunicode_literals)QNameAttrs)STARTENDTEXTCOMMENTDOCTYPEccsZg}x6|D],}|d}|dkr2|j|dn|rLtdj|dfVg}|dkr|d rrd |d |d f}n|d }td d |djD}tt||fdfV|dkrd}|dkr|d rd |d |d f}n|d }tt|dfVq |dkrt|dd fVq |dkr t |d |d|dfd#fVq q W|rVtdj|d&fVdS)'Ntype CharactersSpaceCharactersdataStartTagEmptyTag namespacez{%s}%snamecSs4g|],\}}t|ddk r"d|n|d|fqS)rNz{%s}%sr)r).0attrvaluer/usr/lib/python3.6/genshi.py szto_genshi..ZEndTagCommentZDoctypeZpublicIdZsystemId)r rr)Nrr)rrrr)Nrrrr)Nrrrr)Nrrrr)Nrrrr)Nrr) appendr joinritemsrrrr r )Zwalkertexttokenr rZattrsrrr to_genshis<  r#N) Z __future__rrrZ genshi.corerrrrr r r r#rrrrstreeadapters/__pycache__/sax.cpython-36.opt-1.pyc000064400000002335147204715120015625 0ustar003 B;W}@shddlmZmZmZddlmZddlmZmZiZ x&ej D]\Z Z Z e dk r>e e e <q>WddZdS))absolute_importdivisionunicode_literals)AttributesNSImpl)adjustForeignAttributesunadjustForeignAttributesNcCs|jx tjD]\}}|j||qWx|D]}|d}|dkrHq0q0|d krt|dt}|j|d|df|d||dkr|j|d|df|dq0|dkr|j|d|df|dq0|dkr|j|dq0|d kr0q0q0Wx tjD]\}}|j |qW|j d 
S)z8Call SAX-like content handler based on treewalker walkertypeZDoctypeStartTagEmptyTagdata namespacenameZEndTag CharactersSpaceCharactersCommentN)r r )rr) Z startDocumentprefix_mappingitemsZstartPrefixMappingrrZstartElementNSZ endElementNSZ charactersZendPrefixMappingZ endDocument)ZwalkerZhandlerprefixr tokenr Zattrsr/usr/lib/python3.6/sax.pyto_sax s6   r)Z __future__rrrZxml.sax.xmlreaderrZ constantsrrrvaluesrZ localNamer rrrrrs  treeadapters/__pycache__/sax.cpython-36.pyc000064400000002416147204715120014666 0ustar003 B;W}@shddlmZmZmZddlmZddlmZmZiZ x&ej D]\Z Z Z e dk r>e e e <q>WddZdS))absolute_importdivisionunicode_literals)AttributesNSImpl)adjustForeignAttributesunadjustForeignAttributesNcCs(|jx tjD]\}}|j||qWx|D]}|d}|dkrHq0q0|dkrt|dt}|j|d|df|d||dkr|j|d|df|dq0|dkr|j|d|df|dq0|dkr|j|dq0|d krq0d s0t d q0Wx tjD]\}}|j |qW|j dS)z8Call SAX-like content handler based on treewalker walkertypeZDoctypeStartTagEmptyTagdata namespacenameZEndTag CharactersSpaceCharactersCommentFzUnknown token typeN)r r )rr) Z startDocumentprefix_mappingitemsZstartPrefixMappingrrZstartElementNSZ endElementNSZ charactersAssertionErrorZendPrefixMappingZ endDocument)ZwalkerZhandlerprefixr tokenr Zattrsr/usr/lib/python3.6/sax.pyto_sax s6   r)Z __future__rrrZxml.sax.xmlreaderrZ constantsrrrvaluesrZ localNamer rrrrrs  treeadapters/__init__.py000064400000000320147204715120011336 0ustar00from __future__ import absolute_import, division, unicode_literals from . import sax __all__ = ["sax"] try: from . 
import genshi # noqa except ImportError: pass else: __all__.append("genshi") treeadapters/genshi.py000064400000003023147204715120011057 0ustar00from __future__ import absolute_import, division, unicode_literals from genshi.core import QName, Attrs from genshi.core import START, END, TEXT, COMMENT, DOCTYPE def to_genshi(walker): text = [] for token in walker: type = token["type"] if type in ("Characters", "SpaceCharacters"): text.append(token["data"]) elif text: yield TEXT, "".join(text), (None, -1, -1) text = [] if type in ("StartTag", "EmptyTag"): if token["namespace"]: name = "{%s}%s" % (token["namespace"], token["name"]) else: name = token["name"] attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) for attr, value in token["data"].items()]) yield (START, (QName(name), attrs), (None, -1, -1)) if type == "EmptyTag": type = "EndTag" if type == "EndTag": if token["namespace"]: name = "{%s}%s" % (token["namespace"], token["name"]) else: name = token["name"] yield END, QName(name), (None, -1, -1) elif type == "Comment": yield COMMENT, token["data"], (None, -1, -1) elif type == "Doctype": yield DOCTYPE, (token["name"], token["publicId"], token["systemId"]), (None, -1, -1) else: pass # FIXME: What to do? 
if text: yield TEXT, "".join(text), (None, -1, -1) treeadapters/sax.py000064400000003175147204715120010405 0ustar00from __future__ import absolute_import, division, unicode_literals from xml.sax.xmlreader import AttributesNSImpl from ..constants import adjustForeignAttributes, unadjustForeignAttributes prefix_mapping = {} for prefix, localName, namespace in adjustForeignAttributes.values(): if prefix is not None: prefix_mapping[prefix] = namespace def to_sax(walker, handler): """Call SAX-like content handler based on treewalker walker""" handler.startDocument() for prefix, namespace in prefix_mapping.items(): handler.startPrefixMapping(prefix, namespace) for token in walker: type = token["type"] if type == "Doctype": continue elif type in ("StartTag", "EmptyTag"): attrs = AttributesNSImpl(token["data"], unadjustForeignAttributes) handler.startElementNS((token["namespace"], token["name"]), token["name"], attrs) if type == "EmptyTag": handler.endElementNS((token["namespace"], token["name"]), token["name"]) elif type == "EndTag": handler.endElementNS((token["namespace"], token["name"]), token["name"]) elif type in ("Characters", "SpaceCharacters"): handler.characters(token["data"]) elif type == "Comment": pass else: assert False, "Unknown token type" for prefix, namespace in prefix_mapping.items(): handler.endPrefixMapping(prefix) handler.endDocument() treebuilders/__pycache__/__init__.cpython-36.opt-1.pyc000064400000005742147204715120016604 0ustar003 B;WN @s6dZddlmZmZmZddlmZiZdddZdS) aA collection of modules for building different kinds of tree from HTML documents. To create a treebuilder for a new type of tree, you need to do implement several things: 1) A set of classes for various types of elements: Document, Doctype, Comment, Element. 
These must implement the interface of _base.treebuilders.Node (although comment nodes have a different signature for their constructor, see treebuilders.etree.Comment) Textual content may also be implemented as another node type, or not, as your tree implementation requires. 2) A treebuilder object (called TreeBuilder by convention) that inherits from treebuilders._base.TreeBuilder. This has 4 required attributes: documentClass - the class to use for the bottommost node of a document elementClass - the class to use for HTML Elements commentClass - the class to use for comments doctypeClass - the class to use for doctypes It also has one required method: getDocument - Returns the root node of the complete document tree 3) If you wish to run the unit tests, you must also create a testSerializer method on your treebuilder which accepts a node and returns a string containing Node and its children serialized according to the format used in the unittests )absolute_importdivisionunicode_literals) default_etreeNcKs|j}|tkr|dkrLddlm}|dkrs treebuilders/__pycache__/__init__.cpython-36.pyc000064400000005742147204715120015645 0ustar003 B;WN @s6dZddlmZmZmZddlmZiZdddZdS) aA collection of modules for building different kinds of tree from HTML documents. To create a treebuilder for a new type of tree, you need to do implement several things: 1) A set of classes for various types of elements: Document, Doctype, Comment, Element. These must implement the interface of _base.treebuilders.Node (although comment nodes have a different signature for their constructor, see treebuilders.etree.Comment) Textual content may also be implemented as another node type, or not, as your tree implementation requires. 2) A treebuilder object (called TreeBuilder by convention) that inherits from treebuilders._base.TreeBuilder. 
This has 4 required attributes: documentClass - the class to use for the bottommost node of a document elementClass - the class to use for HTML Elements commentClass - the class to use for comments doctypeClass - the class to use for doctypes It also has one required method: getDocument - Returns the root node of the complete document tree 3) If you wish to run the unit tests, you must also create a testSerializer method on your treebuilder which accepts a node and returns a string containing Node and its children serialized according to the format used in the unittests )absolute_importdivisionunicode_literals) default_etreeNcKs|j}|tkr|dkrLddlm}|dkrs treebuilders/__pycache__/base.cpython-36.opt-1.pyc000064400000025323147204715120015754 0ustar003 B;Wj6@sddlmZmZmZddlmZddlmZmZm Z dZ e edfe ee e ddfgBdfe ee e dd fe dd fgBdfe e ddfe dd fgdfe e dd fe dd fgdfdZ GdddeZGdddeZGdddeZdS))absolute_importdivisionunicode_literals) text_type)scopingElementstableInsertModeElements namespacesNFhtmlbuttonZolZultableoptgroupoptionT)Nr listr Zselectc@s^eZdZddZddZddZddZdd d Zd d ZddZ ddZ ddZ ddZ d S)NodecCs(||_d|_d|_i|_g|_g|_dS)a6Node representing an item in the tree. name - The tag name associated with the node parent - The parent of the current node (or None for the document node) value - The value of the current node (applies to text nodes and comments attributes - a dict holding name, value pairs for attributes of the node childNodes - a list of child nodes of the current node. 
This must include all elements but not necessarily other node types _flags - A list of miscellaneous flags that can be set on the node N)nameparentvalue attributes childNodesZ_flags)selfrr/usr/lib/python3.6/base.py__init__s z Node.__init__cCs:djdd|jjD}|r,d|j|fSd|jSdS)N cSsg|]\}}d||fqS)z%s="%s"r).0rrrrr +sz Node.__str__..z<%s %s>z<%s>)joinritemsr)rZ attributesStrrrr__str__*s  z Node.__str__cCs d|jS)Nz<%s>)r)rrrr__repr__3sz Node.__repr__cCstdS)z3Insert node as a child of the current node N)NotImplementedError)rnoderrr appendChild6szNode.appendChildNcCstdS)zInsert data as text in the current node, positioned before the start of node insertBefore or to the end of the node's text. N)r!)rdata insertBeforerrr insertText;szNode.insertTextcCstdS)zInsert node as a child of the current node, before refNode in the list of child nodes. Raises ValueError if refNode is not a child of the current nodeN)r!)rr"ZrefNoderrrr%AszNode.insertBeforecCstdS)z:Remove node from the children of the current node N)r!)rr"rrr removeChildGszNode.removeChildcCs$x|jD]}|j|qWg|_dS)zMove all the children of the current node to newParent. This is needed so that trees that don't store text as nodes move the text in the correct way N)rr#)rZ newParentZchildrrrreparentChildrenLs zNode.reparentChildrencCstdS)zReturn a shallow copy of the current node i.e. 
a node with the same name and attributes but with no parent or child nodes N)r!)rrrr cloneNodeVszNode.cloneNodecCstdS)zFReturn true if the node has children or text, false otherwise N)r!)rrrr hasContent\szNode.hasContent)N) __name__ __module__ __qualname__rrr r#r&r%r'r(r)r*rrrrrs   rc@seZdZddZddZdS)ActiveFormattingElementscCsfd}|tkrVxH|dddD]6}|tkr*P|j||r>|d7}|dkr|j|PqWtj||dS)Nr)Marker nodesEqualremoverappend)rr"Z equalCountelementrrrr5cs  zActiveFormattingElements.appendcCs$|j|jksdS|j|jks dSdS)NFT) nameTupler)rZnode1Znode2rrrr3ps   z#ActiveFormattingElements.nodesEqualN)r+r,r-r5r3rrrrr.bs r.c@seZdZdZdZdZdZdZdZddZ ddZ d+ddZ d d Z d d Z d dZddZddZd,ddZddZddZddZeeeZddZddZd-dd Zd!d"Zd.d#d$Zd%d&Zd'd(Zd)d*ZdS)/ TreeBuilderaBase treebuilder implementation documentClass - the class to use for the bottommost node of a document elementClass - the class to use for HTML Elements commentClass - the class to use for comments doctypeClass - the class to use for doctypes NcCs|r d|_nd|_|jdS)Nzhttp://www.w3.org/1999/xhtml)defaultNamespacereset)rZnamespaceHTMLElementsrrrrszTreeBuilder.__init__cCs.g|_t|_d|_d|_d|_|j|_dS)NF) openElementsr.activeFormattingElementsZ headPointerZ formPointerinsertFromTable documentClassdocument)rrrrr:s zTreeBuilder.resetcCs~t|d}|s$t|tr$td|f}t|\}}xHt|jD]:}|rP||krPdS| rd|j|krddS||j|kArs    zTreeBuilder.insertTextcCsvd}d}d}x(|jdddD]}|jdkr|}PqW|rd|jrL|j}|}qn|j|jj|d}n |jd}||fS)zsGet the foster parent element, and sibling to insert before (or None) when inserting a misnested table nodeNr/r rr1)r;rrindex)rZ lastTableZ fosterParentr%ZelmrrrrbMs  z)TreeBuilder.getTableMisnestedNodePositionc Cs8|jd j}|td kr4||kr4|jj|j|dS) Nr/dddtlirr prprtr1)rdrerfrr rgrhri)r;r frozensetrNgenerateImpliedEndTags)rexcluderrrrrkgs    z"TreeBuilder.generateImpliedEndTagscCs|jS)zReturn the final tree)r?)rrrr getDocumentqszTreeBuilder.getDocumentcCs|j}|jdj||S)zReturn the final fragmentr) fragmentClassr;r()rZfragmentrrr getFragmentuszTreeBuilder.getFragmentcCstdS)zzSerialize the subtree of 
node in the format required by unit tests node - the node from which to start serializingN)r!)rr"rrrtestSerializer|szTreeBuilder.testSerializer)N)N)N)N)r+r,r-__doc__r>r\rYrWrnrr:rFrMrOrQrTrXrZrRr^rapropertyr=r`r_r&rbrkrmrorprrrrr8zs6 .     r8)Z __future__rrrZsixrZ constantsrrr r2rjsetrBobjectrrr.r8rrrrs    Ktreebuilders/__pycache__/base.cpython-36.pyc000064400000025501147204715120015013 0ustar003 B;Wj6@sddlmZmZmZddlmZddlmZmZm Z dZ e edfe ee e ddfgBdfe ee e dd fe dd fgBdfe e ddfe dd fgdfe e dd fe dd fgdfdZ GdddeZGdddeZGdddeZdS))absolute_importdivisionunicode_literals) text_type)scopingElementstableInsertModeElements namespacesNFhtmlbuttonZolZultableoptgroupoptionT)Nr listr Zselectc@s^eZdZddZddZddZddZdd d Zd d ZddZ ddZ ddZ ddZ d S)NodecCs(||_d|_d|_i|_g|_g|_dS)a6Node representing an item in the tree. name - The tag name associated with the node parent - The parent of the current node (or None for the document node) value - The value of the current node (applies to text nodes and comments attributes - a dict holding name, value pairs for attributes of the node childNodes - a list of child nodes of the current node. This must include all elements but not necessarily other node types _flags - A list of miscellaneous flags that can be set on the node N)nameparentvalue attributes childNodesZ_flags)selfrr/usr/lib/python3.6/base.py__init__s z Node.__init__cCs:djdd|jjD}|r,d|j|fSd|jSdS)N cSsg|]\}}d||fqS)z%s="%s"r).0rrrrr +sz Node.__str__..z<%s %s>z<%s>)joinritemsr)rZ attributesStrrrr__str__*s  z Node.__str__cCs d|jS)Nz<%s>)r)rrrr__repr__3sz Node.__repr__cCstdS)z3Insert node as a child of the current node N)NotImplementedError)rnoderrr appendChild6szNode.appendChildNcCstdS)zInsert data as text in the current node, positioned before the start of node insertBefore or to the end of the node's text. N)r!)rdata insertBeforerrr insertText;szNode.insertTextcCstdS)zInsert node as a child of the current node, before refNode in the list of child nodes. 
Raises ValueError if refNode is not a child of the current nodeN)r!)rr"ZrefNoderrrr%AszNode.insertBeforecCstdS)z:Remove node from the children of the current node N)r!)rr"rrr removeChildGszNode.removeChildcCs$x|jD]}|j|qWg|_dS)zMove all the children of the current node to newParent. This is needed so that trees that don't store text as nodes move the text in the correct way N)rr#)rZ newParentZchildrrrreparentChildrenLs zNode.reparentChildrencCstdS)zReturn a shallow copy of the current node i.e. a node with the same name and attributes but with no parent or child nodes N)r!)rrrr cloneNodeVszNode.cloneNodecCstdS)zFReturn true if the node has children or text, false otherwise N)r!)rrrr hasContent\szNode.hasContent)N) __name__ __module__ __qualname__rrr r#r&r%r'r(r)r*rrrrrs   rc@seZdZddZddZdS)ActiveFormattingElementscCsfd}|tkrVxH|dddD]6}|tkr*P|j||r>|d7}|dkr|j|PqWtj||dS)Nr)Marker nodesEqualremoverappend)rr"Z equalCountelementrrrr5cs  zActiveFormattingElements.appendcCs$|j|jksdS|j|jks dSdS)NFT) nameTupler)rZnode1Znode2rrrr3ps   z#ActiveFormattingElements.nodesEqualN)r+r,r-r5r3rrrrr.bs r.c@seZdZdZdZdZdZdZdZddZ ddZ d+ddZ d d Z d d Z d dZddZddZd,ddZddZddZddZeeeZddZddZd-dd Zd!d"Zd.d#d$Zd%d&Zd'd(Zd)d*ZdS)/ TreeBuilderaBase treebuilder implementation documentClass - the class to use for the bottommost node of a document elementClass - the class to use for HTML Elements commentClass - the class to use for comments doctypeClass - the class to use for doctypes NcCs|r d|_nd|_|jdS)Nzhttp://www.w3.org/1999/xhtml)defaultNamespacereset)rZnamespaceHTMLElementsrrrrszTreeBuilder.__init__cCs.g|_t|_d|_d|_d|_|j|_dS)NF) openElementsr.activeFormattingElementsZ headPointerZ formPointerinsertFromTable documentClassdocument)rrrrr:s zTreeBuilder.resetcCst|d}|s2t|tr$td|f}t|ts2tt|\}}xHt|jD]:}|r^||kr^dS| rr|j |krrdS||j |kArJdSqJWdstdS)Nr7r TF) hasattr isinstancerr tupleAssertionErrorlistElementsMapreversedr;r7)rtargetZvariantZ exactNodeZ listElementsinvertr"rrrelementInScopes     
zTreeBuilder.elementInScopecCs|js dSt|jd}|j|}|tks4||jkr8dSx6|tkrn||jkrn|dkrZd}P|d8}|j|}q:WxR|d7}|j|}|j}|jd|j|j|jd}||j|<||jdkrrPqrWdS)Nr/rZStartTag)typer namespacer$r1r1) r<lenr2r;r) insertElementrrJr)rientryZcloner6rrr#reconstructActiveFormattingElementss.    z/TreeBuilder.reconstructActiveFormattingElementscCs,|jj}x|jr&|tkr&|jj}q WdS)N)r<popr2)rrNrrrclearActiveFormattingElementss z)TreeBuilder.clearActiveFormattingElementscCs8x2|jdddD]}|tkr"Pq|j|kr|SqWdS)zCheck if an element exists between the end of the active formatting elements and the last marker. If it does, return it, else return falseNr/Fr1)r<r2r)rritemrrr!elementInActiveFormattingElementss  z-TreeBuilder.elementInActiveFormattingElementscCs&|j|}|jj||jj|dS)N) createElementr;r5r?r#)rtokenr6rrr insertRoots  zTreeBuilder.insertRootcCs6|d}|d}|d}|j|||}|jj|dS)NrpublicIdsystemId) doctypeClassr?r#)rrUrrWrXZdoctyperrr insertDoctypes zTreeBuilder.insertDoctypecCs*|dkr|jd}|j|j|ddS)Nr/r$r1)r;r# commentClass)rrUrrrr insertComment s zTreeBuilder.insertCommentcCs0|d}|jd|j}|j||}|d|_|S)z.Create an element but don't insert it anywhererrJr$)getr9 elementClassr)rrUrrJr6rrrrTs   zTreeBuilder.createElementcCs|jS)N)_insertFromTable)rrrr_getInsertFromTableszTreeBuilder._getInsertFromTablecCs ||_|r|j|_n|j|_dS)zsSwitch the function used to insert an element from the normal one to the misnested table one and back againN)r_insertElementTablerLinsertElementNormal)rrrrr_setInsertFromTables zTreeBuilder._setInsertFromTablecCsb|d}t|tstd||jd|j}|j||}|d|_|jdj||jj ||S)NrzElement %s not unicoderJr$r/r1) rArrCr]r9r^rr;r#r5)rrUrrJr6rrrrb$s   zTreeBuilder.insertElementNormalcCs`|j|}|jdjtkr$|j|S|j\}}|dkrD|j|n |j|||jj||S)z-Create an element and insert it into the treer/Nr1) rTr;rrrbgetTableMisnestedNodePositionr#r%r5)rrUr6rr%rrrra.s      zTreeBuilder.insertElementTablecCsX|dkr|jd}|j s0|jr<|jdjtkr<|j|n|j\}}|j||dS)zInsert text data.Nr/r1r1)r;r=rrr&rd)rr$rr%rrrr&>s    zTreeBuilder.insertTextcCsvd}d}d}x(|jdddD]}|jdkr|}PqW|rd|jrL|j}|}qn|j|jj|d}n 
|jd}||fS)zsGet the foster parent element, and sibling to insert before (or None) when inserting a misnested table nodeNr/r rr1)r;rrindex)rZ lastTableZ fosterParentr%ZelmrrrrdMs  z)TreeBuilder.getTableMisnestedNodePositionc Cs8|jd j}|td kr4||kr4|jj|j|dS) Nr/dddtlirr prprtr1)rfrgrhrr rirjrk)r;r frozensetrPgenerateImpliedEndTags)rexcluderrrrrmgs    z"TreeBuilder.generateImpliedEndTagscCs|jS)zReturn the final tree)r?)rrrr getDocumentqszTreeBuilder.getDocumentcCs|j}|jdj||S)zReturn the final fragmentr) fragmentClassr;r()rZfragmentrrr getFragmentuszTreeBuilder.getFragmentcCstdS)zzSerialize the subtree of node in the format required by unit tests node - the node from which to start serializingN)r!)rr"rrrtestSerializer|szTreeBuilder.testSerializer)N)N)N)N)r+r,r-__doc__r>r^r[rYrprr:rHrOrQrSrVrZr\rTr`rcpropertyr=rbrar&rdrmrorqrrrrrrr8zs6 .     r8)Z __future__rrrZsixrZ constantsrrr r2rlsetrDobjectrrr.r8rrrrs    Ktreebuilders/__pycache__/dom.cpython-36.opt-1.pyc000064400000021735147204715120015624 0ustar003 B;W"@s|ddlmZmZmZddlmZddlmZmZddl Z ddl m Z ddl m Z dd l m Z dd lmZd d ZeeZdS) )absolute_importdivisionunicode_literals)MutableMapping)minidomNodeN)base) constants) namespaces)moduleFactoryFactorycsVGdddtGfdddtjGfdddtj}ddtS) Nc@sLeZdZddZddZddZddZd d Zd d Zd dZ ddZ dS)zgetDomBuilder..AttrListcSs ||_dS)N)element)selfrr/usr/lib/python3.6/dom.py__init__sz(getDomBuilder..AttrList.__init__cSst|jjjS)N)iterr attributeskeys)rrrr__iter__sz(getDomBuilder..AttrList.__iter__cSs4t|trtn |jjj|}||_||jj|<dS)N) isinstancetupleNotImplementedErrorr ownerDocumentZcreateAttributevaluer)rnamerattrrrr __setitem__s  z+getDomBuilder..AttrList.__setitem__cSs t|jjS)N)lenrr)rrrr__len__ sz'getDomBuilder..AttrList.__len__cSst|jjjS)N)listrritems)rrrrr"#sz%getDomBuilder..AttrList.itemscSst|jjjS)N)r!rrvalues)rrrrr#&sz&getDomBuilder..AttrList.valuescSs"t|trtn|jj|jSdS)N)rrrrrr)rrrrr __getitem__)s z+getDomBuilder..AttrList.__getitem__cSst|trtn |jj|=dS)N)rrrrr)rrrrr __delitem__/s 
z+getDomBuilder..AttrList.__delitem__N) __name__ __module__ __qualname__rrrr r"r#r$r%rrrrAttrListsr)cseZdZddZeddZddZddd Zd d Zd d Z ddZ fddZ ddZ ee e Z fddZddZddZeeZdS)z"getDomBuilder..NodeBuildercSstjj||j||_dS)N)r rrnodeNamer)rrrrrr6sz+getDomBuilder..NodeBuilder.__init__cSst|jdr|jjpdS)N namespaceURI)hasattrrr+)rrrr:s z+getDomBuilder..NodeBuilder.cSs||_|jj|jdS)N)parentr appendChild)rnoderrrr/=sz.getDomBuilder..NodeBuilder.appendChildNcSs4|jjj|}|r$|jj||jn |jj|dS)N)rrcreateTextNode insertBeforer/)rdatar2textrrr insertTextAsz-getDomBuilder..NodeBuilder.insertTextcSs|jj|j|j||_dS)N)rr2r.)rr0ZrefNoderrrr2Hsz/getDomBuilder..NodeBuilder.insertBeforecSs&|jj|jkr|jj|jd|_dS)N)rZ parentNode removeChildr.)rr0rrrr6Lsz.getDomBuilder..NodeBuilder.removeChildcSs:x.|jjr.|jj}|jj||jj|qWg|_dS)N)r hasChildNodesZ firstChildr6r/ childNodes)rZ newParentchildrrrreparentChildrenQs   z3getDomBuilder..NodeBuilder.reparentChildrencs |jS)N)r)r)r)rr getAttributesXsz0getDomBuilder..NodeBuilder.getAttributescSsz|rvxpt|jD]`\}}t|trd|ddk rF|dd|d}n|d}|jj|d||q|jj||qWdS)Nr:rr )r!r"rrrZsetAttributeNSZ setAttribute)rrrrZ qualifiedNamerrr setAttributes[s  z0getDomBuilder..NodeBuilder.setAttributescs|jjdS)NF)r cloneNode)r) NodeBuilderrrr>jsz,getDomBuilder..NodeBuilder.cloneNodecSs |jjS)N)rr7)rrrr hasContentmsz-getDomBuilder..NodeBuilder.hasContentcSs(|jdkrtd|jfS|j|jfSdS)NZhtml) namespacer r)rrrr getNameTupleps z/getDomBuilder..NodeBuilder.getNameTuple)N)r&r'r(rpropertyrAr/r5r2r6r:r;r=rr>r@rBZ nameTupler)r)r?rrr?5s     r?cseZdZfddZfddZdfdd Zfdd Zfd d Zd d ZfddZ ddZ ddZ dddZ Z dZdS)z"getDomBuilder..TreeBuildercsjjddd|_tj|S)N)getDOMImplementationZcreateDocumentdomweakrefproxy)r)Domrr documentClassysz0getDomBuilder..TreeBuilder.documentClasscsR|d}|d}|d}j}|j|||}|jj|tkrN|j|_dS)NrpublicIdsystemId)rDZcreateDocumentTypeZdocumentr/rrEr)rtokenrrJrKZdomimplZdoctype)rHr?rr insertDoctype}sz0getDomBuilder..TreeBuilder.insertDoctypeNcs6|dkr |jdkr |jj|}n|jj||}|S)N)ZdefaultNamespacerEZ createElementZcreateElementNS)rrrAr0)r?rr 
elementClasssz/getDomBuilder..TreeBuilder.elementClasscs|jj|S)N)rEZ createComment)rr3)r?rr commentClasssz/getDomBuilder..TreeBuilder.commentClasscs|jjS)N)rEZcreateDocumentFragment)r)r?rr fragmentClasssz0getDomBuilder..TreeBuilder.fragmentClasscSs|jj|jdS)N)rEr/r)rr0rrrr/sz.getDomBuilder..TreeBuilder.appendChildcs|S)Nr)rr)testSerializerrrrQsz1getDomBuilder..TreeBuilder.testSerializercSs|jS)N)rE)rrrr getDocumentsz.getDomBuilder..TreeBuilder.getDocumentcSstjj|jS)N)r TreeBuilder getFragmentr)rrrrrTsz.getDomBuilder..TreeBuilder.getFragmentcSsp|}||krtjj|||nNt|jdrXtj|jjkrXt|jj|j_|jjj tj|jj |jj |dS)N_child_node_types) r rSr5r,rEr TEXT_NODErUr!appendr/r1)rr3r.rrrr5s z-getDomBuilder..TreeBuilder.insertText)N)N)r&r'r(rIrMrNrOrPr/rQrRrTr5implementationrr)rHDomImplementationr?rQrrrSxs      rScs0|jgdfdd |ddjS)Nrc s$|jtjkr|jrj|js|jrP|jp&d}|jp0d}jdd||j||fq~jdd||jfnjdd|fnz|jtjkrjdn`|jtjkrjdnF|jtj krވjdd||j fn|jtj krjd d||j fnt |d r6|j dk r6d tj|j |jf}n|j}jd d||f|jrg}xftt|jD]T}|jj|}|j}|j}|j } | rd tj| |jf}n|j}|j||fqpWx2t|D]&\}}jd d|d||fqW|d7}x|jD]} | |q WdS)Nz|%s z|%sz|%sz #documentz#document-fragmentz|%sz|%s"%s"r+z%s %sz|%s<%s>z |%s%s="%s"r )ZnodeTyperZDOCUMENT_TYPE_NODErrJrKrWZ DOCUMENT_NODEZDOCUMENT_FRAGMENT_NODEZ COMMENT_NODEZ nodeValuerVr,r+r prefixesr*Z hasAttributesrangerritemrZ localNamesortedr8) rindentrJrKrrirrnsr9)rvserializeElementrrrdsN             " z?getDomBuilder..testSerializer..serializeElement )r)Z normalizejoin)rr)rcrdrrQs . 
z%getDomBuilder..testSerializer)rr rrSlocals)rYrSr)r)rHrYr?rQr getDomBuilders $C:6rh)Z __future__rrr collectionsrZxml.domrrrFrZr r r Z_utilsr rhZ getDomModulerrrrs     _treebuilders/__pycache__/dom.cpython-36.pyc000064400000021735147204715120014665 0ustar003 B;W"@s|ddlmZmZmZddlmZddlmZmZddl Z ddl m Z ddl m Z dd l m Z dd lmZd d ZeeZdS) )absolute_importdivisionunicode_literals)MutableMapping)minidomNodeN)base) constants) namespaces)moduleFactoryFactorycsVGdddtGfdddtjGfdddtj}ddtS) Nc@sLeZdZddZddZddZddZd d Zd d Zd dZ ddZ dS)zgetDomBuilder..AttrListcSs ||_dS)N)element)selfrr/usr/lib/python3.6/dom.py__init__sz(getDomBuilder..AttrList.__init__cSst|jjjS)N)iterr attributeskeys)rrrr__iter__sz(getDomBuilder..AttrList.__iter__cSs4t|trtn |jjj|}||_||jj|<dS)N) isinstancetupleNotImplementedErrorr ownerDocumentZcreateAttributevaluer)rnamerattrrrr __setitem__s  z+getDomBuilder..AttrList.__setitem__cSs t|jjS)N)lenrr)rrrr__len__ sz'getDomBuilder..AttrList.__len__cSst|jjjS)N)listrritems)rrrrr"#sz%getDomBuilder..AttrList.itemscSst|jjjS)N)r!rrvalues)rrrrr#&sz&getDomBuilder..AttrList.valuescSs"t|trtn|jj|jSdS)N)rrrrrr)rrrrr __getitem__)s z+getDomBuilder..AttrList.__getitem__cSst|trtn |jj|=dS)N)rrrrr)rrrrr __delitem__/s z+getDomBuilder..AttrList.__delitem__N) __name__ __module__ __qualname__rrrr r"r#r$r%rrrrAttrListsr)cseZdZddZeddZddZddd Zd d Zd d Z ddZ fddZ ddZ ee e Z fddZddZddZeeZdS)z"getDomBuilder..NodeBuildercSstjj||j||_dS)N)r rrnodeNamer)rrrrrr6sz+getDomBuilder..NodeBuilder.__init__cSst|jdr|jjpdS)N namespaceURI)hasattrrr+)rrrr:s z+getDomBuilder..NodeBuilder.cSs||_|jj|jdS)N)parentr appendChild)rnoderrrr/=sz.getDomBuilder..NodeBuilder.appendChildNcSs4|jjj|}|r$|jj||jn |jj|dS)N)rrcreateTextNode insertBeforer/)rdatar2textrrr insertTextAsz-getDomBuilder..NodeBuilder.insertTextcSs|jj|j|j||_dS)N)rr2r.)rr0ZrefNoderrrr2Hsz/getDomBuilder..NodeBuilder.insertBeforecSs&|jj|jkr|jj|jd|_dS)N)rZ parentNode removeChildr.)rr0rrrr6Lsz.getDomBuilder..NodeBuilder.removeChildcSs:x.|jjr.|jj}|jj||jj|qWg|_dS)N)r 
hasChildNodesZ firstChildr6r/ childNodes)rZ newParentchildrrrreparentChildrenQs   z3getDomBuilder..NodeBuilder.reparentChildrencs |jS)N)r)r)r)rr getAttributesXsz0getDomBuilder..NodeBuilder.getAttributescSsz|rvxpt|jD]`\}}t|trd|ddk rF|dd|d}n|d}|jj|d||q|jj||qWdS)Nr:rr )r!r"rrrZsetAttributeNSZ setAttribute)rrrrZ qualifiedNamerrr setAttributes[s  z0getDomBuilder..NodeBuilder.setAttributescs|jjdS)NF)r cloneNode)r) NodeBuilderrrr>jsz,getDomBuilder..NodeBuilder.cloneNodecSs |jjS)N)rr7)rrrr hasContentmsz-getDomBuilder..NodeBuilder.hasContentcSs(|jdkrtd|jfS|j|jfSdS)NZhtml) namespacer r)rrrr getNameTupleps z/getDomBuilder..NodeBuilder.getNameTuple)N)r&r'r(rpropertyrAr/r5r2r6r:r;r=rr>r@rBZ nameTupler)r)r?rrr?5s     r?cseZdZfddZfddZdfdd Zfdd Zfd d Zd d ZfddZ ddZ ddZ dddZ Z dZdS)z"getDomBuilder..TreeBuildercsjjddd|_tj|S)N)getDOMImplementationZcreateDocumentdomweakrefproxy)r)Domrr documentClassysz0getDomBuilder..TreeBuilder.documentClasscsR|d}|d}|d}j}|j|||}|jj|tkrN|j|_dS)NrpublicIdsystemId)rDZcreateDocumentTypeZdocumentr/rrEr)rtokenrrJrKZdomimplZdoctype)rHr?rr insertDoctype}sz0getDomBuilder..TreeBuilder.insertDoctypeNcs6|dkr |jdkr |jj|}n|jj||}|S)N)ZdefaultNamespacerEZ createElementZcreateElementNS)rrrAr0)r?rr elementClasssz/getDomBuilder..TreeBuilder.elementClasscs|jj|S)N)rEZ createComment)rr3)r?rr commentClasssz/getDomBuilder..TreeBuilder.commentClasscs|jjS)N)rEZcreateDocumentFragment)r)r?rr fragmentClasssz0getDomBuilder..TreeBuilder.fragmentClasscSs|jj|jdS)N)rEr/r)rr0rrrr/sz.getDomBuilder..TreeBuilder.appendChildcs|S)Nr)rr)testSerializerrrrQsz1getDomBuilder..TreeBuilder.testSerializercSs|jS)N)rE)rrrr getDocumentsz.getDomBuilder..TreeBuilder.getDocumentcSstjj|jS)N)r TreeBuilder getFragmentr)rrrrrTsz.getDomBuilder..TreeBuilder.getFragmentcSsp|}||krtjj|||nNt|jdrXtj|jjkrXt|jj|j_|jjj tj|jj |jj |dS)N_child_node_types) r rSr5r,rEr TEXT_NODErUr!appendr/r1)rr3r.rrrr5s z-getDomBuilder..TreeBuilder.insertText)N)N)r&r'r(rIrMrNrOrPr/rQrRrTr5implementationrr)rHDomImplementationr?rQrrrSxs     
 rScs0|jgdfdd |ddjS)Nrc s$|jtjkr|jrj|js|jrP|jp&d}|jp0d}jdd||j||fq~jdd||jfnjdd|fnz|jtjkrjdn`|jtjkrjdnF|jtj krވjdd||j fn|jtj krjd d||j fnt |d r6|j dk r6d tj|j |jf}n|j}jd d||f|jrg}xftt|jD]T}|jj|}|j}|j}|j } | rd tj| |jf}n|j}|j||fqpWx2t|D]&\}}jd d|d||fqW|d7}x|jD]} | |q WdS)Nz|%s z|%sz|%sz #documentz#document-fragmentz|%sz|%s"%s"r+z%s %sz|%s<%s>z |%s%s="%s"r )ZnodeTyperZDOCUMENT_TYPE_NODErrJrKrWZ DOCUMENT_NODEZDOCUMENT_FRAGMENT_NODEZ COMMENT_NODEZ nodeValuerVr,r+r prefixesr*Z hasAttributesrangerritemrZ localNamesortedr8) rindentrJrKrrirrnsr9)rvserializeElementrrrdsN             " z?getDomBuilder..testSerializer..serializeElement )r)Z normalizejoin)rr)rcrdrrQs . z%getDomBuilder..testSerializer)rr rrSlocals)rYrSr)r)rHrYr?rQr getDomBuilders $C:6rh)Z __future__rrr collectionsrZxml.domrrrFrZr r r Z_utilsr rhZ getDomModulerrrrs     _treebuilders/__pycache__/etree.cpython-36.opt-1.pyc000064400000026614147204715120016152 0ustar003 B;W1@sddlmZmZmZddlmZddlZddlmZddlm Z ddlm Z dd l m Z dd l m Z ejd Zdd dZe eZdS))absolute_importdivisionunicode_literals) text_typeN)base) _ihatexml) constants) namespaces)moduleFactoryFactoryz {([^}]*)}(.*)Fc sjdjGfdddtjGfdddGfdddGfdd d Gfd d d fd d  fdd}G fdddtj}tS)NZasdcseZdZd$fdd ZddZddZdd ZeeeZd d Z d d Z ee e Z ddZ ddZ ee e ZddZddZeeeZddZddZddZddZd%ddZd d!Zd"d#ZdS)&z getETreeBuilder..ElementNcs^||_||_j|j|||_|dkr:td|jf|_n|j|jf|_d|_g|_g|_ dS)Nhtml) _name _namespaceElement _getETreeTag_elementr Z nameTupleparent _childNodes_flags)selfname namespace) ElementTree/usr/lib/python3.6/etree.py__init__s  z)getETreeBuilder..Element.__init__cSs|dkr|}n d||f}|S)Nz{%s}%sr)rrrZ etree_tagrrrr#s z-getETreeBuilder..Element._getETreeTagcSs||_|j|j|j|j_dS)N)rrrrtag)rrrrr_setName*sz)getETreeBuilder..Element._setNamecSs|jS)N)r)rrrr_getName.sz)getETreeBuilder..Element._getNamecSs||_|j|j|j|j_dS)N)rrrrr)rrrrr _setNamespace3sz.getETreeBuilder..Element._setNamespacecSs|jS)N)r)rrrr 
_getNamespace7sz.getETreeBuilder..Element._getNamespacecSs|jjS)N)rattrib)rrrr_getAttributes<sz/getETreeBuilder..Element._getAttributescSspx"t|jjjD]}|jj|=qWxF|jD]:\}}t|trVd|d|df}n|}|jj||q.WdS)Nz{%s}%srr)listrr"keysitems isinstancetupleset)r attributeskeyvaluerrrr_setAttributes?s z/getETreeBuilder..Element._setAttributescSs|jS)N)r)rrrr_getChildNodesMsz/getETreeBuilder..Element._getChildNodescSs.|jdd=g|_x|D]}|j|qWdS)N)rrZ insertChild)rr,elementrrr_setChildNodesPs  z/getETreeBuilder..Element._setChildNodescSst|jjpt|jS)z,Return true if the node has children or text)boolrtextlen)rrrr hasContentXsz+getETreeBuilder..Element.hasContentcSs$|jj||jj|j||_dS)N)rappendrr)rnoderrr appendChild\s z,getETreeBuilder..Element.appendChildcSs,t|jj|j}|jj||j||_dS)N)r$rindexinsertr)rr6ZrefNoder8rrr insertBeforeasz-getETreeBuilder..Element.insertBeforecSs$|jj||jj|jd|_dS)N)rremoverr)rr6rrr removeChildfs z,getETreeBuilder..Element.removeChildcSst|js,|jjsd|j_|jj|7_n|dkrb|jdjsLd|jd_|jdj|7_nxt|j}|j|j}|dkr|j|djsd|j|d_|j|dj|7_n |jjsd|j_|jj|7_dS)Nrrr>r>)r3rr2tailr$r8)rdatar:Zchildrenr8rrr insertTextks"     z+getETreeBuilder..Element.insertTextcSs8t||j|j}x |jjD]\}}||j|<qW|S)N)typerrr*r&)rr/rr,rrr cloneNodesz*getETreeBuilder..Element.cloneNodecSsl|jr"|jdjj|jj7_n0|jjs2d|j_|jjdk rR|jj|jj7_d|j_tjj||dS)Nrr=r>) childNodesrr?r2rNodereparentChildren)rZ newParentrrrrFs z1getETreeBuilder..Element.reparentChildren)N)N)__name__ __module__ __qualname__rrrrpropertyrr r!rr#r-r*r.r0rDr4r7r:r<rArCrFr)rrrrs*      rcs2eZdZfddZddZddZeeeZdS)z getETreeBuilder..Commentcs"j||_d|_g|_g|_dS)N)Commentrrrr)rr@)rrrrs z)getETreeBuilder..Comment.__init__cSs|jjS)N)rr2)rrrr_getDatasz)getETreeBuilder..Comment._getDatacSs ||j_dS)N)rr2)rr,rrr_setDatasz)getETreeBuilder..Comment._setDataN)rGrHrIrrLrMrJr@r)rrrrKs rKcsLeZdZfddZddZddZeeeZddZd d Z eee Z d S) z%getETreeBuilder..DocumentTypecs$j|d||j_||_||_dS)Nz )rrr2publicIdsystemId)rrrNrO)rrrrs z.getETreeBuilder..DocumentType.__init__cSs|jjddS)NrNr=)rget)rrrr 
_getPublicIdsz2getETreeBuilder..DocumentType._getPublicIdcSs|dk r|jjd|dS)NrN)rr))rr,rrr _setPublicIdsz2getETreeBuilder..DocumentType._setPublicIdcSs|jjddS)NrOr=)rrP)rrrr _getSystemIdsz2getETreeBuilder..DocumentType._getSystemIdcSs|dk r|jjd|dS)NrO)rr))rr,rrr _setSystemIdsz2getETreeBuilder..DocumentType._setSystemIdN) rGrHrIrrQrRrJrNrSrTrOr)rrr DocumentTypes  rUcseZdZfddZdS)z!getETreeBuilder..Documentcsj|ddS)N DOCUMENT_ROOT)r)r)rrrrsz*getETreeBuilder..Document.__init__N)rGrHrIrr)rrrDocumentsrWcseZdZfddZdS)z)getETreeBuilder..DocumentFragmentcsj|ddS)NZDOCUMENT_FRAGMENT)r)r)rrrrsz2getETreeBuilder..DocumentFragment.__init__N)rGrHrIrr)rrrDocumentFragmentsrXcs*gdfdd |ddjS)Nrc szt|ds|j}|jdkrz|jds0|jdrd|jdpW|jrvjd d |d |jfdS)Nrz rNrOr=zz rVz #documentz|%s"%s" rzDocument node cannot have tailr"z$Document node cannot have attributesz|%sz%s %sz|%s<%s>z |%s%s="%s")hasattrgetrootrrPr5r2r? TypeErrorr3r" tag_regexpmatchgroupsr prefixesr&sorted) r/indentrNrOZnsmatchrnsprefixr*r,Z attr_stringchild)ElementTreeCommentTypervserializeElementrrrhsZ                  " zAgetETreeBuilder..testSerializer..serializeElement )r)join)r/)rf)rgrhrtestSerializers7 z'getETreeBuilder..testSerializercs2gtjfdd|djS)z4Serialize an element and its child nodes to a stringcst|jr|j}|jdkr||jds2|jdrf|jdp>d}|jdpLd}jd|j||fnjd|jfn|jdkr|jdk rj|j|jdk rtdt |d rt |j rtd x|D] }|qWn|jkrjd |jfn|j s$jd j |jfn2d j fdd|j jD}jd|j|f|jrjj|jx|D]}|qpWjd|jf|jrj|jdS)Nz rNrOr=zz rVzDocument node cannot have tailr"z$Document node cannot have attributesz z<%s>rYcs"g|]\}}dj||fqS)z%s="%s") fromXmlName).0rr,)filterrr &szOgetETreeBuilder..tostring..serializeElement..z<%s %s>z)r'rr[rrPr5r2r?r\rZr3r"rlrjr&)r/rNrOreattr)rrfrnrgrhrrrhs@          z;getETreeBuilder..tostring..serializeElementr=)r Z InfosetFilterrj)r/)rrf)rnrgrhrtostrings -z!getETreeBuilder..tostringcsDeZdZZZZZZZfddZ fddZ ddZ dS)z$getETreeBuilder..TreeBuildercs|S)Nr)rr/)rkrrrkAsz3getETreeBuilder..TreeBuilder.testSerializercs<r |jjS|jdk r*|jjjd|jS|jjjdSdS)Nz{%s}htmlr 
)ZdocumentrZdefaultNamespacefind)r)fullTreerr getDocumentDs   z0getETreeBuilder..TreeBuilder.getDocumentcSstjj|jS)N)r TreeBuilder getFragmentr)rrrrrvNsz0getETreeBuilder..TreeBuilder.getFragmentN) rGrHrIZ documentClassZ doctypeClassZ elementClassZ commentClassZ fragmentClassimplementationrkrtrvr)rKrWrXrUrElementTreeImplementationrsrkrrru9s  ru)rKrrrErulocals)rxrsrqrur) rKrWrXrUrrrfrxrsrkrgetETreeBuilders ~ >6$rz)F)Z __future__rrrZsixrrer=rr r r Z_utilsr compiler]rzZgetETreeModulerrrrs        Etreebuilders/__pycache__/etree.cpython-36.pyc000064400000026763147204715120015220 0ustar003 B;W1@sddlmZmZmZddlmZddlZddlmZddlm Z ddlm Z dd l m Z dd l m Z ejd Zdd dZe eZdS))absolute_importdivisionunicode_literals) text_typeN)base) _ihatexml) constants) namespaces)moduleFactoryFactoryz {([^}]*)}(.*)Fc sjdjGfdddtjGfdddGfdddGfdd d Gfd d d fd d  fdd}G fdddtj}tS)NZasdcseZdZd$fdd ZddZddZdd ZeeeZd d Z d d Z ee e Z ddZ ddZ ee e ZddZddZeeeZddZddZddZddZd%ddZd d!Zd"d#ZdS)&z getETreeBuilder..ElementNcs^||_||_j|j|||_|dkr:td|jf|_n|j|jf|_d|_g|_g|_ dS)Nhtml) _name _namespaceElement _getETreeTag_elementr Z nameTupleparent _childNodes_flags)selfname namespace) ElementTree/usr/lib/python3.6/etree.py__init__s  z)getETreeBuilder..Element.__init__cSs|dkr|}n d||f}|S)Nz{%s}%sr)rrrZ etree_tagrrrr#s z-getETreeBuilder..Element._getETreeTagcSs||_|j|j|j|j_dS)N)rrrrtag)rrrrr_setName*sz)getETreeBuilder..Element._setNamecSs|jS)N)r)rrrr_getName.sz)getETreeBuilder..Element._getNamecSs||_|j|j|j|j_dS)N)rrrrr)rrrrr _setNamespace3sz.getETreeBuilder..Element._setNamespacecSs|jS)N)r)rrrr _getNamespace7sz.getETreeBuilder..Element._getNamespacecSs|jjS)N)rattrib)rrrr_getAttributes<sz/getETreeBuilder..Element._getAttributescSspx"t|jjjD]}|jj|=qWxF|jD]:\}}t|trVd|d|df}n|}|jj||q.WdS)Nz{%s}%srr)listrr"keysitems isinstancetupleset)r attributeskeyvaluerrrr_setAttributes?s z/getETreeBuilder..Element._setAttributescSs|jS)N)r)rrrr_getChildNodesMsz/getETreeBuilder..Element._getChildNodescSs.|jdd=g|_x|D]}|j|qWdS)N)rrZ 
insertChild)rr,elementrrr_setChildNodesPs  z/getETreeBuilder..Element._setChildNodescSst|jjpt|jS)z,Return true if the node has children or text)boolrtextlen)rrrr hasContentXsz+getETreeBuilder..Element.hasContentcSs$|jj||jj|j||_dS)N)rappendrr)rnoderrr appendChild\s z,getETreeBuilder..Element.appendChildcSs,t|jj|j}|jj||j||_dS)N)r$rindexinsertr)rr6ZrefNoder8rrr insertBeforeasz-getETreeBuilder..Element.insertBeforecSs$|jj||jj|jd|_dS)N)rremoverr)rr6rrr removeChildfs z,getETreeBuilder..Element.removeChildcSst|js,|jjsd|j_|jj|7_n|dkrb|jdjsLd|jd_|jdj|7_nxt|j}|j|j}|dkr|j|djsd|j|d_|j|dj|7_n |jjsd|j_|jj|7_dS)Nrrr>r>)r3rr2tailr$r8)rdatar:Zchildrenr8rrr insertTextks"     z+getETreeBuilder..Element.insertTextcSs8t||j|j}x |jjD]\}}||j|<qW|S)N)typerrr*r&)rr/rr,rrr cloneNodesz*getETreeBuilder..Element.cloneNodecSsl|jr"|jdjj|jj7_n0|jjs2d|j_|jjdk rR|jj|jj7_d|j_tjj||dS)Nrr=r>) childNodesrr?r2rNodereparentChildren)rZ newParentrrrrFs z1getETreeBuilder..Element.reparentChildren)N)N)__name__ __module__ __qualname__rrrrpropertyrr r!rr#r-r*r.r0rDr4r7r:r<rArCrFr)rrrrs*      rcs2eZdZfddZddZddZeeeZdS)z getETreeBuilder..Commentcs"j||_d|_g|_g|_dS)N)Commentrrrr)rr@)rrrrs z)getETreeBuilder..Comment.__init__cSs|jjS)N)rr2)rrrr_getDatasz)getETreeBuilder..Comment._getDatacSs ||j_dS)N)rr2)rr,rrr_setDatasz)getETreeBuilder..Comment._setDataN)rGrHrIrrLrMrJr@r)rrrrKs rKcsLeZdZfddZddZddZeeeZddZd d Z eee Z d S) z%getETreeBuilder..DocumentTypecs$j|d||j_||_||_dS)Nz )rrr2publicIdsystemId)rrrNrO)rrrrs z.getETreeBuilder..DocumentType.__init__cSs|jjddS)NrNr=)rget)rrrr _getPublicIdsz2getETreeBuilder..DocumentType._getPublicIdcSs|dk r|jjd|dS)NrN)rr))rr,rrr _setPublicIdsz2getETreeBuilder..DocumentType._setPublicIdcSs|jjddS)NrOr=)rrP)rrrr _getSystemIdsz2getETreeBuilder..DocumentType._getSystemIdcSs|dk r|jjd|dS)NrO)rr))rr,rrr _setSystemIdsz2getETreeBuilder..DocumentType._setSystemIdN) rGrHrIrrQrRrJrNrSrTrOr)rrr DocumentTypes  rUcseZdZfddZdS)z!getETreeBuilder..Documentcsj|ddS)N 
DOCUMENT_ROOT)r)r)rrrrsz*getETreeBuilder..Document.__init__N)rGrHrIrr)rrrDocumentsrWcseZdZfddZdS)z)getETreeBuilder..DocumentFragmentcsj|ddS)NZDOCUMENT_FRAGMENT)r)r)rrrrsz2getETreeBuilder..DocumentFragment.__init__N)rGrHrIrr)rrrDocumentFragmentsrXcs*gdfdd |ddjS)Nrc st|ds|j}|jdkrz|jds0|jdrd|jdprNrOr=zz rVz #documentz|%s"%s" rzDocument node cannot have tailr"z$Document node cannot have attributesz|%szExpected unicode, got %s, %sz%s %sz|%s<%s>z |%s%s="%s")hasattrgetrootrrPr5r2r? TypeErrorr3r"r'rAssertionErrorrB tag_regexpmatchgroupsr prefixesr&sorted) r/indentrNrOZnsmatchrnsprefixr*r,Z attr_stringchild)ElementTreeCommentTypervserializeElementrrris^                  " zAgetETreeBuilder..testSerializer..serializeElement )r)join)r/)rg)rhrirtestSerializers7 z'getETreeBuilder..testSerializercs2gtjfdd|djS)z4Serialize an element and its child nodes to a stringcst|jr|j}|jdkr||jds2|jdrf|jdp>d}|jdpLd}jd|j||fnjd|jfn|jdkr|jdk rj|j|jdk rtdt |d rt |j rtd x|D] }|qWn|jkrjd |jfn|j s$jd j |jfn2d j fdd|j jD}jd|j|f|jrjj|jx|D]}|qpWjd|jf|jrj|jdS)Nz rNrOr=zz rVzDocument node cannot have tailr"z$Document node cannot have attributesz z<%s>rYcs"g|]\}}dj||fqS)z%s="%s") fromXmlName).0rr,)filterrr &szOgetETreeBuilder..tostring..serializeElement..z<%s %s>z)r'rr[rrPr5r2r?r\rZr3r"rmrkr&)r/rNrOrfattr)rrgrorhrirrris@          z;getETreeBuilder..tostring..serializeElementr=)r Z InfosetFilterrk)r/)rrg)rorhrirtostrings -z!getETreeBuilder..tostringcsDeZdZZZZZZZfddZ fddZ ddZ dS)z$getETreeBuilder..TreeBuildercs|S)Nr)rr/)rlrrrlAsz3getETreeBuilder..TreeBuilder.testSerializercs<r |jjS|jdk r*|jjjd|jS|jjjdSdS)Nz{%s}htmlr )ZdocumentrZdefaultNamespacefind)r)fullTreerr getDocumentDs   z0getETreeBuilder..TreeBuilder.getDocumentcSstjj|jS)N)r TreeBuilder getFragmentr)rrrrrwNsz0getETreeBuilder..TreeBuilder.getFragmentN) rGrHrIZ documentClassZ doctypeClassZ elementClassZ commentClassZ fragmentClassimplementationrlrurwr)rKrWrXrUrElementTreeImplementationrtrlrrrv9s  rv)rKrrrErvlocals)ryrtrrrvr) 
rKrWrXrUrrrgryrtrlrgetETreeBuilders ~ >6$r{)F)Z __future__rrrZsixrrer=rr r r Z_utilsr compiler^r{ZgetETreeModulerrrrs        Etreebuilders/__pycache__/etree_lxml.cpython-36.opt-1.pyc000064400000026430147204715120017202 0ustar003 B;WQ7@sdZddlmZmZmZddlZddlZddlZddlm Z ddl m Z ddlm Z dd lm Z dd lmZddlj Z d Zejd Ze jd jZGdddeZGdddeZddZddZGddde jZdS)aModule for supporting the lxml.etree library. The idea here is to use as much of the native library as possible, without using fragile hacks like custom element names that break between releases. The downside of this is that we cannot represent all possible trees; specifically the following are known to cause problems: Text or comments as siblings of the root element Docypes with no name When any of these things occur, we emit a DataLossWarning )absolute_importdivisionunicode_literalsN)base)DataLossWarning) constants)etree) _ihatexmlTz {([^}]*)}(.*)Zasdc@seZdZddZdS) DocumentTypecCs||_||_||_dS)N)namepublicIdsystemId)selfr rrr /usr/lib/python3.6/etree_lxml.py__init__#szDocumentType.__init__N)__name__ __module__ __qualname__rrrrrr "sr c@s,eZdZddZddZddZeeZdS)DocumentcCsd|_g|_dS)N) _elementTree _childNodes)rrrrr*szDocument.__init__cCs|jjj|jdS)N)rgetrootZaddnext_element)relementrrr appendChild.szDocument.appendChildcCs|jS)N)r)rrrr_getChildNodes1szDocument._getChildNodesN)rrrrrrpropertyZ childNodesrrrrr)srcs6gtjdddfdd |ddjS)NT)preventDoubleDashCommentsrc st|dst|drˆjd|jjrz|jjp6|jjsFd|jj}nd|jj|jj|jjf}jdd|d|f|j}x|jdk r|j}qWxx|dk r||d|j }qWnTt |t st |t rjd d||fn(jd x|D]}||dqWn|j tkrnjd d||jft|d r|jrjd d||jfntjj|j }|dk r|jd }|jd}tj|}jdd||j|fnjdd|j|j ft|drg}xr|jjD]d\} } tj| }|dk rH|j\}} j| } tj|}d|| f} n j| } |j| | fqWx2t|D]&\} } jdd|d| | fqpW|jrjd d|d|jf|d7}x|D]} | |qWt|d r|jrjd d|d|jfdS)Ntagrz #documentz zz|%s%s rz|%s"%s"z#document-fragmentz|%stailrz |%s<%s %s>z|%s<%s>attribz%s %sz |%s%s="%s")hasattrappenddocinfo internalDTDZ public_idZ system_url root_namerZ getpreviousZgetnext isinstancestrbytesr! 
comment_typetextr#etree_builders tag_regexpmatchgroupr prefixes fromXmlNamer$itemsgroupssorted) rindentdtd_strZ next_elementZnsmatchnsr!prefix attributesr valueZ attr_stringchild) infosetFilterrvserializeElementrrrA;sp                     " z(testSerializer..serializeElement )r)r InfosetFilterjoin)rr)r?r@rArtestSerializer7s  F rEcs$gfdd|djS)z4Serialize an element and its child nodes to a stringcs t|dsH|jjr:|jjr$|jj}n d|jj}j||jn|jtkrfjd|j fn|j sjd|jfn.dj dd|j j D}jd|j|f|j rj|j x|D] }|qWjd |jft|d o|j rj|j dS) Nr!z z z<%s>r"cSsg|]\}}d||fqS)z%s="%s"r).0r r=rrr sz6tostring..serializeElement..z<%s %s>zr#)r%r'r(doctyper)r&rr!r-r.r$rDr5r#)rr9attrr>)r@rArrrAs*         z"tostring..serializeElement)rD)rr)r@rArtostrings rKcszeZdZeZeZdZdZeZ e Z dddZ ddZ ddZd d Zd d Zd dZdddZdfdd ZddZZS) TreeBuilderNFcstjt|dtjdd|_||_GfdddtGfdddj}Gfdd d j }||_ ||_ t j j||dS) N)fullTreeT)r cs&eZdZdfdd ZfddZdS)z(TreeBuilder.__init__..AttributesNcsv|dkr i}||_tj||xR|jD]F\}}t|trVd|dj|df}n j|}||jjj|<q(WdS)Nz{%s}%srr)rdictrr5r*tuplecoerceAttributer$)rrr=keyr )r?rrrs   z1TreeBuilder.__init__..Attributes.__init__csPtj|||t|tr4d|dj|df}n j|}||jjj|<dS)Nz{%s}%srr)rN __setitem__r*rOrPrr$)rrQr=r )r?rrrRs   z4TreeBuilder.__init__..Attributes.__setitem__)N)rrrrrRr)r?rr Attributess rScsxeZdZfddZfddZfddZeeeZddZfd d Z eee Z dfd d Z fddZ d S)z%TreeBuilder.__init__..Elementcs*j|}jj|||d||_dS)N) namespace) coerceElementElementr _attributes)rr rT)rSbuilderr?rrrs z.TreeBuilder.__init__..Element.__init__cs$j||_|j|j|j|j_dS)N)rU_nameZ _getETreeTagZ _namespacerr!)rr )r?rr_setNames z.TreeBuilder.__init__..Element._setNamecs j|jS)N)r4rY)r)r?rr_getNamesz.TreeBuilder.__init__..Element._getNamecSs|jS)N)rW)rrrr_getAttributessz4TreeBuilder.__init__..Element._getAttributescs|||_dS)N)rW)rr<)rSrr_setAttributessz4TreeBuilder.__init__..Element._setAttributesNcsj|}jj|||dS)N)ZcoerceCharactersrV insertText)rdataZ insertBefore)rXr?rrr^s 
z0TreeBuilder.__init__..Element.insertTextcsjj||dS)N)rVr)rr>)rXrrrsz1TreeBuilder.__init__..Element.appendChild)N) rrrrrZr[rr r\r]r<r^rr)rSrXr?rrrVs     rVcs8eZdZfddZfddZddZeeeZdS)z%TreeBuilder.__init__..Commentcsj|}jj||dS)N) coerceCommentCommentr)rr_)rXr?rrrs z.TreeBuilder.__init__..Comment.__init__csj|}||j_dS)N)r`rr.)rr_)r?rr_setDatas z.TreeBuilder.__init__..Comment._setDatacSs|jjS)N)rr.)rrrr_getDatasz.TreeBuilder.__init__..Comment._getDataN)rrrrrbrcrr_r)rXr?rrras ra)r/ZgetETreeModuler r rCr?namespaceHTMLElementsrNrVra elementClass commentClassrrLr)rrdrMrVrar)rSrXr?rrszTreeBuilder.__init__cCs$tjj||j|_g|_d|_dS)N)rrLresetinsertCommentInitial insertCommentinitial_commentsrH)rrrrrgs zTreeBuilder.resetcCst|S)N)rE)rrrrrrE szTreeBuilder.testSerializercCstr |jjS|jjjSdS)N)rMdocumentrr)rrrr getDocument szTreeBuilder.getDocumentcCsFg}|jdj}|jr"|j|j|jt||jrB|j|j|S)Nr) openElementsrr.r&extendlistr#)rZfragmentrrrr getFragments   zTreeBuilder.getFragmentcCsh|d}|d}|d}|s0tjdtd|_n4|jj|}||krPtjdt|j|||}||_dS)Nr rrz#lxml cannot represent empty doctypez%lxml cannot represent non-xml doctype)warningswarnrrHr?rU doctypeClass)rtokenr rrZ coercedNamerHrrr insertDoctypes   zTreeBuilder.insertDoctypecCs|jj|dS)N)rjr&)rr_parentrrrrh,sz TreeBuilder.insertCommentInitialcsB||jkr,|jjjdjtkr,tjdttt |j ||dS)Nrz@lxml cannot represent adjacent comments beyond the root elements) rkrrr!r-rqrrrsuperrLri)rr_rv) __class__rrinsertCommentMain1s  zTreeBuilder.insertCommentMainc Csd}|jr|d|jj7}|jjdk s2|jjdk r|d|jj|jjpFd7}|jjr|jj}|jddkr|jddkrtjdt |j dd }|jddkr|d |7}q|d |7}n|d 7}|d 7}|jj|dkrtjdt |d7}t j |}x*|j D] }|j|d}|j|jqW|j|_|j|j_|d}|jd|j}|dkrb|} n d||f} | |_|j||} || _|jjj| |jj| |j|_dS)zCreate the document rootrJz r zGlxml cannot represent doctype with a different name to the root elementz$r_rTz{%s}%s)rHr rrr?Z coercePubidfindrqrrrreplacer Z fromstringrjrfZ addpreviousr documentClassrkZ getroottreergetZdefaultNamespacer!rerr&rmrzri) rrtZdocStrZsysidroot comment_tokenZcommentr rTZ etree_tagZ root_elementrrr 
insertRoot7sJ             zTreeBuilder.insertRoot)F)N)N)rrrrrr rsrerfZ fragmentClassr implementationrrgrErlrprurhrzr __classcell__rr)ryrrLs L  rL)__doc__Z __future__rrrrqresysrJrr rr r/r Z lxml.etreerMcompiler0rar!r-objectr rrErKrLrrrr s$        O)treebuilders/__pycache__/etree_lxml.cpython-36.pyc000064400000026717147204715120016253 0ustar003 B;WQ7@sdZddlmZmZmZddlZddlZddlZddlm Z ddl m Z ddlm Z dd lm Z dd lmZddlj Z d Zejd Ze jd jZGdddeZGdddeZddZddZGddde jZdS)aModule for supporting the lxml.etree library. The idea here is to use as much of the native library as possible, without using fragile hacks like custom element names that break between releases. The downside of this is that we cannot represent all possible trees; specifically the following are known to cause problems: Text or comments as siblings of the root element Docypes with no name When any of these things occur, we emit a DataLossWarning )absolute_importdivisionunicode_literalsN)base)DataLossWarning) constants)etree) _ihatexmlTz {([^}]*)}(.*)Zasdc@seZdZddZdS) DocumentTypecCs||_||_||_dS)N)namepublicIdsystemId)selfr rrr /usr/lib/python3.6/etree_lxml.py__init__#szDocumentType.__init__N)__name__ __module__ __qualname__rrrrrr "sr c@s,eZdZddZddZddZeeZdS)DocumentcCsd|_g|_dS)N) _elementTree _childNodes)rrrrr*szDocument.__init__cCs|jjj|jdS)N)rgetrootZaddnext_element)relementrrr appendChild.szDocument.appendChildcCs|jS)N)r)rrrr_getChildNodes1szDocument._getChildNodesN)rrrrrrpropertyZ childNodesrrrrr)srcs6gtjdddfdd |ddjS)NT)preventDoubleDashCommentsrc sDt|ds8t|drˆjd|jjrz|jjp6|jjsFd|jj}nd|jj|jj|jjf}jdd|d|f|j}x|jdk r|j}qWx|dk r||d|j }qWnrt |t st |t r t |t st jd dkstjd d||fn(jd x|D]}||dqWn|jtkrjd d||jft|d r@|jr@jd d||jfnt |tjsttjj|j}|dk r|jd}|jd}tj|}jdd||j|fnjdd|j|jft|drg}xr|jjD]d\} } tj| }|dk rx|j\}} j| } tj|}d|| f} n j| } |j| | fq.Wx2t |D]&\} } jdd|d| | fqW|jrjd d|d|jf|d7}x|D]} | |qWt|d r@|jr@jd d|d|jfdS)Ntagrz #documentz zz|%s%s rrz|%s"%s"z#document-fragmentz|%stailrz |%s<%s %s>z|%s<%s>attribz%s %sz 
|%s%s="%s")!hasattrappenddocinfo internalDTDZ public_idZ system_url root_namerZ getpreviousZgetnext isinstancestrbytessys version_infoAssertionErrorr! comment_typetextr#r Z_Elementetree_builders tag_regexpmatchgroupr prefixes fromXmlNamer$itemsgroupssorted) rindentdtd_strZ next_elementZnsmatchnsr!prefix attributesr valueZ attr_stringchild) infosetFilterrvserializeElementrrrD;st                     " z(testSerializer..serializeElement )r)r InfosetFilterjoin)rr)rBrCrDrtestSerializer7s  F rHcs$gfdd|djS)z4Serialize an element and its child nodes to a stringcs t|dsH|jjr:|jjr$|jj}n d|jj}j||jn|jtkrfjd|j fn|j sjd|jfn.dj dd|j j D}jd|j|f|j rj|j x|D] }|qWjd |jft|d o|j rj|j dS) Nr!z z z<%s>r"cSsg|]\}}d||fqS)z%s="%s"r).0r r@rrr sz6tostring..serializeElement..z<%s %s>zr#)r%r'r(doctyper)r&rr!r0r1r$rGr8r#)rr<attrrA)rCrDrrrDs*         z"tostring..serializeElement)rG)rr)rCrDrtostrings rNcszeZdZeZeZdZdZeZ e Z dddZ ddZ ddZd d Zd d Zd dZdddZdfdd ZddZZS) TreeBuilderNFcstjt|dtjdd|_||_GfdddtGfdddj}Gfdd d j }||_ ||_ t j j||dS) N)fullTreeT)r cs&eZdZdfdd ZfddZdS)z(TreeBuilder.__init__..AttributesNcsv|dkr i}||_tj||xR|jD]F\}}t|trVd|dj|df}n j|}||jjj|<q(WdS)Nz{%s}%srr)rdictrr8r*tuplecoerceAttributer$)rrr@keyr )rBrrrs   z1TreeBuilder.__init__..Attributes.__init__csPtj|||t|tr4d|dj|df}n j|}||jjj|<dS)Nz{%s}%srr)rQ __setitem__r*rRrSrr$)rrTr@r )rBrrrUs   z4TreeBuilder.__init__..Attributes.__setitem__)N)rrrrrUr)rBrr Attributess rVcsxeZdZfddZfddZfddZeeeZddZfd d Z eee Z dfd d Z fddZ d S)z%TreeBuilder.__init__..Elementcs*j|}jj|||d||_dS)N) namespace) coerceElementElementr _attributes)rr rW)rVbuilderrBrrrs z.TreeBuilder.__init__..Element.__init__cs$j||_|j|j|j|j_dS)N)rX_nameZ _getETreeTagZ _namespacerr!)rr )rBrr_setNames z.TreeBuilder.__init__..Element._setNamecs 
j|jS)N)r7r\)r)rBrr_getNamesz.TreeBuilder.__init__..Element._getNamecSs|jS)N)rZ)rrrr_getAttributessz4TreeBuilder.__init__..Element._getAttributescs|||_dS)N)rZ)rr?)rVrr_setAttributessz4TreeBuilder.__init__..Element._setAttributesNcsj|}jj|||dS)N)ZcoerceCharactersrY insertText)rdataZ insertBefore)r[rBrrras z0TreeBuilder.__init__..Element.insertTextcsjj||dS)N)rYr)rrA)r[rrrsz1TreeBuilder.__init__..Element.appendChild)N) rrrrr]r^rr r_r`r?rarr)rVr[rBrrrYs     rYcs8eZdZfddZfddZddZeeeZdS)z%TreeBuilder.__init__..Commentcsj|}jj||dS)N) coerceCommentCommentr)rrb)r[rBrrrs z.TreeBuilder.__init__..Comment.__init__csj|}||j_dS)N)rcrr1)rrb)rBrr_setDatas z.TreeBuilder.__init__..Comment._setDatacSs|jjS)N)rr1)rrrr_getDatasz.TreeBuilder.__init__..Comment._getDataN)rrrrrerfrrbr)r[rBrrrds rd)r2ZgetETreeModuler r rFrBnamespaceHTMLElementsrQrYrd elementClass commentClassrrOr)rrgrPrYrdr)rVr[rBrrszTreeBuilder.__init__cCs$tjj||j|_g|_d|_dS)N)rrOresetinsertCommentInitial insertCommentinitial_commentsrK)rrrrrjs zTreeBuilder.resetcCst|S)N)rH)rrrrrrH szTreeBuilder.testSerializercCstr |jjS|jjjSdS)N)rPdocumentrr)rrrr getDocument szTreeBuilder.getDocumentcCsFg}|jdj}|jr"|j|j|jt||jrB|j|j|S)Nr) openElementsrr1r&extendlistr#)rZfragmentrrrr getFragments   zTreeBuilder.getFragmentcCsh|d}|d}|d}|s0tjdtd|_n4|jj|}||krPtjdt|j|||}||_dS)Nr rrz#lxml cannot represent empty doctypez%lxml cannot represent non-xml doctype)warningswarnrrKrBrX doctypeClass)rtokenr rrZ coercedNamerKrrr insertDoctypes   zTreeBuilder.insertDoctypecCs6|dks||jkst|jjdks&t|jj|dS)N)rnr/rrmr&)rrbparentrrrrk,sz TreeBuilder.insertCommentInitialcsB||jkr,|jjjdjtkr,tjdttt |j ||dS)Nrz@lxml cannot represent adjacent comments beyond the root elements) rnrrr!r0rtrursuperrOrl)rrbry) __class__rrinsertCommentMain1s  zTreeBuilder.insertCommentMainc Csd}|jr|jjst|d|jj7}|jjdk s>|jjdk r|d|jj|jjpRd7}|jjr|jj}|jddkr|jddkrtj dt |j dd }|jddkr|d |7}q|d |7}n|d 7}|d 7}|jj|dkrtj dt |d7}t j |}x*|jD] }|j|d}|j|jqW|j|_|j|j_|d}|jd|j}|dkrn|} n d||f} | |_|j||} || _|jjj| |jj| 
|j|_dS)zCreate the document rootrMz r zGlxml cannot represent doctype with a different name to the root elementz$rbrWz{%s}%s)rKr r/rrrBZ coercePubidfindrtrurreplacer Z fromstringrmriZ addpreviousr documentClassrnZ getroottreergetZdefaultNamespacer!rhrr&rpr}rl) rrwZdocStrZsysidroot comment_tokenZcommentr rWZ etree_tagZ root_elementrrr insertRoot7sL              zTreeBuilder.insertRoot)F)N)N)rrrrrr rvrhriZ fragmentClassr implementationrrjrHrorsrxrkr}r __classcell__rr)r|rrOs L  rO)__doc__Z __future__rrrrtrer-rMrr rr r2r Z lxml.etreerPcompiler3rdr!r0objectr rrHrNrOrrrr s$        O)treebuilders/__init__.py000064400000006516147204715120011361 0ustar00"""A collection of modules for building different kinds of tree from HTML documents. To create a treebuilder for a new type of tree, you need to do implement several things: 1) A set of classes for various types of elements: Document, Doctype, Comment, Element. These must implement the interface of _base.treebuilders.Node (although comment nodes have a different signature for their constructor, see treebuilders.etree.Comment) Textual content may also be implemented as another node type, or not, as your tree implementation requires. 2) A treebuilder object (called TreeBuilder by convention) that inherits from treebuilders._base.TreeBuilder. 
This has 4 required attributes: documentClass - the class to use for the bottommost node of a document elementClass - the class to use for HTML Elements commentClass - the class to use for comments doctypeClass - the class to use for doctypes It also has one required method: getDocument - Returns the root node of the complete document tree 3) If you wish to run the unit tests, you must also create a testSerializer method on your treebuilder which accepts a node and returns a string containing Node and its children serialized according to the format used in the unittests """ from __future__ import absolute_import, division, unicode_literals from .._utils import default_etree treeBuilderCache = {} def getTreeBuilder(treeType, implementation=None, **kwargs): """Get a TreeBuilder class for various types of tree with built-in support treeType - the name of the tree type required (case-insensitive). Supported values are: "dom" - A generic builder for DOM implementations, defaulting to a xml.dom.minidom based implementation. "etree" - A generic builder for tree implementations exposing an ElementTree-like interface, defaulting to xml.etree.cElementTree if available and xml.etree.ElementTree if not. "lxml" - A etree-based builder for lxml.etree, handling limitations of lxml's implementation. implementation - (Currently applies to the "etree" and "dom" tree types). A module implementing the tree type e.g. xml.etree.ElementTree or xml.etree.cElementTree.""" treeType = treeType.lower() if treeType not in treeBuilderCache: if treeType == "dom": from . import dom # Come up with a sane default (pref. from the stdlib) if implementation is None: from xml.dom import minidom implementation = minidom # NEVER cache here, caching is done in the dom submodule return dom.getDomModule(implementation, **kwargs).TreeBuilder elif treeType == "lxml": from . import etree_lxml treeBuilderCache[treeType] = etree_lxml.TreeBuilder elif treeType == "etree": from . 
import etree if implementation is None: implementation = default_etree # NEVER cache here, caching is done in the etree submodule return etree.getETreeModule(implementation, **kwargs).TreeBuilder else: raise ValueError("""Unrecognised treebuilder "%s" """ % treeType) return treeBuilderCache.get(treeType) treebuilders/base.py000064400000033152147204715120010530 0ustar00from __future__ import absolute_import, division, unicode_literals from six import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces # The scope markers are inserted when entering object elements, # marquees, table cells, and table captions, and are used to prevent formatting # from "leaking" into tables, object elements, and marquees. Marker = None listElementsMap = { None: (frozenset(scopingElements), False), "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), (namespaces["html"], "ul")])), False), "table": (frozenset([(namespaces["html"], "html"), (namespaces["html"], "table")]), False), "select": (frozenset([(namespaces["html"], "optgroup"), (namespaces["html"], "option")]), True) } class Node(object): def __init__(self, name): """Node representing an item in the tree. name - The tag name associated with the node parent - The parent of the current node (or None for the document node) value - The value of the current node (applies to text nodes and comments attributes - a dict holding name, value pairs for attributes of the node childNodes - a list of child nodes of the current node. 
This must include all elements but not necessarily other node types _flags - A list of miscellaneous flags that can be set on the node """ self.name = name self.parent = None self.value = None self.attributes = {} self.childNodes = [] self._flags = [] def __str__(self): attributesStr = " ".join(["%s=\"%s\"" % (name, value) for name, value in self.attributes.items()]) if attributesStr: return "<%s %s>" % (self.name, attributesStr) else: return "<%s>" % (self.name) def __repr__(self): return "<%s>" % (self.name) def appendChild(self, node): """Insert node as a child of the current node """ raise NotImplementedError def insertText(self, data, insertBefore=None): """Insert data as text in the current node, positioned before the start of node insertBefore or to the end of the node's text. """ raise NotImplementedError def insertBefore(self, node, refNode): """Insert node as a child of the current node, before refNode in the list of child nodes. Raises ValueError if refNode is not a child of the current node""" raise NotImplementedError def removeChild(self, node): """Remove node from the children of the current node """ raise NotImplementedError def reparentChildren(self, newParent): """Move all the children of the current node to newParent. This is needed so that trees that don't store text as nodes move the text in the correct way """ # XXX - should this method be made more general? for child in self.childNodes: newParent.appendChild(child) self.childNodes = [] def cloneNode(self): """Return a shallow copy of the current node i.e. 
a node with the same name and attributes but with no parent or child nodes """ raise NotImplementedError def hasContent(self): """Return true if the node has children or text, false otherwise """ raise NotImplementedError class ActiveFormattingElements(list): def append(self, node): equalCount = 0 if node != Marker: for element in self[::-1]: if element == Marker: break if self.nodesEqual(element, node): equalCount += 1 if equalCount == 3: self.remove(element) break list.append(self, node) def nodesEqual(self, node1, node2): if not node1.nameTuple == node2.nameTuple: return False if not node1.attributes == node2.attributes: return False return True class TreeBuilder(object): """Base treebuilder implementation documentClass - the class to use for the bottommost node of a document elementClass - the class to use for HTML Elements commentClass - the class to use for comments doctypeClass - the class to use for doctypes """ # pylint:disable=not-callable # Document class documentClass = None # The class to use for creating a node elementClass = None # The class to use for creating comments commentClass = None # The class to use for creating doctypes doctypeClass = None # Fragment class fragmentClass = None def __init__(self, namespaceHTMLElements): if namespaceHTMLElements: self.defaultNamespace = "http://www.w3.org/1999/xhtml" else: self.defaultNamespace = None self.reset() def reset(self): self.openElements = [] self.activeFormattingElements = ActiveFormattingElements() # XXX - rename these to headElement, formElement self.headPointer = None self.formPointer = None self.insertFromTable = False self.document = self.documentClass() def elementInScope(self, target, variant=None): # If we pass a node in we match that. 
if we pass a string # match any node with that name exactNode = hasattr(target, "nameTuple") if not exactNode: if isinstance(target, text_type): target = (namespaces["html"], target) assert isinstance(target, tuple) listElements, invert = listElementsMap[variant] for node in reversed(self.openElements): if exactNode and node == target: return True elif not exactNode and node.nameTuple == target: return True elif (invert ^ (node.nameTuple in listElements)): return False assert False # We should never reach this point def reconstructActiveFormattingElements(self): # Within this algorithm the order of steps described in the # specification is not quite the same as the order of steps in the # code. It should still do the same though. # Step 1: stop the algorithm when there's nothing to do. if not self.activeFormattingElements: return # Step 2 and step 3: we start with the last element. So i is -1. i = len(self.activeFormattingElements) - 1 entry = self.activeFormattingElements[i] if entry == Marker or entry in self.openElements: return # Step 6 while entry != Marker and entry not in self.openElements: if i == 0: # This will be reset to 0 below i = -1 break i -= 1 # Step 5: let entry be one earlier in the list. 
entry = self.activeFormattingElements[i] while True: # Step 7 i += 1 # Step 8 entry = self.activeFormattingElements[i] clone = entry.cloneNode() # Mainly to get a new copy of the attributes # Step 9 element = self.insertElement({"type": "StartTag", "name": clone.name, "namespace": clone.namespace, "data": clone.attributes}) # Step 10 self.activeFormattingElements[i] = element # Step 11 if element == self.activeFormattingElements[-1]: break def clearActiveFormattingElements(self): entry = self.activeFormattingElements.pop() while self.activeFormattingElements and entry != Marker: entry = self.activeFormattingElements.pop() def elementInActiveFormattingElements(self, name): """Check if an element exists between the end of the active formatting elements and the last marker. If it does, return it, else return false""" for item in self.activeFormattingElements[::-1]: # Check for Marker first because if it's a Marker it doesn't have a # name attribute. if item == Marker: break elif item.name == name: return item return False def insertRoot(self, token): element = self.createElement(token) self.openElements.append(element) self.document.appendChild(element) def insertDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] doctype = self.doctypeClass(name, publicId, systemId) self.document.appendChild(doctype) def insertComment(self, token, parent=None): if parent is None: parent = self.openElements[-1] parent.appendChild(self.commentClass(token["data"])) def createElement(self, token): """Create an element but don't insert it anywhere""" name = token["name"] namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] return element def _getInsertFromTable(self): return self._insertFromTable def _setInsertFromTable(self, value): """Switch the function used to insert an element from the normal one to the misnested table one and back again""" 
self._insertFromTable = value if value: self.insertElement = self.insertElementTable else: self.insertElement = self.insertElementNormal insertFromTable = property(_getInsertFromTable, _setInsertFromTable) def insertElementNormal(self, token): name = token["name"] assert isinstance(name, text_type), "Element %s not unicode" % name namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] self.openElements[-1].appendChild(element) self.openElements.append(element) return element def insertElementTable(self, token): """Create an element and insert it into the tree""" element = self.createElement(token) if self.openElements[-1].name not in tableInsertModeElements: return self.insertElementNormal(token) else: # We should be in the InTable mode. This means we want to do # special magic element rearranging parent, insertBefore = self.getTableMisnestedNodePosition() if insertBefore is None: parent.appendChild(element) else: parent.insertBefore(element, insertBefore) self.openElements.append(element) return element def insertText(self, data, parent=None): """Insert text data.""" if parent is None: parent = self.openElements[-1] if (not self.insertFromTable or (self.insertFromTable and self.openElements[-1].name not in tableInsertModeElements)): parent.insertText(data) else: # We should be in the InTable mode. 
This means we want to do # special magic element rearranging parent, insertBefore = self.getTableMisnestedNodePosition() parent.insertText(data, insertBefore) def getTableMisnestedNodePosition(self): """Get the foster parent element, and sibling to insert before (or None) when inserting a misnested table node""" # The foster parent element is the one which comes before the most # recently opened table element # XXX - this is really inelegant lastTable = None fosterParent = None insertBefore = None for elm in self.openElements[::-1]: if elm.name == "table": lastTable = elm break if lastTable: # XXX - we should really check that this parent is actually a # node here if lastTable.parent: fosterParent = lastTable.parent insertBefore = lastTable else: fosterParent = self.openElements[ self.openElements.index(lastTable) - 1] else: fosterParent = self.openElements[0] return fosterParent, insertBefore def generateImpliedEndTags(self, exclude=None): name = self.openElements[-1].name # XXX td, th and tr are not actually needed if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and name != exclude): self.openElements.pop() # XXX This is not entirely what the specification says. We should # investigate it more closely. self.generateImpliedEndTags(exclude) def getDocument(self): "Return the final tree" return self.document def getFragment(self): "Return the final fragment" # assert self.innerHTML fragment = self.fragmentClass() self.openElements[0].reparentChildren(fragment) return fragment def testSerializer(self, node): """Serialize the subtree of node in the format required by unit tests node - the node from which to start serializing""" raise NotImplementedError treebuilders/dom.py000064400000021203147204715120010367 0ustar00from __future__ import absolute_import, division, unicode_literals from collections import MutableMapping from xml.dom import minidom, Node import weakref from . import base from .. 
import constants from ..constants import namespaces from .._utils import moduleFactoryFactory def getDomBuilder(DomImplementation): Dom = DomImplementation class AttrList(MutableMapping): def __init__(self, element): self.element = element def __iter__(self): return iter(self.element.attributes.keys()) def __setitem__(self, name, value): if isinstance(name, tuple): raise NotImplementedError else: attr = self.element.ownerDocument.createAttribute(name) attr.value = value self.element.attributes[name] = attr def __len__(self): return len(self.element.attributes) def items(self): return list(self.element.attributes.items()) def values(self): return list(self.element.attributes.values()) def __getitem__(self, name): if isinstance(name, tuple): raise NotImplementedError else: return self.element.attributes[name].value def __delitem__(self, name): if isinstance(name, tuple): raise NotImplementedError else: del self.element.attributes[name] class NodeBuilder(base.Node): def __init__(self, element): base.Node.__init__(self, element.nodeName) self.element = element namespace = property(lambda self: hasattr(self.element, "namespaceURI") and self.element.namespaceURI or None) def appendChild(self, node): node.parent = self self.element.appendChild(node.element) def insertText(self, data, insertBefore=None): text = self.element.ownerDocument.createTextNode(data) if insertBefore: self.element.insertBefore(text, insertBefore.element) else: self.element.appendChild(text) def insertBefore(self, node, refNode): self.element.insertBefore(node.element, refNode.element) node.parent = self def removeChild(self, node): if node.element.parentNode == self.element: self.element.removeChild(node.element) node.parent = None def reparentChildren(self, newParent): while self.element.hasChildNodes(): child = self.element.firstChild self.element.removeChild(child) newParent.element.appendChild(child) self.childNodes = [] def getAttributes(self): return AttrList(self.element) def 
setAttributes(self, attributes): if attributes: for name, value in list(attributes.items()): if isinstance(name, tuple): if name[0] is not None: qualifiedName = (name[0] + ":" + name[1]) else: qualifiedName = name[1] self.element.setAttributeNS(name[2], qualifiedName, value) else: self.element.setAttribute( name, value) attributes = property(getAttributes, setAttributes) def cloneNode(self): return NodeBuilder(self.element.cloneNode(False)) def hasContent(self): return self.element.hasChildNodes() def getNameTuple(self): if self.namespace is None: return namespaces["html"], self.name else: return self.namespace, self.name nameTuple = property(getNameTuple) class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable def documentClass(self): self.dom = Dom.getDOMImplementation().createDocument(None, None, None) return weakref.proxy(self) def insertDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] domimpl = Dom.getDOMImplementation() doctype = domimpl.createDocumentType(name, publicId, systemId) self.document.appendChild(NodeBuilder(doctype)) if Dom == minidom: doctype.ownerDocument = self.dom def elementClass(self, name, namespace=None): if namespace is None and self.defaultNamespace is None: node = self.dom.createElement(name) else: node = self.dom.createElementNS(namespace, name) return NodeBuilder(node) def commentClass(self, data): return NodeBuilder(self.dom.createComment(data)) def fragmentClass(self): return NodeBuilder(self.dom.createDocumentFragment()) def appendChild(self, node): self.dom.appendChild(node.element) def testSerializer(self, element): return testSerializer(element) def getDocument(self): return self.dom def getFragment(self): return base.TreeBuilder.getFragment(self).element def insertText(self, data, parent=None): data = data if parent != self: base.TreeBuilder.insertText(self, data, parent) else: # HACK: allow text nodes as children of the document node if hasattr(self.dom, 
'_child_node_types'): # pylint:disable=protected-access if Node.TEXT_NODE not in self.dom._child_node_types: self.dom._child_node_types = list(self.dom._child_node_types) self.dom._child_node_types.append(Node.TEXT_NODE) self.dom.appendChild(self.dom.createTextNode(data)) implementation = DomImplementation name = None def testSerializer(element): element.normalize() rv = [] def serializeElement(element, indent=0): if element.nodeType == Node.DOCUMENT_TYPE_NODE: if element.name: if element.publicId or element.systemId: publicId = element.publicId or "" systemId = element.systemId or "" rv.append("""|%s""" % (' ' * indent, element.name, publicId, systemId)) else: rv.append("|%s" % (' ' * indent, element.name)) else: rv.append("|%s" % (' ' * indent,)) elif element.nodeType == Node.DOCUMENT_NODE: rv.append("#document") elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: rv.append("#document-fragment") elif element.nodeType == Node.COMMENT_NODE: rv.append("|%s" % (' ' * indent, element.nodeValue)) elif element.nodeType == Node.TEXT_NODE: rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) else: if (hasattr(element, "namespaceURI") and element.namespaceURI is not None): name = "%s %s" % (constants.prefixes[element.namespaceURI], element.nodeName) else: name = element.nodeName rv.append("|%s<%s>" % (' ' * indent, name)) if element.hasAttributes(): attributes = [] for i in range(len(element.attributes)): attr = element.attributes.item(i) name = attr.nodeName value = attr.value ns = attr.namespaceURI if ns: name = "%s %s" % (constants.prefixes[ns], attr.localName) else: name = attr.nodeName attributes.append((name, value)) for name, value in sorted(attributes): rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) indent += 2 for child in element.childNodes: serializeElement(child, indent) serializeElement(element, 0) return "\n".join(rv) return locals() # The actual means to get a module! 
getDomModule = moduleFactoryFactory(getDomBuilder) treebuilders/etree.py000064400000030720147204715120010720 0ustar00from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access from six import text_type import re from . import base from .. import _ihatexml from .. import constants from ..constants import namespaces from .._utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") def getETreeBuilder(ElementTreeImplementation, fullTree=False): ElementTree = ElementTreeImplementation ElementTreeCommentType = ElementTree.Comment("asd").tag class Element(base.Node): def __init__(self, name, namespace=None): self._name = name self._namespace = namespace self._element = ElementTree.Element(self._getETreeTag(name, namespace)) if namespace is None: self.nameTuple = namespaces["html"], self._name else: self.nameTuple = self._namespace, self._name self.parent = None self._childNodes = [] self._flags = [] def _getETreeTag(self, name, namespace): if namespace is None: etree_tag = name else: etree_tag = "{%s}%s" % (namespace, name) return etree_tag def _setName(self, name): self._name = name self._element.tag = self._getETreeTag(self._name, self._namespace) def _getName(self): return self._name name = property(_getName, _setName) def _setNamespace(self, namespace): self._namespace = namespace self._element.tag = self._getETreeTag(self._name, self._namespace) def _getNamespace(self): return self._namespace namespace = property(_getNamespace, _setNamespace) def _getAttributes(self): return self._element.attrib def _setAttributes(self, attributes): # Delete existing attributes first # XXX - there may be a better way to do this... 
for key in list(self._element.attrib.keys()): del self._element.attrib[key] for key, value in attributes.items(): if isinstance(key, tuple): name = "{%s}%s" % (key[2], key[1]) else: name = key self._element.set(name, value) attributes = property(_getAttributes, _setAttributes) def _getChildNodes(self): return self._childNodes def _setChildNodes(self, value): del self._element[:] self._childNodes = [] for element in value: self.insertChild(element) childNodes = property(_getChildNodes, _setChildNodes) def hasContent(self): """Return true if the node has children or text""" return bool(self._element.text or len(self._element)) def appendChild(self, node): self._childNodes.append(node) self._element.append(node._element) node.parent = self def insertBefore(self, node, refNode): index = list(self._element).index(refNode._element) self._element.insert(index, node._element) node.parent = self def removeChild(self, node): self._childNodes.remove(node) self._element.remove(node._element) node.parent = None def insertText(self, data, insertBefore=None): if not(len(self._element)): if not self._element.text: self._element.text = "" self._element.text += data elif insertBefore is None: # Insert the text as the tail of the last child element if not self._element[-1].tail: self._element[-1].tail = "" self._element[-1].tail += data else: # Insert the text before the specified node children = list(self._element) index = children.index(insertBefore._element) if index > 0: if not self._element[index - 1].tail: self._element[index - 1].tail = "" self._element[index - 1].tail += data else: if not self._element.text: self._element.text = "" self._element.text += data def cloneNode(self): element = type(self)(self.name, self.namespace) for name, value in self.attributes.items(): element.attributes[name] = value return element def reparentChildren(self, newParent): if newParent.childNodes: newParent.childNodes[-1]._element.tail += self._element.text else: if not newParent._element.text: 
newParent._element.text = "" if self._element.text is not None: newParent._element.text += self._element.text self._element.text = "" base.Node.reparentChildren(self, newParent) class Comment(Element): def __init__(self, data): # Use the superclass constructor to set all properties on the # wrapper element self._element = ElementTree.Comment(data) self.parent = None self._childNodes = [] self._flags = [] def _getData(self): return self._element.text def _setData(self, value): self._element.text = value data = property(_getData, _setData) class DocumentType(Element): def __init__(self, name, publicId, systemId): Element.__init__(self, "") self._element.text = name self.publicId = publicId self.systemId = systemId def _getPublicId(self): return self._element.get("publicId", "") def _setPublicId(self, value): if value is not None: self._element.set("publicId", value) publicId = property(_getPublicId, _setPublicId) def _getSystemId(self): return self._element.get("systemId", "") def _setSystemId(self, value): if value is not None: self._element.set("systemId", value) systemId = property(_getSystemId, _setSystemId) class Document(Element): def __init__(self): Element.__init__(self, "DOCUMENT_ROOT") class DocumentFragment(Element): def __init__(self): Element.__init__(self, "DOCUMENT_FRAGMENT") def testSerializer(element): rv = [] def serializeElement(element, indent=0): if not(hasattr(element, "tag")): element = element.getroot() if element.tag == "": if element.get("publicId") or element.get("systemId"): publicId = element.get("publicId") or "" systemId = element.get("systemId") or "" rv.append("""""" % (element.text, publicId, systemId)) else: rv.append("" % (element.text,)) elif element.tag == "DOCUMENT_ROOT": rv.append("#document") if element.text is not None: rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): raise 
TypeError("Document node cannot have attributes") elif element.tag == ElementTreeCommentType: rv.append("|%s" % (' ' * indent, element.text)) else: assert isinstance(element.tag, text_type), \ "Expected unicode, got %s, %s" % (type(element.tag), element.tag) nsmatch = tag_regexp.match(element.tag) if nsmatch is None: name = element.tag else: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] name = "%s %s" % (prefix, name) rv.append("|%s<%s>" % (' ' * indent, name)) if hasattr(element, "attrib"): attributes = [] for name, value in element.attrib.items(): nsmatch = tag_regexp.match(name) if nsmatch is not None: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] attr_string = "%s %s" % (prefix, name) else: attr_string = name attributes.append((attr_string, value)) for name, value in sorted(attributes): rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) if element.text: rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if element.tail: rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) serializeElement(element, 0) return "\n".join(rv) def tostring(element): # pylint:disable=unused-variable """Serialize an element and its child nodes to a string""" rv = [] filter = _ihatexml.InfosetFilter() def serializeElement(element): if isinstance(element, ElementTree.ElementTree): element = element.getroot() if element.tag == "": if element.get("publicId") or element.get("systemId"): publicId = element.get("publicId") or "" systemId = element.get("systemId") or "" rv.append("""""" % (element.text, publicId, systemId)) else: rv.append("" % (element.text,)) elif element.tag == "DOCUMENT_ROOT": if element.text is not None: rv.append(element.text) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): raise TypeError("Document node cannot have attributes") for child in element: 
serializeElement(child) elif element.tag == ElementTreeCommentType: rv.append("" % (element.text,)) else: # This is assumed to be an ordinary element if not element.attrib: rv.append("<%s>" % (filter.fromXmlName(element.tag),)) else: attr = " ".join(["%s=\"%s\"" % ( filter.fromXmlName(name), value) for name, value in element.attrib.items()]) rv.append("<%s %s>" % (element.tag, attr)) if element.text: rv.append(element.text) for child in element: serializeElement(child) rv.append("" % (element.tag,)) if element.tail: rv.append(element.tail) serializeElement(element) return "".join(rv) class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable documentClass = Document doctypeClass = DocumentType elementClass = Element commentClass = Comment fragmentClass = DocumentFragment implementation = ElementTreeImplementation def testSerializer(self, element): return testSerializer(element) def getDocument(self): if fullTree: return self.document._element else: if self.defaultNamespace is not None: return self.document._element.find( "{%s}html" % self.defaultNamespace) else: return self.document._element.find("html") def getFragment(self): return base.TreeBuilder.getFragment(self)._element return locals() getETreeModule = moduleFactoryFactory(getETreeBuilder) treebuilders/etree_lxml.py000064400000033521147204715120011756 0ustar00"""Module for supporting the lxml.etree library. The idea here is to use as much of the native library as possible, without using fragile hacks like custom element names that break between releases. The downside of this is that we cannot represent all possible trees; specifically the following are known to cause problems: Text or comments as siblings of the root element Docypes with no name When any of these things occur, we emit a DataLossWarning """ from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access import warnings import re import sys from . 
import base from ..constants import DataLossWarning from .. import constants from . import etree as etree_builders from .. import _ihatexml import lxml.etree as etree fullTree = True tag_regexp = re.compile("{([^}]*)}(.*)") comment_type = etree.Comment("asd").tag class DocumentType(object): def __init__(self, name, publicId, systemId): self.name = name self.publicId = publicId self.systemId = systemId class Document(object): def __init__(self): self._elementTree = None self._childNodes = [] def appendChild(self, element): self._elementTree.getroot().addnext(element._element) def _getChildNodes(self): return self._childNodes childNodes = property(_getChildNodes) def testSerializer(element): rv = [] infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) def serializeElement(element, indent=0): if not hasattr(element, "tag"): if hasattr(element, "getroot"): # Full tree case rv.append("#document") if element.docinfo.internalDTD: if not (element.docinfo.public_id or element.docinfo.system_url): dtd_str = "" % element.docinfo.root_name else: dtd_str = """""" % ( element.docinfo.root_name, element.docinfo.public_id, element.docinfo.system_url) rv.append("|%s%s" % (' ' * (indent + 2), dtd_str)) next_element = element.getroot() while next_element.getprevious() is not None: next_element = next_element.getprevious() while next_element is not None: serializeElement(next_element, indent + 2) next_element = next_element.getnext() elif isinstance(element, str) or isinstance(element, bytes): # Text in a fragment assert isinstance(element, str) or sys.version_info[0] == 2 rv.append("|%s\"%s\"" % (' ' * indent, element)) else: # Fragment case rv.append("#document-fragment") for next_element in element: serializeElement(next_element, indent + 2) elif element.tag == comment_type: rv.append("|%s" % (' ' * indent, element.text)) if hasattr(element, "tail") and element.tail: rv.append("|%s\"%s\"" % (' ' * indent, element.tail)) else: assert isinstance(element, 
etree._Element) nsmatch = etree_builders.tag_regexp.match(element.tag) if nsmatch is not None: ns = nsmatch.group(1) tag = nsmatch.group(2) prefix = constants.prefixes[ns] rv.append("|%s<%s %s>" % (' ' * indent, prefix, infosetFilter.fromXmlName(tag))) else: rv.append("|%s<%s>" % (' ' * indent, infosetFilter.fromXmlName(element.tag))) if hasattr(element, "attrib"): attributes = [] for name, value in element.attrib.items(): nsmatch = tag_regexp.match(name) if nsmatch is not None: ns, name = nsmatch.groups() name = infosetFilter.fromXmlName(name) prefix = constants.prefixes[ns] attr_string = "%s %s" % (prefix, name) else: attr_string = infosetFilter.fromXmlName(name) attributes.append((attr_string, value)) for name, value in sorted(attributes): rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) if element.text: rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if hasattr(element, "tail") and element.tail: rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) serializeElement(element, 0) return "\n".join(rv) def tostring(element): """Serialize an element and its child nodes to a string""" rv = [] def serializeElement(element): if not hasattr(element, "tag"): if element.docinfo.internalDTD: if element.docinfo.doctype: dtd_str = element.docinfo.doctype else: dtd_str = "" % element.docinfo.root_name rv.append(dtd_str) serializeElement(element.getroot()) elif element.tag == comment_type: rv.append("" % (element.text,)) else: # This is assumed to be an ordinary element if not element.attrib: rv.append("<%s>" % (element.tag,)) else: attr = " ".join(["%s=\"%s\"" % (name, value) for name, value in element.attrib.items()]) rv.append("<%s %s>" % (element.tag, attr)) if element.text: rv.append(element.text) for child in element: serializeElement(child) rv.append("" % (element.tag,)) if hasattr(element, "tail") and element.tail: rv.append(element.tail) serializeElement(element) return 
"".join(rv) class TreeBuilder(base.TreeBuilder): documentClass = Document doctypeClass = DocumentType elementClass = None commentClass = None fragmentClass = Document implementation = etree def __init__(self, namespaceHTMLElements, fullTree=False): builder = etree_builders.getETreeModule(etree, fullTree=fullTree) infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) self.namespaceHTMLElements = namespaceHTMLElements class Attributes(dict): def __init__(self, element, value=None): if value is None: value = {} self._element = element dict.__init__(self, value) # pylint:disable=non-parent-init-called for key, value in self.items(): if isinstance(key, tuple): name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) else: name = infosetFilter.coerceAttribute(key) self._element._element.attrib[name] = value def __setitem__(self, key, value): dict.__setitem__(self, key, value) if isinstance(key, tuple): name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) else: name = infosetFilter.coerceAttribute(key) self._element._element.attrib[name] = value class Element(builder.Element): def __init__(self, name, namespace): name = infosetFilter.coerceElement(name) builder.Element.__init__(self, name, namespace=namespace) self._attributes = Attributes(self) def _setName(self, name): self._name = infosetFilter.coerceElement(name) self._element.tag = self._getETreeTag( self._name, self._namespace) def _getName(self): return infosetFilter.fromXmlName(self._name) name = property(_getName, _setName) def _getAttributes(self): return self._attributes def _setAttributes(self, attributes): self._attributes = Attributes(self, attributes) attributes = property(_getAttributes, _setAttributes) def insertText(self, data, insertBefore=None): data = infosetFilter.coerceCharacters(data) builder.Element.insertText(self, data, insertBefore) def appendChild(self, child): builder.Element.appendChild(self, child) class Comment(builder.Comment): 
def __init__(self, data): data = infosetFilter.coerceComment(data) builder.Comment.__init__(self, data) def _setData(self, data): data = infosetFilter.coerceComment(data) self._element.text = data def _getData(self): return self._element.text data = property(_getData, _setData) self.elementClass = Element self.commentClass = Comment # self.fragmentClass = builder.DocumentFragment base.TreeBuilder.__init__(self, namespaceHTMLElements) def reset(self): base.TreeBuilder.reset(self) self.insertComment = self.insertCommentInitial self.initial_comments = [] self.doctype = None def testSerializer(self, element): return testSerializer(element) def getDocument(self): if fullTree: return self.document._elementTree else: return self.document._elementTree.getroot() def getFragment(self): fragment = [] element = self.openElements[0]._element if element.text: fragment.append(element.text) fragment.extend(list(element)) if element.tail: fragment.append(element.tail) return fragment def insertDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] if not name: warnings.warn("lxml cannot represent empty doctype", DataLossWarning) self.doctype = None else: coercedName = self.infosetFilter.coerceElement(name) if coercedName != name: warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning) doctype = self.doctypeClass(coercedName, publicId, systemId) self.doctype = doctype def insertCommentInitial(self, data, parent=None): assert parent is None or parent is self.document assert self.document._elementTree is None self.initial_comments.append(data) def insertCommentMain(self, data, parent=None): if (parent == self.document and self.document._elementTree.getroot()[-1].tag == comment_type): warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) super(TreeBuilder, self).insertComment(data, parent) def insertRoot(self, token): """Create the document root""" # Because of the way libxml2 
works, it doesn't seem to be possible to # alter information like the doctype after the tree has been parsed. # Therefore we need to use the built-in parser to create our initial # tree, after which we can add elements like normal docStr = "" if self.doctype: assert self.doctype.name docStr += "= 0 and sysid.find('"') >= 0: warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning) sysid = sysid.replace("'", 'U00027') if sysid.find("'") >= 0: docStr += '"%s"' % sysid else: docStr += "'%s'" % sysid else: docStr += "''" docStr += ">" if self.doctype.name != token["name"]: warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning) docStr += "" root = etree.fromstring(docStr) # Append the initial comments: for comment_token in self.initial_comments: comment = self.commentClass(comment_token["data"]) root.addprevious(comment._element) # Create the root document and add the ElementTree to it self.document = self.documentClass() self.document._elementTree = root.getroottree() # Give the root element the right name name = token["name"] namespace = token.get("namespace", self.defaultNamespace) if namespace is None: etree_tag = name else: etree_tag = "{%s}%s" % (namespace, name) root.tag = etree_tag # Add the root element to the internal child/open data structures root_element = self.elementClass(name, namespace) root_element._element = root self.document._childNodes.append(root_element) self.openElements.append(root_element) # Reset to the default insert comment function self.insertComment = self.insertCommentMain treewalkers/__pycache__/__init__.cpython-36.opt-1.pyc000064400000007063147204715120016441 0ustar003 B;W@sbdZddlmZmZmZddlmZddlmZdddd d d gZ iZ dd dZ ddZ ddZ d S)aA collection of modules for iterating through different kinds of tree, generating tokens identical to those produced by the tokenizer module. 
To create a tree walker for a new type of tree, you need to do implement a tree walker object (called TreeWalker by convention) that implements a 'serialize' method taking a tree as sole argument and returning an iterator generating tokens. )absolute_importdivisionunicode_literals) constants) default_etree getTreeWalkerpprintdometreegenshi etree_lxmlNcKs|j}|tkr|dkr0ddlm}|jt|<np|dkrPddlm}|jt|<nP|dkrpddlm}|jt|<n0|dkrdd lm}|d krt}|j |f|jStj |S) aGet a TreeWalker class for various types of tree with built-in support Args: treeType (str): the name of the tree type required (case-insensitive). Supported values are: - "dom": The xml.dom.minidom DOM implementation - "etree": A generic walker for tree implementations exposing an elementtree-like interface (known to work with ElementTree, cElementTree and lxml.etree). - "lxml": Optimized walker for lxml.etree - "genshi": a Genshi stream Implementation: A module implementing the tree type e.g. xml.etree.ElementTree or cElementTree (Currently applies to the "etree" tree type only). r )r r )r Zlxml)r r )r N) lowertreeWalkerCacher Z TreeWalkerr r r rZgetETreeModuleget)ZtreeTypeimplementationkwargsr r r r r/usr/lib/python3.6/__init__.pyrs"       ccslg}xL|D]D}|d}|dkr.|j|dq |rHddj|dVg}|Vq W|rhddj|dVdS)Ntype CharactersSpaceCharactersdatar)rr)rr)appendjoin)tokensZpendingCharacterstokenrrrrconcatenateCharacterTokens<s  rc Cs^g}d}xHt|D]:}|d}|dkr&|dr~|dtjdkr~|dtjkrdtj|d}n|d}d||df}n|d}|jd d ||f|d 7}|d }xdt|jD]T\\}} } |r|tjkrtj|}n|}d|| f}n| }|jd d ||| fqW|dkrP|d 8}q|dkr:|d 8}q|dkr`|jdd ||d fq|dkr|dr|dr|jdd ||d|d|dr|dndfnF|dr|jdd ||d|dfn|jdd ||dfn|jdd |fq|dkr8|jdd ||d fq|dkrDqtd|qWdj|S)zPretty printer for tree walkersrrStartTagEmptyTag namespaceZhtmlz%s %snamez%s<%s> rrz %s%s="%s"ZEndTagCommentz %sZDoctypeZpublicIdz%sZsystemIdrz%sz%sz %srz%s"%s"rzUnknown token type, %s )r r!) 
rrZ namespacesprefixesrsorteditems ValueErrorr) Zwalkeroutputindentrrnsr#Zattrsr"Z localnamevaluerrrr Ksd               )N)__doc__Z __future__rrrrrZ_utilsr__all__rrrr rrrr s   'treewalkers/__pycache__/__init__.cpython-36.pyc000064400000007226147204715120015503 0ustar003 B;W@sbdZddlmZmZmZddlmZddlmZdddd d d gZ iZ dd dZ ddZ ddZ d S)aA collection of modules for iterating through different kinds of tree, generating tokens identical to those produced by the tokenizer module. To create a tree walker for a new type of tree, you need to do implement a tree walker object (called TreeWalker by convention) that implements a 'serialize' method taking a tree as sole argument and returning an iterator generating tokens. )absolute_importdivisionunicode_literals) constants) default_etree getTreeWalkerpprintdometreegenshi etree_lxmlNcKs|j}|tkr|dkr0ddlm}|jt|<np|dkrPddlm}|jt|<nP|dkrpddlm}|jt|<n0|dkrdd lm}|d krt}|j |f|jStj |S) aGet a TreeWalker class for various types of tree with built-in support Args: treeType (str): the name of the tree type required (case-insensitive). Supported values are: - "dom": The xml.dom.minidom DOM implementation - "etree": A generic walker for tree implementations exposing an elementtree-like interface (known to work with ElementTree, cElementTree and lxml.etree). - "lxml": Optimized walker for lxml.etree - "genshi": a Genshi stream Implementation: A module implementing the tree type e.g. xml.etree.ElementTree or cElementTree (Currently applies to the "etree" tree type only). 
r )r r )r Zlxml)r r )r N) lowertreeWalkerCacher Z TreeWalkerr r r rZgetETreeModuleget)ZtreeTypeimplementationkwargsr r r r r/usr/lib/python3.6/__init__.pyrs"       ccslg}xL|D]D}|d}|dkr.|j|dq |rHddj|dVg}|Vq W|rhddj|dVdS)Ntype CharactersSpaceCharactersdatar)rr)rr)appendjoin)tokensZpendingCharacterstokenrrrrconcatenateCharacterTokens<s  rc Cslg}d}xVt|D]H}|d}|d kr&|dr~|dtjdkr~|dtjkrdtj|d}n|d}d||df}n|d}|jd d ||f|d 7}|d }xdt|jD]T\\}} } |r|tjkrtj|}n|}d|| f}n| }|jd d ||| fqW|dkr^|d 8}q|dkr:|d 8}q|dkr`|jdd ||d fq|dkr|dr|dr|jdd ||d|d|dr|dndfnF|dr|jdd ||d|dfn|jdd ||dfn|jdd |fq|dkr8|jdd ||d fq|dkrRds^tdqtd|qWdj |S)!zPretty printer for tree walkersrrStartTagEmptyTag namespaceZhtmlz%s %snamez%s<%s> rrz %s%s="%s"ZEndTagCommentz %sZDoctypeZpublicIdz%sZsystemIdrz%sz%sz %srz%s"%s"rFzBconcatenateCharacterTokens should have got rid of all Space tokenszUnknown token type, %s )r r!) rrZ namespacesprefixesrsorteditemsAssertionError ValueErrorr) Zwalkeroutputindentrrnsr#Zattrsr"Z localnamevaluerrrr Ksd               )N)__doc__Z __future__rrrrrZ_utilsr__all__rrrr rrrr s   'treewalkers/__pycache__/base.cpython-36.opt-1.pyc000064400000010612147204715120015606 0ustar003 B;WK @sddlmZmZmZddlmZddlmZmZm Z ddddd d d d d g Z ej Z ej ZejZejZejZejZdZdje Z Gdd d eZGdd d eZdS))absolute_importdivisionunicode_literals)Node) namespaces voidElementsspaceCharactersDOCUMENTDOCTYPETEXTELEMENTCOMMENTENTITYUNKNOWN TreeWalkerNonRecursiveTreeWalkerz <#UNKNOWN#>c@sheZdZddZddZddZddd Zd d Zd d ZddZ ddZ dddZ ddZ ddZ dS)rcCs ||_dS)N)tree)selfrr/usr/lib/python3.6/base.py__init__szTreeWalker.__init__cCstdS)N)NotImplementedError)rrrr__iter__szTreeWalker.__iter__cCs d|dS)NZSerializeError)typedatar)rmsgrrrerrorszTreeWalker.errorFccs$d|||dV|r |jdVdS)NZEmptyTag)rname namespacerzVoid element has children)r)rr rattrs hasChildrenrrremptyTags  zTreeWalker.emptyTagcCsd|||dS)NZStartTag)rrr rr)rr rr!rrrstartTag%szTreeWalker.startTagcCs d||dS)NZEndTag)rrr r)rr 
rrrrendTag+szTreeWalker.endTagccsx|}|jt}|dt|t|}|r6d|dV|}|jt}|t|d}|rdd|dV|rtd|dVdS)NZSpaceCharacters)rrZ Characters)lstripr lenrstrip)rrZmiddleleftrightrrrtext0s    zTreeWalker.textcCs d|dS)NComment)rrr)rrrrrcomment>szTreeWalker.commentNcCsd|||dS)NZDoctype)rrpublicIdsystemIdr)rrr.r/rrrdoctypeAszTreeWalker.doctypecCs d|dS)NZEntity)rrr)rrrrrentityGszTreeWalker.entitycCs|jd|S)NzUnknown node type: )r)rZnodeTyperrrunknownJszTreeWalker.unknown)F)NN)__name__ __module__ __qualname__rrrr#r$r%r+r-r0r1r2rrrrrs  c@s4eZdZddZddZddZddZd d Zd S) rcCstdS)N)r)rnoderrrgetNodeDetailsOsz%NonRecursiveTreeWalker.getNodeDetailscCstdS)N)r)rr6rrr getFirstChildRsz$NonRecursiveTreeWalker.getFirstChildcCstdS)N)r)rr6rrrgetNextSiblingUsz%NonRecursiveTreeWalker.getNextSiblingcCstdS)N)r)rr6rrr getParentNodeXsz$NonRecursiveTreeWalker.getParentNodec cs|j}x|dk r|j|}|d|dd}}d}|tkrN|j|Vn|tkrrx|j|D] }|VqbWn|tkr|\}}}}| s|tdkr|tkrx|j ||||D] }|VqWd}n|j |||VnV|t kr|j |dVn<|t kr|j|dVn |tkrd}n|j|dV|r@|j|} nd} | dk rT| }q x|dk r|j|}|d|dd}}|tkr|\}}}}|r|tdks|tkr|j||V|j|krd}P|j|} | dk r| }Pn |j|}qVWq WdS)NrFZhtmlT)rr7r r0r r+r rrr#r$rr-rr1r r2r8r%r9r:) rZ currentNodeZdetailsrr"tokenr rZ attributesZ firstChildZ nextSiblingrrrr[sZ                 zNonRecursiveTreeWalker.__iter__N)r3r4r5r7r8r9r:rrrrrrNs N)Z __future__rrrZxml.domrZ constantsrrr __all__Z DOCUMENT_NODEr ZDOCUMENT_TYPE_NODEr Z TEXT_NODEr Z ELEMENT_NODEr Z COMMENT_NODErZ ENTITY_NODErrjoinobjectrrrrrrs  :treewalkers/__pycache__/base.cpython-36.pyc000064400000010612147204715120014647 0ustar003 B;WK @sddlmZmZmZddlmZddlmZmZm Z ddddd d d d d g Z ej Z ej ZejZejZejZejZdZdje Z Gdd d eZGdd d eZdS))absolute_importdivisionunicode_literals)Node) namespaces voidElementsspaceCharactersDOCUMENTDOCTYPETEXTELEMENTCOMMENTENTITYUNKNOWN TreeWalkerNonRecursiveTreeWalkerz <#UNKNOWN#>c@sheZdZddZddZddZddd Zd d Zd d ZddZ ddZ dddZ ddZ ddZ dS)rcCs 
||_dS)N)tree)selfrr/usr/lib/python3.6/base.py__init__szTreeWalker.__init__cCstdS)N)NotImplementedError)rrrr__iter__szTreeWalker.__iter__cCs d|dS)NZSerializeError)typedatar)rmsgrrrerrorszTreeWalker.errorFccs$d|||dV|r |jdVdS)NZEmptyTag)rname namespacerzVoid element has children)r)rr rattrs hasChildrenrrremptyTags  zTreeWalker.emptyTagcCsd|||dS)NZStartTag)rrr rr)rr rr!rrrstartTag%szTreeWalker.startTagcCs d||dS)NZEndTag)rrr r)rr rrrrendTag+szTreeWalker.endTagccsx|}|jt}|dt|t|}|r6d|dV|}|jt}|t|d}|rdd|dV|rtd|dVdS)NZSpaceCharacters)rrZ Characters)lstripr lenrstrip)rrZmiddleleftrightrrrtext0s    zTreeWalker.textcCs d|dS)NComment)rrr)rrrrrcomment>szTreeWalker.commentNcCsd|||dS)NZDoctype)rrpublicIdsystemIdr)rrr.r/rrrdoctypeAszTreeWalker.doctypecCs d|dS)NZEntity)rrr)rrrrrentityGszTreeWalker.entitycCs|jd|S)NzUnknown node type: )r)rZnodeTyperrrunknownJszTreeWalker.unknown)F)NN)__name__ __module__ __qualname__rrrr#r$r%r+r-r0r1r2rrrrrs  c@s4eZdZddZddZddZddZd d Zd S) rcCstdS)N)r)rnoderrrgetNodeDetailsOsz%NonRecursiveTreeWalker.getNodeDetailscCstdS)N)r)rr6rrr getFirstChildRsz$NonRecursiveTreeWalker.getFirstChildcCstdS)N)r)rr6rrrgetNextSiblingUsz%NonRecursiveTreeWalker.getNextSiblingcCstdS)N)r)rr6rrr getParentNodeXsz$NonRecursiveTreeWalker.getParentNodec cs|j}x|dk r|j|}|d|dd}}d}|tkrN|j|Vn|tkrrx|j|D] }|VqbWn|tkr|\}}}}| s|tdkr|tkrx|j ||||D] }|VqWd}n|j |||VnV|t kr|j |dVn<|t kr|j|dVn |tkrd}n|j|dV|r@|j|} nd} | dk rT| }q x|dk r|j|}|d|dd}}|tkr|\}}}}|r|tdks|tkr|j||V|j|krd}P|j|} | dk r| }Pn |j|}qVWq WdS)NrFZhtmlT)rr7r r0r r+r rrr#r$rr-rr1r r2r8r%r9r:) rZ currentNodeZdetailsrr"tokenr rZ attributesZ firstChildZ nextSiblingrrrr[sZ                 zNonRecursiveTreeWalker.__iter__N)r3r4r5r7r8r9r:rrrrrrNs N)Z __future__rrrZxml.domrZ constantsrrr __all__Z DOCUMENT_NODEr ZDOCUMENT_TYPE_NODEr Z TEXT_NODEr Z ELEMENT_NODEr Z COMMENT_NODErZ ENTITY_NODErrjoinobjectrrrrrrs  :treewalkers/__pycache__/dom.cpython-36.opt-1.pyc000064400000003137147204715120015457 0ustar003 B;W@sBddlmZmZmZddlmZddlmZGdddejZ 
dS))absolute_importdivisionunicode_literals)Node)basec@s,eZdZddZddZddZddZd S) TreeWalkercCs|jtjkr tj|j|j|jfS|jtjtj fkr>tj |j fS|jtj kri}xJt |jjD]8}|j|}|jr|j||j|jf<q^|j|d|jf<q^Wtj|j|j||jfS|jtjkrtj|j fS|jtjtjfkrtjfStj|jfSdS)N)ZnodeTyperZDOCUMENT_TYPE_NODErZDOCTYPEnameZpublicIdZsystemIdZ TEXT_NODEZCDATA_SECTION_NODEZTEXTZ nodeValueZ ELEMENT_NODElistZ attributeskeysZgetAttributeNodeZ namespaceURIvalueZ localNameZELEMENTZnodeNameZ hasChildNodesZ COMMENT_NODECOMMENTZ DOCUMENT_NODEZDOCUMENT_FRAGMENT_NODEZDOCUMENTZUNKNOWN)selfnodeZattrsattrr/usr/lib/python3.6/dom.pygetNodeDetails s$        zTreeWalker.getNodeDetailscCs|jS)N)Z firstChild)rrrrr getFirstChild$szTreeWalker.getFirstChildcCs|jS)N)Z nextSibling)rrrrrgetNextSibling'szTreeWalker.getNextSiblingcCs|jS)N)Z parentNode)rrrrr getParentNode*szTreeWalker.getParentNodeN)__name__ __module__ __qualname__rrrrrrrrrsrN) Z __future__rrrZxml.domrrZNonRecursiveTreeWalkerrrrrrs  treewalkers/__pycache__/dom.cpython-36.pyc000064400000003137147204715120014520 0ustar003 B;W@sBddlmZmZmZddlmZddlmZGdddejZ dS))absolute_importdivisionunicode_literals)Node)basec@s,eZdZddZddZddZddZd S) TreeWalkercCs|jtjkr tj|j|j|jfS|jtjtj fkr>tj |j fS|jtj kri}xJt |jjD]8}|j|}|jr|j||j|jf<q^|j|d|jf<q^Wtj|j|j||jfS|jtjkrtj|j fS|jtjtjfkrtjfStj|jfSdS)N)ZnodeTyperZDOCUMENT_TYPE_NODErZDOCTYPEnameZpublicIdZsystemIdZ TEXT_NODEZCDATA_SECTION_NODEZTEXTZ nodeValueZ ELEMENT_NODElistZ attributeskeysZgetAttributeNodeZ namespaceURIvalueZ localNameZELEMENTZnodeNameZ hasChildNodesZ COMMENT_NODECOMMENTZ DOCUMENT_NODEZDOCUMENT_FRAGMENT_NODEZDOCUMENTZUNKNOWN)selfnodeZattrsattrr/usr/lib/python3.6/dom.pygetNodeDetails s$        zTreeWalker.getNodeDetailscCs|jS)N)Z firstChild)rrrrr getFirstChild$szTreeWalker.getFirstChildcCs|jS)N)Z nextSibling)rrrrrgetNextSibling'szTreeWalker.getNextSiblingcCs|jS)N)Z parentNode)rrrrr getParentNode*szTreeWalker.getParentNodeN)__name__ __module__ __qualname__rrrrrrrrrsrN) Z __future__rrrZxml.domrrZNonRecursiveTreeWalkerrrrrrs  
treewalkers/__pycache__/etree.cpython-36.opt-1.pyc000064400000006621147204715120016005 0ustar003 B;W@@sddlmZmZmZyddlmZWn>ek rbyddlmZWnek r\eZYnXYnXddl Z ddl m Z ddl m Z ddlmZe jd Zd d ZeeZdS) )absolute_importdivisionunicode_literals) OrderedDictN) string_types)base)moduleFactoryFactoryz {([^}]*)}(.*)cs,|}|jdjGfdddtj}tS)NZasdcs4eZdZdZfddZddZddZdd Zd S) z#getETreeBuilder..TreeWalkeraGiven the particular ElementTree representation, this implementation, to avoid using recursion, returns "nodes" as tuples with the following content: 1. The current element 2. The index of the element relative to its parent 3. A stack of ancestor elements 4. A flag "text", "tail" or None to indicate if the current node is a text node; either the text or tail of the current element (1) c s2t|tr2|\}}}}|d kr.tjt||fS|}t|dsD|j}|jd krVtjfS|jdkr|tj |j |j d|j dfS|jkrtj |j fSt j|j}|r|j\}}n d}|j}t}xPt|jjD]>\} } t j| }|r| ||jd |jd f<q| |d| f<qWtj|||t|p*|j fSdS) Ntexttailtag DOCUMENT_ROOTDOCUMENT_FRAGMENTz ZpublicIdZsystemIdrr )r r )rr) isinstancetuplerZTEXTgetattrhasattrZgetrootr ZDOCUMENTZDOCTYPEr getCOMMENT tag_regexpmatchgroupsrlistZattribitemsgroupZELEMENTlen) selfnodeZelt_flagr namespacer Zattrsnamevalue)ElementTreeCommentType/usr/lib/python3.6/etree.pygetNodeDetails's6         z2getETreeBuilder..TreeWalker.getNodeDetailscSstt|tr|\}}}}n|dgdf\}}}}|dkr8dS|jrJ|||dfSt|rl|j||dd|dfSdSdS)Nr r r)r r )rrr rappend)rrelementkeyparentsr r%r%r& getFirstChildOs   z1getETreeBuilder..TreeWalker.getFirstChildcSst|tr|\}}}}ndS|dkrLt|rF|j||dd|dfSdSnN|jrf|dkrf|||dfS|t|ddkr|d|d|d|dfSdSdS)Nr rr rr-)rrrr(r )rrr)r*r+r r%r%r&getNextSibling`s   z2getETreeBuilder..TreeWalker.getNextSiblingcSsht|tr|\}}}}ndS|dkr:|s,|S|||dfSn*|j}|sJ|S|t|dj||dfSdS)Nr rr-)rrpoprindex)rrr)r*r+r parentr%r%r& getParentNodets z1getETreeBuilder..TreeWalker.getParentNodeN)__name__ __module__ __qualname____doc__r'r,r.r2r%)r$r%r& TreeWalkers   (r7)Commentr rZNonRecursiveTreeWalkerlocals)ZElementTreeImplementationZ ElementTreer7r%)r$r&getETreeBuilders 
nr:)Z __future__rrr collectionsr ImportErrorZ ordereddictdictreZsixrrZ_utilsr compilerr:ZgetETreeModuler%r%r%r&s    ttreewalkers/__pycache__/etree.cpython-36.pyc000064400000006765147204715120015057 0ustar003 B;W@@sddlmZmZmZyddlmZWn>ek rbyddlmZWnek r\eZYnXYnXddl Z ddl m Z ddl m Z ddlmZe jd Zd d ZeeZdS) )absolute_importdivisionunicode_literals) OrderedDictN) string_types)base)moduleFactoryFactoryz {([^}]*)}(.*)cs,|}|jdjGfdddtj}tS)NZasdcs4eZdZdZfddZddZddZdd Zd S) z#getETreeBuilder..TreeWalkeraGiven the particular ElementTree representation, this implementation, to avoid using recursion, returns "nodes" as tuples with the following content: 1. The current element 2. The index of the element relative to its parent 3. A stack of ancestor elements 4. A flag "text", "tail" or None to indicate if the current node is a text node; either the text or tail of the current element (1) c sLt|tr2|\}}}}|d kr.tjt||fS|}t|dsD|j}|jd krVtjfS|jdkr|tj |j |j d|j dfS|jkrtj |j fSt|jt stt|jtj|j}|r|j\}}n d}|j}t}xPt|jjD]>\} } tj| }|r| ||jd |jd f<q| |d| f<qWtj|||t|pD|j fSdS) Ntexttailtag DOCUMENT_ROOTDOCUMENT_FRAGMENTz ZpublicIdZsystemIdrr )r r )rr) isinstancetuplerZTEXTgetattrhasattrZgetrootr ZDOCUMENTZDOCTYPEr getCOMMENTrAssertionErrortype tag_regexpmatchgroupsrlistZattribitemsgroupZELEMENTlen) selfnodeZelt_flagr namespacer Zattrsnamevalue)ElementTreeCommentType/usr/lib/python3.6/etree.pygetNodeDetails's8         z2getETreeBuilder..TreeWalker.getNodeDetailscSstt|tr|\}}}}n|dgdf\}}}}|dkr8dS|jrJ|||dfSt|rl|j||dd|dfSdSdS)Nr r r)r r )rrr rappend)rr elementkeyparentsr"r'r'r( getFirstChildOs   z1getETreeBuilder..TreeWalker.getFirstChildcSst|tr|\}}}}ndS|dkrLt|rF|j||dd|dfSdSnN|jrf|dkrf|||dfS|t|ddkr|d|d|d|dfSdSdS)Nr rr rr/)rrrr*r )rr r+r,r-r"r'r'r(getNextSibling`s   z2getETreeBuilder..TreeWalker.getNextSiblingcSst|tr|\}}}}ndS|dkr:|s,|S|||dfSnD|j}|sJ|St|dj|dksdt|t|dj||dfSdS)Nr rr/r/)rrpoprcountrindex)rr r+r,r-r"parentr'r'r( getParentNodets z1getETreeBuilder..TreeWalker.getParentNodeN)__name__ __module__ 
__qualname____doc__r)r.r0r5r')r&r'r( TreeWalkers   (r:)Commentr rZNonRecursiveTreeWalkerlocals)ZElementTreeImplementationZ ElementTreer:r')r&r(getETreeBuilders nr=)Z __future__rrr collectionsr ImportErrorZ ordereddictdictreZsixrrZ_utilsr compilerr=ZgetETreeModuler'r'r'r(s    ttreewalkers/__pycache__/etree_lxml.cpython-36.opt-1.pyc000064400000014037147204715120017041 0ustar003 B;W@sddlmZmZmZddlmZddlmZddlm Z ddl m Z ddl m Z d d Z Gd d d eZGd ddeZGdddeZGdddeZGddde jZdS))absolute_importdivisionunicode_literals) text_type)etree) tag_regexp)base) _ihatexmlcCs*|dkr dSt|tr|S|jddSdS)Nasciistrict) isinstancerdecode)sr /usr/lib/python3.6/etree_lxml.py ensure_str s  rc@s,eZdZddZddZddZddZd S) RootcCs||_g|_y:|jjrD|jjt|t|jjt|jjt|jj Wnt k rZYnXy |j }Wnt k r|}YnXx|j dk r|j }qWx |dk r|jj||j }qWd|_d|_dS)N)Z elementtreechildrenZdocinfoZ internalDTDappendDoctyperZ root_name public_idZ system_urlAttributeErrorZgetrootZ getpreviousgetnexttexttail)selfZetnoderrr__init__s*         z Root.__init__cCs |j|S)N)r)rkeyrrr __getitem__1szRoot.__getitem__cCsdS)Nr)rrrrr4sz Root.getnextcCsdS)Nr r)rrrr__len__7sz Root.__len__N)__name__ __module__ __qualname__rr!rr"rrrrrsrc@seZdZddZddZdS)rcCs(||_||_||_||_d|_d|_dS)N) root_nodenamer system_idrr)rr&r'rr(rrrr<s zDoctype.__init__cCs |jjdS)Nr )r&r)rrrrrEszDoctype.getnextN)r#r$r%rrrrrrr;s rc@seZdZddZddZdS) FragmentRootcs$fdd|D_d__dS)Ncsg|]}t|qSr)FragmentWrapper).0Zchild)rrr Ksz)FragmentRoot.__init__..)rrr)rrr)rrrJszFragmentRoot.__init__cCsdS)Nr)rrrrrNszFragmentRoot.getnextN)r#r$r%rrrrrrr)Isr)c@sTeZdZddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)r*cCsT||_||_t|jdr(t|jj|_nd|_t|jdrJt|jj|_nd|_dS)Nrr)r&objhasattrrrr)rZ fragment_rootr-rrrrSs  zFragmentWrapper.__init__cCs t|j|S)N)getattrr-)rr'rrr __getattr___szFragmentWrapper.__getattr__cCs6|jj}|j|}|t|dkr.||dSdSdS)Nr )r&rindexlen)rZsiblingsidxrrrrbs   zFragmentWrapper.getnextcCs |j|S)N)r-)rr rrrr!jszFragmentWrapper.__getitem__cCs t|jS)N)boolr-)rrrr__bool__mszFragmentWrapper.__bool__cCsdS)Nr)rrrr 
getparentpszFragmentWrapper.getparentcCs t|jS)N)strr-)rrrr__str__sszFragmentWrapper.__str__cCs t|jS)N)r7r-)rrrr __unicode__vszFragmentWrapper.__unicode__cCs t|jS)N)r2r-)rrrrr"yszFragmentWrapper.__len__N) r#r$r%rr0rr!r5r6r8r9r"rrrrr*Rs r*c@s4eZdZddZddZddZddZd d Zd S) TreeWalkercCsJt|trt||_t|}nt|_t|}tjj||t j |_ dS)N) rlistsetfragmentChildrenr)rr NonRecursiveTreeWalkerrr Z InfosetFilterfilter)rZtreerrrr~s   zTreeWalker.__init__c Cst|tr&|\}}tjtt||fSt|tr8tjfSt|trVtj |j |j |j fSt|t r|t|d r|tjt|jfS|jtjkrtjt|jfS|jtjkrtjt|jddfStjt|j}|r|j\}}nd}t|j}i}xbt|jjD]P\}}t|}t|}tj|}|rB|||jd|jdf<n ||d|f<qWtj||jj ||t!|dkpx|jfSdS)Ntagr rr)"rtupler ZTEXTrr/rZDOCUMENTrZDOCTYPEr'rr(r*r.r-r@rCommentCOMMENTrZEntityZENTITYrmatchgroupsr;ZattribitemsgroupZELEMENTr?Z fromXmlNamer2) rrr rE namespacer@Zattrsr'valuerrrgetNodeDetailss8       zTreeWalker.getNodeDetailscCs|jr|dfS|dSdS)Nrr)r)rrrrr getFirstChildszTreeWalker.getFirstChildcCsNt|tr8|\}}|dkr0t|r*|dSdSn|jS|jrF|dfS|jS)Nrrr)rrBr2rr)rrr rrrgetNextSiblings zTreeWalker.getNextSiblingcCs6t|tr |\}}|dkr.|Sn||jkr.dS|jS)Nr)rrBr=r6)rrr rrr getParentNodes  zTreeWalker.getParentNodeN)r#r$r%rrKrLrMrNrrrrr:}s  ) r:N)Z __future__rrrZsixrZlxmlrZtreebuilders.etreerr r robjectrrr)r*r>r:rrrrs      & +treewalkers/__pycache__/etree_lxml.cpython-36.pyc000064400000014632147204715120016103 0ustar003 B;W@sddlmZmZmZddlmZddlmZddlm Z ddl m Z ddl m Z d d Z Gd d d eZGd ddeZGdddeZGdddeZGddde jZdS))absolute_importdivisionunicode_literals) text_type)etree) tag_regexp)base) _ihatexmlcCs*|dkr dSt|tr|S|jddSdS)Nasciistrict) isinstancerdecode)sr /usr/lib/python3.6/etree_lxml.py ensure_str s  rc@s,eZdZddZddZddZddZd S) RootcCs||_g|_y:|jjrD|jjt|t|jjt|jjt|jj Wnt k rZYnXy |j }Wnt k r|}YnXx|j dk r|j }qWx |dk r|jj||j }qWd|_d|_dS)N)Z elementtreechildrenZdocinfoZ internalDTDappendDoctyperZ root_name public_idZ system_urlAttributeErrorZgetrootZ getpreviousgetnexttexttail)selfZetnoderrr__init__s*         z Root.__init__cCs |j|S)N)r)rkeyrrr __getitem__1szRoot.__getitem__cCsdS)Nr)rrrrr4sz 
Root.getnextcCsdS)Nr r)rrrr__len__7sz Root.__len__N)__name__ __module__ __qualname__rr!rr"rrrrrsrc@seZdZddZddZdS)rcCs(||_||_||_||_d|_d|_dS)N) root_nodenamer system_idrr)rr&r'rr(rrrr<s zDoctype.__init__cCs |jjdS)Nr )r&r)rrrrrEszDoctype.getnextN)r#r$r%rrrrrrr;s rc@seZdZddZddZdS) FragmentRootcs$fdd|D_d__dS)Ncsg|]}t|qSr)FragmentWrapper).0Zchild)rrr Ksz)FragmentRoot.__init__..)rrr)rrr)rrrJszFragmentRoot.__init__cCsdS)Nr)rrrrrNszFragmentRoot.getnextN)r#r$r%rrrrrrr)Isr)c@sTeZdZddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)r*cCsT||_||_t|jdr(t|jj|_nd|_t|jdrJt|jj|_nd|_dS)Nrr)r&objhasattrrrr)rZ fragment_rootr-rrrrSs  zFragmentWrapper.__init__cCs t|j|S)N)getattrr-)rr'rrr __getattr___szFragmentWrapper.__getattr__cCs6|jj}|j|}|t|dkr.||dSdSdS)Nr )r&rindexlen)rZsiblingsidxrrrrbs   zFragmentWrapper.getnextcCs |j|S)N)r-)rr rrrr!jszFragmentWrapper.__getitem__cCs t|jS)N)boolr-)rrrr__bool__mszFragmentWrapper.__bool__cCsdS)Nr)rrrr getparentpszFragmentWrapper.getparentcCs t|jS)N)strr-)rrrr__str__sszFragmentWrapper.__str__cCs t|jS)N)r7r-)rrrr __unicode__vszFragmentWrapper.__unicode__cCs t|jS)N)r2r-)rrrrr"yszFragmentWrapper.__len__N) r#r$r%rr0rr!r5r6r8r9r"rrrrr*Rs r*c@s4eZdZddZddZddZddZd d Zd S) TreeWalkercCsJt|trt||_t|}nt|_t|}tjj||t j |_ dS)N) rlistsetfragmentChildrenr)rr NonRecursiveTreeWalkerrr Z InfosetFilterfilter)rZtreerrrr~s   zTreeWalker.__init__c Cst|tr:|\}}|dks&td|tjtt||fSt|trLtjfSt|t rjtj |j |j |j fSt|trt|d rtjt|jfS|jtjkrtjt|jfS|jtjkrtjt|jdd fStjt|j}|r|j\}}nd}t|j}i}xbt|jjD]P\}}t|}t|}tj|}|rX|||jd|jdf<n ||d|f<qWtj||j j!||t"|dkp|jfSdS) Nrrz%Text nodes are text or tail, found %stagr rr)rr)#rtupleAssertionErrorr ZTEXTrr/rZDOCUMENTrZDOCTYPEr'rr(r*r.r-r@rCommentCOMMENTrZEntityZENTITYrmatchgroupsr;ZattribitemsgroupZELEMENTr?Z fromXmlNamer2) rrr rF namespacer@Zattrsr'valuerrrgetNodeDetailss:       zTreeWalker.getNodeDetailscCsDt|t stdt|s*|js*td|jr8|dfS|dSdS)NzText nodes have no childrenzNode has no childrenrr)rrBrCr2r)rrrrr getFirstChilds 
zTreeWalker.getFirstChildcCsbt|trL|\}}|dks&td||dkrDt|r>|dSdSn|jS|jrZ|dfS|jS)Nrrz%Text nodes are text or tail, found %sr)rr)rrBrCr2rr)rrr rrrgetNextSiblings zTreeWalker.getNextSiblingcCsJt|tr4|\}}|dks&td||dkrB|Sn||jkrBdS|jS)Nrrz%Text nodes are text or tail, found %s)rr)rrBrCr=r6)rrr rrr getParentNodes  zTreeWalker.getParentNodeN)r#r$r%rrLrMrNrOrrrrr:}s  ) r:N)Z __future__rrrZsixrZlxmlrZtreebuilders.etreerr r robjectrrr)r*r>r:rrrrs      & +treewalkers/__pycache__/genshi.cpython-36.opt-1.pyc000064400000003415147204715120016154 0ustar003 B;W @sddlmZmZmZddlmZddlmZmZmZm Z m Z ddlm Z m Z m Z mZmZmZddlmZddlmZmZGd d d ejZd S) )absolute_importdivisionunicode_literals)QName)STARTEND XML_NAMESPACEDOCTYPETEXT)START_NSEND_NS START_CDATA END_CDATAPICOMMENT)base) voidElements namespacesc@seZdZddZddZdS) TreeWalkerccsdd}x6|jD],}|dk r4x|j||D] }|Vq&W|}q W|dk r`x|j|dD] }|VqRWdS)N)Ztreetokens)selfZpreviouseventtokenr/usr/lib/python3.6/genshi.py__iter__ s  zTreeWalker.__iter__ccs|\}}}|tkr|\}}|j}|j} i} x8|D]0\} } t| trT| | | j| jf<q0| | d| f<q0W| tdkr|tkrxJ|j| || | p|dtkp|d|kD] } | VqWn|j | || Vn|tkr|j}|j} | tdks|tkr|j | |Vn~|t kr|j |Vnf|t kr>xZ|j|D] } | Vq,Wn>|tkrV|j|Vn&|tttttttfkrpn |j|VdS)NZhtmlrr)rZ localname namespace isinstancerrrZemptyTagrZstartTagZendTagrZcommentr textr Zdoctyperr r r rrunknown)rrnextZkinddata_tagZattribsnamerZconverted_attribskvrrrrrs@        zTreeWalker.tokensN)__name__ __module__ __qualname__rrrrrrr srN)Z __future__rrrZ genshi.corerrrrr r r r r rrrrZ constantsrrrrrrrs    treewalkers/__pycache__/genshi.cpython-36.pyc000064400000003415147204715120015215 0ustar003 B;W @sddlmZmZmZddlmZddlmZmZmZm Z m Z ddlm Z m Z m Z mZmZmZddlmZddlmZmZGd d d ejZd S) )absolute_importdivisionunicode_literals)QName)STARTEND XML_NAMESPACEDOCTYPETEXT)START_NSEND_NS START_CDATA END_CDATAPICOMMENT)base) voidElements namespacesc@seZdZddZddZdS) TreeWalkerccsdd}x6|jD],}|dk r4x|j||D] }|Vq&W|}q W|dk r`x|j|dD] }|VqRWdS)N)Ztreetokens)selfZpreviouseventtokenr/usr/lib/python3.6/genshi.py__iter__ s  
zTreeWalker.__iter__ccs|\}}}|tkr|\}}|j}|j} i} x8|D]0\} } t| trT| | | j| jf<q0| | d| f<q0W| tdkr|tkrxJ|j| || | p|dtkp|d|kD] } | VqWn|j | || Vn|tkr|j}|j} | tdks|tkr|j | |Vn~|t kr|j |Vnf|t kr>xZ|j|D] } | Vq,Wn>|tkrV|j|Vn&|tttttttfkrpn |j|VdS)NZhtmlrr)rZ localname namespace isinstancerrrZemptyTagrZstartTagZendTagrZcommentr textr Zdoctyperr r r rrunknown)rrnextZkinddata_tagZattribsnamerZconverted_attribskvrrrrrs@        zTreeWalker.tokensN)__name__ __module__ __qualname__rrrrrrr srN)Z __future__rrrZ genshi.corerrrrr r r r r rrrrZ constantsrrrrrrrs    treewalkers/__init__.py000064400000012650147204715120011214 0ustar00"""A collection of modules for iterating through different kinds of tree, generating tokens identical to those produced by the tokenizer module. To create a tree walker for a new type of tree, you need to do implement a tree walker object (called TreeWalker by convention) that implements a 'serialize' method taking a tree as sole argument and returning an iterator generating tokens. """ from __future__ import absolute_import, division, unicode_literals from .. import constants from .._utils import default_etree __all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"] treeWalkerCache = {} def getTreeWalker(treeType, implementation=None, **kwargs): """Get a TreeWalker class for various types of tree with built-in support Args: treeType (str): the name of the tree type required (case-insensitive). Supported values are: - "dom": The xml.dom.minidom DOM implementation - "etree": A generic walker for tree implementations exposing an elementtree-like interface (known to work with ElementTree, cElementTree and lxml.etree). - "lxml": Optimized walker for lxml.etree - "genshi": a Genshi stream Implementation: A module implementing the tree type e.g. xml.etree.ElementTree or cElementTree (Currently applies to the "etree" tree type only). """ treeType = treeType.lower() if treeType not in treeWalkerCache: if treeType == "dom": from . 
import dom treeWalkerCache[treeType] = dom.TreeWalker elif treeType == "genshi": from . import genshi treeWalkerCache[treeType] = genshi.TreeWalker elif treeType == "lxml": from . import etree_lxml treeWalkerCache[treeType] = etree_lxml.TreeWalker elif treeType == "etree": from . import etree if implementation is None: implementation = default_etree # XXX: NEVER cache here, caching is done in the etree submodule return etree.getETreeModule(implementation, **kwargs).TreeWalker return treeWalkerCache.get(treeType) def concatenateCharacterTokens(tokens): pendingCharacters = [] for token in tokens: type = token["type"] if type in ("Characters", "SpaceCharacters"): pendingCharacters.append(token["data"]) else: if pendingCharacters: yield {"type": "Characters", "data": "".join(pendingCharacters)} pendingCharacters = [] yield token if pendingCharacters: yield {"type": "Characters", "data": "".join(pendingCharacters)} def pprint(walker): """Pretty printer for tree walkers""" output = [] indent = 0 for token in concatenateCharacterTokens(walker): type = token["type"] if type in ("StartTag", "EmptyTag"): # tag name if token["namespace"] and token["namespace"] != constants.namespaces["html"]: if token["namespace"] in constants.prefixes: ns = constants.prefixes[token["namespace"]] else: ns = token["namespace"] name = "%s %s" % (ns, token["name"]) else: name = token["name"] output.append("%s<%s>" % (" " * indent, name)) indent += 2 # attributes (sorted for consistent ordering) attrs = token["data"] for (namespace, localname), value in sorted(attrs.items()): if namespace: if namespace in constants.prefixes: ns = constants.prefixes[namespace] else: ns = namespace name = "%s %s" % (ns, localname) else: name = localname output.append("%s%s=\"%s\"" % (" " * indent, name, value)) # self-closing if type == "EmptyTag": indent -= 2 elif type == "EndTag": indent -= 2 elif type == "Comment": output.append("%s" % (" " * indent, token["data"])) elif type == "Doctype": if token["name"]: if 
token["publicId"]: output.append("""%s""" % (" " * indent, token["name"], token["publicId"], token["systemId"] if token["systemId"] else "")) elif token["systemId"]: output.append("""%s""" % (" " * indent, token["name"], token["systemId"])) else: output.append("%s" % (" " * indent, token["name"])) else: output.append("%s" % (" " * indent,)) elif type == "Characters": output.append("%s\"%s\"" % (" " * indent, token["data"])) elif type == "SpaceCharacters": assert False, "concatenateCharacterTokens should have got rid of all Space tokens" else: raise ValueError("Unknown token type, %s" % type) return "\n".join(output) treewalkers/base.py000064400000011513147204715120010364 0ustar00from __future__ import absolute_import, division, unicode_literals from xml.dom import Node from ..constants import namespaces, voidElements, spaceCharacters __all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", "TreeWalker", "NonRecursiveTreeWalker"] DOCUMENT = Node.DOCUMENT_NODE DOCTYPE = Node.DOCUMENT_TYPE_NODE TEXT = Node.TEXT_NODE ELEMENT = Node.ELEMENT_NODE COMMENT = Node.COMMENT_NODE ENTITY = Node.ENTITY_NODE UNKNOWN = "<#UNKNOWN#>" spaceCharacters = "".join(spaceCharacters) class TreeWalker(object): def __init__(self, tree): self.tree = tree def __iter__(self): raise NotImplementedError def error(self, msg): return {"type": "SerializeError", "data": msg} def emptyTag(self, namespace, name, attrs, hasChildren=False): yield {"type": "EmptyTag", "name": name, "namespace": namespace, "data": attrs} if hasChildren: yield self.error("Void element has children") def startTag(self, namespace, name, attrs): return {"type": "StartTag", "name": name, "namespace": namespace, "data": attrs} def endTag(self, namespace, name): return {"type": "EndTag", "name": name, "namespace": namespace} def text(self, data): data = data middle = data.lstrip(spaceCharacters) left = data[:len(data) - len(middle)] if left: yield {"type": "SpaceCharacters", "data": left} data = middle 
middle = data.rstrip(spaceCharacters) right = data[len(middle):] if middle: yield {"type": "Characters", "data": middle} if right: yield {"type": "SpaceCharacters", "data": right} def comment(self, data): return {"type": "Comment", "data": data} def doctype(self, name, publicId=None, systemId=None): return {"type": "Doctype", "name": name, "publicId": publicId, "systemId": systemId} def entity(self, name): return {"type": "Entity", "name": name} def unknown(self, nodeType): return self.error("Unknown node type: " + nodeType) class NonRecursiveTreeWalker(TreeWalker): def getNodeDetails(self, node): raise NotImplementedError def getFirstChild(self, node): raise NotImplementedError def getNextSibling(self, node): raise NotImplementedError def getParentNode(self, node): raise NotImplementedError def __iter__(self): currentNode = self.tree while currentNode is not None: details = self.getNodeDetails(currentNode) type, details = details[0], details[1:] hasChildren = False if type == DOCTYPE: yield self.doctype(*details) elif type == TEXT: for token in self.text(*details): yield token elif type == ELEMENT: namespace, name, attributes, hasChildren = details if (not namespace or namespace == namespaces["html"]) and name in voidElements: for token in self.emptyTag(namespace, name, attributes, hasChildren): yield token hasChildren = False else: yield self.startTag(namespace, name, attributes) elif type == COMMENT: yield self.comment(details[0]) elif type == ENTITY: yield self.entity(details[0]) elif type == DOCUMENT: hasChildren = True else: yield self.unknown(details[0]) if hasChildren: firstChild = self.getFirstChild(currentNode) else: firstChild = None if firstChild is not None: currentNode = firstChild else: while currentNode is not None: details = self.getNodeDetails(currentNode) type, details = details[0], details[1:] if type == ELEMENT: namespace, name, attributes, hasChildren = details if (namespace and namespace != namespaces["html"]) or name not in voidElements: 
yield self.endTag(namespace, name) if self.tree is currentNode: currentNode = None break nextSibling = self.getNextSibling(currentNode) if nextSibling is not None: currentNode = nextSibling break else: currentNode = self.getParentNode(currentNode) treewalkers/dom.py000064400000002605147204715120010233 0ustar00from __future__ import absolute_import, division, unicode_literals from xml.dom import Node from . import base class TreeWalker(base.NonRecursiveTreeWalker): def getNodeDetails(self, node): if node.nodeType == Node.DOCUMENT_TYPE_NODE: return base.DOCTYPE, node.name, node.publicId, node.systemId elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): return base.TEXT, node.nodeValue elif node.nodeType == Node.ELEMENT_NODE: attrs = {} for attr in list(node.attributes.keys()): attr = node.getAttributeNode(attr) if attr.namespaceURI: attrs[(attr.namespaceURI, attr.localName)] = attr.value else: attrs[(None, attr.name)] = attr.value return (base.ELEMENT, node.namespaceURI, node.nodeName, attrs, node.hasChildNodes()) elif node.nodeType == Node.COMMENT_NODE: return base.COMMENT, node.nodeValue elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): return (base.DOCUMENT,) else: return base.UNKNOWN, node.nodeType def getFirstChild(self, node): return node.firstChild def getNextSibling(self, node): return node.nextSibling def getParentNode(self, node): return node.parentNode treewalkers/etree.py000064400000011100147204715120010546 0ustar00from __future__ import absolute_import, division, unicode_literals try: from collections import OrderedDict except ImportError: try: from ordereddict import OrderedDict except ImportError: OrderedDict = dict import re from six import string_types from . 
import base from .._utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") def getETreeBuilder(ElementTreeImplementation): ElementTree = ElementTreeImplementation ElementTreeCommentType = ElementTree.Comment("asd").tag class TreeWalker(base.NonRecursiveTreeWalker): # pylint:disable=unused-variable """Given the particular ElementTree representation, this implementation, to avoid using recursion, returns "nodes" as tuples with the following content: 1. The current element 2. The index of the element relative to its parent 3. A stack of ancestor elements 4. A flag "text", "tail" or None to indicate if the current node is a text node; either the text or tail of the current element (1) """ def getNodeDetails(self, node): if isinstance(node, tuple): # It might be the root Element elt, _, _, flag = node if flag in ("text", "tail"): return base.TEXT, getattr(elt, flag) else: node = elt if not(hasattr(node, "tag")): node = node.getroot() if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): return (base.DOCUMENT,) elif node.tag == "": return (base.DOCTYPE, node.text, node.get("publicId"), node.get("systemId")) elif node.tag == ElementTreeCommentType: return base.COMMENT, node.text else: assert isinstance(node.tag, string_types), type(node.tag) # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: namespace, tag = match.groups() else: namespace = None tag = node.tag attrs = OrderedDict() for name, value in list(node.attrib.items()): match = tag_regexp.match(name) if match: attrs[(match.group(1), match.group(2))] = value else: attrs[(None, name)] = value return (base.ELEMENT, namespace, tag, attrs, len(node) or node.text) def getFirstChild(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: element, key, parents, flag = node, None, [], None if flag in ("text", "tail"): return None else: if element.text: return element, key, parents, "text" elif len(element): parents.append(element) return 
element[0], 0, parents, None else: return None def getNextSibling(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: return None if flag == "text": if len(element): parents.append(element) return element[0], 0, parents, None else: return None else: if element.tail and flag != "tail": return element, key, parents, "tail" elif key < len(parents[-1]) - 1: return parents[-1][key + 1], key + 1, parents, None else: return None def getParentNode(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: return None if flag == "text": if not parents: return element else: return element, key, parents, None else: parent = parents.pop() if not parents: return parent else: assert list(parents[-1]).count(parent) == 1 return parent, list(parents[-1]).index(parent), parents, None return locals() getETreeModule = moduleFactoryFactory(getETreeBuilder) treewalkers/etree_lxml.py000064400000014231147204715120011612 0ustar00from __future__ import absolute_import, division, unicode_literals from six import text_type from lxml import etree from ..treebuilders.etree import tag_regexp from . import base from .. 
import _ihatexml def ensure_str(s): if s is None: return None elif isinstance(s, text_type): return s else: return s.decode("ascii", "strict") class Root(object): def __init__(self, et): self.elementtree = et self.children = [] try: if et.docinfo.internalDTD: self.children.append(Doctype(self, ensure_str(et.docinfo.root_name), ensure_str(et.docinfo.public_id), ensure_str(et.docinfo.system_url))) except AttributeError: pass try: node = et.getroot() except AttributeError: node = et while node.getprevious() is not None: node = node.getprevious() while node is not None: self.children.append(node) node = node.getnext() self.text = None self.tail = None def __getitem__(self, key): return self.children[key] def getnext(self): return None def __len__(self): return 1 class Doctype(object): def __init__(self, root_node, name, public_id, system_id): self.root_node = root_node self.name = name self.public_id = public_id self.system_id = system_id self.text = None self.tail = None def getnext(self): return self.root_node.children[1] class FragmentRoot(Root): def __init__(self, children): self.children = [FragmentWrapper(self, child) for child in children] self.text = self.tail = None def getnext(self): return None class FragmentWrapper(object): def __init__(self, fragment_root, obj): self.root_node = fragment_root self.obj = obj if hasattr(self.obj, 'text'): self.text = ensure_str(self.obj.text) else: self.text = None if hasattr(self.obj, 'tail'): self.tail = ensure_str(self.obj.tail) else: self.tail = None def __getattr__(self, name): return getattr(self.obj, name) def getnext(self): siblings = self.root_node.children idx = siblings.index(self) if idx < len(siblings) - 1: return siblings[idx + 1] else: return None def __getitem__(self, key): return self.obj[key] def __bool__(self): return bool(self.obj) def getparent(self): return None def __str__(self): return str(self.obj) def __unicode__(self): return str(self.obj) def __len__(self): return len(self.obj) class 
TreeWalker(base.NonRecursiveTreeWalker): def __init__(self, tree): # pylint:disable=redefined-variable-type if isinstance(tree, list): self.fragmentChildren = set(tree) tree = FragmentRoot(tree) else: self.fragmentChildren = set() tree = Root(tree) base.NonRecursiveTreeWalker.__init__(self, tree) self.filter = _ihatexml.InfosetFilter() def getNodeDetails(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key return base.TEXT, ensure_str(getattr(node, key)) elif isinstance(node, Root): return (base.DOCUMENT,) elif isinstance(node, Doctype): return base.DOCTYPE, node.name, node.public_id, node.system_id elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): return base.TEXT, ensure_str(node.obj) elif node.tag == etree.Comment: return base.COMMENT, ensure_str(node.text) elif node.tag == etree.Entity: return base.ENTITY, ensure_str(node.text)[1:-1] # strip &; else: # This is assumed to be an ordinary element match = tag_regexp.match(ensure_str(node.tag)) if match: namespace, tag = match.groups() else: namespace = None tag = ensure_str(node.tag) attrs = {} for name, value in list(node.attrib.items()): name = ensure_str(name) value = ensure_str(value) match = tag_regexp.match(name) if match: attrs[(match.group(1), match.group(2))] = value else: attrs[(None, name)] = value return (base.ELEMENT, namespace, self.filter.fromXmlName(tag), attrs, len(node) > 0 or node.text) def getFirstChild(self, node): assert not isinstance(node, tuple), "Text nodes have no children" assert len(node) or node.text, "Node has no children" if node.text: return (node, "text") else: return node[0] def getNextSibling(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": # XXX: we cannot use a "bool(node) and node[0] or None" construct here # because node[0] might evaluate to False 
if it has no child element if len(node): return node[0] else: return None else: # tail return node.getnext() return (node, "tail") if node.tail else node.getnext() def getParentNode(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": return node # else: fallback to "normal" processing elif node in self.fragmentChildren: return None return node.getparent() treewalkers/genshi.py000064400000004405147204715120010731 0ustar00from __future__ import absolute_import, division, unicode_literals from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT from . import base from ..constants import voidElements, namespaces class TreeWalker(base.TreeWalker): def __iter__(self): # Buffer the events so we can pass in the following one previous = None for event in self.tree: if previous is not None: for token in self.tokens(previous, event): yield token previous = event # Don't forget the final event! 
if previous is not None: for token in self.tokens(previous, None): yield token def tokens(self, event, next): kind, data, _ = event if kind == START: tag, attribs = data name = tag.localname namespace = tag.namespace converted_attribs = {} for k, v in attribs: if isinstance(k, QName): converted_attribs[(k.namespace, k.localname)] = v else: converted_attribs[(None, k)] = v if namespace == namespaces["html"] and name in voidElements: for token in self.emptyTag(namespace, name, converted_attribs, not next or next[0] != END or next[1] != tag): yield token else: yield self.startTag(namespace, name, converted_attribs) elif kind == END: name = data.localname namespace = data.namespace if namespace != namespaces["html"] or name not in voidElements: yield self.endTag(namespace, name) elif kind == COMMENT: yield self.comment(data) elif kind == TEXT: for token in self.text(data): yield token elif kind == DOCTYPE: yield self.doctype(*data) elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, START_CDATA, END_CDATA, PI): pass else: yield self.unknown(kind) __init__.py000064400000001421147204715120006656 0ustar00""" HTML parsing library based on the WHATWG "HTML5" specification. The parser is designed to be compatible with existing HTML found in the wild and implements well-defined error recovery that is largely compatible with modern desktop web browsers. 
Example usage: import html5lib f = open("my_document.html") tree = html5lib.parse(f) """ from __future__ import absolute_import, division, unicode_literals from .html5parser import HTMLParser, parse, parseFragment from .treebuilders import getTreeBuilder from .treewalkers import getTreeWalker from .serializer import serialize __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", "getTreeWalker", "serialize"] # this has to be at the top level, see how setup.py parses this __version__ = "0.999999999" _ihatexml.py000064400000040501147204715120007073 0ustar00from __future__ import absolute_import, division, unicode_literals import re import warnings from .constants import DataLossWarning baseChar = """ [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] 
| [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" combiningCharacter = """ [#x0300-#x0345] | 
[#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A""" digit = """ [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" extender = """ #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | #[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" letter = " | ".join([baseChar, ideographic]) # Without the name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, extender]) 
nameFirst = " | ".join([letter, "_"]) reChar = re.compile(r"#x([\d|A-F]{4,4})") reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") def charStringToList(chars): charRanges = [item.strip() for item in chars.split(" | ")] rv = [] for item in charRanges: foundMatch = False for regexp in (reChar, reCharRange): match = regexp.match(item) if match is not None: rv.append([hexToInt(item) for item in match.groups()]) if len(rv[-1]) == 1: rv[-1] = rv[-1] * 2 foundMatch = True break if not foundMatch: assert len(item) == 1 rv.append([ord(item)] * 2) rv = normaliseCharList(rv) return rv def normaliseCharList(charList): charList = sorted(charList) for item in charList: assert item[1] >= item[0] rv = [] i = 0 while i < len(charList): j = 1 rv.append(charList[i]) while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: rv[-1][1] = charList[i + j][1] j += 1 i += j return rv # We don't really support characters above the BMP :( max_unicode = int("FFFF", 16) def missingRanges(charList): rv = [] if charList[0] != 0: rv.append([0, charList[0][0] - 1]) for i, item in enumerate(charList[:-1]): rv.append([item[1] + 1, charList[i + 1][0] - 1]) if charList[-1][1] != max_unicode: rv.append([charList[-1][1] + 1, max_unicode]) return rv def listToRegexpStr(charList): rv = [] for item in charList: if item[0] == item[1]: rv.append(escapeRegexp(chr(item[0]))) else: rv.append(escapeRegexp(chr(item[0])) + "-" + escapeRegexp(chr(item[1]))) return "[%s]" % "".join(rv) def hexToInt(hex_str): return int(hex_str, 16) def escapeRegexp(string): specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", "[", "]", "|", "(", ")", "-") for char in specialCharacters: string = string.replace(char, "\\" + char) return string # output from the above nonXmlNameBMPRegexp = 
re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48
\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0
d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa # Simpler things nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]") class InfosetFilter(object): replacementRegexp = re.compile(r"U[\dA-F]{5,5}") def __init__(self, dropXmlnsLocalName=False, dropXmlnsAttrNs=False, preventDoubleDashComments=False, preventDashAtCommentEnd=False, replaceFormFeedCharacters=True, preventSingleQuotePubid=False): self.dropXmlnsLocalName = dropXmlnsLocalName self.dropXmlnsAttrNs = dropXmlnsAttrNs self.preventDoubleDashComments = preventDoubleDashComments self.preventDashAtCommentEnd = preventDashAtCommentEnd self.replaceFormFeedCharacters = replaceFormFeedCharacters self.preventSingleQuotePubid = preventSingleQuotePubid self.replaceCache = {} def coerceAttribute(self, name, namespace=None): if self.dropXmlnsLocalName and name.startswith("xmlns:"): warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) return None elif (self.dropXmlnsAttrNs and namespace == "http://www.w3.org/2000/xmlns/"): warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) return None else: return 
self.toXmlName(name) def coerceElement(self, name): return self.toXmlName(name) def coerceComment(self, data): if self.preventDoubleDashComments: while "--" in data: warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) data = data.replace("--", "- -") if data.endswith("-"): warnings.warn("Comments cannot end in a dash", DataLossWarning) data += " " return data def coerceCharacters(self, data): if self.replaceFormFeedCharacters: for _ in range(data.count("\x0C")): warnings.warn("Text cannot contain U+000C", DataLossWarning) data = data.replace("\x0C", " ") # Other non-xml characters return data def coercePubid(self, data): dataOutput = data for char in nonPubidCharRegexp.findall(data): warnings.warn("Coercing non-XML pubid", DataLossWarning) replacement = self.getReplacementCharacter(char) dataOutput = dataOutput.replace(char, replacement) if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: warnings.warn("Pubid cannot contain single quote", DataLossWarning) dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) return dataOutput def toXmlName(self, name): nameFirst = name[0] nameRest = name[1:] m = nonXmlNameFirstBMPRegexp.match(nameFirst) if m: warnings.warn("Coercing non-XML name", DataLossWarning) nameFirstOutput = self.getReplacementCharacter(nameFirst) else: nameFirstOutput = nameFirst nameRestOutput = nameRest replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) for char in replaceChars: warnings.warn("Coercing non-XML name", DataLossWarning) replacement = self.getReplacementCharacter(char) nameRestOutput = nameRestOutput.replace(char, replacement) return nameFirstOutput + nameRestOutput def getReplacementCharacter(self, char): if char in self.replaceCache: replacement = self.replaceCache[char] else: replacement = self.escapeChar(char) return replacement def fromXmlName(self, name): for item in set(self.replacementRegexp.findall(name)): name = name.replace(item, self.unescapeChar(item)) return name def 
escapeChar(self, char): replacement = "U%05X" % ord(char) self.replaceCache[char] = replacement return replacement def unescapeChar(self, charcode): return chr(int(charcode[1:], 16)) _inputstream.py000064400000077353147204715120007652 0ustar00from __future__ import absolute_import, division, unicode_literals from six import text_type, binary_type from six.moves import http_client, urllib import codecs import re import webencodings from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase from .constants import ReparseException from . import _utils from io import StringIO try: from io import BytesIO except ImportError: BytesIO = StringIO # Non-unicode versions of constants for use in the pre-parser spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa if _utils.supports_lone_surrogates: # Use one extra step of indirection and create surrogates with # eval. Not using this indirection would introduce an illegal # unicode literal on platforms not supporting such lone # surrogates. 
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used "]") else: invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF]) ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]") # Cache for charsUntil() charsUntilRegEx = {} class BufferedStream(object): """Buffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that joining many strings will be slow since it is O(n**2) """ def __init__(self, stream): self.stream = stream self.buffer = [] self.position = [-1, 0] # chunk number, offset def tell(self): pos = 0 for chunk in self.buffer[:self.position[0]]: pos += len(chunk) pos += self.position[1] return pos def seek(self, pos): assert pos <= self._bufferedBytes() offset = pos i = 0 while len(self.buffer[i]) < offset: offset -= len(self.buffer[i]) i += 1 self.position = [i, offset] def read(self, bytes): if not self.buffer: return self._readStream(bytes) elif (self.position[0] == len(self.buffer) and self.position[1] == len(self.buffer[-1])): return self._readStream(bytes) else: return self._readFromBuffer(bytes) def _bufferedBytes(self): return sum([len(item) for item in self.buffer]) def _readStream(self, bytes): data = self.stream.read(bytes) self.buffer.append(data) self.position[0] += 1 self.position[1] = len(data) return data def _readFromBuffer(self, bytes): remainingBytes = bytes rv = [] bufferIndex = self.position[0] bufferOffset = self.position[1] while 
bufferIndex < len(self.buffer) and remainingBytes != 0: assert remainingBytes > 0 bufferedData = self.buffer[bufferIndex] if remainingBytes <= len(bufferedData) - bufferOffset: bytesToRead = remainingBytes self.position = [bufferIndex, bufferOffset + bytesToRead] else: bytesToRead = len(bufferedData) - bufferOffset self.position = [bufferIndex, len(bufferedData)] bufferIndex += 1 rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) remainingBytes -= bytesToRead bufferOffset = 0 if remainingBytes: rv.append(self._readStream(remainingBytes)) return b"".join(rv) def HTMLInputStream(source, **kwargs): # Work around Python bug #20007: read(0) closes the connection. # http://bugs.python.org/issue20007 if (isinstance(source, http_client.HTTPResponse) or # Also check for addinfourl wrapping HTTPResponse (isinstance(source, urllib.response.addbase) and isinstance(source.fp, http_client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), text_type) else: isUnicode = isinstance(source, text_type) if isUnicode: encodings = [x for x in kwargs if x.endswith("_encoding")] if encodings: raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) return HTMLUnicodeInputStream(source, **kwargs) else: return HTMLBinaryInputStream(source, **kwargs) class HTMLUnicodeInputStream(object): """Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. """ _defaultChunkSize = 10240 def __init__(self, source): """Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. 
If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) """ if not _utils.supports_lone_surrogates: # Such platforms will have already checked for such # surrogate errors, so no need to do this checking. self.reportCharacterErrors = None elif len("\U0010FFFF") == 1: self.reportCharacterErrors = self.characterErrorsUCS4 else: self.reportCharacterErrors = self.characterErrorsUCS2 # List of where new lines occur self.newLines = [0] self.charEncoding = (lookupEncoding("utf-8"), "certain") self.dataStream = self.openStream(source) self.reset() def reset(self): self.chunk = "" self.chunkSize = 0 self.chunkOffset = 0 self.errors = [] # number of (complete) lines in previous chunks self.prevNumLines = 0 # number of columns in the last line of the previous chunk self.prevNumCols = 0 # Deal with CR LF and surrogates split over chunk boundaries self._bufferedCharacter = None def openStream(self, source): """Produces a file object from source. source can be either a file object, local filename or a string. """ # Already a file object if hasattr(source, 'read'): stream = source else: stream = StringIO(source) return stream def _position(self, offset): chunk = self.chunk nLines = chunk.count('\n', 0, offset) positionLine = self.prevNumLines + nLines lastLinePos = chunk.rfind('\n', 0, offset) if lastLinePos == -1: positionColumn = self.prevNumCols + offset else: positionColumn = offset - (lastLinePos + 1) return (positionLine, positionColumn) def position(self): """Returns (line, col) of the current position in the stream.""" line, col = self._position(self.chunkOffset) return (line + 1, col) def char(self): """ Read one character from the stream or queue if available. Return EOF when EOF is reached. 
""" # Read a new chunk from the input stream if necessary if self.chunkOffset >= self.chunkSize: if not self.readChunk(): return EOF chunkOffset = self.chunkOffset char = self.chunk[chunkOffset] self.chunkOffset = chunkOffset + 1 return char def readChunk(self, chunkSize=None): if chunkSize is None: chunkSize = self._defaultChunkSize self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) self.chunk = "" self.chunkSize = 0 self.chunkOffset = 0 data = self.dataStream.read(chunkSize) # Deal with CR LF and surrogates broken across chunks if self._bufferedCharacter: data = self._bufferedCharacter + data self._bufferedCharacter = None elif not data: # We have no more data, bye-bye stream return False if len(data) > 1: lastv = ord(data[-1]) if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: self._bufferedCharacter = data[-1] data = data[:-1] if self.reportCharacterErrors: self.reportCharacterErrors(data) # Replace invalid characters data = data.replace("\r\n", "\n") data = data.replace("\r", "\n") self.chunk = data self.chunkSize = len(data) return True def characterErrorsUCS4(self, data): for _ in range(len(invalid_unicode_re.findall(data))): self.errors.append("invalid-codepoint") def characterErrorsUCS2(self, data): # Someone picked the wrong compile option # You lose skip = False for match in invalid_unicode_re.finditer(data): if skip: continue codepoint = ord(match.group()) pos = match.start() # Pretty sure there should be endianness issues here if _utils.isSurrogatePair(data[pos:pos + 2]): # We have a surrogate pair! 
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) if char_val in non_bmp_invalid_codepoints: self.errors.append("invalid-codepoint") skip = True elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and pos == len(data) - 1): self.errors.append("invalid-codepoint") else: skip = False self.errors.append("invalid-codepoint") def charsUntil(self, characters, opposite=False): """ Returns a string of characters from the stream up to but not including any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' method and iteration over its characters. """ # Use a cache of regexps to find the required characters try: chars = charsUntilRegEx[(characters, opposite)] except KeyError: if __debug__: for c in characters: assert(ord(c) < 128) regex = "".join(["\\x%02x" % ord(c) for c in characters]) if not opposite: regex = "^%s" % regex chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) rv = [] while True: # Find the longest matching prefix m = chars.match(self.chunk, self.chunkOffset) if m is None: # If nothing matched, and it wasn't because we ran out of chunk, # then stop if self.chunkOffset != self.chunkSize: break else: end = m.end() # If not the whole chunk matched, return everything # up to the part that didn't match if end != self.chunkSize: rv.append(self.chunk[self.chunkOffset:end]) self.chunkOffset = end break # If the whole remainder of the chunk matched, # use it all and read the next chunk rv.append(self.chunk[self.chunkOffset:]) if not self.readChunk(): # Reached EOF break r = "".join(rv) return r def unget(self, char): # Only one character is allowed to be ungotten at once - it must # be consumed again before any further call to unget if char is not None: if self.chunkOffset == 0: # unget is called quite rarely, so it's a good idea to do # more work here if it saves a bit of work in the frequently # called char and charsUntil. 
# So, just prepend the ungotten character onto the current # chunk: self.chunk = char + self.chunk self.chunkSize += 1 else: self.chunkOffset -= 1 assert self.chunk[self.chunkOffset] == char class HTMLBinaryInputStream(HTMLUnicodeInputStream): """Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. """ def __init__(self, source, override_encoding=None, transport_encoding=None, same_origin_parent_encoding=None, likely_encoding=None, default_encoding="windows-1252", useChardet=True): """Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) """ # Raw Stream - for unicode objects this will encode to utf-8 and set # self.charEncoding as appropriate self.rawStream = self.openStream(source) HTMLUnicodeInputStream.__init__(self, self.rawStream) # Encoding Information # Number of bytes to use when looking for a meta element with # encoding information self.numBytesMeta = 1024 # Number of bytes to use when using detecting encoding using chardet self.numBytesChardet = 100 # Things from args self.override_encoding = override_encoding self.transport_encoding = transport_encoding self.same_origin_parent_encoding = same_origin_parent_encoding self.likely_encoding = likely_encoding self.default_encoding = default_encoding # Determine encoding self.charEncoding = self.determineEncoding(useChardet) assert self.charEncoding[0] is not None # Call superclass self.reset() def reset(self): self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') HTMLUnicodeInputStream.reset(self) def 
openStream(self, source): """Produces a file object from source. source can be either a file object, local filename or a string. """ # Already a file object if hasattr(source, 'read'): stream = source else: stream = BytesIO(source) try: stream.seek(stream.tell()) except: # pylint:disable=bare-except stream = BufferedStream(stream) return stream def determineEncoding(self, chardet=True): # BOMs take precedence over everything # This will also read past the BOM if present charEncoding = self.detectBOM(), "certain" if charEncoding[0] is not None: return charEncoding # If we've been overriden, we've been overriden charEncoding = lookupEncoding(self.override_encoding), "certain" if charEncoding[0] is not None: return charEncoding # Now check the transport layer charEncoding = lookupEncoding(self.transport_encoding), "certain" if charEncoding[0] is not None: return charEncoding # Look for meta elements with encoding information charEncoding = self.detectEncodingMeta(), "tentative" if charEncoding[0] is not None: return charEncoding # Parent document encoding charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): return charEncoding # "likely" encoding charEncoding = lookupEncoding(self.likely_encoding), "tentative" if charEncoding[0] is not None: return charEncoding # Guess with chardet, if available if chardet: try: from chardet.universaldetector import UniversalDetector except ImportError: pass else: buffers = [] detector = UniversalDetector() while not detector.done: buffer = self.rawStream.read(self.numBytesChardet) assert isinstance(buffer, bytes) if not buffer: break buffers.append(buffer) detector.feed(buffer) detector.close() encoding = lookupEncoding(detector.result['encoding']) self.rawStream.seek(0) if encoding is not None: return encoding, "tentative" # Try the default encoding charEncoding = lookupEncoding(self.default_encoding), "tentative" if 
charEncoding[0] is not None: return charEncoding # Fallback to html5lib's default if even that hasn't worked return lookupEncoding("windows-1252"), "tentative" def changeEncoding(self, newEncoding): assert self.charEncoding[1] != "certain" newEncoding = lookupEncoding(newEncoding) if newEncoding is None: return if newEncoding.name in ("utf-16be", "utf-16le"): newEncoding = lookupEncoding("utf-8") assert newEncoding is not None elif newEncoding == self.charEncoding[0]: self.charEncoding = (self.charEncoding[0], "certain") else: self.rawStream.seek(0) self.charEncoding = (newEncoding, "certain") self.reset() raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) def detectBOM(self): """Attempts to detect at BOM at the start of the stream. If an encoding can be determined from the BOM return the name of the encoding otherwise return None""" bomDict = { codecs.BOM_UTF8: 'utf-8', codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' } # Go to beginning of file and read in 4 bytes string = self.rawStream.read(4) assert isinstance(string, bytes) # Try detecting the BOM using bytes from the string encoding = bomDict.get(string[:3]) # UTF-8 seek = 3 if not encoding: # Need to detect UTF-32 before UTF-16 encoding = bomDict.get(string) # UTF-32 seek = 4 if not encoding: encoding = bomDict.get(string[:2]) # UTF-16 seek = 2 # Set the read position past the BOM if one was found, otherwise # set it to the start of the stream if encoding: self.rawStream.seek(seek) return lookupEncoding(encoding) else: self.rawStream.seek(0) return None def detectEncodingMeta(self): """Report the encoding declared by the meta element """ buffer = self.rawStream.read(self.numBytesMeta) assert isinstance(buffer, bytes) parser = EncodingParser(buffer) self.rawStream.seek(0) encoding = parser.getEncoding() if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): encoding = 
lookupEncoding("utf-8") return encoding class EncodingBytes(bytes): """String-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raised""" def __new__(self, value): assert isinstance(value, bytes) return bytes.__new__(self, value.lower()) def __init__(self, value): # pylint:disable=unused-argument self._position = -1 def __iter__(self): return self def __next__(self): p = self._position = self._position + 1 if p >= len(self): raise StopIteration elif p < 0: raise TypeError return self[p:p + 1] def next(self): # Py2 compat return self.__next__() def previous(self): p = self._position if p >= len(self): raise StopIteration elif p < 0: raise TypeError self._position = p = p - 1 return self[p:p + 1] def setPosition(self, position): if self._position >= len(self): raise StopIteration self._position = position def getPosition(self): if self._position >= len(self): raise StopIteration if self._position >= 0: return self._position else: return None position = property(getPosition, setPosition) def getCurrentByte(self): return self[self.position:self.position + 1] currentByte = property(getCurrentByte) def skip(self, chars=spaceCharactersBytes): """Skip past a list of characters""" p = self.position # use property for the error-checking while p < len(self): c = self[p:p + 1] if c not in chars: self._position = p return c p += 1 self._position = p return None def skipUntil(self, chars): p = self.position while p < len(self): c = self[p:p + 1] if c in chars: self._position = p return c p += 1 self._position = p return None def matchBytes(self, bytes): """Look for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. 
Otherwise return False and leave the position alone""" p = self.position data = self[p:p + len(bytes)] rv = data.startswith(bytes) if rv: self.position += len(bytes) return rv def jumpTo(self, bytes): """Look for the next sequence of bytes matching a given sequence. If a match is found advance the position to the last byte of the match""" newPosition = self[self.position:].find(bytes) if newPosition > -1: # XXX: This is ugly, but I can't see a nicer way to fix this. if self._position == -1: self._position = 0 self._position += (newPosition + len(bytes) - 1) return True else: raise StopIteration class EncodingParser(object): """Mini parser for detecting character encoding from meta elements""" def __init__(self, data): """string - the data to work on for encoding detection""" self.data = EncodingBytes(data) self.encoding = None def getEncoding(self): methodDispatch = ( (b"") def handleMeta(self): if self.data.currentByte not in spaceCharactersBytes: # if we have ") def getAttribute(self): """Return a name,value pair for the next attribute in the stream, if one is found, or None""" data = self.data # Step 1 (skip chars) c = data.skip(spaceCharactersBytes | frozenset([b"/"])) assert c is None or len(c) == 1 # Step 2 if c in (b">", None): return None # Step 3 attrName = [] attrValue = [] # Step 4 attribute name while True: if c == b"=" and attrName: break elif c in spaceCharactersBytes: # Step 6! 
c = data.skip() break elif c in (b"/", b">"): return b"".join(attrName), b"" elif c in asciiUppercaseBytes: attrName.append(c.lower()) elif c is None: return None else: attrName.append(c) # Step 5 c = next(data) # Step 7 if c != b"=": data.previous() return b"".join(attrName), b"" # Step 8 next(data) # Step 9 c = data.skip() # Step 10 if c in (b"'", b'"'): # 10.1 quoteChar = c while True: # 10.2 c = next(data) # 10.3 if c == quoteChar: next(data) return b"".join(attrName), b"".join(attrValue) # 10.4 elif c in asciiUppercaseBytes: attrValue.append(c.lower()) # 10.5 else: attrValue.append(c) elif c == b">": return b"".join(attrName), b"" elif c in asciiUppercaseBytes: attrValue.append(c.lower()) elif c is None: return None else: attrValue.append(c) # Step 11 while True: c = next(data) if c in spacesAngleBrackets: return b"".join(attrName), b"".join(attrValue) elif c in asciiUppercaseBytes: attrValue.append(c.lower()) elif c is None: return None else: attrValue.append(c) class ContentAttrParser(object): def __init__(self, data): assert isinstance(data, bytes) self.data = data def parse(self): try: # Check if the attr name is charset # otherwise return self.data.jumpTo(b"charset") self.data.position += 1 self.data.skip() if not self.data.currentByte == b"=": # If there is no = sign keep looking for attrs return None self.data.position += 1 self.data.skip() # Look for an encoding between matching quote marks if self.data.currentByte in (b'"', b"'"): quoteMark = self.data.currentByte self.data.position += 1 oldPosition = self.data.position if self.data.jumpTo(quoteMark): return self.data[oldPosition:self.data.position] else: return None else: # Unquoted value oldPosition = self.data.position try: self.data.skipUntil(spaceCharactersBytes) return self.data[oldPosition:self.data.position] except StopIteration: # Return the whole remaining value return self.data[oldPosition:] except StopIteration: return None def lookupEncoding(encoding): """Return the python codec name 
corresponding to an encoding or None if the string doesn't correspond to a valid encoding.""" if isinstance(encoding, binary_type): try: encoding = encoding.decode("ascii") except UnicodeDecodeError: return None if encoding is not None: try: return webencodings.lookup(encoding) except AttributeError: return None else: return None _tokenizer.py000064400000225430147204715120007300 0ustar00from __future__ import absolute_import, division, unicode_literals from six import unichr as chr from collections import deque from .constants import spaceCharacters from .constants import entities from .constants import asciiLetters, asciiUpper2Lower from .constants import digits, hexDigits, EOF from .constants import tokenTypes, tagTokenTypes from .constants import replacementCharacters from ._inputstream import HTMLInputStream from ._trie import Trie entitiesTrie = Trie(entities) class HTMLTokenizer(object): """ This class takes care of tokenizing HTML. * self.currentToken Holds the token that is currently being processed. * self.state Holds a reference to the method to be invoked... XXX * self.stream Points to HTMLInputStream object. """ def __init__(self, stream, parser=None, **kwargs): self.stream = HTMLInputStream(stream, **kwargs) self.parser = parser # Setup the initial tokenizer state self.escapeFlag = False self.lastFourChars = [] self.state = self.dataState self.escape = False # The current token being created self.currentToken = None super(HTMLTokenizer, self).__init__() def __iter__(self): """ This is where the magic happens. We do our usually processing through the states and when we have a token to return we yield the token which pauses processing until the next token is requested. """ self.tokenQueue = deque([]) # Start processing. When EOF is reached self.state will return False # instead of True and the loop will terminate. 
while self.state(): while self.stream.errors: yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} while self.tokenQueue: yield self.tokenQueue.popleft() def consumeNumberEntity(self, isHex): """This function returns either U+FFFD or the character based on the decimal or hexadecimal representation. It also discards ";" if present. If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. """ allowed = digits radix = 10 if isHex: allowed = hexDigits radix = 16 charStack = [] # Consume all the characters that are in range while making sure we # don't hit an EOF. c = self.stream.char() while c in allowed and c is not EOF: charStack.append(c) c = self.stream.char() # Convert the set of characters consumed to an int. charAsInt = int("".join(charStack), radix) # Certain characters get replaced with others if charAsInt in replacementCharacters: char = replacementCharacters[charAsInt] self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "illegal-codepoint-for-numeric-entity", "datavars": {"charAsInt": charAsInt}}) elif ((0xD800 <= charAsInt <= 0xDFFF) or (charAsInt > 0x10FFFF)): char = "\uFFFD" self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "illegal-codepoint-for-numeric-entity", "datavars": {"charAsInt": charAsInt}}) else: # Should speed up this check somehow (e.g. 
move the set to a constant) if ((0x0001 <= charAsInt <= 0x0008) or (0x000E <= charAsInt <= 0x001F) or (0x007F <= charAsInt <= 0x009F) or (0xFDD0 <= charAsInt <= 0xFDEF) or charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF])): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "illegal-codepoint-for-numeric-entity", "datavars": {"charAsInt": charAsInt}}) try: # Try/except needed as UCS-2 Python builds' unichar only works # within the BMP. char = chr(charAsInt) except ValueError: v = charAsInt - 0x10000 char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) # Discard the ; if present. Otherwise, put it back on the queue and # invoke parseError on parser. if c != ";": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "numeric-entity-without-semicolon"}) self.stream.unget(c) return char def consumeEntity(self, allowedChar=None, fromAttribute=False): # Initialise to the default output for when no entity is matched output = "&" charStack = [self.stream.char()] if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or (allowedChar is not None and allowedChar == charStack[0])): self.stream.unget(charStack[0]) elif charStack[0] == "#": # Read the next character to see if it's hex or decimal hex = False charStack.append(self.stream.char()) if charStack[-1] in ("x", "X"): hex = True charStack.append(self.stream.char()) # charStack[-1] should be the first digit if (hex and charStack[-1] in hexDigits) \ or (not hex and charStack[-1] in digits): # At least one digit found, so consume the whole number self.stream.unget(charStack[-1]) output = self.consumeNumberEntity(hex) else: # No digits found self.tokenQueue.append({"type": 
tokenTypes["ParseError"], "data": "expected-numeric-entity"}) self.stream.unget(charStack.pop()) output = "&" + "".join(charStack) else: # At this point in the process might have named entity. Entities # are stored in the global variable "entities". # # Consume characters and compare to these to a substring of the # entity names in the list until the substring no longer matches. while (charStack[-1] is not EOF): if not entitiesTrie.has_keys_with_prefix("".join(charStack)): break charStack.append(self.stream.char()) # At this point we have a string that starts with some characters # that may match an entity # Try to find the longest entity the string will match to take care # of ¬i for instance. try: entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) entityLength = len(entityName) except KeyError: entityName = None if entityName is not None: if entityName[-1] != ";": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "named-entity-without-semicolon"}) if (entityName[-1] != ";" and fromAttribute and (charStack[entityLength] in asciiLetters or charStack[entityLength] in digits or charStack[entityLength] == "=")): self.stream.unget(charStack.pop()) output = "&" + "".join(charStack) else: output = entities[entityName] self.stream.unget(charStack.pop()) output += "".join(charStack[entityLength:]) else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-named-entity"}) self.stream.unget(charStack.pop()) output = "&" + "".join(charStack) if fromAttribute: self.currentToken["data"][-1][1] += output else: if output in spaceCharacters: tokenType = "SpaceCharacters" else: tokenType = "Characters" self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) def processEntityInAttribute(self, allowedChar): """This method replaces the need for "entityInAttributeValueState". 
""" self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) def emitCurrentToken(self): """This method is a generic handler for emitting the tags. It also sets the state to "data" because that's what's needed after a token has been emitted. """ token = self.currentToken # Add token to the queue to be yielded if (token["type"] in tagTokenTypes): token["name"] = token["name"].translate(asciiUpper2Lower) if token["type"] == tokenTypes["EndTag"]: if token["data"]: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "attributes-in-end-tag"}) if token["selfClosing"]: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "self-closing-flag-on-end-tag"}) self.tokenQueue.append(token) self.state = self.dataState # Below are the various tokenizer states worked out. def dataState(self): data = self.stream.char() if data == "&": self.state = self.entityDataState elif data == "<": self.state = self.tagOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\u0000"}) elif data is EOF: # Tokenization ends. return False elif data in spaceCharacters: # Directly after emitting a token you switch back to the "data # state". At that point spaceCharacters are important so they are # emitted separately. 
self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": data + self.stream.charsUntil(spaceCharacters, True)}) # No need to update lastFourChars here, since the first space will # have already been appended to lastFourChars and will have broken # any sequences else: chars = self.stream.charsUntil(("&", "<", "\u0000")) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data + chars}) return True def entityDataState(self): self.consumeEntity() self.state = self.dataState return True def rcdataState(self): data = self.stream.char() if data == "&": self.state = self.characterReferenceInRcdata elif data == "<": self.state = self.rcdataLessThanSignState elif data == EOF: # Tokenization ends. return False elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\uFFFD"}) elif data in spaceCharacters: # Directly after emitting a token you switch back to the "data # state". At that point spaceCharacters are important so they are # emitted separately. self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": data + self.stream.charsUntil(spaceCharacters, True)}) # No need to update lastFourChars here, since the first space will # have already been appended to lastFourChars and will have broken # any sequences else: chars = self.stream.charsUntil(("&", "<", "\u0000")) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data + chars}) return True def characterReferenceInRcdata(self): self.consumeEntity() self.state = self.rcdataState return True def rawtextState(self): data = self.stream.char() if data == "<": self.state = self.rawtextLessThanSignState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\uFFFD"}) elif data == EOF: # Tokenization ends. 
return False else: chars = self.stream.charsUntil(("<", "\u0000")) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data + chars}) return True def scriptDataState(self): data = self.stream.char() if data == "<": self.state = self.scriptDataLessThanSignState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\uFFFD"}) elif data == EOF: # Tokenization ends. return False else: chars = self.stream.charsUntil(("<", "\u0000")) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data + chars}) return True def plaintextState(self): data = self.stream.char() if data == EOF: # Tokenization ends. return False elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\uFFFD"}) else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data + self.stream.charsUntil("\u0000")}) return True def tagOpenState(self): data = self.stream.char() if data == "!": self.state = self.markupDeclarationOpenState elif data == "/": self.state = self.closeTagOpenState elif data in asciiLetters: self.currentToken = {"type": tokenTypes["StartTag"], "name": data, "data": [], "selfClosing": False, "selfClosingAcknowledged": False} self.state = self.tagNameState elif data == ">": # XXX In theory it could be something besides a tag name. But # do we really care? self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-tag-name-but-got-right-bracket"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) self.state = self.dataState elif data == "?": # XXX In theory it could be something besides a tag name. But # do we really care? 
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-tag-name-but-got-question-mark"}) self.stream.unget(data) self.state = self.bogusCommentState else: # XXX self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-tag-name"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.stream.unget(data) self.state = self.dataState return True def closeTagOpenState(self): data = self.stream.char() if data in asciiLetters: self.currentToken = {"type": tokenTypes["EndTag"], "name": data, "data": [], "selfClosing": False} self.state = self.tagNameState elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-closing-tag-but-got-right-bracket"}) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-closing-tag-but-got-eof"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "": self.emitCurrentToken() elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-tag-name"}) self.state = self.dataState elif data == "/": self.state = self.selfClosingStartTagState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["name"] += "\uFFFD" else: self.currentToken["name"] += data # (Don't use charsUntil here, because tag names are # very short and it's faster to not do anything fancy) return True def rcdataLessThanSignState(self): data = self.stream.char() if data == "/": self.temporaryBuffer = "" self.state = self.rcdataEndTagOpenState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.stream.unget(data) self.state = self.rcdataState return True def rcdataEndTagOpenState(self): data = self.stream.char() if data in asciiLetters: self.temporaryBuffer += data self.state = self.rcdataEndTagNameState else: self.tokenQueue.append({"type": tokenTypes["Characters"], 
"data": "" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, "data": [], "selfClosing": False} self.emitCurrentToken() self.state = self.dataState elif data in asciiLetters: self.temporaryBuffer += data else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, "data": [], "selfClosing": False} self.emitCurrentToken() self.state = self.dataState elif data in asciiLetters: self.temporaryBuffer += data else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, "data": [], "selfClosing": False} self.emitCurrentToken() self.state = self.dataState elif data in asciiLetters: self.temporaryBuffer += data else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "": self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) self.state = self.scriptDataState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\uFFFD"}) self.state = self.scriptDataEscapedState elif data == EOF: self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.scriptDataEscapedState return True def scriptDataEscapedLessThanSignState(self): data = self.stream.char() if data == "/": self.temporaryBuffer = "" self.state = self.scriptDataEscapedEndTagOpenState elif data in asciiLetters: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) self.temporaryBuffer = data self.state = self.scriptDataDoubleEscapeStartState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.stream.unget(data) self.state = self.scriptDataEscapedState return True def 
scriptDataEscapedEndTagOpenState(self): data = self.stream.char() if data in asciiLetters: self.temporaryBuffer = data self.state = self.scriptDataEscapedEndTagNameState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, "data": [], "selfClosing": False} self.emitCurrentToken() self.state = self.dataState elif data in asciiLetters: self.temporaryBuffer += data else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ""))): self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) if self.temporaryBuffer.lower() == "script": self.state = self.scriptDataDoubleEscapedState else: self.state = self.scriptDataEscapedState elif data in asciiLetters: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.temporaryBuffer += data else: self.stream.unget(data) self.state = self.scriptDataEscapedState return True def scriptDataDoubleEscapedState(self): data = self.stream.char() if data == "-": self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) self.state = self.scriptDataDoubleEscapedDashState elif data == "<": self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.state = self.scriptDataDoubleEscapedLessThanSignState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\uFFFD"}) elif data == EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-script-in-script"}) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) return True def scriptDataDoubleEscapedDashState(self): data = self.stream.char() if data == "-": self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) self.state = self.scriptDataDoubleEscapedDashDashState elif data == "<": 
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.state = self.scriptDataDoubleEscapedLessThanSignState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\uFFFD"}) self.state = self.scriptDataDoubleEscapedState elif data == EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-script-in-script"}) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.scriptDataDoubleEscapedState return True def scriptDataDoubleEscapedDashDashState(self): data = self.stream.char() if data == "-": self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) elif data == "<": self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.state = self.scriptDataDoubleEscapedLessThanSignState elif data == ">": self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) self.state = self.scriptDataState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "\uFFFD"}) self.state = self.scriptDataDoubleEscapedState elif data == EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-script-in-script"}) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.scriptDataDoubleEscapedState return True def scriptDataDoubleEscapedLessThanSignState(self): data = self.stream.char() if data == "/": self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) self.temporaryBuffer = "" self.state = self.scriptDataDoubleEscapeEndState else: self.stream.unget(data) self.state = self.scriptDataDoubleEscapedState return True def scriptDataDoubleEscapeEndState(self): data = self.stream.char() if data 
in (spaceCharacters | frozenset(("/", ">"))): self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) if self.temporaryBuffer.lower() == "script": self.state = self.scriptDataEscapedState else: self.state = self.scriptDataDoubleEscapedState elif data in asciiLetters: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.temporaryBuffer += data else: self.stream.unget(data) self.state = self.scriptDataDoubleEscapedState return True def beforeAttributeNameState(self): data = self.stream.char() if data in spaceCharacters: self.stream.charsUntil(spaceCharacters, True) elif data in asciiLetters: self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState elif data == ">": self.emitCurrentToken() elif data == "/": self.state = self.selfClosingStartTagState elif data in ("'", '"', "=", "<"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-character-in-attribute-name"}) self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"].append(["\uFFFD", ""]) self.state = self.attributeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-attribute-name-but-got-eof"}) self.state = self.dataState else: self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState return True def attributeNameState(self): data = self.stream.char() leavingThisState = True emitToken = False if data == "=": self.state = self.beforeAttributeValueState elif data in asciiLetters: self.currentToken["data"][-1][0] += data +\ self.stream.charsUntil(asciiLetters, True) leavingThisState = False elif data == ">": # XXX If we emit here the attributes are converted to a dict # without being checked and when the code below runs we error # because data is a dict not a list emitToken = True elif data in 
spaceCharacters: self.state = self.afterAttributeNameState elif data == "/": self.state = self.selfClosingStartTagState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"][-1][0] += "\uFFFD" leavingThisState = False elif data in ("'", '"', "<"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-character-in-attribute-name"}) self.currentToken["data"][-1][0] += data leavingThisState = False elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-attribute-name"}) self.state = self.dataState else: self.currentToken["data"][-1][0] += data leavingThisState = False if leavingThisState: # Attributes are not dropped at this stage. That happens when the # start tag token is emitted so values can still be safely appended # to attributes, but we do want to report the parse error in time. self.currentToken["data"][-1][0] = ( self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) for name, _ in self.currentToken["data"][:-1]: if self.currentToken["data"][-1][0] == name: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "duplicate-attribute"}) break # XXX Fix for above XXX if emitToken: self.emitCurrentToken() return True def afterAttributeNameState(self): data = self.stream.char() if data in spaceCharacters: self.stream.charsUntil(spaceCharacters, True) elif data == "=": self.state = self.beforeAttributeValueState elif data == ">": self.emitCurrentToken() elif data in asciiLetters: self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState elif data == "/": self.state = self.selfClosingStartTagState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"].append(["\uFFFD", ""]) self.state = self.attributeNameState elif data in ("'", '"', "<"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": 
"invalid-character-after-attribute-name"}) self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-end-of-tag-but-got-eof"}) self.state = self.dataState else: self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState return True def beforeAttributeValueState(self): data = self.stream.char() if data in spaceCharacters: self.stream.charsUntil(spaceCharacters, True) elif data == "\"": self.state = self.attributeValueDoubleQuotedState elif data == "&": self.state = self.attributeValueUnQuotedState self.stream.unget(data) elif data == "'": self.state = self.attributeValueSingleQuotedState elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-attribute-value-but-got-right-bracket"}) self.emitCurrentToken() elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"][-1][1] += "\uFFFD" self.state = self.attributeValueUnQuotedState elif data in ("=", "<", "`"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "equals-in-unquoted-attribute-value"}) self.currentToken["data"][-1][1] += data self.state = self.attributeValueUnQuotedState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-attribute-value-but-got-eof"}) self.state = self.dataState else: self.currentToken["data"][-1][1] += data self.state = self.attributeValueUnQuotedState return True def attributeValueDoubleQuotedState(self): data = self.stream.char() if data == "\"": self.state = self.afterAttributeValueState elif data == "&": self.processEntityInAttribute('"') elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"][-1][1] += "\uFFFD" elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": 
"eof-in-attribute-value-double-quote"}) self.state = self.dataState else: self.currentToken["data"][-1][1] += data +\ self.stream.charsUntil(("\"", "&", "\u0000")) return True def attributeValueSingleQuotedState(self): data = self.stream.char() if data == "'": self.state = self.afterAttributeValueState elif data == "&": self.processEntityInAttribute("'") elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"][-1][1] += "\uFFFD" elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-attribute-value-single-quote"}) self.state = self.dataState else: self.currentToken["data"][-1][1] += data +\ self.stream.charsUntil(("'", "&", "\u0000")) return True def attributeValueUnQuotedState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeAttributeNameState elif data == "&": self.processEntityInAttribute(">") elif data == ">": self.emitCurrentToken() elif data in ('"', "'", "=", "<", "`"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-character-in-unquoted-attribute-value"}) self.currentToken["data"][-1][1] += data elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"][-1][1] += "\uFFFD" elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-attribute-value-no-quotes"}) self.state = self.dataState else: self.currentToken["data"][-1][1] += data + self.stream.charsUntil( frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) return True def afterAttributeValueState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeAttributeNameState elif data == ">": self.emitCurrentToken() elif data == "/": self.state = self.selfClosingStartTagState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": 
"unexpected-EOF-after-attribute-value"}) self.stream.unget(data) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-character-after-attribute-value"}) self.stream.unget(data) self.state = self.beforeAttributeNameState return True def selfClosingStartTagState(self): data = self.stream.char() if data == ">": self.currentToken["selfClosing"] = True self.emitCurrentToken() elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-EOF-after-solidus-in-tag"}) self.stream.unget(data) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-character-after-solidus-in-tag"}) self.stream.unget(data) self.state = self.beforeAttributeNameState return True def bogusCommentState(self): # Make a new comment token and give it as value all the characters # until the first > or EOF (charsUntil checks for EOF automatically) # and emit it. data = self.stream.charsUntil(">") data = data.replace("\u0000", "\uFFFD") self.tokenQueue.append( {"type": tokenTypes["Comment"], "data": data}) # Eat the character directly after the bogus comment which is either a # ">" or an EOF. 
self.stream.char() self.state = self.dataState return True def markupDeclarationOpenState(self): charStack = [self.stream.char()] if charStack[-1] == "-": charStack.append(self.stream.char()) if charStack[-1] == "-": self.currentToken = {"type": tokenTypes["Comment"], "data": ""} self.state = self.commentStartState return True elif charStack[-1] in ('d', 'D'): matched = True for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), ('y', 'Y'), ('p', 'P'), ('e', 'E')): charStack.append(self.stream.char()) if charStack[-1] not in expected: matched = False break if matched: self.currentToken = {"type": tokenTypes["Doctype"], "name": "", "publicId": None, "systemId": None, "correct": True} self.state = self.doctypeState return True elif (charStack[-1] == "[" and self.parser is not None and self.parser.tree.openElements and self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): matched = True for expected in ["C", "D", "A", "T", "A", "["]: charStack.append(self.stream.char()) if charStack[-1] != expected: matched = False break if matched: self.state = self.cdataSectionState return True self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-dashes-or-doctype"}) while charStack: self.stream.unget(charStack.pop()) self.state = self.bogusCommentState return True def commentStartState(self): data = self.stream.char() if data == "-": self.state = self.commentStartDashState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "incorrect-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["data"] += data self.state = self.commentState 
return True def commentStartDashState(self): data = self.stream.char() if data == "-": self.state = self.commentEndState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"] += "-\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "incorrect-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["data"] += "-" + data self.state = self.commentState return True def commentState(self): data = self.stream.char() if data == "-": self.state = self.commentEndDashState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"] += "\uFFFD" elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["data"] += data + \ self.stream.charsUntil(("-", "\u0000")) return True def commentEndDashState(self): data = self.stream.char() if data == "-": self.state = self.commentEndState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"] += "-\uFFFD" self.state = self.commentState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-comment-end-dash"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["data"] += "-" + data self.state = self.commentState return True def commentEndState(self): data = self.stream.char() if data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data == "\u0000": self.tokenQueue.append({"type": 
tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"] += "--\uFFFD" self.state = self.commentState elif data == "!": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-bang-after-double-dash-in-comment"}) self.state = self.commentEndBangState elif data == "-": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-dash-after-double-dash-in-comment"}) self.currentToken["data"] += data elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-comment-double-dash"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: # XXX self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-comment"}) self.currentToken["data"] += "--" + data self.state = self.commentState return True def commentEndBangState(self): data = self.stream.char() if data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data == "-": self.currentToken["data"] += "--!" self.state = self.commentEndDashState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["data"] += "--!\uFFFD" self.state = self.commentState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-comment-end-bang-state"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["data"] += "--!" 
+ data self.state = self.commentState return True def doctypeState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeDoctypeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-doctype-name-but-got-eof"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "need-space-after-doctype"}) self.stream.unget(data) self.state = self.beforeDoctypeNameState return True def beforeDoctypeNameState(self): data = self.stream.char() if data in spaceCharacters: pass elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-doctype-name-but-got-right-bracket"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["name"] = "\uFFFD" self.state = self.doctypeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-doctype-name-but-got-eof"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["name"] = data self.state = self.doctypeNameState return True def doctypeNameState(self): data = self.stream.char() if data in spaceCharacters: self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) self.state = self.afterDoctypeNameState elif data == ">": self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["name"] += "\uFFFD" self.state = self.doctypeNameState elif data is EOF: 
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype-name"}) self.currentToken["correct"] = False self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["name"] += data return True def afterDoctypeNameState(self): data = self.stream.char() if data in spaceCharacters: pass elif data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.currentToken["correct"] = False self.stream.unget(data) self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: if data in ("p", "P"): matched = True for expected in (("u", "U"), ("b", "B"), ("l", "L"), ("i", "I"), ("c", "C")): data = self.stream.char() if data not in expected: matched = False break if matched: self.state = self.afterDoctypePublicKeywordState return True elif data in ("s", "S"): matched = True for expected in (("y", "Y"), ("s", "S"), ("t", "T"), ("e", "E"), ("m", "M")): data = self.stream.char() if data not in expected: matched = False break if matched: self.state = self.afterDoctypeSystemKeywordState return True # All the characters read before the current 'data' will be # [a-zA-Z], so they're garbage in the bogus doctype and can be # discarded; only the latest character might be '>' or EOF # and needs to be ungetted self.stream.unget(data) self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-space-or-right-bracket-in-doctype", "datavars": {"data": data}}) self.currentToken["correct"] = False self.state = self.bogusDoctypeState return True def afterDoctypePublicKeywordState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeDoctypePublicIdentifierState elif data in ("'", '"'): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": 
"unexpected-char-in-doctype"}) self.stream.unget(data) self.state = self.beforeDoctypePublicIdentifierState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.stream.unget(data) self.state = self.beforeDoctypePublicIdentifierState return True def beforeDoctypePublicIdentifierState(self): data = self.stream.char() if data in spaceCharacters: pass elif data == "\"": self.currentToken["publicId"] = "" self.state = self.doctypePublicIdentifierDoubleQuotedState elif data == "'": self.currentToken["publicId"] = "" self.state = self.doctypePublicIdentifierSingleQuotedState elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.currentToken["correct"] = False self.state = self.bogusDoctypeState return True def doctypePublicIdentifierDoubleQuotedState(self): data = self.stream.char() if data == "\"": self.state = self.afterDoctypePublicIdentifierState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["publicId"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) 
self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["publicId"] += data return True def doctypePublicIdentifierSingleQuotedState(self): data = self.stream.char() if data == "'": self.state = self.afterDoctypePublicIdentifierState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["publicId"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["publicId"] += data return True def afterDoctypePublicIdentifierState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.betweenDoctypePublicAndSystemIdentifiersState elif data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data == '"': self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierDoubleQuotedState elif data == "'": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierSingleQuotedState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.currentToken["correct"] = False self.state = 
self.bogusDoctypeState return True def betweenDoctypePublicAndSystemIdentifiersState(self): data = self.stream.char() if data in spaceCharacters: pass elif data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data == '"': self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierDoubleQuotedState elif data == "'": self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierSingleQuotedState elif data == EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.currentToken["correct"] = False self.state = self.bogusDoctypeState return True def afterDoctypeSystemKeywordState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeDoctypeSystemIdentifierState elif data in ("'", '"'): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.stream.unget(data) self.state = self.beforeDoctypeSystemIdentifierState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.stream.unget(data) self.state = self.beforeDoctypeSystemIdentifierState return True def beforeDoctypeSystemIdentifierState(self): data = self.stream.char() if data in spaceCharacters: pass elif data == "\"": self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierDoubleQuotedState elif data == "'": self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierSingleQuotedState elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.currentToken["correct"] = False 
self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.currentToken["correct"] = False self.state = self.bogusDoctypeState return True def doctypeSystemIdentifierDoubleQuotedState(self): data = self.stream.char() if data == "\"": self.state = self.afterDoctypeSystemIdentifierState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["systemId"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["systemId"] += data return True def doctypeSystemIdentifierSingleQuotedState(self): data = self.stream.char() if data == "'": self.state = self.afterDoctypeSystemIdentifierState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) self.currentToken["systemId"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: 
self.currentToken["systemId"] += data return True def afterDoctypeSystemIdentifierState(self): data = self.stream.char() if data in spaceCharacters: pass elif data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "unexpected-char-in-doctype"}) self.state = self.bogusDoctypeState return True def bogusDoctypeState(self): data = self.stream.char() if data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: # XXX EMIT self.stream.unget(data) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: pass return True def cdataSectionState(self): data = [] while True: data.append(self.stream.charsUntil("]")) data.append(self.stream.charsUntil(">")) char = self.stream.char() if char == EOF: break else: assert char == ">" if data[-1][-2:] == "]]": data[-1] = data[-1][:-2] break else: data.append(char) data = "".join(data) # pylint:disable=redefined-variable-type # Deal with null here rather than in the parser nullCount = data.count("\u0000") if nullCount > 0: for _ in range(nullCount): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) data = data.replace("\u0000", "\uFFFD") if data: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.dataState return True _utils.py000064400000007764147204715120006436 0ustar00from __future__ import absolute_import, division, unicode_literals import sys from types import ModuleType from six import text_type try: import xml.etree.cElementTree as default_etree except ImportError: import xml.etree.ElementTree as default_etree __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", 
"surrogatePairToCodepoint", "moduleFactoryFactory", "supports_lone_surrogates", "PY27"] PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7 # Platforms not supporting lone surrogates (\uD800-\uDFFF) should be # caught by the below test. In general this would be any platform # using UTF-16 as its encoding of unicode strings, such as # Jython. This is because UTF-16 itself is based on the use of such # surrogates, and there is no mechanism to further escape such # escapes. try: _x = eval('"\\uD800"') # pylint:disable=eval-used if not isinstance(_x, text_type): # We need this with u"" because of http://bugs.jython.org/issue2039 _x = eval('u"\\uD800"') # pylint:disable=eval-used assert isinstance(_x, text_type) except: # pylint:disable=bare-except supports_lone_surrogates = False else: supports_lone_surrogates = True class MethodDispatcher(dict): """Dict with 2 special properties: On initiation, keys that are lists, sets or tuples are converted to multiple keys so accessing any one of the items in the original list-like object returns the matching value md = MethodDispatcher({("foo", "bar"):"baz"}) md["foo"] == "baz" A default value which can be set through the default attribute. """ def __init__(self, items=()): # Using _dictEntries instead of directly assigning to self is about # twice as fast. Please do careful performance testing before changing # anything here. 
_dictEntries = [] for name, value in items: if isinstance(name, (list, tuple, frozenset, set)): for item in name: _dictEntries.append((item, value)) else: _dictEntries.append((name, value)) dict.__init__(self, _dictEntries) assert len(self) == len(_dictEntries) self.default = None def __getitem__(self, key): return dict.get(self, key, self.default) # Some utility functions to deal with weirdness around UCS2 vs UCS4 # python builds def isSurrogatePair(data): return (len(data) == 2 and ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) def surrogatePairToCodepoint(data): char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + (ord(data[1]) - 0xDC00)) return char_val # Module Factory Factory (no, this isn't Java, I know) # Here to stop this being duplicated all over the place. def moduleFactoryFactory(factory): moduleCache = {} def moduleFactory(baseModule, *args, **kwargs): if isinstance(ModuleType.__name__, type("")): name = "_%s_factory" % baseModule.__name__ else: name = b"_%s_factory" % baseModule.__name__ kwargs_tuple = tuple(kwargs.items()) try: return moduleCache[name][args][kwargs_tuple] except KeyError: mod = ModuleType(name) objs = factory(baseModule, *args, **kwargs) mod.__dict__.update(objs) if "name" not in moduleCache: moduleCache[name] = {} if "args" not in moduleCache[name]: moduleCache[name][args] = {} if "kwargs" not in moduleCache[name][args]: moduleCache[name][args][kwargs_tuple] = {} moduleCache[name][args][kwargs_tuple] = mod return mod return moduleFactory def memoize(func): cache = {} def wrapped(*args, **kwargs): key = (tuple(args), tuple(kwargs.items())) if key not in cache: cache[key] = func(*args, **kwargs) return cache[key] return wrapped constants.py000064400000242673147204715120007153 0ustar00from __future__ import absolute_import, division, unicode_literals import string EOF = None E = { "null-character": "Null character in input stream, replaced with U+FFFD.", "invalid-codepoint": 
"Invalid codepoint in stream.", "incorrectly-placed-solidus": "Solidus (/) incorrectly placed in tag.", "incorrect-cr-newline-entity": "Incorrect CR newline entity, replaced with LF.", "illegal-windows-1252-entity": "Entity used with illegal number (windows-1252 reference).", "cant-convert-numeric-entity": "Numeric entity couldn't be converted to character " "(codepoint U+%(charAsInt)08x).", "illegal-codepoint-for-numeric-entity": "Numeric entity represents an illegal codepoint: " "U+%(charAsInt)08x.", "numeric-entity-without-semicolon": "Numeric entity didn't end with ';'.", "expected-numeric-entity-but-got-eof": "Numeric entity expected. Got end of file instead.", "expected-numeric-entity": "Numeric entity expected but none found.", "named-entity-without-semicolon": "Named entity didn't end with ';'.", "expected-named-entity": "Named entity expected. Got none.", "attributes-in-end-tag": "End tag contains unexpected attributes.", 'self-closing-flag-on-end-tag': "End tag contains unexpected self-closing flag.", "expected-tag-name-but-got-right-bracket": "Expected tag name. Got '>' instead.", "expected-tag-name-but-got-question-mark": "Expected tag name. Got '?' instead. (HTML doesn't " "support processing instructions.)", "expected-tag-name": "Expected tag name. Got something else instead", "expected-closing-tag-but-got-right-bracket": "Expected closing tag. Got '>' instead. Ignoring ''.", "expected-closing-tag-but-got-eof": "Expected closing tag. Unexpected end of file.", "expected-closing-tag-but-got-char": "Expected closing tag. Unexpected character '%(data)s' found.", "eof-in-tag-name": "Unexpected end of file in the tag name.", "expected-attribute-name-but-got-eof": "Unexpected end of file. 
Expected attribute name instead.", "eof-in-attribute-name": "Unexpected end of file in attribute name.", "invalid-character-in-attribute-name": "Invalid character in attribute name", "duplicate-attribute": "Dropped duplicate attribute on tag.", "expected-end-of-tag-name-but-got-eof": "Unexpected end of file. Expected = or end of tag.", "expected-attribute-value-but-got-eof": "Unexpected end of file. Expected attribute value.", "expected-attribute-value-but-got-right-bracket": "Expected attribute value. Got '>' instead.", 'equals-in-unquoted-attribute-value': "Unexpected = in unquoted attribute", 'unexpected-character-in-unquoted-attribute-value': "Unexpected character in unquoted attribute", "invalid-character-after-attribute-name": "Unexpected character after attribute name.", "unexpected-character-after-attribute-value": "Unexpected character after attribute value.", "eof-in-attribute-value-double-quote": "Unexpected end of file in attribute value (\").", "eof-in-attribute-value-single-quote": "Unexpected end of file in attribute value (').", "eof-in-attribute-value-no-quotes": "Unexpected end of file in attribute value.", "unexpected-EOF-after-solidus-in-tag": "Unexpected end of file in tag. Expected >", "unexpected-character-after-solidus-in-tag": "Unexpected character after / in tag. Expected >", "expected-dashes-or-doctype": "Expected '--' or 'DOCTYPE'. Not found.", "unexpected-bang-after-double-dash-in-comment": "Unexpected ! 
after -- in comment", "unexpected-space-after-double-dash-in-comment": "Unexpected space after -- in comment", "incorrect-comment": "Incorrect comment.", "eof-in-comment": "Unexpected end of file in comment.", "eof-in-comment-end-dash": "Unexpected end of file in comment (-)", "unexpected-dash-after-double-dash-in-comment": "Unexpected '-' after '--' found in comment.", "eof-in-comment-double-dash": "Unexpected end of file in comment (--).", "eof-in-comment-end-space-state": "Unexpected end of file in comment.", "eof-in-comment-end-bang-state": "Unexpected end of file in comment.", "unexpected-char-in-comment": "Unexpected character in comment found.", "need-space-after-doctype": "No space after literal string 'DOCTYPE'.", "expected-doctype-name-but-got-right-bracket": "Unexpected > character. Expected DOCTYPE name.", "expected-doctype-name-but-got-eof": "Unexpected end of file. Expected DOCTYPE name.", "eof-in-doctype-name": "Unexpected end of file in DOCTYPE name.", "eof-in-doctype": "Unexpected end of file in DOCTYPE.", "expected-space-or-right-bracket-in-doctype": "Expected space or '>'. Got '%(data)s'", "unexpected-end-of-doctype": "Unexpected end of DOCTYPE.", "unexpected-char-in-doctype": "Unexpected character in DOCTYPE.", "eof-in-innerhtml": "XXX innerHTML EOF", "unexpected-doctype": "Unexpected DOCTYPE. Ignored.", "non-html-root": "html needs to be the first start tag.", "expected-doctype-but-got-eof": "Unexpected End of file. Expected DOCTYPE.", "unknown-doctype": "Erroneous DOCTYPE.", "expected-doctype-but-got-chars": "Unexpected non-space characters. Expected DOCTYPE.", "expected-doctype-but-got-start-tag": "Unexpected start tag (%(name)s). Expected DOCTYPE.", "expected-doctype-but-got-end-tag": "Unexpected end tag (%(name)s). Expected DOCTYPE.", "end-tag-after-implied-root": "Unexpected end tag (%(name)s) after the (implied) root element.", "expected-named-closing-tag-but-got-eof": "Unexpected end of file. 
Expected end tag (%(name)s).", "two-heads-are-not-better-than-one": "Unexpected start tag head in existing head. Ignored.", "unexpected-end-tag": "Unexpected end tag (%(name)s). Ignored.", "unexpected-start-tag-out-of-my-head": "Unexpected start tag (%(name)s) that can be in head. Moved.", "unexpected-start-tag": "Unexpected start tag (%(name)s).", "missing-end-tag": "Missing end tag (%(name)s).", "missing-end-tags": "Missing end tags (%(name)s).", "unexpected-start-tag-implies-end-tag": "Unexpected start tag (%(startName)s) " "implies end tag (%(endName)s).", "unexpected-start-tag-treated-as": "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", "deprecated-tag": "Unexpected start tag %(name)s. Don't use it!", "unexpected-start-tag-ignored": "Unexpected start tag %(name)s. Ignored.", "expected-one-end-tag-but-got-another": "Unexpected end tag (%(gotName)s). " "Missing end tag (%(expectedName)s).", "end-tag-too-early": "End tag (%(name)s) seen too early. Expected other end tag.", "end-tag-too-early-named": "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", "end-tag-too-early-ignored": "End tag (%(name)s) seen too early. Ignored.", "adoption-agency-1.1": "End tag (%(name)s) violates step 1, " "paragraph 1 of the adoption agency algorithm.", "adoption-agency-1.2": "End tag (%(name)s) violates step 1, " "paragraph 2 of the adoption agency algorithm.", "adoption-agency-1.3": "End tag (%(name)s) violates step 1, " "paragraph 3 of the adoption agency algorithm.", "adoption-agency-4.4": "End tag (%(name)s) violates step 4, " "paragraph 4 of the adoption agency algorithm.", "unexpected-end-tag-treated-as": "Unexpected end tag (%(originalName)s). 
Treated as %(newName)s.", "no-end-tag": "This element (%(name)s) has no end tag.", "unexpected-implied-end-tag-in-table": "Unexpected implied end tag (%(name)s) in the table phase.", "unexpected-implied-end-tag-in-table-body": "Unexpected implied end tag (%(name)s) in the table body phase.", "unexpected-char-implies-table-voodoo": "Unexpected non-space characters in " "table context caused voodoo mode.", "unexpected-hidden-input-in-table": "Unexpected input with type hidden in table context.", "unexpected-form-in-table": "Unexpected form in table context.", "unexpected-start-tag-implies-table-voodoo": "Unexpected start tag (%(name)s) in " "table context caused voodoo mode.", "unexpected-end-tag-implies-table-voodoo": "Unexpected end tag (%(name)s) in " "table context caused voodoo mode.", "unexpected-cell-in-table-body": "Unexpected table cell start tag (%(name)s) " "in the table body phase.", "unexpected-cell-end-tag": "Got table cell end tag (%(name)s) " "while required end tags are missing.", "unexpected-end-tag-in-table-body": "Unexpected end tag (%(name)s) in the table body phase. Ignored.", "unexpected-implied-end-tag-in-table-row": "Unexpected implied end tag (%(name)s) in the table row phase.", "unexpected-end-tag-in-table-row": "Unexpected end tag (%(name)s) in the table row phase. Ignored.", "unexpected-select-in-select": "Unexpected select start tag in the select phase " "treated as select end tag.", "unexpected-input-in-select": "Unexpected input start tag in the select phase.", "unexpected-start-tag-in-select": "Unexpected start tag token (%(name)s in the select phase. " "Ignored.", "unexpected-end-tag-in-select": "Unexpected end tag (%(name)s) in the select phase. 
Ignored.", "unexpected-table-element-start-tag-in-select-in-table": "Unexpected table element start tag (%(name)s) in the select in table phase.", "unexpected-table-element-end-tag-in-select-in-table": "Unexpected table element end tag (%(name)s) in the select in table phase.", "unexpected-char-after-body": "Unexpected non-space characters in the after body phase.", "unexpected-start-tag-after-body": "Unexpected start tag token (%(name)s)" " in the after body phase.", "unexpected-end-tag-after-body": "Unexpected end tag token (%(name)s)" " in the after body phase.", "unexpected-char-in-frameset": "Unexpected characters in the frameset phase. Characters ignored.", "unexpected-start-tag-in-frameset": "Unexpected start tag token (%(name)s)" " in the frameset phase. Ignored.", "unexpected-frameset-in-frameset-innerhtml": "Unexpected end tag token (frameset) " "in the frameset phase (innerHTML).", "unexpected-end-tag-in-frameset": "Unexpected end tag token (%(name)s)" " in the frameset phase. Ignored.", "unexpected-char-after-frameset": "Unexpected non-space characters in the " "after frameset phase. Ignored.", "unexpected-start-tag-after-frameset": "Unexpected start tag (%(name)s)" " in the after frameset phase. Ignored.", "unexpected-end-tag-after-frameset": "Unexpected end tag (%(name)s)" " in the after frameset phase. Ignored.", "unexpected-end-tag-after-body-innerhtml": "Unexpected end tag after body(innerHtml)", "expected-eof-but-got-char": "Unexpected non-space characters. Expected end of file.", "expected-eof-but-got-start-tag": "Unexpected start tag (%(name)s)" ". Expected end of file.", "expected-eof-but-got-end-tag": "Unexpected end tag (%(name)s)" ". Expected end of file.", "eof-in-table": "Unexpected end of file. Expected table content.", "eof-in-select": "Unexpected end of file. Expected select content.", "eof-in-frameset": "Unexpected end of file. Expected frameset content.", "eof-in-script-in-script": "Unexpected end of file. 
Expected script content.", "eof-in-foreign-lands": "Unexpected end of file. Expected foreign content", "non-void-element-with-trailing-solidus": "Trailing solidus not allowed on element %(name)s", "unexpected-html-element-in-foreign-content": "Element %(name)s not allowed in a non-html context", "unexpected-end-tag-before-html": "Unexpected end tag (%(name)s) before html.", "unexpected-inhead-noscript-tag": "Element %(name)s not allowed in a inhead-noscript context", "eof-in-head-noscript": "Unexpected end of file. Expected inhead-noscript content", "char-in-head-noscript": "Unexpected non-space character. Expected inhead-noscript content", "XXX-undefined-error": "Undefined error (this sucks and should be fixed)", } namespaces = { "html": "http://www.w3.org/1999/xhtml", "mathml": "http://www.w3.org/1998/Math/MathML", "svg": "http://www.w3.org/2000/svg", "xlink": "http://www.w3.org/1999/xlink", "xml": "http://www.w3.org/XML/1998/namespace", "xmlns": "http://www.w3.org/2000/xmlns/" } scopingElements = frozenset([ (namespaces["html"], "applet"), (namespaces["html"], "caption"), (namespaces["html"], "html"), (namespaces["html"], "marquee"), (namespaces["html"], "object"), (namespaces["html"], "table"), (namespaces["html"], "td"), (namespaces["html"], "th"), (namespaces["mathml"], "mi"), (namespaces["mathml"], "mo"), (namespaces["mathml"], "mn"), (namespaces["mathml"], "ms"), (namespaces["mathml"], "mtext"), (namespaces["mathml"], "annotation-xml"), (namespaces["svg"], "foreignObject"), (namespaces["svg"], "desc"), (namespaces["svg"], "title"), ]) formattingElements = frozenset([ (namespaces["html"], "a"), (namespaces["html"], "b"), (namespaces["html"], "big"), (namespaces["html"], "code"), (namespaces["html"], "em"), (namespaces["html"], "font"), (namespaces["html"], "i"), (namespaces["html"], "nobr"), (namespaces["html"], "s"), (namespaces["html"], "small"), (namespaces["html"], "strike"), (namespaces["html"], "strong"), (namespaces["html"], "tt"), 
(namespaces["html"], "u") ]) specialElements = frozenset([ (namespaces["html"], "address"), (namespaces["html"], "applet"), (namespaces["html"], "area"), (namespaces["html"], "article"), (namespaces["html"], "aside"), (namespaces["html"], "base"), (namespaces["html"], "basefont"), (namespaces["html"], "bgsound"), (namespaces["html"], "blockquote"), (namespaces["html"], "body"), (namespaces["html"], "br"), (namespaces["html"], "button"), (namespaces["html"], "caption"), (namespaces["html"], "center"), (namespaces["html"], "col"), (namespaces["html"], "colgroup"), (namespaces["html"], "command"), (namespaces["html"], "dd"), (namespaces["html"], "details"), (namespaces["html"], "dir"), (namespaces["html"], "div"), (namespaces["html"], "dl"), (namespaces["html"], "dt"), (namespaces["html"], "embed"), (namespaces["html"], "fieldset"), (namespaces["html"], "figure"), (namespaces["html"], "footer"), (namespaces["html"], "form"), (namespaces["html"], "frame"), (namespaces["html"], "frameset"), (namespaces["html"], "h1"), (namespaces["html"], "h2"), (namespaces["html"], "h3"), (namespaces["html"], "h4"), (namespaces["html"], "h5"), (namespaces["html"], "h6"), (namespaces["html"], "head"), (namespaces["html"], "header"), (namespaces["html"], "hr"), (namespaces["html"], "html"), (namespaces["html"], "iframe"), # Note that image is commented out in the spec as "this isn't an # element that can end up on the stack, so it doesn't matter," (namespaces["html"], "image"), (namespaces["html"], "img"), (namespaces["html"], "input"), (namespaces["html"], "isindex"), (namespaces["html"], "li"), (namespaces["html"], "link"), (namespaces["html"], "listing"), (namespaces["html"], "marquee"), (namespaces["html"], "menu"), (namespaces["html"], "meta"), (namespaces["html"], "nav"), (namespaces["html"], "noembed"), (namespaces["html"], "noframes"), (namespaces["html"], "noscript"), (namespaces["html"], "object"), (namespaces["html"], "ol"), (namespaces["html"], "p"), (namespaces["html"], 
"param"), (namespaces["html"], "plaintext"), (namespaces["html"], "pre"), (namespaces["html"], "script"), (namespaces["html"], "section"), (namespaces["html"], "select"), (namespaces["html"], "style"), (namespaces["html"], "table"), (namespaces["html"], "tbody"), (namespaces["html"], "td"), (namespaces["html"], "textarea"), (namespaces["html"], "tfoot"), (namespaces["html"], "th"), (namespaces["html"], "thead"), (namespaces["html"], "title"), (namespaces["html"], "tr"), (namespaces["html"], "ul"), (namespaces["html"], "wbr"), (namespaces["html"], "xmp"), (namespaces["svg"], "foreignObject") ]) htmlIntegrationPointElements = frozenset([ (namespaces["mathml"], "annotaion-xml"), (namespaces["svg"], "foreignObject"), (namespaces["svg"], "desc"), (namespaces["svg"], "title") ]) mathmlTextIntegrationPointElements = frozenset([ (namespaces["mathml"], "mi"), (namespaces["mathml"], "mo"), (namespaces["mathml"], "mn"), (namespaces["mathml"], "ms"), (namespaces["mathml"], "mtext") ]) adjustSVGAttributes = { "attributename": "attributeName", "attributetype": "attributeType", "basefrequency": "baseFrequency", "baseprofile": "baseProfile", "calcmode": "calcMode", "clippathunits": "clipPathUnits", "contentscripttype": "contentScriptType", "contentstyletype": "contentStyleType", "diffuseconstant": "diffuseConstant", "edgemode": "edgeMode", "externalresourcesrequired": "externalResourcesRequired", "filterres": "filterRes", "filterunits": "filterUnits", "glyphref": "glyphRef", "gradienttransform": "gradientTransform", "gradientunits": "gradientUnits", "kernelmatrix": "kernelMatrix", "kernelunitlength": "kernelUnitLength", "keypoints": "keyPoints", "keysplines": "keySplines", "keytimes": "keyTimes", "lengthadjust": "lengthAdjust", "limitingconeangle": "limitingConeAngle", "markerheight": "markerHeight", "markerunits": "markerUnits", "markerwidth": "markerWidth", "maskcontentunits": "maskContentUnits", "maskunits": "maskUnits", "numoctaves": "numOctaves", "pathlength": "pathLength", 
"patterncontentunits": "patternContentUnits", "patterntransform": "patternTransform", "patternunits": "patternUnits", "pointsatx": "pointsAtX", "pointsaty": "pointsAtY", "pointsatz": "pointsAtZ", "preservealpha": "preserveAlpha", "preserveaspectratio": "preserveAspectRatio", "primitiveunits": "primitiveUnits", "refx": "refX", "refy": "refY", "repeatcount": "repeatCount", "repeatdur": "repeatDur", "requiredextensions": "requiredExtensions", "requiredfeatures": "requiredFeatures", "specularconstant": "specularConstant", "specularexponent": "specularExponent", "spreadmethod": "spreadMethod", "startoffset": "startOffset", "stddeviation": "stdDeviation", "stitchtiles": "stitchTiles", "surfacescale": "surfaceScale", "systemlanguage": "systemLanguage", "tablevalues": "tableValues", "targetx": "targetX", "targety": "targetY", "textlength": "textLength", "viewbox": "viewBox", "viewtarget": "viewTarget", "xchannelselector": "xChannelSelector", "ychannelselector": "yChannelSelector", "zoomandpan": "zoomAndPan" } adjustMathMLAttributes = {"definitionurl": "definitionURL"} adjustForeignAttributes = { "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), "xlink:href": ("xlink", "href", namespaces["xlink"]), "xlink:role": ("xlink", "role", namespaces["xlink"]), "xlink:show": ("xlink", "show", namespaces["xlink"]), "xlink:title": ("xlink", "title", namespaces["xlink"]), "xlink:type": ("xlink", "type", namespaces["xlink"]), "xml:base": ("xml", "base", namespaces["xml"]), "xml:lang": ("xml", "lang", namespaces["xml"]), "xml:space": ("xml", "space", namespaces["xml"]), "xmlns": (None, "xmlns", namespaces["xmlns"]), "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) } unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in adjustForeignAttributes.items()]) spaceCharacters = frozenset([ "\t", "\n", "\u000C", " ", "\r" ]) tableInsertModeElements = frozenset([ "table", "tbody", "tfoot", 
"thead", "tr" ]) asciiLowercase = frozenset(string.ascii_lowercase) asciiUppercase = frozenset(string.ascii_uppercase) asciiLetters = frozenset(string.ascii_letters) digits = frozenset(string.digits) hexDigits = frozenset(string.hexdigits) asciiUpper2Lower = dict([(ord(c), ord(c.lower())) for c in string.ascii_uppercase]) # Heading elements need to be ordered headingElements = ( "h1", "h2", "h3", "h4", "h5", "h6" ) voidElements = frozenset([ "base", "command", "event-source", "link", "meta", "hr", "br", "img", "embed", "param", "area", "col", "input", "source", "track" ]) cdataElements = frozenset(['title', 'textarea']) rcdataElements = frozenset([ 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript' ]) booleanAttributes = { "": frozenset(["irrelevant"]), "style": frozenset(["scoped"]), "img": frozenset(["ismap"]), "audio": frozenset(["autoplay", "controls"]), "video": frozenset(["autoplay", "controls"]), "script": frozenset(["defer", "async"]), "details": frozenset(["open"]), "datagrid": frozenset(["multiple", "disabled"]), "command": frozenset(["hidden", "disabled", "checked", "default"]), "hr": frozenset(["noshade"]), "menu": frozenset(["autosubmit"]), "fieldset": frozenset(["disabled", "readonly"]), "option": frozenset(["disabled", "readonly", "selected"]), "optgroup": frozenset(["disabled", "readonly"]), "button": frozenset(["disabled", "autofocus"]), "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), "output": frozenset(["disabled", "readonly"]), } # entitiesWindows1252 has to be _ordered_ and needs to have an index. It # therefore can't be a frozenset. 
entitiesWindows1252 = ( 8364, # 0x80 0x20AC EURO SIGN 65533, # 0x81 UNDEFINED 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS 8224, # 0x86 0x2020 DAGGER 8225, # 0x87 0x2021 DOUBLE DAGGER 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT 8240, # 0x89 0x2030 PER MILLE SIGN 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE 65533, # 0x8D UNDEFINED 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON 65533, # 0x8F UNDEFINED 65533, # 0x90 UNDEFINED 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK 8226, # 0x95 0x2022 BULLET 8211, # 0x96 0x2013 EN DASH 8212, # 0x97 0x2014 EM DASH 732, # 0x98 0x02DC SMALL TILDE 8482, # 0x99 0x2122 TRADE MARK SIGN 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE 65533, # 0x9D UNDEFINED 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS ) xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) entities = { "AElig": "\xc6", "AElig;": "\xc6", "AMP": "&", "AMP;": "&", "Aacute": "\xc1", "Aacute;": "\xc1", "Abreve;": "\u0102", "Acirc": "\xc2", "Acirc;": "\xc2", "Acy;": "\u0410", "Afr;": "\U0001d504", "Agrave": "\xc0", "Agrave;": "\xc0", "Alpha;": "\u0391", "Amacr;": "\u0100", "And;": "\u2a53", "Aogon;": "\u0104", "Aopf;": "\U0001d538", "ApplyFunction;": "\u2061", "Aring": "\xc5", "Aring;": "\xc5", "Ascr;": "\U0001d49c", "Assign;": "\u2254", "Atilde": "\xc3", "Atilde;": "\xc3", "Auml": "\xc4", "Auml;": "\xc4", "Backslash;": "\u2216", "Barv;": "\u2ae7", "Barwed;": "\u2306", "Bcy;": "\u0411", 
"Because;": "\u2235", "Bernoullis;": "\u212c", "Beta;": "\u0392", "Bfr;": "\U0001d505", "Bopf;": "\U0001d539", "Breve;": "\u02d8", "Bscr;": "\u212c", "Bumpeq;": "\u224e", "CHcy;": "\u0427", "COPY": "\xa9", "COPY;": "\xa9", "Cacute;": "\u0106", "Cap;": "\u22d2", "CapitalDifferentialD;": "\u2145", "Cayleys;": "\u212d", "Ccaron;": "\u010c", "Ccedil": "\xc7", "Ccedil;": "\xc7", "Ccirc;": "\u0108", "Cconint;": "\u2230", "Cdot;": "\u010a", "Cedilla;": "\xb8", "CenterDot;": "\xb7", "Cfr;": "\u212d", "Chi;": "\u03a7", "CircleDot;": "\u2299", "CircleMinus;": "\u2296", "CirclePlus;": "\u2295", "CircleTimes;": "\u2297", "ClockwiseContourIntegral;": "\u2232", "CloseCurlyDoubleQuote;": "\u201d", "CloseCurlyQuote;": "\u2019", "Colon;": "\u2237", "Colone;": "\u2a74", "Congruent;": "\u2261", "Conint;": "\u222f", "ContourIntegral;": "\u222e", "Copf;": "\u2102", "Coproduct;": "\u2210", "CounterClockwiseContourIntegral;": "\u2233", "Cross;": "\u2a2f", "Cscr;": "\U0001d49e", "Cup;": "\u22d3", "CupCap;": "\u224d", "DD;": "\u2145", "DDotrahd;": "\u2911", "DJcy;": "\u0402", "DScy;": "\u0405", "DZcy;": "\u040f", "Dagger;": "\u2021", "Darr;": "\u21a1", "Dashv;": "\u2ae4", "Dcaron;": "\u010e", "Dcy;": "\u0414", "Del;": "\u2207", "Delta;": "\u0394", "Dfr;": "\U0001d507", "DiacriticalAcute;": "\xb4", "DiacriticalDot;": "\u02d9", "DiacriticalDoubleAcute;": "\u02dd", "DiacriticalGrave;": "`", "DiacriticalTilde;": "\u02dc", "Diamond;": "\u22c4", "DifferentialD;": "\u2146", "Dopf;": "\U0001d53b", "Dot;": "\xa8", "DotDot;": "\u20dc", "DotEqual;": "\u2250", "DoubleContourIntegral;": "\u222f", "DoubleDot;": "\xa8", "DoubleDownArrow;": "\u21d3", "DoubleLeftArrow;": "\u21d0", "DoubleLeftRightArrow;": "\u21d4", "DoubleLeftTee;": "\u2ae4", "DoubleLongLeftArrow;": "\u27f8", "DoubleLongLeftRightArrow;": "\u27fa", "DoubleLongRightArrow;": "\u27f9", "DoubleRightArrow;": "\u21d2", "DoubleRightTee;": "\u22a8", "DoubleUpArrow;": "\u21d1", "DoubleUpDownArrow;": "\u21d5", "DoubleVerticalBar;": "\u2225", 
"DownArrow;": "\u2193", "DownArrowBar;": "\u2913", "DownArrowUpArrow;": "\u21f5", "DownBreve;": "\u0311", "DownLeftRightVector;": "\u2950", "DownLeftTeeVector;": "\u295e", "DownLeftVector;": "\u21bd", "DownLeftVectorBar;": "\u2956", "DownRightTeeVector;": "\u295f", "DownRightVector;": "\u21c1", "DownRightVectorBar;": "\u2957", "DownTee;": "\u22a4", "DownTeeArrow;": "\u21a7", "Downarrow;": "\u21d3", "Dscr;": "\U0001d49f", "Dstrok;": "\u0110", "ENG;": "\u014a", "ETH": "\xd0", "ETH;": "\xd0", "Eacute": "\xc9", "Eacute;": "\xc9", "Ecaron;": "\u011a", "Ecirc": "\xca", "Ecirc;": "\xca", "Ecy;": "\u042d", "Edot;": "\u0116", "Efr;": "\U0001d508", "Egrave": "\xc8", "Egrave;": "\xc8", "Element;": "\u2208", "Emacr;": "\u0112", "EmptySmallSquare;": "\u25fb", "EmptyVerySmallSquare;": "\u25ab", "Eogon;": "\u0118", "Eopf;": "\U0001d53c", "Epsilon;": "\u0395", "Equal;": "\u2a75", "EqualTilde;": "\u2242", "Equilibrium;": "\u21cc", "Escr;": "\u2130", "Esim;": "\u2a73", "Eta;": "\u0397", "Euml": "\xcb", "Euml;": "\xcb", "Exists;": "\u2203", "ExponentialE;": "\u2147", "Fcy;": "\u0424", "Ffr;": "\U0001d509", "FilledSmallSquare;": "\u25fc", "FilledVerySmallSquare;": "\u25aa", "Fopf;": "\U0001d53d", "ForAll;": "\u2200", "Fouriertrf;": "\u2131", "Fscr;": "\u2131", "GJcy;": "\u0403", "GT": ">", "GT;": ">", "Gamma;": "\u0393", "Gammad;": "\u03dc", "Gbreve;": "\u011e", "Gcedil;": "\u0122", "Gcirc;": "\u011c", "Gcy;": "\u0413", "Gdot;": "\u0120", "Gfr;": "\U0001d50a", "Gg;": "\u22d9", "Gopf;": "\U0001d53e", "GreaterEqual;": "\u2265", "GreaterEqualLess;": "\u22db", "GreaterFullEqual;": "\u2267", "GreaterGreater;": "\u2aa2", "GreaterLess;": "\u2277", "GreaterSlantEqual;": "\u2a7e", "GreaterTilde;": "\u2273", "Gscr;": "\U0001d4a2", "Gt;": "\u226b", "HARDcy;": "\u042a", "Hacek;": "\u02c7", "Hat;": "^", "Hcirc;": "\u0124", "Hfr;": "\u210c", "HilbertSpace;": "\u210b", "Hopf;": "\u210d", "HorizontalLine;": "\u2500", "Hscr;": "\u210b", "Hstrok;": "\u0126", "HumpDownHump;": "\u224e", "HumpEqual;": 
"\u224f", "IEcy;": "\u0415", "IJlig;": "\u0132", "IOcy;": "\u0401", "Iacute": "\xcd", "Iacute;": "\xcd", "Icirc": "\xce", "Icirc;": "\xce", "Icy;": "\u0418", "Idot;": "\u0130", "Ifr;": "\u2111", "Igrave": "\xcc", "Igrave;": "\xcc", "Im;": "\u2111", "Imacr;": "\u012a", "ImaginaryI;": "\u2148", "Implies;": "\u21d2", "Int;": "\u222c", "Integral;": "\u222b", "Intersection;": "\u22c2", "InvisibleComma;": "\u2063", "InvisibleTimes;": "\u2062", "Iogon;": "\u012e", "Iopf;": "\U0001d540", "Iota;": "\u0399", "Iscr;": "\u2110", "Itilde;": "\u0128", "Iukcy;": "\u0406", "Iuml": "\xcf", "Iuml;": "\xcf", "Jcirc;": "\u0134", "Jcy;": "\u0419", "Jfr;": "\U0001d50d", "Jopf;": "\U0001d541", "Jscr;": "\U0001d4a5", "Jsercy;": "\u0408", "Jukcy;": "\u0404", "KHcy;": "\u0425", "KJcy;": "\u040c", "Kappa;": "\u039a", "Kcedil;": "\u0136", "Kcy;": "\u041a", "Kfr;": "\U0001d50e", "Kopf;": "\U0001d542", "Kscr;": "\U0001d4a6", "LJcy;": "\u0409", "LT": "<", "LT;": "<", "Lacute;": "\u0139", "Lambda;": "\u039b", "Lang;": "\u27ea", "Laplacetrf;": "\u2112", "Larr;": "\u219e", "Lcaron;": "\u013d", "Lcedil;": "\u013b", "Lcy;": "\u041b", "LeftAngleBracket;": "\u27e8", "LeftArrow;": "\u2190", "LeftArrowBar;": "\u21e4", "LeftArrowRightArrow;": "\u21c6", "LeftCeiling;": "\u2308", "LeftDoubleBracket;": "\u27e6", "LeftDownTeeVector;": "\u2961", "LeftDownVector;": "\u21c3", "LeftDownVectorBar;": "\u2959", "LeftFloor;": "\u230a", "LeftRightArrow;": "\u2194", "LeftRightVector;": "\u294e", "LeftTee;": "\u22a3", "LeftTeeArrow;": "\u21a4", "LeftTeeVector;": "\u295a", "LeftTriangle;": "\u22b2", "LeftTriangleBar;": "\u29cf", "LeftTriangleEqual;": "\u22b4", "LeftUpDownVector;": "\u2951", "LeftUpTeeVector;": "\u2960", "LeftUpVector;": "\u21bf", "LeftUpVectorBar;": "\u2958", "LeftVector;": "\u21bc", "LeftVectorBar;": "\u2952", "Leftarrow;": "\u21d0", "Leftrightarrow;": "\u21d4", "LessEqualGreater;": "\u22da", "LessFullEqual;": "\u2266", "LessGreater;": "\u2276", "LessLess;": "\u2aa1", "LessSlantEqual;": "\u2a7d", 
"LessTilde;": "\u2272", "Lfr;": "\U0001d50f", "Ll;": "\u22d8", "Lleftarrow;": "\u21da", "Lmidot;": "\u013f", "LongLeftArrow;": "\u27f5", "LongLeftRightArrow;": "\u27f7", "LongRightArrow;": "\u27f6", "Longleftarrow;": "\u27f8", "Longleftrightarrow;": "\u27fa", "Longrightarrow;": "\u27f9", "Lopf;": "\U0001d543", "LowerLeftArrow;": "\u2199", "LowerRightArrow;": "\u2198", "Lscr;": "\u2112", "Lsh;": "\u21b0", "Lstrok;": "\u0141", "Lt;": "\u226a", "Map;": "\u2905", "Mcy;": "\u041c", "MediumSpace;": "\u205f", "Mellintrf;": "\u2133", "Mfr;": "\U0001d510", "MinusPlus;": "\u2213", "Mopf;": "\U0001d544", "Mscr;": "\u2133", "Mu;": "\u039c", "NJcy;": "\u040a", "Nacute;": "\u0143", "Ncaron;": "\u0147", "Ncedil;": "\u0145", "Ncy;": "\u041d", "NegativeMediumSpace;": "\u200b", "NegativeThickSpace;": "\u200b", "NegativeThinSpace;": "\u200b", "NegativeVeryThinSpace;": "\u200b", "NestedGreaterGreater;": "\u226b", "NestedLessLess;": "\u226a", "NewLine;": "\n", "Nfr;": "\U0001d511", "NoBreak;": "\u2060", "NonBreakingSpace;": "\xa0", "Nopf;": "\u2115", "Not;": "\u2aec", "NotCongruent;": "\u2262", "NotCupCap;": "\u226d", "NotDoubleVerticalBar;": "\u2226", "NotElement;": "\u2209", "NotEqual;": "\u2260", "NotEqualTilde;": "\u2242\u0338", "NotExists;": "\u2204", "NotGreater;": "\u226f", "NotGreaterEqual;": "\u2271", "NotGreaterFullEqual;": "\u2267\u0338", "NotGreaterGreater;": "\u226b\u0338", "NotGreaterLess;": "\u2279", "NotGreaterSlantEqual;": "\u2a7e\u0338", "NotGreaterTilde;": "\u2275", "NotHumpDownHump;": "\u224e\u0338", "NotHumpEqual;": "\u224f\u0338", "NotLeftTriangle;": "\u22ea", "NotLeftTriangleBar;": "\u29cf\u0338", "NotLeftTriangleEqual;": "\u22ec", "NotLess;": "\u226e", "NotLessEqual;": "\u2270", "NotLessGreater;": "\u2278", "NotLessLess;": "\u226a\u0338", "NotLessSlantEqual;": "\u2a7d\u0338", "NotLessTilde;": "\u2274", "NotNestedGreaterGreater;": "\u2aa2\u0338", "NotNestedLessLess;": "\u2aa1\u0338", "NotPrecedes;": "\u2280", "NotPrecedesEqual;": "\u2aaf\u0338", 
"NotPrecedesSlantEqual;": "\u22e0", "NotReverseElement;": "\u220c", "NotRightTriangle;": "\u22eb", "NotRightTriangleBar;": "\u29d0\u0338", "NotRightTriangleEqual;": "\u22ed", "NotSquareSubset;": "\u228f\u0338", "NotSquareSubsetEqual;": "\u22e2", "NotSquareSuperset;": "\u2290\u0338", "NotSquareSupersetEqual;": "\u22e3", "NotSubset;": "\u2282\u20d2", "NotSubsetEqual;": "\u2288", "NotSucceeds;": "\u2281", "NotSucceedsEqual;": "\u2ab0\u0338", "NotSucceedsSlantEqual;": "\u22e1", "NotSucceedsTilde;": "\u227f\u0338", "NotSuperset;": "\u2283\u20d2", "NotSupersetEqual;": "\u2289", "NotTilde;": "\u2241", "NotTildeEqual;": "\u2244", "NotTildeFullEqual;": "\u2247", "NotTildeTilde;": "\u2249", "NotVerticalBar;": "\u2224", "Nscr;": "\U0001d4a9", "Ntilde": "\xd1", "Ntilde;": "\xd1", "Nu;": "\u039d", "OElig;": "\u0152", "Oacute": "\xd3", "Oacute;": "\xd3", "Ocirc": "\xd4", "Ocirc;": "\xd4", "Ocy;": "\u041e", "Odblac;": "\u0150", "Ofr;": "\U0001d512", "Ograve": "\xd2", "Ograve;": "\xd2", "Omacr;": "\u014c", "Omega;": "\u03a9", "Omicron;": "\u039f", "Oopf;": "\U0001d546", "OpenCurlyDoubleQuote;": "\u201c", "OpenCurlyQuote;": "\u2018", "Or;": "\u2a54", "Oscr;": "\U0001d4aa", "Oslash": "\xd8", "Oslash;": "\xd8", "Otilde": "\xd5", "Otilde;": "\xd5", "Otimes;": "\u2a37", "Ouml": "\xd6", "Ouml;": "\xd6", "OverBar;": "\u203e", "OverBrace;": "\u23de", "OverBracket;": "\u23b4", "OverParenthesis;": "\u23dc", "PartialD;": "\u2202", "Pcy;": "\u041f", "Pfr;": "\U0001d513", "Phi;": "\u03a6", "Pi;": "\u03a0", "PlusMinus;": "\xb1", "Poincareplane;": "\u210c", "Popf;": "\u2119", "Pr;": "\u2abb", "Precedes;": "\u227a", "PrecedesEqual;": "\u2aaf", "PrecedesSlantEqual;": "\u227c", "PrecedesTilde;": "\u227e", "Prime;": "\u2033", "Product;": "\u220f", "Proportion;": "\u2237", "Proportional;": "\u221d", "Pscr;": "\U0001d4ab", "Psi;": "\u03a8", "QUOT": "\"", "QUOT;": "\"", "Qfr;": "\U0001d514", "Qopf;": "\u211a", "Qscr;": "\U0001d4ac", "RBarr;": "\u2910", "REG": "\xae", "REG;": "\xae", "Racute;": 
"\u0154", "Rang;": "\u27eb", "Rarr;": "\u21a0", "Rarrtl;": "\u2916", "Rcaron;": "\u0158", "Rcedil;": "\u0156", "Rcy;": "\u0420", "Re;": "\u211c", "ReverseElement;": "\u220b", "ReverseEquilibrium;": "\u21cb", "ReverseUpEquilibrium;": "\u296f", "Rfr;": "\u211c", "Rho;": "\u03a1", "RightAngleBracket;": "\u27e9", "RightArrow;": "\u2192", "RightArrowBar;": "\u21e5", "RightArrowLeftArrow;": "\u21c4", "RightCeiling;": "\u2309", "RightDoubleBracket;": "\u27e7", "RightDownTeeVector;": "\u295d", "RightDownVector;": "\u21c2", "RightDownVectorBar;": "\u2955", "RightFloor;": "\u230b", "RightTee;": "\u22a2", "RightTeeArrow;": "\u21a6", "RightTeeVector;": "\u295b", "RightTriangle;": "\u22b3", "RightTriangleBar;": "\u29d0", "RightTriangleEqual;": "\u22b5", "RightUpDownVector;": "\u294f", "RightUpTeeVector;": "\u295c", "RightUpVector;": "\u21be", "RightUpVectorBar;": "\u2954", "RightVector;": "\u21c0", "RightVectorBar;": "\u2953", "Rightarrow;": "\u21d2", "Ropf;": "\u211d", "RoundImplies;": "\u2970", "Rrightarrow;": "\u21db", "Rscr;": "\u211b", "Rsh;": "\u21b1", "RuleDelayed;": "\u29f4", "SHCHcy;": "\u0429", "SHcy;": "\u0428", "SOFTcy;": "\u042c", "Sacute;": "\u015a", "Sc;": "\u2abc", "Scaron;": "\u0160", "Scedil;": "\u015e", "Scirc;": "\u015c", "Scy;": "\u0421", "Sfr;": "\U0001d516", "ShortDownArrow;": "\u2193", "ShortLeftArrow;": "\u2190", "ShortRightArrow;": "\u2192", "ShortUpArrow;": "\u2191", "Sigma;": "\u03a3", "SmallCircle;": "\u2218", "Sopf;": "\U0001d54a", "Sqrt;": "\u221a", "Square;": "\u25a1", "SquareIntersection;": "\u2293", "SquareSubset;": "\u228f", "SquareSubsetEqual;": "\u2291", "SquareSuperset;": "\u2290", "SquareSupersetEqual;": "\u2292", "SquareUnion;": "\u2294", "Sscr;": "\U0001d4ae", "Star;": "\u22c6", "Sub;": "\u22d0", "Subset;": "\u22d0", "SubsetEqual;": "\u2286", "Succeeds;": "\u227b", "SucceedsEqual;": "\u2ab0", "SucceedsSlantEqual;": "\u227d", "SucceedsTilde;": "\u227f", "SuchThat;": "\u220b", "Sum;": "\u2211", "Sup;": "\u22d1", "Superset;": "\u2283", 
"SupersetEqual;": "\u2287", "Supset;": "\u22d1", "THORN": "\xde", "THORN;": "\xde", "TRADE;": "\u2122", "TSHcy;": "\u040b", "TScy;": "\u0426", "Tab;": "\t", "Tau;": "\u03a4", "Tcaron;": "\u0164", "Tcedil;": "\u0162", "Tcy;": "\u0422", "Tfr;": "\U0001d517", "Therefore;": "\u2234", "Theta;": "\u0398", "ThickSpace;": "\u205f\u200a", "ThinSpace;": "\u2009", "Tilde;": "\u223c", "TildeEqual;": "\u2243", "TildeFullEqual;": "\u2245", "TildeTilde;": "\u2248", "Topf;": "\U0001d54b", "TripleDot;": "\u20db", "Tscr;": "\U0001d4af", "Tstrok;": "\u0166", "Uacute": "\xda", "Uacute;": "\xda", "Uarr;": "\u219f", "Uarrocir;": "\u2949", "Ubrcy;": "\u040e", "Ubreve;": "\u016c", "Ucirc": "\xdb", "Ucirc;": "\xdb", "Ucy;": "\u0423", "Udblac;": "\u0170", "Ufr;": "\U0001d518", "Ugrave": "\xd9", "Ugrave;": "\xd9", "Umacr;": "\u016a", "UnderBar;": "_", "UnderBrace;": "\u23df", "UnderBracket;": "\u23b5", "UnderParenthesis;": "\u23dd", "Union;": "\u22c3", "UnionPlus;": "\u228e", "Uogon;": "\u0172", "Uopf;": "\U0001d54c", "UpArrow;": "\u2191", "UpArrowBar;": "\u2912", "UpArrowDownArrow;": "\u21c5", "UpDownArrow;": "\u2195", "UpEquilibrium;": "\u296e", "UpTee;": "\u22a5", "UpTeeArrow;": "\u21a5", "Uparrow;": "\u21d1", "Updownarrow;": "\u21d5", "UpperLeftArrow;": "\u2196", "UpperRightArrow;": "\u2197", "Upsi;": "\u03d2", "Upsilon;": "\u03a5", "Uring;": "\u016e", "Uscr;": "\U0001d4b0", "Utilde;": "\u0168", "Uuml": "\xdc", "Uuml;": "\xdc", "VDash;": "\u22ab", "Vbar;": "\u2aeb", "Vcy;": "\u0412", "Vdash;": "\u22a9", "Vdashl;": "\u2ae6", "Vee;": "\u22c1", "Verbar;": "\u2016", "Vert;": "\u2016", "VerticalBar;": "\u2223", "VerticalLine;": "|", "VerticalSeparator;": "\u2758", "VerticalTilde;": "\u2240", "VeryThinSpace;": "\u200a", "Vfr;": "\U0001d519", "Vopf;": "\U0001d54d", "Vscr;": "\U0001d4b1", "Vvdash;": "\u22aa", "Wcirc;": "\u0174", "Wedge;": "\u22c0", "Wfr;": "\U0001d51a", "Wopf;": "\U0001d54e", "Wscr;": "\U0001d4b2", "Xfr;": "\U0001d51b", "Xi;": "\u039e", "Xopf;": "\U0001d54f", "Xscr;": 
"\U0001d4b3", "YAcy;": "\u042f", "YIcy;": "\u0407", "YUcy;": "\u042e", "Yacute": "\xdd", "Yacute;": "\xdd", "Ycirc;": "\u0176", "Ycy;": "\u042b", "Yfr;": "\U0001d51c", "Yopf;": "\U0001d550", "Yscr;": "\U0001d4b4", "Yuml;": "\u0178", "ZHcy;": "\u0416", "Zacute;": "\u0179", "Zcaron;": "\u017d", "Zcy;": "\u0417", "Zdot;": "\u017b", "ZeroWidthSpace;": "\u200b", "Zeta;": "\u0396", "Zfr;": "\u2128", "Zopf;": "\u2124", "Zscr;": "\U0001d4b5", "aacute": "\xe1", "aacute;": "\xe1", "abreve;": "\u0103", "ac;": "\u223e", "acE;": "\u223e\u0333", "acd;": "\u223f", "acirc": "\xe2", "acirc;": "\xe2", "acute": "\xb4", "acute;": "\xb4", "acy;": "\u0430", "aelig": "\xe6", "aelig;": "\xe6", "af;": "\u2061", "afr;": "\U0001d51e", "agrave": "\xe0", "agrave;": "\xe0", "alefsym;": "\u2135", "aleph;": "\u2135", "alpha;": "\u03b1", "amacr;": "\u0101", "amalg;": "\u2a3f", "amp": "&", "amp;": "&", "and;": "\u2227", "andand;": "\u2a55", "andd;": "\u2a5c", "andslope;": "\u2a58", "andv;": "\u2a5a", "ang;": "\u2220", "ange;": "\u29a4", "angle;": "\u2220", "angmsd;": "\u2221", "angmsdaa;": "\u29a8", "angmsdab;": "\u29a9", "angmsdac;": "\u29aa", "angmsdad;": "\u29ab", "angmsdae;": "\u29ac", "angmsdaf;": "\u29ad", "angmsdag;": "\u29ae", "angmsdah;": "\u29af", "angrt;": "\u221f", "angrtvb;": "\u22be", "angrtvbd;": "\u299d", "angsph;": "\u2222", "angst;": "\xc5", "angzarr;": "\u237c", "aogon;": "\u0105", "aopf;": "\U0001d552", "ap;": "\u2248", "apE;": "\u2a70", "apacir;": "\u2a6f", "ape;": "\u224a", "apid;": "\u224b", "apos;": "'", "approx;": "\u2248", "approxeq;": "\u224a", "aring": "\xe5", "aring;": "\xe5", "ascr;": "\U0001d4b6", "ast;": "*", "asymp;": "\u2248", "asympeq;": "\u224d", "atilde": "\xe3", "atilde;": "\xe3", "auml": "\xe4", "auml;": "\xe4", "awconint;": "\u2233", "awint;": "\u2a11", "bNot;": "\u2aed", "backcong;": "\u224c", "backepsilon;": "\u03f6", "backprime;": "\u2035", "backsim;": "\u223d", "backsimeq;": "\u22cd", "barvee;": "\u22bd", "barwed;": "\u2305", "barwedge;": "\u2305", 
"bbrk;": "\u23b5", "bbrktbrk;": "\u23b6", "bcong;": "\u224c", "bcy;": "\u0431", "bdquo;": "\u201e", "becaus;": "\u2235", "because;": "\u2235", "bemptyv;": "\u29b0", "bepsi;": "\u03f6", "bernou;": "\u212c", "beta;": "\u03b2", "beth;": "\u2136", "between;": "\u226c", "bfr;": "\U0001d51f", "bigcap;": "\u22c2", "bigcirc;": "\u25ef", "bigcup;": "\u22c3", "bigodot;": "\u2a00", "bigoplus;": "\u2a01", "bigotimes;": "\u2a02", "bigsqcup;": "\u2a06", "bigstar;": "\u2605", "bigtriangledown;": "\u25bd", "bigtriangleup;": "\u25b3", "biguplus;": "\u2a04", "bigvee;": "\u22c1", "bigwedge;": "\u22c0", "bkarow;": "\u290d", "blacklozenge;": "\u29eb", "blacksquare;": "\u25aa", "blacktriangle;": "\u25b4", "blacktriangledown;": "\u25be", "blacktriangleleft;": "\u25c2", "blacktriangleright;": "\u25b8", "blank;": "\u2423", "blk12;": "\u2592", "blk14;": "\u2591", "blk34;": "\u2593", "block;": "\u2588", "bne;": "=\u20e5", "bnequiv;": "\u2261\u20e5", "bnot;": "\u2310", "bopf;": "\U0001d553", "bot;": "\u22a5", "bottom;": "\u22a5", "bowtie;": "\u22c8", "boxDL;": "\u2557", "boxDR;": "\u2554", "boxDl;": "\u2556", "boxDr;": "\u2553", "boxH;": "\u2550", "boxHD;": "\u2566", "boxHU;": "\u2569", "boxHd;": "\u2564", "boxHu;": "\u2567", "boxUL;": "\u255d", "boxUR;": "\u255a", "boxUl;": "\u255c", "boxUr;": "\u2559", "boxV;": "\u2551", "boxVH;": "\u256c", "boxVL;": "\u2563", "boxVR;": "\u2560", "boxVh;": "\u256b", "boxVl;": "\u2562", "boxVr;": "\u255f", "boxbox;": "\u29c9", "boxdL;": "\u2555", "boxdR;": "\u2552", "boxdl;": "\u2510", "boxdr;": "\u250c", "boxh;": "\u2500", "boxhD;": "\u2565", "boxhU;": "\u2568", "boxhd;": "\u252c", "boxhu;": "\u2534", "boxminus;": "\u229f", "boxplus;": "\u229e", "boxtimes;": "\u22a0", "boxuL;": "\u255b", "boxuR;": "\u2558", "boxul;": "\u2518", "boxur;": "\u2514", "boxv;": "\u2502", "boxvH;": "\u256a", "boxvL;": "\u2561", "boxvR;": "\u255e", "boxvh;": "\u253c", "boxvl;": "\u2524", "boxvr;": "\u251c", "bprime;": "\u2035", "breve;": "\u02d8", "brvbar": "\xa6", "brvbar;": 
"\xa6", "bscr;": "\U0001d4b7", "bsemi;": "\u204f", "bsim;": "\u223d", "bsime;": "\u22cd", "bsol;": "\\", "bsolb;": "\u29c5", "bsolhsub;": "\u27c8", "bull;": "\u2022", "bullet;": "\u2022", "bump;": "\u224e", "bumpE;": "\u2aae", "bumpe;": "\u224f", "bumpeq;": "\u224f", "cacute;": "\u0107", "cap;": "\u2229", "capand;": "\u2a44", "capbrcup;": "\u2a49", "capcap;": "\u2a4b", "capcup;": "\u2a47", "capdot;": "\u2a40", "caps;": "\u2229\ufe00", "caret;": "\u2041", "caron;": "\u02c7", "ccaps;": "\u2a4d", "ccaron;": "\u010d", "ccedil": "\xe7", "ccedil;": "\xe7", "ccirc;": "\u0109", "ccups;": "\u2a4c", "ccupssm;": "\u2a50", "cdot;": "\u010b", "cedil": "\xb8", "cedil;": "\xb8", "cemptyv;": "\u29b2", "cent": "\xa2", "cent;": "\xa2", "centerdot;": "\xb7", "cfr;": "\U0001d520", "chcy;": "\u0447", "check;": "\u2713", "checkmark;": "\u2713", "chi;": "\u03c7", "cir;": "\u25cb", "cirE;": "\u29c3", "circ;": "\u02c6", "circeq;": "\u2257", "circlearrowleft;": "\u21ba", "circlearrowright;": "\u21bb", "circledR;": "\xae", "circledS;": "\u24c8", "circledast;": "\u229b", "circledcirc;": "\u229a", "circleddash;": "\u229d", "cire;": "\u2257", "cirfnint;": "\u2a10", "cirmid;": "\u2aef", "cirscir;": "\u29c2", "clubs;": "\u2663", "clubsuit;": "\u2663", "colon;": ":", "colone;": "\u2254", "coloneq;": "\u2254", "comma;": ",", "commat;": "@", "comp;": "\u2201", "compfn;": "\u2218", "complement;": "\u2201", "complexes;": "\u2102", "cong;": "\u2245", "congdot;": "\u2a6d", "conint;": "\u222e", "copf;": "\U0001d554", "coprod;": "\u2210", "copy": "\xa9", "copy;": "\xa9", "copysr;": "\u2117", "crarr;": "\u21b5", "cross;": "\u2717", "cscr;": "\U0001d4b8", "csub;": "\u2acf", "csube;": "\u2ad1", "csup;": "\u2ad0", "csupe;": "\u2ad2", "ctdot;": "\u22ef", "cudarrl;": "\u2938", "cudarrr;": "\u2935", "cuepr;": "\u22de", "cuesc;": "\u22df", "cularr;": "\u21b6", "cularrp;": "\u293d", "cup;": "\u222a", "cupbrcap;": "\u2a48", "cupcap;": "\u2a46", "cupcup;": "\u2a4a", "cupdot;": "\u228d", "cupor;": "\u2a45", "cups;": 
"\u222a\ufe00", "curarr;": "\u21b7", "curarrm;": "\u293c", "curlyeqprec;": "\u22de", "curlyeqsucc;": "\u22df", "curlyvee;": "\u22ce", "curlywedge;": "\u22cf", "curren": "\xa4", "curren;": "\xa4", "curvearrowleft;": "\u21b6", "curvearrowright;": "\u21b7", "cuvee;": "\u22ce", "cuwed;": "\u22cf", "cwconint;": "\u2232", "cwint;": "\u2231", "cylcty;": "\u232d", "dArr;": "\u21d3", "dHar;": "\u2965", "dagger;": "\u2020", "daleth;": "\u2138", "darr;": "\u2193", "dash;": "\u2010", "dashv;": "\u22a3", "dbkarow;": "\u290f", "dblac;": "\u02dd", "dcaron;": "\u010f", "dcy;": "\u0434", "dd;": "\u2146", "ddagger;": "\u2021", "ddarr;": "\u21ca", "ddotseq;": "\u2a77", "deg": "\xb0", "deg;": "\xb0", "delta;": "\u03b4", "demptyv;": "\u29b1", "dfisht;": "\u297f", "dfr;": "\U0001d521", "dharl;": "\u21c3", "dharr;": "\u21c2", "diam;": "\u22c4", "diamond;": "\u22c4", "diamondsuit;": "\u2666", "diams;": "\u2666", "die;": "\xa8", "digamma;": "\u03dd", "disin;": "\u22f2", "div;": "\xf7", "divide": "\xf7", "divide;": "\xf7", "divideontimes;": "\u22c7", "divonx;": "\u22c7", "djcy;": "\u0452", "dlcorn;": "\u231e", "dlcrop;": "\u230d", "dollar;": "$", "dopf;": "\U0001d555", "dot;": "\u02d9", "doteq;": "\u2250", "doteqdot;": "\u2251", "dotminus;": "\u2238", "dotplus;": "\u2214", "dotsquare;": "\u22a1", "doublebarwedge;": "\u2306", "downarrow;": "\u2193", "downdownarrows;": "\u21ca", "downharpoonleft;": "\u21c3", "downharpoonright;": "\u21c2", "drbkarow;": "\u2910", "drcorn;": "\u231f", "drcrop;": "\u230c", "dscr;": "\U0001d4b9", "dscy;": "\u0455", "dsol;": "\u29f6", "dstrok;": "\u0111", "dtdot;": "\u22f1", "dtri;": "\u25bf", "dtrif;": "\u25be", "duarr;": "\u21f5", "duhar;": "\u296f", "dwangle;": "\u29a6", "dzcy;": "\u045f", "dzigrarr;": "\u27ff", "eDDot;": "\u2a77", "eDot;": "\u2251", "eacute": "\xe9", "eacute;": "\xe9", "easter;": "\u2a6e", "ecaron;": "\u011b", "ecir;": "\u2256", "ecirc": "\xea", "ecirc;": "\xea", "ecolon;": "\u2255", "ecy;": "\u044d", "edot;": "\u0117", "ee;": "\u2147", 
"efDot;": "\u2252", "efr;": "\U0001d522", "eg;": "\u2a9a", "egrave": "\xe8", "egrave;": "\xe8", "egs;": "\u2a96", "egsdot;": "\u2a98", "el;": "\u2a99", "elinters;": "\u23e7", "ell;": "\u2113", "els;": "\u2a95", "elsdot;": "\u2a97", "emacr;": "\u0113", "empty;": "\u2205", "emptyset;": "\u2205", "emptyv;": "\u2205", "emsp13;": "\u2004", "emsp14;": "\u2005", "emsp;": "\u2003", "eng;": "\u014b", "ensp;": "\u2002", "eogon;": "\u0119", "eopf;": "\U0001d556", "epar;": "\u22d5", "eparsl;": "\u29e3", "eplus;": "\u2a71", "epsi;": "\u03b5", "epsilon;": "\u03b5", "epsiv;": "\u03f5", "eqcirc;": "\u2256", "eqcolon;": "\u2255", "eqsim;": "\u2242", "eqslantgtr;": "\u2a96", "eqslantless;": "\u2a95", "equals;": "=", "equest;": "\u225f", "equiv;": "\u2261", "equivDD;": "\u2a78", "eqvparsl;": "\u29e5", "erDot;": "\u2253", "erarr;": "\u2971", "escr;": "\u212f", "esdot;": "\u2250", "esim;": "\u2242", "eta;": "\u03b7", "eth": "\xf0", "eth;": "\xf0", "euml": "\xeb", "euml;": "\xeb", "euro;": "\u20ac", "excl;": "!", "exist;": "\u2203", "expectation;": "\u2130", "exponentiale;": "\u2147", "fallingdotseq;": "\u2252", "fcy;": "\u0444", "female;": "\u2640", "ffilig;": "\ufb03", "fflig;": "\ufb00", "ffllig;": "\ufb04", "ffr;": "\U0001d523", "filig;": "\ufb01", "fjlig;": "fj", "flat;": "\u266d", "fllig;": "\ufb02", "fltns;": "\u25b1", "fnof;": "\u0192", "fopf;": "\U0001d557", "forall;": "\u2200", "fork;": "\u22d4", "forkv;": "\u2ad9", "fpartint;": "\u2a0d", "frac12": "\xbd", "frac12;": "\xbd", "frac13;": "\u2153", "frac14": "\xbc", "frac14;": "\xbc", "frac15;": "\u2155", "frac16;": "\u2159", "frac18;": "\u215b", "frac23;": "\u2154", "frac25;": "\u2156", "frac34": "\xbe", "frac34;": "\xbe", "frac35;": "\u2157", "frac38;": "\u215c", "frac45;": "\u2158", "frac56;": "\u215a", "frac58;": "\u215d", "frac78;": "\u215e", "frasl;": "\u2044", "frown;": "\u2322", "fscr;": "\U0001d4bb", "gE;": "\u2267", "gEl;": "\u2a8c", "gacute;": "\u01f5", "gamma;": "\u03b3", "gammad;": "\u03dd", "gap;": "\u2a86", 
"gbreve;": "\u011f", "gcirc;": "\u011d", "gcy;": "\u0433", "gdot;": "\u0121", "ge;": "\u2265", "gel;": "\u22db", "geq;": "\u2265", "geqq;": "\u2267", "geqslant;": "\u2a7e", "ges;": "\u2a7e", "gescc;": "\u2aa9", "gesdot;": "\u2a80", "gesdoto;": "\u2a82", "gesdotol;": "\u2a84", "gesl;": "\u22db\ufe00", "gesles;": "\u2a94", "gfr;": "\U0001d524", "gg;": "\u226b", "ggg;": "\u22d9", "gimel;": "\u2137", "gjcy;": "\u0453", "gl;": "\u2277", "glE;": "\u2a92", "gla;": "\u2aa5", "glj;": "\u2aa4", "gnE;": "\u2269", "gnap;": "\u2a8a", "gnapprox;": "\u2a8a", "gne;": "\u2a88", "gneq;": "\u2a88", "gneqq;": "\u2269", "gnsim;": "\u22e7", "gopf;": "\U0001d558", "grave;": "`", "gscr;": "\u210a", "gsim;": "\u2273", "gsime;": "\u2a8e", "gsiml;": "\u2a90", "gt": ">", "gt;": ">", "gtcc;": "\u2aa7", "gtcir;": "\u2a7a", "gtdot;": "\u22d7", "gtlPar;": "\u2995", "gtquest;": "\u2a7c", "gtrapprox;": "\u2a86", "gtrarr;": "\u2978", "gtrdot;": "\u22d7", "gtreqless;": "\u22db", "gtreqqless;": "\u2a8c", "gtrless;": "\u2277", "gtrsim;": "\u2273", "gvertneqq;": "\u2269\ufe00", "gvnE;": "\u2269\ufe00", "hArr;": "\u21d4", "hairsp;": "\u200a", "half;": "\xbd", "hamilt;": "\u210b", "hardcy;": "\u044a", "harr;": "\u2194", "harrcir;": "\u2948", "harrw;": "\u21ad", "hbar;": "\u210f", "hcirc;": "\u0125", "hearts;": "\u2665", "heartsuit;": "\u2665", "hellip;": "\u2026", "hercon;": "\u22b9", "hfr;": "\U0001d525", "hksearow;": "\u2925", "hkswarow;": "\u2926", "hoarr;": "\u21ff", "homtht;": "\u223b", "hookleftarrow;": "\u21a9", "hookrightarrow;": "\u21aa", "hopf;": "\U0001d559", "horbar;": "\u2015", "hscr;": "\U0001d4bd", "hslash;": "\u210f", "hstrok;": "\u0127", "hybull;": "\u2043", "hyphen;": "\u2010", "iacute": "\xed", "iacute;": "\xed", "ic;": "\u2063", "icirc": "\xee", "icirc;": "\xee", "icy;": "\u0438", "iecy;": "\u0435", "iexcl": "\xa1", "iexcl;": "\xa1", "iff;": "\u21d4", "ifr;": "\U0001d526", "igrave": "\xec", "igrave;": "\xec", "ii;": "\u2148", "iiiint;": "\u2a0c", "iiint;": "\u222d", "iinfin;": 
"\u29dc", "iiota;": "\u2129", "ijlig;": "\u0133", "imacr;": "\u012b", "image;": "\u2111", "imagline;": "\u2110", "imagpart;": "\u2111", "imath;": "\u0131", "imof;": "\u22b7", "imped;": "\u01b5", "in;": "\u2208", "incare;": "\u2105", "infin;": "\u221e", "infintie;": "\u29dd", "inodot;": "\u0131", "int;": "\u222b", "intcal;": "\u22ba", "integers;": "\u2124", "intercal;": "\u22ba", "intlarhk;": "\u2a17", "intprod;": "\u2a3c", "iocy;": "\u0451", "iogon;": "\u012f", "iopf;": "\U0001d55a", "iota;": "\u03b9", "iprod;": "\u2a3c", "iquest": "\xbf", "iquest;": "\xbf", "iscr;": "\U0001d4be", "isin;": "\u2208", "isinE;": "\u22f9", "isindot;": "\u22f5", "isins;": "\u22f4", "isinsv;": "\u22f3", "isinv;": "\u2208", "it;": "\u2062", "itilde;": "\u0129", "iukcy;": "\u0456", "iuml": "\xef", "iuml;": "\xef", "jcirc;": "\u0135", "jcy;": "\u0439", "jfr;": "\U0001d527", "jmath;": "\u0237", "jopf;": "\U0001d55b", "jscr;": "\U0001d4bf", "jsercy;": "\u0458", "jukcy;": "\u0454", "kappa;": "\u03ba", "kappav;": "\u03f0", "kcedil;": "\u0137", "kcy;": "\u043a", "kfr;": "\U0001d528", "kgreen;": "\u0138", "khcy;": "\u0445", "kjcy;": "\u045c", "kopf;": "\U0001d55c", "kscr;": "\U0001d4c0", "lAarr;": "\u21da", "lArr;": "\u21d0", "lAtail;": "\u291b", "lBarr;": "\u290e", "lE;": "\u2266", "lEg;": "\u2a8b", "lHar;": "\u2962", "lacute;": "\u013a", "laemptyv;": "\u29b4", "lagran;": "\u2112", "lambda;": "\u03bb", "lang;": "\u27e8", "langd;": "\u2991", "langle;": "\u27e8", "lap;": "\u2a85", "laquo": "\xab", "laquo;": "\xab", "larr;": "\u2190", "larrb;": "\u21e4", "larrbfs;": "\u291f", "larrfs;": "\u291d", "larrhk;": "\u21a9", "larrlp;": "\u21ab", "larrpl;": "\u2939", "larrsim;": "\u2973", "larrtl;": "\u21a2", "lat;": "\u2aab", "latail;": "\u2919", "late;": "\u2aad", "lates;": "\u2aad\ufe00", "lbarr;": "\u290c", "lbbrk;": "\u2772", "lbrace;": "{", "lbrack;": "[", "lbrke;": "\u298b", "lbrksld;": "\u298f", "lbrkslu;": "\u298d", "lcaron;": "\u013e", "lcedil;": "\u013c", "lceil;": "\u2308", "lcub;": "{", "lcy;": 
"\u043b", "ldca;": "\u2936", "ldquo;": "\u201c", "ldquor;": "\u201e", "ldrdhar;": "\u2967", "ldrushar;": "\u294b", "ldsh;": "\u21b2", "le;": "\u2264", "leftarrow;": "\u2190", "leftarrowtail;": "\u21a2", "leftharpoondown;": "\u21bd", "leftharpoonup;": "\u21bc", "leftleftarrows;": "\u21c7", "leftrightarrow;": "\u2194", "leftrightarrows;": "\u21c6", "leftrightharpoons;": "\u21cb", "leftrightsquigarrow;": "\u21ad", "leftthreetimes;": "\u22cb", "leg;": "\u22da", "leq;": "\u2264", "leqq;": "\u2266", "leqslant;": "\u2a7d", "les;": "\u2a7d", "lescc;": "\u2aa8", "lesdot;": "\u2a7f", "lesdoto;": "\u2a81", "lesdotor;": "\u2a83", "lesg;": "\u22da\ufe00", "lesges;": "\u2a93", "lessapprox;": "\u2a85", "lessdot;": "\u22d6", "lesseqgtr;": "\u22da", "lesseqqgtr;": "\u2a8b", "lessgtr;": "\u2276", "lesssim;": "\u2272", "lfisht;": "\u297c", "lfloor;": "\u230a", "lfr;": "\U0001d529", "lg;": "\u2276", "lgE;": "\u2a91", "lhard;": "\u21bd", "lharu;": "\u21bc", "lharul;": "\u296a", "lhblk;": "\u2584", "ljcy;": "\u0459", "ll;": "\u226a", "llarr;": "\u21c7", "llcorner;": "\u231e", "llhard;": "\u296b", "lltri;": "\u25fa", "lmidot;": "\u0140", "lmoust;": "\u23b0", "lmoustache;": "\u23b0", "lnE;": "\u2268", "lnap;": "\u2a89", "lnapprox;": "\u2a89", "lne;": "\u2a87", "lneq;": "\u2a87", "lneqq;": "\u2268", "lnsim;": "\u22e6", "loang;": "\u27ec", "loarr;": "\u21fd", "lobrk;": "\u27e6", "longleftarrow;": "\u27f5", "longleftrightarrow;": "\u27f7", "longmapsto;": "\u27fc", "longrightarrow;": "\u27f6", "looparrowleft;": "\u21ab", "looparrowright;": "\u21ac", "lopar;": "\u2985", "lopf;": "\U0001d55d", "loplus;": "\u2a2d", "lotimes;": "\u2a34", "lowast;": "\u2217", "lowbar;": "_", "loz;": "\u25ca", "lozenge;": "\u25ca", "lozf;": "\u29eb", "lpar;": "(", "lparlt;": "\u2993", "lrarr;": "\u21c6", "lrcorner;": "\u231f", "lrhar;": "\u21cb", "lrhard;": "\u296d", "lrm;": "\u200e", "lrtri;": "\u22bf", "lsaquo;": "\u2039", "lscr;": "\U0001d4c1", "lsh;": "\u21b0", "lsim;": "\u2272", "lsime;": "\u2a8d", "lsimg;": 
"\u2a8f", "lsqb;": "[", "lsquo;": "\u2018", "lsquor;": "\u201a", "lstrok;": "\u0142", "lt": "<", "lt;": "<", "ltcc;": "\u2aa6", "ltcir;": "\u2a79", "ltdot;": "\u22d6", "lthree;": "\u22cb", "ltimes;": "\u22c9", "ltlarr;": "\u2976", "ltquest;": "\u2a7b", "ltrPar;": "\u2996", "ltri;": "\u25c3", "ltrie;": "\u22b4", "ltrif;": "\u25c2", "lurdshar;": "\u294a", "luruhar;": "\u2966", "lvertneqq;": "\u2268\ufe00", "lvnE;": "\u2268\ufe00", "mDDot;": "\u223a", "macr": "\xaf", "macr;": "\xaf", "male;": "\u2642", "malt;": "\u2720", "maltese;": "\u2720", "map;": "\u21a6", "mapsto;": "\u21a6", "mapstodown;": "\u21a7", "mapstoleft;": "\u21a4", "mapstoup;": "\u21a5", "marker;": "\u25ae", "mcomma;": "\u2a29", "mcy;": "\u043c", "mdash;": "\u2014", "measuredangle;": "\u2221", "mfr;": "\U0001d52a", "mho;": "\u2127", "micro": "\xb5", "micro;": "\xb5", "mid;": "\u2223", "midast;": "*", "midcir;": "\u2af0", "middot": "\xb7", "middot;": "\xb7", "minus;": "\u2212", "minusb;": "\u229f", "minusd;": "\u2238", "minusdu;": "\u2a2a", "mlcp;": "\u2adb", "mldr;": "\u2026", "mnplus;": "\u2213", "models;": "\u22a7", "mopf;": "\U0001d55e", "mp;": "\u2213", "mscr;": "\U0001d4c2", "mstpos;": "\u223e", "mu;": "\u03bc", "multimap;": "\u22b8", "mumap;": "\u22b8", "nGg;": "\u22d9\u0338", "nGt;": "\u226b\u20d2", "nGtv;": "\u226b\u0338", "nLeftarrow;": "\u21cd", "nLeftrightarrow;": "\u21ce", "nLl;": "\u22d8\u0338", "nLt;": "\u226a\u20d2", "nLtv;": "\u226a\u0338", "nRightarrow;": "\u21cf", "nVDash;": "\u22af", "nVdash;": "\u22ae", "nabla;": "\u2207", "nacute;": "\u0144", "nang;": "\u2220\u20d2", "nap;": "\u2249", "napE;": "\u2a70\u0338", "napid;": "\u224b\u0338", "napos;": "\u0149", "napprox;": "\u2249", "natur;": "\u266e", "natural;": "\u266e", "naturals;": "\u2115", "nbsp": "\xa0", "nbsp;": "\xa0", "nbump;": "\u224e\u0338", "nbumpe;": "\u224f\u0338", "ncap;": "\u2a43", "ncaron;": "\u0148", "ncedil;": "\u0146", "ncong;": "\u2247", "ncongdot;": "\u2a6d\u0338", "ncup;": "\u2a42", "ncy;": "\u043d", "ndash;": 
"\u2013", "ne;": "\u2260", "neArr;": "\u21d7", "nearhk;": "\u2924", "nearr;": "\u2197", "nearrow;": "\u2197", "nedot;": "\u2250\u0338", "nequiv;": "\u2262", "nesear;": "\u2928", "nesim;": "\u2242\u0338", "nexist;": "\u2204", "nexists;": "\u2204", "nfr;": "\U0001d52b", "ngE;": "\u2267\u0338", "nge;": "\u2271", "ngeq;": "\u2271", "ngeqq;": "\u2267\u0338", "ngeqslant;": "\u2a7e\u0338", "nges;": "\u2a7e\u0338", "ngsim;": "\u2275", "ngt;": "\u226f", "ngtr;": "\u226f", "nhArr;": "\u21ce", "nharr;": "\u21ae", "nhpar;": "\u2af2", "ni;": "\u220b", "nis;": "\u22fc", "nisd;": "\u22fa", "niv;": "\u220b", "njcy;": "\u045a", "nlArr;": "\u21cd", "nlE;": "\u2266\u0338", "nlarr;": "\u219a", "nldr;": "\u2025", "nle;": "\u2270", "nleftarrow;": "\u219a", "nleftrightarrow;": "\u21ae", "nleq;": "\u2270", "nleqq;": "\u2266\u0338", "nleqslant;": "\u2a7d\u0338", "nles;": "\u2a7d\u0338", "nless;": "\u226e", "nlsim;": "\u2274", "nlt;": "\u226e", "nltri;": "\u22ea", "nltrie;": "\u22ec", "nmid;": "\u2224", "nopf;": "\U0001d55f", "not": "\xac", "not;": "\xac", "notin;": "\u2209", "notinE;": "\u22f9\u0338", "notindot;": "\u22f5\u0338", "notinva;": "\u2209", "notinvb;": "\u22f7", "notinvc;": "\u22f6", "notni;": "\u220c", "notniva;": "\u220c", "notnivb;": "\u22fe", "notnivc;": "\u22fd", "npar;": "\u2226", "nparallel;": "\u2226", "nparsl;": "\u2afd\u20e5", "npart;": "\u2202\u0338", "npolint;": "\u2a14", "npr;": "\u2280", "nprcue;": "\u22e0", "npre;": "\u2aaf\u0338", "nprec;": "\u2280", "npreceq;": "\u2aaf\u0338", "nrArr;": "\u21cf", "nrarr;": "\u219b", "nrarrc;": "\u2933\u0338", "nrarrw;": "\u219d\u0338", "nrightarrow;": "\u219b", "nrtri;": "\u22eb", "nrtrie;": "\u22ed", "nsc;": "\u2281", "nsccue;": "\u22e1", "nsce;": "\u2ab0\u0338", "nscr;": "\U0001d4c3", "nshortmid;": "\u2224", "nshortparallel;": "\u2226", "nsim;": "\u2241", "nsime;": "\u2244", "nsimeq;": "\u2244", "nsmid;": "\u2224", "nspar;": "\u2226", "nsqsube;": "\u22e2", "nsqsupe;": "\u22e3", "nsub;": "\u2284", "nsubE;": "\u2ac5\u0338", 
"nsube;": "\u2288", "nsubset;": "\u2282\u20d2", "nsubseteq;": "\u2288", "nsubseteqq;": "\u2ac5\u0338", "nsucc;": "\u2281", "nsucceq;": "\u2ab0\u0338", "nsup;": "\u2285", "nsupE;": "\u2ac6\u0338", "nsupe;": "\u2289", "nsupset;": "\u2283\u20d2", "nsupseteq;": "\u2289", "nsupseteqq;": "\u2ac6\u0338", "ntgl;": "\u2279", "ntilde": "\xf1", "ntilde;": "\xf1", "ntlg;": "\u2278", "ntriangleleft;": "\u22ea", "ntrianglelefteq;": "\u22ec", "ntriangleright;": "\u22eb", "ntrianglerighteq;": "\u22ed", "nu;": "\u03bd", "num;": "#", "numero;": "\u2116", "numsp;": "\u2007", "nvDash;": "\u22ad", "nvHarr;": "\u2904", "nvap;": "\u224d\u20d2", "nvdash;": "\u22ac", "nvge;": "\u2265\u20d2", "nvgt;": ">\u20d2", "nvinfin;": "\u29de", "nvlArr;": "\u2902", "nvle;": "\u2264\u20d2", "nvlt;": "<\u20d2", "nvltrie;": "\u22b4\u20d2", "nvrArr;": "\u2903", "nvrtrie;": "\u22b5\u20d2", "nvsim;": "\u223c\u20d2", "nwArr;": "\u21d6", "nwarhk;": "\u2923", "nwarr;": "\u2196", "nwarrow;": "\u2196", "nwnear;": "\u2927", "oS;": "\u24c8", "oacute": "\xf3", "oacute;": "\xf3", "oast;": "\u229b", "ocir;": "\u229a", "ocirc": "\xf4", "ocirc;": "\xf4", "ocy;": "\u043e", "odash;": "\u229d", "odblac;": "\u0151", "odiv;": "\u2a38", "odot;": "\u2299", "odsold;": "\u29bc", "oelig;": "\u0153", "ofcir;": "\u29bf", "ofr;": "\U0001d52c", "ogon;": "\u02db", "ograve": "\xf2", "ograve;": "\xf2", "ogt;": "\u29c1", "ohbar;": "\u29b5", "ohm;": "\u03a9", "oint;": "\u222e", "olarr;": "\u21ba", "olcir;": "\u29be", "olcross;": "\u29bb", "oline;": "\u203e", "olt;": "\u29c0", "omacr;": "\u014d", "omega;": "\u03c9", "omicron;": "\u03bf", "omid;": "\u29b6", "ominus;": "\u2296", "oopf;": "\U0001d560", "opar;": "\u29b7", "operp;": "\u29b9", "oplus;": "\u2295", "or;": "\u2228", "orarr;": "\u21bb", "ord;": "\u2a5d", "order;": "\u2134", "orderof;": "\u2134", "ordf": "\xaa", "ordf;": "\xaa", "ordm": "\xba", "ordm;": "\xba", "origof;": "\u22b6", "oror;": "\u2a56", "orslope;": "\u2a57", "orv;": "\u2a5b", "oscr;": "\u2134", "oslash": "\xf8", 
"oslash;": "\xf8", "osol;": "\u2298", "otilde": "\xf5", "otilde;": "\xf5", "otimes;": "\u2297", "otimesas;": "\u2a36", "ouml": "\xf6", "ouml;": "\xf6", "ovbar;": "\u233d", "par;": "\u2225", "para": "\xb6", "para;": "\xb6", "parallel;": "\u2225", "parsim;": "\u2af3", "parsl;": "\u2afd", "part;": "\u2202", "pcy;": "\u043f", "percnt;": "%", "period;": ".", "permil;": "\u2030", "perp;": "\u22a5", "pertenk;": "\u2031", "pfr;": "\U0001d52d", "phi;": "\u03c6", "phiv;": "\u03d5", "phmmat;": "\u2133", "phone;": "\u260e", "pi;": "\u03c0", "pitchfork;": "\u22d4", "piv;": "\u03d6", "planck;": "\u210f", "planckh;": "\u210e", "plankv;": "\u210f", "plus;": "+", "plusacir;": "\u2a23", "plusb;": "\u229e", "pluscir;": "\u2a22", "plusdo;": "\u2214", "plusdu;": "\u2a25", "pluse;": "\u2a72", "plusmn": "\xb1", "plusmn;": "\xb1", "plussim;": "\u2a26", "plustwo;": "\u2a27", "pm;": "\xb1", "pointint;": "\u2a15", "popf;": "\U0001d561", "pound": "\xa3", "pound;": "\xa3", "pr;": "\u227a", "prE;": "\u2ab3", "prap;": "\u2ab7", "prcue;": "\u227c", "pre;": "\u2aaf", "prec;": "\u227a", "precapprox;": "\u2ab7", "preccurlyeq;": "\u227c", "preceq;": "\u2aaf", "precnapprox;": "\u2ab9", "precneqq;": "\u2ab5", "precnsim;": "\u22e8", "precsim;": "\u227e", "prime;": "\u2032", "primes;": "\u2119", "prnE;": "\u2ab5", "prnap;": "\u2ab9", "prnsim;": "\u22e8", "prod;": "\u220f", "profalar;": "\u232e", "profline;": "\u2312", "profsurf;": "\u2313", "prop;": "\u221d", "propto;": "\u221d", "prsim;": "\u227e", "prurel;": "\u22b0", "pscr;": "\U0001d4c5", "psi;": "\u03c8", "puncsp;": "\u2008", "qfr;": "\U0001d52e", "qint;": "\u2a0c", "qopf;": "\U0001d562", "qprime;": "\u2057", "qscr;": "\U0001d4c6", "quaternions;": "\u210d", "quatint;": "\u2a16", "quest;": "?", "questeq;": "\u225f", "quot": "\"", "quot;": "\"", "rAarr;": "\u21db", "rArr;": "\u21d2", "rAtail;": "\u291c", "rBarr;": "\u290f", "rHar;": "\u2964", "race;": "\u223d\u0331", "racute;": "\u0155", "radic;": "\u221a", "raemptyv;": "\u29b3", "rang;": "\u27e9", 
"rangd;": "\u2992", "range;": "\u29a5", "rangle;": "\u27e9", "raquo": "\xbb", "raquo;": "\xbb", "rarr;": "\u2192", "rarrap;": "\u2975", "rarrb;": "\u21e5", "rarrbfs;": "\u2920", "rarrc;": "\u2933", "rarrfs;": "\u291e", "rarrhk;": "\u21aa", "rarrlp;": "\u21ac", "rarrpl;": "\u2945", "rarrsim;": "\u2974", "rarrtl;": "\u21a3", "rarrw;": "\u219d", "ratail;": "\u291a", "ratio;": "\u2236", "rationals;": "\u211a", "rbarr;": "\u290d", "rbbrk;": "\u2773", "rbrace;": "}", "rbrack;": "]", "rbrke;": "\u298c", "rbrksld;": "\u298e", "rbrkslu;": "\u2990", "rcaron;": "\u0159", "rcedil;": "\u0157", "rceil;": "\u2309", "rcub;": "}", "rcy;": "\u0440", "rdca;": "\u2937", "rdldhar;": "\u2969", "rdquo;": "\u201d", "rdquor;": "\u201d", "rdsh;": "\u21b3", "real;": "\u211c", "realine;": "\u211b", "realpart;": "\u211c", "reals;": "\u211d", "rect;": "\u25ad", "reg": "\xae", "reg;": "\xae", "rfisht;": "\u297d", "rfloor;": "\u230b", "rfr;": "\U0001d52f", "rhard;": "\u21c1", "rharu;": "\u21c0", "rharul;": "\u296c", "rho;": "\u03c1", "rhov;": "\u03f1", "rightarrow;": "\u2192", "rightarrowtail;": "\u21a3", "rightharpoondown;": "\u21c1", "rightharpoonup;": "\u21c0", "rightleftarrows;": "\u21c4", "rightleftharpoons;": "\u21cc", "rightrightarrows;": "\u21c9", "rightsquigarrow;": "\u219d", "rightthreetimes;": "\u22cc", "ring;": "\u02da", "risingdotseq;": "\u2253", "rlarr;": "\u21c4", "rlhar;": "\u21cc", "rlm;": "\u200f", "rmoust;": "\u23b1", "rmoustache;": "\u23b1", "rnmid;": "\u2aee", "roang;": "\u27ed", "roarr;": "\u21fe", "robrk;": "\u27e7", "ropar;": "\u2986", "ropf;": "\U0001d563", "roplus;": "\u2a2e", "rotimes;": "\u2a35", "rpar;": ")", "rpargt;": "\u2994", "rppolint;": "\u2a12", "rrarr;": "\u21c9", "rsaquo;": "\u203a", "rscr;": "\U0001d4c7", "rsh;": "\u21b1", "rsqb;": "]", "rsquo;": "\u2019", "rsquor;": "\u2019", "rthree;": "\u22cc", "rtimes;": "\u22ca", "rtri;": "\u25b9", "rtrie;": "\u22b5", "rtrif;": "\u25b8", "rtriltri;": "\u29ce", "ruluhar;": "\u2968", "rx;": "\u211e", "sacute;": "\u015b", 
"sbquo;": "\u201a", "sc;": "\u227b", "scE;": "\u2ab4", "scap;": "\u2ab8", "scaron;": "\u0161", "sccue;": "\u227d", "sce;": "\u2ab0", "scedil;": "\u015f", "scirc;": "\u015d", "scnE;": "\u2ab6", "scnap;": "\u2aba", "scnsim;": "\u22e9", "scpolint;": "\u2a13", "scsim;": "\u227f", "scy;": "\u0441", "sdot;": "\u22c5", "sdotb;": "\u22a1", "sdote;": "\u2a66", "seArr;": "\u21d8", "searhk;": "\u2925", "searr;": "\u2198", "searrow;": "\u2198", "sect": "\xa7", "sect;": "\xa7", "semi;": ";", "seswar;": "\u2929", "setminus;": "\u2216", "setmn;": "\u2216", "sext;": "\u2736", "sfr;": "\U0001d530", "sfrown;": "\u2322", "sharp;": "\u266f", "shchcy;": "\u0449", "shcy;": "\u0448", "shortmid;": "\u2223", "shortparallel;": "\u2225", "shy": "\xad", "shy;": "\xad", "sigma;": "\u03c3", "sigmaf;": "\u03c2", "sigmav;": "\u03c2", "sim;": "\u223c", "simdot;": "\u2a6a", "sime;": "\u2243", "simeq;": "\u2243", "simg;": "\u2a9e", "simgE;": "\u2aa0", "siml;": "\u2a9d", "simlE;": "\u2a9f", "simne;": "\u2246", "simplus;": "\u2a24", "simrarr;": "\u2972", "slarr;": "\u2190", "smallsetminus;": "\u2216", "smashp;": "\u2a33", "smeparsl;": "\u29e4", "smid;": "\u2223", "smile;": "\u2323", "smt;": "\u2aaa", "smte;": "\u2aac", "smtes;": "\u2aac\ufe00", "softcy;": "\u044c", "sol;": "/", "solb;": "\u29c4", "solbar;": "\u233f", "sopf;": "\U0001d564", "spades;": "\u2660", "spadesuit;": "\u2660", "spar;": "\u2225", "sqcap;": "\u2293", "sqcaps;": "\u2293\ufe00", "sqcup;": "\u2294", "sqcups;": "\u2294\ufe00", "sqsub;": "\u228f", "sqsube;": "\u2291", "sqsubset;": "\u228f", "sqsubseteq;": "\u2291", "sqsup;": "\u2290", "sqsupe;": "\u2292", "sqsupset;": "\u2290", "sqsupseteq;": "\u2292", "squ;": "\u25a1", "square;": "\u25a1", "squarf;": "\u25aa", "squf;": "\u25aa", "srarr;": "\u2192", "sscr;": "\U0001d4c8", "ssetmn;": "\u2216", "ssmile;": "\u2323", "sstarf;": "\u22c6", "star;": "\u2606", "starf;": "\u2605", "straightepsilon;": "\u03f5", "straightphi;": "\u03d5", "strns;": "\xaf", "sub;": "\u2282", "subE;": "\u2ac5", 
"subdot;": "\u2abd", "sube;": "\u2286", "subedot;": "\u2ac3", "submult;": "\u2ac1", "subnE;": "\u2acb", "subne;": "\u228a", "subplus;": "\u2abf", "subrarr;": "\u2979", "subset;": "\u2282", "subseteq;": "\u2286", "subseteqq;": "\u2ac5", "subsetneq;": "\u228a", "subsetneqq;": "\u2acb", "subsim;": "\u2ac7", "subsub;": "\u2ad5", "subsup;": "\u2ad3", "succ;": "\u227b", "succapprox;": "\u2ab8", "succcurlyeq;": "\u227d", "succeq;": "\u2ab0", "succnapprox;": "\u2aba", "succneqq;": "\u2ab6", "succnsim;": "\u22e9", "succsim;": "\u227f", "sum;": "\u2211", "sung;": "\u266a", "sup1": "\xb9", "sup1;": "\xb9", "sup2": "\xb2", "sup2;": "\xb2", "sup3": "\xb3", "sup3;": "\xb3", "sup;": "\u2283", "supE;": "\u2ac6", "supdot;": "\u2abe", "supdsub;": "\u2ad8", "supe;": "\u2287", "supedot;": "\u2ac4", "suphsol;": "\u27c9", "suphsub;": "\u2ad7", "suplarr;": "\u297b", "supmult;": "\u2ac2", "supnE;": "\u2acc", "supne;": "\u228b", "supplus;": "\u2ac0", "supset;": "\u2283", "supseteq;": "\u2287", "supseteqq;": "\u2ac6", "supsetneq;": "\u228b", "supsetneqq;": "\u2acc", "supsim;": "\u2ac8", "supsub;": "\u2ad4", "supsup;": "\u2ad6", "swArr;": "\u21d9", "swarhk;": "\u2926", "swarr;": "\u2199", "swarrow;": "\u2199", "swnwar;": "\u292a", "szlig": "\xdf", "szlig;": "\xdf", "target;": "\u2316", "tau;": "\u03c4", "tbrk;": "\u23b4", "tcaron;": "\u0165", "tcedil;": "\u0163", "tcy;": "\u0442", "tdot;": "\u20db", "telrec;": "\u2315", "tfr;": "\U0001d531", "there4;": "\u2234", "therefore;": "\u2234", "theta;": "\u03b8", "thetasym;": "\u03d1", "thetav;": "\u03d1", "thickapprox;": "\u2248", "thicksim;": "\u223c", "thinsp;": "\u2009", "thkap;": "\u2248", "thksim;": "\u223c", "thorn": "\xfe", "thorn;": "\xfe", "tilde;": "\u02dc", "times": "\xd7", "times;": "\xd7", "timesb;": "\u22a0", "timesbar;": "\u2a31", "timesd;": "\u2a30", "tint;": "\u222d", "toea;": "\u2928", "top;": "\u22a4", "topbot;": "\u2336", "topcir;": "\u2af1", "topf;": "\U0001d565", "topfork;": "\u2ada", "tosa;": "\u2929", "tprime;": "\u2034", 
"trade;": "\u2122", "triangle;": "\u25b5", "triangledown;": "\u25bf", "triangleleft;": "\u25c3", "trianglelefteq;": "\u22b4", "triangleq;": "\u225c", "triangleright;": "\u25b9", "trianglerighteq;": "\u22b5", "tridot;": "\u25ec", "trie;": "\u225c", "triminus;": "\u2a3a", "triplus;": "\u2a39", "trisb;": "\u29cd", "tritime;": "\u2a3b", "trpezium;": "\u23e2", "tscr;": "\U0001d4c9", "tscy;": "\u0446", "tshcy;": "\u045b", "tstrok;": "\u0167", "twixt;": "\u226c", "twoheadleftarrow;": "\u219e", "twoheadrightarrow;": "\u21a0", "uArr;": "\u21d1", "uHar;": "\u2963", "uacute": "\xfa", "uacute;": "\xfa", "uarr;": "\u2191", "ubrcy;": "\u045e", "ubreve;": "\u016d", "ucirc": "\xfb", "ucirc;": "\xfb", "ucy;": "\u0443", "udarr;": "\u21c5", "udblac;": "\u0171", "udhar;": "\u296e", "ufisht;": "\u297e", "ufr;": "\U0001d532", "ugrave": "\xf9", "ugrave;": "\xf9", "uharl;": "\u21bf", "uharr;": "\u21be", "uhblk;": "\u2580", "ulcorn;": "\u231c", "ulcorner;": "\u231c", "ulcrop;": "\u230f", "ultri;": "\u25f8", "umacr;": "\u016b", "uml": "\xa8", "uml;": "\xa8", "uogon;": "\u0173", "uopf;": "\U0001d566", "uparrow;": "\u2191", "updownarrow;": "\u2195", "upharpoonleft;": "\u21bf", "upharpoonright;": "\u21be", "uplus;": "\u228e", "upsi;": "\u03c5", "upsih;": "\u03d2", "upsilon;": "\u03c5", "upuparrows;": "\u21c8", "urcorn;": "\u231d", "urcorner;": "\u231d", "urcrop;": "\u230e", "uring;": "\u016f", "urtri;": "\u25f9", "uscr;": "\U0001d4ca", "utdot;": "\u22f0", "utilde;": "\u0169", "utri;": "\u25b5", "utrif;": "\u25b4", "uuarr;": "\u21c8", "uuml": "\xfc", "uuml;": "\xfc", "uwangle;": "\u29a7", "vArr;": "\u21d5", "vBar;": "\u2ae8", "vBarv;": "\u2ae9", "vDash;": "\u22a8", "vangrt;": "\u299c", "varepsilon;": "\u03f5", "varkappa;": "\u03f0", "varnothing;": "\u2205", "varphi;": "\u03d5", "varpi;": "\u03d6", "varpropto;": "\u221d", "varr;": "\u2195", "varrho;": "\u03f1", "varsigma;": "\u03c2", "varsubsetneq;": "\u228a\ufe00", "varsubsetneqq;": "\u2acb\ufe00", "varsupsetneq;": "\u228b\ufe00", 
"varsupsetneqq;": "\u2acc\ufe00", "vartheta;": "\u03d1", "vartriangleleft;": "\u22b2", "vartriangleright;": "\u22b3", "vcy;": "\u0432", "vdash;": "\u22a2", "vee;": "\u2228", "veebar;": "\u22bb", "veeeq;": "\u225a", "vellip;": "\u22ee", "verbar;": "|", "vert;": "|", "vfr;": "\U0001d533", "vltri;": "\u22b2", "vnsub;": "\u2282\u20d2", "vnsup;": "\u2283\u20d2", "vopf;": "\U0001d567", "vprop;": "\u221d", "vrtri;": "\u22b3", "vscr;": "\U0001d4cb", "vsubnE;": "\u2acb\ufe00", "vsubne;": "\u228a\ufe00", "vsupnE;": "\u2acc\ufe00", "vsupne;": "\u228b\ufe00", "vzigzag;": "\u299a", "wcirc;": "\u0175", "wedbar;": "\u2a5f", "wedge;": "\u2227", "wedgeq;": "\u2259", "weierp;": "\u2118", "wfr;": "\U0001d534", "wopf;": "\U0001d568", "wp;": "\u2118", "wr;": "\u2240", "wreath;": "\u2240", "wscr;": "\U0001d4cc", "xcap;": "\u22c2", "xcirc;": "\u25ef", "xcup;": "\u22c3", "xdtri;": "\u25bd", "xfr;": "\U0001d535", "xhArr;": "\u27fa", "xharr;": "\u27f7", "xi;": "\u03be", "xlArr;": "\u27f8", "xlarr;": "\u27f5", "xmap;": "\u27fc", "xnis;": "\u22fb", "xodot;": "\u2a00", "xopf;": "\U0001d569", "xoplus;": "\u2a01", "xotime;": "\u2a02", "xrArr;": "\u27f9", "xrarr;": "\u27f6", "xscr;": "\U0001d4cd", "xsqcup;": "\u2a06", "xuplus;": "\u2a04", "xutri;": "\u25b3", "xvee;": "\u22c1", "xwedge;": "\u22c0", "yacute": "\xfd", "yacute;": "\xfd", "yacy;": "\u044f", "ycirc;": "\u0177", "ycy;": "\u044b", "yen": "\xa5", "yen;": "\xa5", "yfr;": "\U0001d536", "yicy;": "\u0457", "yopf;": "\U0001d56a", "yscr;": "\U0001d4ce", "yucy;": "\u044e", "yuml": "\xff", "yuml;": "\xff", "zacute;": "\u017a", "zcaron;": "\u017e", "zcy;": "\u0437", "zdot;": "\u017c", "zeetrf;": "\u2128", "zeta;": "\u03b6", "zfr;": "\U0001d537", "zhcy;": "\u0436", "zigrarr;": "\u21dd", "zopf;": "\U0001d56b", "zscr;": "\U0001d4cf", "zwj;": "\u200d", "zwnj;": "\u200c", } replacementCharacters = { 0x0: "\uFFFD", 0x0d: "\u000D", 0x80: "\u20AC", 0x81: "\u0081", 0x82: "\u201A", 0x83: "\u0192", 0x84: "\u201E", 0x85: "\u2026", 0x86: "\u2020", 0x87: 
# Integer code for every kind of token; the parser's main loop compares
# token["type"] against these values to pick the handler to call.
tokenTypes = dict(
    (kind, code) for code, kind in enumerate((
        "Doctype",
        "Characters",
        "SpaceCharacters",
        "StartTag",
        "EndTag",
        "EmptyTag",
        "Comment",
        "ParseError",
    ))
)

# The subset of token codes that describe tags (these tokens carry a "name").
tagTokenTypes = frozenset(
    tokenTypes[kind] for kind in ("StartTag", "EndTag", "EmptyTag")
)

# Reverse view of ``namespaces``: namespace URI -> prefix.  The MathML URI is
# then explicitly pinned to the "math" prefix.
prefixes = dict((uri, prefix) for prefix, uri in namespaces.items())
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"


class DataLossWarning(UserWarning):
    """Warning category (a ``UserWarning`` subclass) defined by this module."""


class ReparseException(Exception):
    """Signal that parsing has to start over.

    ``HTMLParser._parse`` catches this exception, resets the parser and runs
    the main loop a second time.
    """
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse a string or file-like object into a tree.

    doc - the HTML document, as a string or file-like object
    treebuilder - name handed to ``treebuilders.getTreeBuilder``
    namespaceHTMLElements - forwarded to the tree builder / ``HTMLParser``

    Any extra keyword arguments are forwarded to ``HTMLParser.parse``.
    Returns whatever ``HTMLParser.parse`` returns (the tree builder's
    document object).
    """
    tb = treebuilders.getTreeBuilder(treebuilder)
    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
    return p.parse(doc, **kwargs)


def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse a string or file-like object into a tree fragment.

    doc - the HTML fragment, as a string or file-like object
    container - name of the element whose innerHTML is being set
    treebuilder - name handed to ``treebuilders.getTreeBuilder``
    namespaceHTMLElements - forwarded to the tree builder / ``HTMLParser``

    Any extra keyword arguments are forwarded to
    ``HTMLParser.parseFragment``, whose result is returned.
    """
    tb = treebuilders.getTreeBuilder(treebuilder)
    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
    return p.parseFragment(doc, container=container, **kwargs)


def method_decorator_metaclass(function):
    """Build a metaclass that wraps every plain function of a class.

    The returned metaclass applies ``function`` to each attribute of the
    class body that is a ``types.FunctionType``; all other attributes are
    left untouched.
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Only existing keys are reassigned, so the dict never changes
            # size while it is being iterated.
            for attributeName, attribute in classDict.items():
                if isinstance(attribute, types.FunctionType):
                    attribute = function(attribute)

                classDict[attributeName] = attribute
            return type.__new__(meta, classname, bases, classDict)
    return Decorated


class HTMLParser(object):
    """HTML parser. Generates a tree structure from a stream of (possibly
    malformed) HTML"""

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        tree - a treebuilder class controlling the type of tree that will be
        returned. Built in treebuilders can be accessed through
        html5lib.treebuilders.getTreeBuilder(treeType). Defaults to the
        "etree" tree builder.

        strict - raise an exception when a parse error is encountered

        namespaceHTMLElements - passed to the treebuilder class when it is
        instantiated

        debug - passed to getPhases() when the phase objects are created
        """

        # Raise an exception on the first error encountered
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

        # One phase instance per phase name, all sharing this parser and tree.
        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
                            getPhases(debug).items()])

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        """Tokenize ``stream`` and run the main loop over it.

        innerHTML - True when parsing a fragment
        container - context element name for fragment parsing
        scripting - stored on the parser as ``self.scripting``

        Extra keyword arguments go to the tokenizer. If the main loop raises
        ReparseException the parser is reset and the loop is run once more.
        """
        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except ReparseException:
            self.reset()
            self.mainLoop()

    def reset(self):
        """Put the tree and all per-parse state back to their initial values.

        In fragment (innerHTML) mode this also selects the tokenizer state
        that matches the container element, inserts the root html element
        and resets the insertion mode; otherwise parsing starts in the
        "initial" phase.
        """
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """The name of the character encoding
        that was used to decode the input stream,
        or :obj:`None` if that is not determined yet.

        """
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        """Return whether ``element`` is an HTML integration point.

        A MathML ``annotation-xml`` element qualifies when it has an
        ``encoding`` attribute equal (ASCII case-insensitively) to
        "text/html" or "application/xhtml+xml"; any other element qualifies
        when its (namespace, name) pair is in htmlIntegrationPointElements.
        """
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        """Return whether ``element``'s (namespace, name) pair is listed in
        mathmlTextIntegrationPointElements."""
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every normalized token to the current phase until EOF.

        A phase handler may hand a token back, in which case it is
        dispatched again (the current phase may have changed meanwhile).
        ParseError tokens are recorded through parseError(). After the token
        stream ends, processEOF() is called on the current phase for as long
        as it asks for reprocessing.
        """
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        for token in self.normalizedTokens():
            prev_token = None
            new_token = token
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                tokenType = new_token["type"]

                if tokenType == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # The name checks below look at new_token, i.e. the token
                    # whose type was just read and which is about to be
                    # dispatched, not at the outer loop variable.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((tokenType == StartTagToken and
                           new_token["name"] not in frozenset(["mglyph", "malignmark"])) or
                          tokenType in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         tokenType == StartTagToken and
                         new_token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         tokenType in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if tokenType == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif tokenType == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif tokenType == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif tokenType == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif tokenType == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif tokenType == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            # prev_token is the last token that was actually dispatched.
            if (tokenType == StartTagToken and prev_token["selfClosing"] and
                    not prev_token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # Guards against bouncing between phases forever.
                assert self.phase not in phases

    def normalizedTokens(self):
        """Yield each token from the tokenizer after normalizeToken()."""
        for token in self.tokenizer:
            yield self.normalizeToken(token)

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        stream - a filelike object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        scripting - treat noscript elements as if javascript was turned on

        Returns the document produced by the tree builder.
        """
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        container - name of the element we're setting the innerHTML property
        if set to None, default to 'div'

        stream - a filelike object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        scripting - treat noscript elements as if javascript was turned on

        Returns the fragment produced by the tree builder.
        """
        # Honour the documented "None means 'div'" contract. Without this a
        # None container reaches reset(), which calls .lower() on it.
        # container is the first argument after the innerHTML flag.
        if args:
            if args[0] is None:
                args = ("div",) + tuple(args[1:])
        elif "container" in kwargs and kwargs["container"] is None:
            kwargs["container"] = "div"
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error; raise ParseError when in strict mode.

        errorcode - key into the ``E`` message table
        datavars - mapping interpolated into the message (defaults to {})
        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def normalizeToken(self, token):
        """ HTML5 specific normalizations to the token stream

        For start tags the attribute list is turned into an OrderedDict in
        which the first occurrence of a duplicated attribute wins. The
        (mutated) token is returned.
        """

        if token["type"] == tokenTypes["StartTag"]:
            raw = token["data"]
            token["data"] = OrderedDict(raw)
            if len(raw) > len(token["data"]):
                # we had some duplicated attribute, fix so first wins
                token["data"].update(raw[::-1])

        return token

    def adjustMathMLAttributes(self, token):
        """Apply the adjustMathMLAttributes table to ``token``."""
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Apply the adjustSVGAttributes table to ``token``."""
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Apply the foreign-attribute adjustment table to ``token``."""
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # NOTE(review): nothing in this class sets ``self.parser``, so this
        # call looks like it can only raise AttributeError. The intended
        # behaviour is not visible here; left unchanged -- confirm whether
        # the method is dead code before relying on it.
        # pylint:disable=unused-argument
        self.parser.phase()

    def resetInsertionMode(self):
        """Choose the current phase from the stack of open elements.

        The stack is walked from the most recently opened element down; the
        first element whose name has an entry in the table below decides the
        phase. When the bottom of the stack is reached (fragment case) the
        container name is used instead of the node's own name, with "inBody"
        as the fallback.
        """
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            # Foreign (non-default-namespace) elements never pick the mode.
            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Generic RCDATA/RAWTEXT Parsing algorithm
        contentType - RCDATA or RAWTEXT

        Inserts the element for ``token``, switches the tokenizer into the
        matching state, remembers the current phase in ``originalPhase`` and
        moves to the "text" phase.
        """
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        self.originalPhase = self.phase

        self.phase = self.phases["text"]
metaclass_func): if use_metaclass: return method_decorator_metaclass(metaclass_func) else: return type # pylint:disable=unused-argument class Phase(with_metaclass(getMetaclass(debug, log))): """Base class for helper object that implements each phase of processing """ def __init__(self, parser, tree): self.parser = parser self.tree = tree def processEOF(self): raise NotImplementedError def processComment(self, token): # For most phases the following is correct. Where it's not it will be # overridden. self.tree.insertComment(token, self.tree.openElements[-1]) def processDoctype(self, token): self.parser.parseError("unexpected-doctype") def processCharacters(self, token): self.tree.insertText(token["data"]) def processSpaceCharacters(self, token): self.tree.insertText(token["data"]) def processStartTag(self, token): return self.startTagHandler[token["name"]](token) def startTagHtml(self, token): if not self.parser.firstStartTag and token["name"] == "html": self.parser.parseError("non-html-root") # XXX Need a check here to see if the first start tag token emitted is # this token... If it's not, invoke self.parser.parseError(). 
for attr, value in token["data"].items(): if attr not in self.tree.openElements[0].attributes: self.tree.openElements[0].attributes[attr] = value self.parser.firstStartTag = False def processEndTag(self, token): return self.endTagHandler[token["name"]](token) class InitialPhase(Phase): def processSpaceCharacters(self, token): pass def processComment(self, token): self.tree.insertComment(token, self.tree.document) def processDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] correct = token["correct"] if (name != "html" or publicId is not None or systemId is not None and systemId != "about:legacy-compat"): self.parser.parseError("unknown-doctype") if publicId is None: publicId = "" self.tree.insertDoctype(token) if publicId != "": publicId = publicId.translate(asciiUpper2Lower) if (not correct or token["name"] != "html" or publicId.startswith( ("+//silmaril//dtd html pro v0r11 19970101//", "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", "-//as//dtd html 3.0 aswedit + extensions//", "-//ietf//dtd html 2.0 level 1//", "-//ietf//dtd html 2.0 level 2//", "-//ietf//dtd html 2.0 strict level 1//", "-//ietf//dtd html 2.0 strict level 2//", "-//ietf//dtd html 2.0 strict//", "-//ietf//dtd html 2.0//", "-//ietf//dtd html 2.1e//", "-//ietf//dtd html 3.0//", "-//ietf//dtd html 3.2 final//", "-//ietf//dtd html 3.2//", "-//ietf//dtd html 3//", "-//ietf//dtd html level 0//", "-//ietf//dtd html level 1//", "-//ietf//dtd html level 2//", "-//ietf//dtd html level 3//", "-//ietf//dtd html strict level 0//", "-//ietf//dtd html strict level 1//", "-//ietf//dtd html strict level 2//", "-//ietf//dtd html strict level 3//", "-//ietf//dtd html strict//", "-//ietf//dtd html//", "-//metrius//dtd metrius presentational//", "-//microsoft//dtd internet explorer 2.0 html strict//", "-//microsoft//dtd internet explorer 2.0 html//", "-//microsoft//dtd internet explorer 2.0 tables//", "-//microsoft//dtd internet explorer 3.0 html strict//", 
"-//microsoft//dtd internet explorer 3.0 html//", "-//microsoft//dtd internet explorer 3.0 tables//", "-//netscape comm. corp.//dtd html//", "-//netscape comm. corp.//dtd strict html//", "-//o'reilly and associates//dtd html 2.0//", "-//o'reilly and associates//dtd html extended 1.0//", "-//o'reilly and associates//dtd html extended relaxed 1.0//", "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", "-//spyglass//dtd html 2.0 extended//", "-//sq//dtd html 2.0 hotmetal + extensions//", "-//sun microsystems corp.//dtd hotjava html//", "-//sun microsystems corp.//dtd hotjava strict html//", "-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//", "-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//", "-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//", "-//w3c//dtd html 4.0 transitional//", "-//w3c//dtd html experimental 19960712//", "-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//", "-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//", "-//webtechs//dtd mozilla html//")) or publicId in ("-//w3o//dtd w3 html strict 3.0//en//", "-/w3c/dtd html 4.0 transitional/en", "html") or publicId.startswith( ("-//w3c//dtd html 4.01 frameset//", "-//w3c//dtd html 4.01 transitional//")) and systemId is None or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): self.parser.compatMode = "quirks" elif (publicId.startswith( ("-//w3c//dtd xhtml 1.0 frameset//", "-//w3c//dtd xhtml 1.0 transitional//")) or publicId.startswith( ("-//w3c//dtd html 4.01 frameset//", "-//w3c//dtd html 4.01 transitional//")) and systemId is not None): self.parser.compatMode = "limited quirks" self.parser.phase = self.parser.phases["beforeHtml"] def anythingElse(self): self.parser.compatMode = "quirks" self.parser.phase = self.parser.phases["beforeHtml"] def processCharacters(self, token): 
self.parser.parseError("expected-doctype-but-got-chars") self.anythingElse() return token def processStartTag(self, token): self.parser.parseError("expected-doctype-but-got-start-tag", {"name": token["name"]}) self.anythingElse() return token def processEndTag(self, token): self.parser.parseError("expected-doctype-but-got-end-tag", {"name": token["name"]}) self.anythingElse() return token def processEOF(self): self.parser.parseError("expected-doctype-but-got-eof") self.anythingElse() return True class BeforeHtmlPhase(Phase): # helper methods def insertHtmlElement(self): self.tree.insertRoot(impliedTagToken("html", "StartTag")) self.parser.phase = self.parser.phases["beforeHead"] # other def processEOF(self): self.insertHtmlElement() return True def processComment(self, token): self.tree.insertComment(token, self.tree.document) def processSpaceCharacters(self, token): pass def processCharacters(self, token): self.insertHtmlElement() return token def processStartTag(self, token): if token["name"] == "html": self.parser.firstStartTag = True self.insertHtmlElement() return token def processEndTag(self, token): if token["name"] not in ("head", "body", "html", "br"): self.parser.parseError("unexpected-end-tag-before-html", {"name": token["name"]}) else: self.insertHtmlElement() return token class BeforeHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ (("head", "body", "html", "br"), self.endTagImplyHead) ]) self.endTagHandler.default = self.endTagOther def processEOF(self): self.startTagHead(impliedTagToken("head", "StartTag")) return True def processSpaceCharacters(self, token): pass def processCharacters(self, token): self.startTagHead(impliedTagToken("head", "StartTag")) return token def startTagHtml(self, token): return 
self.parser.phases["inBody"].processStartTag(token) def startTagHead(self, token): self.tree.insertElement(token) self.tree.headPointer = self.tree.openElements[-1] self.parser.phase = self.parser.phases["inHead"] def startTagOther(self, token): self.startTagHead(impliedTagToken("head", "StartTag")) return token def endTagImplyHead(self, token): self.startTagHead(impliedTagToken("head", "StartTag")) return token def endTagOther(self, token): self.parser.parseError("end-tag-after-implied-root", {"name": token["name"]}) class InHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("title", self.startTagTitle), (("noframes", "style"), self.startTagNoFramesStyle), ("noscript", self.startTagNoscript), ("script", self.startTagScript), (("base", "basefont", "bgsound", "command", "link"), self.startTagBaseLinkCommand), ("meta", self.startTagMeta), ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("head", self.endTagHead), (("br", "html", "body"), self.endTagHtmlBodyBr) ]) self.endTagHandler.default = self.endTagOther # the real thing def processEOF(self): self.anythingElse() return True def processCharacters(self, token): self.anythingElse() return token def startTagHtml(self, token): return self.parser.phases["inBody"].processStartTag(token) def startTagHead(self, token): self.parser.parseError("two-heads-are-not-better-than-one") def startTagBaseLinkCommand(self, token): self.tree.insertElement(token) self.tree.openElements.pop() token["selfClosingAcknowledged"] = True def startTagMeta(self, token): self.tree.insertElement(token) self.tree.openElements.pop() token["selfClosingAcknowledged"] = True attributes = token["data"] if self.parser.tokenizer.stream.charEncoding[1] == "tentative": if "charset" in attributes: self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) 
elif ("content" in attributes and "http-equiv" in attributes and attributes["http-equiv"].lower() == "content-type"): # Encoding it as UTF-8 here is a hack, as really we should pass # the abstract Unicode string, and just use the # ContentAttrParser on that, but using UTF-8 allows all chars # to be encoded and as a ASCII-superset works. data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) parser = _inputstream.ContentAttrParser(data) codec = parser.parse() self.parser.tokenizer.stream.changeEncoding(codec) def startTagTitle(self, token): self.parser.parseRCDataRawtext(token, "RCDATA") def startTagNoFramesStyle(self, token): # Need to decide whether to implement the scripting-disabled case self.parser.parseRCDataRawtext(token, "RAWTEXT") def startTagNoscript(self, token): if self.parser.scripting: self.parser.parseRCDataRawtext(token, "RAWTEXT") else: self.tree.insertElement(token) self.parser.phase = self.parser.phases["inHeadNoscript"] def startTagScript(self, token): self.tree.insertElement(token) self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState self.parser.originalPhase = self.parser.phase self.parser.phase = self.parser.phases["text"] def startTagOther(self, token): self.anythingElse() return token def endTagHead(self, token): node = self.parser.tree.openElements.pop() assert node.name == "head", "Expected head got %s" % node.name self.parser.phase = self.parser.phases["afterHead"] def endTagHtmlBodyBr(self, token): self.anythingElse() return token def endTagOther(self, token): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def anythingElse(self): self.endTagHead(impliedTagToken("head")) class InHeadNoscriptPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand), (("head", "noscript"), 
self.startTagHeadNoscript), ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("noscript", self.endTagNoscript), ("br", self.endTagBr), ]) self.endTagHandler.default = self.endTagOther def processEOF(self): self.parser.parseError("eof-in-head-noscript") self.anythingElse() return True def processComment(self, token): return self.parser.phases["inHead"].processComment(token) def processCharacters(self, token): self.parser.parseError("char-in-head-noscript") self.anythingElse() return token def processSpaceCharacters(self, token): return self.parser.phases["inHead"].processSpaceCharacters(token) def startTagHtml(self, token): return self.parser.phases["inBody"].processStartTag(token) def startTagBaseLinkCommand(self, token): return self.parser.phases["inHead"].processStartTag(token) def startTagHeadNoscript(self, token): self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) def startTagOther(self, token): self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) self.anythingElse() return token def endTagNoscript(self, token): node = self.parser.tree.openElements.pop() assert node.name == "noscript", "Expected noscript got %s" % node.name self.parser.phase = self.parser.phases["inHead"] def endTagBr(self, token): self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) self.anythingElse() return token def endTagOther(self, token): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def anythingElse(self): # Caller must raise parse error first! 
self.endTagNoscript(impliedTagToken("noscript")) class AfterHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("body", self.startTagBody), ("frameset", self.startTagFrameset), (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title"), self.startTagFromHead), ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), self.endTagHtmlBodyBr)]) self.endTagHandler.default = self.endTagOther def processEOF(self): self.anythingElse() return True def processCharacters(self, token): self.anythingElse() return token def startTagHtml(self, token): return self.parser.phases["inBody"].processStartTag(token) def startTagBody(self, token): self.parser.framesetOK = False self.tree.insertElement(token) self.parser.phase = self.parser.phases["inBody"] def startTagFrameset(self, token): self.tree.insertElement(token) self.parser.phase = self.parser.phases["inFrameset"] def startTagFromHead(self, token): self.parser.parseError("unexpected-start-tag-out-of-my-head", {"name": token["name"]}) self.tree.openElements.append(self.tree.headPointer) self.parser.phases["inHead"].processStartTag(token) for node in self.tree.openElements[::-1]: if node.name == "head": self.tree.openElements.remove(node) break def startTagHead(self, token): self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) def startTagOther(self, token): self.anythingElse() return token def endTagHtmlBodyBr(self, token): self.anythingElse() return token def endTagOther(self, token): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def anythingElse(self): self.tree.insertElement(impliedTagToken("body", "StartTag")) self.parser.phase = self.parser.phases["inBody"] self.parser.framesetOK = True class InBodyPhase(Phase): # 
http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody # the really-really-really-very crazy mode def __init__(self, parser, tree): Phase.__init__(self, parser, tree) # Set this to the default handler self.processSpaceCharacters = self.processSpaceCharactersNonPre self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("base", "basefont", "bgsound", "command", "link", "meta", "script", "style", "title"), self.startTagProcessInHead), ("body", self.startTagBody), ("frameset", self.startTagFrameset), (("address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", "section", "summary", "ul"), self.startTagCloseP), (headingElements, self.startTagHeading), (("pre", "listing"), self.startTagPreListing), ("form", self.startTagForm), (("li", "dd", "dt"), self.startTagListItem), ("plaintext", self.startTagPlaintext), ("a", self.startTagA), (("b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u"), self.startTagFormatting), ("nobr", self.startTagNobr), ("button", self.startTagButton), (("applet", "marquee", "object"), self.startTagAppletMarqueeObject), ("xmp", self.startTagXmp), ("table", self.startTagTable), (("area", "br", "embed", "img", "keygen", "wbr"), self.startTagVoidFormatting), (("param", "source", "track"), self.startTagParamSource), ("input", self.startTagInput), ("hr", self.startTagHr), ("image", self.startTagImage), ("isindex", self.startTagIsIndex), ("textarea", self.startTagTextarea), ("iframe", self.startTagIFrame), ("noscript", self.startTagNoscript), (("noembed", "noframes"), self.startTagRawtext), ("select", self.startTagSelect), (("rp", "rt"), self.startTagRpRt), (("option", "optgroup"), self.startTagOpt), (("math"), self.startTagMath), (("svg"), self.startTagSvg), (("caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr"), 
self.startTagMisplaced) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("body", self.endTagBody), ("html", self.endTagHtml), (("address", "article", "aside", "blockquote", "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre", "section", "summary", "ul"), self.endTagBlock), ("form", self.endTagForm), ("p", self.endTagP), (("dd", "dt", "li"), self.endTagListItem), (headingElements, self.endTagHeading), (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u"), self.endTagFormatting), (("applet", "marquee", "object"), self.endTagAppletMarqueeObject), ("br", self.endTagBr), ]) self.endTagHandler.default = self.endTagOther def isMatchingFormattingElement(self, node1, node2): return (node1.name == node2.name and node1.namespace == node2.namespace and node1.attributes == node2.attributes) # helper def addFormattingElement(self, token): self.tree.insertElement(token) element = self.tree.openElements[-1] matchingElements = [] for node in self.tree.activeFormattingElements[::-1]: if node is Marker: break elif self.isMatchingFormattingElement(node, element): matchingElements.append(node) assert len(matchingElements) <= 3 if len(matchingElements) == 3: self.tree.activeFormattingElements.remove(matchingElements[-1]) self.tree.activeFormattingElements.append(element) # the real deal def processEOF(self): allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", "tfoot", "th", "thead", "tr", "body", "html")) for node in self.tree.openElements[::-1]: if node.name not in allowed_elements: self.parser.parseError("expected-closing-tag-but-got-eof") break # Stop parsing def processSpaceCharactersDropNewline(self, token): # Sometimes (start of
, , and