File: //snap/core18/current/usr/lib/python3.6/html/__pycache__/parser.cpython-36.pyc
3
�ähI � @ s� d Z ddlZddlZddlZddlmZ dgZejd�Zejd�Z ejd�Z
ejd�Zejd �Zejd
�Z
ejd�Zejd�Zejd
�Zejd�Zejdej�Zejd�Zejd�ZG dd� dej�ZdS )zA parser for HTML and XHTML.� N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]z
</[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
(?:\s*,)* # possibly followed by a comma
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c @ s� e Zd ZdZd:Zdd�dd�Zdd � Zd
d� Zdd
� ZdZ dd� Z
dd� Zdd� Zdd� Z
dd� Zd;dd�Zdd� Zdd� Zd d!� Zd"d#� Zd$d%� Zd&d'� Zd(d)� Zd*d+� Zd,d-� Zd.d/� Zd0d1� Zd2d3� Zd4d5� Zd6d7� Zd8d9� ZdS )<r aE Find tags and other markup and call handler functions.
Usage:
p = HTMLParser()
p.feed(data)
...
p.close()
Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag(). The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks). If convert_charrefs is
True the character references are converted automatically to the
corresponding Unicode character (and self.handle_data() is no
longer split in chunks), otherwise they are passed by calling
self.handle_entityref() or self.handle_charref() with the string
containing respectively the named or numeric reference as the
argument.
�script�styleT)�convert_charrefsc C s || _ | j� dS )z�Initialize and reset this instance.
If convert_charrefs is True (the default), all character references
are automatically converted to the corresponding Unicode characters.
N)r �reset)�selfr � r
�!/usr/lib/python3.6/html/parser.py�__init__X s zHTMLParser.__init__c C s( d| _ d| _t| _d| _tjj| � dS )z1Reset this instance. Loses all unprocessed data.� z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser )r r
r
r r a s
zHTMLParser.resetc C s | j | | _ | jd� dS )z�Feed data to the parser.
Call this as often as you want, with as little or as much text
as you want (may include '\n').
r N)r �goahead)r �datar
r
r �feedi s zHTMLParser.feedc C s | j d� dS )zHandle any buffered data.� N)r )r r
r
r �closer s zHTMLParser.closeNc C s | j S )z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_text)r r
r
r �get_starttag_textx s zHTMLParser.get_starttag_textc C s$ |j � | _tjd| j tj�| _d S )Nz</\s*%s\s*>)�lowerr �re�compile�Ir )r �elemr
r
r �set_cdata_mode| s
zHTMLParser.set_cdata_modec C s t | _d | _d S )N)r r r )r r
r
r �clear_cdata_mode� s zHTMLParser.clear_cdata_modec C sB | j }d}t|�}�x�||k �r�| jr|| j r||jd|�}|dk r�|jdt||d ��}|dkrvtjd�j ||� rvP |}n(| j
j ||�}|r�|j� }n| jr�P |}||k r�| jr�| j r�| jt
|||� �� n| j|||� � | j||�}||kr�P |j}|d|��r4tj||��r&| j|�} n�|d|��r>| j|�} nr|d|��rV| j|�} nZ|d|��rn| j|�} nB|d |��r�| j|�} n*|d
|k �s�|�r�| jd� |d
} nP | dk �r&|�s�P tj||��rԐnN|d|��r$|d |k�r�| jd� n&tj||��r
n| j||d d � � � n�|d|��r||}x.dD ]&}
|j|
|d ��r:|t|
�8 }P �q:W | j||d |� � n�|d|��r�| j||d d � � n�|||d � j� dk�r�| j||d d � � nP|d |��r�| j||d d � � n,|d|��r| j||d d � � ntd��|} | j|| �}q|d|��r�tj||�}|�r�|j � dd� }| j!|� |j"� } |d| d
��s�| d
} | j|| �}qn:d||d � k�r�| j|||d � � | j||d �}P q|d|��r�t#j||�}|�r8|j d
�}| j$|� |j"� } |d| d
��s*| d
} | j|| �}qt%j||�}|�r�|�r�|j � ||d � k�r�|j"� } | |k�r||} | j||d
�}P n,|d
|k �r�| jd� | j||d
�}nP qdstd��qW |�r0||k �r0| j �r0| j�r| j �r| jt
|||� �� n| j|||� � | j||�}||d � | _ d S )Nr �<�&�"