File: //lib/python3.10/html/__pycache__/parser.cpython-310.pyc
o
    E�h�G  �                   @   s�   d Z ddlZddlZddlmZ dgZe�d�Ze�d�Ze�d�Z	e�d�Z
e�d	�Ze�d
�Ze�d�Z
e�d�Ze�d
�Ze�d�Ze�dej�Ze�d�Ze�d�ZG dd� dej�ZdS )zA parser for HTML and XHTML.�    N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   s�   e Zd ZdZdZdd�dd�Zdd� Zd	d
� Zdd� Zd
Z	dd� Z
dd� Zdd� Zdd� Z
dd� Zd7dd�Zdd� Zdd� Zdd � Zd!d"� Zd#d$� Zd%d&� Zd'd(� Zd)d*� Zd+d,� Zd-d.� Zd/d0� Zd1d2� Zd3d4� Zd5d6� Zd
S )8r   aE  Find tags and other markup and call handler functions.
    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()
    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )�script�styleT)�convert_charrefsc                C   s   || _ | ��  dS )z�Initialize and reset this instance.
        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   �reset)�selfr   � r
   �"/usr/lib/python3.10/html/parser.py�__init__W   s   zHTMLParser.__init__c                 C   s(   d| _ d| _t| _d| _tj�| � dS )z1Reset this instance.  Loses all unprocessed data.� z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser   �r	   r
   r
   r   r   `   s
   zHTMLParser.resetc                 C   s   | j | | _ | �d� dS )z�Feed data to the parser.
        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   �goahead�r	   �datar
   r
   r   �feedh   s   zHTMLParser.feedc                 C   s   | � d� dS )zHandle any buffered data.�   N)r   r   r
   r
   r   �closeq   s   zHTMLParser.closeNc                 C   s   | j S )z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_textr   r
   r
   r   �get_starttag_textw   s   zHTMLParser.get_starttag_textc                 C   s$   |� � | _t�d| j tj�| _d S )Nz</\s*%s\s*>)�lowerr   �re�compile�Ir   )r	   �elemr
   r
   r   �set_cdata_mode{   s   
zHTMLParser.set_cdata_modec                 C   s   t | _d | _d S �N)r   r   r   r   r
   r
   r   �clear_cdata_mode   s   
zHTMLParser.clear_cdata_modec                 C   s  | j }d}t|�}||k �rU| jr;| js;|�d|�}|dk r:|�dt||d ��}|dkr8t�d��	||�s8�n|}n| j
�	||�}|rI|�� }n| jrN�n|}||k ro| jrf| jsf| �t
|||� �� n	| �|||� � | �||�}||kr{�n�|j}|d|��r�t�||�r�| �|�}	n@|d|�r�| �|�}	n5|d|�r�| �|�}	n*|d|�r�| �|�}	n|d	|�r�| �|�}	n|d
 |k s�|r�| �d� |d
 }	n�n�|	dk �r�|sِn|t�||�r�n�|d|��r|d |kr�| �d� n�t�||�r�n�| �||d d � � n~|d|��r0|}dD ]}
|�|
|d
 ��r"|t|
�8 } n�q| �||d
 |� � nS|d|��rB| �||d d � � nA|||d � �� dk�r[| �||d d � � n(|d	|��rm| �||d d � � n|d|��r| �||d d � � ntd��|}	| �||	�}n�|d|��r�t�||�}|�r�|� � dd� }| �!|� |�"� }	|d|	d
 ��s�|	d
 }	| �||	�}q	d||d � v �r�| �|||d � � | �||d �}ny|d|��rMt#�||�}|�r|� d
�}| �$|� |�"� }	|d|	d
 ��s|	d
 }	| �||	�}q	t%�||�}|�r7|�r6|� � ||d � k�r6|�"� }	|	|k�r.|}	| �||d
 �}n|d
 |k �rL| �d� | �||d
 �}nnJ d��||k s|�r�||k �r�| j�s�| j�ru| j�su| �t
|||� �� n	| �|||� � | �||�}||d � | _ d S )Nr   �<�&�"