
    ^jG                        d dl mZmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZ d dlmZ d dlZd dlmZmZmZ d	d
lmZ d	dlmZmZmZ d	dlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ e G d d                      Z,e G d d                      Z-e G d d                      Z. G d de/e          Z0 G d de/e          Z1dede/defdZ2 G d d          Z3 G d d          Z4 G d d           Z5 G d! d"          Z6dS )#    )	dataclassasdict)Enum)chain)unescape)ListDictIteratorIterablePatternOptional)ElementTreeN)	HTTPErrorSessionResponse   )ProxyConfig)	WATCH_URLINNERTUBE_CONTEXTINNERTUBE_API_URL)VideoUnavailableYouTubeRequestFailedNoTranscriptFoundTranscriptsDisabledNotTranslatableTranslationLanguageNotAvailableFailedToCreateConsentCookieInvalidVideoId	IpBlockedRequestBlockedAgeRestrictedVideoUnplayableYouTubeDataUnparsablePoTokenRequiredc                   0    e Zd ZU eed<   eed<   	 eed<   dS )FetchedTranscriptSnippettextstartdurationN)__name__
__module____qualname__str__annotations__float     d/usr/local/lib/hermes-agent/venv/lib/python3.11/site-packages/youtube_transcript_api/_transcripts.pyr&   r&   "   s8         
IIILLL OOO r1   r&   c                       e Zd ZU dZee         ed<   eed<   eed<   eed<   eed<   de	e         fdZ
defd	Zdefd
Zdee         fdZdS )FetchedTranscriptz
    Represents a fetched transcript. This object is iterable, which allows you to
    iterate over the transcript snippets.
    snippetsvideo_idlanguagelanguage_codeis_generatedreturnc                 *    t          | j                  S N)iterr5   selfs    r2   __iter__zFetchedTranscript.__iter__>   s    DM"""r1   c                     | j         |         S r<   )r5   )r?   indexs     r2   __getitem__zFetchedTranscript.__getitem__A   s    }U##r1   c                 *    t          | j                  S r<   )lenr5   r>   s    r2   __len__zFetchedTranscript.__len__D   s    4=!!!r1   c                     d | D             S )Nc                 ,    g | ]}t          |          S r0   )r   ).0snippets     r2   
<listcomp>z1FetchedTranscript.to_raw_data.<locals>.<listcomp>H   s    444Gw444r1   r0   r>   s    r2   to_raw_datazFetchedTranscript.to_raw_dataG   s    44t4444r1   N)r*   r+   r,   __doc__r   r&   r.   r-   boolr
   r@   rC   intrF   r	   rL   r0   r1   r2   r4   r4   1   s          
 +,,,,MMMMMM#(#;< # # # #$$< $ $ $ $" " " " "5T$Z 5 5 5 5 5 5r1   r4   c                   $    e Zd ZU eed<   eed<   dS )_TranslationLanguager7   r8   N)r*   r+   r,   r-   r.   r0   r1   r2   rQ   rQ   K   s'         MMMr1   rQ   c                       e Zd ZdZdZdZdS )_PlayabilityStatusOKERRORLOGIN_REQUIREDN)r*   r+   r,   rT   rU   rV   r0   r1   r2   rS   rS   Q   s        	BE%NNNr1   rS   c                       e Zd ZdZdZdZdS )_PlayabilityFailedReasonu%   Sign in to confirm you’re not a botz/This video may be inappropriate for some users.zThis video is unavailableN)r*   r+   r,   BOT_DETECTEDAGE_RESTRICTEDVIDEO_UNAVAILABLEr0   r1   r2   rX   rX   W   s         :LFN3r1   rX   responser6   r:   c                     	 | j         dk    rt          |          |                                  | S # t          $ r}t	          ||          d }~ww xY w)Ni  )status_coder   raise_for_statusr   r   )r\   r6   errors      r2   _raise_http_errorsra   ]   si    43&&H%%%!!### 4 4 4"8U3334s   /2 
AAAc                       e Zd Zdededededededee         fdZdd
ede	fdZ
defdZedefd            Zdedd fdZdS )
Transcripthttp_clientr6   urlr7   r8   r9   translation_languagesc                     || _         || _        || _        || _        || _        || _        || _        d |D             | _        dS )z
        You probably don't want to initialize this directly. Usually you'll access Transcript objects using a
        TranscriptList.
        c                 (    i | ]}|j         |j        S r0   )r8   r7   rI   translation_languages     r2   
<dictcomp>z'Transcript.__init__.<locals>.<dictcomp>}   s1     ,
 ,
 ,
$ !.0D0M,
 ,
 ,
r1   N)_http_clientr6   _urlr7   r8   r9   rf   _translation_languages_dict)r?   rd   r6   re   r7   r8   r9   rf   s           r2   __init__zTranscript.__init__h   s`     ( 	 *(%:",
 ,
(=,
 ,
 ,
(((r1   Fpreserve_formattingr:   c                 @   d| j         v rt          | j                  | j                            | j                   }t          |                              t          || j                  j                  }t          || j        | j
        | j        | j                  S )z
        Loads the actual transcript data.
        :param preserve_formatting: whether to keep select HTML text formatting
        z&exp=xpe)rp   )r5   r6   r7   r8   r9   )rm   r$   r6   rl   get_TranscriptParserparsera   r'   r4   r7   r8   r9   )r?   rp   r\   r5   s       r2   fetchzTranscript.fetch   s    
 ""!$-000$((33$9LMMMSSx77<
 
 !]],*
 
 
 	
r1   c                 X    d                     | j        | j        | j        rdnd          S )Nz7{language_code} ("{language}"){translation_description}z[TRANSLATABLE] )r7   r8   translation_description)formatr7   r8   is_translatabler>   s    r2   __str__zTranscript.__str__   s=    HOO],8<8L$T$4$4RT P 
 
 	
r1   c                 2    t          | j                  dk    S )Nr   )rE   rf   r>   s    r2   rz   zTranscript.is_translatable   s    4-..22r1   c           	          | j         st          | j                  || j        vrt	          | j                  t          | j        | j        d                    | j        |          | j        |         |dg           S )Nz{url}&tlang={language_code})re   r8   T)	rz   r   r6   rn   r   rc   rl   ry   rm   )r?   r8   s     r2   	translatezTranscript.translate   s    # 	1!$-000 @@@1$-@@@M)00I] 1   ,];

 

 
	
r1   NF)r*   r+   r,   r   r-   rN   r   rQ   ro   r4   ru   r{   propertyrz   r~   r0   r1   r2   rc   rc   g   s        

 
 	

 
 
 
  $$89
 
 
 
4
 
 
:K 
 
 
 
&
 
 
 
 
 3 3 3 3 X3
s 
| 
 
 
 
 
 
r1   rc   c            	       N   e Zd ZdZdedeeef         deeef         dee         fdZ	e
dededed	d fd
            Zd	ee         fdZdee         d	efdZdee         d	efdZdee         d	efdZdee         deeeef                  d	efdZd	efdZdee         d	efdZdS )TranscriptListz
    This object represents a list of transcripts. It can be iterated over to list all transcripts which are available
    for a given YouTube video. Also, it provides functionality to search for a transcript in a given language.
    r6   manually_created_transcriptsgenerated_transcriptsrf   c                 >    || _         || _        || _        || _        dS )a  
        The constructor is only for internal use. Use the static build method instead.

        :param video_id: the id of the video this TranscriptList is for
        :param manually_created_transcripts: dict mapping language codes to the manually created transcripts
        :param generated_transcripts: dict mapping language codes to the generated transcripts
        :param translation_languages: list of languages which can be used for translatable languages
        N)r6   _manually_created_transcripts_generated_transcripts_translation_languages)r?   r6   r   r   rf   s        r2   ro   zTranscriptList.__init__   s)     !-I*&;#&;###r1   rd   captions_jsonr:   c                    d |                     dg           D             }i }i }|d         D ]}|                     dd          dk    r|}n|}t          | ||d                             dd          |d	         d
         d         d         |d         |                     dd          dk    |                     dd          r|ng           ||d         <   t          ||||          S )a]  
        Factory method for TranscriptList.

        :param http_client: http client which is used to make the transcript retrieving http calls
        :param video_id: the id of the video this TranscriptList is for
        :param captions_json: the JSON parsed from the YouTube pages static HTML
        :return: the created TranscriptList
        c                 l    g | ]1}t          |d          d         d         d         |d                   2S )languageNamerunsr   r'   languageCoder7   r8   )rQ   ri   s     r2   rK   z(TranscriptList.build.<locals>.<listcomp>   sX     !
 !
 !

 %	 !-n=fEaHP2>B  !
 !
 !
r1   translationLanguagescaptionTrackskindrw   asrbaseUrlz	&fmt=srv3namer   r   r'   r   isTranslatableF)rr   rc   replacer   )rd   r6   r   rf   r   r   captiontranscript_dicts           r2   buildzTranscriptList.build   s(   !
 !

 )6(9(9:PRT(U(U!
 !
 !
 (*$ "$_5 	 	G{{62&&%//"7">7A	"**;;;'*62'FB''50)05Eu)M)MU%%SU8 8OGN344 (!!	
 
 	
r1   c                 ~    t          | j                                        | j                                                  S r<   )r   r   valuesr   r>   s    r2   r@   zTranscriptList.__iter__   s8    .5577'..00
 
 	
r1   language_codesc                 F    |                      || j        | j        g          S )a>  
        Finds a transcript for a given language code. Manually created transcripts are returned first and only if none
        are found, generated transcripts are used. If you only want generated transcripts use
        `find_manually_created_transcript` instead.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )_find_transcriptr   r   r?   r   s     r2   find_transcriptzTranscriptList.find_transcript  s-     $$/1LM
 
 	
r1   c                 :    |                      || j        g          S )a  
        Finds an automatically generated transcript for a given language code.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )r   r   r   s     r2   find_generated_transcriptz(TranscriptList.find_generated_transcript  s      $$^d6Q5RSSSr1   c                 :    |                      || j        g          S )a|  
        Finds a manually created transcript for a given language code.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )r   r   r   s     r2    find_manually_created_transcriptz/TranscriptList.find_manually_created_transcript  s'     $$T?@
 
 	
r1   transcript_dictsc                 b    |D ]}|D ]}||v r||         c c S t          | j        ||           r<   )r   r6   )r?   r   r   r8   r   s        r2   r   zTranscriptList._find_transcript,  sd    
 , 	: 	:M#3 : : O33*=999999 4:  ~tDDDr1   c           	      R   d                     | j        |                     d | j                                        D                       |                     d | j                                        D                       |                     d | j        D                                 S )Na  For this video ({video_id}) transcripts are available in the following languages:

(MANUALLY CREATED)
{available_manually_created_transcript_languages}

(GENERATED)
{available_generated_transcripts}

(TRANSLATION LANGUAGES)
{available_translation_languages}c              3   4   K   | ]}t          |          V  d S r<   r-   rI   
transcripts     r2   	<genexpr>z)TranscriptList.__str__.<locals>.<genexpr>C  sD       [ [ J[ [ [ [ [ [r1   c              3   4   K   | ]}t          |          V  d S r<   r   r   s     r2   r   z)TranscriptList.__str__.<locals>.<genexpr>G  sA       K K$.JK K K K K Kr1   c              3   X   K   | ]%}d                      |j        |j                  V  &dS )z{language_code} ("{language}")r   N)ry   r7   r8   ri   s     r2   r   z)TranscriptList.__str__.<locals>.<genexpr>J  sa       K K
 )	 1771:"6"D 8  K K K K K Kr1   )r6   /available_manually_created_transcript_languagesavailable_generated_transcriptsavailable_translation_languages)ry   r6   _get_language_descriptionr   r   r   r   r>   s    r2   r{   zTranscriptList.__str__8  s    0 &]<@<Z<Z [ ["&"D"K"K"M"M[ [ [ = = -1,J,J K K262M2T2T2V2VK K K - - -1,J,J K K
 -1,GK K K - -  
 
	
r1   transcript_stringsc                 L    d                     d |D                       }|r|ndS )N
c              3   B   K   | ]}d                      |          V  dS )z - {transcript})r   N)ry   r   s     r2   r   z;TranscriptList._get_language_description.<locals>.<genexpr>T  sG        
  
 $$
$;; 
  
  
  
  
  
r1   None)join)r?   r   descriptions      r2   r   z(TranscriptList._get_language_descriptionS  sB    ii  
  
0 
  
  
 
 
 *5{{v5r1   N)r*   r+   r,   rM   r-   r	   rc   r   rQ   ro   staticmethodr   r   r
   r@   r   r   r   r   r   r{   r   r0   r1   r2   r   r      s        
<< '+3
?&;<  $CO4	<
  $$89< < < <( +
+
(++
<@+
	+
 +
 +
 \+
Z
(:. 
 
 
 

hsm 

 
 
 
 
 	T 	T* 	T 	T 	T 	T
&sm
	
 
 
 

E 
E tCO45
E 
	
E 
E 
E 
E
 
 
 
 
66HSM 6c 6 6 6 6 6 6r1   r   c                       e Zd Zdedee         fdZdedefdZ	ddede
defd	Zd
ededefdZdededefdZdededdfdZd
ededdfdZdedefdZdedefdZdededefdZdS )TranscriptListFetcherrd   proxy_configc                 "    || _         || _        d S r<   )rl   _proxy_config)r?   rd   r   s      r2   ro   zTranscriptListFetcher.__init__\  s    ')r1   r6   r:   c                 j    t                               | j        ||                     |                    S r<   )r   r   rl   _fetch_captions_json)r?   r6   s     r2   ru   zTranscriptListFetcher.fetch`  s4    ##%%h//
 
 	
r1   r   
try_numberc                    	 |                      |          }|                     ||          }|                     ||          }|                     ||          S # t          $ r\}| j        dn| j        j        }|dz   |k     r|                     ||dz             cY d }~S |                    | j                  d }~ww xY w)Nr   r   )r   )	_fetch_video_html_extract_innertube_api_key_fetch_innertube_data_extract_captions_jsonr    r   retries_when_blockedr   with_proxy_config)r?   r6   r   htmlapi_keyinnertube_data	exceptionretriess           r2   r   z*TranscriptListFetcher._fetch_captions_jsong  s    	B))(33D55dHEEG!77'JJN..~xHHH 	B 	B 	B %- '< 
 A~''00jSTn0UUUUUUUU--d.@AAA	Bs$   AA 
B?#7B:B? B::B?r   c                     d}t          j        ||          }|r:t          |                                          dk    r|                    d          S d|v rt          |          t          |          )Nz)"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"r   zclass="g-recaptcha")researchrE   groupsgroupr   r#   )r?   r   r6   patternmatchs        r2   r   z0TranscriptListFetcher._extract_innertube_api_keyw  sq    >	'4(( 	"S((A--;;q>>! D((H%%%#H---r1   r   c                     |                      |                    d          |           |                    di                               d          }|d|vrt          |          |S )NplayabilityStatuscaptionsplayerCaptionsTracklistRendererr   )_assert_playabilityrr   r   )r?   r   r6   r   s       r2   r   z,TranscriptListFetcher._extract_captions_json  su      !3!34G!H!H(SSS&**:r::>>-
 
  O=$H$H%h///r1   playability_status_dataNc                    |                     d          }|t          j        j        k    rT|S|                     d          }|t          j        j        k    rH|t
          j        j        k    rt          |          |t
          j        j        k    rt          |          |t          j
        j        k    r]|t
          j        j        k    rH|                    d          s|                    d          rt          |          t          |          |                     di                                di                                di                                dg           }t          ||d	 |D                       d S d S )
Nstatusreasonzhttp://zhttps://errorScreenplayerErrorMessageRenderer	subreasonr   c                 :    g | ]}|                     d d          S )r'   rw   )rr   )rI   runs     r2   rK   z=TranscriptListFetcher._assert_playability.<locals>.<listcomp>  s&    "M"M"M337762#6#6"M"M"Mr1   )rr   rS   rT   valuerV   rX   rY   r    rZ   r!   rU   r[   
startswithr   r   r"   )r?   r   r6   playability_statusr   
subreasonss         r2   r   z)TranscriptListFetcher._assert_playability  sz   488BB"4"7"===".,00::F!%7%F%LLL5BHHH(2225DJJJ'111"&8&>&DDD6HNNN&&y11 3X5H5H5T5T 3(222&x000'++M2>>1266["%%VR	  "&"M"M*"M"M"M  - >=..r1   c                     t          j        d|          }|t          |          | j        j                            dd|                    d          z   d           d S )Nzname="v" value="(.*?)"CONSENTzYES+r   z.youtube.com)domain)r   r   r   rl   cookiessetr   )r?   r   r6   r   s       r2   _create_consent_cookiez,TranscriptListFetcher._create_consent_cookie  sj    	2D99=-h777!%%vA.~ 	& 	
 	
 	
 	
 	
r1   c                     |                      |          }d|v r>|                     ||           |                      |          }d|v rt          |          |S )Nz&action="https://consent.youtube.com/s")_fetch_htmlr   r   )r?   r6   r   s      r2   r   z'TranscriptListFetcher._fetch_video_html  sg    ))3t;;''h777##H--D74??1(;;;r1   c                     | j                             t          j        |                    }t	          t          ||          j                  S )N)r6   )rl   rr   r   ry   r   ra   r'   )r?   r6   r\   s      r2   r   z!TranscriptListFetcher._fetch_html  sB    $(()98)L)L)LMM*8X>>CDDDr1   r   c                     | j                             t          j        |          t          |d          }t          ||                                          }|S )N)r   )contextvideoId)json)rl   postr   ry   r   ra   r   )r?   r6   r   r\   datas        r2   r   z+TranscriptListFetcher._fetch_innertube_data  sb    $))$W555,#  * 
 
 "(H55::<<r1   )r   )r*   r+   r,   r   r   r   ro   r-   r   ru   rO   r	   r   r   r   r   r   r   r   r   r0   r1   r2   r   r   [  s       *G *8K;P * * * *
c 
n 
 
 
 
B BS Bc B$ B B B B .s .c .c . . . .	T 	S 	T 	 	 	 	4 3 SW    :
3 
# 
$ 
 
 
 
# #    EC EC E E E E	c 	C 	D 	 	 	 	 	 	r1   r   c                   \    e Zd Zg dZd
defdZdedee         fdZdede	e
         fdZd	S )rs   )
strongembimarksmalldelinssubsupFrp   c                 :    |                      |          | _        d S r<   )_get_html_regex_html_regex)r?   rp   s     r2   ro   z_TranscriptParser.__init__  s    //0CDDr1   r:   c                     |rBd                     | j                  }d|z   dz   }t          j        |t          j                  }nt          j        dt          j                  }|S )N|z<\/?(?!\/?(z
)\b).*?\b>z<[^>]*>)r   _FORMATTING_TAGSr   compile
IGNORECASE)r?   rp   formats_regex
html_regexs       r2   r  z!_TranscriptParser._get_html_regex  s[     	?HHT%:;;M*]:]JMM2=AAJJJ>>Jr1   raw_datac                 D      fdt          j        |          D             S )Nc                    g | ]}|j         	t          t          j        j        dt          |j                             t          |j        d                   t          |j                            dd                              S )Nrw   r(   durz0.0)r'   r(   r)   )	r'   r&   r   r  r  r   r/   attribrr   )rI   xml_elementr?   s     r2   rK   z+_TranscriptParser.parse.<locals>.<listcomp>  s     
 
 
 + %VD,b(;;K2L2LMMK.w788{155eUCCDD   ,++r1   )r   
fromstring)r?   r  s   ` r2   rt   z_TranscriptParser.parse  s;    
 
 
 
  +5h??
 
 
 	
r1   Nr   )r*   r+   r,   r  rN   ro   r   r-   r  r   r&   rt   r0   r1   r2   rs   rs     s          E ED E E E E4 GCL    	
c 	
d+C&D 	
 	
 	
 	
 	
 	
r1   rs   )7dataclassesr   r   enumr   	itertoolsr   r   r   typingr   r	   r
   r   r   r   
defusedxmlr   r   requestsr   r   r   proxiesr   	_settingsr   r   r   _errorsr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r&   r4   rQ   r-   rS   rX   ra   rc   r   r   rs   r0   r1   r2   <module>r     sn   ) ) ) ) ) ) ) )                   D D D D D D D D D D D D D D D D " " " " " " 				 1 1 1 1 1 1 1 1 1 1             F F F F F F F F F F                               $         5 5 5 5 5 5 5 52        
& & & & &d & & &4 4 4 4 4sD 4 4 44 4S 4X 4 4 4 4J
 J
 J
 J
 J
 J
 J
 J
Zd6 d6 d6 d6 d6 d6 d6 d6Nk k k k k k k k\#
 #
 #
 #
 #
 #
 #
 #
 #
 #
r1   