
    i!                        d dl mZmZmZmZ ddlmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ d d	lZd
eee                  dee         fdZdeeef         deeef         fdZd	ddee         dee         deeef         fdZdedee         de	fdZ	 d dededee         de
fdZd	ddededee         de
fdZ	 d dededee         dee         dee         f
dZdedede fdZ!dededeeef         fdZ"d	S )!    )OptionalListDictAny   )ScrapeOptionsWebhookConfigDocumentBatchScrapeResponseBatchScrapeJobPaginationConfig)AsyncHttpClient)prepare_scrape_options)handle_response_error)normalize_document_input)validate_batch_urlsN	data_listreturnc                     g }| pg D ]E}t          |t                    r.t          |          }|                    t	          di |           F|S )N )
isinstancedictr   appendr
   )r   	documentsdoc
normalizeds       h/home/agentuser/.hermes/hermes-agent/venv/lib/python3.11/site-packages/firecrawl/v2/methods/aio/batch.py_parse_batch_scrape_documentsr   
   sb     "IB 5 5c4   	51#66JX33
33444    bodyc                    |                      d          s#t          |                      dd                    |                      d          |                      dd          |                      dd          |                      d          |                      d	          |                      d
          t          |                      dg           pg           dS )NsuccesserrorUnknown error occurredstatus	completedr   totalcreditsUsed	expiresAtnextdatar%   r&   r'   credits_used
expires_atr*   r+   )get	Exceptionr   )r    s    r   #_parse_batch_scrape_status_responser1      s    88I E*BCCDDD ((8$$XXk1--'1%%//hh{++  -dhhvr.B.B.HbII  r   )optionsurlsr2   c                   | st          d          t          d | D                       }d|i}|r&t          |          }|r|                    |           |                    d          x}0t          |t                    r|n|                    d          |d<   |                    d          x}||d<   |                    d	          x}||d
<   |                    d          x}||d<   |                    d          x}||d<   |                    d          x}(t          |                                          }|r||d<   |S )NzURLs list cannot be emptyc                 d    g | ]-}t          |t                    r|                                n|.S r   )r   strstrip).0us     r   
<listcomp>z_prepare.<locals>.<listcomp>%   s4    )_)_)_UVz!S7I7I*P!'')))q)_)_)_r   r3   webhookT)exclude_noneappend_to_id
appendToIdignore_invalid_urlsignoreInvalidURLsmax_concurrencymaxConcurrencyzero_data_retentionzeroDataRetentionintegration)	
ValueErrorr   r   updater/   r   r6   
model_dumpr7   )	r3   r2   kwargsvalidated_urlspayloadoptswvtrimmed_integrations	            r   _preparerP   !   s~    64555()_)_Z^)_)_)_``N%~6G !%g.. 	!NN4   ZZ	"""/",Q"4"4YQQ!,,TX,:Y:Y	ZZ'''4 !ZZ-...;'(#$ZZ)***7$% !ZZ-...;'(#$ZZ&&&3!!ffllnn 	9%8GM"Nr   clientc                   K   t          |fi |}|                     d|           d {V }|j        dk    rt          |d           |                                }|                    d          s#t          |                    dd                    t          |                    d          |                    d          |                    d	          
          S )Nz/v2/batch/scrape  zstart batch scraper"   r#   r$   idurlinvalidURLs)rT   rU   invalid_urls)rP   poststatus_coder   jsonr/   r0   r   )rQ   r3   rI   rK   responser    s         r   start_batch_scraper\   <   s      t&&v&&G[[!3W========Hs""h(<=====??D88I E*BCCDDD$((4..dhhuooTXT\T\]jTkTkllllr   job_idpagination_configc           	        K   |                      d|            d{V }|j        dk    rt          |d           |                                }t	          |          }|d         }|r|j        nd}|r&|d         rt          | |d         ||           d{V }t          |d         |d	         |d
         |d         |d         |s|d         nd|          S )al  
    Get the status of a batch scrape job.
    
    Args:
        client: Async HTTP client instance
        job_id: ID of the batch scrape job
        pagination_config: Optional configuration for pagination behavior
        
    Returns:
        BatchScrapeJob containing job status and data
        
    Raises:
        Exception: If the status check fails
    /v2/batch/scrape/NrS   zget batch scrape statusr+   Tr*   r%   r&   r'   r-   r.   r,   )r/   rY   r   rZ   r1   auto_paginate_fetch_all_batch_pages_asyncr   )rQ   r]   r^   r[   r    rK   docsra   s           r   get_batch_scrape_statusrd   G   s0     & ZZ <F < <========Hs""h(ABBB==??D1$77G6?D 8IR%33dM 
 
1FO	
 
 
 
 
 
 
 
 x +&g^,<($1;WV__t   r   )request_timeoutnext_urlre   c          
      <  K   |                      ||           d{V }|j        dk    rt          |d           |                                }t	          |          }t          |d         |d         |d         |d         |d	         |d
         |d                   S )a  
    Fetch a single page of batch scrape results using the provided next URL.

    Args:
        client: Async HTTP client instance
        next_url: Opaque next URL from a prior batch scrape status response
        request_timeout: Timeout (in seconds) for the HTTP request

    Returns:
        BatchScrapeJob with the page data and next URL (if any)

    Raises:
        Exception: If the request fails or returns an error response
    )timeoutNrS   zget batch scrape status pager%   r&   r'   r-   r.   r*   r+   r,   )r/   rY   r   rZ   r1   r   )rQ   rf   re   r[   r    rK   s         r   get_batch_scrape_status_pageri   v   s      ( ZZ/ZBBBBBBBBHs""h(FGGG==??D1$77Gx +&g^,<(V_V_   r   initial_documentsc                   K   |                                 }|}d}|r|j        nd}|r|j        nd}|r|j        nd}	t	          j                    }
|r|||k    rn|	t	          j                    |
z
  |	k    rn|                     |           d{V }|j        dk    r7ddl}|	                    d          }|
                    d|j                    n|                                }	 t          |          }n# t          $ r Y naw xY w|d         D ].}|t          |          |k    r n|                    |           /|t          |          |k    rn|d         }|dz  }||S )	ag  
    Fetch all pages of batch scrape results asynchronously.
    
    Args:
        client: Async HTTP client instance
        next_url: URL for the next page
        initial_documents: Documents from the first page
        pagination_config: Optional configuration for pagination limits
        
    Returns:
        List of all documents from all pages
    r   NrS   	firecrawlzFailed to fetch next page: r+   r*      )copy	max_pagesmax_resultsmax_wait_timetime	monotonicr/   rY   logging	getLoggerwarningrZ   r1   r0   lenr   )rQ   rf   rj   r^   r   current_url
page_countro   rp   rq   
start_timer[   rt   logger	page_datapage_payloaddocuments                    r   rb   rb      s     $ "&&((IKJ 0AJ!++dI3DN#//$K7HR%33dM!!J
 %!
i(?(?%DN,<,<z,I]+Z+Z  K000000003&&NNN&&{33FNNO9MOOPPPMMOO		>yIILL 	 	 	E	 %V, 	' 	'H'c)nn.K.KX&&&& ##i..K*G*G #6*a
K  %N s   +C; ;
DDc                    K   |                      d|            d {V }|j        dk    rt          |d           |                                }|                    d          dk    S )Nr`   rS   zcancel batch scraper%   	cancelled)deleterY   r   rZ   r/   rQ   r]   r[   r    s       r   cancel_batch_scraper      su      ]]#?v#?#?@@@@@@@@Hs""h(=>>>==??D88H,,r   c                   K   |                      d| d           d {V }|j        dk    rt          |d           |                                }|                     d          s#t	          |                     dd                    |S )Nr`   z/errorsrS   zget batch scrape errorsr"   r#   r$   )r/   rY   r   rZ   r0   r   s       r   get_batch_scrape_errorsr      s      ZZ CF C C CDDDDDDDDHs""h(ABBB==??D88I E*BCCDDDKr   )N)#typingr   r   r   r   typesr   r	   r
   r   r   r   utils.http_client_asyncr   utils.validationr   utils.error_handlerr   utils.normalizer   methods.batchr   rr   r   r6   r1   rP   r\   rd   floatri   rb   boolr   r   r   r   r   <module>r      s   , , , , , , , , , , , , r r r r r r r r r r r r r r r r 6 6 6 6 6 6 6 6 6 6 6 6 8 8 8 8 8 8 7 7 7 7 7 7 0 0 0 0 0 0 Xd3i-@ T(^    d38n c3h     EI   49 (=*A W[\_ad\dWe    6m_ mDI mTg m m m m 59, ,,,   01, 	, , , ,f (,	! ! !!! e_	!
 ! ! ! !P 59	D DDD H~D   01	D
 
(^D D D DN-o -s -t - - - -/ 3 4PSUXPX>      r   