
    i>                     t   d Z ddlZddlmZmZmZmZmZmZm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z1 dd	l/m2Z3 dd
l/m4Z5 ddl/m6Z7 ddl/m8Z9 dd
l/m4Z: ddl/m;Z< ddl/m=Z> ddl/m?Z@ ddl/mAZB ddlCmDZD  G d d          ZEdS )zs
Main Firecrawl v2 API client.

This module provides the main client class that orchestrates all v2 functionality.
    N)OptionalListDictAnyCallableUnionLiteral   ) ClientConfigScrapeOptionsDocumentSearchRequest
SearchDataSourceOptionCategoryOptionCrawlRequestCrawlResponseCrawlJobCrawlParamsRequest	PDFParserCrawlParamsDataWebhookConfigAgentWebhookConfigCrawlErrorsResponseActiveCrawlsResponse
MapOptionsMapDataFormatOption
WaitActionScreenshotActionClickActionWriteActionPressActionScrollActionScrapeActionExecuteJavascriptAction	PDFActionLocationPaginationConfigAgentOptions)
HttpClient)FirecrawlError)scrape)crawl)batch)search)map)usage)extract)agent)browserWatcherc            <          e Zd ZdZededefd            Z	 	 	 	 	 dd	ee         d
edee	         de
de	f
dZdddddddddddddddddddddddedeed                  deeeef                  deee                  deee                  dee         dee
         dee
         dee         deeee         eeeef                  f                  deeed                           ded         dee         dee         dee         d ee         d!ee         d"ee         d#ee
         d$ee         d%eeeef                  d&ee         def.d'Z	 ddd(ddd)d*ed+ee         d,ee         d-ed.         dee
         d/ee         fd0Zd*efd1Zd*efd2Z	 ddd(ddd)d*ed+ee         d,ee         d-ed.         dee
         d/ee         fd3Zd*efd4Zdddddddddd5	d6ed7eee                  d8eee                  d9ee
         d:ee         dee         d;ee         dee
         d<ee         d&ee         defd=Zddddddd>dd>d>d>d>ddddd>d?d>d@ddddAded,ee         dBeee                  dCeee                  dDee
         dEeedF                  dGee         dHed9ee
         dIedJedKedLedMee
         dNee
         dOeeeef                  d<ee         dPedQedRedSe
dee
         dTee	         d&ee         def2dUZ ddddddd>dd>d>d>d>ddddd>d?d>ddVded,ee         dBeee                  dCeee                  dDee
         dEeedF                  dGee         dHed9ee
         dIedJedKedLedMee
         dNee
         dOeeeef                  d<ee         dPedQedRed&ee         de!f,dWZ"	 dddXd*edYee#         dTee	         defdZZ$ddXd[edTee	         defd\Z%d]ede&fd^Z'de(fd_Z)de(fd`Z*dddddddddadedbee         dcee         dHee         d9ee
         dEeedF                  dee
         d&ee         dee+         de,fddZ-d]edefdeZ.ded,ede/fdfZ0	 ddddddddddddg
dheee                  d,ee         dieeeef                  djee         dJee         dkee         dlee         d<edm         d;ee         d&ee         dnee1         fdoZ2	 dddddddddd@ddddpdheee                  d,ee         dieeeef                  djee         dJee         dkee         dlee         d<edm         d;ee         dSe
dee
         d&ee         dnee1         fdqZ3dddddddddddddddddddddddddddrdhee         deed                  deeeef                  deee                  deee                  dee         dee
         dee
         dee         deeee         eeeef                  f                  deeed                           ded         dee         dee         dee         d ee         d!ee         d"ee         d#ee
         d$ee         dOeeeef                  dsee         d;ee         dNee
         dRee         d&ee         dtee         f6duZ4	 dd*edYee#         fdvZ5ddXd[edTee	         fdwZ6d*edefdxZ7d*efdyZ8d*efdzZ9	 dddddddd{dheee                  d,ediee         d&ee         d|ee
         d}ee         d~eed                  dOeeee:f                  fdZ;	 dddd@dddddddheee                  d,ediee         d&ee         dSe
dee
         d|ee
         d}ee         d~eed                  dOeeee:f                  fdZ<d*efdZ=d*edefdZ>d Z?d Z@d ZAddefdZBddefdZCd ZDddddddee
         dee
         dee         d%eeeef                  fdZEdddded+ed-ed.         dee
         fdZFdefdZGdddeed                  fdZHdd@ddd*eded         dSe
dee
         deIf
dZJddddddddddddddddddddddddddd@dddhee         deed                  deeeef                  deee                  deee                  dee         dee
         dee
         dee         deeee         eeeef                  f                  deeed                           ded         dee         dee         dee         d ee         d!ee         d"ee         d#ee
         d$ee         dOeeeef                  dsee         d;ee         dNee
         dRee         d&ee         dtee         dSe
dee
         f:dZKdS )FirecrawlClientz|
    Main Firecrawl v2 API client.

    This client provides a clean, modular interface to all Firecrawl functionality.
    urlreturnc                 .    d|                                  v S )Nzapi.firecrawl.dev)lower)r:   s    ]/home/agentuser/.hermes/hermes-agent/venv/lib/python3.11/site-packages/firecrawl/v2/client.py_is_cloud_servicez!FirecrawlClient._is_cloud_service@   s    "ciikk11    Nhttps://api.firecrawl.dev         ?api_keyapi_urltimeoutmax_retriesbackoff_factorc                     |t          j        d          }|                     |          r|st          d          t	          |||||          | _        t          |||||          | _        dS )a  
        Initialize the Firecrawl client.

        Args:
            api_key: Firecrawl API key (or set FIRECRAWL_API_KEY env var)
            api_url: Base URL for the Firecrawl API
            timeout: Request timeout in seconds
            max_retries: Maximum number of retries for failed requests
            backoff_factor: Exponential backoff factor for retries (e.g. 0.5 means wait 0.5s, then 1s, then 2s between retries)
        NFIRECRAWL_API_KEYzlAPI key is required for the cloud API. Set FIRECRAWL_API_KEY environment variable or pass api_key parameter.)rD   rE   rF   rG   rH   )rF   rG   rH   )osgetenvr?   
ValueErrorr   configr+   http_client)selfrD   rE   rF   rG   rH   s         r>   __init__zFirecrawlClient.__init__D   s    $ ?i 344G!!'** 	7 	-  
 ##)
 
 
 &#)
 
 
r@   )formatsheadersinclude_tagsexclude_tagsonly_main_contentrF   wait_formobileparsersactionslocationskip_tls_verificationremove_base64_images	fast_modeuse_mock	block_adsproxymax_agestore_in_cacheprofileintegrationrR   r   rS   rT   rU   rV   rW   rX   rY   rZ   )	r   r    r!   r"   r#   r$   r%   r&   r'   r[   r(   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   c                v   t          d ||||||||	|
||||||||||||fD                       rot          di d t          di d|d|d|d|d|d|d	|d
|	d|
d|d|d|d|d|d|d|d|d|d|d|d|                                D             nd}t	          j        | j        ||          S )a  
        Scrape a single URL and return the document.
        Args:
            url: URL to scrape
            formats: List of formats to scrape
            headers: Dictionary of headers to use
            include_tags: List of tags to include
            exclude_tags: List of tags to exclude
            only_main_content: Whether to only scrape the main content
            timeout: Timeout in milliseconds
            wait_for: Wait for a specific element to be present
            mobile: Whether to use mobile mode
            parsers: List of parsers to use
            actions: List of actions to perform
            location: Location to scrape
            skip_tls_verification: Whether to skip TLS verification
            remove_base64_images: Whether to remove base64 images
            fast_mode: Whether to use fast mode
            use_mock: Whether to use mock mode
            block_ads: Whether to block ads
            proxy: Proxy to use
            max_age: Maximum age of the cache
            store_in_cache: Whether to store the result in the cache
            profile: Browser profile for persistent state (e.g. {"name": "my-profile", "saveChanges": True})
        Returns:
            Document
        c              3      K   | ]}|d uV  	d S N .0vs     r>   	<genexpr>z)FirecrawlClient.scrape.<locals>.<genexpr>   s>        e  e1$  e  e  e  e  e  er@   c                     i | ]
\  }}|||S rh   ri   rk   krl   s      r>   
<dictcomp>z*FirecrawlClient.scrape.<locals>.<dictcomp>   s.     ( ( (1, - !, 'r@   rR   rS   rT   rU   rV   rF   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   Nri   )anyr   dictitemsscrape_moduler-   rO   )rP   r:   rR   rS   rT   rU   rV   rF   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   optionss                           r>   r-   zFirecrawlClient.scrapeo   s"   Z   e  e'<Whjqs{  ~D  FM  OV  X`  bw  yM  OX  Zb  dm  ot  v}  M  OV  Xc  )d  e  e  e  e  e1o- 
 
( ( ! ! !!! *\! *\	!
 #4"3!  ! "! v!  !  ! "! '<&;! &:%9! $)! "!  $)!!" e#!$  %!&  .~'!(  )!* (K+!, egg-( ( (
 
 
0 ko1 	2 #D$4c7CCCr@   nodepromptlanguagerF   originjob_idcodery   rz   )pythonrw   bashr{   c          	      B    t          j        | j        ||||||          S )ab  
        Interact with the browser session associated with a scrape job.

        Either ``code`` or ``prompt`` must be provided.

        Args:
            job_id: Scrape job ID
            code: Code to execute (optional if prompt is provided)
            prompt: Natural-language instruction for the browser agent (optional if code is provided)
            language: Programming language ("python", "node", or "bash")
            timeout: Execution timeout in seconds (1-300)
            origin: Optional request origin tag

        Returns:
            BrowserExecuteResponse with execution result
        rx   )ru   interactrO   rP   r|   r}   ry   rz   rF   r{   s          r>   r   zFirecrawlClient.interact   s6    4 %
 
 
 	
r@   c                 6    t          j        | j        |          S )z
        Stop the interaction session associated with a scrape job.

        Args:
            job_id: Scrape job ID

        Returns:
            BrowserDeleteResponse
        )ru   stop_interactionrO   rP   r|   s     r>   r   z FirecrawlClient.stop_interaction   s     -d.>GGGr@   c                 ,    |                      |          S z(Deprecated alias for stop_interaction().r   r   s     r>   stop_interactive_browserz(FirecrawlClient.stop_interactive_browser       $$V,,,r@   c                8    |                      ||||||          S )z Deprecated alias for interact().rx   )r   r   s          r>   scrape_executezFirecrawlClient.scrape_execute   s1     }}  
 
 	
r@   c                 ,    |                      |          S r   r   r   s     r>   delete_scrape_browserz%FirecrawlClient.delete_scrape_browser  r   r@   )	sources
categorieslimittbsr[   ignore_invalid_urlsrF   scrape_optionsre   queryr   r   r   r   r   r   c       	         h    t          |||||||||	|

  
        }t          j        | j        |          S )a  
        Search for documents.
        
        Args:
            query: Search query string
            limit: Maximum number of results to return (default: 5)
            tbs: Time-based search filter
            location: Location string for search
            timeout: Request timeout in milliseconds (default: 300000)
            page_options: Options for scraping individual pages
            
        Returns:
            SearchData containing the search results
        )
r   r   r   r   r   r[   r   rF   r   re   )r   search_moduler0   rO   )rP   r   r   r   r   r   r[   r   rF   r   re   requests               r>   r0   zFirecrawlClient.search  sN    8  ! 3)#
 
 
 #D$4g>>>r@   FT   )ry   exclude_pathsinclude_pathsmax_discovery_depthsitemapignore_sitemapignore_query_parametersr   crawl_entire_domainallow_external_linksallow_subdomainsignore_robots_txtdelaymax_concurrencywebhookr   regex_on_full_urldeduplicate_similar_urlszero_data_retentionpoll_intervalrF   request_timeoutre   r   r   r   r   )onlyincludeskipr   r   r   r   r   r   r   r   r   r   r   r   r   r   c                    |}|||rdnd}i d|d|d|d|d|d	|d
|	d|
d|d|d|d|d|d|d|d|d|||d}|||d<   t          di |}t          j        | j        ||||          S )a  
        Start a crawl job and wait for it to complete.

        Args:
            url: Target URL to start crawling from
            prompt: Optional prompt to guide the crawl
            exclude_paths: Patterns of URLs to exclude
            include_paths: Patterns of URLs to include
            max_discovery_depth: Maximum depth for finding new URLs
            sitemap: Sitemap usage mode ("only" | "include" | "skip")
            ignore_sitemap: Deprecated alias for sitemap ("skip" when true, "include" when false)
            ignore_query_parameters: Ignore URL parameters
            limit: Maximum pages to crawl
            crawl_entire_domain: Follow parent directory links
            allow_external_links: Follow external domain links
            allow_subdomains: Follow subdomains
            ignore_robots_txt: Whether to ignore robots.txt rules
            delay: Delay in seconds between scrapes
            max_concurrency: Maximum number of concurrent scrapes
            webhook: Webhook configuration for notifications
            scrape_options: Page scraping configuration
            regex_on_full_url: Apply includePaths/excludePaths regex to the full URL (including query parameters) instead of just the pathname
            deduplicate_similar_urls: Whether to deduplicate similar URLs during crawl (default: True)
            zero_data_retention: Whether to delete data after 24 hours
            poll_interval: Seconds between status checks
            timeout: Maximum seconds to wait for the entire crawl job to complete (None for no timeout)
            request_timeout: Timeout (in seconds) for each individual HTTP request, including pagination requests when fetching results. If there are multiple pages, each page request gets this timeout
            
        Returns:
            CrawlJob when job completes
            
        Raises:
            ValueError: If request is invalid
            Exception: If the crawl fails to start or complete
            TimeoutError: If timeout is reached
        Nr   r   r:   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   re   r   )r   rF   r   ri   )r   crawl_moduler.   rO   )rP   r:   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rF   r   re   resolved_sitemaprequest_kwargsr   s                               r>   r.   zFirecrawlClient.crawl6  sd   @ ##(B)7FvvY
3
f
 ]
 ]	

 "#6
 &'>
 U
 "#6
 #$8
  0
  !2
 U
 
 w
 n
   !2!
" '(@#
$ $7&'
 
 
* '(8N9%0000!'+
 
 
 	
r@   )ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   re   c                    |}|||rdnd}i d|d|d|d|d|d	|d
|	d|
d|d|d|d|d|d|d|d|d|||d}|||d<   t          di |}t          j        | j        |          S )aK  
        Start an asynchronous crawl job.

        Args:
            url: Target URL to start crawling from
            prompt: Optional prompt to guide the crawl
            exclude_paths: Patterns of URLs to exclude
            include_paths: Patterns of URLs to include
            max_discovery_depth: Maximum depth for finding new URLs
            sitemap: Sitemap usage mode ("only" | "include" | "skip")
            ignore_sitemap: Deprecated alias for sitemap ("skip" when true, "include" when false)
            ignore_query_parameters: Ignore URL parameters
            limit: Maximum pages to crawl
            crawl_entire_domain: Follow parent directory links
            allow_external_links: Follow external domain links
            allow_subdomains: Follow subdomains
            ignore_robots_txt: Whether to ignore robots.txt rules
            delay: Delay in seconds between scrapes
            max_concurrency: Maximum number of concurrent scrapes
            webhook: Webhook configuration for notifications
            scrape_options: Page scraping configuration
            regex_on_full_url: Apply includePaths/excludePaths regex to the full URL (including query parameters) instead of just the pathname
            deduplicate_similar_urls: Whether to deduplicate similar URLs during crawl (default: True)
            zero_data_retention: Whether to delete data after 24 hours

        Returns:
            CrawlResponse with job information
            
        Raises:
            ValueError: If request is invalid
            Exception: If the crawl operation fails to start
        Nr   r   r:   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ri   )r   r   start_crawlrO   )rP   r:   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   re   r   r   r   s                            r>   r   zFirecrawlClient.start_crawl  sN   r ##(B)7FvvY
3
f
 ]
 ]	

 "#6
 &'>
 U
 "#6
 #$8
  0
  !2
 U
 
 w
 n
   !2!
" '(@#
$ $7&'
 
 
* '(8N9%0000'(8'BBBr@   r   pagination_configc                <    t          j        | j        |||          S )av  
        Get the status of a crawl job.
        
        Args:
            job_id: ID of the crawl job
            pagination_config: Optional configuration for pagination behavior
            request_timeout: Timeout (in seconds) for each individual HTTP request. When auto-pagination 
                is enabled (default) and there are multiple pages of results, this timeout applies to 
                each page request separately, not to the entire operation
            
        Returns:
            CrawlJob with current status and data
            
        Raises:
            Exception: If the status check fails
        )r   r   )r   get_crawl_statusrO   )rP   r|   r   r   s       r>   r   z FirecrawlClient.get_crawl_status  s-    . ,/+	
 
 
 	
r@   next_urlc                :    t          j        | j        ||          S )a7  
        Fetch a single page of crawl results using a next URL.

        Args:
            next_url: Opaque next URL from a prior crawl status response
            request_timeout: Timeout (in seconds) for the HTTP request

        Returns:
            CrawlJob with the page data and next URL (if any)
        r   )r   get_crawl_status_pagerO   rP   r   r   s      r>   r   z%FirecrawlClient.get_crawl_status_page  s*      1+
 
 
 	
r@   crawl_idc                 6    t          j        | j        |          S )a  
        Retrieve error details and robots.txt blocks for a given crawl job.
        
        Args:
            crawl_id: The ID of the crawl job
        
        Returns:
            CrawlErrorsResponse containing per-URL errors and robots-blocked URLs
        )r   get_crawl_errorsrO   rP   r   s     r>   r   z FirecrawlClient.get_crawl_errors)  s     ,T-=xHHHr@   c                 4    t          j        | j                  S )z
        Get a list of currently active crawl jobs.
        
        Returns:
            ActiveCrawlsResponse containing a list of active crawl jobs.
        )r   get_active_crawlsrO   rP   s    r>   r   z!FirecrawlClient.get_active_crawls5  s     -d.>???r@   c                 *    |                                  S )z
        List currently active crawl jobs for the authenticated team.
        
        Returns:
            ActiveCrawlsResponse containing the list of active crawl jobs
        )r   r   s    r>   active_crawlszFirecrawlClient.active_crawls>  s     %%'''r@   r0   include_subdomainsr   r   r   rF   re   r[   r0   r   c                    t          d ||||||||	fD                       rt          ||||||nd|||	          nd}
t          j        | j        ||
          S )a/  Map a URL and return discovered links.

        Args:
            url: Root URL to explore
            search: Optional substring filter for discovered links
            include_subdomains: Whether to include subdomains
            ignore_query_parameters: Whether to ignore query parameters when mapping
            limit: Maximum number of links to return
            sitemap: Sitemap usage mode ("only" | "include" | "skip")
            timeout: Request timeout in milliseconds

        Returns:
            MapData containing the discovered links
        c              3      K   | ]}|d uV  	d S rh   ri   rj   s     r>   rm   z&FirecrawlClient.map.<locals>.<genexpr>k  s>        O  O1$  O  O  O  O  O  Or@   Nr   r   )rr   r   
map_moduler1   rO   )rP   r:   r0   r   r   r   r   rF   re   r[   rv   s              r>   r1   zFirecrawlClient.mapG  s    H   O  O1CE\^celnu  xC  EM  )N  O  O  O  O  O	Y*1$;&2GG	#	
 	
 	
 	
 UY 	 ~d.W===r@   c                 6    t          j        | j        |          S )z
        Cancel a crawl job.
        
        Args:
            crawl_id: The ID of the crawl job to cancel
            
        Returns:
            bool: True if the crawl was cancelled, False otherwise
        )r   cancel_crawlrO   r   s     r>   r   zFirecrawlClient.cancel_crawlo  s     ()98DDDr@   c                 X    t          ||          }t          j        | j        |          S )zDerive crawl parameters from natural-language prompt.

        Args:
            url: Root URL
            prompt: Instruction describing how to crawl

        Returns:
            CrawlParamsData with normalized crawl configuration
        )r:   ry   )r   r   crawl_params_previewrO   )rP   r:   ry   r   s       r>   r   z$FirecrawlClient.crawl_params_preview{  s,     %V<<<01A7KKKr@   
ry   schemasystem_promptr   enable_web_searchshow_sourcesr   r   re   r4   urlsr   r   r   r   r   r4   c       
         L    t          j        | j        |||||||||	|
|          S )a  Start an extract job (non-blocking).

        .. deprecated::
            The extract endpoint is in maintenance mode and its use is discouraged.
            Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor
            to find a replacement.

        Args:
            urls: URLs to extract from (optional)
            prompt: Natural-language instruction for extraction
            schema: Target JSON schema for the output
            system_prompt: Optional system instruction
            allow_external_links: Allow hyperlinks in output
            enable_web_search: Whether to augment with web search
            show_sources: Include per-field/source mapping when available
            scrape_options: Scrape options applied prior to extraction
            ignore_invalid_urls: Skip invalid URLs instead of failing
            integration: Integration tag/name
            agent: Agent configuration
        Returns:
            Response payload with job id/status (poll with get_extract_status)
        r   )extract_modulestart_extractrO   )rP   r   ry   r   r   r   r   r   r   r   re   r4   s               r>   r   zFirecrawlClient.start_extract  sF    J +'!5/%) 3#
 
 
 	
r@   ry   r   r   r   r   r   r   r   r   rF   re   r4   c                P    t          j        | j        |||||||||	|
|||          S )a  Extract structured data and wait until completion.

        .. deprecated::
            The extract endpoint is in maintenance mode and its use is discouraged.
            Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor
            to find a replacement.

        Args:
            urls: URLs to extract from (optional)
            prompt: Natural-language instruction for extraction
            schema: Target JSON schema for the output
            system_prompt: Optional system instruction
            allow_external_links: Allow hyperlinks in output
            enable_web_search: Whether to augment with web search
            show_sources: Include per-field/source mapping when available
            scrape_options: Scrape options applied prior to extraction
            ignore_invalid_urls: Skip invalid URLs instead of failing
            poll_interval: Seconds between status checks
            timeout: Maximum seconds to wait (None for no timeout)
            integration: Integration tag/name
            agent: Agent configuration
        Returns:
            Final extract response when completed
        r   )r   r3   rO   )rP   r   ry   r   r   r   r   r   r   r   r   rF   re   r4   s                 r>   r3   zFirecrawlClient.extract  sL    R %'!5/%) 3'#
 
 
 	
r@   )rR   rS   rT   rU   rV   rF   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   r   append_to_idr   r   r   re   idempotency_keyr   r   c                v   t          d ||||||||	|
||||||||||fD                       rit          di d t          di d|d|d|d|d|d|d	|d
|	d|
d|d|d|d|d|d|d|d|d|d|                                D             nd}t	          j        | j        |||||||||
  
        S )a  Start a batch scrape job over multiple URLs (non-blocking).

        Args:
            urls: List of URLs to scrape
            formats: Output formats to collect per URL
            headers: HTTP headers
            include_tags: HTML tags to include
            exclude_tags: HTML tags to exclude
            only_main_content: Restrict scraping to main content
            timeout: Per-request timeout in milliseconds
            wait_for: Wait condition in milliseconds
            mobile: Emulate mobile viewport
            parsers: Parser list (e.g., ["pdf"]) 
            actions: Browser actions to perform
            location: Location settings
            skip_tls_verification: Skip TLS verification
            remove_base64_images: Remove base64 images from output
            fast_mode: Prefer faster scraping modes
            use_mock: Use a mock data source (internal/testing)
            block_ads: Block ads during scraping
            proxy: Proxy setting
            max_age: Cache max age
            store_in_cache: Whether to store results in cache
            webhook: Webhook configuration
            append_to_id: Append to an existing batch job
            ignore_invalid_urls: Skip invalid URLs without failing
            max_concurrency: Max concurrent scrapes
            zero_data_retention: Delete data after 24 hours
            integration: Integration tag/name
            idempotency_key: Header used to deduplicate starts

        Returns:
            Response payload with job id (poll with get_batch_scrape_status)
        c              3      K   | ]}|d uV  	d S rh   ri   rj   s     r>   rm   z5FirecrawlClient.start_batch_scrape.<locals>.<genexpr>M  >        O  O1$  O  O  O  O  O  Or@   c                     i | ]
\  }}|||S rh   ri   ro   s      r>   rq   z6FirecrawlClient.start_batch_scrape.<locals>.<dictcomp>8  .     ( ( (1( ) !( 'r@   rR   rS   rT   rU   rV   rF   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   N)rv   r   r   r   r   r   re   r   ri   )rr   r   rs   rt   batch_modulestart_batch_scraperO   )rP   r   rR   rS   rT   rU   rV   rF   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   r   r   r   r   r   re   r   rv   s                                r>   r   z"FirecrawlClient.start_batch_scrape  s   n   O  O'<Whjqs{  ~D  FM  OV  X`  bw  yM  OX  Zb  dm  ot  v}  M  )N  O  O  O  O  O-Y- 
 
( ( ! ! !!! *\! *\	!
 #4"3!  ! "! v!  !  ! "! '<&;! &:%9! $)! "!  $)!!" e#!$  %!&  .~'!( egg)( ( (
 
 
, UY- 	0 .% 3+ 3#+
 
 
 	
r@   c                 :    t          j        | j        ||          S )a  Get current status and any scraped data for a batch job.

        Args:
            job_id: Batch job ID
            pagination_config: Optional configuration for pagination behavior

        Returns:
            Status payload including counts and partial data
        )r   )r   get_batch_scrape_statusrO   )rP   r|   r   s      r>   r   z'FirecrawlClient.get_batch_scrape_status\  s*     3/
 
 
 	
r@   c                :    t          j        | j        ||          S )aB  Fetch a single page of batch scrape results using a next URL.

        Args:
            next_url: Opaque next URL from a prior batch scrape status response
            request_timeout: Timeout (in seconds) for the HTTP request

        Returns:
            BatchScrapeJob with the page data and next URL (if any)
        r   )r   get_batch_scrape_status_pagerO   r   s      r>   r   z,FirecrawlClient.get_batch_scrape_status_pagep  s*     8+
 
 
 	
r@   c                 6    t          j        | j        |          S )zCancel a running batch scrape job.

        Args:
            job_id: Batch job ID

        Returns:
            True if the job was cancelled
        )r   cancel_batch_scraperO   r   s     r>   r   z#FirecrawlClient.cancel_batch_scrape  s     /0@&IIIr@   c                 6    t          j        | j        |          S )zRetrieve error details for a batch scrape job.

        Args:
            job_id: Batch job ID

        Returns:
            Errors and robots-blocked URLs for the job
        )batch_methodsget_batch_scrape_errorsrO   r   s     r>   r   z'FirecrawlClient.get_batch_scrape_errors  s     4T5EvNNNr@   c                 6    t          j        | j        |          S )a  Get the current status (and data if completed) of an extract job.

        .. deprecated::
            The extract endpoint is in maintenance mode and its use is discouraged.
            Review https://docs.firecrawl.dev/developer-guides/usage-guides/choosing-the-data-extractor
            to find a replacement.

        Args:
            job_id: Extract job ID

        Returns:
            Extract response payload with status and optional data
        )r   get_extract_statusrO   r   s     r>   r   z"FirecrawlClient.get_extract_status  s     01A6JJJr@   )r   re   max_creditsstrict_constrain_to_urlsmodelr   r   r   r   )zspark-1-prozspark-1-minic                F    t          j        | j        ||||||||	  	        S )aT  Start an agent job (non-blocking).

        Args:
            urls: URLs to process (optional)
            prompt: Natural-language instruction for the agent
            schema: Target JSON schema for the output (dict or Pydantic BaseModel)
            integration: Integration tag/name
            max_credits: Maximum credits to use (optional)
            model: Model to use for the agent ("spark-1-pro" or "spark-1-mini")
            webhook: Webhook URL or configuration for notifications
        Returns:
            Response payload with job id/status (poll with get_agent_status)
        )ry   r   re   r   r   r   r   )agent_modulestart_agentrO   )	rP   r   ry   r   re   r   r   r   r   s	            r>   r   zFirecrawlClient.start_agent  s<    2 '##%=

 

 

 
	
r@   )r   re   r   rF   r   r   r   r   c       	         J    t          j        | j        |||||||||	|
          S )a  Run an agent and wait until completion.

        Args:
            urls: URLs to process (optional)
            prompt: Natural-language instruction for the agent
            schema: Target JSON schema for the output (dict or Pydantic BaseModel)
            integration: Integration tag/name
            poll_interval: Seconds between status checks
            timeout: Maximum seconds to wait (None for no timeout)
            max_credits: Maximum credits to use (optional)
            model: Model to use for the agent ("spark-1-pro" or "spark-1-mini")
            webhook: Webhook URL or configuration for notifications
        Returns:
            Final agent response when completed
        )	ry   r   re   r   rF   r   r   r   r   )r   r4   rO   )rP   r   ry   r   re   r   rF   r   r   r   r   s              r>   r4   zFirecrawlClient.agent  sB    : !#'#%=
 
 
 	
r@   c                 6    t          j        | j        |          S )zGet the current status (and data if completed) of an agent job.

        Args:
            job_id: Agent job ID

        Returns:
            Agent response payload with status and optional data
        )r   get_agent_statusrO   r   s     r>   r   z FirecrawlClient.get_agent_status  s     ,T-=vFFFr@   c                 6    t          j        | j        |          S )zCancel a running agent job.

        Args:
            job_id: Agent job ID

        Returns:
            True if the agent was cancelled
        )r   cancel_agentrO   r   s     r>   r   zFirecrawlClient.cancel_agent  s     ()96BBBr@   c                 4    t          j        | j                  S )zCGet current concurrency and maximum allowed for this team/key (v2).)usage_methodsget_concurrencyrO   r   s    r>   r   zFirecrawlClient.get_concurrency      ,T-=>>>r@   c                 4    t          j        | j                  S )z-Get remaining credits for this team/key (v2).)r   get_credit_usagerO   r   s    r>   r   z FirecrawlClient.get_credit_usage      -d.>???r@   c                 4    t          j        | j                  S )z$Get recent token usage metrics (v2).)r   get_token_usagerO   r   s    r>   r  zFirecrawlClient.get_token_usage  r   r@   
by_api_keyc                 6    t          j        | j        |          S )z!Get historical credit usage (v2).)r   get_credit_usage_historicalrO   rP   r  s     r>   r  z+FirecrawlClient.get_credit_usage_historical  s    89I:VVVr@   c                 6    t          j        | j        |          S )z Get historical token usage (v2).)r   get_token_usage_historicalrO   r  s     r>   r  z*FirecrawlClient.get_token_usage_historical!  s    78H*UUUr@   c                 4    t          j        | j                  S )z*Get metrics about the team's scrape queue.)r   get_queue_statusrO   r   s    r>   r
  z FirecrawlClient.get_queue_status%  r   r@   ttlactivity_ttlstream_web_viewrd   r  r  r  c                >    t          j        | j        ||||          S )a  Create a new browser session.

        Args:
            ttl: Total time-to-live in seconds (30-3600, default 300)
            activity_ttl: Inactivity TTL in seconds (10-3600)
            stream_web_view: Whether to enable webview streaming
            profile: Profile config with ``name`` (str) and
                optional ``save_changes`` (bool, default ``True``)

        Returns:
            BrowserCreateResponse with session id and CDP URL
        r  )browser_moduler5   rO   )rP   r  r  r  rd   s        r>   r5   zFirecrawlClient.browser*  s0    ( %%+
 
 
 	
r@   r   rz   rF   
session_idc                >    t          j        | j        ||||          S )aa  Execute code in a browser session.

        Args:
            session_id: Browser session ID
            code: Code to execute
            language: Programming language ("python", "node", or "bash")
            timeout: Execution timeout in seconds (1-300, default 30)

        Returns:
            BrowserExecuteResponse with execution result
        r  )r  browser_executerO   )rP   r  r}   rz   rF   s        r>   r  zFirecrawlClient.browser_executeF  s0    & -
 
 
 	
r@   c                 6    t          j        | j        |          S )zDelete a browser session.

        Args:
            session_id: Browser session ID

        Returns:
            BrowserDeleteResponse
        )r  delete_browserrO   )rP   r  s     r>   r  zFirecrawlClient.delete_browsera  s     ,T-=zJJJr@   statusr  )active	destroyedc                8    t          j        | j        |          S )zList browser sessions.

        Args:
            status: Filter by session status ("active" or "destroyed")

        Returns:
            BrowserListResponse with list of sessions
        r  )r  list_browsersrO   )rP   r  s     r>   r  zFirecrawlClient.list_browsersl  s'     +
 
 
 	
r@   r.   kindr   rF   r  )r.   r/   c                *    t          | ||||          S )aA  Create a watcher for crawl or batch jobs.

        Args:
            job_id: Job ID to watch
            kind: Job kind ("crawl" or "batch")
            poll_interval: Seconds between status checks
            timeout: Maximum seconds to watch (None for no timeout)

        Returns:
            Watcher instance
        r  r6   )rP   r|   r  r   rF   s        r>   watcherzFirecrawlClient.watcher~  s    & tV$mU\]]]]r@   )rR   rS   rT   rU   rV   rF   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   r   r   r   r   r   re   r   r   wait_timeoutr!  c                z   t          d ||||||||	|
||||||||||fD                       rit          di d t          di d|d|d|d|d|d|d	|d
|	d|
d|d|d|d|d|d|d|d|d|d|                                D             nd}t	          j        | j        |||||||||||          S )zE
        Start a batch scrape job and wait until completion.
        c              3      K   | ]}|d uV  	d S rh   ri   rj   s     r>   rm   z/FirecrawlClient.batch_scrape.<locals>.<genexpr>  r   r@   c                     i | ]
\  }}|||S rh   ri   ro   s      r>   rq   z0FirecrawlClient.batch_scrape.<locals>.<dictcomp>  r   r@   rR   rS   rT   rU   rV   rF   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   N)
rv   r   r   r   r   r   re   r   r   rF   ri   )rr   r   rs   rt   r   batch_scraperO   )rP   r   rR   rS   rT   rU   rV   rF   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   r   r   r   r   r   re   r   r   r!  rv   s                                  r>   r%  zFirecrawlClient.batch_scrape  s%   t   O  O'<Whjqs{  ~D  FM  OV  X`  bw  yM  OX  Zb  dm  ot  v}  M  )N  O  O  O  O  O-Y- 
 
( ( ! ! !!! *\! *\	!
 #4"3!  ! "! v!  !  ! "! '<&;! &:%9! $)! "!  $)!!" e#!$  %!&  .~'!( egg)( ( (
 
 
, UY- 	0 (% 3+ 3#+' 
 
 
 	
r@   )NrA   NrB   rC   rh   )F)L__name__
__module____qualname____doc__staticmethodstrboolr?   r   floatintrQ   r   r   r   r   r   r   r-   r	   r   r   r   r   r   r   r   r   r   r0   r   r   r.   r   r   r)   r   r   r   r   r   r   r   r(   r   r1   r   r   r   r*   r   r3   r   r   r   r   r   r   r   r   r4   r   r   r   r   r  r  r  r
  r5   r  r  r  r7   r   r%  ri   r@   r>   r9   r9   9   s         2s 2t 2 2 2 \2
 "&2#' #)
 )
#)
 )
 %	)

 )
 )
 )
 )
 )
^ 37,0,0,0,0!%"&!%KO AE)-04/3$("&$(#!%)-,0%)1ND ND NDND $~./	ND
 $sCx.)ND tCy)ND tCy)ND $D>ND #ND 3-ND ND %S	4c9n0E+F FGHND $u  &{   |  }  ~ND :&ND  (~ND  'tn!ND" D>#ND$ 3-%ND& D>'ND( })ND* #+ND, !-ND. $sCx.)/ND0 c]1ND2 
3ND ND ND NDf #"

 !%6<!% $"
 "
 "
"
 sm"

 "
 23"
 #"
 "
 "
 "
 "
H
Hs 
H 
H 
H 
H-s - - - - #

 !%6<!% $
 
 

 sm

 
 23
 #
 
 
 
 
(-C - - - - 1559#!"&.2!%26%))? )? )?)? $|,-	)?
 T.12)? })? c])? 3-)? &d^)? #)? !/)? c])? 
)? )? )? )?^ !%-1-1-1@D)-(-#$)%*!&"'#)-7;26"')-$)!%+/%)5d
 d
 d
d
 	d

  S	*d
  S	*d
 &c]d
 '";<=d
 !d
 "&d
 }d
 "d
 #d
 d
  d
  }!d
" "##d
$ %] 234%d
& !/'d
(  )d
* #'+d
, "-d
. /d
0 #1d
2 "%3d
4 c]5d
6 
7d
 d
 d
 d
T !%-1-1-1@D)-(-#$)%*!&"'#)-7;26"')-$)%)/WC WC WCWC 	WC
  S	*WC  S	*WC &c]WC '";<=WC !WC "&WC }WC "WC #WC WC  WC  }!WC" "##WC$ %] 234%WC& !/'WC(  )WC* #'+WC, "-WC. c]/WC0 
1WC WC WC WCx 9=

 ,0
 
 

 $$45

 "%
 

 
 
 
D ,0	
 
 

 "%	

 

 
 
 
,
I 
I1D 
I 
I 
I 
I@#7 @ @ @ @(3 ( ( ( ( !%-126#@D!%%)'+&> &> &>&> 	&>
 %TN&> "*$&> }&> '";<=&> #&> c]&> 8$&> 
&> &> &> &>P
ES 
ET 
E 
E 
E 
EL LS L_ L L L L %)2
 !%+/'+/3,0'+48.2%)(,2
 2
 2
tCy!2
 	2

 c3h(2
  }2
 'tn2
 $D>2
 tn2
 !12
 &d^2
 c]2
 %2
 2
 2
 2
l %)8
 !%+/'+/3,0'+48.2!%%)(,8
 8
 8
tCy!8
 	8

 c3h(8
  }8
 'tn8
 $D>8
 tn8
 !18
 &d^8
 8
 #8
 c]8
 %8
 8
 8
 8
| 37,0,0,0,0!%"&!%KO AE)-04/3$("&$(#!%)-7;&*.2)-.2%))-;d
 d
 d
3id
 $~./	d

 $sCx.)d
 tCy)d
 tCy)d
 $D>d
 #d
 3-d
 d
 %S	4c9n0E+F FGHd
 $u  &{   |  }  ~d
 :&d
  (~d
  'tn!d
" D>#d
$ 3-%d
& D>'d
( })d
* #+d
, !-d
. %] 234/d
0 sm1d
2 &d^3d
4 "#5d
6 &d^7d
8 c]9d
: "#;d
 d
 d
 d
R 9=
 

 $$45
 
 
 
0 ,0	
 
 

 "%	
 
 
 
*	J# 	J$ 	J 	J 	J 	J	Oc 	O 	O 	O 	OK K K K K$ %)#

 !%%)%)37BF<@#
 #
 #
tCy!#
 	#

 #
 c]#
 c]#
 #+4.#
  =>?#
 %%7 789#
 #
 #
 #
N %))

 !%%)!%%)37BF<@)
 )
 )
tCy!)
 	)

 )
 c])
 )
 #)
 c])
 #+4.)
  =>?)
 %%7 789)
 )
 )
 )
V	Gs 	G 	G 	G 	G	C3 	C4 	C 	C 	C 	C? ? ?@ @ @? ? ?W Wd W W W WV VT V V V V@ @ @ "&**.,0
 
 
 c]
 sm	

 "$
 $sCx.)
 
 
 
B 7=!%
 
 

 

 23
 #
 
 
 
6	K 	K 	K 	K 	K <@
 
 
 !678
 
 
 
, +2!%^ ^ ^^ &'	^
 ^ #^ 
^ ^ ^ ^2 37,0,0,0,0!%"&!%KO AE)-04/3$("&$(#!%)-7;&*.2)-.2%))-&*?I
 I
 I
3iI
 $~./	I

 $sCx.)I
 tCy)I
 tCy)I
 $D>I
 #I
 3-I
 I
 %S	4c9n0E+F FGHI
 $u  &{   |  }  ~I
 :&I
  (~I
  'tn!I
" D>#I
$ 3-%I
& D>'I
( })I
* #+I
, !-I
. %] 234/I
0 sm1I
2 &d^3I
4 "#5I
6 &d^7I
8 c]9I
: "#;I
< =I
> sm?I
 I
 I
 I
 I
 I
r@   r9   )Fr)  rK   typingr   r   r   r   r   r   r	   typesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   utils.http_clientr+   utils.error_handlerr,   methodsr-   ru   r.   r   r/   r   r0   r   r1   r   r   r2   r   r3   r   r4   r   r5   r  r   r7   r9   ri   r@   r>   <module>r4     s    
			 F F F F F F F F F F F F F F F F F F! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !D * ) ) ) ) ) / / / / / / , , , , , , * * * * * * * * * * * * , , , , , , & & & & & & + + + + + + + + + + + + . . . . . . * * * * * * . . . . . .      c
 c
 c
 c
 c
 c
 c
 c
 c
 c
r@   