Module scrapfly.errors
Expand source code
import base64
from typing import Optional, Tuple
from requests import Request, Response
class WebhookError(Exception):
    """Base error for webhook handling failures."""
class WebhookSignatureMissMatch(WebhookError):
    """Raised when a webhook signature does not match the expected value."""
class ContentError(Exception):
    """Error related to response content handling."""
class ScrapflyError(Exception):
    """Base exception for all errors raised by the Scrapfly SDK.

    Carries the API error code, HTTP status, retry hints and the
    originating API response so callers can inspect failure details.
    """

    KIND_HTTP_BAD_RESPONSE = 'HTTP_BAD_RESPONSE'
    KIND_SCRAPFLY_ERROR = 'SCRAPFLY_ERROR'

    RESOURCE_PROXY = 'PROXY'
    RESOURCE_THROTTLE = 'THROTTLE'
    RESOURCE_SCRAPE = 'SCRAPE'
    RESOURCE_ASP = 'ASP'
    RESOURCE_SCHEDULE = 'SCHEDULE'
    RESOURCE_WEBHOOK = 'WEBHOOK'
    RESOURCE_SESSION = 'SESSION'

    def __init__(
        self,
        message: str,
        code: str,
        http_status_code: int,
        resource: Optional[str] = None,
        is_retryable: bool = False,
        retry_delay: Optional[int] = None,
        retry_times: Optional[int] = None,
        documentation_url: Optional[str] = None,
        api_response: Optional['ApiResponse'] = None
    ):
        # Core identification of the failure.
        self.message = message
        self.code = code
        self.http_status_code = http_status_code
        self.resource = resource

        # Retry guidance reported by the API.
        self.is_retryable = is_retryable
        self.retry_delay = retry_delay
        self.retry_times = retry_times

        # Extra diagnostics.
        self.documentation_url = documentation_url
        self.api_response = api_response

        super().__init__(self.message, str(self.code))

    def __str__(self):
        # Append the documentation link, when available, so users can
        # self-diagnose from the error message alone.
        if self.documentation_url is None:
            return self.message
        return f'{self.message}. Learn more: {self.documentation_url}'
class EncoderError(Exception):
    """Raised when a payload cannot be encoded.

    Bug fix: this previously derived from ``BaseException``, which meant
    generic ``except Exception`` handlers silently let it propagate
    (``BaseException`` is reserved for exits like ``KeyboardInterrupt``).
    Deriving from ``Exception`` follows PEP 8's guidance for user-defined
    exceptions; ``except EncoderError`` callers are unaffected.
    """

    def __init__(self, content: str):
        # Raw offending payload, kept for logging / debugging.
        self.content = content
        super().__init__()

    def __str__(self) -> str:
        return self.content

    def __repr__(self):
        return "Invalid payload: %s" % self.content
class ExtraUsageForbidden(ScrapflyError):
    """Raised when extra (over-quota) usage is not permitted for the account."""
class HttpError(ScrapflyError):
    """Error raised from an HTTP exchange with the Scrapfly API.

    Keeps the originating request/response pair so callers can inspect
    the raw exchange that produced the failure.
    """

    def __init__(self, request: Request, response: Optional[Response] = None, **kwargs):
        self.request = request
        self.response = response
        super().__init__(**kwargs)

    def __str__(self) -> str:
        # Upstream errors describe the *target website's* response,
        # pulled from the scrape result rather than the API response.
        if isinstance(self, UpstreamHttpError):
            upstream_status = self.api_response.scrape_result['status_code']
            upstream_reason = self.api_response.scrape_result['reason']
            return f"Target website responded with {upstream_status} - {upstream_reason}"

        # Prefer the structured API error message when one is available.
        if self.api_response is not None:
            return self.api_response.error_message

        summary = f"{self.response.status_code} - {self.response.reason}"

        # Generic API client/server errors also carry a descriptive message.
        if isinstance(self, (ApiHttpClientError, ApiHttpServerError)):
            summary += " - " + self.message

        return summary
class UpstreamHttpError(HttpError):
    """Error originating from the scraped (upstream) website, not the API."""
class UpstreamHttpClientError(UpstreamHttpError):
    """Upstream website responded with a 4xx client error."""
class UpstreamHttpServerError(UpstreamHttpClientError):
    """Upstream website responded with a 5xx server error."""
    # NOTE(review): inherits the *client*-error class — mirrors the
    # ApiHttpServerError(ApiHttpClientError) hierarchy; confirm intentional.
class ApiHttpClientError(HttpError):
    """Scrapfly API responded with a 4xx client error."""
class BadApiKeyError(ApiHttpClientError):
    """API rejected the request due to an invalid API key (HTTP 401)."""
class PaymentRequired(ApiHttpClientError):
    """API rejected the request because payment is required (HTTP 402)."""
class TooManyRequest(ApiHttpClientError):
    """API rejected the request due to rate limiting (HTTP 429)."""
class ApiHttpServerError(ApiHttpClientError):
    """Scrapfly API responded with a 5xx server error."""
    # NOTE(review): inherits the client-error class — confirm intentional,
    # since `except ApiHttpClientError` also catches server errors.
class ScraperAPIError(HttpError):
    """Base class for scrape-API resource errors (proxy, ASP, session, ...)."""
class ScrapflyScrapeError(ScraperAPIError):
    """Error reported for the SCRAPE resource."""
class ScrapflyProxyError(ScraperAPIError):
    """Error reported for the PROXY resource."""
class ScrapflyThrottleError(ScraperAPIError):
    """Error reported for the THROTTLE resource."""
class ScrapflyAspError(ScraperAPIError):
    """Error reported for the ASP (anti-scraping protection) resource."""
class ScrapflyScheduleError(ScraperAPIError):
    """Error reported for the SCHEDULE resource."""
class ScrapflyWebhookError(ScraperAPIError):
    """Error reported for the WEBHOOK resource."""
class ScrapflySessionError(ScraperAPIError):
    """Error reported for the SESSION resource."""
class TooManyConcurrentRequest(HttpError):
    """Raised when the account's concurrency limit is exceeded."""
class QuotaLimitReached(HttpError):
    """Raised when the account's usage quota is exhausted."""
class ScreenshotAPIError(HttpError):
    """HTTP error from the Screenshot API."""
class ExtractionAPIError(HttpError):
    """HTTP error from the Extraction API."""
class ErrorFactory:
    """Maps an API error response to the most specific exception class.

    Dispatch order: server errors (5xx) -> notable HTTP statuses ->
    resource-specific errors -> generic client/Scrapfly errors.
    """

    RESOURCE_TO_ERROR = {
        ScrapflyError.RESOURCE_SCRAPE: ScrapflyScrapeError,
        ScrapflyError.RESOURCE_WEBHOOK: ScrapflyWebhookError,
        ScrapflyError.RESOURCE_PROXY: ScrapflyProxyError,
        ScrapflyError.RESOURCE_SCHEDULE: ScrapflyScheduleError,
        ScrapflyError.RESOURCE_ASP: ScrapflyAspError,
        ScrapflyError.RESOURCE_SESSION: ScrapflySessionError
    }

    # Notable http error has own class for more convenience
    # Only applicable for generic API error
    HTTP_STATUS_TO_ERROR = {
        401: BadApiKeyError,
        402: PaymentRequired,
        429: TooManyRequest
    }

    @staticmethod
    def _get_resource(code: str) -> Optional[str]:
        """Extract the RESOURCE segment of an ``ERR::RESOURCE::DETAIL`` code.

        Returns ``None`` when *code* is not a string or carries no
        ``::``-separated resource segment.
        """
        # BUGFIX: original annotation claimed Optional[Tuple[str, str]] but a
        # bare resource string was returned; also, a strict 3-way unpack
        # raised ValueError for codes with a different segment count.
        if isinstance(code, str) and '::' in code:
            parts = code.split('::')
            if len(parts) >= 2:
                return parts[1]
        return None

    @staticmethod
    def create(api_response: 'ScrapeApiResponse'):
        """Build and return the appropriate exception for *api_response*.

        Does not raise the exception itself; the caller decides what to do
        with the returned instance.
        """
        is_retryable = False
        kind = ScrapflyError.KIND_HTTP_BAD_RESPONSE if api_response.success is False else ScrapflyError.KIND_SCRAPFLY_ERROR
        http_code = api_response.status_code
        retry_delay = 5  # default retry hint, overridden by Retry-After header
        retry_times = 3
        description = None
        error_url = 'https://scrapfly.io/docs/scrape-api/errors#api'
        code = api_response.error['code']

        if code == 'ERR::SCRAPE::BAD_UPSTREAM_RESPONSE':
            # Surface the target website's status code instead of the API's.
            http_code = api_response.scrape_result['status_code']

        if 'description' in api_response.error:
            description = api_response.error['description']

        message = '%s %s %s' % (str(http_code), code, api_response.error['message'])

        if 'doc_url' in api_response.error:
            error_url = api_response.error['doc_url']

        if 'retryable' in api_response.error:
            is_retryable = api_response.error['retryable']

        resource = ErrorFactory._get_resource(code=code)

        if is_retryable is True:
            # BUGFIX: the original tested for an 'X-Retry' header but then
            # read 'Retry-After', raising KeyError whenever only one of the
            # two was present. Test and read the same header.
            if 'Retry-After' in api_response.headers:
                retry_delay = int(api_response.headers['Retry-After'])

        message = '%s: %s' % (message, description) if description else message

        # retry_delay always holds a value here (default 5), so only the
        # retryable flag gates the hint.
        if is_retryable is True:
            message = '%s. Retry delay : %s seconds' % (message, str(retry_delay))

        args = {
            'message': message,
            'code': code,
            'http_status_code': http_code,
            'is_retryable': is_retryable,
            'api_response': api_response,
            'resource': resource,
            'retry_delay': retry_delay,
            'retry_times': retry_times,
            'documentation_url': error_url,
            'request': api_response.request,
            'response': api_response.response
        }

        if kind == ScrapflyError.KIND_HTTP_BAD_RESPONSE:
            if http_code >= 500:
                return ApiHttpServerError(**args)

            is_scraper_api_error = resource in ErrorFactory.RESOURCE_TO_ERROR

            # Notable statuses (401/402/429) take precedence only for
            # generic API errors, never for resource-scoped ones.
            if http_code in ErrorFactory.HTTP_STATUS_TO_ERROR and not is_scraper_api_error:
                return ErrorFactory.HTTP_STATUS_TO_ERROR[http_code](**args)

            if is_scraper_api_error:
                return ErrorFactory.RESOURCE_TO_ERROR[resource](**args)

            return ApiHttpClientError(**args)

        elif kind == ScrapflyError.KIND_SCRAPFLY_ERROR:
            if code == 'ERR::SCRAPE::BAD_UPSTREAM_RESPONSE':
                if http_code >= 500:
                    return UpstreamHttpServerError(**args)

                if http_code >= 400:
                    return UpstreamHttpClientError(**args)

            if resource in ErrorFactory.RESOURCE_TO_ERROR:
                return ErrorFactory.RESOURCE_TO_ERROR[resource](**args)

            return ScrapflyError(**args)
# Public API of the module. FIX: the annotation declares Tuple[str, ...]
# but the original value was a list; use an actual tuple so the value
# matches its annotation.
__all__: Tuple[str, ...] = (
    'ScrapflyError',
    'ScrapflyAspError',
    'ScrapflyProxyError',
    'ScrapflyScheduleError',
    'ScrapflyScrapeError',
    'ScrapflySessionError',
    'ScrapflyThrottleError',
    'ScrapflyWebhookError',
    'UpstreamHttpClientError',
    'UpstreamHttpServerError',
    'ApiHttpClientError',
    'ApiHttpServerError',
)
Classes
class ApiHttpClientError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ApiHttpClientError(HttpError): pass
Ancestors
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
Subclasses
- ApiHttpServerError
- scrapfly.errors.BadApiKeyError
- scrapfly.errors.PaymentRequired
- scrapfly.errors.TooManyRequest
class ApiHttpServerError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ApiHttpServerError(ApiHttpClientError): pass
Ancestors
- ApiHttpClientError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
class ScrapflyAspError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ScrapflyAspError(ScraperAPIError): pass
Ancestors
- scrapfly.errors.ScraperAPIError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
class ScrapflyError (message: str, code: str, http_status_code: int, resource: Optional[str] = None, is_retryable: bool = False, retry_delay: Optional[int] = None, retry_times: Optional[int] = None, documentation_url: Optional[str] = None, api_response: Optional[ForwardRef('ApiResponse')] = None)
-
Common base class for all non-exit exceptions.
Expand source code
class ScrapflyError(Exception): KIND_HTTP_BAD_RESPONSE = 'HTTP_BAD_RESPONSE' KIND_SCRAPFLY_ERROR = 'SCRAPFLY_ERROR' RESOURCE_PROXY = 'PROXY' RESOURCE_THROTTLE = 'THROTTLE' RESOURCE_SCRAPE = 'SCRAPE' RESOURCE_ASP = 'ASP' RESOURCE_SCHEDULE = 'SCHEDULE' RESOURCE_WEBHOOK = 'WEBHOOK' RESOURCE_SESSION = 'SESSION' def __init__( self, message: str, code: str, http_status_code: int, resource: Optional[str]=None, is_retryable: bool = False, retry_delay: Optional[int] = None, retry_times: Optional[int] = None, documentation_url: Optional[str] = None, api_response: Optional['ApiResponse'] = None ): self.message = message self.code = code self.retry_delay = retry_delay self.retry_times = retry_times self.resource = resource self.is_retryable = is_retryable self.documentation_url = documentation_url self.api_response = api_response self.http_status_code = http_status_code super().__init__(self.message, str(self.code)) def __str__(self): message = self.message if self.documentation_url is not None: message += '. Learn more: %s' % self.documentation_url return message
Ancestors
- builtins.Exception
- builtins.BaseException
Subclasses
- scrapfly.errors.ExtraUsageForbidden
- scrapfly.errors.HttpError
Class variables
var KIND_HTTP_BAD_RESPONSE
var KIND_SCRAPFLY_ERROR
var RESOURCE_ASP
var RESOURCE_PROXY
var RESOURCE_SCHEDULE
var RESOURCE_SCRAPE
var RESOURCE_SESSION
var RESOURCE_THROTTLE
var RESOURCE_WEBHOOK
class ScrapflyProxyError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ScrapflyProxyError(ScraperAPIError): pass
Ancestors
- scrapfly.errors.ScraperAPIError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
class ScrapflyScheduleError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ScrapflyScheduleError(ScraperAPIError): pass
Ancestors
- scrapfly.errors.ScraperAPIError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
class ScrapflyScrapeError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ScrapflyScrapeError(ScraperAPIError): pass
Ancestors
- scrapfly.errors.ScraperAPIError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
class ScrapflySessionError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ScrapflySessionError(ScraperAPIError): pass
Ancestors
- scrapfly.errors.ScraperAPIError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
class ScrapflyThrottleError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ScrapflyThrottleError(ScraperAPIError): pass
Ancestors
- scrapfly.errors.ScraperAPIError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
class ScrapflyWebhookError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class ScrapflyWebhookError(ScraperAPIError): pass
Ancestors
- scrapfly.errors.ScraperAPIError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
class UpstreamHttpClientError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class UpstreamHttpClientError(UpstreamHttpError): pass
Ancestors
- scrapfly.errors.UpstreamHttpError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException
Subclasses
- UpstreamHttpServerError
class UpstreamHttpServerError (request: requests.models.Request, response: Optional[requests.models.Response] = None, **kwargs)
-
Common base class for all non-exit exceptions.
Expand source code
class UpstreamHttpServerError(UpstreamHttpClientError): pass
Ancestors
- UpstreamHttpClientError
- scrapfly.errors.UpstreamHttpError
- scrapfly.errors.HttpError
- ScrapflyError
- builtins.Exception
- builtins.BaseException