Module scrapfly.scrapy.response

Classes

class ScrapflyScrapyResponse (request: ScrapflyScrapyRequest,
scrape_api_response: ScrapeApiResponse)
Expand source code
class ScrapflyScrapyResponse(TextResponse):
    """Scrapy text response built from a Scrapfly ``ScrapeApiResponse``.

    The constructor copies entries of ``scrape_api_response.scrape_result``
    (and the ``context`` / ``scrape_config`` attributes of the API response)
    onto the instance, then initialises ``TextResponse`` with the scraped
    URL, HTTP status code, response headers and body bytes.
    """

    content: Union[str, BytesIO]
    scrape_api_response: ScrapeApiResponse

    context: Dict
    scrape_config: ScrapeConfig
    log_url: str
    status: str
    # NOTE(review): `config` is declared here but never assigned in
    # __init__ -- confirm whether it should be populated.
    config: Dict
    success: bool
    duration: float
    format: str
    screenshots: Dict
    dns: Optional[Dict]
    ssl: Optional[Dict]
    iframes: Dict
    browser_data: Dict
    error: Optional[Dict]

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, request: ScrapflyScrapyRequest, scrape_api_response: ScrapeApiResponse):
        """Populate the response from a Scrapfly API response.

        Args:
            request: The request that is attached to this response.
            scrape_api_response: API response whose ``scrape_result``
                mapping provides the content and metadata.

        Raises:
            RuntimeError: If the scraped content is neither a ``str`` nor a
                readable binary/text stream.
        """
        # Imported locally so the module-level import block stays untouched.
        from io import TextIOBase

        # Set first: the `__class__` property reads this attribute.
        self.scrape_api_response = scrape_api_response
        scrape_result = scrape_api_response.scrape_result

        self.content = scrape_result['content']

        self.context = scrape_api_response.context
        self.scrape_config = scrape_api_response.scrape_config
        self.log_url = scrape_result['log_url']
        # NOTE(review): scrapy's Response.__init__ is expected to assign
        # `status` from the `status` argument passed below, which would
        # replace this value with the HTTP status code -- confirm.
        self.status = scrape_result['status']
        self.success = scrape_result['success']
        self.duration = scrape_result['duration']
        self.format = scrape_result['format']
        self.screenshots = scrape_result['screenshots']
        self.dns = scrape_result['dns']
        self.ssl = scrape_result['ssl']
        self.iframes = scrape_result['iframes']
        self.browser_data = scrape_result['browser_data']
        self.error = scrape_result['error']
        self.ip_address = None

        if isinstance(self.content, str):
            body = self.content.encode(self.DEFAULT_ENCODING)
        elif isinstance(self.content, (BytesIO, TextIO, TextIOBase)):
            # `typing.TextIO` alone does not match concrete text streams
            # such as io.StringIO, hence the additional TextIOBase check.
            body = self.content.read()
            if isinstance(body, str):
                # Text streams yield str; encode it the same way as the
                # plain-str branch so the body is always bytes.
                body = body.encode(self.DEFAULT_ENCODING)
        else:
            raise RuntimeError('Unsupported body %s' % type(self.content))

        TextResponse.__init__(
            self,
            url=scrape_result['url'],
            status=scrape_result['status_code'],
            headers=scrape_result['response_headers'],
            body=body,
            request=request,
            ip_address=None
        )

    @property
    def __class__(self):
        """Report a Scrapy response class chosen from the ``content-type`` header.

        Returns ``HtmlResponse`` when the header contains ``text/html``,
        ``XmlResponse`` when it contains ``application/xml``, and
        ``TextResponse`` otherwise (including when the header is absent).
        """
        response_headers = self.scrape_api_response.scrape_result['response_headers']
        content_type = response_headers['content-type'] if 'content-type' in response_headers else ''

        if 'text/html' in content_type:
            return HtmlResponse
        if 'application/xml' in content_type:
            return XmlResponse
        return TextResponse

    def sink(self, path: Optional[str] = None, name: Optional[str] = None, file: Optional[Union[TextIO, BytesIO]] = None):
        """Forward ``path``, ``name`` and ``file`` to ``scrape_api_response.sink``.

        The delegate's return value is not propagated.
        """
        self.scrape_api_response.sink(path=path, name=name, file=file)

An object that represents an HTTP response, which is usually downloaded (by the Downloader) and fed to the Spiders for processing.

Ancestors

  • scrapy.http.response.text.TextResponse
  • scrapy.http.response.Response
  • scrapy.utils.trackref.object_ref

Class variables

var DEFAULT_ENCODING

Class constant set to 'utf-8'.

var browser_data : Dict

The 'browser_data' entry of the Scrapfly scrape result.

var config : Dict

Declared as a Dict; the constructor shown above does not assign it.

var content : str | _io.BytesIO

The 'content' entry of the Scrapfly scrape result: either a string or a binary stream.

var context : Dict

The context attribute of the wrapped ScrapeApiResponse.

var dns : Dict | None

The 'dns' entry of the Scrapfly scrape result; may be None.

var duration : float

The 'duration' entry of the Scrapfly scrape result.

var error : Dict | None

The 'error' entry of the Scrapfly scrape result; may be None.

var format : str

The 'format' entry of the Scrapfly scrape result.

var iframes : Dict

The 'iframes' entry of the Scrapfly scrape result.

var log_url : str

The 'log_url' entry of the Scrapfly scrape result.

var scrape_api_response : ScrapeApiResponse

The ScrapeApiResponse this response was built from.

var scrape_config : ScrapeConfig

The scrape_config attribute of the wrapped ScrapeApiResponse.

var screenshots : Dict

The 'screenshots' entry of the Scrapfly scrape result.

var ssl : Dict | None

The 'ssl' entry of the Scrapfly scrape result; may be None.

var status : str

Assigned from the 'status' entry of the Scrapfly scrape result in the constructor.

var success : bool

The 'success' entry of the Scrapfly scrape result.

Methods

def sink(self,
path: str | None = None,
name: str | None = None,
file: typing.TextIO | _io.BytesIO | None = None)
Expand source code
def sink(
    self,
    path: Optional[str] = None,
    name: Optional[str] = None,
    file: Optional[Union[TextIO, BytesIO]] = None,
):
    """Hand ``path``, ``name`` and ``file`` to ``self.scrape_api_response.sink``.

    All three arguments are passed by keyword; the delegate's return value
    is discarded, so this method always returns ``None``.
    """
    forwarded = {'path': path, 'name': name, 'file': file}
    self.scrape_api_response.sink(**forwarded)