Module scrapfly.schedule
Public schedule client for the Scrapfly API.
This module wraps the /scrape/schedules, /screenshot/schedules, /crawl/schedules and cross-kind /schedules endpoints. It is intentionally a thin wrapper: the server returns fully-formed Schedule objects (dicts) and we surface them as-is so callers always see the live server shape.
Classes
class CreateScheduleRequest (webhook_name: str = '',
recurrence: ScheduleRecurrence | None = None,
scheduled_date: str | None = None,
allow_concurrency: bool = False,
retry_on_failure: bool = False,
max_retries: int | None = None,
notes: str | None = None)-
Expand source code
@dataclass class CreateScheduleRequest: """Public-facing request envelope for creating a schedule. The kind-specific configuration (scrape_config / screenshot_config / crawler_config) is supplied as a separate argument by the matching ``create_*_schedule`` method. """ webhook_name: str = "" recurrence: Optional[ScheduleRecurrence] = None scheduled_date: Optional[str] = None allow_concurrency: bool = False retry_on_failure: bool = False max_retries: Optional[int] = None notes: Optional[str] = NonePublic-facing request envelope for creating a schedule.
The kind-specific configuration (scrape_config / screenshot_config / crawler_config) is supplied as a separate argument by the matching `create_*_schedule` method.
Instance variables
var allow_concurrency : bool
var max_retries : int | None
var notes : str | None
var recurrence : ScheduleRecurrence | None
var retry_on_failure : bool
var scheduled_date : str | None
var webhook_name : str
class ListSchedulesOptions (kind: str | None = None, status: str | None = None)-
Expand source code
@dataclass class ListSchedulesOptions: """Filter options for list_schedules / list_<kind>_schedules. Use either this dataclass or the equivalent keyword arguments interchangeably.""" kind: Optional[str] = None # "api.scrape" | "api.screenshot" | "api.crawler" status: Optional[str] = None # "ACTIVE" | "PAUSED" | "CANCELLED"Filter options for list_schedules / list_
<kind>_schedules. Use either this dataclass or the equivalent keyword arguments interchangeably.
Instance variables
var kind : str | None
var status : str | None
class ScheduleAPIError (message: str, code: str, http_status_code: int, details: Any = None)-
Expand source code
class ScheduleAPIError(Exception): """Raised on any non-2xx response from a /schedules/* endpoint. The ``code`` attribute carries the public ``ERR::SCHEDULER::*`` identifier so callers can branch on it without parsing the message string. """ def __init__( self, message: str, code: str, http_status_code: int, details: Any = None, ) -> None: super().__init__(message) self.code = code self.http_status_code = http_status_code self.details = details def __str__(self) -> str: # noqa: D401 return f"{self.code} ({self.http_status_code}): {self.args[0] if self.args else ''}"Raised on any non-2xx response from a /schedules/* endpoint.
The `code` attribute carries the public `ERR::SCHEDULER::*` identifier so callers can branch on it without parsing the message string.
Ancestors
- builtins.Exception
- builtins.BaseException
class ScheduleClientMixin-
Expand source code
class ScheduleClientMixin: """Mixed into ScrapflyClient — provides the public schedule surface. All methods funnel through ``_schedule_request``, which uses the same ``self._http_handler`` and ``self.host`` / ``self.key`` as the rest of the client so retries, verify, headers and timeouts behave identically. """ # Attributes provided by the concrete ``ScrapflyClient`` subclass. Declared # here so type checkers can resolve them on the mixin without complaint. key: str host: str verify: bool ua: str _http_handler: Callable[..., Any] # ---- Create --------------------------------------------------------- def create_scrape_schedule( self, scrape_config: Dict[str, Any], request: CreateScheduleRequest, ) -> Schedule: """Create a Web Scraping API schedule. ``scrape_config`` is the same dict you would pass to :meth:`scrape` (e.g. ``{"url": "...", "render_js": True}``).""" return self._create_schedule("/scrape/schedules", "scrape_config", scrape_config, request) def create_screenshot_schedule( self, screenshot_config: Dict[str, Any], request: CreateScheduleRequest, ) -> Schedule: """Create a Screenshot API schedule.""" return self._create_schedule( "/screenshot/schedules", "screenshot_config", screenshot_config, request ) def create_crawler_schedule( self, crawler_config: Dict[str, Any], request: CreateScheduleRequest, ) -> Schedule: """Create a Crawler API schedule.""" return self._create_schedule( "/crawl/schedules", "crawler_config", crawler_config, request ) # ---- Read ----------------------------------------------------------- def get_schedule(self, schedule_id: str) -> Schedule: """Return a schedule by id (works across all kinds).""" return self._schedule_request("GET", "/schedules/" + quote(schedule_id, safe="")) def list_schedules( self, *, kind: Optional[str] = None, status: Optional[str] = None, ) -> List[Schedule]: """List every schedule on the account, optionally filtered by kind or status.""" params: Dict[str, str] = {} if kind: params["kind"] = kind if status: 
params["status"] = status return self._schedule_request("GET", "/schedules", query=params) def list_scrape_schedules(self, *, status: Optional[str] = None) -> List[Schedule]: params = {"status": status} if status else None return self._schedule_request("GET", "/scrape/schedules", query=params) def list_screenshot_schedules(self, *, status: Optional[str] = None) -> List[Schedule]: params = {"status": status} if status else None return self._schedule_request("GET", "/screenshot/schedules", query=params) def list_crawler_schedules(self, *, status: Optional[str] = None) -> List[Schedule]: params = {"status": status} if status else None return self._schedule_request("GET", "/crawl/schedules", query=params) # ---- Mutate --------------------------------------------------------- def update_schedule(self, schedule_id: str, request: UpdateScheduleRequest) -> Schedule: """Patch an active schedule. Only fields set in ``request`` change.""" return self._schedule_request( "PATCH", "/schedules/" + quote(schedule_id, safe=""), body=request.to_dict() ) def cancel_schedule(self, schedule_id: str) -> None: """Cancel a schedule. 
Cancellation is terminal (returns no body).""" self._schedule_request("DELETE", "/schedules/" + quote(schedule_id, safe="")) def pause_schedule(self, schedule_id: str) -> Schedule: return self._schedule_request("POST", "/schedules/" + quote(schedule_id, safe="") + "/pause") def resume_schedule(self, schedule_id: str) -> Schedule: return self._schedule_request("POST", "/schedules/" + quote(schedule_id, safe="") + "/resume") def execute_schedule(self, schedule_id: str) -> Schedule: """Fire a schedule immediately, regardless of next_scheduled_date.""" return self._schedule_request("POST", "/schedules/" + quote(schedule_id, safe="") + "/execute") # ---- Internals ------------------------------------------------------ def _create_schedule( self, path: str, config_key: str, config: Dict[str, Any], request: CreateScheduleRequest, ) -> Schedule: body: Dict[str, Any] = { config_key: config, "webhook_name": request.webhook_name, "allow_concurrency": request.allow_concurrency, "retry_on_failure": request.retry_on_failure, } if request.recurrence is not None: body["recurrence"] = request.recurrence.to_dict() if request.scheduled_date is not None: body["scheduled_date"] = request.scheduled_date if request.max_retries is not None: body["max_retries"] = request.max_retries if request.notes is not None: body["notes"] = request.notes return self._schedule_request("POST", path, body=body) def _schedule_request( self, method: str, path: str, *, query: Optional[Dict[str, str]] = None, body: Optional[Dict[str, Any]] = None, ) -> Any: params: Dict[str, str] = {"key": self.key} if query: params.update(query) kwargs: Dict[str, Any] = { "method": method, "url": self.host + path, "params": params, "verify": self.verify, "headers": { "user-agent": self.ua, "accept": "application/json", }, } if body is not None: kwargs["json"] = body kwargs["headers"]["content-type"] = "application/json" response = self._http_handler(**kwargs) if response.status_code == 204: return None if 
response.status_code >= 400: self._raise_schedule_error(response) if not response.content: return None return response.json() def _raise_schedule_error(self, response) -> None: try: envelope = response.json() except Exception: envelope = {} code = envelope.get("error", "ERR::SCHEDULER::BACKEND_ERROR") message = envelope.get("message", "") reason = envelope.get("reason", "") details = envelope.get("details") text = message if reason: text = f"{message} ({reason})" if message else reason if not text: text = response.text[:500] or f"HTTP {response.status_code}" raise ScheduleAPIError( message=text, code=code, http_status_code=response.status_code, details=details, )Mixed into ScrapflyClient — provides the public schedule surface.
All methods funnel through `_schedule_request`, which uses the same `self._http_handler` and `self.host` / `self.key` as the rest of the client so retries, verify, headers and timeouts behave identically.
Subclasses
Class variables
var host : str
var key : str
var ua : str
var verify : bool
Methods
def cancel_schedule(self, schedule_id: str) ‑> None-
Expand source code
def cancel_schedule(self, schedule_id: str) -> None: """Cancel a schedule. Cancellation is terminal (returns no body).""" self._schedule_request("DELETE", "/schedules/" + quote(schedule_id, safe=""))Cancel a schedule. Cancellation is terminal (returns no body).
def create_crawler_schedule(self,
crawler_config: Dict[str, Any],
request: CreateScheduleRequest) ‑> Dict[str, Any]-
Expand source code
def create_crawler_schedule( self, crawler_config: Dict[str, Any], request: CreateScheduleRequest, ) -> Schedule: """Create a Crawler API schedule.""" return self._create_schedule( "/crawl/schedules", "crawler_config", crawler_config, request )Create a Crawler API schedule.
def create_scrape_schedule(self,
scrape_config: Dict[str, Any],
request: CreateScheduleRequest) ‑> Dict[str, Any]-
Expand source code
def create_scrape_schedule( self, scrape_config: Dict[str, Any], request: CreateScheduleRequest, ) -> Schedule: """Create a Web Scraping API schedule. ``scrape_config`` is the same dict you would pass to :meth:`scrape` (e.g. ``{"url": "...", "render_js": True}``).""" return self._create_schedule("/scrape/schedules", "scrape_config", scrape_config, request)Create a Web Scraping API schedule.
`scrape_config` is the same dict you would pass to :meth:`scrape` (e.g. `{"url": "...", "render_js": True}`).
def create_screenshot_schedule(self,
screenshot_config: Dict[str, Any],
request: CreateScheduleRequest) ‑> Dict[str, Any]-
Expand source code
def create_screenshot_schedule( self, screenshot_config: Dict[str, Any], request: CreateScheduleRequest, ) -> Schedule: """Create a Screenshot API schedule.""" return self._create_schedule( "/screenshot/schedules", "screenshot_config", screenshot_config, request )Create a Screenshot API schedule.
def execute_schedule(self, schedule_id: str) ‑> Dict[str, Any]-
Expand source code
def execute_schedule(self, schedule_id: str) -> Schedule: """Fire a schedule immediately, regardless of next_scheduled_date.""" return self._schedule_request("POST", "/schedules/" + quote(schedule_id, safe="") + "/execute")Fire a schedule immediately, regardless of next_scheduled_date.
def get_schedule(self, schedule_id: str) ‑> Dict[str, Any]-
Expand source code
def get_schedule(self, schedule_id: str) -> Schedule: """Return a schedule by id (works across all kinds).""" return self._schedule_request("GET", "/schedules/" + quote(schedule_id, safe=""))Return a schedule by id (works across all kinds).
def list_crawler_schedules(self, *, status: str | None = None) ‑> List[Dict[str, Any]]-
Expand source code
def list_crawler_schedules(self, *, status: Optional[str] = None) -> List[Schedule]: params = {"status": status} if status else None return self._schedule_request("GET", "/crawl/schedules", query=params) def list_schedules(self, *, kind: str | None = None, status: str | None = None) ‑> List[Dict[str, Any]]-
Expand source code
def list_schedules( self, *, kind: Optional[str] = None, status: Optional[str] = None, ) -> List[Schedule]: """List every schedule on the account, optionally filtered by kind or status.""" params: Dict[str, str] = {} if kind: params["kind"] = kind if status: params["status"] = status return self._schedule_request("GET", "/schedules", query=params)List every schedule on the account, optionally filtered by kind or status.
def list_scrape_schedules(self, *, status: str | None = None) ‑> List[Dict[str, Any]]-
Expand source code
def list_scrape_schedules(self, *, status: Optional[str] = None) -> List[Schedule]: params = {"status": status} if status else None return self._schedule_request("GET", "/scrape/schedules", query=params) def list_screenshot_schedules(self, *, status: str | None = None) ‑> List[Dict[str, Any]]-
Expand source code
def list_screenshot_schedules(self, *, status: Optional[str] = None) -> List[Schedule]: params = {"status": status} if status else None return self._schedule_request("GET", "/screenshot/schedules", query=params) def pause_schedule(self, schedule_id: str) ‑> Dict[str, Any]-
Expand source code
def pause_schedule(self, schedule_id: str) -> Schedule: return self._schedule_request("POST", "/schedules/" + quote(schedule_id, safe="") + "/pause") def resume_schedule(self, schedule_id: str) ‑> Dict[str, Any]-
Expand source code
def resume_schedule(self, schedule_id: str) -> Schedule: return self._schedule_request("POST", "/schedules/" + quote(schedule_id, safe="") + "/resume") def update_schedule(self,
schedule_id: str,
request: UpdateScheduleRequest) ‑> Dict[str, Any]-
Expand source code
def update_schedule(self, schedule_id: str, request: UpdateScheduleRequest) -> Schedule: """Patch an active schedule. Only fields set in ``request`` change.""" return self._schedule_request( "PATCH", "/schedules/" + quote(schedule_id, safe=""), body=request.to_dict() )Patch an active schedule. Only fields set in
`request` change.
class ScheduleEnd (type: str, date: str | None = None, count: int | None = None)-
Expand source code
@dataclass class ScheduleEnd: """Bounds a recurring schedule by either a date or a fire count.""" type: str # "date" | "count" date: Optional[str] = None count: Optional[int] = NoneBounds a recurring schedule by either a date or a fire count.
Instance variables
var count : int | None
var date : str | None
var type : str
class ScheduleRecurrence (cron: str | None = None,
interval: int | None = None,
unit: str | None = None,
days: List[str] | None = None,
ends: ScheduleEnd | None = None)-
Expand source code
@dataclass class ScheduleRecurrence: """When a schedule fires next. Cron mode wins when ``cron`` is set; otherwise ``interval`` + ``unit`` drive the cadence. All times are interpreted in UTC server-side. """ cron: Optional[str] = None interval: Optional[int] = None unit: Optional[str] = None # "minute" | "hour" | "day" | "week" | "month" days: Optional[List[str]] = None ends: Optional[ScheduleEnd] = None def to_dict(self) -> Dict[str, Any]: out: Dict[str, Any] = {} if self.cron: out["cron"] = self.cron if self.interval is not None: out["interval"] = self.interval if self.unit: out["unit"] = self.unit if self.days: out["days"] = self.days if self.ends: ends: Dict[str, Any] = {"type": self.ends.type} if self.ends.date: ends["date"] = self.ends.date if self.ends.count is not None: ends["count"] = self.ends.count out["ends"] = ends return outWhen a schedule fires next.
Cron mode wins when `cron` is set; otherwise `interval` + `unit` drive the cadence. All times are interpreted in UTC server-side.
Instance variables
var cron : str | None
var days : List[str] | None
var ends : ScheduleEnd | None
var interval : int | None
var unit : str | None
Methods
def to_dict(self) ‑> Dict[str, Any]-
Expand source code
def to_dict(self) -> Dict[str, Any]:
    """Serialize this recurrence into a plain dict, leaving out unset fields.

    ``cron``, ``unit`` and ``days`` are omitted when falsy (``None`` or
    empty); ``interval`` is omitted only when it is ``None``, so ``0`` is
    kept. When ``ends`` is set it is emitted as a nested dict that always
    carries ``type``, plus ``date`` when truthy and ``count`` when not
    ``None``.

    Returns:
        A dict with keys in the order ``cron``, ``interval``, ``unit``,
        ``days``, ``ends`` (only those that are populated).
    """
    # (key, value, keep?) triples, listed in the order keys must appear.
    candidates = (
        ("cron", self.cron, bool(self.cron)),
        ("interval", self.interval, self.interval is not None),
        ("unit", self.unit, bool(self.unit)),
        ("days", self.days, bool(self.days)),
    )
    payload: Dict[str, Any] = {
        name: value for name, value, keep in candidates if keep
    }

    bound = self.ends
    if not bound:
        return payload

    bound_payload: Dict[str, Any] = {"type": bound.type}
    if bound.date:
        bound_payload["date"] = bound.date
    if bound.count is not None:
        bound_payload["count"] = bound.count
    payload["ends"] = bound_payload
    return payload
class UpdateScheduleRequest (recurrence: ScheduleRecurrence | None = None,
scheduled_date: str | None = None,
allow_concurrency: bool | None = None,
retry_on_failure: bool | None = None,
max_retries: int | None = None,
notes: str | None = None,
scrape_config: Dict[str, Any] | None = None,
screenshot_config: Dict[str, Any] | None = None,
crawler_config: Dict[str, Any] | None = None)-
Expand source code
@dataclass class UpdateScheduleRequest: """Patch payload. Only fields with a non-None value are forwarded.""" recurrence: Optional[ScheduleRecurrence] = None scheduled_date: Optional[str] = None allow_concurrency: Optional[bool] = None retry_on_failure: Optional[bool] = None max_retries: Optional[int] = None notes: Optional[str] = None scrape_config: Optional[Dict[str, Any]] = None screenshot_config: Optional[Dict[str, Any]] = None crawler_config: Optional[Dict[str, Any]] = None def to_dict(self) -> Dict[str, Any]: out: Dict[str, Any] = {} if self.recurrence is not None: out["recurrence"] = self.recurrence.to_dict() if self.scheduled_date is not None: out["scheduled_date"] = self.scheduled_date if self.allow_concurrency is not None: out["allow_concurrency"] = self.allow_concurrency if self.retry_on_failure is not None: out["retry_on_failure"] = self.retry_on_failure if self.max_retries is not None: out["max_retries"] = self.max_retries if self.notes is not None: out["notes"] = self.notes if self.scrape_config is not None: out["scrape_config"] = self.scrape_config if self.screenshot_config is not None: out["screenshot_config"] = self.screenshot_config if self.crawler_config is not None: out["crawler_config"] = self.crawler_config return outPatch payload. Only fields with a non-None value are forwarded.
Instance variables
var allow_concurrency : bool | None
var crawler_config : Dict[str, Any] | None
var max_retries : int | None
var notes : str | None
var recurrence : ScheduleRecurrence | None
var retry_on_failure : bool | None
var scheduled_date : str | None
var scrape_config : Dict[str, Any] | None
var screenshot_config : Dict[str, Any] | None
Methods
def to_dict(self) ‑> Dict[str, Any]-
Expand source code
def to_dict(self) -> Dict[str, Any]:
    """Build the PATCH body, forwarding only fields that are not ``None``.

    ``recurrence`` is serialized through its own ``to_dict()``; every other
    field is copied as-is. Falsy-but-set values (``False``, ``0``, ``""``,
    ``{}``) are kept because only ``None`` means "not set".

    Returns:
        A dict holding just the populated fields, in declaration order.
    """
    payload: Dict[str, Any] = {}

    # The only field needing conversion; it also comes first in the output.
    if self.recurrence is not None:
        payload["recurrence"] = self.recurrence.to_dict()

    passthrough = (
        "scheduled_date",
        "allow_concurrency",
        "retry_on_failure",
        "max_retries",
        "notes",
        "scrape_config",
        "screenshot_config",
        "crawler_config",
    )
    for name in passthrough:
        value = getattr(self, name)
        if value is not None:
            payload[name] = value
    return payload