Source code for airflow.providers.amazon.aws.hooks.batch_waiters

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
AWS Batch service waiters.

.. seealso::

    - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/clients.html#waiters
    - https://github.com/boto/botocore/blob/develop/botocore/waiter.py
"""

from __future__ import annotations

import json
import sys
from copy import deepcopy
from pathlib import Path
from typing import TYPE_CHECKING, Callable

import botocore.client
import botocore.exceptions
import botocore.waiter

from airflow.exceptions import AirflowException
from airflow.providers.amazon.aws.hooks.batch_client import BatchClientHook

if TYPE_CHECKING:
    from airflow.providers.amazon.aws.utils.task_log_fetcher import AwsTaskLogFetcher


[docs]class BatchWaitersHook(BatchClientHook): """ A utility to manage waiters for AWS Batch services. .. code-block:: python import random from airflow.providers.amazon.aws.operators.batch_waiters import BatchWaiters # to inspect default waiters waiters = BatchWaiters() config = waiters.default_config # type: Dict waiter_names = waiters.list_waiters() # -> ["JobComplete", "JobExists", "JobRunning"] # The default_config is a useful stepping stone to creating custom waiters, e.g. custom_config = waiters.default_config # this is a deepcopy # modify custom_config['waiters'] as necessary and get a new instance: waiters = BatchWaiters(waiter_config=custom_config) waiters.waiter_config # check the custom configuration (this is a deepcopy) waiters.list_waiters() # names of custom waiters # During the init for BatchWaiters, the waiter_config is used to build a waiter_model; # and note that this only occurs during the class init, to avoid any accidental mutations # of waiter_config leaking into the waiter_model. waiters.waiter_model # -> botocore.waiter.WaiterModel object # The waiter_model is combined with the waiters.client to get a specific waiter # and the details of the config on that waiter can be further modified without any # accidental impact on the generation of new waiters from the defined waiter_model, e.g. waiters.get_waiter("JobExists").config.delay # -> 5 waiter = waiters.get_waiter("JobExists") # -> botocore.waiter.Batch.Waiter.JobExists object waiter.config.delay = 10 waiters.get_waiter("JobExists").config.delay # -> 5 as defined by waiter_model # To use a specific waiter, update the config and call the `wait()` method for jobId, e.g. waiter = waiters.get_waiter("JobExists") # -> botocore.waiter.Batch.Waiter.JobExists object waiter.config.delay = random.uniform(1, 10) # seconds waiter.config.max_attempts = 10 waiter.wait(jobs=[jobId]) .. seealso:: - https://www.2ndwatch.com/blog/use-waiters-boto3-write/ - https://github.com/boto/botocore/blob/develop/botocore/waiter.py - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#waiters - https://github.com/boto/botocore/tree/develop/botocore/data/ec2/2016-11-15 - https://github.com/boto/botocore/issues/1915 :param waiter_config: a custom waiter configuration for AWS Batch services :param aws_conn_id: connection id of AWS credentials / region name. If None, credential boto3 strategy will be used (https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html). :param region_name: region name to use in AWS client. Override the AWS region in connection (if provided) """ def __init__(self, *args, waiter_config: dict | None = None, **kwargs) -> None: super().__init__(*args, **kwargs) self._default_config: dict | None = None self._waiter_config = waiter_config or self.default_config self._waiter_model = botocore.waiter.WaiterModel(self._waiter_config) @property
[docs] def default_config(self) -> dict: """ An immutable default waiter configuration. :return: a waiter configuration for AWS Batch services """ if self._default_config is None: config_path = Path(__file__).with_name("batch_waiters.json").resolve() with open(config_path) as config_file: self._default_config = json.load(config_file) return deepcopy(self._default_config) # avoid accidental mutation
@property
[docs] def waiter_config(self) -> dict: """ An immutable waiter configuration for this instance; a ``deepcopy`` is returned by this property. During the init for BatchWaiters, the waiter_config is used to build a waiter_model and this only occurs during the class init, to avoid any accidental mutations of waiter_config leaking into the waiter_model. :return: a waiter configuration for AWS Batch services """ return deepcopy(self._waiter_config) # avoid accidental mutation
@property
[docs] def waiter_model(self) -> botocore.waiter.WaiterModel: """ A configured waiter model used to generate waiters on AWS Batch services. :return: a waiter model for AWS Batch services """ return self._waiter_model
[docs] def get_waiter( self, waiter_name: str, _: dict[str, str] | None = None, deferrable: bool = False, client=None ) -> botocore.waiter.Waiter: """ Get an AWS Batch service waiter, using the configured ``.waiter_model``. The ``.waiter_model`` is combined with the ``.client`` to get a specific waiter and the properties of that waiter can be modified without any accidental impact on the generation of new waiters from the ``.waiter_model``, e.g. .. code-block:: python waiters.get_waiter("JobExists").config.delay # -> 5 waiter = waiters.get_waiter("JobExists") # a new waiter object waiter.config.delay = 10 waiters.get_waiter("JobExists").config.delay # -> 5 as defined by waiter_model To use a specific waiter, update the config and call the `wait()` method for jobId, e.g. .. code-block:: python import random waiter = waiters.get_waiter("JobExists") # a new waiter object waiter.config.delay = random.uniform(1, 10) # seconds waiter.config.max_attempts = 10 waiter.wait(jobs=[jobId]) :param waiter_name: The name of the waiter. The name should match the name (including the casing) of the key name in the waiter model file (typically this is CamelCasing); see ``.list_waiters``. :param _: unused, just here to match the method signature in base_aws :return: a waiter object for the named AWS Batch service """ return botocore.waiter.create_waiter_with_client(waiter_name, self.waiter_model, self.client)
[docs] def list_waiters(self) -> list[str]: """ List the waiters in a waiter configuration for AWS Batch services. :return: waiter names for AWS Batch services """ return self.waiter_model.waiter_names
[docs] def wait_for_job( self, job_id: str, delay: int | float | None = None, get_batch_log_fetcher: Callable[[str], AwsTaskLogFetcher | None] | None = None, ) -> None: """ Wait for Batch job to complete. This assumes that the ``.waiter_model`` is configured using some variation of the ``.default_config`` so that it can generate waiters with the following names: "JobExists", "JobRunning" and "JobComplete". :param job_id: a Batch job ID :param delay: A delay before polling for job status :param get_batch_log_fetcher: A method that returns batch_log_fetcher of type AwsTaskLogFetcher or None when the CloudWatch log stream hasn't been created yet. :raises: AirflowException .. note:: This method adds a small random jitter to the ``delay`` (+/- 2 sec, >= 1 sec). Using a random interval helps to avoid AWS API throttle limits when many concurrent tasks request job-descriptions. It also modifies the ``max_attempts`` to use the ``sys.maxsize``, which allows Airflow to manage the timeout on waiting. """ self.delay(delay) try: waiter = self.get_waiter("JobExists") waiter.config.delay = self.add_jitter(waiter.config.delay, width=2, minima=1) waiter.config.max_attempts = sys.maxsize # timeout is managed by Airflow waiter.wait(jobs=[job_id]) waiter = self.get_waiter("JobRunning") waiter.config.delay = self.add_jitter(waiter.config.delay, width=2, minima=1) waiter.config.max_attempts = sys.maxsize # timeout is managed by Airflow waiter.wait(jobs=[job_id]) batch_log_fetcher = None try: if get_batch_log_fetcher: batch_log_fetcher = get_batch_log_fetcher(job_id) if batch_log_fetcher: batch_log_fetcher.start() waiter = self.get_waiter("JobComplete") waiter.config.delay = self.add_jitter(waiter.config.delay, width=2, minima=1) waiter.config.max_attempts = sys.maxsize # timeout is managed by Airflow waiter.wait(jobs=[job_id]) finally: if batch_log_fetcher: batch_log_fetcher.stop() batch_log_fetcher.join() except (botocore.exceptions.ClientError, botocore.exceptions.WaiterError) as err: raise AirflowException(err)

Was this entry helpful?