Source code for airflow.providers.cncf.kubernetes.sensors.spark_kubernetes
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING, Sequence

from kubernetes import client

from airflow.exceptions import AirflowException, AirflowSkipException
from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook
from airflow.sensors.base import BaseSensorOperator

if TYPE_CHECKING:
    from airflow.utils.context import Context
class SparkKubernetesSensor(BaseSensorOperator):
    """
    Checks a sparkApplication object in a Kubernetes cluster.

    .. seealso::
        For more detail about the Spark Application Object, have a look at the reference:
        https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/v1beta2-1.1.0-2.4.5/docs/api-docs.md#sparkapplication

    :param application_name: spark Application resource name
    :param namespace: the kubernetes namespace where the sparkApplication resides
    :param container_name: the kubernetes container name where the sparkApplication resides
    :param kubernetes_conn_id: The :ref:`kubernetes connection<howto/connection:kubernetes>`
        to the Kubernetes cluster.
    :param attach_log: determines whether logs for the driver pod should be appended to the sensor log
    :param api_group: kubernetes api group of sparkApplication
    :param api_version: kubernetes api version of sparkApplication
    """

    template_fields: Sequence[str] = ("application_name", "namespace")
    FAILURE_STATES = ("FAILED", "UNKNOWN")
    SUCCESS_STATES = ("COMPLETED",)

    def __init__(
        self,
        *,
        application_name: str,
        attach_log: bool = False,
        namespace: str | None = None,
        container_name: str = "spark-kubernetes-driver",
        kubernetes_conn_id: str = "kubernetes_default",
        api_group: str = "sparkoperator.k8s.io",
        api_version: str = "v1beta2",
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.application_name = application_name
        self.attach_log = attach_log
        self.namespace = namespace
        self.container_name = container_name
        self.kubernetes_conn_id = kubernetes_conn_id
        self.api_group = api_group
        self.api_version = api_version

    @cached_property
    def hook(self) -> KubernetesHook:
        return KubernetesHook(conn_id=self.kubernetes_conn_id)
    def _log_driver(self, application_state: str, response: dict) -> None:
        if not self.attach_log:
            return
        status_info = response["status"]
        if "driverInfo" not in status_info:
            return
        driver_info = status_info["driverInfo"]
        if "podName" not in driver_info:
            return
        driver_pod_name = driver_info["podName"]
        namespace = response["metadata"]["namespace"]
        log_method = self.log.error if application_state in self.FAILURE_STATES else self.log.info
        try:
            log = ""
            for line in self.hook.get_pod_logs(
                driver_pod_name, namespace=namespace, container=self.container_name
            ):
                log += line.decode()
            log_method(log)
        except client.rest.ApiException as e:
            self.log.warning(
                "Could not read logs for pod %s. It may have been disposed.\n"
                "Make sure timeToLiveSeconds is set on your SparkApplication spec.\n"
                "underlying exception: %s",
                driver_pod_name,
                e,
            )
    def poke(self, context: Context) -> bool:
        self.log.info("Poking: %s", self.application_name)
        response = self.hook.get_custom_object(
            group=self.api_group,
            version=self.api_version,
            plural="sparkapplications",
            name=self.application_name,
            namespace=self.namespace,
        )
        try:
            application_state = response["status"]["applicationState"]["state"]
        except KeyError:
            return False
        if self.attach_log and application_state in self.FAILURE_STATES + self.SUCCESS_STATES:
            self._log_driver(application_state, response)
        if application_state in self.FAILURE_STATES:
            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
            message = f"Spark application failed with state: {application_state}"
            if self.soft_fail:
                raise AirflowSkipException(message)
            raise AirflowException(message)
        elif application_state in self.SUCCESS_STATES:
            self.log.info("Spark application ended successfully")
            return True
        else:
            self.log.info("Spark application is still in state: %s", application_state)
            return False
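A minimal usage sketch, separate from the module above: the sensor is commonly paired with SparkKubernetesOperator, which submits the SparkApplication that the sensor then polls until it reaches a SUCCESS state (poke returns True) or a FAILURE state (the sensor raises). The dag_id, task ids, namespace, manifest file name, and the "spark-pi" application name below are illustrative assumptions, not values defined by this module.

import pendulum

from airflow import DAG
from airflow.providers.cncf.kubernetes.operators.spark_kubernetes import SparkKubernetesOperator
from airflow.providers.cncf.kubernetes.sensors.spark_kubernetes import SparkKubernetesSensor

with DAG(
    dag_id="spark_pi_example",  # hypothetical DAG id for illustration
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    schedule=None,
    catchup=False,
) as dag:
    # Submit a SparkApplication manifest; assumes spark_pi.yaml declares
    # metadata.name "spark-pi" in the "spark-jobs" namespace.
    submit = SparkKubernetesOperator(
        task_id="submit_spark_pi",
        namespace="spark-jobs",
        application_file="spark_pi.yaml",
        kubernetes_conn_id="kubernetes_default",
    )
    # Poll the submitted application until it completes or fails;
    # attach_log=True appends the driver pod logs to the sensor log.
    wait = SparkKubernetesSensor(
        task_id="wait_for_spark_pi",
        application_name="spark-pi",
        namespace="spark-jobs",
        attach_log=True,
        kubernetes_conn_id="kubernetes_default",
    )
    submit >> wait

Because poke returns False while the application is pending or running, the sensor's standard poke_interval and timeout parameters (inherited from BaseSensorOperator) control how often and how long it polls.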