Source code for airflow.providers.cncf.kubernetes.sensors.spark_kubernetes
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import TYPE_CHECKING, Optional, Sequence

from kubernetes import client

from airflow.exceptions import AirflowException
from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook
from airflow.sensors.base import BaseSensorOperator

if TYPE_CHECKING:
    from airflow.utils.context import Context
class SparkKubernetesSensor(BaseSensorOperator):
    """
    Checks a sparkApplication object in a kubernetes cluster.

    .. seealso::
        For more detail about the Spark Application Object have a look at the reference:
        https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/v1beta2-1.1.0-2.4.5/docs/api-docs.md#sparkapplication

    :param application_name: spark Application resource name
    :param namespace: the kubernetes namespace where the sparkApplication resides
    :param kubernetes_conn_id: The :ref:`kubernetes connection<howto/connection:kubernetes>`
        to the Kubernetes cluster.
    :param attach_log: determines whether logs for the driver pod should be appended to the sensor log
    :param api_group: kubernetes api group of sparkApplication
    :param api_version: kubernetes api version of sparkApplication
    """

    template_fields: Sequence[str] = ("application_name", "namespace")
    FAILURE_STATES = ("FAILED", "UNKNOWN")
    SUCCESS_STATES = ("COMPLETED",)
    def __init__(
        self,
        *,
        application_name: str,
        attach_log: bool = False,
        namespace: Optional[str] = None,
        kubernetes_conn_id: str = "kubernetes_default",
        api_group: str = 'sparkoperator.k8s.io',
        api_version: str = 'v1beta2',
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.application_name = application_name
        self.attach_log = attach_log
        self.namespace = namespace
        self.kubernetes_conn_id = kubernetes_conn_id
        self.hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
        self.api_group = api_group
        self.api_version = api_version

    def _log_driver(self, application_state: str, response: dict) -> None:
        if not self.attach_log:
            return
        status_info = response["status"]
        if "driverInfo" not in status_info:
            return
        driver_info = status_info["driverInfo"]
        if "podName" not in driver_info:
            return
        driver_pod_name = driver_info["podName"]
        namespace = response["metadata"]["namespace"]
        log_method = self.log.error if application_state in self.FAILURE_STATES else self.log.info
        try:
            log = ""
            for line in self.hook.get_pod_logs(driver_pod_name, namespace=namespace):
                log += line.decode()
            log_method(log)
        except client.rest.ApiException as e:
            self.log.warning(
                "Could not read logs for pod %s. It may have been disposed.\n"
                "Make sure timeToLiveSeconds is set on your SparkApplication spec.\n"
                "underlying exception: %s",
                driver_pod_name,
                e,
            )
    def poke(self, context: 'Context') -> bool:
        self.log.info("Poking: %s", self.application_name)
        response = self.hook.get_custom_object(
            group=self.api_group,
            version=self.api_version,
            plural="sparkapplications",
            name=self.application_name,
            namespace=self.namespace,
        )
        try:
            application_state = response["status"]["applicationState"]["state"]
        except KeyError:
            return False
        if self.attach_log and application_state in self.FAILURE_STATES + self.SUCCESS_STATES:
            self._log_driver(application_state, response)
        if application_state in self.FAILURE_STATES:
            raise AirflowException(f"Spark application failed with state: {application_state}")
        elif application_state in self.SUCCESS_STATES:
            self.log.info("Spark application ended successfully")
            return True
        else:
            self.log.info("Spark application is still in state: %s", application_state)
            return False
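
For context, a minimal usage sketch follows. It is not part of this module: it assumes a SparkApplication object named ``spark-pi`` already exists in a hypothetical ``spark-jobs`` namespace (for example, one submitted by ``SparkKubernetesOperator``), and the DAG and task ids are likewise illustrative.

    from datetime import datetime

    from airflow import DAG
    from airflow.providers.cncf.kubernetes.sensors.spark_kubernetes import SparkKubernetesSensor

    with DAG(
        dag_id="spark_pi_monitor",  # illustrative DAG id
        start_date=datetime(2022, 1, 1),
        schedule_interval=None,
        catchup=False,
    ) as dag:
        # Wait for the SparkApplication to reach a terminal state: the sensor
        # raises AirflowException on FAILED/UNKNOWN and succeeds on COMPLETED.
        monitor_spark_pi = SparkKubernetesSensor(
            task_id="monitor_spark_pi",
            application_name="spark-pi",  # assumed existing SparkApplication name
            namespace="spark-jobs",  # assumed namespace
            kubernetes_conn_id="kubernetes_default",
            attach_log=True,  # append driver pod logs to the sensor log on completion
        )

With ``attach_log=True``, the driver pod's logs are fetched once the application finishes, so keep ``timeToLiveSeconds`` set on the SparkApplication spec so the pod is still available when the sensor reads it.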