Source code for airflow.providers.google.cloud.log.stackdriver_task_handler
# Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License."""Handler that integrates with Stackdriver."""from__future__importannotationsimportloggingfromfunctoolsimportcached_propertyfromtypingimportTYPE_CHECKING,Collectionfromurllib.parseimporturlencodefromgoogle.cloudimportloggingasgcp_loggingfromgoogle.cloud.loggingimportResourcefromgoogle.cloud.logging.handlers.transportsimportBackgroundThreadTransport,Transportfromgoogle.cloud.logging_v2.services.logging_service_v2importLoggingServiceV2Clientfromgoogle.cloud.logging_v2.typesimportListLogEntriesRequest,ListLogEntriesResponsefromairflow.providers.google.cloud.utils.credentials_providerimportget_credentials_and_project_idfromairflow.providers.google.common.constsimportCLIENT_INFOfromairflow.utils.log.trigger_handlerimportctx_indiv_triggerifTYPE_CHECKING:fromgoogle.auth.credentialsimportCredentialsfromairflow.modelsimportTaskInstance
class StackdriverTaskHandler(logging.Handler):
    """Handler that directly makes Stackdriver logging API calls.

    This is a Python standard ``logging`` handler that can be used to route Python
    standard logging messages directly to the Stackdriver Logging API.

    It can also be used to save logs for executing tasks. To do this, you should set
    it as a handler with the name "tasks". In this case, it will also be used to read
    the log for display in Web UI.

    This handler supports both an asynchronous and synchronous transport.

    :param gcp_key_path: Path to Google Cloud Credential JSON file.
        If omitted, authorization based on `the Application Default Credentials
        <https://cloud.google.com/docs/authentication/production#finding_credentials_automatically>`__
        will be used.
    :param scopes: OAuth scopes for the credentials.
    :param name: the name of the custom log in Stackdriver Logging. Defaults to
        'airflow'. The name of the Python logger will be represented in the
        ``python_logger`` field.
    :param transport: Class for creating new transport objects. It should extend
        from the base :class:`google.cloud.logging.handlers.Transport` type and
        implement :meth:`google.cloud.logging.handlers.Transport.send`. Defaults to
        :class:`google.cloud.logging.handlers.BackgroundThreadTransport`. The other
        option is :class:`google.cloud.logging.handlers.SyncTransport`.
    :param resource: (Optional) Monitored resource of the entry, defaults
        to the global resource type.
    :param labels: (Optional) Mapping of labels for the entry.
    """

    # NOTE(review): the methods below read these attributes through ``self``/``cls``
    # but no definition was present in this class body; the values here are restored
    # from the attribute names and must be confirmed against the upstream provider.
    LABEL_TASK_ID = "task_id"
    LABEL_DAG_ID = "dag_id"
    LABEL_EXECUTION_DATE = "execution_date"
    LABEL_TRY_NUMBER = "try_number"
    LOG_VIEWER_BASE_URL = "https://console.cloud.google.com/logs/viewer"

    def __init__(
        self,
        gcp_key_path: str | None = None,
        scopes: Collection[str] | None = _DEFAULT_SCOPESS,
        name: str = DEFAULT_LOGGER_NAME,
        transport: type[Transport] = BackgroundThreadTransport,
        resource: Resource = _GLOBAL_RESOURCE,
        labels: dict[str, str] | None = None,
    ):
        super().__init__()
        self.gcp_key_path: str | None = gcp_key_path
        self.scopes: Collection[str] | None = scopes
        self.name: str = name
        self.transport_type: type[Transport] = transport
        self.resource: Resource = resource
        self.labels: dict[str, str] | None = labels
        # Replaced by ``set_context`` once the handler is bound to a task instance.
        self.task_instance_labels: dict[str, str] | None = {}
        self.task_instance_hostname = "default-hostname"

    @cached_property
    def _credentials_and_project(self) -> tuple[Credentials, str]:
        """Credentials and project id, resolved once per handler instance."""
        credentials, project = get_credentials_and_project_id(
            key_path=self.gcp_key_path, scopes=self.scopes, disable_logging=True
        )
        return credentials, project

    @property
    def _client(self) -> gcp_logging.Client:
        """The Cloud Library API client."""
        credentials, project = self._credentials_and_project
        client = gcp_logging.Client(
            credentials=credentials,
            project=project,
            client_info=CLIENT_INFO,
        )
        return client

    @property
    def _logging_service_client(self) -> LoggingServiceV2Client:
        """The Cloud logging service v2 client."""
        credentials, _ = self._credentials_and_project
        client = LoggingServiceV2Client(
            credentials=credentials,
            client_info=CLIENT_INFO,
        )
        return client

    @cached_property
    def _transport(self) -> Transport:
        """Object responsible for sending data to Stackdriver."""
        # The Transport object is badly defined (no init) but in the docs client/name as constructor
        # arguments are a requirement for any class that derives from Transport class, hence ignore:
        return self.transport_type(self._client, self.name)  # type: ignore[call-arg]

    def _get_labels(self, task_instance=None):
        """Combine the handler-level labels with the task-instance labels.

        :param task_instance: when given, labels are computed from it; otherwise the
            labels stored by :meth:`set_context` are used.
        :return: the labels to attach to a log entry (an empty dict when there are none).
            On a key clash the task-instance label wins.
        """
        if task_instance:
            ti_labels = self._task_instance_to_labels(task_instance)
        else:
            ti_labels = self.task_instance_labels

        if self.labels and ti_labels:
            return {**self.labels, **ti_labels}
        return self.labels or ti_labels or {}

    def emit(self, record: logging.LogRecord) -> None:
        """Actually log the specified logging record.

        :param record: The record to be logged.
        """
        message = self.format(record)
        ti = None
        # TODO: remove ctx_indiv_trigger is not None check when min airflow version >= 2.6
        if ctx_indiv_trigger is not None and getattr(record, ctx_indiv_trigger.name, None):
            ti = getattr(record, "task_instance", None)  # trigger context
        labels = self._get_labels(ti)
        self._transport.send(record, message, resource=self.resource, labels=labels)

    def set_context(self, task_instance: TaskInstance) -> None:
        """
        Configure the handler to attach information about the current task to log entries.

        :param task_instance: Currently executed task
        """
        self.task_instance_labels = self._task_instance_to_labels(task_instance)
        self.task_instance_hostname = task_instance.hostname

    def read(
        self, task_instance: TaskInstance, try_number: int | None = None, metadata: dict | None = None
    ) -> tuple[list[tuple[tuple[str, str]]], list[dict[str, str | bool]]]:
        """
        Read logs of given task instance from Stackdriver logging.

        :param task_instance: task instance object
        :param try_number: task instance try_number to read logs from. If None
           it returns all logs
        :param metadata: log metadata. It is used for streaming log reading and auto-tailing.
        :return: a tuple of (
            list of (one element tuple with two element tuple - hostname and logs)
            and list of metadata)
        """
        if try_number is not None and try_number < 1:
            logs = f"Error fetching the logs. Try number {try_number} is invalid."
            # Use a real boolean so both return paths expose the same ``end_of_log`` type.
            return [((self.task_instance_hostname, logs),)], [{"end_of_log": True}]

        if not metadata:
            metadata = {}

        ti_labels = self._task_instance_to_labels(task_instance)

        if try_number is not None:
            ti_labels[self.LABEL_TRY_NUMBER] = str(try_number)
        else:
            # No try number requested: drop the label so every try matches the filter.
            del ti_labels[self.LABEL_TRY_NUMBER]

        log_filter = self._prepare_log_filter(ti_labels)
        next_page_token = metadata.get("next_page_token")
        all_pages = bool(metadata.get("download_logs"))

        messages, end_of_log, next_page_token = self._read_logs(log_filter, next_page_token, all_pages)

        new_metadata: dict[str, str | bool] = {"end_of_log": end_of_log}

        if next_page_token:
            new_metadata["next_page_token"] = next_page_token

        return [((self.task_instance_hostname, messages),)], [new_metadata]

    def _prepare_log_filter(self, ti_labels: dict[str, str]) -> str:
        """
        Prepare the filter that chooses which log entries to fetch.

        More information:
        https://cloud.google.com/logging/docs/reference/v2/rest/v2/entries/list#body.request_body.FIELDS.filter
        https://cloud.google.com/logging/docs/view/advanced-queries

        :param ti_labels: Task Instance's labels that will be used to search for logs
        :return: logs filter
        """

        def escape_label_key(key: str) -> str:
            # Keys containing a dot are quoted so the dot is not read as a path separator.
            return f'"{key}"' if "." in key else key

        def escape_label_value(value: str) -> str:
            # Backslashes are escaped first so the escapes added for quotes are not doubled.
            escaped_value = value.replace("\\", "\\\\").replace('"', '\\"')
            return f'"{escaped_value}"'

        _, project = self._credentials_and_project
        log_filters = [
            f"resource.type={escape_label_value(self.resource.type)}",
            f'logName="projects/{project}/logs/{self.name}"',
        ]
        log_filters.extend(
            f"resource.labels.{escape_label_key(key)}={escape_label_value(value)}"
            for key, value in self.resource.labels.items()
        )
        log_filters.extend(
            f"labels.{escape_label_key(key)}={escape_label_value(value)}"
            for key, value in ti_labels.items()
        )
        return "\n".join(log_filters)

    def _read_logs(
        self, log_filter: str, next_page_token: str | None, all_pages: bool
    ) -> tuple[str, bool, str | None]:
        """
        Send requests to the Stackdriver service and download logs.

        :param log_filter: Filter specifying the logs to be downloaded.
        :param next_page_token: The token of the page from which the log download will start.
            If None is passed, it will start from the first page.
        :param all_pages: If True is passed, all subpages will be downloaded. Otherwise, only the first
            page will be downloaded
        :return: A tuple that contains the following items:

            * string with logs
            * Boolean value describing whether the end of the log has been reached,
            * token of the next page
        """
        messages = []
        new_messages, next_page_token = self._read_single_logs_page(
            log_filter=log_filter,
            page_token=next_page_token,
        )
        messages.append(new_messages)

        if not all_pages:
            return "\n".join(messages), not bool(next_page_token), next_page_token

        # Follow the page tokens until the service stops returning one.
        while next_page_token:
            new_messages, next_page_token = self._read_single_logs_page(
                log_filter=log_filter, page_token=next_page_token
            )
            messages.append(new_messages)

        return "\n".join(messages), True, None

    def _read_single_logs_page(self, log_filter: str, page_token: str | None = None) -> tuple[str, str]:
        """
        Send a request to the Stackdriver service and download a single page of logs.

        :param log_filter: Filter specifying the logs to be downloaded.
        :param page_token: The token of the page to be downloaded. If None is passed, the first page will be
            downloaded.
        :return: Downloaded logs and next page token
        """
        _, project = self._credentials_and_project
        request = ListLogEntriesRequest(
            resource_names=[f"projects/{project}"],
            filter=log_filter,
            page_token=page_token,
            order_by="timestamp asc",
            page_size=1000,
        )
        response = self._logging_service_client.list_log_entries(request=request)
        # Only the first page of the pager is consumed; the caller drives pagination
        # through the returned token.
        page: ListLogEntriesResponse = next(response.pages)
        messages: list[str] = []
        for entry in page.entries:
            if "message" in (entry.json_payload or {}):
                messages.append(entry.json_payload["message"])  # type: ignore
            elif entry.text_payload:
                messages.append(entry.text_payload)
        return "\n".join(messages), page.next_page_token

    @classmethod
    def _task_instance_to_labels(cls, ti: TaskInstance) -> dict[str, str]:
        """Build the labels that identify the given task instance's log entries."""
        return {
            cls.LABEL_TASK_ID: ti.task_id,
            cls.LABEL_DAG_ID: ti.dag_id,
            cls.LABEL_EXECUTION_DATE: str(ti.execution_date.isoformat()),
            cls.LABEL_TRY_NUMBER: str(ti.try_number),
        }

    @property
    def _resource_path(self) -> str:
        """Slash-separated path made of the resource type and its label keys and values."""
        # NOTE(review): only the ``@property`` decorator of this member was present; the
        # definition itself was missing, which left the decorator attached to
        # ``get_external_log_url``. The body is reconstructed from how the value is used
        # in the URL query below - confirm against the upstream provider.
        segments = [self.resource.type]
        for key, value in self.resource.labels.items():
            segments += [key, value]
        return "/".join(segments)

    def get_external_log_url(self, task_instance: TaskInstance, try_number: int) -> str:
        """
        Create an address for an external log collecting service.

        :param task_instance: task instance object
        :param try_number: task instance try_number to read logs from
        :return: URL to the external log collection service
        """
        _, project_id = self._credentials_and_project

        ti_labels = self._task_instance_to_labels(task_instance)
        ti_labels[self.LABEL_TRY_NUMBER] = str(try_number)

        log_filter = self._prepare_log_filter(ti_labels)

        url_query_string = {
            "project": project_id,
            "interval": "NO_LIMIT",
            "resource": self._resource_path,
            "advancedFilter": log_filter,
        }

        url = f"{self.LOG_VIEWER_BASE_URL}?{urlencode(url_query_string)}"
        return url