Source code for tests.system.providers.google.cloud.ml_engine.example_mlengine
## Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License."""Example Airflow DAG for Google ML Engine service."""from__future__importannotationsimportosfromdatetimeimportdatetimefromgoogle.cloud.aiplatformimportschemafromgoogle.protobuf.json_formatimportParseDictfromgoogle.protobuf.struct_pb2importValuefromairflowimportmodelsfromairflow.operators.bashimportBashOperatorfromairflow.providers.google.cloud.operators.gcsimport(GCSCreateBucketOperator,GCSDeleteBucketOperator,GCSSynchronizeBucketsOperator,)fromairflow.providers.google.cloud.operators.vertex_ai.batch_prediction_jobimport(CreateBatchPredictionJobOperator,DeleteBatchPredictionJobOperator,)fromairflow.providers.google.cloud.operators.vertex_ai.custom_jobimport(CreateCustomPythonPackageTrainingJobOperator,)fromairflow.providers.google.cloud.operators.vertex_ai.datasetimport(CreateDatasetOperator,DeleteDatasetOperator,)fromairflow.providers.google.cloud.operators.vertex_ai.model_serviceimport(DeleteModelOperator,DeleteModelVersionOperator,GetModelOperator,ListModelVersionsOperator,SetDefaultVersionOnModelOperator,)fromairflow.utils.trigger_ruleimportTriggerRule
# NOTE(review): this excerpt references `create_bucket`, `dag` and the
# UPPER_CASE constants, none of which are defined in the visible part of the
# file; presumably these statements live inside the DAG context that declares
# them -- confirm the enclosing indentation when splicing this back in.

# Copy the penguins sample data from the shared resource bucket into the
# bucket used by this test run.
move_data_files = GCSSynchronizeBucketsOperator(
    task_id="move_files_to_bucket",
    source_bucket=RESOURCE_DATA_BUCKET,
    source_object="vertex-ai/penguins-data",
    destination_bucket=CUSTOM_PYTHON_GCS_BUCKET_NAME,
    destination_object="vertex-ai",
    recursive=True,
)

create_tabular_dataset = CreateDatasetOperator(
    task_id="tabular_dataset",
    dataset=TABULAR_DATASET,
    region=REGION,
    project_id=PROJECT_ID,
)
# Dataset id published by the task above; reused by both training jobs and by
# the dataset teardown task.
tabular_dataset_id = create_tabular_dataset.output["dataset_id"]

# [START howto_operator_create_custom_python_training_job_v1]
create_custom_python_package_training_job = CreateCustomPythonPackageTrainingJobOperator(
    task_id="create_custom_python_package_training_job",
    staging_bucket=f"gs://{CUSTOM_PYTHON_GCS_BUCKET_NAME}",
    display_name=PACKAGE_DISPLAY_NAME,
    python_package_gcs_uri=PYTHON_PACKAGE_GCS_URI,
    python_module_name=PYTHON_MODULE_NAME,
    container_uri=TRAIN_IMAGE,
    model_serving_container_image_uri=DEPLOY_IMAGE,
    bigquery_destination=f"bq://{PROJECT_ID}",
    # run params
    dataset_id=tabular_dataset_id,
    model_display_name=MODEL_DISPLAY_NAME,
    replica_count=REPLICA_COUNT,
    machine_type=MACHINE_TYPE,
    accelerator_type=ACCELERATOR_TYPE,
    accelerator_count=ACCELERATOR_COUNT,
    training_fraction_split=TRAINING_FRACTION_SPLIT,
    validation_fraction_split=VALIDATION_FRACTION_SPLIT,
    test_fraction_split=TEST_FRACTION_SPLIT,
    region=REGION,
    project_id=PROJECT_ID,
)
# [END howto_operator_create_custom_python_training_job_v1]
# Model id published by the first training job.
model_id_v1 = create_custom_python_package_training_job.output["model_id"]

# [START howto_operator_gcp_mlengine_get_model]
get_model = GetModelOperator(
    task_id="get_model", region=REGION, project_id=PROJECT_ID, model_id=model_id_v1
)
# [END howto_operator_gcp_mlengine_get_model]

# [START howto_operator_gcp_mlengine_print_model]
get_model_result = BashOperator(
    bash_command=f"echo {get_model.output}",
    task_id="get_model_result",
)
# [END howto_operator_gcp_mlengine_print_model]

# Same arguments as the first training job, plus `parent_model=model_id_v1`.
# [START howto_operator_create_custom_python_training_job_v2]
create_custom_python_package_training_job_v2 = CreateCustomPythonPackageTrainingJobOperator(
    task_id="create_custom_python_package_training_job_v2",
    staging_bucket=f"gs://{CUSTOM_PYTHON_GCS_BUCKET_NAME}",
    display_name=PACKAGE_DISPLAY_NAME,
    python_package_gcs_uri=PYTHON_PACKAGE_GCS_URI,
    python_module_name=PYTHON_MODULE_NAME,
    container_uri=TRAIN_IMAGE,
    model_serving_container_image_uri=DEPLOY_IMAGE,
    bigquery_destination=f"bq://{PROJECT_ID}",
    parent_model=model_id_v1,
    # run params
    dataset_id=tabular_dataset_id,
    model_display_name=MODEL_DISPLAY_NAME,
    replica_count=REPLICA_COUNT,
    machine_type=MACHINE_TYPE,
    accelerator_type=ACCELERATOR_TYPE,
    accelerator_count=ACCELERATOR_COUNT,
    training_fraction_split=TRAINING_FRACTION_SPLIT,
    validation_fraction_split=VALIDATION_FRACTION_SPLIT,
    test_fraction_split=TEST_FRACTION_SPLIT,
    region=REGION,
    project_id=PROJECT_ID,
)
# [END howto_operator_create_custom_python_training_job_v2]
# Model id published by the second training job.
model_id_v2 = create_custom_python_package_training_job_v2.output["model_id"]

# [START howto_operator_gcp_mlengine_default_version]
set_default_version = SetDefaultVersionOnModelOperator(
    task_id="set_default_version",
    project_id=PROJECT_ID,
    region=REGION,
    model_id=model_id_v2,
)
# [END howto_operator_gcp_mlengine_default_version]

# [START howto_operator_gcp_mlengine_list_versions]
list_model_versions = ListModelVersionsOperator(
    task_id="list_model_versions", region=REGION, project_id=PROJECT_ID, model_id=model_id_v2
)
# [END howto_operator_gcp_mlengine_list_versions]

# [START howto_operator_start_batch_prediction]
create_batch_prediction_job = CreateBatchPredictionJobOperator(
    task_id="create_batch_prediction_job",
    job_display_name=JOB_DISPLAY_NAME,
    model_name=model_id_v2,
    predictions_format="bigquery",
    bigquery_source=BQ_SOURCE,
    bigquery_destination_prefix=f"bq://{PROJECT_ID}",
    region=REGION,
    project_id=PROJECT_ID,
    machine_type=MACHINE_TYPE,
)
# [END howto_operator_start_batch_prediction]

# Every task from here through `delete_bucket` is created with
# TriggerRule.ALL_DONE and is wired into the TEST TEARDOWN section of the
# dependency chain below.
# NOTE(review): `delete_model_version_1` targets `model_id_v2` while
# `delete_model` targets `model_id_v1` -- confirm this pairing is intended.
# [START howto_operator_gcp_mlengine_delete_version]
delete_model_version_1 = DeleteModelVersionOperator(
    task_id="delete_model_version_1",
    project_id=PROJECT_ID,
    region=REGION,
    model_id=model_id_v2,
    trigger_rule=TriggerRule.ALL_DONE,
)
# [END howto_operator_gcp_mlengine_delete_version]

# [START howto_operator_gcp_mlengine_delete_model]
delete_model = DeleteModelOperator(
    task_id="delete_model",
    project_id=PROJECT_ID,
    region=REGION,
    model_id=model_id_v1,
    trigger_rule=TriggerRule.ALL_DONE,
)
# [END howto_operator_gcp_mlengine_delete_model]

delete_batch_prediction_job = DeleteBatchPredictionJobOperator(
    task_id="delete_batch_prediction_job",
    batch_prediction_job_id=create_batch_prediction_job.output["batch_prediction_job_id"],
    region=REGION,
    project_id=PROJECT_ID,
    trigger_rule=TriggerRule.ALL_DONE,
)

delete_tabular_dataset = DeleteDatasetOperator(
    task_id="delete_tabular_dataset",
    dataset_id=tabular_dataset_id,
    region=REGION,
    project_id=PROJECT_ID,
    trigger_rule=TriggerRule.ALL_DONE,
)

delete_bucket = GCSDeleteBucketOperator(
    task_id="delete_bucket",
    bucket_name=CUSTOM_PYTHON_GCS_BUCKET_NAME,
    trigger_rule=TriggerRule.ALL_DONE,
)

# Strictly linear ordering: setup, then the operators under test, then teardown.
(
    # TEST SETUP
    create_bucket
    >> move_data_files
    >> create_tabular_dataset
    # TEST BODY
    >> create_custom_python_package_training_job
    >> create_custom_python_package_training_job_v2
    >> create_batch_prediction_job
    >> get_model
    >> get_model_result
    >> list_model_versions
    >> set_default_version
    # TEST TEARDOWN
    >> delete_model_version_1
    >> delete_model
    >> delete_batch_prediction_job
    >> delete_tabular_dataset
    >> delete_bucket
)

from tests.system.utils.watcher import watcher

# This test needs watcher in order to properly mark success/failure
# when "tearDown" task with trigger rule is part of the DAG
list(dag.tasks) >> watcher()

from tests.system.utils import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)