Source code for airflow.providers.snowflake.transfers.s3_to_snowflake

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""This module contains AWS S3 to Snowflake operator."""
from typing import Any, Optional

from airflow.models import BaseOperator
from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
from airflow.utils.decorators import apply_defaults


class S3ToSnowflakeOperator(BaseOperator):
    """
    Executes a COPY command to load files from S3 into Snowflake.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:S3ToSnowflakeOperator`

    :param s3_keys: reference to a list of S3 keys
    :type s3_keys: list
    :param table: reference to a specific table in snowflake database
    :type table: str
    :param stage: reference to a specific snowflake stage
    :type stage: str
    :param file_format: reference to a specific file format
    :type file_format: str
    :param schema: reference to a specific schema in snowflake database
    :type schema: str
    :param columns_array: reference to a specific columns array in snowflake database
    :type columns_array: list
    :param autocommit: value forwarded to ``SnowflakeHook.run`` together with
        the generated COPY statement
    :type autocommit: bool
    :param snowflake_conn_id: reference to a specific snowflake database
    :type snowflake_conn_id: str
    """

    @apply_defaults
    def __init__(
        self,
        *,
        s3_keys: list,
        table: str,
        stage: Any,
        file_format: str,
        schema: str,  # TODO: shouldn't be required, rely on session/user defaults
        columns_array: Optional[list] = None,
        autocommit: bool = True,
        snowflake_conn_id: str = 'snowflake_default',
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.s3_keys = s3_keys
        self.table = table
        self.stage = stage
        self.file_format = file_format
        self.schema = schema
        self.columns_array = columns_array
        self.autocommit = autocommit
        self.snowflake_conn_id = snowflake_conn_id

    @staticmethod
    def _quote_s3_key(key: Any) -> str:
        """Render a single S3 key as a single-quoted SQL string literal."""
        # Escape backslashes first so the quote escaping below is not re-escaped.
        escaped = str(key).replace("\\", "\\\\").replace("'", "''")
        return "'{}'".format(escaped)

    def _files_clause(self) -> str:
        """Build the parenthesised, comma-separated list used for ``files=``."""
        # Each key is quoted individually instead of rewriting ``str(list)``:
        # a blanket '[' -> '(' replacement would also alter brackets that are
        # part of a key name, and a Python tuple repr such as ``('a',)`` is
        # not accepted by Snowflake.
        return "({})".format(", ".join(self._quote_s3_key(key) for key in self.s3_keys))

    def execute(self, context: Any) -> None:
        """
        Build the COPY INTO statement and run it through ``SnowflakeHook``.

        :param context: Airflow task context; not used by this operator.
        :type context: Any
        """
        snowflake_hook = SnowflakeHook(snowflake_conn_id=self.snowflake_conn_id)

        files = self._files_clause()

        # we can extend this based on stage
        base_sql = """
                    FROM @{stage}/
                    files={files}
                    file_format={file_format}
                """.format(
            stage=self.stage, files=files, file_format=self.file_format
        )

        if self.columns_array:
            # An explicit column list was requested for the target table.
            copy_query = """
                COPY INTO {schema}.{table}({columns}) {base_sql}
            """.format(
                schema=self.schema,
                table=self.table,
                columns=",".join(self.columns_array),
                base_sql=base_sql,
            )
        else:
            copy_query = """
                COPY INTO {schema}.{table} {base_sql}
            """.format(
                schema=self.schema, table=self.table, base_sql=base_sql
            )

        self.log.info('Executing COPY command...')
        snowflake_hook.run(copy_query, self.autocommit)
        self.log.info("COPY command completed")

Was this entry helpful?