Lake Formation cross-account access using Terraform

  1. Consumption cross-account access (Athena, Redshift Spectrum in a different account than the Lake Formation account)

Architecture

Prerequisites

Lake Formation setup in Account-Source

  • Set up Lake Formation administrators in Account-Source
# Lake Formation administrators for this account.
# `${admin-role-arns}` is a placeholder for the list of admin role ARNs.
resource "aws_lakeformation_data_lake_settings" "default" {
  admins = [${admin-role-arns}]
}
# Inline policy for the Lake Formation role: list the data bucket and read
# the objects stored in it.
data "aws_iam_policy_document" "lakeformation" {
  # Bucket-level permission (applies to the bucket ARN itself).
  statement {
    actions   = ["s3:ListBucket"]
    resources = ["arn:aws:s3:::${data-bucket-name}"]
  }

  # Object-level permission (applies to every key in the bucket).
  statement {
    actions   = ["s3:GetObject"]
    resources = ["arn:aws:s3:::${data-bucket-name}/*"]
  }
}
# IAM role assumable by the Glue and Lake Formation services. It carries the
# S3 read policy defined in data.aws_iam_policy_document.lakeformation plus
# two AWS-managed Lake Formation policies.
module "lakeformation_role" {
  source = "github.com/schubergphilis/terraform-aws-mcaf-role?ref=v0.3.2"

  name = "LakeFormation"
  tags = var.tags

  # Trust relationship: which service principals may assume the role.
  principal_type        = "Service"
  principal_identifiers = ["glue.amazonaws.com", "lakeformation.amazonaws.com"]

  # Inline policy and attached managed policies.
  role_policy = data.aws_iam_policy_document.lakeformation.json
  policy_arns = [
    "arn:aws:iam::aws:policy/AWSLakeFormationDataAdmin",
    "arn:aws:iam::aws:policy/AWSLakeFormationCrossAccountManager",
  ]
}
# Registers the data bucket as a Lake Formation location, accessed through
# the LakeFormation role created by module.lakeformation_role.
resource "aws_lakeformation_resource" "default" {
  role_arn = module.lakeformation_role.arn
  arn      = "arn:aws:s3:::${data-bucket-name}"
}
# Grants the target account DESCRIBE on the source database, including the
# right to grant DESCRIBE onwards to principals of its own.
resource "aws_lakeformation_permissions" "database" {
  principal = ${account-target-id}

  permissions                   = ["DESCRIBE"]
  permissions_with_grant_option = ["DESCRIBE"]

  database {
    name = ${account-source-database-name}
  }
}
# Grants the target account SELECT on one table of the source database,
# including the right to grant SELECT onwards to principals of its own.
resource "aws_lakeformation_permissions" "table" {
  principal = ${account-target-id}

  permissions                   = ["SELECT"]
  permissions_with_grant_option = ["SELECT"]

  table {
    database_name = ${account-source-database-name}
    name          = "${account-source-table-name}"
  }
}

Lake Formation setup in Account-Target

  • Set up Lake Formation administrators in Account-Target
# Lake Formation administrators for this account.
# `${admin-role-arns}` is a placeholder for the list of admin role ARNs.
resource "aws_lakeformation_data_lake_settings" "default" {
  admins = [${admin-role-arns}]
}
# Resource link: a local Glue database entry that points at a database held
# in the source account's catalog.
resource "aws_glue_catalog_database" "resource_link" {
  name = "${name}"

  # The shared database this link resolves to.
  target_database {
    database_name = "${name}"
    catalog_id    = ${account-source-account-id}
  }
}

Use Lambda to execute an Athena query using AWS Wrangler

In this example, we will create a sample Lambda function and an execution role that query the table using Athena and AWS Wrangler.

# Local Glue database that holds temporary tables.
resource "aws_glue_catalog_database" "default" {
  description = "Default catalog used for temp tables"
  name        = "${name}"
}
# Execution policy for the Lambda role: run Athena queries, manage temp tables
# in the default database, read through the resource link, use the Athena
# bucket, and obtain Lake Formation data-access credentials.
data "aws_iam_policy_document" "lakeformation_access" {
  # Athena: start, stop and inspect query executions.
  statement {
    actions = [
      "athena:Get*",
      "athena:StartQueryExecution",
      "athena:StopQueryExecution",
    ]
    resources = ["*"]
  }

  # Glue, default database: read metadata and create/delete temp tables.
  statement {
    actions = [
      "glue:BatchGetPartition",
      "glue:CreateTable",
      "glue:DeleteTable",
      "glue:Get*",
    ]
    resources = [
      "arn:aws:glue:*:*:catalog",
      "arn:aws:glue:*:*:database/${aws_glue_catalog_database.default.name}",
      "arn:aws:glue:*:*:table/${aws_glue_catalog_database.default.name}/temp_table_*",
    ]
  }

  # Glue, resource-link database: read-only metadata access.
  # NOTE(review): the table ARN is limited to `temp_table_*`, the same pattern
  # used for the default database above. Confirm this is intended for the
  # shared tables and not a copy-paste leftover.
  statement {
    actions = [
      "glue:BatchGetPartition",
      "glue:Get*",
    ]
    resources = [
      "arn:aws:glue:*:*:catalog",
      "arn:aws:glue:*:*:database/${aws_glue_catalog_database.resource_link.name}",
      "arn:aws:glue:*:*:table/${aws_glue_catalog_database.resource_link.name}/temp_table_*",
    ]
  }

  # S3: read/write access to the Athena bucket and its objects.
  statement {
    actions = [
      "s3:GetBucketLocation",
      "s3:ListBucket",
      "s3:GetObject",
      "s3:PutObject",
      "s3:DeleteObject",
    ]
    resources = [
      ${athena-bucket-arn},
      "${athena-bucket-arn}/*",
    ]
  }

  # Lake Formation: request temporary credentials for governed data.
  statement {
    actions   = ["lakeformation:GetDataAccess"]
    resources = ["*"]
  }
}
# Execution role for the Lambda function, assumable by the Lambda service and
# carrying the policy from data.aws_iam_policy_document.lakeformation_access.
module "lambda_role" {
  source = "github.com/schubergphilis/terraform-aws-mcaf-role?ref=v0.3.2"

  name = "Lambda"
  tags = var.tags

  # Trust relationship: only the Lambda service may assume the role.
  principal_type        = "Service"
  principal_identifiers = ["lambda.amazonaws.com"]

  role_policy = data.aws_iam_policy_document.lakeformation_access.json
}
# Identity of the account the default provider is authenticated against.
data "aws_caller_identity" "current" {}

# Identity of the source account, resolved through the aliased
# `aws.account_source` provider.
data "aws_caller_identity" "Account_Source" {
  provider = aws.account_source
}
# Gives the Lambda role full Lake Formation permissions (grantable) on the
# default database.
resource "aws_lakeformation_permissions" "default_database" {
  principal = module.lambda_role.arn

  permissions                   = ["ALL"]
  permissions_with_grant_option = ["ALL"]

  database {
    name = aws_glue_catalog_database.default.name
  }
}
# Gives the Lambda role full Lake Formation permissions (grantable) on every
# table of the default database.
resource "aws_lakeformation_permissions" "default_table" {
  principal = module.lambda_role.arn

  permissions                   = ["ALL"]
  permissions_with_grant_option = ["ALL"]

  table {
    wildcard      = true # all tables in the database
    database_name = aws_glue_catalog_database.default.name
  }
}
# Lets the Lambda role DESCRIBE the resource-link database so it can resolve
# the link to the shared database in the source account.
resource "aws_lakeformation_permissions" "resource_link_database" {
  principal = module.lambda_role.arn

  permissions                   = ["DESCRIBE"]
  permissions_with_grant_option = ["DESCRIBE"]

  database {
    # Must reference the resource link, not the default (temp-table) database,
    # which already has its own ALL grant.
    name = aws_glue_catalog_database.resource_link.name
  }
}
# Lets the Lambda role SELECT from every table of the shared database. The
# grant is recorded in this account's catalog but targets tables that live in
# the source account's catalog.
resource "aws_lakeformation_permissions" "resource_link_table" {
  catalog_id = data.aws_caller_identity.current.account_id
  principal  = module.lambda_role.arn

  permissions                   = ["SELECT"]
  permissions_with_grant_option = ["SELECT"]

  table {
    catalog_id = data.aws_caller_identity.Account_Source.account_id
    # The database must be the one the resource link points at in the source
    # catalog, not the local default (temp-table) database.
    database_name = aws_glue_catalog_database.resource_link.target_database[0].database_name
    wildcard      = true # all tables in the shared database
  }
}
import awswrangler as wr


def lambda_handler(event, context):
    """Run a sample Athena query against the shared table and print the result.

    The `${...}` tokens in the strings below are placeholders that must be
    substituted before the code is deployed.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Raises:
        Exception: any error from the query is printed and then re-raised so
            the invocation is reported as failed.
    """
    sql = "SELECT * FROM ${table} limit 10"
    try:
        df = wr.athena.read_sql_query(
            sql=sql,
            database="${resource-link-database}",
            ctas_database_name="${default-database}",
        )
        print(df)
    except Exception as e:
        print(e)
        # Bare raise keeps the original traceback intact.
        raise

Useful links

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store