Compare commits

...

6 Commits

Author SHA1 Message Date
Dustin f6910f04df tf/asg: Add CA resource tag for FUSE device plugin
dustin/dynk8s-provisioner/pipeline/head This commit looks good Details
Jenkins jobs that build container images in user namespaces need access
to `/dev/fuse`, which is provided by the [fuse-device-plugin][0].  This
plugin runs as a DaemonSet, which updates the status of the node it's
running on when it starts to indicate that the FUSE device is available.
When scaling up from zero nodes, Cluster Autoscaler has no way to know
that this will occur, and therefore cannot determine that scaling up the
ASG will create a node with the required resources.  Thus, the ASG needs
a tag to inform CA that the nodes it creates will indeed have the
resources and scaling it up will allow the pod to be scheduled.

Although this feature of CA was added in 1.14, it apparently got broken
at some point and no longer works in 1.22.  It works again in 1.26,
though.

[0]: https://github.com/kuberenetes-learning-group/fuse-device-plugin/tree/master
2024-01-14 11:42:46 -06:00
Dustin 5a79680b22 tf/userdata: Install CRI-O from Fedora base
The *cri-o* package has moved from its own module into the base Fedora
repository, as Fedora is [eliminating modules][0].  The last modular
version was 1.25, which is too old to run pods with user namespaces.
Version 1.26 is available in the base repository, which does support
user namespaces.

[0]: https://fedoraproject.org/wiki/Changes/RetireModularity
2024-01-13 10:10:46 -06:00
Dustin 02772f17dd tf/asg: Look up Fedora AMI by attributes
Instead of hard-coding the AMI ID of the Fedora build we want, we can
use the `aws_ami` data source to search for it.  The Fedora release team
has a consistent naming scheme for AMIs, so finding the correct one is
straightforward.
2023-11-13 20:27:50 -06:00
Dustin 473e279a18 tf/userdata: Remove default DNS configuration
Lately, cloud nodes seem to be failing to come up more frequently.  I
traced this down to the fact that `/etc/resolv.conf` in the `kube-proxy`
container contains both the AWS-provided DNS server and the on-premises
server set by Wireguard.  This evidently "works" correctly sometimes,
but not always.  When it doesn't, the `kube-proxy` cannot resolve the
Kubernetes API server address, and thus cannot create the necessary
netfilter rules to forward traffic correctly.  This causes pods to be
unable to communicate.

I am not entirely sure what the "correct" solution to this problem would
be, since there are various issues in play here.  Fortunately, cloud
nodes are only ever around for a short time, and never need to be
rebooted.  As such, we can use a "quick fix" and simply remove the
AWS-provided DNS configuration.
2023-11-13 19:52:57 -06:00
Dustin 4a2a376409 terraform: Update node template to Fedora 38 2023-11-13 19:52:47 -06:00
Dustin 83b8c4a7cc userdata: Set kubelet config path
The default configuration for the *kubelet.service* unit does not
specify the path to the `config.yaml` generated by `kubeadm`.  Thus, any
settings defined in the `kubelet-config` ConfigMap do not take effect.
To resolve this, we have to explicitly set the path in the `config`
property of the `kubeletExtraArgs` object in the join configuration.
2023-11-13 19:49:32 -06:00
4 changed files with 142 additions and 14 deletions

View File

@ -27,11 +27,31 @@ resource "aws_security_group" "k8s-node" {
}
}
# Looks up the Fedora Cloud Base 38 AMI by its attributes so the launch
# template does not need a hard-coded AMI ID.
data "aws_ami" "latest-fedora" {
# When several images match the filters below, use the newest one.
most_recent = true
# NOTE(review): presumably the AWS account that publishes the official
# Fedora Cloud images -- confirm before changing.
owners = ["125523088429"]
# Match on the Fedora image naming scheme; the trailing wildcard covers the
# rest of the image name after the "38-1." release prefix.
filter {
name = "name"
values = ["Fedora-Cloud-Base-38-1.*"]
}
filter {
name = "virtualization-type"
values = ["hvm"]
}
# Only arm64 images are considered.
filter {
name = "architecture"
values = ["arm64"]
}
}
resource "aws_launch_template" "k8s-aarch64" {
name = "k8s-aarch64"
update_default_version = true
image_id = "ami-0e5ef720e9e713b90"
image_id = "${data.aws_ami.latest-fedora.id}"
instance_type = "t4g.medium"
security_group_names = [aws_security_group.k8s-node.name]
key_name = "dustin@rosalina"
@ -69,4 +89,9 @@ resource "aws_autoscaling_group" "k8s-aarch64" {
value = "owned"
propagate_at_launch = true
}
tag {
key = "k8s.io/cluster-autoscaler/node-template/resources/github.com/fuse"
value = "1"
propagate_at_launch = false
}
}

View File

@ -11,6 +11,7 @@
"autoscaling:DescribeTags",
"sns:Unsubscribe",
"sns:GetSubscriptionAttributes",
"ec2:DescribeImages",
"ec2:DescribeSecurityGroups"
],
"Resource": "*"

View File

@ -1,10 +1,102 @@
{
"version": 4,
"terraform_version": "1.2.9",
"serial": 88,
"terraform_version": "1.6.2",
"serial": 98,
"lineage": "a100be74-c98e-0769-2d6a-bf6a2c5f3ebf",
"outputs": {},
"resources": [
{
"mode": "data",
"type": "aws_ami",
"name": "latest-fedora",
"provider": "provider[\"registry.terraform.io/hashicorp/aws\"]",
"instances": [
{
"schema_version": 0,
"attributes": {
"architecture": "arm64",
"arn": "arn:aws:ec2:us-east-2::image/ami-0dcd72048e69236de",
"block_device_mappings": [
{
"device_name": "/dev/sda1",
"ebs": {
"delete_on_termination": "true",
"encrypted": "false",
"iops": "0",
"snapshot_id": "snap-01034e15b97a1b584",
"throughput": "0",
"volume_size": "6",
"volume_type": "gp2"
},
"no_device": "",
"virtual_name": ""
}
],
"boot_mode": "",
"creation_date": "2023-04-14T00:16:49.000Z",
"deprecation_time": "2025-04-14T00:16:49.000Z",
"description": "Fedora AMI Description",
"ena_support": true,
"executable_users": null,
"filter": [
{
"name": "architecture",
"values": [
"arm64"
]
},
{
"name": "name",
"values": [
"Fedora-Cloud-Base-38-1.*"
]
},
{
"name": "virtualization-type",
"values": [
"hvm"
]
}
],
"hypervisor": "xen",
"id": "ami-0dcd72048e69236de",
"image_id": "ami-0dcd72048e69236de",
"image_location": "125523088429/Fedora-Cloud-Base-38-1.6.aarch64-hvm-us-east-2-gp2-0",
"image_owner_alias": "",
"image_type": "machine",
"include_deprecated": false,
"kernel_id": "",
"most_recent": true,
"name": "Fedora-Cloud-Base-38-1.6.aarch64-hvm-us-east-2-gp2-0",
"name_regex": null,
"owner_id": "125523088429",
"owners": [
"125523088429"
],
"platform": "",
"platform_details": "Linux/UNIX",
"product_codes": [],
"public": true,
"ramdisk_id": "",
"root_device_name": "/dev/sda1",
"root_device_type": "ebs",
"root_snapshot_id": "snap-01034e15b97a1b584",
"sriov_net_support": "",
"state": "available",
"state_reason": {
"code": "UNSET",
"message": "UNSET"
},
"tags": {},
"timeouts": null,
"tpm_support": "",
"usage_operation": "RunInstances",
"virtualization_type": "hvm"
},
"sensitive_attributes": []
}
]
},
{
"mode": "data",
"type": "aws_caller_identity",
@ -15,9 +107,9 @@
"schema_version": 0,
"attributes": {
"account_id": "566967686773",
"arn": "arn:aws:sts::566967686773:assumed-role/dynk8s-terraform/aws-go-sdk-1673405716573043213",
"arn": "arn:aws:sts::566967686773:assumed-role/dynk8s-terraform/aws-go-sdk-1705246977054689837",
"id": "566967686773",
"user_id": "AROAYIAPIKZ25DFDOYZHT:aws-go-sdk-1673405716573043213"
"user_id": "AROAYIAPIKZ25DFDOYZHT:aws-go-sdk-1705246977054689837"
},
"sensitive_attributes": []
}
@ -126,7 +218,7 @@
"context": "",
"default_cooldown": 300,
"default_instance_warmup": 0,
"desired_capacity": 1,
"desired_capacity": 0,
"enabled_metrics": [],
"force_delete": false,
"force_delete_warm_pool": false,
@ -166,6 +258,11 @@
"key": "k8s.io/cluster-autoscaler/kubernetes",
"propagate_at_launch": true,
"value": "owned"
},
{
"key": "k8s.io/cluster-autoscaler/node-template/resources/github.com/fuse",
"propagate_at_launch": false,
"value": "1"
}
],
"tags": null,
@ -181,7 +278,8 @@
"private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiZGVsZXRlIjo2MDAwMDAwMDAwMDAsInVwZGF0ZSI6NjAwMDAwMDAwMDAwfX0=",
"dependencies": [
"aws_launch_template.k8s-aarch64",
"aws_security_group.k8s-node"
"aws_security_group.k8s-node",
"data.aws_ami.latest-fedora"
]
}
]
@ -264,7 +362,7 @@
"capacity_reservation_specification": [],
"cpu_options": [],
"credit_specification": [],
"default_version": 12,
"default_version": 21,
"description": "",
"disable_api_stop": false,
"disable_api_termination": false,
@ -275,7 +373,7 @@
"hibernation_options": [],
"iam_instance_profile": [],
"id": "lt-0789a3800bdaec215",
"image_id": "ami-0995531df014459c2",
"image_id": "ami-0dcd72048e69236de",
"instance_initiated_shutdown_behavior": "",
"instance_market_options": [
{
@ -287,7 +385,7 @@
"instance_type": "t4g.medium",
"kernel_id": "",
"key_name": "dustin@rosalina",
"latest_version": 12,
"latest_version": 21,
"license_specification": [],
"maintenance_options": [],
"metadata_options": [],
@ -311,13 +409,14 @@
"tags": {},
"tags_all": {},
"update_default_version": true,
"user_data": "I2Nsb3VkLWNvbmZpZwpib290Y21kOgotIFsgZG5mLCBtb2R1bGUsIGVuYWJsZSwgJ2NyaS1vOjEuMjQnLCAteSBdCi0gWyBsbiwgLXNmLCAvcnVuL3N5c3RlbWQvcmVzb2x2ZS9zdHViLXJlc29sdi5jb25mLCAvZXRjL3Jlc29sdi5jb25mIF0KCnBhY2thZ2VzOgotIGNyaS1vCi0gY3JpLXRvb2xzCi0gZXRodG9vbAotIGlwdGFibGVzLW5mdAotIGlzY3NpLWluaXRpYXRvci11dGlscwotIGt1YmVybmV0ZXMta3ViZWFkbQotIGt1YmVybmV0ZXMtbm9kZQotIHJ1bmMKLSB3aXJlZ3VhcmQtdG9vbHMKCndyaXRlX2ZpbGVzOgotIHBhdGg6IC9ldGMvZG5mL2RuZi5jb25mCiAgY29udGVudDogfCsKICAgIGluc3RhbGxfd2Vha19kZXBzPUZhbHNlCiAgYXBwZW5kOiB0cnVlCi0gcGF0aDogL2V0Yy9tb2R1bGVzLWxvYWQuZC9rOHMuY29uZgogIGNvbnRlbnQ6IHwrCiAgICBicl9uZXRmaWx0ZXIKLSBwYXRoOiAvZXRjL3N5c2N0bC5kL2s4cy5jb25mCiAgY29udGVudDogfCsKICAgIG5ldC5icmlkZ2UuYnJpZGdlLW5mLWNhbGwtaXB0YWJsZXMgPSAxCiAgICBuZXQuYnJpZGdlLmJyaWRnZS1uZi1jYWxsLWlwNnRhYmxlcyA9IDEKICAgIG5ldC5pcHY0LmlwX2ZvcndhcmQgPSAxCi0gcGF0aDogL3Zhci9saWIvY2xvdWQvc2NyaXB0cy9wZXItaW5zdGFuY2Uva3ViZWFkbS1qb2luCiAgcGVybWlzc2lvbnM6ICcwNzU1JwogIGNvbnRlbnQ6IHwrCiAgICAjIS9iaW4vc2gKCiAgICBCQVNFX1VSTD1odHRwczovL2R5bms4cy1wcm92aXNpb25lci5weXJvY3VmZmxpbmsubmV0CgogICAgaW5zdGFuY2VfaWQ9JChjdXJsIC1zIDE2OS4yNTQuMTY5LjI1NC9sYXRlc3QvbWV0YS1kYXRhL2luc3RhbmNlLWlkKQogICAgYXo9JChjdXJsIC1zIDE2OS4yNTQuMTY5LjI1NC9sYXRlc3QvbWV0YS1kYXRhL3BsYWNlbWVudC9hdmFpbGFiaWxpdHktem9uZSkKCiAgICBjdXJsIC1mcyAiJHtCQVNFX1VSTH0iL3dpcmVndWFyZC9jb25maWcvJHtpbnN0YW5jZV9pZH0gXAogICAgICAgIC1vIC9ldGMvd2lyZWd1YXJkL3dnMC5jb25mIHx8IGV4aXQKICAgIHN5c3RlbWN0bCBlbmFibGUgLS1ub3cgd2ctcXVpY2tAd2cwIHx8IGV4aXQKCiAgICBtb2Rwcm9iZSBicl9uZXRmaWx0ZXIgfHwgZXhpdAogICAgc3lzY3RsIC13IC1mIC9ldGMvc3lzY3RsLmQvazhzLmNvbmYgfHwgZXhpdAoKICAgIHN3YXBvZmYgLWEgfHwgZXhpdAogICAgdG91Y2ggL2V0Yy9zeXN0ZW1kL3pyYW0tZ2VuZXJhdG9yLmNvbmYgfHwgZXhpdAogICAgc3lzdGVtY3RsIGRhZW1vbi1yZWxvYWQgfHwgZXhpdAogICAgc3lzdGVtY3RsIHN0b3AgJ3N5c3RlbWQtenJhbS1zZXR1cEAqJyB8fCBleGl0CgogICAgc3lzdGVtY3RsIGVuYWJsZSBjcmlvIGlzY3NpZCBrdWJlbGV0IHx8IGV4aXQKICAgIHN5c3RlbWN0bCBzdGFydCBjcmlvIGlzY3NpZCB8fCBleGl0CgogICAgaW50ZXJuYWxfaXA9JCgKICAgICAgaXAgYWRkcmVzcyBzaG93IGRldiB3ZzAgcHJpbWFyeSB8IFwKICAgICAgc2VkIC1ybiAncy
8uKmluZXQgKFswLTkuXSspLiovXDEvcCcKICAgICkKCiAgICBjYXQgPiBsb25naG9ybi1pc3N1ZTQ5ODguY2lsIDw8RU9GCiAgICAoYWxsb3cgaXNjc2lkX3Qgc2VsZiAoY2FwYWJpbGl0eSAoZGFjX292ZXJyaWRlKSkpCiAgICBFT0YKICAgIHNlbW9kdWxlIC1pIGxvbmdob3JuLWlzc3VlNDk4OC5jaWwKCiAgICBjYXQgPiAvcnVuL2pvaW5jb25maWd1cmF0aW9uIDw8RU9GCiAgICBhcGlWZXJzaW9uOiBrdWJlYWRtLms4cy5pby92MWJldGEzCiAgICBraW5kOiBKb2luQ29uZmlndXJhdGlvbgogICAgbm9kZVJlZ2lzdHJhdGlvbjoKICAgICAga3ViZWxldEV4dHJhQXJnczoKICAgICAgICBwcm92aWRlci1pZDogYXdzOi8vLyR7YXp9LyR7aW5zdGFuY2VfaWR9CiAgICAgICAgbm9kZS1pcDogJHtpbnRlcm5hbF9pcH0KICAgIGRpc2NvdmVyeToKICAgICAgZmlsZToKICAgICAgICBrdWJlQ29uZmlnUGF0aDogJHtCQVNFX1VSTH0va3ViZWFkbS9rdWJlY29uZmlnLyR7aW5zdGFuY2VfaWR9CiAgICBFT0YKICAgIGt1YmVhZG0gam9pbiAtLWNvbmZpZz0vcnVuL2pvaW5jb25maWd1cmF0aW9uCgpydW5jbWQ6Ci0gWyBkbmYsIHJlbW92ZSwgLXksIHpyYW0tZ2VuZXJhdG9yIF0K",
"user_data": "I2Nsb3VkLWNvbmZpZwpib290Y21kOgotIFsgbG4sIC1zZiwgL3J1bi9zeXN0ZW1kL3Jlc29sdmUvc3R1Yi1yZXNvbHYuY29uZiwgL2V0Yy9yZXNvbHYuY29uZiBdCgpwYWNrYWdlczoKLSBjcmktbwotIGNyaS10b29scwotIGV0aHRvb2wKLSBpcHRhYmxlcy1uZnQKLSBpc2NzaS1pbml0aWF0b3ItdXRpbHMKLSBrdWJlcm5ldGVzLWt1YmVhZG0KLSBrdWJlcm5ldGVzLW5vZGUKLSBydW5jCi0gd2lyZWd1YXJkLXRvb2xzCgp3cml0ZV9maWxlczoKLSBwYXRoOiAvZXRjL2RuZi9kbmYuY29uZgogIGNvbnRlbnQ6IHwrCiAgICBpbnN0YWxsX3dlYWtfZGVwcz1GYWxzZQogIGFwcGVuZDogdHJ1ZQotIHBhdGg6IC9ldGMvbW9kdWxlcy1sb2FkLmQvazhzLmNvbmYKICBjb250ZW50OiB8KwogICAgYnJfbmV0ZmlsdGVyCi0gcGF0aDogL2V0Yy9zeXNjdGwuZC9rOHMuY29uZgogIGNvbnRlbnQ6IHwrCiAgICBuZXQuYnJpZGdlLmJyaWRnZS1uZi1jYWxsLWlwdGFibGVzID0gMQogICAgbmV0LmJyaWRnZS5icmlkZ2UtbmYtY2FsbC1pcDZ0YWJsZXMgPSAxCiAgICBuZXQuaXB2NC5pcF9mb3J3YXJkID0gMQotIHBhdGg6IC92YXIvbGliL2Nsb3VkL3NjcmlwdHMvcGVyLWluc3RhbmNlL2t1YmVhZG0tam9pbgogIHBlcm1pc3Npb25zOiAnMDc1NScKICBjb250ZW50OiB8KwogICAgIyEvYmluL3NoCgogICAgQkFTRV9VUkw9aHR0cHM6Ly9keW5rOHMtcHJvdmlzaW9uZXIucHlyb2N1ZmZsaW5rLm5ldAoKICAgIGluc3RhbmNlX2lkPSQoY3VybCAtcyAxNjkuMjU0LjE2OS4yNTQvbGF0ZXN0L21ldGEtZGF0YS9pbnN0YW5jZS1pZCkKICAgIGF6PSQoY3VybCAtcyAxNjkuMjU0LjE2OS4yNTQvbGF0ZXN0L21ldGEtZGF0YS9wbGFjZW1lbnQvYXZhaWxhYmlsaXR5LXpvbmUpCgogICAgY3VybCAtZnMgIiR7QkFTRV9VUkx9Ii93aXJlZ3VhcmQvY29uZmlnLyR7aW5zdGFuY2VfaWR9IFwKICAgICAgICAtbyAvZXRjL3dpcmVndWFyZC93ZzAuY29uZiB8fCBleGl0CiAgICBzeXN0ZW1jdGwgZW5hYmxlIC0tbm93IHdnLXF1aWNrQHdnMCB8fCBleGl0CgogICAgcmVzb2x2ZWN0bCByZXZlcnQgZXRoMAoKICAgIG1vZHByb2JlIGJyX25ldGZpbHRlciB8fCBleGl0CiAgICBzeXNjdGwgLXcgLWYgL2V0Yy9zeXNjdGwuZC9rOHMuY29uZiB8fCBleGl0CgogICAgc3dhcG9mZiAtYSB8fCBleGl0CiAgICB0b3VjaCAvZXRjL3N5c3RlbWQvenJhbS1nZW5lcmF0b3IuY29uZiB8fCBleGl0CiAgICBzeXN0ZW1jdGwgZGFlbW9uLXJlbG9hZCB8fCBleGl0CiAgICBzeXN0ZW1jdGwgc3RvcCAnc3lzdGVtZC16cmFtLXNldHVwQConIHx8IGV4aXQKCiAgICBzeXN0ZW1jdGwgZW5hYmxlIGNyaW8gaXNjc2lkIGt1YmVsZXQgfHwgZXhpdAogICAgc3lzdGVtY3RsIHN0YXJ0IGNyaW8gaXNjc2lkIHx8IGV4aXQKCiAgICBpbnRlcm5hbF9pcD0kKAogICAgICBpcCBhZGRyZXNzIHNob3cgZGV2IHdnMCBwcmltYXJ5IHwgXAogICAgICBzZWQgLXJuICdzLy4qaW5ldCAoWzAtOS5dKy
kuKi9cMS9wJwogICAgKQoKICAgIGNhdCA+IGxvbmdob3JuLWlzc3VlNDk4OC5jaWwgPDxFT0YKICAgIChhbGxvdyBpc2NzaWRfdCBzZWxmIChjYXBhYmlsaXR5IChkYWNfb3ZlcnJpZGUpKSkKICAgIEVPRgogICAgc2Vtb2R1bGUgLWkgbG9uZ2hvcm4taXNzdWU0OTg4LmNpbAoKICAgIGNhdCA+IC9ydW4vam9pbmNvbmZpZ3VyYXRpb24gPDxFT0YKICAgIGFwaVZlcnNpb246IGt1YmVhZG0uazhzLmlvL3YxYmV0YTMKICAgIGtpbmQ6IEpvaW5Db25maWd1cmF0aW9uCiAgICBub2RlUmVnaXN0cmF0aW9uOgogICAgICBrdWJlbGV0RXh0cmFBcmdzOgogICAgICAgIHByb3ZpZGVyLWlkOiBhd3M6Ly8vJHthen0vJHtpbnN0YW5jZV9pZH0KICAgICAgICBub2RlLWlwOiAke2ludGVybmFsX2lwfQogICAgICAgIGNvbmZpZzogL3Zhci9saWIva3ViZWxldC9jb25maWcueWFtbAogICAgZGlzY292ZXJ5OgogICAgICBmaWxlOgogICAgICAgIGt1YmVDb25maWdQYXRoOiAke0JBU0VfVVJMfS9rdWJlYWRtL2t1YmVjb25maWcvJHtpbnN0YW5jZV9pZH0KICAgIEVPRgogICAga3ViZWFkbSBqb2luIC0tY29uZmlnPS9ydW4vam9pbmNvbmZpZ3VyYXRpb24KCnJ1bmNtZDoKLSBbIGRuZiwgcmVtb3ZlLCAteSwgenJhbS1nZW5lcmF0b3IgXQo=",
"vpc_security_group_ids": []
},
"sensitive_attributes": [],
"private": "bnVsbA==",
"dependencies": [
"aws_security_group.k8s-node"
"aws_security_group.k8s-node",
"data.aws_ami.latest-fedora"
]
}
]
@ -510,5 +609,6 @@
}
]
}
]
],
"check_results": null
}

View File

@ -1,6 +1,5 @@
#cloud-config
bootcmd:
- [ dnf, module, enable, 'cri-o:1.25', -y ]
- [ ln, -sf, /run/systemd/resolve/stub-resolv.conf, /etc/resolv.conf ]
packages:
@ -41,6 +40,8 @@ write_files:
-o /etc/wireguard/wg0.conf || exit
systemctl enable --now wg-quick@wg0 || exit
resolvectl revert eth0
modprobe br_netfilter || exit
sysctl -w -f /etc/sysctl.d/k8s.conf || exit
@ -69,6 +70,7 @@ write_files:
kubeletExtraArgs:
provider-id: aws:///${az}/${instance_id}
node-ip: ${internal_ip}
config: /var/lib/kubelet/config.yaml
discovery:
file:
kubeConfigPath: ${BASE_URL}/kubeadm/kubeconfig/${instance_id}