# EC2 인스턴스 모니터링

while true; do aws ec2 describe-instances --query "Reservations[*].Instances[*].{PublicIPAdd:PublicIpAddress,InstanceName:Tags[?Key=='Name']|[0].Value,Status:State.Name}" --output text | sort; echo "------------------------------" ;date; sleep 3; done



(15분 걸림)



10분

25분?





7

콘솔에서 생성 확인

ec2 생성확인 , ec2 생성하고 3분후에 노드들 사용이 가능하다.

(노드 서버 생성 3분후 노드로  kubectl 명령어로 조회가 가능하다.)



while true ; do kubectl get nodes ;echo "-----------";date; sleep 2 ;done





8

3분후

접속 확인

k get nodes





(지금 25분?)

30분되면 사용가능.







9

다른 터미널에서 모니터링

watch -d kubectl get deploy,rs,pods -A



Every 2.0s: kubectl get deploy,rs,pods -A                                                                            Wed Jun  7 21:43:53 2023

NAMESPACE     NAME                      READY   UP-TO-DATE   AVAILABLE   AGE

kube-system   deployment.apps/coredns   2/2     2            2           12m



NAMESPACE     NAME                                 DESIRED   CURRENT   READY   AGE

kube-system   replicaset.apps/coredns-6777fcd775   2         2         2       30s

kube-system   replicaset.apps/coredns-dc4979556    0         0         0       12m



NAMESPACE     NAME                           READY   STATUS    RESTARTS   AGE

kube-system   pod/aws-node-g8lfk             1/1     Running   0          3m3s

kube-system   pod/aws-node-wsrfd             1/1     Running   0          2m56s

kube-system   pod/aws-node-zbrpf             1/1     Running   0          3m3s

kube-system   pod/coredns-6777fcd775-f8j5j   1/1     Running   0          30s

kube-system   pod/coredns-6777fcd775-wsj22   1/1     Running   0          30s

kube-system   pod/kube-proxy-44mht           1/1     Running   0          81s

kube-system   pod/kube-proxy-4b4z8           1/1     Running   0          84s

kube-system   pod/kube-proxy-cc56d           1/1     Running   0          78s





10

실습시 새탭으로 보기 사용









<2> ExternalDNS  , kube-ops-view 설치


1

재 로그인

(admin@myeks:N/A) [root@myeks-bastion-EC2 ~]#





2

# default 네임스페이스 적용

kubectl ns default







3

# ExternalDNS



MyDomain=<자신의 도메인>

echo "export MyDomain=<자신의 도메인>" >> /etc/profile



# Set the Route 53 domain for the current shell.
# No whitespace is allowed after '=': otherwise the shell runs the value as a command and MyDomain stays unset.
MyDomain=masterseo1.link

echo "export MyDomain=masterseo1.link" >> /etc/profile



MyDnzHostedZoneId=$(aws route53 list-hosted-zones-by-name --dns-name "${MyDomain}." --query "HostedZones[0].Id" --output text)



echo $MyDomain, $MyDnzHostedZoneId



curl -s -O https://raw.githubusercontent.com/gasida/PKOS/main/aews/externaldns.yaml



MyDomain=$MyDomain MyDnzHostedZoneId=$MyDnzHostedZoneId envsubst < externaldns.yaml | kubectl apply -f -





external-dns   디플로이먼트가 생김






4

# kube-ops-view



helm repo add geek-cookbook https://geek-cookbook.github.io/charts/



helm install kube-ops-view geek-cookbook/kube-ops-view --version 1.2.2 --set env.TZ="Asia/Seoul" --namespace kube-system



kubectl patch svc -n kube-system kube-ops-view -p '{"spec":{"type":"LoadBalancer"}}'



kubectl annotate service kube-ops-view -n kube-system "external-dns.alpha.kubernetes.io/hostname=kubeopsview.$MyDomain"



echo -e "Kube Ops View URL = http://kubeopsview.$MyDomain:8080/#scale=1.5"





5

svc EXTERNAL-IP  URL 로 접속



8080으로 접속하기






6

svc EXTERNAL-IP  URL 로 접속

8080으로 접속하기








8

# AWS LB Controller



helm repo add eks https://aws.github.io/eks-charts



helm repo update



helm install aws-load-balancer-controller eks/aws-load-balancer-controller -n kube-system --set clusterName=$CLUSTER_NAME --set serviceAccount.create=false --set serviceAccount.name=aws-load-balancer-controller





9

kube-system에 aws-load-balancer-controller  2개 생성됨.






10

# 노드 보안그룹 ID 확인

NGSGID=$(aws ec2 describe-security-groups --filters Name=group-name,Values='*ng1*' --query "SecurityGroups[*].[GroupId]" --output text)

aws ec2 authorize-security-group-ingress --group-id $NGSGID --protocol '-1' --cidr 192.168.1.100/32









<3> 프로메테우스 & 그라파나(admin / prom-operator) 설치 


1

대시보드 추천 3개

15757 17900 15172





2

# 사용 리전의 인증서 ARN 확인

AWS ACM에서 인증서 하나 만든다.

*.masterseo0.link

route53에서 설정 - 정상 동작 확인





CERT_ARN=`aws acm list-certificates --query 'CertificateSummaryList[].CertificateArn[]' --output text`

echo $CERT_ARN



echo  $MyDomain



MyDomain=masterseo1.link







3

#  prometheus repo 추가

helm repo add prometheus-community https://prometheus-community.github.io/helm-charts





4

# 파라미터 파일 생성 - 정책



cat <<EOT > monitor-values.yaml

prometheus:

  prometheusSpec:

    podMonitorSelectorNilUsesHelmValues: false

    serviceMonitorSelectorNilUsesHelmValues: false

    retention: 5d

    retentionSize: "10GiB"



  verticalPodAutoscaler:

    enabled: true



  ingress:

    enabled: true

    ingressClassName: alb

    hosts: 

      - prometheus.$MyDomain

    paths: 

      - /*

    annotations:

      alb.ingress.kubernetes.io/scheme: internet-facing

      alb.ingress.kubernetes.io/target-type: ip

      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'

      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN

      alb.ingress.kubernetes.io/success-codes: 200-399

      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb

      alb.ingress.kubernetes.io/group.name: study

      alb.ingress.kubernetes.io/ssl-redirect: '443'



grafana:

  defaultDashboardsTimezone: Asia/Seoul

  adminPassword: prom-operator



  ingress:

    enabled: true

    ingressClassName: alb

    hosts: 

      - grafana.$MyDomain

    paths: 

      - /*

    annotations:

      alb.ingress.kubernetes.io/scheme: internet-facing

      alb.ingress.kubernetes.io/target-type: ip

      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'

      alb.ingress.kubernetes.io/certificate-arn: $CERT_ARN

      alb.ingress.kubernetes.io/success-codes: 200-399

      alb.ingress.kubernetes.io/load-balancer-name: myeks-ingress-alb

      alb.ingress.kubernetes.io/group.name: study

      alb.ingress.kubernetes.io/ssl-redirect: '443'



defaultRules:

  create: false

kubeControllerManager:

  enabled: false

kubeEtcd:

  enabled: false

kubeScheduler:

  enabled: false

alertmanager:

  enabled: false

EOT







5

# 배포



kubectl create ns monitoring



helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version 45.27.2 --set prometheus.prometheusSpec.scrapeInterval='15s' --set prometheus.prometheusSpec.evaluationInterval='15s' -f monitor-values.yaml --namespace monitoring



(1분)



모니터링 네임스페이스에 프로메테우스 디플로이먼트 3개 생김.

모니터링 네임스페이스에 프로메테우스 파드들 생김.





6

# Metrics-server 배포



kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml



// kube-system 네임스페이스에 메트릭 파드 1개 생김












<4> EKS Node Viewer 설치 - 커맨드 라인에서 리소스 보기




1

노드 할당 가능 용량과 요청 request 리소스 표시이다.

실제 파드 리소스 사용량이 보이는건 아니다.



https://github.com/awslabs/eks-node-viewer





2

# go 설치 1.19 이상 버전을 설치해야 한다.



GO_VERSION=1.19.5 

curl -LO https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz

rm -rf /usr/local/go 

tar -C /usr/local -xzf go${GO_VERSION}.linux-amd64.tar.gz 

grep /usr/local/go/bin $HOME/.profile || echo 'export PATH=$PATH:/usr/local/go/bin' >> $HOME/.profile 

# Add the Go toolchain to PATH for the current shell.
# Extra words after the assignment would be treated as more variable names to export, not as a command to run.
export PATH="$PATH:/usr/local/go/bin"

go version







3

# EKS Node Viewer 설치 : 현재 ec2 spec에서는 설치에 다소 시간이 소요됨 

go install github.com/awslabs/eks-node-viewer/cmd/eks-node-viewer@latest



(2분)





4

# bin 확인 및 사용 

tree ~/go/bin



cd ~/go/bin

./eks-node-viewer










3

명령 샘플

# Display both CPU and Memory Usage

./eks-node-viewer --resources cpu,memory



node의  cpu,mem 확인,  node당 pod수 확인, 노드 사양, 온디맨드인지 스팟인지 확인, 상태를 알려준다.






# Karpenter nodes only

./eks-node-viewer --node-selector "karpenter.sh/provisioner-name"



# Display extra labels, i.e. AZ

./eks-node-viewer --extra-labels topology.kubernetes.io/zone



# Specify a particular AWS profile and region

# The assignments act as a per-command environment only when they prefix the command being run.
AWS_PROFILE=myprofile AWS_REGION=us-west-2 ./eks-node-viewer





기본 옵션

# select only Karpenter managed nodes

node-selector=karpenter.sh/provisioner-name



# display both CPU and memory

resources=cpu,memory







<5>   Kubernetes autoscaling overview 

1

HPA - 사용자 요청이 늘경우 , 동일한 스팩의 pod를 여러개로 늘린다.

VPA - 서버의 스팩 올리는것 처럼, pod가 vcpu 1개인데 2개가 필요한 경우  pod의 스팩을 올려 재시작해서 생성 하는것이다.



CAS  - 추가로 pod를 배포해야 하는데 노드의 리소스가 없을때, 신규 워커 노드를 배포하는것이다.
          퍼블릭 클라우드 환경에서 사용 가능하다.

카펜터 - CAS의 발전된 형태가 카펜터이다.





출처 : CON324_Optimizing-Amazon-EKS-for-performance-and-cost-on-AWS.pdf



Metric 서버에 정보를 참고해 Autoscaling 된다.




https://github.com/kubernetes/autoscaler







<6> HPA - 파드를 증가  , 스케일 아웃


대시보드 - 그라파나 17125




1

파드 증가 예제

https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/

https://docs.aws.amazon.com/eks/latest/userguide/horizontal-pod-autoscaler.html

https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details





2

php-apache.yaml ?

cpu 과부하를 일으키는   php



# Run and expose php-apache server



curl -s -O https://raw.githubusercontent.com/kubernetes/website/main/content/en/examples/application/php-apache.yaml



cat php-apache.yaml | yh



kubectl apply -f php-apache.yaml





3

# 확인



연산처리 = cpu 과부하를 일으킨다!!!



kubectl exec -it deploy/php-apache -- cat /var/www/html/index.php

...

<?php

$x = 0.0001;

for ($i = 0; $i <= 1000000; $i++) {

        $x += sqrt($x);

}

echo "OK!";

?>





4

# 모니터링 : 터미널2개 사용

watch -d 'kubectl get hpa,pod;echo;kubectl top pod;echo;kubectl top node'

// 노드까지  모니터링 , 메트릭 파드 배포로 top 명령어 사용가능



kubectl exec -it deploy/php-apache -- top





5

# pod 접속 해보자~~

PODIP=$(kubectl get pod -l run=php-apache -o jsonpath={.items[0].status.podIP})

curl -s $PODIP; echo

----

OK!









6

HPA 생성 및 부하 발생 후 오토 스케일링 테스트 ?

 : 증가 시 기본 대기 시간(30초), 감소 시 기본 대기 시간(5분) → 조정 가능



hpa  정책을 설정한다.

# Create the HorizontalPodAutoscaler :

 requests.cpu=200m (밀리코어) = 알고리즘 = 리퀘스트가 0.2 코어를 보장해준다.  



# Since each pod requests 200 milli-cores by kubectl run, this means an average CPU usage of 100 milli-cores.



리퀘스트가  50% 넘으면 (100m 밀리코어= 0.1코어)이면 증가 시켜라~~

200m(밀리코어)에 50%면 100m(밀리코어)이다.  

10개까지 올린다.



kubectl autoscale deployment php-apache --cpu-percent=50 --min=1 --max=10





7

watch 에서 타겟(TARGETS)을 모니터링 하자.



watch -d 'kubectl get hpa,pod;echo;kubectl top pod;echo;kubectl top node'



TARGETS 모니터링 하자.

리플리카도 모니터링 하자.






그라파나에서 보자. 

HPA 모니터링 대시보드는 17125





8

kubectl describe hpa

...

Metrics:                                               ( current / target )

  resource cpu on pods  (as a percentage of request):  0% (1m) / 50%

Min replicas:                                          1

Max replicas:                                          10

Deployment pods:                                       1 current / 1 desired

...





9

# HPA 설정 확인



// krew로 neat 유틸을 설치한다.



kubectl krew install neat



kubectl get hpa php-apache -o yaml



// 조건 확인하자. 

kubectl get hpa php-apache -o yaml | kubectl neat | yh



spec: 

  minReplicas: 1               # [4] 또는 최소 1개까지 줄어들 수도 있습니다

  maxReplicas: 10              # [3] 포드를 최대 10개까지 늘립니다

  scaleTargetRef: 

    apiVersion: apps/v1

    kind: Deployment

    name: php-apache           # [1] php-apache 의 자원 사용량에서

  metrics: 

  - type: Resource

    resource: 

      name: cpu

      target: 

        type: Utilization

        averageUtilization: 50  # [2] CPU 활용률이 50% 이상인 경우







10

# 반복 접속 1 (파드1 IP로 접속) >> 증가 확인 후 중지

while true;do curl -s $PODIP; sleep 0.1; done



OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!O!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!^C





타겟(TARGETS)이 50%를 넘어가는게 보인다.

pod가 증가한다.








11

# 반복 접속 2 (서비스명 도메인으로 접속) >> 증가 확인(몇개까지 증가되는가? 그 이유는?) 후 중지 >>   

전체 총합이,  증가된 pod들로도 부하가 나눠지므로 50%로 많이 넘어가지 않는다.







12

# Run this in a separate terminal

# so that the load generation continues and you can carry on with the rest of the steps



kubectl run -i --tty load-generator --rm --image=busybox:1.28 --restart=Never -- /bin/sh -c "while sleep 0.001; do wget -q -O- http://php-apache; done"



OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!OKK!K!OK!OK!OK!OK!OK!OK!OK!OK!OK!OK!





13

pod들이 증가한다.

최대 pod가 7개 정도까지 증가한다.








14

REPLICAS가 7이다.










15

중지 5분 후 파드 갯수 감소 확인 ?

시간이 지나면 천천히 감소한다.







16

도전과제1



HPA : Autoscaling on multiple metrics and custom metrics

특정  메트릭이 올라갈때 증가 되도록 하자.

https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#autoscaling-on-multiple-metrics-and-custom-metrics





17

멀티 메트릭도 제공한다.



cat <<EOT > hpa22.yaml

apiVersion: autoscaling/v2

kind: HorizontalPodAutoscaler

metadata:

  name: php-apache

spec:

  minReplicas: 1

  maxReplicas: 10

  scaleTargetRef:

    apiVersion: apps/v1

    kind: Deployment

    name: php-apache

  metrics:

  - type: Resource

    resource:

      name: cpu

      target:

        type: Utilization

        averageUtilization: 50

  - type: Pods

    pods:

      metric:

        name: packets-per-second

      target:

        type: AverageValue

        averageValue: 1k

  - type: Object

    object:

      metric:

        name: requests-per-second

      describedObject:

        apiVersion: networking.k8s.io/v1

        kind: Ingress

        name: main-route

      target:

        type: Value

        value: 10k

EOT



kubectl apply -f  hpa22.yaml 







18

watch -d kubectl get hpa

NAME         REFERENCE               TARGETS   MINPODS   MAXPODS   REPLICAS   AGE

php-apache   Deployment/php-apache   25%/50%   1         10        7          12m







19

#삭제

kubectl delete deploy,svc,hpa,pod --all











<7> KEDA - Kubernetes based Event Driven Autoscaler




1

이벤트 기반 



기존의 HPA(Horizontal Pod Autoscaler)는 리소스(CPU, Memory) 메트릭을 기반으로 스케일 여부를 결정하게 됩니다.

반면에 KEDA는 특정 이벤트를 기반으로 스케일 여부를 결정할 수 있습니다.



2

다수의 스케일러를 지원한다.

대상 이벤트

https://keda.sh/docs/2.10/scalers/

https://keda.sh/docs/2.10/concepts/

https://devocean.sk.com/blog/techBoardDetail.do?ID=164800





3

설치

# KEDA 설치



cat <<EOT > keda-values.yaml

metricsServer:

  useHostNetwork: true



prometheus:

  metricServer:

    enabled: true

    port: 9022

    portName: metrics

    path: /metrics

    serviceMonitor:

      # Enables ServiceMonitor creation for the Prometheus Operator

      enabled: true

    podMonitor:

      # Enables PodMonitor creation for the Prometheus Operator

      enabled: true

  operator:

    enabled: true

    port: 8080

    serviceMonitor:

      # Enables ServiceMonitor creation for the Prometheus Operator

      enabled: true

    podMonitor:

      # Enables PodMonitor creation for the Prometheus Operator

      enabled: true



  webhooks:

    enabled: true

    port: 8080

    serviceMonitor:

      # Enables ServiceMonitor creation for the Prometheus webhooks

      enabled: true

EOT



kubectl create namespace keda

helm repo add kedacore https://kedacore.github.io/charts

helm install keda kedacore/keda --version 2.10.2 --namespace keda -f keda-values.yaml







4

# 그라파나 대시보드 추가

https://github.com/kedacore/keda/blob/main/config/grafana/keda-dashboard.json





5

# KEDA 설치 확인

kubectl get-all -n keda

kubectl get all -n keda

kubectl get crd | grep keda





6

# keda 네임스페이스에 디플로이먼트 생성

kubectl apply -f php-apache.yaml -n keda

kubectl get pod -n keda



// keda 메트릭 전용 pod가 있다.  KEDA 전용 메트릭서버를 사용한다!!! 수집하는게 조금 달라서







7



# ScaledObject 정책 생성 : cron

크론 스케일러이다.

정책



cat <<EOT > keda-cron.yaml

apiVersion: keda.sh/v1alpha1

kind: ScaledObject

metadata:

  name: php-apache-cron-scaled

spec:

  minReplicaCount: 0

  maxReplicaCount: 2

  pollingInterval: 30

  cooldownPeriod: 300

  scaleTargetRef:

    apiVersion: apps/v1

    kind: Deployment

    name: php-apache

  triggers:

  - type: cron

    metadata:

      timezone: Asia/Seoul

      start: 00,15,30,45 * * * *

      end: 05,20,35,50 * * * *

      desiredReplicas: "1"

EOT

kubectl apply -f keda-cron.yaml -n keda



// php-apache 파드를 매시 0, 15, 30, 45분에 시작하고 5, 20, 35, 50분에 종료(삭제)하는 것을 반복한다.







8

# 모니터링

watch -d 'kubectl get ScaledObject,hpa,pod -n keda'



시나리오?

시간이 되면  ACTIVE가 TRUE로 되고, cron에 의해 파드가 생성된다.

php-apache

10분있다 pod가 종료된다.

반복










9

# 확인



k ns keda

kubectl get ScaledObject -w





 [root@myeks-bastion-EC2 bin]# kubectl get ScaledObject -w

NAME                     SCALETARGETKIND      SCALETARGETNAME   MIN   MAX   TRIGGERS   AUTHENTICATION   READY   ACTIVE   FALLBACK   AGE

php-apache-cron-scaled   apps/v1.Deployment   php-apache        0     2     cron                        True    False    Unknown    87s





kubectl get ScaledObject,hpa,pod -n keda





kubectl get hpa -o jsonpath={.items[0].spec} -n keda | jq

...

"metrics": [

    {

      "external": {

        "metric": {

          "name": "s0-cron-Asia-Seoul-00,15,30,45xxxx-05,20,35,50xxxx",

          "selector": {

            "matchLabels": {

              "scaledobject.keda.sh/name": "php-apache-cron-scaled"

            }

          }

        },

        "target": {

          "averageValue": "1",

          "type": "AverageValue"

        }

      },

      "type": "External"

    }









10

그라파나 그래프를 확인하자!!!

시간이 지나면,  파드 생성과 파드 삭제를 반복하는 그래프가 나온다.





11

# KEDA 및 deployment 등 삭제



kubectl delete -f keda-cron.yaml -n keda && kubectl delete deploy php-apache -n keda && helm uninstall keda -n keda

kubectl delete namespace keda







12

다양한 사례 참고하세요~



https://jenakim47.tistory.com/90

https://swalloow.github.io/airflow-worker-keda-autoscaler/

https://www.youtube.com/watch?v=FPlCVVrCD64









<8> VPA - Pod  용량  증가 시키기 , 스케일 업




1

VPA 정리 블로그

https://malwareanalysis.tistory.com/603

EKS 스터디 - 5주차 1편 - VPA
VPA란? VPA(Vertical Pod Autoscaler)는 pod resources.request을 최대한 최적값으로 수정합니다. 수정된 request값이 기존 값보다 위 또는 아래 범위에 속하므로 Vertical라고 표현합니다. pod마다 resource.request를 최

malwareanalysis.tistory.com 


2

특징

pod resources.request을 최대한 최적값으로 수정 - 현재 적정 값으로 수정한다.

동일한 리소스 메트릭(CPU, 메모리)을 기준으로는 HPA와 같이 사용할 수 없다.

수정시 파드 재실행되는 단점이 있다.

사용이 힘들다.

https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler









3

# 코드 다운로드



# Clone into the home directory explicitly so the cd below works regardless of the current working directory.
git clone https://github.com/kubernetes/autoscaler.git ~/autoscaler

cd ~/autoscaler/vertical-pod-autoscaler/

tree hack

스크립트 파일로 설치한다.



4

# 배포 과정에서 에러 발생 : 

방안1 openssl 버전 1.1.1 up



방안2 브랜치08에서 작업

ERROR: Failed to create CA certificate for self-signing. If the error is "unknown option -addext", update your openssl version or deploy VPA from the vpa-release-0.8 branch.



# 프로메테우스 임시 파일 시스템 사용으로 재시작 시 저장 메트릭과 대시보드 정보가 다 삭제되어서 스터디 시간 실습 시나리오는 비추천



helm upgrade kube-prometheus-stack prometheus-community/kube-prometheus-stack --reuse-values --set prometheusOperator.verticalPodAutoscaler.enabled=true -n monitoring





# openssl 버전 확인

openssl version

OpenSSL 1.0.2k-fips  26 Jan 2017



# openssl 1.1.1 이상 버전 설치 및 확인

yum install openssl11 -y

openssl11 version

OpenSSL 1.1.1g FIPS  21 Apr 2020





# 스크립트파일내에 openssl11 수정

sed -i 's/openssl/openssl11/g' ~/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/gencerts.sh





5

# Deploy the Vertical Pod Autoscaler to your cluster with the following command.



watch -d kubectl get pod -n kube-system



cat hack/vpa-up.sh

./hack/vpa-up.sh





6

kubectl get crd | grep autoscaling



(masterseo0:keda) [root@myeks-bastion-EC2 vertical-pod-autoscaler]# kubectl get crd | grep autoscaling

verticalpodautoscalercheckpoints.autoscaling.k8s.io   2023-05-26T06:05:11Z

verticalpodautoscalers.autoscaling.k8s.io             2023-05-26T06:05:11Z





7

그라파나 대시보드

https://grafana.com/grafana/dashboards/?search=vpa





8

예)  pod가 실행되면 약 2~3분 뒤에 pod resources.requests가 VPA에 의해 수정된다.

https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler/examples





9

# 공식 예제 배포

k ns default



cd ~/autoscaler/vertical-pod-autoscaler/

cat examples/hamster.yaml | yh

kubectl apply -f examples/hamster.yaml && kubectl get vpa -w



// -w 옵션으로 여러 이벤트 정보를 볼수 있다.







10

# 파드 리소스 Requests 확인



kubectl describe pod | grep Requests: -A2



    Requests:

      cpu:        100m

      memory:     50Mi

--

    Requests:

      cpu:        587m

      memory:     262144k

--

    Requests:

      cpu:        587m

      memory:     262144k



//3번째가   CPU와 메모리가 스케일 업이 되었다.






11

# VPA에 의해 기존 파드 '삭제'되고 신규 파드가 생성됨



kubectl get events --sort-by=".metadata.creationTimestamp" | grep VPA



2m16s       Normal    EvictedByVPA             pod/hamster-5bccbb88c6-s6jkp         Pod was evicted by VPA Updater to apply resource recommendation.

76s         Normal    EvictedByVPA             pod/hamster-5bccbb88c6-jc6gq         Pod was evicted by VPA Updater to apply resource recommendation.







13

삭제



kubectl delete -f examples/hamster.yaml && cd ~/autoscaler/vertical-pod-autoscaler/ && ./hack/vpa-down.sh





14

신규 서비스 ?



KRR  ?

Prometheus-based Kubernetes Resource Recommendations

프로 메테우스 기반 리소스 추천.

측정해서 권고를 해주는 서비스이다.



https://github.com/robusta-dev/krr#getting-started

https://www.youtube.com/watch?v=uITOzpf82RY










<9> CA - Cluster Autoscaler. 클러스터 서버 증가


1

파드를 배포할  노드가 없는 경우,  노드를 증가 시켜준다.

신규 노드를 증가 시키는 것이다.

태그가 있어야 증가 한다. (eksctl로 배포하면 자동으로 들어가 있다.)



ec2에  2개 태그 필수

k8s.io/cluster-autoscaler/enabled

k8s.io/cluster-autoscaler/myeks





https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md

https://archive.eksworkshop.com/beginner/080_scaling/deploy_ca/

https://artifacthub.io/packages/helm/cluster-autoscaler/cluster-autoscaler







2

# 현재 autoscaling(ASG) 정보 확인

# aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='클러스터이름']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table





aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table



-----------------------------------------------------------------

|                   DescribeAutoScalingGroups                   |

+------------------------------------------------+----+----+----+

|  eks-ng1-44c41109-daa3-134c-df0e-0f28c823cb47  |  3 |  3 |  3 |

+------------------------------------------------+----+----+----+





3

# MaxSize 6개로 수정 해보자.



export ASG_NAME=$(aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].AutoScalingGroupName" --output text)

aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 3 --desired-capacity 3 --max-size 6





# 확인

aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table

-----------------------------------------------------------------

|                   DescribeAutoScalingGroups                   |

+------------------------------------------------+----+----+----+

|  eks-ng1-c2c41e26-6213-a429-9a58-02374389d5c3  |  3 |  6 |  3 |

+------------------------------------------------+----+----+----+





4

# 배포 : Deploy the Cluster Autoscaler (CA)

curl -s -O https://raw.githubusercontent.com/kubernetes/autoscaler/master/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-autodiscover.yaml



sed -i "s/<YOUR CLUSTER NAME>/$CLUSTER_NAME/g" cluster-autoscaler-autodiscover.yaml

kubectl apply -f cluster-autoscaler-autodiscover.yaml





# 확인

kubectl get pod -n kube-system | grep cluster-autoscaler

cluster-autoscaler-74785c8d45-vkdvr             1/1     Running   0          15s





kubectl describe deployments.apps -n kube-system cluster-autoscaler





5

# (옵션) cluster-autoscaler 파드가 동작하는 워커 노드가 퇴출(evict) 되지 않게 설정

kubectl -n kube-system annotate deployment.apps/cluster-autoscaler cluster-autoscaler.kubernetes.io/safe-to-evict="false"







6

SCALE A CLUSTER WITH Cluster Autoscaler(CA)

https://archive.eksworkshop.com/beginner/080_scaling/test_ca/



# 모니터링 

kubectl get nodes -w



while true; do kubectl get node; echo "------------------------------" ; date ; sleep 1; done



while true; do aws ec2 describe-instances --query "Reservations[*].Instances[*].{PrivateIPAdd:PrivateIpAddress,InstanceName:Tags[?Key=='Name']|[0].Value,Status:State.Name}" --filters Name=instance-state-name,Values=running --output text ; echo "------------------------------"; date; sleep 1; done





7

# Deploy a Sample App

# We will deploy an sample nginx application as a ReplicaSet of 1 Pod



cat <<EoF> nginx.yaml

apiVersion: apps/v1

kind: Deployment

metadata:

  name: nginx-to-scaleout

spec:

  replicas: 1

  selector:

    matchLabels:

      app: nginx

  template:

    metadata:

      labels:

        service: nginx

        app: nginx

    spec:

      containers:

      - image: nginx

        name: nginx-to-scaleout

        resources:

          limits:

            cpu: 500m

            memory: 512Mi

          requests:

            cpu: 500m

            memory: 512Mi

EoF



kubectl apply -f nginx.yaml

kubectl get deployment/nginx-to-scaleout





8

# Scale our ReplicaSet

# Let’s scale out the replicaset to 15

kubectl scale --replicas=15 deployment/nginx-to-scaleout && date



// 워커 노드가 감당이 안되어  , 노드가 증가 한다!!!

// cpu 용량이 부족하다. 그래서 노드가 증가 한다.

// 증설에 1분 정도 걸린다.  느리다.





9

# 확인

kubectl get pods -l app=nginx -o wide --watch



kubectl -n kube-system logs -f deployment/cluster-autoscaler





10

# 노드 자동 증가 확인

kubectl get nodes



aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]"  --output table



11

# Use the full path: the binary was installed under ~/go/bin and the working directory has changed since then.
~/go/bin/eks-node-viewer







12

# 디플로이먼트 삭제

kubectl delete -f nginx.yaml && date



# 노드 갯수 축소 : 기본은 10분 후 scale down 됨, 

물론 아래 flag 로 시간 수정 가능 >> 그러니 디플로이먼트 삭제 후 10분 기다리고 나서 보자!





# By default, cluster autoscaler will wait 10 minutes between scale down operations, 

# you can adjust this using the --scale-down-delay-after-add, --scale-down-delay-after-delete, 

# and --scale-down-delay-after-failure flag. 

# E.g. --scale-down-delay-after-add=5m to decrease the scale down delay to 5 minutes after a node has been added.





13

# 터미널1  모니터링

watch -d kubectl get node





14

삭제

위 실습 중 디플로이먼트 삭제 후 10분 후 노드 갯수 축소되는 것을 확인 후 아래 삭제를 해보자! >> 

만약 바로 아래 CA 삭제 시 워커 노드는 4개 상태가 되어서 수동으로 3대로 변경 하자!



kubectl delete -f nginx.yaml





15

# size 수정 

aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 3 --desired-capacity 3 --max-size 3



aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table





16

# Cluster Autoscaler 삭제

kubectl delete -f cluster-autoscaler-autodiscover.yaml





17

CA 문제점  ?

하나의 자원에 대해 두군데 (AWS ASG vs AWS EKS)에서 각자의 방식으로 관리하여 문제가 발생함.

 ⇒ 관리 정보가 서로 동기화되지 않아 다양한 문제 발생한다.









<10> CPA - Cluster Proportional Autoscaler




1

node가  늘어나면   파드가 자동 확장 되는것이다.

노드 수 증가에 비례하여 성능 처리가 필요한 애플리케이션(컨테이너/파드)를 수평으로 자동 확장

예) core dns



https://github.com/kubernetes-sigs/cluster-proportional-autoscaler

https://www.eksworkshop.com/docs/autoscaling/workloads/cluster-proportional-autoscaler/



참고 블로그

https://malwareanalysis.tistory.com/604





2

# helm차트로 설치가능하다.

디플로이먼트 부터 생성후  사용가능하다.



helm repo add cluster-proportional-autoscaler https://kubernetes-sigs.github.io/cluster-proportional-autoscaler





# CPA규칙을 설정하고 helm차트를 릴리즈 필요



helm upgrade --install cluster-proportional-autoscaler cluster-proportional-autoscaler/cluster-proportional-autoscaler

Release "cluster-proportional-autoscaler" does not exist. Installing it now.

Error: execution error at (cluster-proportional-autoscaler/templates/deployment.yaml:3:3): options.target must be one of deployment, replicationcontroller, or replicaset





3

# nginx 디플로이먼트 배포



cat <<EOT > cpa-nginx.yaml

apiVersion: apps/v1

kind: Deployment

metadata:

  name: nginx-deployment

spec:

  replicas: 1

  selector:

    matchLabels:

      app: nginx

  template:

    metadata:

      labels:

        app: nginx

    spec:

      containers:

      - name: nginx

        image: nginx:latest

        resources:

          limits:

            cpu: "100m"

            memory: "64Mi"

          requests:

            cpu: "100m"

            memory: "64Mi"

        ports:

        - containerPort: 80

EOT

kubectl apply -f cpa-nginx.yaml





4

# CPA 규칙 설정



노드가 1개면 , 파드가 1개 뜨게 하고

워커노드가 3개이면, 파드가 3개 

워커노드가 4개이면, 파드가 3개 

워커 노드가 5개면 파드도 5개가 되도록  정책을 정함.





cat <<EOF > cpa-values.yaml

config:

  ladder:

    nodesToReplicas:

      - [1, 1]

      - [2, 2]

      - [3, 3]

      - [4, 3]

      - [5, 5]

options:

  namespace: default

  target: "deployment/nginx-deployment"

EOF





5

# 모니터링

watch -d kubectl get pod





6

# helm 업그레이드

helm upgrade --install cluster-proportional-autoscaler -f cpa-values.yaml cluster-proportional-autoscaler/cluster-proportional-autoscaler





7

# 노드 5개로 증가

export ASG_NAME=$(aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].AutoScalingGroupName" --output text)



aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 5 --desired-capacity 5 --max-size 5



aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table



-----------------------------------------------------------------

|                   DescribeAutoScalingGroups                   |

+------------------------------------------------+----+----+----+

|  eks-ng1-5cc42b27-9865-214a-8e75-15dacc7b9c5a  |  5 |  5 |  5 |

+------------------------------------------------+----+----+----+







결과?

nginx  pod도 5개가 만들어진다.








8

# 노드 4개로 축소

aws autoscaling update-auto-scaling-group --auto-scaling-group-name ${ASG_NAME} --min-size 4 --desired-capacity 4 --max-size 4



aws autoscaling describe-auto-scaling-groups --query "AutoScalingGroups[? Tags[? (Key=='eks:cluster-name') && Value=='myeks']].[AutoScalingGroupName, MinSize, MaxSize,DesiredCapacity]" --output table

-----------------------------------------------------------------

|                   DescribeAutoScalingGroups                   |

+------------------------------------------------+----+----+----+

|  eks-ng1-5cc42b27-9865-214a-8e75-15dacc7b9c5a  |  4 |  4 |  4 |

+------------------------------------------------+----+----+----+





노드 1개가 제거 되고 있다.






2분후

노드는 4개, 파드는 3개






9

# Remove the CPA Helm release, then delete the sample nginx Deployment defined in cpa-nginx.yaml.
helm uninstall cluster-proportional-autoscaler && kubectl delete -f cpa-nginx.yaml









<11> Karpenter 실습 환경 준비를 위해서 현재 EKS 실습 환경 전부 삭제


1

EKS를 전부 삭제하고 다시 생성한다.





Helm Chart 삭제



helm uninstall -n kube-system kube-ops-view

helm uninstall -n monitoring kube-prometheus-stack





2

eksctl delete cluster --name $CLUSTER_NAME && aws cloudformation delete-stack --stack-name $CLUSTER_NAME





3

삭제



NATGW도 확인하고 삭제 하자!!!



ACM도 삭제 필요



ALB(Ingress)가 잘 삭제가 되지 않을 경우 수동으로 ALB와 TG를 삭제하고, 이후 VPC를 직접 삭제해주자 → 이후 다시 CloudFormation 스택을 삭제하면 됨

콘솔가서 로드 밸런서등  삭제 확인하자.