-
Notifications
You must be signed in to change notification settings - Fork 252
Support configurable log group and EMF with OTLP #1993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: So the EMF exporter is unnamed. Do we want to name it ( |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,12 +11,19 @@ exporters: | |
| external_id: "" | ||
| imds_retries: 1 | ||
| local_mode: false | ||
| log_group_name: /aws/cwagent | ||
| log_group_name: /aws/application/otlp | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: Could we have a test that shows it falls back to defaults if not configured, or does that already exist in a different sample config? |
||
| log_retention: 0 | ||
| log_stream_name: "" | ||
| max_retries: 2 | ||
| metric_descriptors: | ||
| - metric_name: request_count | ||
| overwrite: false | ||
| unit: Count | ||
| - metric_name: request_duration | ||
| overwrite: false | ||
| unit: Milliseconds | ||
| middleware: agenthealth/logs | ||
| namespace: CWAgent | ||
| namespace: MyApplication/OTLP | ||
| no_verify_ssl: false | ||
| num_workers: 8 | ||
| output_destination: cloudwatch | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| [agent] | ||
| collection_jitter = "0s" | ||
| debug = false | ||
| flush_interval = "1s" | ||
| flush_jitter = "0s" | ||
| hostname = "" | ||
| interval = "60s" | ||
| logfile = "/opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log" | ||
| logtarget = "lumberjack" | ||
| metric_batch_size = 1000 | ||
| metric_buffer_limit = 10000 | ||
| omit_hostname = false | ||
| precision = "" | ||
| quiet = false | ||
| round_interval = false | ||
|
|
||
| [[inputs.prometheus]] | ||
| prometheus_config_path = "/etc/prometheus/prometheus.yaml" | ||
|
|
||
| [[outputs.cloudwatchlogs]] | ||
| force_flush_interval = "30s" | ||
| log_stream_name = "i-UNKNOWN" | ||
| region = "us-west-2" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| { | ||
| "logs": { | ||
| "force_flush_interval": 30, | ||
| "metrics_collected": { | ||
| "prometheus": { | ||
| "log_group_name": "/aws/prometheus/metrics", | ||
| "prometheus_config_path": "/etc/prometheus/prometheus.yaml", | ||
| "emf_processor": { | ||
| "metric_namespace": "PrometheusApp", | ||
| "metric_unit": { | ||
| "http_requests_total": "Count", | ||
| "http_request_duration_seconds": "Seconds" | ||
| }, | ||
| "metric_declaration": [ | ||
| { | ||
| "source_labels": ["job"], | ||
| "label_matcher": "^kubernetes-pod-jmx$", | ||
| "dimensions": [ | ||
| ["ClusterName", "Namespace"] | ||
| ], | ||
| "metric_selectors": [ | ||
| "^jvm_threads_current$", | ||
| "^jvm_memory_bytes_used$" | ||
| ] | ||
| } | ||
| ] | ||
| } | ||
| }, | ||
| "otlp": { | ||
| "grpc_endpoint": "0.0.0.0:4317", | ||
| "http_endpoint": "0.0.0.0:4318", | ||
| "log_group_name": "/aws/otlp/metrics", | ||
| "emf_processor": { | ||
| "metric_namespace": "OTLPApp", | ||
| "metric_unit": { | ||
| "request_duration": "Milliseconds", | ||
| "request_count": "Count", | ||
| "error_rate": "Percent" | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,195 @@ | ||
| exporters: | ||
| awsemf: | ||
| add_entity: true | ||
| certificate_file_path: "" | ||
| detailed_metrics: false | ||
| dimension_rollup_option: NoDimensionRollup | ||
| disable_metric_extraction: false | ||
| eks_fargate_container_insights_enabled: false | ||
| endpoint: "" | ||
| enhanced_container_insights: false | ||
| external_id: "" | ||
| imds_retries: 1 | ||
| local_mode: false | ||
| log_group_name: /aws/otlp/metrics | ||
| log_retention: 0 | ||
| log_stream_name: "" | ||
| max_retries: 2 | ||
| metric_descriptors: | ||
| - metric_name: request_count | ||
| overwrite: false | ||
| unit: Count | ||
| - metric_name: error_rate | ||
| overwrite: false | ||
| unit: Percent | ||
| - metric_name: request_duration | ||
| overwrite: false | ||
| unit: Milliseconds | ||
| middleware: agenthealth/logs | ||
| namespace: OTLPApp | ||
| no_verify_ssl: false | ||
| num_workers: 8 | ||
| output_destination: cloudwatch | ||
| profile: "" | ||
| proxy_address: "" | ||
| region: us-west-2 | ||
| request_timeout_seconds: 30 | ||
| resource_arn: "" | ||
| resource_to_telemetry_conversion: | ||
| enabled: true | ||
| retain_initial_value_of_delta_metric: false | ||
| role_arn: "" | ||
| version: "0" | ||
| awsemf/prometheus: | ||
| add_entity: false | ||
| certificate_file_path: "" | ||
| detailed_metrics: false | ||
| dimension_rollup_option: NoDimensionRollup | ||
| disable_metric_extraction: false | ||
| eks_fargate_container_insights_enabled: false | ||
| endpoint: "" | ||
| enhanced_container_insights: false | ||
| external_id: "" | ||
| imds_retries: 1 | ||
| local_mode: false | ||
| log_group_name: /aws/prometheus/metrics | ||
| log_retention: 0 | ||
| log_stream_name: '{JobName}' | ||
| max_retries: 2 | ||
| metric_declarations: | ||
| - dimensions: | ||
| - - ClusterName | ||
| - Namespace | ||
| label_matchers: | ||
| - label_names: | ||
| - job | ||
| regex: ^kubernetes-pod-jmx$ | ||
| separator: ; | ||
| metric_name_selectors: | ||
| - ^jvm_threads_current$ | ||
| - ^jvm_memory_bytes_used$ | ||
| metric_descriptors: | ||
| - metric_name: http_request_duration_seconds | ||
| overwrite: false | ||
| unit: Seconds | ||
| - metric_name: http_requests_total | ||
| overwrite: false | ||
| unit: Count | ||
| middleware: agenthealth/logs | ||
| namespace: PrometheusApp | ||
| no_verify_ssl: false | ||
| num_workers: 8 | ||
| output_destination: cloudwatch | ||
| profile: "" | ||
| proxy_address: "" | ||
| region: us-west-2 | ||
| request_timeout_seconds: 30 | ||
| resource_arn: "" | ||
| resource_to_telemetry_conversion: | ||
| enabled: true | ||
| retain_initial_value_of_delta_metric: false | ||
| role_arn: "" | ||
| version: "0" | ||
| extensions: | ||
| agenthealth/logs: | ||
| is_usage_data_enabled: true | ||
| stats: | ||
| operations: | ||
| - PutLogEvents | ||
| usage_flags: | ||
| mode: EC2 | ||
| region_type: ACJ | ||
| agenthealth/statuscode: | ||
| is_status_code_enabled: true | ||
| is_usage_data_enabled: true | ||
| stats: | ||
| usage_flags: | ||
| mode: EC2 | ||
| region_type: ACJ | ||
| entitystore: | ||
| mode: ec2 | ||
| region: us-west-2 | ||
| processors: | ||
| awsentity/service/otlp: | ||
| entity_type: Service | ||
| platform: ec2 | ||
| batch/hostOtlpMetrics/cloudwatchlogs: | ||
| metadata_cardinality_limit: 1000 | ||
| send_batch_max_size: 0 | ||
| send_batch_size: 8192 | ||
| timeout: 30s | ||
| batch/prometheus/cloudwatchlogs: | ||
| metadata_cardinality_limit: 1000 | ||
| send_batch_max_size: 0 | ||
| send_batch_size: 8192 | ||
| timeout: 30s | ||
| cumulativetodelta/hostOtlpMetrics/cloudwatchlogs: | ||
| exclude: | ||
| match_type: "" | ||
| include: | ||
| match_type: "" | ||
| initial_value: 2 | ||
| max_staleness: 0s | ||
| receivers: | ||
| otlp/grpc_0_0_0_0_4317: | ||
| protocols: | ||
| grpc: | ||
| endpoint: 0.0.0.0:4317 | ||
| keepalive: | ||
| enforcement_policy: {} | ||
| server_parameters: {} | ||
| read_buffer_size: 524288 | ||
| transport: tcp | ||
| otlp/http_0_0_0_0_4318: | ||
| protocols: | ||
| http: | ||
| cors: {} | ||
| endpoint: 0.0.0.0:4318 | ||
| idle_timeout: 0s | ||
| logs_url_path: /v1/logs | ||
| metrics_url_path: /v1/metrics | ||
| read_header_timeout: 0s | ||
| traces_url_path: /v1/traces | ||
| write_timeout: 0s | ||
| telegraf_prometheus: | ||
| collection_interval: 1m0s | ||
| initial_delay: 1s | ||
| timeout: 0s | ||
| service: | ||
| extensions: | ||
| - agenthealth/logs | ||
| - agenthealth/statuscode | ||
| - entitystore | ||
| pipelines: | ||
| metrics/hostOtlpMetrics/cloudwatchlogs: | ||
| exporters: | ||
| - awsemf | ||
| processors: | ||
| - cumulativetodelta/hostOtlpMetrics/cloudwatchlogs | ||
| - awsentity/service/otlp | ||
| - batch/hostOtlpMetrics/cloudwatchlogs | ||
| receivers: | ||
| - otlp/grpc_0_0_0_0_4317 | ||
| - otlp/http_0_0_0_0_4318 | ||
| metrics/prometheus/cloudwatchlogs: | ||
| exporters: | ||
| - awsemf/prometheus | ||
| processors: | ||
| - batch/prometheus/cloudwatchlogs | ||
| receivers: | ||
| - telegraf_prometheus | ||
| telemetry: | ||
| logs: | ||
| encoding: console | ||
| level: info | ||
| output_paths: | ||
| - /opt/aws/amazon-cloudwatch-agent/logs/amazon-cloudwatch-agent.log | ||
| sampling: | ||
| enabled: true | ||
| initial: 2 | ||
| thereafter: 500 | ||
| tick: 10s | ||
| metrics: | ||
| level: None | ||
| traces: | ||
| level: None |
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why was this added? Why do we need this now when it wasn't required before? |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| // SPDX-License-Identifier: MIT | ||
|
|
||
| package otlp | ||
|
|
||
| import ( | ||
| "github.com/aws/amazon-cloudwatch-agent/translator" | ||
| parent "github.com/aws/amazon-cloudwatch-agent/translator/translate/logs/metrics_collected" | ||
| ) | ||
|
|
||
| type Rule translator.Rule | ||
|
|
||
| type Otlp struct { | ||
| } | ||
|
|
||
| const SectionKey = "otlp" | ||
|
|
||
| func GetCurPath() string { | ||
| curPath := parent.GetCurPath() + SectionKey + "/" | ||
| return curPath | ||
| } | ||
|
|
||
| func (o *Otlp) ApplyRule(input interface{}) (string, interface{}) { | ||
| im := input.(map[string]interface{}) | ||
| result := map[string]map[string]interface{}{} | ||
| inputs := map[string]interface{}{} | ||
| processors := map[string]interface{}{} | ||
|
|
||
| // Check if this plugin exists in the input instance | ||
| if _, ok := im[SectionKey]; !ok { | ||
| return "", "" | ||
| } | ||
| // OTLP configuration is handled by the OTEL pipeline translator | ||
| // This rule just validates the configuration exists | ||
| result["inputs"] = inputs | ||
| result["processors"] = processors | ||
| return SectionKey, result | ||
| } | ||
|
|
||
| func init() { | ||
| o := new(Otlp) | ||
| parent.RegisterLinuxRule(SectionKey, o) | ||
| parent.RegisterDarwinRule(SectionKey, o) | ||
| parent.RegisterWindowsRule(SectionKey, o) | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should probably place a restriction on length like for the other fields
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I can make it use the already existing
`logGroupNameDefinition`, but the current schema is already inconsistent: prometheus contains `log_group_name` without any restrictions. Maybe it's better to use `logGroupNameDefinition` everywhere, but this might affect agent configs already using longer names with prometheus. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't have to be in this PR, but based on documentation https://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_CreateLogGroup.html