Skip to content

Commit 934a8b3

Browse files
committed
containerized aks-agent
1 parent ef668ae commit 934a8b3

28 files changed

+1852
-3174
lines changed

src/aks-agent/HISTORY.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ To release a new version, please select a new version number (usually plus 1 to
1212
Pending
1313
+++++++
1414

15+
1.0.0b12
16+
++++++++
17+
18+
1519
1.0.0b11
1620
++++++++
1721
* Fix(agent-init): replace max_tokens with max_completion_tokens for connection check of Azure OpenAI service.

src/aks-agent/azext_aks_agent/__init__.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,8 @@
44
# --------------------------------------------------------------------------------------------
55

66

7-
import os
8-
97
# pylint: disable=unused-import
10-
import azext_aks_agent._help
11-
from azext_aks_agent._consts import (
12-
CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY,
13-
CONST_AGENT_NAME,
14-
CONST_AGENT_NAME_ENV_KEY,
15-
CONST_DISABLE_PROMETHEUS_TOOLSET_ENV_KEY,
16-
CONST_PRIVACY_NOTICE_BANNER,
17-
CONST_PRIVACY_NOTICE_BANNER_ENV_KEY,
18-
)
198
from azure.cli.core import AzCommandsLoader
20-
from azure.cli.core.api import get_config_dir
219

2210

2311
class ContainerServiceCommandsLoader(AzCommandsLoader):
@@ -44,14 +32,3 @@ def load_arguments(self, command):
4432

4533

4634
COMMAND_LOADER_CLS = ContainerServiceCommandsLoader
47-
48-
49-
# NOTE(mainred): holmesgpt leverages the environment variables to customize its behavior.
50-
def customize_holmesgpt():
51-
os.environ[CONST_DISABLE_PROMETHEUS_TOOLSET_ENV_KEY] = "true"
52-
os.environ[CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY] = get_config_dir()
53-
os.environ[CONST_AGENT_NAME_ENV_KEY] = CONST_AGENT_NAME
54-
os.environ[CONST_PRIVACY_NOTICE_BANNER_ENV_KEY] = CONST_PRIVACY_NOTICE_BANNER
55-
56-
57-
customize_holmesgpt()

src/aks-agent/azext_aks_agent/_consts.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,17 @@
3333
# Color constants for terminal output
3434
HELP_COLOR = "cyan" # same as AI_COLOR for now
3535
ERROR_COLOR = "red"
36+
37+
# Kubernetes WebSocket exec protocol constants
38+
RESIZE_CHANNEL = 4 # WebSocket channel for terminal resize messages
39+
# WebSocket heartbeat configuration (matching kubectl client-go)
40+
# Based on kubernetes/client-go/tools/remotecommand/websocket.go#L59-L65
41+
# pingPeriod = 5 * time.Second
42+
# pingReadDeadline = (pingPeriod * 12) + (1 * time.Second)
43+
# The read deadline is calculated to allow up to 12 missed pings plus a 1-second buffer
44+
# This provides tolerance for network delays while detecting actual connection failures
45+
HEARTBEAT_INTERVAL = 5.0 # pingPeriod: 5 seconds between pings
46+
HEARTBEAT_TIMEOUT = (HEARTBEAT_INTERVAL * 12) + 1 # pingReadDeadline: 61 seconds total timeout
47+
48+
AGENT_NAMESPACE = "kube-system"
49+
AGENT_LABEL_SELECTOR = "app.kubernetes.io/name=aks-agent"

src/aks-agent/azext_aks_agent/_help.py

Lines changed: 5 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
# --------------------------------------------------------------------------------------------
66

77
# pylint: disable=too-many-lines
8-
98
from knack.help_files import helps
109

1110
helps[
@@ -33,12 +32,6 @@
3332
Each provider may require different environment variables and model naming conventions.
3433
For a full list of supported providers, model patterns, and required environment variables, see https://docs.litellm.ai/docs/providers.
3534
Note: For Azure OpenAI, it is recommended to set the deployment name as the model name until https://github.com/BerriAI/litellm/issues/13950 is resolved.
36-
- name: --api-key
37-
type: string
38-
short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY). (Deprecated)
39-
- name: --config-file
40-
type: string
41-
short-summary: Path to configuration file.
4235
- name: --max-steps
4336
type: int
4437
short-summary: Maximum number of steps the LLM can take to investigate the issue.
@@ -57,11 +50,11 @@
5750
- name: --status
5851
type: bool
5952
short-summary: Show AKS agent configuration and status information.
60-
- name: --aks-mcp
61-
type: bool
62-
short-summary: Enable AKS MCP integration for enhanced capabilities. Traditional mode is the default.
6353
6454
examples:
55+
- name: Ask about pod issues in the cluster with OpenAI
56+
text: |-
57+
az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
6558
- name: Ask about pod issues in the cluster with last configured model
6659
text: |-
6760
az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup
@@ -71,78 +64,26 @@
7164
- name: Ask about pod issues in the cluster with OpenAI
7265
text: |-
7366
az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
74-
- name: Run agent with config file
75-
text: |
76-
az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --name MyManagedCluster --resource-group MyResourceGroup
77-
Here is an example of config file:
78-
```json
79-
llms:
80-
- provider: azure
81-
MODEL_NAME: gpt-4.1
82-
AZURE_API_KEY: *******
83-
AZURE_API_BASE: https://{azure-openai-service-name}.openai.azure.com/
84-
AZURE_API_VERSION: 2025-04-01-preview
85-
# define a list of mcp servers, mcp server can be defined
86-
mcp_servers:
87-
aks_mcp:
88-
description: "The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters"
89-
url: "http://localhost:8003/sse"
90-
91-
# try adding your own tools or toggle the built-in toolsets here
92-
# e.g. query company-specific data, fetch logs from your existing observability tools, etc
93-
# To check how to add a customized toolset, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html#custom-toolsets
94-
# To find all built-in toolsets, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/builtin_toolsets.html
95-
toolsets:
96-
# add a new json processor toolset
97-
json_processor:
98-
description: "A toolset for processing JSON data using jq"
99-
prerequisites:
100-
- command: "jq --version" # Ensure jq is installed
101-
tools:
102-
- name: "process_json"
103-
description: "A tool that uses jq to process JSON input"
104-
command: "echo '{{ json_input }}' | jq '.'" # Example jq command to format JSON
105-
# disable a built-in toolsets
106-
aks/core:
107-
enabled: false
108-
```
10967
- name: Run in interactive mode with an initial question
110-
text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/gpt-4.1 --api-key "sk-xxx"
68+
text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/gpt-4.1
11169
- name: Run in non-interactive batch mode
11270
text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/gpt-4.1
11371
- name: Show detailed tool output during analysis
11472
text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/gpt-4.1
115-
- name: Use custom configuration file
116-
text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/gpt-4.1
11773
- name: Run agent with no echo of the original question
11874
text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/gpt-4.1
11975
- name: Refresh toolsets to get the latest available tools
12076
text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/gpt-4.1
12177
- name: Show agent status (MCP readiness)
12278
text: az aks agent --status
12379
- name: Run in interactive mode with an initial question
124-
text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
80+
text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
12581
- name: Run in non-interactive batch mode
12682
text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
12783
- name: Show detailed tool output during analysis
12884
text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
129-
- name: Use custom configuration file
130-
text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
13185
- name: Run agent with no echo of the original question
13286
text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
13387
- name: Refresh toolsets to get the latest available tools
13488
text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
13589
"""
136-
137-
helps[
138-
"aks agent-init"
139-
] = """
140-
type: command
141-
short-summary: Initialize and validate LLM provider/model configuration for AKS agent.
142-
long-summary: |-
143-
This command interactively guides you to select an LLM provider and model, validates the connection, and saves the configuration for later use.
144-
You can run this command multiple times to add or update different model configurations.
145-
examples:
146-
- name: Initialize configuration for Azure OpenAI, OpenAI or other llms
147-
text: az aks agent-init
148-
"""

src/aks-agent/azext_aks_agent/_params.py

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,6 @@
44
# --------------------------------------------------------------------------------------------
55

66
# pylint: disable=too-many-statements,too-many-lines
7-
import os.path
8-
9-
from azext_aks_agent._consts import CONST_AGENT_CONFIG_FILE_NAME
10-
from azext_aks_agent._validators import validate_agent_config_file
11-
from azure.cli.core.api import get_config_dir
12-
from azure.cli.core.commands.parameters import get_three_state_flag
13-
14-
157
def load_arguments(self, _):
168
with self.argument_context("aks agent") as c:
179
c.positional(
@@ -40,24 +32,17 @@ def load_arguments(self, _):
4032
help="Maximum number of steps the LLM can take to investigate the issue.",
4133
)
4234
c.argument(
43-
"config_file",
44-
default=os.path.join(get_config_dir(), CONST_AGENT_CONFIG_FILE_NAME),
45-
validator=validate_agent_config_file,
35+
"init",
4636
required=False,
47-
help="Path to the config file.",
37+
help="Initialize LLM configurations and the aks-agent environment on the AKS cluster.",
38+
action="store_true",
4839
)
4940
c.argument(
5041
"model",
5142
help="Specify the LLM provider and model or deployment to use for the AI assistant.",
5243
required=False,
5344
type=str,
5445
)
55-
c.argument(
56-
"api_key",
57-
help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
58-
required=False,
59-
type=str,
60-
)
6146
c.argument(
6247
"no_interactive",
6348
help="Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.",
@@ -85,13 +70,8 @@ def load_arguments(self, _):
8570
help="Show AKS agent configuration and status information.",
8671
)
8772
c.argument(
88-
"use_aks_mcp",
89-
options_list=["--aks-mcp"],
90-
default=False,
91-
arg_type=get_three_state_flag(),
92-
help=(
93-
"Enable AKS MCP integration for enhanced capabilities. "
94-
"Traditional mode is the default. Use --aks-mcp to enable MCP mode, or "
95-
"--no-aks-mcp to explicitly disable it."
96-
),
73+
"cleanup",
74+
options_list=["--cleanup"],
75+
action="store_true",
76+
help="Remove aks-agent resources on the AKS cluster.",
9777
)

src/aks-agent/azext_aks_agent/_validators.py

Lines changed: 0 additions & 53 deletions
This file was deleted.

0 commit comments

Comments
 (0)