Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@ devops-agent/
└── docs/ # Documentation
```

## Common issues and fixes
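- If you see an error like the one below, the Qdrant collection is missing a payload index on the `content_hash` field:
```text
INFO Error checking if content_hash ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73 exists: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Bad request: Index required but not found for \\"content_hash\\" of one of the following types: [keyword]. Help: Create an index for this key or use a
different filter."},"time":2 ...'
```
Create the missing `keyword` index with a request like the following (substitute your own cluster URL and API key):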
```text
curl --request PUT \
--url https://9df18135-290c-45b3-8158-f73b103dc352.eu-west-2-0.aws.cloud.qdrant.io:6333/collections/devops-memory/index \
--header 'Authorization: Bearer YOUR_API_KEY' \
--header 'Content-Type: application/json' \
--data '{
"field_name": "content_hash",
"field_schema": {
"type": "keyword",
"on_disk": true
}
}'
```

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.
Expand Down
18 changes: 13 additions & 5 deletions devops_agent/core/devops_agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import os
from textwrap import dedent

from agno.agent import Agent
from agno.knowledge import Knowledge
Expand Down Expand Up @@ -48,13 +49,20 @@ def execute_devops_agent(provider: str, user_query: str = None) -> Agent:
devops_assist = Agent(
name="DevOps Agent",
model=model,
description="You help answer questions about the devops domain.",
description="You help answer questions about the devops domain like kubernetes troubleshooting, docker troubleshooting etc.",
instructions=devops_prompt,
# knowledge=knowledge,
additional_input=dedent("""\
Instruction: You should always answer scenarios like below (few examples as below).
- Debug high memory usage in Kubernetes pods causing frequent OOMKills and restarts
- Analyze distributed tracing data to identify performance bottleneck in microservices architecture
- Troubleshoot intermittent 504 gateway timeout errors in production load balancer
- Investigate CI/CD pipeline failures and implement automated debugging workflows
- Root cause analysis for database deadlocks causing application timeouts
- Debug DNS resolution issues affecting service discovery in Kubernetes cluster
- Analyze logs to identify security breach and implement containment procedures
- Troubleshoot GitOps deployment failures and implement automated rollback procedures
"""),
stream_intermediate_steps=True,
# add_knowledge_to_context=True,
# add_datetime_to_context=True,
# add_session_summary_to_context=True,
markdown=True,
)

Expand Down
18 changes: 13 additions & 5 deletions devops_agent/core/kubernetes_agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import os
from textwrap import dedent

from agno.agent import Agent
from agno.knowledge import Knowledge
Expand Down Expand Up @@ -51,13 +52,20 @@ def execute_k8s_agent(provider: str, user_query: str = None) -> Agent:
k8s_assist = Agent(
name="Kubernetes Agent",
model=model,
description="You help answer questions about the kubernetes domain of any infrastructure like Azure(AKS), AWS(EKS), and GCP(GKS)",
description="You help answer questions about the application with kubernetes design and implementation domain of any infrastructure like Azure(AKS), AWS(EKS), and GCP(GKS)",
instructions=k8s_prompt,
# knowledge=knowledge,
additional_input=dedent("""\
Instruction: You should always answer scenarios like below (few examples as below).
- Design a multi-cluster Kubernetes platform with GitOps for a financial services company
- Implement progressive delivery with Argo Rollouts and service mesh traffic splitting
- Create a secure multi-tenant Kubernetes platform with namespace isolation and RBAC
- Design disaster recovery for stateful applications across multiple Kubernetes clusters
- Optimize Kubernetes costs while maintaining performance and availability SLAs
- Implement observability stack with Prometheus, Grafana, and OpenTelemetry for microservices
- Create CI/CD pipeline with GitOps for container applications with security scanning
- Design Kubernetes operator for custom application lifecycle management
"""),
stream_intermediate_steps=True,
# add_knowledge_to_context=True,
# add_datetime_to_context=True,
# add_session_summary_to_context=True,
markdown=True,
)

Expand Down
2 changes: 1 addition & 1 deletion devops_agent/core/terraform_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

console = Console()

def execute_terraform_agent(provider: str, user_query: str = None) -> Agent:
def execute_terraform_agent(provider: str) -> Agent:

console.print(Panel.fit(
"[bold cyan]Terraform Agent Invoking...[/bold cyan]",
Expand Down
2 changes: 1 addition & 1 deletion devops_agent/prompts/devops.poml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<poml>
<task>You are a DevOps troubleshooter specializing in rapid incident response, advanced debugging, and modern observability practices. Your purpose is to provide expert troubleshooting with comprehensive knowledge of modern observability tools, debugging methodologies, and incident response practices.</task>
<task>You are a DevOps troubleshooter specializing in rapid incident response, advanced debugging, and modern observability practices. Your purpose is to provide expert troubleshooting with comprehensive knowledge of modern observability tools, debugging methodologies, and incident response practices. Think through the user ask, plan properly and then only answer the user.</task>

<stepwise-instructions>
<list listStyle="decimal">
Expand Down
2 changes: 1 addition & 1 deletion devops_agent/prompts/kubernetes.poml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<poml>
<task>You are a Kubernetes architect specializing in cloud-native infrastructure, modern GitOps workflows, and enterprise container orchestration at scale. Your purpose is to provide expert Kubernetes architecture with comprehensive knowledge of container orchestration, cloud-native technologies, and modern GitOps practices across all major providers.</task>
<task>You are a Kubernetes architect specializing in cloud-native infrastructure, modern GitOps workflows, and enterprise container orchestration at scale. Your purpose is to provide expert Kubernetes architecture with comprehensive knowledge of container orchestration, cloud-native technologies, and modern GitOps practices across all major providers. Think through the user ask, plan properly and then only answer the user.</task>

<stepwise-instructions>
<list listStyle="decimal">
Expand Down
2 changes: 1 addition & 1 deletion devops_agent/prompts/terraform.poml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<poml>
<task>You are a Terraform/OpenTofu specialist focused on advanced infrastructure automation, state management, and modern IaC practices. Your purpose is to provide expert Infrastructure as Code guidance with comprehensive knowledge of Terraform, OpenTofu, and modern IaC ecosystems for enterprise-scale infrastructure automation.</task>
<task>You are a Terraform/OpenTofu specialist focused on advanced infrastructure automation, state management, and modern IaC practices. Your purpose is to provide expert Infrastructure as Code guidance with comprehensive knowledge of Terraform, OpenTofu, and modern IaC ecosystems for enterprise-scale infrastructure automation. Think through the user ask, plan properly and then only answer the user.</task>

<stepwise-instructions>
<list listStyle="decimal">
Expand Down
78 changes: 74 additions & 4 deletions devops_agent/utils/stream_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ def __init__(
# Content trackers
self.response_content = ""
self.reasoning_content = ""
self.reasoning_steps = []
self.processed_reasoning_steps = set() # Track processed reasoning steps
self.input_content = ""

# Tool call trackers
Expand Down Expand Up @@ -62,6 +64,48 @@ def _create_panel(
padding=(1, 2),
)

def _add_reasoning_step(self, step: Any) -> None:
    """Record a reasoning step unless an equivalent one was already recorded.

    A step is identified by a hash of its ``title`` plus the first 100
    characters of its ``reasoning`` when both attributes exist; otherwise by
    a hash of the first 200 characters of ``str(step)``. Steps that share a
    title and the same leading 100 characters of reasoning are therefore
    treated as duplicates.

    Args:
        step: The step to record. ``None`` is ignored.
    """
    if step is None:
        return

    if hasattr(step, "title") and hasattr(step, "reasoning"):
        raw_reasoning = step.reasoning
        # Coerce to str before truncating: a truthy non-string value (for
        # example a number) is not sliceable and would otherwise raise
        # TypeError in the middle of stream handling.
        reasoning = str(raw_reasoning) if raw_reasoning else ""
        step_id = hash(f"{step.title}:{reasoning[:100]}")
    else:
        # No structured attributes to key on; fall back to the (truncated)
        # string representation.
        step_id = hash(str(step)[:200])

    if step_id in self.processed_reasoning_steps:
        return

    self.processed_reasoning_steps.add(step_id)
    self.reasoning_steps.append(step)

def _format_reasoning_step(self, step: Any) -> str:
    """Render a reasoning step as display text.

    Strings are returned unchanged. For other objects the first truthy
    attribute among ``content``, ``text`` and ``message`` is used. Objects
    exposing none of those but carrying ``title`` and/or ``reasoning`` (the
    attributes ``_add_reasoning_step`` uses to identify a step) are rendered
    as the title followed by the reasoning on the next line, rather than as
    a raw ``str()`` dump of the object.

    Args:
        step: A plain string or a step-like object.

    Returns:
        The text to show for this step.
    """
    if isinstance(step, str):
        return step

    # Same precedence as before: content, then text, then message.
    for attr in ("content", "text", "message"):
        value = getattr(step, attr, None)
        if value:
            return str(value)

    # Structured steps: show the human-readable fields instead of the repr.
    structured = [
        str(value)
        for value in (getattr(step, "title", None), getattr(step, "reasoning", None))
        if value
    ]
    if structured:
        return "\n".join(structured)

    # Nothing recognisable; fall back to the string representation.
    return str(step)

def _format_tool_call(self, tool: Any) -> str:
"""Format a tool call for display"""
if tool is None:
Expand Down Expand Up @@ -110,7 +154,18 @@ def _build_panels(self) -> List[Panel]:
)
panels.append(message_panel)

# Reasoning panel
# Reasoning steps panels
if self.reasoning_steps and self.show_reasoning:
for i, step in enumerate(self.reasoning_steps, 1):
reasoning_text = self._format_reasoning_step(step)
reasoning_panel = self._create_panel(
Text(reasoning_text),
f"Reasoning Step {i}",
border_style="green"
)
panels.append(reasoning_panel)

# Reasoning content panel (for string-based reasoning)
if self.reasoning_content and self.show_reasoning:
thinking_panel = self._create_panel(
Text(self.reasoning_content),
Expand Down Expand Up @@ -240,16 +295,31 @@ def handle_stream(
self.response_content += content

elif event_type == "TeamReasoningStep":
# Reasoning content
# Reasoning content - could be string or ReasoningStep object
reasoning = getattr(event, 'content', '')
if reasoning:
self.reasoning_content += reasoning
if isinstance(reasoning, str):
self.reasoning_content += reasoning
else:
# It's a ReasoningStep object - deduplicate
self._add_reasoning_step(reasoning)

elif event_type == "reasoning_content":
# Alternative reasoning event
reasoning = getattr(event, 'reasoning_content', '')
if reasoning:
self.reasoning_content += reasoning
if isinstance(reasoning, str):
self.reasoning_content += reasoning
else:
self._add_reasoning_step(reasoning)

# Handle reasoning_steps attribute - deduplicate each step
if hasattr(event, 'reasoning_steps') and event.reasoning_steps:
if isinstance(event.reasoning_steps, list):
for step in event.reasoning_steps:
self._add_reasoning_step(step)
else:
self._add_reasoning_step(event.reasoning_steps)

elif event_type == "TeamToolCallStarted":
# Team tool call started
Expand Down