Skip to content

Commit 141beb9

Browse files
author
wlanboy
committed
added envoy stats
1 parent 795dc8a commit 141beb9

3 files changed

Lines changed: 132 additions & 18 deletions

File tree

src/main/java/com/wlanboy/javahttpclient/client/K8sDiagnosticService.java

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,14 @@ public Map<String, Object> getFullSidecarDetails() {
110110
// Sektion B: Gesundheit & Fehler
111111
Map<String, Object> health = new HashMap<>();
112112
String rawStats = restTemplate.getForObject(
113-
envoyAdminUrl + "/stats?filter=.*(errors|5xx|timeout|retry|fail|reset|refused|overflow|pending|cx_none).*",
113+
envoyAdminUrl + "/stats",
114114
String.class);
115-
Map<String, String> activeErrors = parseStats(rawStats, true);
115+
Map<String, String> activeErrors = parseStats(rawStats, false);
116+
// xds-grpc ist interne Istio Control-Plane – kein App-Traffic, rausfiltern
117+
activeErrors.entrySet().removeIf(e -> e.getKey().startsWith("cluster.xds-grpc"));
116118
health.put("activeErrorMetrics", activeErrors);
117119
health.put("errorCount", activeErrors.size());
120+
health.put("diagnoses", diagnoseMetrics(activeErrors));
118121
report.put("healthDiagnostics", health);
119122

120123
report.put("timestamp", new Date());
@@ -185,6 +188,57 @@ public List<Object> getIstioResources(String namespace, String type) {
185188
return Collections.emptyList();
186189
}
187190

191+
private List<Map<String, Object>> diagnoseMetrics(Map<String, String> metrics) {
192+
record Rule(String pattern, String severity, String title, String description, String recommendation) {}
193+
194+
List<Rule> rules = List.of(
195+
new Rule("upstream_rq_pending_overflow", "KRITISCH", "Circuit Breaker / Pool-Overflow",
196+
"Der Connection Pool ist voll oder ein Circuit Breaker ist offen.",
197+
"DestinationRule.trafficPolicy.connectionPool und outlierDetection prüfen."),
198+
new Rule("upstream_cx_none_healthy", "KRITISCH", "Keine gesunden Endpoints",
199+
"Alle Upstream-Endpoints sind nicht erreichbar.",
200+
"Pod-Status und Readiness-Probes prüfen. Envoy-Cluster-Health in Tab A ansehen."),
201+
new Rule("upstream_cx_connect_fail", "KRITISCH", "Verbindung abgelehnt (Connection refused)",
202+
"Envoy kann keine TCP-Verbindung zum Upstream aufbauen.",
203+
"Prüfen: Pod läuft? Richtiger Port? NetworkPolicy blockiert? Service-Selector korrekt?"),
204+
new Rule("upstream_rq_timeout", "WARNUNG", "Request Timeouts",
205+
"Requests zum Upstream überschreiten das Timeout.",
206+
"NetworkPolicy auf blockierte Ports prüfen. DestinationRule Timeout-Werte anpassen."),
207+
new Rule("upstream_cx_connect_timeout", "WARNUNG", "Connection Timeout",
208+
"Verbindungsaufbau zum Upstream schlägt fehl.",
209+
"Pod läuft möglicherweise nicht oder Port ist falsch. NetworkPolicy prüfen."),
210+
new Rule("upstream_rq_5xx", "WARNUNG", "5xx Fehler vom Upstream",
211+
"Der Upstream-Service gibt 5xx-Statuscodes zurück.",
212+
"Upstream-Logs prüfen. VirtualService-Routing und DestinationRule-Gewichtungen validieren."),
213+
new Rule("upstream_rq_retry_limit_exceeded", "INFO", "Retry-Limit überschritten",
214+
"Requests wurden mehrfach wiederholt und das Limit wurde erreicht.",
215+
"VirtualService retryPolicy anpassen oder Upstream-Stabilität verbessern."),
216+
new Rule("upstream_cx_destroy_remote_with_active_rq", "INFO", "Verbindung mit aktiven Requests abgebrochen",
217+
"Die Remote-Seite hat die Verbindung während aktiver Requests getrennt.",
218+
"Upstream Keep-Alive Konfiguration und Istio idle_timeout prüfen.")
219+
);
220+
221+
List<Map<String, Object>> diagnoses = new ArrayList<>();
222+
for (Rule rule : rules) {
223+
List<String> matchingKeys = metrics.entrySet().stream()
224+
.filter(e -> e.getKey().contains(rule.pattern()))
225+
.map(Map.Entry::getKey)
226+
.sorted()
227+
.collect(java.util.stream.Collectors.toList());
228+
229+
if (!matchingKeys.isEmpty()) {
230+
Map<String, Object> diagnosis = new LinkedHashMap<>();
231+
diagnosis.put("severity", rule.severity());
232+
diagnosis.put("title", rule.title());
233+
diagnosis.put("description", rule.description());
234+
diagnosis.put("recommendation", rule.recommendation());
235+
diagnosis.put("affectedMetrics", matchingKeys);
236+
diagnoses.add(diagnosis);
237+
}
238+
}
239+
return diagnoses;
240+
}
241+
188242
private String summarizeClusters(String rawClusters) {
189243
if (rawClusters == null || rawClusters.isBlank())
190244
return "Keine Upstream-Daten";

src/main/resources/public/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ <h6 class="mb-0 text-primary fw-bold"><i class="bi bi-diagram-3-fill me-2"></i>I
124124
<li class="nav-item">
125125
<button class="nav-link small text-danger" id="errors-tab" data-bs-toggle="tab"
126126
data-bs-target="#tab-errors" type="button">
127-
<i class="bi bi-exclamation-octagon me-1"></i>B) Aktive Fehler-Metriken
127+
<i class="bi bi-exclamation-octagon me-1"></i>B) Aktive Metriken
128128
</button>
129129
</li>
130130
<li class="nav-item">

src/main/resources/public/js/k8s-client.js

Lines changed: 75 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,22 +59,81 @@ const K8sClient = (() => {
5959
// Tab B: Fehler-Metriken
6060
// =========================================================================
6161

62-
/**
 * Renders tab B (Envoy metrics and derived diagnoses) into the given container.
 *
 * Output, in order: a badge with the metric count, an optional "target correlation"
 * box listing the metrics whose name contains the hostname of `targetUrl`, one alert
 * per diagnosis, and a table of all metrics. When there are no metrics at all, a
 * single success alert is shown instead.
 *
 * All server- or user-supplied text is HTML-escaped before it is written to
 * `innerHTML`, because metric names and diagnosis texts are interpolated into both
 * element content and `title="…"` attributes.
 *
 * @param {object|null|undefined} healthDiagnostics report section with
 *        `activeErrorMetrics` (name -> value), optional `errorCount` and optional
 *        `diagnoses` (objects with severity, title, description, recommendation,
 *        affectedMetrics)
 * @param {{innerHTML: string}} errorDiv container element that receives the markup
 * @param {string} [targetUrl=''] URL of the request target; only its hostname is used
 */
function renderTabB(healthDiagnostics, errorDiv, targetUrl = '') {
    // Escapes text for use in element content and in double-quoted attribute values.
    const esc = value => String(value ?? '')
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    const metricRow = (name, value, maxWidth) => `
                <tr>
                    <td class="font-monospace text-truncate" style="max-width:${maxWidth}px;" title="${esc(name)}">${esc(name)}</td>
                    <td class="fw-bold text-danger">${esc(value)}</td>
                </tr>`;

    const errorEntries = Object.entries(healthDiagnostics?.activeErrorMetrics ?? {});
    const errorCount = healthDiagnostics?.errorCount ?? errorEntries.length;
    const diagnoses = healthDiagnostics?.diagnoses ?? [];

    if (errorEntries.length === 0) {
        errorDiv.innerHTML = `<div class="alert alert-success small"><i class="bi bi-check-circle me-2"></i>Keine aktiven Fehler-Metriken.</div>`;
        return;
    }

    // Extract the hostname of the target URL to correlate it with cluster metric names.
    let targetHostname = '';
    try {
        targetHostname = new URL(targetUrl).hostname.toLowerCase();
    } catch (_) {
        // Empty or malformed URL: there is nothing to correlate, so the box is omitted.
    }
    const targetMetrics = targetHostname
        ? errorEntries.filter(([name]) => name.toLowerCase().includes(targetHostname))
        : [];
    const targetHtml = targetMetrics.length > 0 ? `
        <div class="alert alert-danger py-2 px-3 mb-2">
            <div class="d-flex align-items-center mb-1">
                <i class="bi bi-crosshair me-2"></i>
                <span class="fw-bold small">ZIEL-KORRELATION – ${esc(targetHostname)}</span>
            </div>
            <p class="mb-1 x-small">Envoy-Metriken direkt für das Ziel-Cluster gefunden:</p>
            <table class="table table-sm x-small mb-0">
                <tbody>${targetMetrics.map(([name, value]) => metricRow(name, value, 280)).join('')}
                </tbody>
            </table>
        </div>` : '';

    const severityConfig = {
        'KRITISCH': { cls: 'danger', icon: 'bi-exclamation-octagon-fill' },
        'WARNUNG': { cls: 'warning', icon: 'bi-exclamation-triangle-fill' },
        'INFO': { cls: 'info', icon: 'bi-info-circle-fill' },
    };
    const fallbackSeverity = { cls: 'secondary', icon: 'bi-question-circle' };

    const diagnosisHtml = diagnoses.map(d => {
        const cfg = severityConfig[d.severity] ?? fallbackSeverity;
        // A diagnosis without a metric list must not break rendering of the whole tab.
        const affected = d.affectedMetrics ?? [];
        const metricsHtml = affected.map(m =>
            `<code class="d-block x-small text-truncate" style="max-width:100%;" title="${esc(m)}">${esc(m)}</code>`
        ).join('');
        return `
        <div class="alert alert-${cfg.cls} py-2 px-3 mb-2">
            <div class="d-flex align-items-center mb-1">
                <i class="bi ${cfg.icon} me-2"></i>
                <span class="fw-bold small">${esc(d.severity)} – ${esc(d.title)}</span>
            </div>
            <p class="mb-1 x-small">${esc(d.description)}</p>
            <p class="mb-1 x-small"><strong>Empfehlung:</strong> ${esc(d.recommendation)}</p>
            <details class="x-small mt-1">
                <summary class="text-muted" style="cursor:pointer;">Betroffene Metriken (${affected.length})</summary>
                <div class="mt-1">${metricsHtml}</div>
            </details>
        </div>`;
    }).join('');

    const tableHtml = `
        <div class="mt-3 mb-1 x-small fw-bold text-uppercase text-muted">Alle aktiven Fehler-Metriken</div>
        <table class="table table-sm x-small">
            <thead><tr><th>Metrik</th><th>Wert</th></tr></thead>
            <tbody>${errorEntries.map(([name, value]) => metricRow(name, value, 300)).join('')}
            </tbody>
        </table>`;

    errorDiv.innerHTML = `
        <div class="mb-2"><span class="badge bg-secondary">${esc(errorCount)} Metriken</span></div>
        ${targetHtml}
        ${diagnosisHtml}
        ${tableHtml}`;
}
79138

80139
// =========================================================================
@@ -289,8 +348,9 @@ const K8sClient = (() => {
289348
configDiv.innerHTML = warning;
290349
errorDiv.innerHTML = `<div class="alert alert-warning m-3 small">Keine Daten – Sidecar nicht aktiv.</div>`;
291350
} else {
351+
const targetUrl = document.getElementById('url')?.value ?? '';
292352
renderTabA(report.reachability, configDiv);
293-
renderTabB(report.healthDiagnostics, errorDiv);
353+
renderTabB(report.healthDiagnostics, errorDiv, targetUrl);
294354
}
295355

296356
renderTabC(context, status, resourceDiv);

0 commit comments

Comments
 (0)