Skip to content

Commit a5f6de3

Browse files
iishabakaev and Шабакаев Илья Исмаилович authored
feat(*): add ilm metrics (#513)
* feat(*): add ilm metrics
Signed-off-by: iishabakaev <[email protected]>
* Refactor ilm collectors
Signed-off-by: Шабакаев Илья Исмаилович <[email protected]>
Signed-off-by: Шабакаев Илья Исмаилович <[email protected]>
* fix lint checks
Signed-off-by: Шабакаев Илья Исмаилович <[email protected]>
Signed-off-by: iishabakaev <[email protected]>
Signed-off-by: Шабакаев Илья Исмаилович <[email protected]>
Signed-off-by: Шабакаев Илья Исмаилович <[email protected]>
Co-authored-by: Шабакаев Илья Исмаилович <[email protected]>
1 parent 0509051 commit a5f6de3

File tree

5 files changed

+529
-0
lines changed

5 files changed

+529
-0
lines changed

collector/ilm_indices.go

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
// Copyright 2023 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"encoding/json"
18+
"fmt"
19+
"io/ioutil"
20+
"net/http"
21+
"net/url"
22+
"path"
23+
24+
"github.com/go-kit/log"
25+
"github.com/go-kit/log/level"
26+
"github.com/prometheus/client_golang/prometheus"
27+
)
28+
29+
type ilmMetric struct {
30+
Type prometheus.ValueType
31+
Desc *prometheus.Desc
32+
Value func(timeMillis float64) float64
33+
Labels []string
34+
}
35+
36+
// Index Lifecycle Management information object
37+
type IlmIndiciesCollector struct {
38+
logger log.Logger
39+
client *http.Client
40+
url *url.URL
41+
42+
up prometheus.Gauge
43+
totalScrapes prometheus.Counter
44+
jsonParseFailures prometheus.Counter
45+
46+
ilmMetric ilmMetric
47+
}
48+
49+
type IlmResponse struct {
50+
Indices map[string]IlmIndexResponse `json:"indices"`
51+
}
52+
53+
// IlmIndexResponse is one entry of the "indices" object in an ILM explain
// response. Keys that are absent from the JSON leave the corresponding field
// at its zero value.
type IlmIndexResponse struct {
	// Index is decoded from the "index" key.
	Index string `json:"index"`

	// Managed is decoded from the "managed" key.
	Managed bool `json:"managed"`

	// Phase is decoded from the "phase" key.
	Phase string `json:"phase"`

	// Action is decoded from the "action" key.
	Action string `json:"action"`

	// Step is decoded from the "step" key.
	Step string `json:"step"`

	// StepTimeMillis is decoded from the "step_time_millis" key.
	// NOTE(review): presumably a Unix timestamp in milliseconds - confirm
	// against the Elasticsearch API documentation.
	StepTimeMillis float64 `json:"step_time_millis"`
}
61+
62+
// defaultIlmIndicesMappingsLabels lists, in order, the label names attached to
// the per-index ILM status metric.
var defaultIlmIndicesMappingsLabels = []string{"index", "phase", "action", "step"}
65+
66+
// NewIlmIndicies defines Index Lifecycle Management Prometheus metrics
67+
func NewIlmIndicies(logger log.Logger, client *http.Client, url *url.URL) *IlmIndiciesCollector {
68+
subsystem := "ilm_index"
69+
70+
return &IlmIndiciesCollector{
71+
logger: logger,
72+
client: client,
73+
url: url,
74+
75+
up: prometheus.NewGauge(prometheus.GaugeOpts{
76+
Name: prometheus.BuildFQName(namespace, subsystem, "up"),
77+
Help: "Was the last scrape of the ElasticSearch ILM endpoint successful.",
78+
}),
79+
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
80+
Name: prometheus.BuildFQName(namespace, subsystem, "total_scrapes"),
81+
Help: "Current total ElasticSearch ILM scrapes.",
82+
}),
83+
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
84+
Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures"),
85+
Help: "Number of errors while parsing JSON.",
86+
}),
87+
ilmMetric: ilmMetric{
88+
Type: prometheus.GaugeValue,
89+
Desc: prometheus.NewDesc(
90+
prometheus.BuildFQName(namespace, subsystem, "status"),
91+
"Status of ILM policy for index",
92+
defaultIlmIndicesMappingsLabels, nil),
93+
Value: func(timeMillis float64) float64 {
94+
return timeMillis
95+
},
96+
},
97+
}
98+
}
99+
100+
// Describe adds metrics description
101+
func (i *IlmIndiciesCollector) Describe(ch chan<- *prometheus.Desc) {
102+
ch <- i.ilmMetric.Desc
103+
ch <- i.up.Desc()
104+
ch <- i.totalScrapes.Desc()
105+
ch <- i.jsonParseFailures.Desc()
106+
}
107+
108+
func (i *IlmIndiciesCollector) fetchAndDecodeIlm() (IlmResponse, error) {
109+
var ir IlmResponse
110+
111+
u := *i.url
112+
u.Path = path.Join(u.Path, "/_all/_ilm/explain")
113+
114+
res, err := i.client.Get(u.String())
115+
if err != nil {
116+
return ir, fmt.Errorf("failed to get index stats from %s://%s:%s%s: %s",
117+
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
118+
}
119+
120+
defer func() {
121+
err = res.Body.Close()
122+
if err != nil {
123+
_ = level.Warn(i.logger).Log(
124+
"msg", "failed to close http.Client",
125+
"err", err,
126+
)
127+
}
128+
}()
129+
130+
if res.StatusCode != http.StatusOK {
131+
return ir, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
132+
}
133+
134+
bts, err := ioutil.ReadAll(res.Body)
135+
if err != nil {
136+
i.jsonParseFailures.Inc()
137+
return ir, err
138+
}
139+
140+
if err := json.Unmarshal(bts, &ir); err != nil {
141+
i.jsonParseFailures.Inc()
142+
return ir, err
143+
}
144+
145+
return ir, nil
146+
}
147+
148+
// bool2int converts a boolean into a sample value: 1 for true, 0 for false.
// Despite the name, the result is a float64.
func bool2int(managed bool) float64 {
	var v float64
	if managed {
		v = 1
	}
	return v
}
154+
155+
// Collect pulls metric values from Elasticsearch
156+
func (i *IlmIndiciesCollector) Collect(ch chan<- prometheus.Metric) {
157+
defer func() {
158+
ch <- i.up
159+
ch <- i.totalScrapes
160+
ch <- i.jsonParseFailures
161+
}()
162+
163+
// indices
164+
ilmResp, err := i.fetchAndDecodeIlm()
165+
if err != nil {
166+
i.up.Set(0)
167+
_ = level.Warn(i.logger).Log(
168+
"msg", "failed to fetch and decode ILM stats",
169+
"err", err,
170+
)
171+
return
172+
}
173+
i.totalScrapes.Inc()
174+
i.up.Set(1)
175+
176+
for indexName, indexIlm := range ilmResp.Indices {
177+
ch <- prometheus.MustNewConstMetric(
178+
i.ilmMetric.Desc,
179+
i.ilmMetric.Type,
180+
i.ilmMetric.Value(bool2int(indexIlm.Managed)),
181+
indexName, indexIlm.Phase, indexIlm.Action, indexIlm.Step,
182+
)
183+
}
184+
}

collector/ilm_indices_test.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
// Copyright 2023 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"fmt"
18+
"net/http"
19+
"net/http/httptest"
20+
"net/url"
21+
"testing"
22+
23+
"github.com/go-kit/log"
24+
)
25+
26+
func TestILMMetrics(t *testing.T) {
27+
// Testcases created using:
28+
// docker run -d -p 9200:9200 elasticsearch:VERSION
29+
// curl -XPUT http://localhost:9200/twitter
30+
// curl -X PUT "localhost:9200/_ilm/policy/my_policy?pretty" -H 'Content-Type: application/json' -d'
31+
// {
32+
// "policy": {
33+
// "phases": {
34+
// "warm": {
35+
// "min_age": "10d",
36+
// "actions": {
37+
// "forcemerge": {
38+
// "max_num_segments": 1
39+
// }
40+
// }
41+
// },
42+
// "delete": {
43+
// "min_age": "30d",
44+
// "actions": {
45+
// "delete": {}
46+
// }
47+
// }
48+
// }
49+
// }
50+
// }
51+
// '
52+
// curl -X PUT "localhost:9200/facebook?pretty" -H 'Content-Type: application/json' -d'
53+
// {
54+
// "settings": {
55+
// "index": {
56+
// "lifecycle": {
57+
// "name": "my_policy"
58+
// }
59+
// }
60+
// }
61+
// }
62+
// '
63+
// curl http://localhost:9200/_all/_ilm/explain
64+
tcs := map[string]string{
65+
"6.6.0": `{
66+
"indices": {
67+
"twitter": { "index": "twitter", "managed": false },
68+
"facebook": {
69+
"index": "facebook",
70+
"managed": true,
71+
"policy": "my_policy",
72+
"lifecycle_date_millis": 1660799138565,
73+
"phase": "new",
74+
"phase_time_millis": 1660799138651,
75+
"action": "complete",
76+
"action_time_millis": 1660799138651,
77+
"step": "complete",
78+
"step_time_millis": 1660799138651
79+
}
80+
}
81+
}`,
82+
}
83+
for ver, out := range tcs {
84+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
85+
fmt.Fprintln(w, out)
86+
}))
87+
defer ts.Close()
88+
89+
u, err := url.Parse(ts.URL)
90+
if err != nil {
91+
t.Fatalf("Failed to parse URL: %s", err)
92+
}
93+
c := NewIlmIndicies(log.NewNopLogger(), http.DefaultClient, u)
94+
chr, err := c.fetchAndDecodeIlm()
95+
if err != nil {
96+
t.Fatalf("Failed to fetch or decode indices ilm metrics: %s", err)
97+
}
98+
t.Logf("[%s] indices ilm metrics Response: %+v", ver, chr)
99+
100+
if chr.Indices["twitter"].Managed != false {
101+
t.Errorf("Invalid ilm metrics at twitter.managed")
102+
}
103+
if chr.Indices["facebook"].Managed != true {
104+
t.Errorf("Invalid ilm metrics at facebook.managed")
105+
}
106+
if chr.Indices["facebook"].Phase != "new" {
107+
t.Errorf("Invalid ilm metrics at facebook.phase")
108+
}
109+
if chr.Indices["facebook"].Action != "complete" {
110+
t.Errorf("Invalid ilm metrics at facebook.action")
111+
}
112+
if chr.Indices["facebook"].Step != "complete" {
113+
t.Errorf("Invalid ilm metrics at facebook.step")
114+
}
115+
116+
}
117+
}

0 commit comments

Comments
 (0)