Skip to content

Commit bb4b91c

Browse files
committed
feat(*): add ilm metrics
Signed-off-by: iishabakaev <[email protected]>
1 parent 6bef1fc commit bb4b91c

File tree

6 files changed

+438
-0
lines changed

6 files changed

+438
-0
lines changed

collector/ilm_indices.go

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"encoding/json"
18+
"fmt"
19+
"net/http"
20+
"net/url"
21+
"path"
22+
23+
"github.com/go-kit/log"
24+
"github.com/go-kit/log/level"
25+
"github.com/prometheus/client_golang/prometheus"
26+
)
27+
28+
type ilmMetric struct {
29+
Type prometheus.ValueType
30+
Desc *prometheus.Desc
31+
Value func(timeMillis float64) float64
32+
Labels []string
33+
}
34+
35+
// Index Lifecycle Management information object
36+
type IlmIndiciesCollector struct {
37+
logger log.Logger
38+
client *http.Client
39+
url *url.URL
40+
41+
up prometheus.Gauge
42+
totalScrapes prometheus.Counter
43+
jsonParseFailures prometheus.Counter
44+
45+
ilmMetric ilmMetric
46+
}
47+
48+
// defaultIlmIndicesMappingsLabels lists the label names attached to the
// per-index ILM status metric, in the order the label values are supplied.
var defaultIlmIndicesMappingsLabels = []string{
	"index",
	"phase",
	"action",
	"step",
}
51+
52+
// NewIlmIndicies defines Index Lifecycle Management Prometheus metrics
53+
func NewIlmIndicies(logger log.Logger, client *http.Client, url *url.URL) *IlmIndiciesCollector {
54+
subsystem := "ilm_index"
55+
56+
return &IlmIndiciesCollector{
57+
logger: logger,
58+
client: client,
59+
url: url,
60+
61+
up: prometheus.NewGauge(prometheus.GaugeOpts{
62+
Name: prometheus.BuildFQName(namespace, subsystem, "up"),
63+
Help: "Was the last scrape of the ElasticSearch ILM endpoint successful.",
64+
}),
65+
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
66+
Name: prometheus.BuildFQName(namespace, subsystem, "total_scrapes"),
67+
Help: "Current total ElasticSearch ILM scrapes.",
68+
}),
69+
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
70+
Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures"),
71+
Help: "Number of errors while parsing JSON.",
72+
}),
73+
ilmMetric: ilmMetric{
74+
Type: prometheus.GaugeValue,
75+
Desc: prometheus.NewDesc(
76+
prometheus.BuildFQName(namespace, subsystem, "status"),
77+
"Status of ILM policy for index",
78+
defaultIlmIndicesMappingsLabels, nil),
79+
Value: func(timeMillis float64) float64 {
80+
return timeMillis
81+
},
82+
},
83+
}
84+
}
85+
86+
// Describe adds metrics description
87+
func (i *IlmIndiciesCollector) Describe(ch chan<- *prometheus.Desc) {
88+
ch <- i.ilmMetric.Desc
89+
ch <- i.up.Desc()
90+
ch <- i.totalScrapes.Desc()
91+
ch <- i.jsonParseFailures.Desc()
92+
}
93+
94+
func (i *IlmIndiciesCollector) fetchAndDecodeIlm() (IlmResponse, error) {
95+
var ir IlmResponse
96+
97+
u := *i.url
98+
u.Path = path.Join(u.Path, "/_all/_ilm/explain")
99+
100+
res, err := i.client.Get(u.String())
101+
if err != nil {
102+
return ir, fmt.Errorf("failed to get index stats from %s://%s:%s%s: %s",
103+
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
104+
}
105+
106+
defer func() {
107+
err = res.Body.Close()
108+
if err != nil {
109+
_ = level.Warn(i.logger).Log(
110+
"msg", "failed to close http.Client",
111+
"err", err,
112+
)
113+
}
114+
}()
115+
116+
if res.StatusCode != http.StatusOK {
117+
return ir, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
118+
}
119+
120+
if err := json.NewDecoder(res.Body).Decode(&ir); err != nil {
121+
i.jsonParseFailures.Inc()
122+
return ir, err
123+
}
124+
125+
return ir, nil
126+
}
127+
128+
// bool2int converts a boolean into a metric value: 1 for true, 0 for false.
func bool2int(managed bool) float64 {
	var v float64
	if managed {
		v = 1
	}
	return v
}
134+
135+
// Collect pulls metric values from Elasticsearch
136+
func (i *IlmIndiciesCollector) Collect(ch chan<- prometheus.Metric) {
137+
defer func() {
138+
ch <- i.up
139+
ch <- i.totalScrapes
140+
ch <- i.jsonParseFailures
141+
}()
142+
143+
// indices
144+
ilmResp, err := i.fetchAndDecodeIlm()
145+
if err != nil {
146+
i.up.Set(0)
147+
_ = level.Warn(i.logger).Log(
148+
"msg", "failed to fetch and decode ILM stats",
149+
"err", err,
150+
)
151+
return
152+
}
153+
i.totalScrapes.Inc()
154+
i.up.Set(1)
155+
156+
for indexName, indexIlm := range ilmResp.Indices {
157+
ch <- prometheus.MustNewConstMetric(
158+
i.ilmMetric.Desc,
159+
i.ilmMetric.Type,
160+
i.ilmMetric.Value(bool2int(indexIlm.Managed)),
161+
indexName, indexIlm.Phase, indexIlm.Action, indexIlm.Step,
162+
)
163+
}
164+
}

collector/ilm_indices_response.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
type IlmResponse struct {
17+
Indices map[string]IlmIndexResponse `json:"indices"`
18+
}
19+
20+
// IlmIndexResponse is the ILM explain entry of a single index.
type IlmIndexResponse struct {
	// Index is decoded from the "index" key.
	Index string `json:"index"`
	// Managed is decoded from the "managed" key.
	Managed bool `json:"managed"`

	// Phase, Action and Step are decoded from the keys of the same name.
	Phase  string `json:"phase"`
	Action string `json:"action"`
	Step   string `json:"step"`

	// StepTimeMillis is decoded from "step_time_millis".
	StepTimeMillis float64 `json:"step_time_millis"`
}

collector/ilm_status.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"encoding/json"
18+
"fmt"
19+
"io/ioutil"
20+
"net/http"
21+
"net/url"
22+
"path"
23+
24+
"github.com/go-kit/log"
25+
"github.com/go-kit/log/level"
26+
"github.com/prometheus/client_golang/prometheus"
27+
)
28+
29+
// ilm_statuses enumerates the ILM operation modes a sample is emitted for.
var ilm_statuses = []string{
	"STOPPED",
	"RUNNING",
	"STOPPING",
}
32+
33+
type ilmStatusMetric struct {
34+
Type prometheus.ValueType
35+
Desc *prometheus.Desc
36+
Value func(ilm *IlmStatusResponse, status string) float64
37+
Labels func(status string) []string
38+
}
39+
40+
// IlmStatusCollector information struct
41+
type IlmStatusCollector struct {
42+
logger log.Logger
43+
client *http.Client
44+
url *url.URL
45+
46+
up prometheus.Gauge
47+
totalScrapes, jsonParseFailures prometheus.Counter
48+
49+
metric ilmStatusMetric
50+
}
51+
52+
// NewIlmStatus defines Indices IndexIlms Prometheus metrics
53+
func NewIlmStatus(logger log.Logger, client *http.Client, url *url.URL) *IlmStatusCollector {
54+
subsystem := "ilm"
55+
56+
return &IlmStatusCollector{
57+
logger: logger,
58+
client: client,
59+
url: url,
60+
61+
up: prometheus.NewGauge(prometheus.GaugeOpts{
62+
Name: prometheus.BuildFQName(namespace, subsystem, "up"),
63+
Help: "Was the last scrape of the ElasticSearch Indices Ilms endpoint successful.",
64+
}),
65+
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
66+
Name: prometheus.BuildFQName(namespace, subsystem, "scrapes_total"),
67+
Help: "Current total ElasticSearch Indices Ilms scrapes.",
68+
}),
69+
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
70+
Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures_total"),
71+
Help: "Number of errors while parsing JSON.",
72+
}),
73+
metric: ilmStatusMetric{
74+
Type: prometheus.GaugeValue,
75+
Desc: prometheus.NewDesc(
76+
prometheus.BuildFQName(namespace, subsystem, "status"),
77+
"Current status of ilm. Status can be STOPPED, RUNNING, STOPPING.",
78+
ilm_statuses, nil,
79+
),
80+
Value: func(ilm *IlmStatusResponse, status string) float64 {
81+
if ilm.OperationMode == status {
82+
return 1
83+
}
84+
return 0
85+
},
86+
},
87+
}
88+
}
89+
90+
// Describe add Snapshots metrics descriptions
91+
func (im *IlmStatusCollector) Describe(ch chan<- *prometheus.Desc) {
92+
ch <- im.metric.Desc
93+
ch <- im.up.Desc()
94+
ch <- im.totalScrapes.Desc()
95+
ch <- im.jsonParseFailures.Desc()
96+
}
97+
98+
func (im *IlmStatusCollector) getAndParseURL(u *url.URL) (*IlmStatusResponse, error) {
99+
res, err := im.client.Get(u.String())
100+
if err != nil {
101+
return nil, fmt.Errorf("failed to get from %s://%s:%s%s: %s",
102+
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
103+
}
104+
105+
if res.StatusCode != http.StatusOK {
106+
return nil, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
107+
}
108+
109+
body, err := ioutil.ReadAll(res.Body)
110+
if err != nil {
111+
_ = level.Warn(im.logger).Log("msg", "failed to read response body", "err", err)
112+
return nil, err
113+
}
114+
115+
err = res.Body.Close()
116+
if err != nil {
117+
_ = level.Warn(im.logger).Log("msg", "failed to close response body", "err", err)
118+
return nil, err
119+
}
120+
121+
var imr IlmStatusResponse
122+
if err := json.Unmarshal(body, &imr); err != nil {
123+
im.jsonParseFailures.Inc()
124+
return nil, err
125+
}
126+
127+
return &imr, nil
128+
}
129+
130+
func (im *IlmStatusCollector) fetchAndDecodeIlm() (*IlmStatusResponse, error) {
131+
u := *im.url
132+
u.Path = path.Join(u.Path, "/_ilm/status")
133+
return im.getAndParseURL(&u)
134+
}
135+
136+
// Collect gets all indices Ilms metric values
137+
func (im *IlmStatusCollector) Collect(ch chan<- prometheus.Metric) {
138+
139+
im.totalScrapes.Inc()
140+
defer func() {
141+
ch <- im.up
142+
ch <- im.totalScrapes
143+
ch <- im.jsonParseFailures
144+
}()
145+
146+
indicesIlmsResponse, err := im.fetchAndDecodeIlm()
147+
if err != nil {
148+
im.up.Set(0)
149+
_ = level.Warn(im.logger).Log(
150+
"msg", "failed to fetch and decode cluster ilm status",
151+
"err", err,
152+
)
153+
return
154+
}
155+
im.up.Set(1)
156+
157+
for _, status := range ilm_statuses {
158+
ch <- prometheus.MustNewConstMetric(
159+
im.metric.Desc,
160+
im.metric.Type,
161+
im.metric.Value(indicesIlmsResponse, status),
162+
status,
163+
)
164+
}
165+
166+
}

collector/ilm_status_response.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
// IlmStatusResponse is the decoded body of an Elasticsearch ILM status
// request.
type IlmStatusResponse struct {
	// OperationMode is decoded from the "operation_mode" key.
	OperationMode string `json:"operation_mode"`
}

0 commit comments

Comments
 (0)