Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions lib/clusterops/clusterreports.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

configv1beta1 "github.com/projectsveltos/addon-controller/api/v1beta1"
"github.com/projectsveltos/addon-controller/lib/utils"
libsveltosv1beta1 "github.com/projectsveltos/libsveltos/api/v1beta1"
)

Expand All @@ -38,8 +39,8 @@ const (
func UpdateClusterReportWithResourceReports(ctx context.Context, c client.Client,
clusterNamespace, clusterName string, clusterType libsveltosv1beta1.ClusterType, isDryRun bool,
profileRef *corev1.ObjectReference, resourceReports []libsveltosv1beta1.ResourceReport,
featureID libsveltosv1beta1.FeatureID) error {

featureID libsveltosv1beta1.FeatureID,
) error {
// This is no-op unless in DryRun mode
if !isDryRun {
return nil
Expand Down Expand Up @@ -69,13 +70,13 @@ func UpdateClusterReportWithResourceReports(ctx context.Context, c client.Client
}

func GetClusterReportName(profileKind, profileName, clusterName string, clusterType libsveltosv1beta1.ClusterType) string {
// TODO: shorten this value
prefix := "" // For backward compatibility (before addition of Profile) leave this empty for ClusterProfiles
if profileKind == configv1beta1.ProfileKind {
prefix = "p--"
}
return prefix + profileName + nameSeparator + strings.ToLower(string(clusterType)) +
name := prefix + profileName + nameSeparator + strings.ToLower(string(clusterType)) +
nameSeparator + clusterName
return utils.EllipsizeName(name)
}

// ConvertResourceReportsToObjectReference converts a slice of ResourceReports to
Expand Down
12 changes: 9 additions & 3 deletions lib/clusterops/clustersummary.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

configv1beta1 "github.com/projectsveltos/addon-controller/api/v1beta1"
"github.com/projectsveltos/addon-controller/lib/utils"
libsveltosv1beta1 "github.com/projectsveltos/libsveltos/api/v1beta1"
)

Expand All @@ -46,7 +47,8 @@ const (
// ClusterProfile/Profile for a specific Cluster
func GetClusterSummary(ctx context.Context, c client.Client,
profileKind, profileName string, clusterNamespace, clusterName string,
clusterType libsveltosv1beta1.ClusterType) (*configv1beta1.ClusterSummary, error) {
clusterType libsveltosv1beta1.ClusterType,
) (*configv1beta1.ClusterSummary, error) {

profileLabel := ClusterProfileLabelName
if profileKind == configv1beta1.ProfileKind {
Expand Down Expand Up @@ -88,12 +90,16 @@ func GetClusterSummaryName(profileKind, profileName, clusterName string, isSvelt
clusterType = libsveltosv1beta1.ClusterTypeSveltos
}
prefix := GetPrefix(clusterType)

var name string
if profileKind == configv1beta1.ClusterProfileKind {
// For backward compatibility (code before addition of Profiles) do not change this
return fmt.Sprintf("%s-%s-%s", profileName, prefix, clusterName)
name = fmt.Sprintf("%s-%s-%s", profileName, prefix, clusterName)
} else {
name = fmt.Sprintf("p--%s-%s-%s", profileName, prefix, clusterName)
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great. Thank you.

We need to handle upgrade scenario. Since we are changing the name, if ClusterSummary instance already exists before upgrade, we need to handle that. I am thinking we need to first check if clusterSummary with old name already exists, if so return that name. Otherwise return the new name.

this method getManagementClusterClient returns the client to the management cluster.

Also, even when we use the ellipsized name, we need to handle conflicts. So when we generate the ellipsized name we probably need to:

  1. validate no clusterSummary exists with that name (so it can be used)
  2. if one exists, verify we are talking about the same resource (by making sure the labels refer to the correct profile and cluster)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are changing behavior only when we actually hit the edge case of a too-long object name. At least, that was the idea behind the change: don't change names in cases where the object name is valid, and ellipsize the name only in cases where the object name was already invalid and the object would have been impossible to create.

There shouldn't be any name conflict; I mean, the probability of this happening when we add an FNV hash as a suffix is quite low (check out the fuzz test).

The problem with fetching a list of existing clusterSummary objects before generating a unique name is the possibility of a race condition: we would need to add retry logic in every place where we create these objects, which would be a bigger, more impactful change.

If we do decide to go with the fetch-list approach, please let me know if a hash suffix is acceptable as the object name differentiator in that case. We would still need to create unique object names in case of conflict, and adding something like an index number as a suffix would probably increase the chance of races (at least I think it will).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Sorry i missed that we are doing this only when length is over 63 characters. So then yes you are right we don't have to worry about upgrade.

Regarding the conflict, let me think a little more. Maybe instead of fetching, we can keep an in-memory map. The key could be the generated name and the value the profile/cluster names. That will prevent race conditions. And we can rebuild the map on restart and postpone profile reconciliation until that is complete.

Even if rare I feel we should cover it. Overriding a clustersummary might cause stale resources or missing applications.

What do you think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can keep in memory map

I guess the only question is, how that would impact memory footprint in general, probably not noticeable. Generally speaking in-mem map would simplify things a lot, avoiding retry logic in a lot of places.

On a side note, if we can say that our clusters won't go back in time, we can use unixtime as object suffix and completely avoid messing with hashing, on downside that would mean names will always be unique, not predictable per each input. In our usecase I think this can be acceptable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Yes we would need to store only names when exceeding 63 chars. So it won't impact.

But good point. We can safely say clusters won't go back on time. But I am not sure that will work. Those methods to get name are invoked also after clustersummary instances are created. For instance clusterprofile reconciliation invokes it for every matching clusters at every reconciliation

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those methods to get name are invoked also after clustersummary instances are created. For instance clusterprofile reconciliation invokes it for every matching clusters at every reconciliation

This makes me think that we need everything to be deterministic, so any kind of counter approach at suffix won't work (counter, unixtime, ...)

Let's assume we implement a in-mem map for tracking objects, and we use some very prone to collisions hash function, we can detect collision via map key (same would apply to a case of direct k8s object lookup) but what we do next is unclear, how do we recover from collision and still keep names deterministic?

So I am thinking we need to invest a bit more time in that fuzz test: limit the upper-bound string length to what we actually use (~ 63*4 + 5), ensure that it generates reasonable-looking strings, run it against the current hash function for 24h, and see if we get collisions. If we do, increase the hash size and/or replace the hash function, run again, and repeat until no collisions are detected. Then use that as the solution and document that for longer cluster profile and cluster names we use hashing to create predictable object names; hashing has a low chance of collision, but still not zero, so if you ever hit that kind of problem, please report it and rename your profile.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks. Can we do this:

  1. if name is less than 63 characters, no changes (you are already doing it)
  2. if name is longer than 63 character, check if we have already allocated one for this profile/cluster. If so use that one (this is done using an in memory map)
  3. If name is longer than 63 character and we never allocated one before, allocate a new one (using the function you have currently). Verify this has not been allocated to anybody else, update the in memory map and return it
  4. If the name is longer than 63 characters, we never allocated one before, and the name generated with the function you currently have hits a collision, append an index to it and keep verifying until there is no collision. Update the in-memory map and return it

On restart we can rebuild this in memory map before we reconcile any clusterProfile/profile

I feel your PR already took care of most, we just need to handle those corners cases.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we can, but keep in mind that in case of a collision and an appended index we can get into a situation where the other object that caused the collision gets removed, and we will have a non-deterministic name for the new object (the one that was created with an index), basically reusing the same name.
(this is what I was referencing before)

And if we already talking about adding indexes we might as well do in-tree map + unixtime and remove a need of hash and additional hash+index

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct but when a clustersummary is deleted, if name was allocated via this new mechanism, we can remove from map.

Also what we want to solve is to make sure clustersummary name is always valid. Today if ClusterProfile name is really long and the cluster name is also long (exceeding the 253 chars) creation of a clustersummary will fail

return fmt.Sprintf("p--%s-%s-%s", profileName, prefix, clusterName)
return utils.EllipsizeName(name)
}

func GetPrefix(clusterType libsveltosv1beta1.ClusterType) string {
Expand Down
27 changes: 27 additions & 0 deletions lib/utils/names.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package utils

import (
"fmt"
"hash/fnv"
)

// EllipsizeName returns a name that is at most 63 bytes long.
//
// Names that already fit are returned unchanged. Longer names are cut to a
// prefix and suffixed with "-" plus the 8-character lowercase hex FNV-1a
// (32-bit) hash of the full original name, so the result is deterministic for
// a given input and distinct long inputs sharing a prefix most likely map to
// different results (hash collisions remain possible).
//
// The prefix is normally 54 bytes, giving a 63-byte result. It is shortened
// further when needed so that:
//   - a multi-byte UTF-8 character is never split in the middle, and
//   - the prefix never ends with '.', because a "." immediately followed by
//     the "-" separator starts a label with a hyphen, which is not a valid
//     DNS-1123 subdomain name.
//
// If nothing is left of the prefix, the hash alone is returned.
func EllipsizeName(name string) string {
	const (
		maxLength  = 63
		hashLength = 8
		separator  = "-"
	)

	if len(name) <= maxLength {
		return name
	}

	// Hash the full name so the suffix reflects the part that gets cut off.
	h := fnv.New32a()
	_, _ = h.Write([]byte(name)) // hash.Hash.Write never returns an error.
	hash := fmt.Sprintf("%08x", h.Sum32())

	// Reserve room for the separator and the hash.
	cut := maxLength - hashLength - len(separator)

	// Back up to the start of a UTF-8 sequence if the cut lands on a
	// continuation byte (10xxxxxx).
	for cut > 0 && name[cut]&0xC0 == 0x80 {
		cut--
	}

	// Drop trailing dots so the result never contains ".-".
	for cut > 0 && name[cut-1] == '.' {
		cut--
	}

	if cut == 0 {
		return hash
	}

	return name[:cut] + separator + hash
}
132 changes: 132 additions & 0 deletions lib/utils/names_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
package utils

import (
"regexp"
"strings"
"testing"
"unicode"
)

// TestEllipsizeName checks that EllipsizeName leaves names of up to 63
// characters untouched and that longer names are shortened to exactly 63
// characters ending in "-" followed by 8 lowercase hex digits.
func TestEllipsizeName(t *testing.T) {
	const maxLength = 63

	tests := []struct {
		name     string
		input    string
		wantSame bool // true if we expect output == input
	}{
		{
			name:     "short name unchanged",
			input:    "short",
			wantSame: true,
		},
		{
			name:     "exactly 63 chars unchanged",
			input:    strings.Repeat("a", 63),
			wantSame: true,
		},
		{
			name:     "64 chars gets ellipsized",
			input:    strings.Repeat("a", 64),
			wantSame: false,
		},
		{
			name:     "very long name gets ellipsized",
			input:    strings.Repeat("a", 100),
			wantSame: false,
		},
		{
			name:     "real cluster summary name",
			input:    "very-long-profile-name-that-exceeds-limits-capi-very-long-cluster-name-that-also-exceeds",
			wantSame: false,
		},
		{
			name:     "real cluster report name",
			input:    "p--very-long-profile-name-that-exceeds-limits--capi--very-long-cluster-name-that-also-exceeds",
			wantSame: false,
		},
	}

	hashSuffixPattern := regexp.MustCompile(`-[0-9a-f]{8}$`)

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := EllipsizeName(tt.input)

			// Check if result matches expectation
			if tt.wantSame && result != tt.input {
				t.Errorf("EllipsizeName() = %q, want same as input %q", result, tt.input)
			}
			if !tt.wantSame && result == tt.input {
				t.Errorf("EllipsizeName() = %q, expected different from input %q", result, tt.input)
			}

			// Check length constraint
			if len(result) > maxLength {
				t.Errorf("EllipsizeName() result length = %d, want <= %d", len(result), maxLength)
			}

			// For ellipsized names, check hash suffix format
			if len(tt.input) > maxLength {
				if len(result) != maxLength {
					t.Errorf("EllipsizeName() ellipsized result length = %d, want %d", len(result), maxLength)
				}

				// Should end with -<8hexchars> pattern
				if !hashSuffixPattern.MatchString(result) {
					t.Errorf("EllipsizeName() result %q should end with -<8hexchars>", result)
				}
			}
		})
	}
}

// isValidObjectName reports whether name looks like a Kubernetes object name:
// non-empty, at most 253 bytes, made only of ASCII lowercase letters, ASCII
// digits, '-' and '.', and starting and ending with a letter or digit.
//
// The length limit is the 253-character DNS subdomain limit rather than 63,
// so that names longer than 63 characters (the inputs EllipsizeName actually
// shortens) can still be recognised as valid candidates.
func isValidObjectName(name string) bool {
	const maxObjectNameLength = 253

	if name == "" || len(name) > maxObjectNameLength {
		return false
	}

	// unicode.IsLower/IsDigit alone also accept non-ASCII letters and digits
	// (e.g. 'é' or Arabic-Indic digits); restrict to ASCII explicitly.
	isAlphanumeric := func(c rune) bool {
		return c <= unicode.MaxASCII && (unicode.IsLower(c) || unicode.IsDigit(c))
	}

	// Bytes >= 0x80 convert to runes above MaxASCII and are rejected here.
	if !isAlphanumeric(rune(name[0])) || !isAlphanumeric(rune(name[len(name)-1])) {
		return false
	}

	for _, c := range name {
		if !isAlphanumeric(c) && c != '-' && c != '.' {
			return false
		}
	}

	return true
}

// FuzzEllipsizeObjectNameHashCollisions feeds EllipsizeName with fuzzed names
// and reports an error whenever two different inputs are shortened to the
// same output. Inputs that are not longer than 63 bytes, or that
// isValidObjectName rejects, are skipped.
func FuzzEllipsizeObjectNameHashCollisions(f *testing.F) {
	seeds := []string{
		"very-long-profile-name-capi-cluster1" + strings.Repeat("x", 30),
		"very-long-profile-name-capi-cluster2" + strings.Repeat("x", 30),
		strings.Repeat("a", 80),
		strings.Repeat("b", 80),
		"p--profile-name--capi--cluster-name" + strings.Repeat("z", 50),
	}
	for _, seed := range seeds {
		f.Add(seed)
	}

	// seen maps every produced name to the input that generated it.
	seen := map[string]string{}

	f.Fuzz(func(t *testing.T, input string) {
		// Names of 63 bytes or fewer are returned as-is and never hashed.
		if len(input) <= 63 {
			t.Skip("Input will not be hashed")
		}

		if !isValidObjectName(input) {
			t.Skip("Input contains invalid characters for Kubernetes names")
		}

		got := EllipsizeName(input)

		// The same output for a different input is a collision.
		if previous, ok := seen[got]; ok && previous != input {
			t.Errorf("Hash collision detected: inputs %q and %q both produced %q", input, previous, got)
		}

		seen[got] = input
	})
}