Skip to content

Commit 9399604

Browse files
afdesk and nikpivkin authored
fix(license): using common way for splitting licenses (#4434)
* fix(license): using common way for splitting licenses * add test cases * TEST new regex * extract function * fix version detection --------- Co-authored-by: Nikita Pivkin <[email protected]>
1 parent 3e2416d commit 9399604

File tree

4 files changed

+95
-10
lines changed

4 files changed

+95
-10
lines changed

pkg/fanal/analyzer/language/analyze.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ func toApplication(fileType, filePath, libFilePath string, r dio.ReadSeekerAt, l
9292
for _, lib := range libs {
9393
var licenses []string
9494
if lib.License != "" {
95-
licenses = strings.Split(lib.License, ",")
95+
licenses = licensing.SplitLicenses(lib.License)
9696
for i, license := range licenses {
9797
licenses[i] = licensing.Normalize(strings.TrimSpace(license))
9898
}

pkg/fanal/analyzer/pkg/dpkg/copyright.go

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ var (
2727
dpkgLicenseAnalyzerVersion = 1
2828

2929
commonLicenseReferenceRegexp = regexp.MustCompile(`/?usr/share/common-licenses/([0-9A-Za-z_.+-]+[0-9A-Za-z+])`)
30-
licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)")
3130
)
3231

3332
// dpkgLicenseAnalyzer parses copyright files and detect licenses
@@ -90,14 +89,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens
9089

9190
l = normalizeLicense(l)
9291
if len(l) > 0 {
93-
// Split licenses without considering "and"/"or"
94-
// examples:
95-
// 'GPL-1+,GPL-2' => {"GPL-1", "GPL-2"}
96-
// 'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1", "Artistic", "Artistic-dist"}
97-
// 'LGPLv3+_or_GPLv2+' => {"LGPLv3", "GPLv2"}
98-
// 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"}
99-
// 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}
100-
for _, lic := range licenseSplitRegexp.Split(l, -1) {
92+
for _, lic := range licensing.SplitLicenses(l) {
10193
lic = licensing.Normalize(lic)
10294
if !slices.Contains(licenses, lic) {
10395
licenses = append(licenses, lic)

pkg/licensing/normalize.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package licensing
22

33
import (
4+
"regexp"
45
"strings"
56
)
67

@@ -80,9 +81,34 @@ var mapping = map[string]string{
8081
"PUBLIC DOMAIN": Unlicense,
8182
}
8283

84+
// Split licenses without considering "and"/"or"
85+
// examples:
86+
// 'GPL-1+,GPL-2' => {"GPL-1+", "GPL-2"}
87+
// 'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1+", "Artistic", "Artistic-dist"}
88+
// 'LGPLv3+_or_GPLv2+' => {"LGPLv3+", "GPLv2+"}
89+
// 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"}
90+
// 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}
91+
// 'BSD 3-Clause License or Apache License, Version 2.0' => {"BSD 3-Clause License", "Apache License, Version 2.0"}
92+
// var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)")
93+
94+
var licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)")
95+
8396
func Normalize(name string) string {
8497
if l, ok := mapping[strings.ToUpper(name)]; ok {
8598
return l
8699
}
87100
return name
88101
}
102+
103+
func SplitLicenses(str string) []string {
104+
var licenses []string
105+
for _, maybeLic := range licenseSplitRegexp.Split(str, -1) {
106+
lower := strings.ToLower(maybeLic)
107+
if (strings.HasPrefix(lower, "ver ") || strings.HasPrefix(lower, "version ")) && len(licenses) > 0 {
108+
licenses[len(licenses)-1] += ", " + maybeLic
109+
} else {
110+
licenses = append(licenses, maybeLic)
111+
}
112+
}
113+
return licenses
114+
}

pkg/licensing/normalize_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package licensing_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
8+
"github.com/aquasecurity/trivy/pkg/licensing"
9+
)
10+
11+
func TestSplitLicenses(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
license string
15+
licenses []string
16+
}{
17+
{
18+
"simple list comma-separated",
19+
"GPL-1+,GPL-2",
20+
[]string{"GPL-1+", "GPL-2"},
21+
},
22+
{
23+
"simple list comma-separated",
24+
"GPL-1+,GPL-2,GPL-3",
25+
[]string{"GPL-1+", "GPL-2", "GPL-3"},
26+
},
27+
{
28+
"3 licenses 'or'-separated",
29+
"GPL-1+ or Artistic or Artistic-dist",
30+
[]string{"GPL-1+", "Artistic", "Artistic-dist"},
31+
},
32+
// '
33+
{
34+
"two licenses _or_ separated",
35+
"LGPLv3+_or_GPLv2+",
36+
[]string{"LGPLv3+", "GPLv2+"},
37+
},
38+
// '
39+
{
40+
"licenses `and`-separated",
41+
"BSD-3-CLAUSE and GPL-2",
42+
[]string{"BSD-3-CLAUSE", "GPL-2"},
43+
},
44+
{
45+
"three licenses and/or separated",
46+
"GPL-1+ or Artistic, and BSD-4-clause-POWERDOG",
47+
[]string{"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"},
48+
},
49+
{
50+
"two licenses with version",
51+
"Apache License,Version 2.0, OSET Public License version 2.1",
52+
[]string{"Apache License, Version 2.0", "OSET Public License version 2.1"},
53+
},
54+
{
55+
"the license starts with `ver`",
56+
"verbatim and BSD-4-clause",
57+
[]string{"verbatim", "BSD-4-clause"},
58+
},
59+
}
60+
61+
for _, tt := range tests {
62+
t.Run(tt.name, func(t *testing.T) {
63+
res := licensing.SplitLicenses(tt.license)
64+
assert.Equal(t, tt.licenses, res)
65+
})
66+
}
67+
}

0 commit comments

Comments
 (0)