2020-11-28 21:57:22

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH v5] checkpatch: add fix and improve warning msg for non-standard signature

On Sun, 2020-11-29 at 00:05 +0530, Aditya Srivastava wrote:
> Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
> styles.

Seems OK, but here are some last trivial notes:

> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
[]
> +sub find_standard_signature {
> + my ($sign_off) = @_;
> + my @standard_signature_tags = (
> + 'signed-off-by:', 'co-developed-by:', 'acked-by:', 'tested-by:',
> + 'reviewed-by:', 'reported-by:', 'suggested-by:'

I would change this to the normal signatures:

my @standard_signature_tags = (
'Signed-off-by:', 'Co-developed-by:', 'Acked-by:', 'Tested-by:',
'Reviewed-by:', 'Reported-by:', 'Suggested-by:'

> + );
> + my $standard_signature;
> + my $min_edit_distance = 20; # setting default value

20 seems arbitrary, maybe (~0 << 1) ?

> + my $edit_distance;

move this into the foreach (or maybe not use this at all)

> + foreach (@standard_signature_tags) {

foreach style in this code uses foreach my $<something> and not $_

foreach my $standard (@standard_signature_tags) {

> + $edit_distance = get_edit_distance($sign_off, $_);

So:

my $edit_distance = get_edit_distance($sign_off, $standard);

> + if ($edit_distance < $min_edit_distance) {
> + $min_edit_distance = $edit_distance;
> + $standard_signature = $_;
> + }
> + }
> + if ($min_edit_distance <= 2) {
> + return ucfirst($standard_signature);

return $standard;

Though it may be simpler to test for <= 2 inside the loop, since the
lowercasing and dash stripping are already done inside get_edit_distance.
Something like this seems rather simpler:

foreach my $standard (@standard_signature_tags) {
return $standard if (get_edit_distance($sign_off, $standard) <= 2);
}

return "";

> @@ -2773,8 +2839,17 @@ sub process {
>   my $ucfirst_sign_off = ucfirst(lc($sign_off));
>
>
>   if ($sign_off !~ /$signature_tags/) {
> - WARN("BAD_SIGN_OFF",
> - "Non-standard signature: $sign_off\n" . $herecurr);
> + my $suggested_signature = find_standard_signature($sign_off);
> + if ($suggested_signature eq "") {
> + WARN("BAD_SIGN_OFF",
> + "Non-standard signature: $sign_off\n" . $herecurr);
> + } else {
> + if (WARN("BAD_SIGN_OFF",
> + "Non-standard signature: $sign_off. Perhaps '$suggested_signature'\n" . $herecurr) &&

Please use consistent '' or nothing around signatures:

"Non-standard signature: '$sign_off' - likely typo of '$suggested_signature'\n" . $herecurr) &&

> + $fix) {
> + $fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
> + }
> + }
>   }
>   if (defined $space_before && $space_before ne "") {
>   if (WARN("BAD_SIGN_OFF",



2020-11-28 22:00:56

by Aditya Srivastava

[permalink] [raw]
Subject: [PATCH v6] checkpatch: add fix and improve warning msg for non-standard signature

Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
styles.

A large number of these warnings occur because of typos in
signature tags. An evaluation over v4.13..v5.8 showed that out of 539
warnings due to non-standard signatures, 87 are due to typos.

Following are the standard signature tags which are often incorrectly
used, along with their individual counts of incorrect use (over
v4.13..v5.8):

Reviewed-by: 42
Signed-off-by: 25
Reported-by: 6
Acked-by: 4
Tested-by: 4
Suggested-by: 4

Provide a fix by calculating the Levenshtein distance between the signature
tag and each of the standard signatures, and suggest a standard signature
whose edit distance from the misspelled tag is less than or equal to 2.

Out of the 86 misspelled signatures fixed with this approach, 85 were
found to be good corrections and 1 was a bad correction.

Following was found to be a bad correction:
Tweeted-by (count: 1) => Tested-by

Signed-off-by: Aditya Srivastava <[email protected]>
---
applies on next-20201120

changes in v2: modify commit message: replace specific example with overall evaluation, minor changes

changes in v3: summarize commit message

changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')

changes in v5: modify coding styles: improve function names, whitespaces

changes in v6: Simplify foreach loop; change standard signature tag values to normal ucfirst; modify warning message

scripts/checkpatch.pl | 71 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 69 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index fdfd5ec09be6..4a026926139f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -506,6 +506,64 @@ our $signature_tags = qr{(?xi:
Cc:
)};

+sub edit_distance_min {
+ my (@arr) = @_;
+ my $len = scalar @arr;
+ if ((scalar @arr) < 1) {
+ # if underflow, return
+ return;
+ }
+ my $min = $arr[0];
+ for my $i (0 .. ($len-1)) {
+ if ($arr[$i] < $min) {
+ $min = $arr[$i];
+ }
+ }
+ return $min;
+}
+
+sub get_edit_distance {
+ my ($str1, $str2) = @_;
+ $str1 = lc($str1);
+ $str2 = lc($str2);
+ $str1 =~ s/-//g;
+ $str2 =~ s/-//g;
+ my $len1 = length($str1);
+ my $len2 = length($str2);
+ # two dimensional array storing minimum edit distance
+ my @distance;
+ for my $i (0 .. $len1) {
+ for my $j (0 .. $len2) {
+ if ($i == 0) {
+ $distance[$i][$j] = $j;
+ } elsif ($j == 0) {
+ $distance[$i][$j] = $i;
+ } elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
+ $distance[$i][$j] = $distance[$i - 1][$j - 1];
+ } else {
+ my $dist1 = $distance[$i][$j - 1]; #insert distance
+ my $dist2 = $distance[$i - 1][$j]; # remove
+ my $dist3 = $distance[$i - 1][$j - 1]; #replace
+ $distance[$i][$j] = 1 + edit_distance_min($dist1, $dist2, $dist3);
+ }
+ }
+ }
+ return $distance[$len1][$len2];
+}
+
+sub find_standard_signature {
+ my ($sign_off) = @_;
+ my @standard_signature_tags = (
+ 'Signed-off-by:', 'Co-developed-by:', 'Acked-by:', 'Tested-by:',
+ 'Reviewed-by:', 'Reported-by:', 'Suggested-by:'
+ );
+ foreach my $signature (@standard_signature_tags) {
+ return $signature if (get_edit_distance($sign_off, $signature) <= 2);
+ }
+
+ return "";
+}
+
our @typeListMisordered = (
qr{char\s+(?:un)?signed},
qr{int\s+(?:(?:un)?signed\s+)?short\s},
@@ -2773,8 +2831,17 @@ sub process {
my $ucfirst_sign_off = ucfirst(lc($sign_off));

if ($sign_off !~ /$signature_tags/) {
- WARN("BAD_SIGN_OFF",
- "Non-standard signature: $sign_off\n" . $herecurr);
+ my $suggested_signature = find_standard_signature($sign_off);
+ if ($suggested_signature eq "") {
+ WARN("BAD_SIGN_OFF",
+ "Non-standard signature: $sign_off\n" . $herecurr);
+ } else {
+ if (WARN("BAD_SIGN_OFF",
+ "Non-standard signature: '$sign_off' - perhaps '$suggested_signature'?\n" . $herecurr) &&
+ $fix) {
+ $fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
+ }
+ }
}
if (defined $space_before && $space_before ne "") {
if (WARN("BAD_SIGN_OFF",
--
2.17.1

2020-11-28 22:01:07

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH v6] checkpatch: add fix and improve warning msg for non-standard signature

On Sun, 2020-11-29 at 02:13 +0530, Aditya Srivastava wrote:
> Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
> styles.

Thanks.

Acked-by: Joe Perches <[email protected]>

> A large number of these warnings occur because of typos in
> signature tags. An evaluation over v4.13..v5.8 showed that out of 539
> warnings due to non-standard signatures, 87 are due to typos.
>
> Following are the standard signature tags which are often incorrectly
> used, along with their individual counts of incorrect use (over
> v4.13..v5.8):
>
>  Reviewed-by: 42
>  Signed-off-by: 25
>  Reported-by: 6
>  Acked-by: 4
>  Tested-by: 4
>  Suggested-by: 4
>
> Provide a fix by calculating the Levenshtein distance between the signature
> tag and each of the standard signatures, and suggest a standard signature
> whose edit distance from the misspelled tag is less than or equal to 2.
>
> Out of the 86 misspelled signatures fixed with this approach, 85 were
> found to be good corrections and 1 was a bad correction.
>
> Following was found to be a bad correction:
>  Tweeted-by (count: 1) => Tested-by
>
> Signed-off-by: Aditya Srivastava <[email protected]>
> ---
> applies on next-20201120
>
> changes in v2: modify commit message: replace specific example with overall evaluation, minor changes
>
> changes in v3: summarize commit message
>
> changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')
>
> changes in v5: modify coding styles: improve function names, whitespaces
>
> changes in v6: Simplify foreach loop; change standard signature tag values to normal ucfirst; modify warning message
>
>  scripts/checkpatch.pl | 71 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 69 insertions(+), 2 deletions(-)
>
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index fdfd5ec09be6..4a026926139f 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -506,6 +506,64 @@ our $signature_tags = qr{(?xi:
>   Cc:
>  )};
>
>
> +sub edit_distance_min {
> + my (@arr) = @_;
> + my $len = scalar @arr;
> + if ((scalar @arr) < 1) {
> + # if underflow, return
> + return;
> + }
> + my $min = $arr[0];
> + for my $i (0 .. ($len-1)) {
> + if ($arr[$i] < $min) {
> + $min = $arr[$i];
> + }
> + }
> + return $min;
> +}
> +
> +sub get_edit_distance {
> + my ($str1, $str2) = @_;
> + $str1 = lc($str1);
> + $str2 = lc($str2);
> + $str1 =~ s/-//g;
> + $str2 =~ s/-//g;
> + my $len1 = length($str1);
> + my $len2 = length($str2);
> + # two dimensional array storing minimum edit distance
> + my @distance;
> + for my $i (0 .. $len1) {
> + for my $j (0 .. $len2) {
> + if ($i == 0) {
> + $distance[$i][$j] = $j;
> + } elsif ($j == 0) {
> + $distance[$i][$j] = $i;
> + } elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
> + $distance[$i][$j] = $distance[$i - 1][$j - 1];
> + } else {
> + my $dist1 = $distance[$i][$j - 1]; #insert distance
> + my $dist2 = $distance[$i - 1][$j]; # remove
> + my $dist3 = $distance[$i - 1][$j - 1]; #replace
> + $distance[$i][$j] = 1 + edit_distance_min($dist1, $dist2, $dist3);
> + }
> + }
> + }
> + return $distance[$len1][$len2];
> +}
> +
> +sub find_standard_signature {
> + my ($sign_off) = @_;
> + my @standard_signature_tags = (
> + 'Signed-off-by:', 'Co-developed-by:', 'Acked-by:', 'Tested-by:',
> + 'Reviewed-by:', 'Reported-by:', 'Suggested-by:'
> + );
> + foreach my $signature (@standard_signature_tags) {
> + return $signature if (get_edit_distance($sign_off, $signature) <= 2);
> + }
> +
> + return "";
> +}
> +
>  our @typeListMisordered = (
>   qr{char\s+(?:un)?signed},
>   qr{int\s+(?:(?:un)?signed\s+)?short\s},
> @@ -2773,8 +2831,17 @@ sub process {
>   my $ucfirst_sign_off = ucfirst(lc($sign_off));
>
>
>   if ($sign_off !~ /$signature_tags/) {
> - WARN("BAD_SIGN_OFF",
> - "Non-standard signature: $sign_off\n" . $herecurr);
> + my $suggested_signature = find_standard_signature($sign_off);
> + if ($suggested_signature eq "") {
> + WARN("BAD_SIGN_OFF",
> + "Non-standard signature: $sign_off\n" . $herecurr);
> + } else {
> + if (WARN("BAD_SIGN_OFF",
> + "Non-standard signature: '$sign_off' - perhaps '$suggested_signature'?\n" . $herecurr) &&
> + $fix) {
> + $fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
> + }
> + }
>   }
>   if (defined $space_before && $space_before ne "") {
>   if (WARN("BAD_SIGN_OFF",