2020-11-28 21:59:13

by Aditya Srivastava

[permalink] [raw]
Subject: [PATCH v4] checkpatch: add fix and improve warning msg for non-standard signature

Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
styles.

A large number of these warnings occur because of typo mistakes in
signature tags. An evaluation over v4.13..v5.8 showed that out of 539
warnings due to non-standard signatures, 87 are due to typo mistakes.

Following are the standard signature tags which are often incorrectly
used, along with their individual counts of incorrect use (over
v4.13..v5.8):

Reviewed-by: 42
Signed-off-by: 25
Reported-by: 6
Acked-by: 4
Tested-by: 4
Suggested-by: 4

Provide a fix by calculating levenshtein distance for the signature tag
with all the standard signatures and suggest a fix with a signature, whose
edit distance is less than or equal to 2 with the misspelled signature.

Out of the 86 misspelled signatures fixed with this approach, 85 were
found to be good corrections and 1 was a bad correction.

Following was found to be a bad correction:
Tweeted-by (count: 1) => Tested-by

Signed-off-by: Aditya Srivastava <[email protected]>
---
changes in v2: modify commit message: replace specific example with overall evaluation, minor changes

changes in v3: summarize commit message

changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')

scripts/checkpatch.pl | 85 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 83 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index fdfd5ec09be6..2b1afd763d8d 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -506,6 +506,77 @@ our $signature_tags = qr{(?xi:
Cc:
)};

+sub get_min {
+ my (@arr) = @_;
+ my $len = scalar @arr;
+ if((scalar @arr) < 1) {
+ # if underflow, return
+ return;
+ }
+ my $min = $arr[0];
+ for my $i (0 .. ($len-1)) {
+ if ($arr[$i] < $min) {
+ $min = $arr[$i];
+ }
+ }
+ return $min;
+}
+
+sub get_edit_distance {
+ my ($str1, $str2) = @_;
+ my $len1 = length($str1);
+ my $len2 = length($str2);
+ # two dimensional array storing minimum edit distance
+ my @distance;
+ for my $i (0 .. $len1) {
+ for my $j (0 .. $len2) {
+ if ($i == 0) {
+ $distance[$i][$j] = $j;
+ }
+ elsif ($j == 0) {
+ $distance[$i][$j] = $i;
+ }
+ elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
+ $distance[$i][$j] = $distance[$i - 1][$j - 1];
+ }
+ else {
+ my $dist1 = $distance[$i][$j - 1]; #insert distance
+ my $dist2 = $distance[$i - 1][$j]; # remove
+ my $dist3 = $distance[$i - 1][$j - 1]; #replace
+ $distance[$i][$j] = 1 + get_min($dist1, $dist2, $dist3);
+ }
+ }
+ }
+ return $distance[$len1][$len2];
+}
+
+sub get_standard_signature {
+ my ($sign_off) = @_;
+ $sign_off = lc($sign_off);
+ $sign_off =~ s/\-//g; # to match with formed hash
+ my @standard_signature_tags = (
+ 'signed-off-by:', 'co-developed-by:', 'acked-by:', 'tested-by:',
+ 'reviewed-by:', 'reported-by:', 'suggested-by:'
+ );
+ # setting default values
+ my $standard_signature = 'signed-off-by';
+ my $min_edit_distance = 20;
+ my $edit_distance;
+ foreach (@standard_signature_tags) {
+ my $signature = $_;
+ $_ =~ s/\-//g;
+ $edit_distance = get_edit_distance($sign_off, $_);
+ if ($edit_distance < $min_edit_distance) {
+ $min_edit_distance = $edit_distance;
+ $standard_signature = $signature;
+ }
+ }
+ if($min_edit_distance<=2) {
+ return ucfirst($standard_signature);
+ }
+ return "";
+}
+
our @typeListMisordered = (
qr{char\s+(?:un)?signed},
qr{int\s+(?:(?:un)?signed\s+)?short\s},
@@ -2773,8 +2844,18 @@ sub process {
my $ucfirst_sign_off = ucfirst(lc($sign_off));

if ($sign_off !~ /$signature_tags/) {
- WARN("BAD_SIGN_OFF",
- "Non-standard signature: $sign_off\n" . $herecurr);
+ my $suggested_signature = get_standard_signature($sign_off);
+ if ($suggested_signature eq "") {
+ WARN("BAD_SIGN_OFF",
+ "Non-standard signature: $sign_off\n" . $herecurr);
+ }
+ else {
+ if (WARN("BAD_SIGN_OFF",
+ "Non-standard signature: $sign_off. Please use '$suggested_signature' instead\n" . $herecurr) &&
+ $fix) {
+ $fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
+ }
+ }
}
if (defined $space_before && $space_before ne "") {
if (WARN("BAD_SIGN_OFF",
--
2.17.1


2020-11-28 22:10:06

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH v4] checkpatch: add fix and improve warning msg for non-standard signature

On Sat, 2020-11-28 at 18:35 +0530, Aditya Srivastava wrote:
> Currently checkpatch warns for BAD_SIGN_OFF on non-standard signature
> styles.
>
> A large number of these warnings occur because of typo mistakes in
> signature tags. An evaluation over v4.13..v5.8 showed that out of 539
> warnings due to non-standard signatures, 87 are due to typo mistakes.
>
> Following are the standard signature tags which are often incorrectly
> used, along with their individual counts of incorrect use (over
> v4.13..v5.8):
>
>  Reviewed-by: 42
>  Signed-off-by: 25
>  Reported-by: 6
>  Acked-by: 4
>  Tested-by: 4
>  Suggested-by: 4
>
> Provide a fix by calculating levenshtein distance for the signature tag
> with all the standard signatures and suggest a fix with a signature, whose
> edit distance is less than or equal to 2 with the misspelled signature.
>
> Out of the 86 misspelled signatures fixed with this approach, 85 were
> found to be good corrections and 1 was a bad correction.
>
> Following was found to be a bad correction:
>  Tweeted-by (count: 1) => Tested-by
>
> Signed-off-by: Aditya Srivastava <[email protected]>
> ---
> changes in v2: modify commit message: replace specific example with overall evaluation, minor changes
>
> changes in v3: summarize commit message
>
> changes in v4: improve commit message; remove signature suggestions of small length (ie 'cc' and 'to')

Seems OKish but this needs style modifications as there are
several whitespace uses that don't match the typical forms
and perhaps some new function naming could be improved.

> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
[]
> @@ -506,6 +506,77 @@ our $signature_tags = qr{(?xi:
>   Cc:
>  )};
>
>
> +sub get_min {

probably a poor name choice. Maybe edit_distance_min

> + my (@arr) = @_;
> + my $len = scalar @arr;
> + if((scalar @arr) < 1) {

space after if

> + # if underflow, return
> + return;
> + }
> + my $min = $arr[0];
> + for my $i (0 .. ($len-1)) {
> + if ($arr[$i] < $min) {
> + $min = $arr[$i];
> + }
> + }
> + return $min;
> +}
> +
> +sub get_edit_distance {
> + my ($str1, $str2) = @_;

maybe lc($str) =~ s/-//g; here instead of the code in the caller

> + my $len1 = length($str1);
> + my $len2 = length($str2);
> + # two dimensional array storing minimum edit distance
> + my @distance;
> + for my $i (0 .. $len1) {
> + for my $j (0 .. $len2) {
> + if ($i == 0) {
> + $distance[$i][$j] = $j;
> + }
> + elsif ($j == 0) {

} elsif {

> + $distance[$i][$j] = $i;
> + }
> + elsif (substr($str1, $i-1, 1) eq substr($str2, $j-1, 1)) {
> + $distance[$i][$j] = $distance[$i - 1][$j - 1];
> + }
> + else {

} else {

> + my $dist1 = $distance[$i][$j - 1]; #insert distance
> + my $dist2 = $distance[$i - 1][$j]; # remove
> + my $dist3 = $distance[$i - 1][$j - 1]; #replace
> + $distance[$i][$j] = 1 + get_min($dist1, $dist2, $dist3);
> + }
> + }
> + }
> + return $distance[$len1][$len2];
> +}
> +
> +sub get_standard_signature {

find_standard_signature ?

> + my ($sign_off) = @_;
> + $sign_off = lc($sign_off);
> + $sign_off =~ s/\-//g; # to match with formed hash

why not strip the dashes in get_edit_distance instead
of using this weird dance with dashes here?

> + my @standard_signature_tags = (
> + 'signed-off-by:', 'co-developed-by:', 'acked-by:', 'tested-by:',
> + 'reviewed-by:', 'reported-by:', 'suggested-by:'
> + );
> + # setting default values
> + my $standard_signature = 'signed-off-by';

why does this need to be given a value?

> + my $min_edit_distance = 20;
> + my $edit_distance;
> + foreach (@standard_signature_tags) {
> + my $signature = $_;
> + $_ =~ s/\-//g;

and this dancing here

> + $edit_distance = get_edit_distance($sign_off, $_);
> + if ($edit_distance < $min_edit_distance) {
> + $min_edit_distance = $edit_distance;
> + $standard_signature = $signature;
> + }
> + }
> + if($min_edit_distance<=2) {

bad indentation, if (, spaces around test <=

> + return ucfirst($standard_signature);
> + }

bad indentation

> + return "";
> +}
> +
>  our @typeListMisordered = (
>   qr{char\s+(?:un)?signed},
>   qr{int\s+(?:(?:un)?signed\s+)?short\s},
> @@ -2773,8 +2844,18 @@ sub process {
>   my $ucfirst_sign_off = ucfirst(lc($sign_off));
>
>
>   if ($sign_off !~ /$signature_tags/) {
> - WARN("BAD_SIGN_OFF",
> - "Non-standard signature: $sign_off\n" . $herecurr);
> + my $suggested_signature = get_standard_signature($sign_off);
> + if ($suggested_signature eq "") {
> + WARN("BAD_SIGN_OFF",
> + "Non-standard signature: $sign_off\n" . $herecurr);

bad alignment

> + }
> + else {

} else {

> + if (WARN("BAD_SIGN_OFF",
> + "Non-standard signature: $sign_off. Please use '$suggested_signature' instead\n" . $herecurr) &&

"perhaps" rather than "please use" or "likely typo of"

> + $fix) {
> + $fixed[$fixlinenr] =~ s/$sign_off/$suggested_signature/;
> + }
> + }
>   }
>   if (defined $space_before && $space_before ne "") {
>   if (WARN("BAD_SIGN_OFF",