2011-06-08 19:34:37

by Joe Perches

[permalink] [raw]
Subject: [PATCH] checkpatch: Validate signature styles and To: and Cc: lines

Signatures have many forms and can sometimes cause problems
if not in the correct format when using git send-email or quilt.

Try to verify the signature tags and email addresses to use
the generally accepted "Signed-off-by: Full Name <[email protected]>"
form.

Original-idea-by: anish kumar <[email protected]>
Signed-off-by: Joe Perches <[email protected]>
---
scripts/checkpatch.pl | 96 ++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 88 insertions(+), 8 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 8657f99..c8bde02 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -216,6 +216,16 @@ our $logFunctions = qr{(?x:
MODULE_[A-Z_]+
)};

+our $signature_tags = qr{(?xi:
+ Signed-off-by:|
+ Acked-by:|
+ Tested-by:|
+ Reviewed-by:|
+ Reported-by:|
+ To:|
+ Cc:
+)};
+
our @typeList = (
qr{void},
qr{(?:unsigned\s+)?char},
@@ -341,6 +351,60 @@ sub top_of_kernel_tree {
return 1;
}

+sub parse_email {
+ my ($formatted_email) = @_;
+
+ my $name = "";
+ my $address = "";
+
+ if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
+ $name = $1;
+ $address = $2;
+ } elsif ($formatted_email =~ /^\s*<(\S+\@\S+)>.*$/) {
+ $address = $1;
+ } elsif ($formatted_email =~ /(\S+\@\S+)$/) {
+ $address = $1;
+ $formatted_email =~ s/$address.*$//;
+ $name = $formatted_email;
+ }
+
+ $name =~ s/^\s+|\s+$//g;
+ $name =~ s/^\"|\"$//g;
+ $address =~ s/^\s+|\s+$//g;
+ $address =~ s/^\<|\>$//g;
+ $address =~ s/\s//g;
+
+ if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ return ($name, $address);
+}
+
+sub format_email {
+ my ($name, $address) = @_;
+
+ my $formatted_email;
+
+ $name =~ s/^\s+|\s+$//g;
+ $name =~ s/^\"|\"$//g;
+ $address =~ s/^\s+|\s+$//g;
+
+ if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ if ("$name" eq "") {
+ $formatted_email = "$address";
+ } else {
+ $formatted_email = "$name <$address>";
+ }
+
+ return $formatted_email;
+}
+
sub expand_tabs {
my ($str) = @_;

@@ -1365,17 +1429,33 @@ sub process {
}
}

-#check the patch for a signoff:
+# Check the patch for a signoff:
if ($line =~ /^\s*signed-off-by:/i) {
- # This is a signoff, if ugly, so do not double report.
$signoff++;
- if (!($line =~ /^\s*Signed-off-by:/)) {
- WARN("Signed-off-by: is the preferred form\n" .
- $herecurr);
+ }
+
+# Check signature styles
+ if ($line =~ /^(\s*)($signature_tags)(\s*)(.*)/) {
+ my $space_before = $1;
+ my $sign_off = $2;
+ my $space_after = $3;
+ my $email = $4;
+ my $ucfirst_sign_off = ucfirst(lc($sign_off));
+
+ if (defined $space_before && $space_before ne "") {
+ WARN("Do not use whitespace before $ucfirst_sign_off\n" . $herecurr);
}
- if ($line =~ /^\s*signed-off-by:\S/i) {
- WARN("space required after Signed-off-by:\n" .
- $herecurr);
+ if ($sign_off ne $ucfirst_sign_off) {
+ WARN("'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr);
+ }
+ if (!defined $space_after || $space_after ne " ") {
+ WARN("Use a single space after $ucfirst_sign_off\n" . $herecurr);
+ }
+ my $suggested_email = format_email(parse_email($email));
+ if ($suggested_email eq "") {
+ ERROR("email address '$email' is unrecognizable\n" . $herecurr);
+ } elsif ($suggested_email ne $email) {
+ WARN("email address '$email' might be better as '$suggested_email'\n" . $herecurr);
}
}

--
1.7.6.rc0


2011-06-08 20:24:38

by Nick Bowler

[permalink] [raw]
Subject: Re: [PATCH] checkpatch: Validate signature styles and To: and Cc: lines

On 2011-06-08 12:34 -0700, Joe Perches wrote:
> Signatures have many forms and can sometimes cause problems
> if not in the correct format when using git send-email or quilt.
>
> Try to verify the signature tags and email addresses to use
> the generally accepted "Signed-off-by: Full Name <[email protected]>"
> form.
[...]
> + my $suggested_email = format_email(parse_email($email));
> + if ($suggested_email eq "") {
> + ERROR("email address '$email' is unrecognizable\n" . $herecurr);
> + } elsif ($suggested_email ne $email) {
> + WARN("email address '$email' might be better as '$suggested_email'\n" . $herecurr);

If you're going to make suggestions, you should at the very least ensure
that the suggestions are actually valid email addresses. Otherwise,
this script will suggest replacing valid addresses with invalid ones!

In particular, angle brackets inside "quotes" or (comments) will
seriously trip up this script. For example:

WARNING: email address '"Foo Bar <x124>" <[email protected]>' might be
better as 'Foo Bar <x124>"<[email protected]>' #8:
Signed-off-by: "Foo Bar <x124>" <[email protected]>

Even more amusing is that we can actually follow the bogus suggestion
and checkpatch.pl won't complain about the resulting invalid email
address (at least it's consistent, I guess):

Signed-off-by: Foo Bar <x124>"<[email protected]>

patch has no obvious style problems and is ready for submission.

--
Nick Bowler, Elliptic Technologies (http://www.elliptictech.com/)

2011-06-08 20:40:53

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] checkpatch: Validate signature styles and To: and Cc: lines

On Wed, 2011-06-08 at 16:24 -0400, Nick Bowler wrote:
> On 2011-06-08 12:34 -0700, Joe Perches wrote:
> > Signatures have many forms and can sometimes cause problems
> > if not in the correct format when using git send-email or quilt.
> > Try to verify the signature tags and email addresses to use
> > the generally accepted "Signed-off-by: Full Name <[email protected]>"
> > form.
> [...]
> > + my $suggested_email = format_email(parse_email($email));
> > + if ($suggested_email eq "") {
> > + ERROR("email address '$email' is unrecognizable\n" . $herecurr);
> > + } elsif ($suggested_email ne $email) {
> > + WARN("email address '$email' might be better as '$suggested_email'\n" . $herecurr);
> If you're going to make suggestions, you should at the very least ensure
> that the suggestions are actually valid email addresses. Otherwise,
> this script will suggest replacing valid addresses with invalid ones!

Anyone that puts angle brackets in a quoted name
is pretty silly. There aren't any in git history.

What I did is pretty simple and covers most all
normally used cases.

You are free to improve email parsing.

2011-06-09 05:45:06

by Joe Perches

[permalink] [raw]
Subject: [PATCH V2] checkpatch: Validate signature styles and To: and Cc: lines

>From 23cfd1bbb676c9bf133038aee8c224ea83ec346b Mon Sep 17 00:00:00 2001
Message-Id: <23cfd1bbb676c9bf133038aee8c224ea83ec346b.1307598088.git.joe@perches.com>
From: Joe Perches <[email protected]>
Date: Wed, 8 Jun 2011 12:12:59 -0700
Subject: [PATCH] checkpatch: Validate signature styles and To: and Cc: lines

Signatures have many forms and can sometimes cause problems
if not in the correct format when using git send-email or quilt.

Try to verify the signature tags and email addresses to use
the generally accepted "Signed-off-by: Full Name <[email protected]>"
form.

Original-idea-by: anish kumar <[email protected]>
Signed-off-by: Joe Perches <[email protected]>
---

V2: Better email address validation.
Don't complain about git-send-email "cc:" or "to:" email header lines
Tested with all the patch descriptions in git.
Seems to work well.

scripts/checkpatch.pl | 123 +++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 115 insertions(+), 8 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 8657f99..ab65a16 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -216,6 +216,16 @@ our $logFunctions = qr{(?x:
MODULE_[A-Z_]+
)};

+our $signature_tags = qr{(?xi:
+ Signed-off-by:|
+ Acked-by:|
+ Tested-by:|
+ Reviewed-by:|
+ Reported-by:|
+ To:|
+ Cc:
+)};
+
our @typeList = (
qr{void},
qr{(?:unsigned\s+)?char},
@@ -341,6 +351,76 @@ sub top_of_kernel_tree {
return 1;
}

+sub parse_email {
+ my ($formatted_email) = @_;
+
+ my $name = "";
+ my $address = "";
+ my $comment = "";
+
+ if ($formatted_email =~ /^(.*)<(\S+\@\S+)>(.*)$/) {
+ $name = $1;
+ $address = $2;
+ $comment = $3 if defined $3;
+ } elsif ($formatted_email =~ /^\s*<(\S+\@\S+)>(.*)$/) {
+ $address = $1;
+ $comment = $2 if defined $2;
+ } elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) {
+ $address = $1;
+ $comment = $2 if defined $2;
+ $formatted_email =~ s/$address.*$//;
+ $name = $formatted_email;
+ $name =~ s/^\s+|\s+$//g;
+ $name =~ s/^\"|\"$//g;
+ # If there's a name left after stripping spaces and
+ # leading quotes, and the address doesn't have both
+ # leading and trailing angle brackets, the address
+ # is invalid. ie:
+ # "joe smith [email protected]" bad
+ # "joe smith <[email protected]" bad
+ if ($name ne "" && $address !~ /^<[^>]+>$/) {
+ $name = "";
+ $address = "";
+ $comment = "";
+ }
+ }
+
+ $name =~ s/^\s+|\s+$//g;
+ $name =~ s/^\"|\"$//g;
+ $address =~ s/^\s+|\s+$//g;
+ $address =~ s/^\<|\>$//g;
+
+ if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ return ($name, $address, $comment);
+}
+
+sub format_email {
+ my ($name, $address) = @_;
+
+ my $formatted_email;
+
+ $name =~ s/^\s+|\s+$//g;
+ $name =~ s/^\"|\"$//g;
+ $address =~ s/^\s+|\s+$//g;
+
+ if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ if ("$name" eq "") {
+ $formatted_email = "$address";
+ } else {
+ $formatted_email = "$name <$address>";
+ }
+
+ return $formatted_email;
+}
+
sub expand_tabs {
my ($str) = @_;

@@ -1365,17 +1445,44 @@ sub process {
}
}

-#check the patch for a signoff:
+# Check the patch for a signoff:
if ($line =~ /^\s*signed-off-by:/i) {
- # This is a signoff, if ugly, so do not double report.
$signoff++;
- if (!($line =~ /^\s*Signed-off-by:/)) {
- WARN("Signed-off-by: is the preferred form\n" .
- $herecurr);
+ }
+
+# Check signature styles
+ if ($line =~ /^(\s*)($signature_tags)(\s*)(.*)/) {
+ my $space_before = $1;
+ my $sign_off = $2;
+ my $space_after = $3;
+ my $email = $4;
+ my $ucfirst_sign_off = ucfirst(lc($sign_off));
+
+ if (defined $space_before && $space_before ne "") {
+ WARN("Do not use whitespace before $ucfirst_sign_off\n" . $herecurr);
}
- if ($line =~ /^\s*signed-off-by:\S/i) {
- WARN("space required after Signed-off-by:\n" .
- $herecurr);
+ if ($sign_off =~ /-by:$/i && $sign_off ne $ucfirst_sign_off) {
+ WARN("'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr);
+ }
+ if (!defined $space_after || $space_after ne " ") {
+ WARN("Use a single space after $ucfirst_sign_off\n" . $herecurr);
+ }
+
+ my ($email_name, $email_address, $comment) = parse_email($email);
+ my $suggested_email = format_email(($email_name, $email_address));
+ if ($suggested_email eq "") {
+ ERROR("Unrecognized email address: '$email'\n" . $herecurr);
+ } else {
+ my $dequoted = $suggested_email;
+ $dequoted =~ s/^"//;
+ $dequoted =~ s/" </ </;
+ # Don't force email to have quotes
+ # Allow just an angle bracketed address
+ if ("$dequoted$comment" ne $email &&
+ "<$email_address>$comment" ne $email &&
+ "$suggested_email$comment" ne $email) {
+ WARN("email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr);
+ }
}
}

--
1.7.6.rc0