2007-08-13 05:49:55

by Joe Perches

[permalink] [raw]
Subject: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

I grew weary of looking up the appropriate
maintainer email address(es) to CC: for a patch.

I added flags to the MAINTAINERS file

F: file pattern

for each maintained block and a script to parse
the modified blocks for maintainer and list
email addresses.

perl scripts/get_maintainer.pl <patch>

gives the appropriate maintainer(s).

Modifications since last post:

Added options to control email address style and multiple address separator

Please apply.

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
new file mode 100755
index 0000000..3e5cc6c
--- /dev/null
+++ b/scripts/get_maintainer.pl
@@ -0,0 +1,217 @@
+#!/usr/bin/perl -w
+# (c) 2007, Joe Perches <[email protected]>
+# created from checkpatch.pl
+# Licensed under the terms of the GNU GPL License version 2
+
+use strict;
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+my $V = '0.02';
+
+use Getopt::Long qw(:config no_auto_abbrev);
+
+my $tree = "./";
+my $email_maintainer = 1;
+my $email_maintainer_name = 1;
+my $email_list = 1;
+my $email_subscriber_list = 0;
+my $email_separator = ", ";
+my %saw;
+
+GetOptions(
+ 'tree=s' => \$tree,
+ 'm!' => \$email_maintainer,
+ 'n!' => \$email_maintainer_name,
+ 'l!' => \$email_list,
+ 's!' => \$email_subscriber_list,
+ 'separator=s' => \$email_separator,
+ ) or exit;
+
+my $exit = 0;
+
+if ($#ARGV < 0 ||
+ ($email_maintainer == 0 && $email_list == 0 && $email_subscriber_list == 0)) {
+ print "usage: $P [options] patchfile\n";
+ print "version: $V\n";
+ print " --tree [path] => linux kernel source path\n";
+ print " --m => include maintainer(s) if any\n";
+ print " --n => include maintainer name 'Full Name <addr\@domain.tld>'\n";
+ print " --l => include list(s) if any\n";
+ print " --s => include subscriber only list(s) if any\n";
+ print " --separator [, ] => separator for multiple addresses\n";
+ print "Default: [--m --l --separator \", \"]\n";
+ print "Be sure to select something...\n";
+ exit(1);
+}
+
+if ($tree && !top_of_kernel_tree($tree)) {
+ if (${tree} ne "") {
+ print "'${tree}' ";
+ } else {
+ print "The current directory ";
+ }
+ print "doesn't appear to be a linux kernel source tree\n";
+ exit(2);
+}
+
+## Read MAINTAINERS for type/value pairs
+
+my @typevalue = ();
+open(MAINT, "<${tree}MAINTAINERS") || die "$P: Can't open ${tree}MAINTAINERS\n";
+while (<MAINT>) {
+ if (m/^(\C):\s*(.*)/) {
+ my $type = $1;
+ my $value = $2;
+ if ($type eq "F") { ##Filename pattern matching
+ $value =~ s@\.@\\\.@g; ##Convert . to \.
+ $value =~ s/\*/\.\*/g; ##Convert * to .*
+ }
+ push(@typevalue, "$type:$value");
+ } elsif (!/^(\s)*$/) {
+ push(@typevalue, $_);
+ }
+}
+close(MAINT);
+
+## Find the patched filenames
+
+my @patchedfiles = ();
+open(PATCH, "<$ARGV[0]") or die "Can't open $ARGV[0]\n";
+while (<PATCH>) {
+ if (m/^\+\+\+\s+(\S+)/) {
+ my $file = $1;
+ $file =~ s@^[^/]*/@@;
+ $file =~ s@\n@@;
+ push(@patchedfiles, $file);
+ }
+}
+close(PATCH);
+
+# Sort and uniq patchedfiles
+
+undef %saw;
+@patchedfiles = sort @patchedfiles;
+@patchedfiles = grep(!$saw{$_}++, @patchedfiles);
+
+# Find responsible parties
+
+my @email_to = ();
+foreach (@patchedfiles) {
+ my $patchedfile = $_;
+ my $tvi = 0;
+
+ foreach (@typevalue) {
+ if (m/^(\C):(.*)/) {
+ my $type = $1;
+ my $value = $2;
+ if ($type eq 'F') {
+ if (file_match_pattern($patchedfile, $value)) {
+ my $ptvi = $tvi - 1;
+ while ($ptvi >= 0) {
+ my $tv = $typevalue[$ptvi];
+ if ($tv =~ m/^(\C):(.*)/) {
+ my $ptype = $1;
+ my $pvalue = $2;
+ if ($ptype eq "L") {
+ my $subscr = $pvalue;
+ if ($subscr =~ m/\s*\(subscribers-only\)/) {
+ if ($email_subscriber_list > 0) {
+ $subscr =~ s/\s*\(subscribers-only\)//g;
+ push(@email_to, $subscr);
+ }
+ } else {
+ if ($email_list > 0) {
+ push(@email_to, $pvalue);
+ }
+ }
+ } elsif ($ptype eq "M") {
+ if ($email_maintainer > 0) {
+ if ($ptvi >= 0) {
+ my $tv = $typevalue[$ptvi - 1];
+ if ($tv =~ m/^(\C):(.*)/) {
+ if ($1 eq "P" && $email_maintainer_name > 0) {
+ push(@email_to, "$2 <$pvalue>");
+ } else {
+ push(@email_to, $pvalue);
+ }
+ }
+ } else {
+ push(@email_to, $pvalue);
+ }
+ }
+ }
+ $ptvi--;
+ } else {
+ $ptvi = -1;
+ }
+ }
+ }
+ }
+ }
+ $tvi++;
+ }
+}
+
+## sort and uniq email_to
+
+@email_to = sort @email_to;
+undef %saw;
+@email_to = grep(!$saw{$_}++, @email_to);
+
+## add lk if noone else...
+
+my $address_cnt = @email_to;
+if ($address_cnt == 0 && $email_list > 0) {
+ push(@email_to, "linux-kernel\@vger.kernel.org");
+}
+print(join($email_separator, @email_to));
+print("\n");
+
+exit($exit);
+
+sub file_match_pattern {
+ my ($file, $pattern) = @_;
+ if (substr($pattern, -1) eq "/") {
+ if ($file =~ m@^$pattern@) {
+ return 1;
+ }
+ } else {
+ if ($file =~ m@^$pattern@) {
+ my $s1 = ($file =~ tr@/@@);
+ my $s2 = ($pattern =~ tr@/@@);
+ if ($s1 == $s2) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+sub top_of_kernel_tree {
+ my ($tree) = @_;
+
+ if ($tree ne "" && substr($tree,length($tree)-1,1) ne "/") {
+ $tree = $tree . "/";
+ }
+ if ( (-f "${tree}COPYING")
+ && (-f "${tree}CREDITS")
+ && (-f "${tree}Kbuild")
+ && (-f "${tree}MAINTAINERS")
+ && (-f "${tree}Makefile")
+ && (-f "${tree}README")
+ && (-d "${tree}Documentation")
+ && (-d "${tree}arch")
+ && (-d "${tree}include")
+ && (-d "${tree}drivers")
+ && (-d "${tree}fs")
+ && (-d "${tree}init")
+ && (-d "${tree}ipc")
+ && (-d "${tree}kernel")
+ && (-d "${tree}lib")
+ && (-d "${tree}scripts")) {
+ return 1;
+ }
+ return 0;
+}



2007-08-13 12:16:56

by Michal Piotrowski

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Hey Joe,

On 13/08/07, Joe Perches <[email protected]> wrote:
> I grew weary of looking up the appropriate
> maintainer email address(es) to CC: for a patch.
>
> I added flags to the MAINTAINERS file
>
> F: file pattern
>
> for each maintained block and a script to parse
> the modified blocks for maintainer and list
> email addresses.
>
> perl scripts/get_maintainer.pl <patch>
>
> gives the appropriate maintainer(s).

Cool thing.

Please do not send gazzillion patches against the same file.

Regards,
Michal

--
LOG
http://www.stardust.webpages.pl/log/

2007-08-13 12:40:56

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Sun, Aug 12, 2007 at 10:49:34PM -0700, Joe Perches wrote:
> I grew weary of looking up the appropriate
> maintainer email address(es) to CC: for a patch.

Does the acronym GAFL ring any bells? It's not that idea is worthless -
it sure as hell is a useful thing, but what the bleeding hell is that
splitup supposed to achieve?

Please, people, try to think for a minute. Patch series are good not
just because; there are rational reasons for that. None of those
applies here; if anything, you've made sure that this patchbomb will
be less reviewed.

Seriously, get a fucking life. This is way past ridiculous. It does not
make changes easier to test. It does not help bisect. It does not help
to make changes more self-contained. It does not reduce the odds of
conflict with pending patches. It does not simplify conflict resolution
when porting. It does not split the change into easier understood parts.
Hell, it doesn't even make it easier to revert broken parts.

Use the common sense, folks. Please.

2007-08-13 16:37:48

by Kok, Auke

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Joe Perches wrote:
> I grew weary of looking up the appropriate
> maintainer email address(es) to CC: for a patch.
>
> I added flags to the MAINTAINERS file
>
> F: file pattern
>
> for each maintained block and a script to parse
> the modified blocks for maintainer and list
> email addresses.
>
> perl scripts/get_maintainer.pl <patch>
>
> gives the appropriate maintainer(s).
>
> Modifications since last post:
>
> Added options to control email address style and multiple address separator
>
> Please apply.

Well, I came up with this implementation, so I definately support it and the way
Joe implemented it:

Acked-by: Auke Kok <[email protected]>

Cheers,

Auke


>
> diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
> new file mode 100755
> index 0000000..3e5cc6c
> --- /dev/null
> +++ b/scripts/get_maintainer.pl
> @@ -0,0 +1,217 @@
> +#!/usr/bin/perl -w
> +# (c) 2007, Joe Perches <[email protected]>
> +# created from checkpatch.pl
> +# Licensed under the terms of the GNU GPL License version 2
> +
> +use strict;
> +
> +my $P = $0;
> +$P =~ s@.*/@@g;
> +
> +my $V = '0.02';
> +
> +use Getopt::Long qw(:config no_auto_abbrev);
> +
> +my $tree = "./";
> +my $email_maintainer = 1;
> +my $email_maintainer_name = 1;
> +my $email_list = 1;
> +my $email_subscriber_list = 0;
> +my $email_separator = ", ";
> +my %saw;
> +
> +GetOptions(
> + 'tree=s' => \$tree,
> + 'm!' => \$email_maintainer,
> + 'n!' => \$email_maintainer_name,
> + 'l!' => \$email_list,
> + 's!' => \$email_subscriber_list,
> + 'separator=s' => \$email_separator,
> + ) or exit;
> +
> +my $exit = 0;
> +
> +if ($#ARGV < 0 ||
> + ($email_maintainer == 0 && $email_list == 0 && $email_subscriber_list == 0)) {
> + print "usage: $P [options] patchfile\n";
> + print "version: $V\n";
> + print " --tree [path] => linux kernel source path\n";
> + print " --m => include maintainer(s) if any\n";
> + print " --n => include maintainer name 'Full Name <addr\@domain.tld>'\n";
> + print " --l => include list(s) if any\n";
> + print " --s => include subscriber only list(s) if any\n";
> + print " --separator [, ] => separator for multiple addresses\n";
> + print "Default: [--m --l --separator \", \"]\n";
> + print "Be sure to select something...\n";
> + exit(1);
> +}
> +
> +if ($tree && !top_of_kernel_tree($tree)) {
> + if (${tree} ne "") {
> + print "'${tree}' ";
> + } else {
> + print "The current directory ";
> + }
> + print "doesn't appear to be a linux kernel source tree\n";
> + exit(2);
> +}
> +
> +## Read MAINTAINERS for type/value pairs
> +
> +my @typevalue = ();
> +open(MAINT, "<${tree}MAINTAINERS") || die "$P: Can't open ${tree}MAINTAINERS\n";
> +while (<MAINT>) {
> + if (m/^(\C):\s*(.*)/) {
> + my $type = $1;
> + my $value = $2;
> + if ($type eq "F") { ##Filename pattern matching
> + $value =~ s@\.@\\\.@g; ##Convert . to \.
> + $value =~ s/\*/\.\*/g; ##Convert * to .*
> + }
> + push(@typevalue, "$type:$value");
> + } elsif (!/^(\s)*$/) {
> + push(@typevalue, $_);
> + }
> +}
> +close(MAINT);
> +
> +## Find the patched filenames
> +
> +my @patchedfiles = ();
> +open(PATCH, "<$ARGV[0]") or die "Can't open $ARGV[0]\n";
> +while (<PATCH>) {
> + if (m/^\+\+\+\s+(\S+)/) {
> + my $file = $1;
> + $file =~ s@^[^/]*/@@;
> + $file =~ s@\n@@;
> + push(@patchedfiles, $file);
> + }
> +}
> +close(PATCH);
> +
> +# Sort and uniq patchedfiles
> +
> +undef %saw;
> +@patchedfiles = sort @patchedfiles;
> +@patchedfiles = grep(!$saw{$_}++, @patchedfiles);
> +
> +# Find responsible parties
> +
> +my @email_to = ();
> +foreach (@patchedfiles) {
> + my $patchedfile = $_;
> + my $tvi = 0;
> +
> + foreach (@typevalue) {
> + if (m/^(\C):(.*)/) {
> + my $type = $1;
> + my $value = $2;
> + if ($type eq 'F') {
> + if (file_match_pattern($patchedfile, $value)) {
> + my $ptvi = $tvi - 1;
> + while ($ptvi >= 0) {
> + my $tv = $typevalue[$ptvi];
> + if ($tv =~ m/^(\C):(.*)/) {
> + my $ptype = $1;
> + my $pvalue = $2;
> + if ($ptype eq "L") {
> + my $subscr = $pvalue;
> + if ($subscr =~ m/\s*\(subscribers-only\)/) {
> + if ($email_subscriber_list > 0) {
> + $subscr =~ s/\s*\(subscribers-only\)//g;
> + push(@email_to, $subscr);
> + }
> + } else {
> + if ($email_list > 0) {
> + push(@email_to, $pvalue);
> + }
> + }
> + } elsif ($ptype eq "M") {
> + if ($email_maintainer > 0) {
> + if ($ptvi >= 0) {
> + my $tv = $typevalue[$ptvi - 1];
> + if ($tv =~ m/^(\C):(.*)/) {
> + if ($1 eq "P" && $email_maintainer_name > 0) {
> + push(@email_to, "$2 <$pvalue>");
> + } else {
> + push(@email_to, $pvalue);
> + }
> + }
> + } else {
> + push(@email_to, $pvalue);
> + }
> + }
> + }
> + $ptvi--;
> + } else {
> + $ptvi = -1;
> + }
> + }
> + }
> + }
> + }
> + $tvi++;
> + }
> +}
> +
> +## sort and uniq email_to
> +
> +@email_to = sort @email_to;
> +undef %saw;
> +@email_to = grep(!$saw{$_}++, @email_to);
> +
> +## add lk if noone else...
> +
> +my $address_cnt = @email_to;
> +if ($address_cnt == 0 && $email_list > 0) {
> + push(@email_to, "linux-kernel\@vger.kernel.org");
> +}
> +print(join($email_separator, @email_to));
> +print("\n");
> +
> +exit($exit);
> +
> +sub file_match_pattern {
> + my ($file, $pattern) = @_;
> + if (substr($pattern, -1) eq "/") {
> + if ($file =~ m@^$pattern@) {
> + return 1;
> + }
> + } else {
> + if ($file =~ m@^$pattern@) {
> + my $s1 = ($file =~ tr@/@@);
> + my $s2 = ($pattern =~ tr@/@@);
> + if ($s1 == $s2) {
> + return 1;
> + }
> + }
> + }
> + return 0;
> +}
> +
> +sub top_of_kernel_tree {
> + my ($tree) = @_;
> +
> + if ($tree ne "" && substr($tree,length($tree)-1,1) ne "/") {
> + $tree = $tree . "/";
> + }
> + if ( (-f "${tree}COPYING")
> + && (-f "${tree}CREDITS")
> + && (-f "${tree}Kbuild")
> + && (-f "${tree}MAINTAINERS")
> + && (-f "${tree}Makefile")
> + && (-f "${tree}README")
> + && (-d "${tree}Documentation")
> + && (-d "${tree}arch")
> + && (-d "${tree}include")
> + && (-d "${tree}drivers")
> + && (-d "${tree}fs")
> + && (-d "${tree}init")
> + && (-d "${tree}ipc")
> + && (-d "${tree}kernel")
> + && (-d "${tree}lib")
> + && (-d "${tree}scripts")) {
> + return 1;
> + }
> + return 0;
> +}
>
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2007-08-13 16:52:49

by Sam Ravnborg

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

>
> Please, people, try to think for a minute. Patch series are good not
> just because; there are rational reasons for that. None of those
> applies here; if anything, you've made sure that this patchbomb will
> be less reviewed.

This huge serie had one nice property.
Relavent people got only relevant patches sent direct.
They should have been sent *only* to these people and then
with relevant modifications could have been applied as a single patch.

Sam

2007-08-13 17:20:43

by Ray Lee

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 8/12/07, Joe Perches <[email protected]> wrote:
> I grew weary of looking up the appropriate
> maintainer email address(es) to CC: for a patch.
>
> I added flags to the MAINTAINERS file
>
> F: file pattern
>
> for each maintained block and a script to parse
> the modified blocks for maintainer and list
> email addresses.

Why not parse git annotate or blame instead (other than it's freakin'
slow)? Using the repository history has the added benefit of telling
you a lot more fine-grained detail about who may want to know about
your patch.

2007-08-13 17:36:35

by Mariusz Kozlowski

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Hello,

I don't recall discusion about this so here are my 3 cents:

I like the idea. The numerous responses you got that you made a mistake
and someone else is the maintainer just prove that this kind of information would
be nice to have. Even if it is not going to be included in mainline it is still
nice to have around as a patchset or sth. Personally I often run into trouble finding
right person to CC on patches.

I get the feeling that only the maintainers themselves + a few old geeks here know
who is maintaining what (and which file) ;-) But maybe it's just me.

The rest is as people say. These ~550 patches without 'in reply to' is a nightmare.

Regards,

Mariusz

2007-08-13 17:49:53

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/13/2007 09:16 AM, Al Viro wrote:

> On Sun, Aug 12, 2007 at 10:49:34PM -0700, Joe Perches wrote:
>> I grew weary of looking up the appropriate
>> maintainer email address(es) to CC: for a patch.
>
> Does the acronym GAFL ring any bells? It's not that idea is worthless -
> it sure as hell is a useful thing, but what the bleeding hell is that
> splitup supposed to achieve?

Well, to be fair, he's CCing the addresses in the individual entries which
is at least somewhat of a reason. Yes, could've worked via preparation via
private mail as well or something but hey, posting some 600 patches is at
least incredibly funny :-)

Only thing left now is to now teach Linus's copy of git about these entries
so that it doesn't turn into an wholy incomplete, obsolete mess through
addition, removal and movement of files in a few months time...

Rene.

2007-08-13 17:59:03

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl


On Mon, 2007-08-13 at 19:33 +0200, Mariusz Kozlowski wrote:
> Hello,
>
> I don't recall discusion about this so here are my 3 cents:
>
> I like the idea.

I don't actually. It shows a central MAINTAINERS file is the wrong
approach; just that 500+ patches to the same file were needed shows
that.

The maintainer info should be in the source file itself! That's the only
reasonable way to keep it updated; now I'm all for having it machine
parsable so that tools can use it, but it still really should be in the
code itself, not in some central file that will always just go out of
data, and will be a huge source of needless patch conflicts.


2007-08-13 18:28:07

by Mariusz Kozlowski

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

> > I don't recall discusion about this so here are my 3 cents:
> >
> > I like the idea.
>
> I don't actually. It shows a central MAINTAINERS file is the wrong
> approach; just that 500+ patches to the same file were needed shows
> that.
>
> The maintainer info should be in the source file itself! That's the only
> reasonable way to keep it updated; now I'm all for having it machine
> parsable so that tools can use it, but it still really should be in the
> code itself, not in some central file that will always just go out of
> data, and will be a huge source of needless patch conflicts.

The downside is that a lot of info will get duplicated. It will be more accurate though.
I agree either way.

Regards,

Mariusz

2007-08-13 18:33:50

by Valdis Klētnieks

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Mon, 13 Aug 2007 10:09:08 PDT, Ray Lee said:
> On 8/12/07, Joe Perches <[email protected]> wrote:
> > I grew weary of looking up the appropriate
> > maintainer email address(es) to CC: for a patch.
> >
> > I added flags to the MAINTAINERS file
> >
> > F: file pattern
> >
> > for each maintained block and a script to parse
> > the modified blocks for maintainer and list
> > email addresses.
>
> Why not parse git annotate or blame instead (other than it's freakin'
> slow)? Using the repository history has the added benefit of telling
> you a lot more fine-grained detail about who may want to know about
> your patch.

1) Not everybody wants to install git and pull down the whole kernel tree
just so they can do 'git blame' and point the report at the right cc:'s.
(Heck, *I* don't even have a 'git pull' of the kernel handy, and I probably
whinge about a lot more stuff than the average person reporting a bug)

2) A quick 'grep git' through Documentation/ doesn't find any actual mention
of how to find a web page that will do a 'git blame' for you.

I posit that if you want users to point bug reports at the correct cc:'s,
all the information needed has to be in the distributed linux-2.6.foo.tar file
(I'd allow that a functional "Go to this URL" suffices for this).


Attachments:
(No filename) (226.00 B)

2007-08-13 18:37:07

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/13/2007 07:42 PM, Arjan van de Ven wrote:

> On Mon, 2007-08-13 at 19:33 +0200, Mariusz Kozlowski wrote:
>> Hello,
>>
>> I don't recall discusion about this so here are my 3 cents:
>>
>> I like the idea.
>
> I don't actually. It shows a central MAINTAINERS file is the wrong
> approach; just that 500+ patches to the same file were needed shows
> that.

Quite.

> The maintainer info should be in the source file itself! That's the only
> reasonable way to keep it updated; now I'm all for having it machine
> parsable so that tools can use it, but it still really should be in the
> code itself, not in some central file that will always just go out of
> data, and will be a huge source of needless patch conflicts.

This is quite like the OO-INDEX discussion now going, where I saw it argued
that the one-line summaries could go at the top of the actual Documentation
files themselves, where they could be mechanically extracted to _build_ the
index files. Keeping things in sync is the important reason there as well.

While the notion behing these patches appears to be good, the tree sees many
changes through addition, removal and movement of files which affects this.
Is Linus's copy of git going to check if an added file doesn't overlap with
an existing wildcard in the MAINTAINERS file and delele or adjust wildcards
upon removal or movement?

I personally think it wouldn't be such as bad idea to introduce a standard
Linux source file header, with (when present) information such as the
summary for index files, maintainer information, and other information now
present in MAINTAINERS. With it being in the sourcefile themslves, it will
stand a much better chance of staying up to date.

Rene.

2007-08-13 18:39:45

by Satyam Sharma

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl



On Mon, 13 Aug 2007, Arjan van de Ven wrote:

>
> On Mon, 2007-08-13 at 19:33 +0200, Mariusz Kozlowski wrote:
> > Hello,
> >
> > I don't recall discusion about this so here are my 3 cents:
> >
> > I like the idea.
>
> I don't actually. It shows a central MAINTAINERS file is the wrong
> approach; just that 500+ patches to the same file were needed shows
> that.
>
> The maintainer info should be in the source file itself! That's the only
> reasonable way to keep it updated; now I'm all for having it machine
> parsable so that tools can use it, but it still really should be in the
> code itself, not in some central file that will always just go out of
> data, and will be a huge source of needless patch conflicts.

I second this thought (keeping MAINTAINERS info closer to code than in
a central kernel-global location), but have a differing opinion about the
implementation. Having MAINTAINERS-style annotations in all source files
sounds needlessly redundant. Worse still, I expect people will avoid adding
these annotations to all source files precisely for this reason, thus
someone editing drivers/xxx/foo.c would have no idea that the maintainer
info for this file is actually in drivers/xxx/bar.c.

Better solution is to have multiple MAINTAINERS files distributed in the
kernel tree, IMHO -- say a drivers/net/MAINTAINERS for maintainer info on
all various net drivers, drivers/kvm/MAINTAINERS for KVM maintainer info,
fs/ext3/MAINTAINERS for ext3 maintainers, fs/MAINTAINERS for generic VFS
maintainers info, so on and so forth. Of course, these individual
MAINTAINERS files could still have the newly-introduced "F:" fields as
well (drivers/net/MAINTAINERS would clearly require it, f.e.) ...


Satyam

2007-08-13 18:52:40

by Krzysztof Halasa

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Arjan van de Ven <[email protected]> writes:

> The maintainer info should be in the source file itself!

Nope, it should be outside of the (downloadable) tarball, because
once someone get a tarball you can't update the data in it.
This is fine WRT source (which is static given a version) but
doesn't work for fast-changing data.
--
Krzysztof Halasa

2007-08-13 19:03:25

by Krzysztof Halasa

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Krzysztof Halasa <[email protected]> writes:

> Nope, it should be outside of the (downloadable) tarball, because
> once someone get a tarball you can't update the data in it.
> This is fine WRT source (which is static given a version) but
> doesn't work for fast-changing data.

... OTOH I think this additional info in MAINTAINERS file is better
than the file without it.
--
Krzysztof Halasa

2007-08-13 19:22:24

by Jan Engelhardt

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl


On Aug 14 2007 00:02, Satyam Sharma wrote:
>
>Better solution is to have multiple MAINTAINERS files distributed in the
>kernel tree, IMHO -- say a drivers/net/MAINTAINERS for maintainer info on
>all various net drivers, drivers/kvm/MAINTAINERS for KVM maintainer info,
>fs/ext3/MAINTAINERS for ext3 maintainers, fs/MAINTAINERS for generic VFS
>maintainers info, so on and so forth. Of course, these individual
>MAINTAINERS files could still have the newly-introduced "F:" fields as
>well (drivers/net/MAINTAINERS would clearly require it, f.e.) ...

Yes please.

Or perhaps even putting the maintainer into the Kconfig files?


Jan
--

2007-08-13 19:43:27

by Richard Knutsson

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Kok, Auke wrote:
> Joe Perches wrote:
>> I grew weary of looking up the appropriate
>> maintainer email address(es) to CC: for a patch.
>>
>> I added flags to the MAINTAINERS file
>>
>> F: file pattern
>>
>> for each maintained block and a script to parse
>> the modified blocks for maintainer and list
>> email addresses.
>>
>> perl scripts/get_maintainer.pl <patch>
>>
>> gives the appropriate maintainer(s).
>>
>> Modifications since last post:
>>
>> Added options to control email address style and multiple address
>> separator
>>
>> Please apply.
>
> Well, I came up with this implementation, so I definately support it
> and the way Joe implemented it:
>
Really? Please check out this thread with at least two ideas of possible
implementations, with Stefan Richter as the spokesman of the current
implementation: http://marc.info/?l=linux-kernel&m=116870578531280&w=3
...and I am not saying there hasn't been someone suggesting it even earlier.

The expression; nothing is new under the sun comes to mind ;)

Anyway, glad to see someone actually implementing it. Thanks!

Richard Knutsson

2007-08-13 19:55:50

by Richard Knutsson

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Jan Engelhardt wrote:
> On Aug 14 2007 00:02, Satyam Sharma wrote:
>
>> Better solution is to have multiple MAINTAINERS files distributed in the
>> kernel tree, IMHO -- say a drivers/net/MAINTAINERS for maintainer info on
>> all various net drivers, drivers/kvm/MAINTAINERS for KVM maintainer info,
>> fs/ext3/MAINTAINERS for ext3 maintainers, fs/MAINTAINERS for generic VFS
>> maintainers info, so on and so forth. Of course, these individual
>> MAINTAINERS files could still have the newly-introduced "F:" fields as
>> well (drivers/net/MAINTAINERS would clearly require it, f.e.) ...
>>
>
> Yes please.
>
> Or perhaps even putting the maintainer into the Kconfig files?
>
>
Hope I am not biting my ass now, but I believed this was suggested but
the counter-argument were; is really a file/system made to configure the
kernel related to the maintainer?

I like the idea of ".maintainers" (or maybe even ".maintainer" now). It
has also been suggested that the file should be at the lowest common
pathway to avoid duplication. Downside is the added need to search for
the file...

Richard Knutsson

2007-08-13 20:06:38

by Valdis Klētnieks

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Mon, 13 Aug 2007 21:21:10 +0200, Jan Engelhardt said:
>
> On Aug 14 2007 00:02, Satyam Sharma wrote:
> >
> >Better solution is to have multiple MAINTAINERS files distributed in the
> >kernel tree, IMHO -- say a drivers/net/MAINTAINERS for maintainer info on
> >all various net drivers, drivers/kvm/MAINTAINERS for KVM maintainer info,
> >fs/ext3/MAINTAINERS for ext3 maintainers, fs/MAINTAINERS for generic VFS
> >maintainers info, so on and so forth. Of course, these individual
> >MAINTAINERS files could still have the newly-introduced "F:" fields as
> >well (drivers/net/MAINTAINERS would clearly require it, f.e.) ...
>
> Yes please.
>
> Or perhaps even putting the maintainer into the Kconfig files?

There's large swaths of code that don't have an obvious Kconfig entry
but do have a Maintainer entry - VFS, lots of kernel/*, and so on.


Attachments:
(No filename) (226.00 B)

2007-08-13 20:17:34

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Mon, Aug 13, 2007 at 08:41:03PM +0200, Krzysztof Halasa wrote:
> Arjan van de Ven <[email protected]> writes:
>
> > The maintainer info should be in the source file itself!
>
> Nope, it should be outside of the (downloadable) tarball, because
> once someone get a tarball you can't update the data in it.
> This is fine WRT source (which is static given a version) but
> doesn't work for fast-changing data.

But the maintainers file is in the tarball today. If someone wants to
take the information from the latest git tree, gather it up into a
single html file, and put it on the web, more power to them. But it
seems that the master source of the data should be the source file.

- Ted

2007-08-13 20:29:26

by Kok, Auke

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Richard Knutsson wrote:
> Kok, Auke wrote:
>> Joe Perches wrote:
>>> I grew weary of looking up the appropriate
>>> maintainer email address(es) to CC: for a patch.
>>>
>>> I added flags to the MAINTAINERS file
>>>
>>> F: file pattern
>>>
>>> for each maintained block and a script to parse
>>> the modified blocks for maintainer and list
>>> email addresses.
>>>
>>> perl scripts/get_maintainer.pl <patch>
>>>
>>> gives the appropriate maintainer(s).
>>>
>>> Modifications since last post:
>>>
>>> Added options to control email address style and multiple address
>>> separator
>>>
>>> Please apply.
>> Well, I came up with this implementation, so I definately support it
>> and the way Joe implemented it:
>>
> Really? Please check out this thread with at least two ideas of possible
> implementations, with Stefan Richter as the spokesman of the current
> implementation: http://marc.info/?l=linux-kernel&m=116870578531280&w=3
> ...and I am not saying there hasn't been someone suggesting it even earlier.
>
> The expression; nothing is new under the sun comes to mind ;)
>
> Anyway, glad to see someone actually implementing it. Thanks!

bad wording my side, I never implemented anything, but I did post the "F:" flag
idea specific to allocating files to developers on lkml:
http://lkml.org/lkml/2007/6/29/300

Cheers,

Auke

2007-08-13 20:38:33

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Mon, 2007-08-13 at 10:42 -0700, Arjan van de Ven wrote:
> The maintainer info should be in the source file itself! That's the only
> reasonable way to keep it updated; now I'm all for having it machine
> parsable so that tools can use it, but it still really should be in the
> code itself, not in some central file that will always just go out of
> data, and will be a huge source of needless patch conflicts.

If the problem is to do with people failing to update the MAINTAINERS
file, why would moving the same data into 20 or 30 source files motivate
them to keep it up to date? As far as I can see, that would just serve
to multiply the amount of stale data...

Trond

2007-08-13 20:46:03

by Richard Knutsson

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Kok, Auke wrote:
> Richard Knutsson wrote:
>> Kok, Auke wrote:
>>> Joe Perches wrote:
>>>> I grew weary of looking up the appropriate
>>>> maintainer email address(es) to CC: for a patch.
>>>>
>>>> I added flags to the MAINTAINERS file
>>>>
>>>> F: file pattern
>>>>
>>>> for each maintained block and a script to parse
>>>> the modified blocks for maintainer and list
>>>> email addresses.
>>>>
>>>> perl scripts/get_maintainer.pl <patch>
>>>>
>>>> gives the appropriate maintainer(s).
>>>>
>>>> Modifications since last post:
>>>>
>>>> Added options to control email address style and multiple address
>>>> separator
>>>>
>>>> Please apply.
>>> Well, I came up with this implementation, so I definately support it
>>> and the way Joe implemented it:
>>>
>> Really? Please check out this thread with at least two ideas of
>> possible implementations, with Stefan Richter as the spokesman of the
>> current implementation:
>> http://marc.info/?l=linux-kernel&m=116870578531280&w=3
>> ...and I am not saying there hasn't been someone suggesting it even
>> earlier.
>>
>> The expression; nothing is new under the sun comes to mind ;)
>>
>> Anyway, glad to see someone actually implementing it. Thanks!
>
> bad wording my side, I never implemented anything, but I did post the
> "F:" flag idea specific to allocating files to developers on lkml:
> http://lkml.org/lkml/2007/6/29/300
>
...and so did Stefan:
http://marc.info/?l=linux-kernel&m=116871954100026&w=3 :)

cu
Richard Knutsson

2007-08-13 21:05:56

by Adrian Bunk

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Mon, Aug 13, 2007 at 10:09:08AM -0700, Ray Lee wrote:
> On 8/12/07, Joe Perches <[email protected]> wrote:
> > I grew weary of looking up the appropriate
> > maintainer email address(es) to CC: for a patch.
> >
> > I added flags to the MAINTAINERS file
> >
> > F: file pattern
> >
> > for each maintained block and a script to parse
> > the modified blocks for maintainer and list
> > email addresses.
>
> Why not parse git annotate or blame instead (other than it's freakin'
> slow)? Using the repository history has the added benefit of telling
> you a lot more fine-grained detail about who may want to know about
> your patch.

The git tree simply does not contain this information.

Some of the obvious problems:
- recently changed maintainership
- new maintainer email address with the old one bouncing
- git never contains maintainer mailing list addresses
- you can't distinguish between a maintainer of a driver and people only
writing or forwarding patches for a driver

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

2007-08-14 01:23:28

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl


On Mon, 2007-08-13 at 16:37 -0400, Trond Myklebust wrote:
> On Mon, 2007-08-13 at 10:42 -0700, Arjan van de Ven wrote:
> > The maintainer info should be in the source file itself! That's the only
> > reasonable way to keep it updated; now I'm all for having it machine
> > parsable so that tools can use it, but it still really should be in the
> > code itself, not in some central file that will always just go out of
> > data, and will be a huge source of needless patch conflicts.
>
> If the problem is to do with people failing to update the MAINTAINERS
> file, why would moving the same data into 20 or 30 source files motivate
> them to keep it up to date? As far as I can see, that would just serve
> to multiply the amount of stale data...

if each .c file has a MODULE_MAINTAINER() tag...

people tend to update .c files a lot better than way off-the-side other
files.

--
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org

2007-08-14 01:55:55

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/14/2007 03:19 AM, Arjan van de Ven wrote:

> On Mon, 2007-08-13 at 16:37 -0400, Trond Myklebust wrote:
>> On Mon, 2007-08-13 at 10:42 -0700, Arjan van de Ven wrote:

>>> The maintainer info should be in the source file itself! That's the only
>>> reasonable way to keep it updated; now I'm all for having it machine
>>> parsable so that tools can use it, but it still really should be in the
>>> code itself, not in some central file that will always just go out of
>>> data, and will be a huge source of needless patch conflicts.
>>
>> If the problem is to do with people failing to update the MAINTAINERS
>> file, why would moving the same data into 20 or 30 source files
>> motivate them to keep it up to date? As far as I can see, that would
>> just serve to multiply the amount of stale data...
>
> if each .c file has a MODULE_MAINTAINER() tag...
>
> people tend to update .c files a lot better than way off-the-side other
> files.

MODULE_MAINTAINER() was discussed a while ago but embedding information into
the binary has the problem you can't ever change deployed systems, meaning
it lags by design. If a maintainer changes, people would still be using the
information from their old binaries, meaning a replaced maintainer might get
contacted for potentially years still (and the new one not).

(you could avoid that by placing not a name/address in the maintainer tag
but a pointer to somewhere else but at that point this gets to be about
solving something else).

Keeping it in the source alone is fine. C files could just embed their
MAINTAINERS entry as a header:

/*
* P: Maintainer
* M: Mail patches to
* L: Mailing list that is relevant to this area
* W: Web-page with status/info
* T: SCM tree type and location. Type is one of: git, hg, quilt.
* S: Status, one of the following:
*/

And probably adding fields:

* I: Info/Summary (for index files and the like)
* A: Author
* G: License

and such. Yes, while we're at it, we can pick better letters or full word
tags ;-)

Rene.

2007-08-14 02:07:48

by John W. Linville

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Mon, Aug 13, 2007 at 06:19:26PM -0700, Arjan van de Ven wrote:
>
> On Mon, 2007-08-13 at 16:37 -0400, Trond Myklebust wrote:

> > If the problem is to do with people failing to update the MAINTAINERS
> > file, why would moving the same data into 20 or 30 source files motivate
> > them to keep it up to date? As far as I can see, that would just serve
> > to multiply the amount of stale data...
>
> if each .c file has a MODULE_MAINTAINER() tag...
>
> people tend to update .c files a lot better than way off-the-side other
> files.

I think this approach makes a lot more sense than adding a bunch of
data to MAINTAINERS all for a tool that I'll wager few people would
ever use.

John
--
John W. Linville
[email protected]

2007-08-14 02:09:07

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/14/2007 03:51 AM, Rene Herman wrote:

> MODULE_MAINTAINER() was discussed a while ago but embedding information
> into the binary has the problem you can't ever change deployed systems,
> meaning it lags by design. If a maintainer changes, people would still
> be using the information from their old binaries, meaning a replaced
> maintainer might get contacted for potentially years still (and the new
> one not).
>
> (you could avoid that by placing not a name/address in the maintainer
> tag but a pointer to somewhere else but at that point this gets to be
> about solving something else).
>
> Keeping it in the source alone is fine. C files could just embed their
> MAINTAINERS entry as a header:
>
> /*
> * P: Maintainer
> * M: Mail patches to
> * L: Mailing list that is relevant to this area
> * W: Web-page with status/info
> * T: SCM tree type and location. Type is one of: git, hg, quilt.
> * S: Status, one of the following:
> */
>
> And probably adding fields:
>
> * I: Info/Summary (for index files and the like)
> * A: Author
> * G: License
>
> and such. Yes, while we're at it, we can pick better letters or full
> word tags ;-)

Okay, and if a single "maintenance unit" consists of many files, this gets
to be too much yes. But they _could_ just grow a header pointing back to the
MINTAINERS file/database;

/*
* MAINTAINERS: 3C359 NETWORK DRIVER
*/

Thst should keep things minimal enough to keep them updated, no?

Rene.

2007-08-14 02:15:30

by Manu Abraham

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 8/13/07, Arjan van de Ven <[email protected]> wrote:
>
> On Mon, 2007-08-13 at 19:33 +0200, Mariusz Kozlowski wrote:
> > Hello,
> >
> > I don't recall discusion about this so here are my 3 cents:
> >
> > I like the idea.
>
> I don't actually. It shows a central MAINTAINERS file is the wrong
> approach; just that 500+ patches to the same file were needed shows
> that.
>
> The maintainer info should be in the source file itself! That's the only
> reasonable way to keep it updated; now I'm all for having it machine
> parsable so that tools can use it, but it still really should be in the
> code itself, not in some central file that will always just go out of
> data, and will be a huge source of needless patch conflicts.


ACK. Very much agree. In fact MAINTAINERS is a wrong thing altogether.

For example, code/drivers under a subsystem, might not be easily add
"able" to a central file in some cases as it is scattered around.

Maintainer info in the source is the right way to go.

2007-08-14 09:14:24

by Alan

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

> MODULE_MAINTAINER() was discussed a while ago but embedding information into
> the binary has the problem you can't ever change deployed systems, meaning
> it lags by design. If a maintainer changes, people would still be using the
> information from their old binaries, meaning a replaced maintainer might get
> contacted for potentially years still (and the new one not).

And as was pointed out at the time, the people whining about that were
talking out of the wrong equipment. The supplier of the code can no more
or less easily change the binary as the matching source tree once its been
shipped. In fact its probably easier to change the binaries as the
sources will be left on CD.

The only non-stale source is git-blame.

Alan

2007-08-14 13:51:23

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl


On Tue, 2007-08-14 at 10:20 +0100, Alan Cox wrote:
> > MODULE_MAINTAINER() was discussed a while ago but embedding information into
> > the binary has the problem you can't ever change deployed systems, meaning
> > it lags by design. If a maintainer changes, people would still be using the
> > information from their old binaries, meaning a replaced maintainer might get
> > contacted for potentially years still (and the new one not).
>
> And as was pointed out at the time, the people whining about that were
> talking out of the wrong equipment. The supplier of the code can no more
> or less easily change the binary as the matching source tree once its been
> shipped. In fact its probably easier to change the binaries as the
> sources will be left on CD.
>
> The only non-stale source is git-blame.

the other angle is this: if someone becomes the new maintainer, does he
really want to "maintain" all the really old versions of the code out
there that predate him, or does he only want to go forward?
He wouldn't know anything about, say, the 2.4.21 version of the driver
anyway... nor would it be reasonable to expect him to.

I can an option where distros just set all the maintainer data to
themselves for long lived enterprise products... since effectively they
are the de-facto maintainers of those codebases.

--
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org

2007-08-14 14:22:25

by Adrian Bunk

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Mon, Aug 13, 2007 at 06:19:26PM -0700, Arjan van de Ven wrote:
>
> On Mon, 2007-08-13 at 16:37 -0400, Trond Myklebust wrote:
> > On Mon, 2007-08-13 at 10:42 -0700, Arjan van de Ven wrote:
> > > The maintainer info should be in the source file itself! That's the only
> > > reasonable way to keep it updated; now I'm all for having it machine
> > > parsable so that tools can use it, but it still really should be in the
> > > code itself, not in some central file that will always just go out of
> > > data, and will be a huge source of needless patch conflicts.
> >
> > If the problem is to do with people failing to update the MAINTAINERS
> > file, why would moving the same data into 20 or 30 source files motivate
> > them to keep it up to date? As far as I can see, that would just serve
> > to multiply the amount of stale data...
>
> if each .c file has a MODULE_MAINTAINER() tag...
>
> people tend to update .c files a lot better than way off-the-side other
> files.

The move of netdev to vger would have required updating
approx. 1300 C files...

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

2007-08-14 14:29:23

by Adrian Bunk

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, Aug 14, 2007 at 06:47:20AM -0700, Arjan van de Ven wrote:
>
> On Tue, 2007-08-14 at 10:20 +0100, Alan Cox wrote:
> > > MODULE_MAINTAINER() was discussed a while ago but embedding information into
> > > the binary has the problem you can't ever change deployed systems, meaning
> > > it lags by design. If a maintainer changes, people would still be using the
> > > information from their old binaries, meaning a replaced maintainer might get
> > > contacted for potentially years still (and the new one not).
> >
> > And as was pointed out at the time, the people whining about that were
> > talking out of the wrong equipment. The supplier of the code can no more
> > or less easily change the binary as the matching source tree once its been
> > shipped. In fact its probably easier to change the binaries as the
> > sources will be left on CD.
> >
> > The only non-stale source is git-blame.
>
> the other angle is this: if someone becomes the new maintainer, does he
> really want to "maintain" all the really old versions of the code out
> there that predate him, or does he only want to go forward?
>...

What about cases like maintainers using company email addresses and
changing company?

E.g. Jens is still block layer maintainer but the @suse address he used
for years suddenly no longer existed after he left Suse.

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

2007-08-14 14:37:46

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl


> > the other angle is this: if someone becomes the new maintainer, does he
> > really want to "maintain" all the really old versions of the code out
> > there that predate him, or does he only want to go forward?
> >...
>
> What about cases like maintainers using company email addresses and
> changing company?
>
> E.g. Jens is still block layer maintainer but the @suse address he used
> for years suddenly no longer existed after he left Suse.

if you're using a current enough kernel, no big deal. If you use an old
one and it bounces, then you go look further. The biggest deal for this
feature is to find out mechanically whom to CC patches to; if you're
doing patches, you better use a recent enough kernel annyway.
(and if you're doing patches against something ancient, you more than
likely have a modern tree somewhere for lookaside)
--
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org

2007-08-14 14:38:50

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl


On Tue, 2007-08-14 at 16:22 +0200, Adrian Bunk wrote:
> > people tend to update .c files a lot better than way off-the-side other
> > files.
>
> The move of netdev to vger would have required updating
> approx. 1300 C files...


and your point is?
This is an easily sed-able operation with a single git commit.
BIG DEAL... NOT.

--
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org

2007-08-14 15:07:59

by John W. Linville

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, Aug 14, 2007 at 04:22:05PM +0200, Adrian Bunk wrote:
> On Mon, Aug 13, 2007 at 06:19:26PM -0700, Arjan van de Ven wrote:
> >
> > On Mon, 2007-08-13 at 16:37 -0400, Trond Myklebust wrote:
> > > On Mon, 2007-08-13 at 10:42 -0700, Arjan van de Ven wrote:
> > > > The maintainer info should be in the source file itself! That's the only
> > > > reasonable way to keep it updated; now I'm all for having it machine
> > > > parsable so that tools can use it, but it still really should be in the
> > > > code itself, not in some central file that will always just go out of
> > > > data, and will be a huge source of needless patch conflicts.
> > >
> > > If the problem is to do with people failing to update the MAINTAINERS
> > > file, why would moving the same data into 20 or 30 source files motivate
> > > them to keep it up to date? As far as I can see, that would just serve
> > > to multiply the amount of stale data...
> >
> > if each .c file has a MODULE_MAINTAINER() tag...
> >
> > people tend to update .c files a lot better than way off-the-side other
> > files.
>
> The move of netdev to vger would have required updating
> approx. 1300 C files...

Wouldn't it be individuals listed in MODULE_MAINTAINER?

Even if it is the mailing list, is this the kind of thing that sed
is perfect to handle?

John
--
John W. Linville
[email protected]

2007-08-14 15:58:41

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/14/2007 11:20 AM, Alan Cox wrote:

>> MODULE_MAINTAINER() was discussed a while ago but embedding information into
>> the binary has the problem you can't ever change deployed systems, meaning
>> it lags by design. If a maintainer changes, people would still be using the
>> information from their old binaries, meaning a replaced maintainer might get
>> contacted for potentially years still (and the new one not).
>
> And as was pointed out at the time, the people whining about that were
> talking out of the wrong equipment. The supplier of the code can no more
> or less easily change the binary as the matching source tree once its been
> shipped. In fact its probably easier to change the binaries as the
> sources will be left on CD.

That's just not a complete argument if one accepts that users can be people
without _any_ source tree lying around. There's no reason this user would
believe that any source tree, matching or not, would provide him with beter
information than the information modinfo just spat at him. The only thing
that helps is not have modinfo spit _any_ contact information at him so he
knows to look elsewhere.

And even more importantly ...

PUHLEASE PUHLEASE don't dirty this discussion with binary tags in the first
place! It isn't about MODULE_FOO() tags, it is about tagging /source/ files
to help with putting CCs on patch submissals. People who submit patches sort
of by definition have a current source tree lying around, and do not need to
grab information from any binaries. As such, putting it in a comment inside
the source is all that's relevant here, not anything to do with binaries.

So, let's talk source. If we want to link source file foo.c and the
MAINTAINERS information, we have 3 options:

1. MAINTAINERS --> foo.c

This is what Joe Perches' current 550 piece proposal does. Although I can
hardly wait for version 2 of the patchset, high potential to turn into an
incomplete obsolete mess upon adding, removing and moving files around.

2. foo.c --> MAINTAINERS

Putting a copy of the MAINTAINERS entry in a header in a every single source
file (Joe already nicely provided us with the paths to script something like
that) works but considering that single "maintenance units" might consist of
many source files, people might not bother to keep them all updated and in
sync and really, they shouldn't need to.

Sticking a single backlink to a MAINTAINERS file entry at the top of a
source file might work:

--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ ... @@
+/*
+ * MAINTAINERS: IDE/ATAPI CDROM DRIVER
+ */

[ ... ]

3. foo.c <--> some 3rd file <--> MAINTAINERS

Just for completeness and trying to make sure I'm not inventing an or/or but
I don't see any use in this linkage, so it's 1 or 2 it seems.

Note, perhaps after we have a MAINTAINERS source tag, we can discuss whether
or not it could in fact be a MODULE_MAINTAINER() binary tag, but that's then
about something else at that point...

Rene.

2007-08-14 17:00:49

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, 2007-08-14 at 17:53 +0200, Rene Herman wrote:
> It isn't about MODULE_FOO() tags, it is about tagging /source/ files
> to help with putting CCs on patch submissals.
> If we want to link source file foo.c and the
> MAINTAINERS information, we have 3 options:
> 1. MAINTAINERS --> foo.c
> 2. foo.c --> MAINTAINERS
> 3. foo.c <--> some 3rd file <--> MAINTAINERS

I added [email protected] and Junio Hamano

Another possibility is improving git to allow
some sort of "declaration of interest" in bits
of projects.

That would allow options like:

o git-format-patch to include CCs
o git-commit and git-branch to notify or
take some other action

etc...

It's generic, applies to multiple projects, etc.

I don't care which mechanism is used, I just want
to be able to CC appropriate people and lists on
changes to their areas of interest without wasting
time searching all over the place per file changed.

The LK MAINTAINERS file is weakly specified, but
I'm not a git-geek, nor do I want to be one, so
MAINTAINERS was the file I could easiest change
with minimal impact to LK sources.

The get_maintainer script is trivial,
I'm not wedded to it at all.

2007-08-14 18:07:36

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/14/2007 07:00 PM, Joe Perches wrote:

> On Tue, 2007-08-14 at 17:53 +0200, Rene Herman wrote:

>> It isn't about MODULE_FOO() tags, it is about tagging /source/ files
>> to help with putting CCs on patch submissals.
>> If we want to link source file foo.c and the
>> MAINTAINERS information, we have 3 options:
>> 1. MAINTAINERS --> foo.c
>> 2. foo.c --> MAINTAINERS
>> 3. foo.c <--> some 3rd file <--> MAINTAINERS
>
> I added [email protected] and Junio Hamano

Well, yes, I agree -- going through GIT seems to be the only really workable
solution.

That is, instead of (case 2, you snipped it) having a backlink to the
MAINTAINERS file in a header inside the source GIT would maintain this
backlink -- and at that point, you can basically forego the MAINTAINERS file
completely other than as something GIT can generate and just regard all of
it meta-information (you may want to generate MAINTAINERS for releases but
making GIT the source is the idea).

"git info --maintainer drivers/ide/ide-cd.c" or some such would say "Alan
Cox <alan@...>".

There are more possibilities for this kind of meta information. git info
--author, git info --license, git info --whatever. Given that it's intended
for developers, needing GIT should not get in the way but there's always the
generated MAINTAINERS file in releases as well.

It would ofcourse automatically stay up to date through deleting and moving
of files. You'd probably want to devise a way to enable a submitter to also
automatically provide meta-information upon addition of files. This can be
done in the same way as a "Signed-off-by". Just tags in a submit email.

This should probably turn out to be the way things work yes. The paths in
the MAINTAINERS file grow stale, source headers might also and sticking
headers on every source file isn't nice anyway -- it's meta-information and
the SCM can maintain it.

Rene.

2007-08-14 18:28:36

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, 2007-08-14 at 20:03 +0200, Rene Herman wrote:
> "git info --maintainer drivers/ide/ide-cd.c" or some such would say "Alan
> Cox <alan@...>".

Perhaps maintainer(s), approver(s), listener(s)?

I think something like this should be a git-goal.
What do the git-wranglers think?

Until a time in the future when a system like that exists,
I suggest keeping MAINTAINERS up-to-date with

F: pattern

It'll be useful as git-set-maintainer seeds at least.

> sticking headers on every source file isn't nice anyway --
> it's meta-information and the SCM can maintain it.

It's like looking at $CVS$ keywords. Unsightly.

cheers, Joe

2007-08-14 18:37:20

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/14/2007 08:28 PM, Joe Perches wrote:

> On Tue, 2007-08-14 at 20:03 +0200, Rene Herman wrote:
>> "git info --maintainer drivers/ide/ide-cd.c" or some such would say "Alan
>> Cox <alan@...>".
>
> Perhaps maintainer(s), approver(s), listener(s)?
>
> I think something like this should be a git-goal.
> What do the git-wranglers think?

I agree. If this thing has source management, let's use it.

> Until a time in the future when a system like that exists,
> I suggest keeping MAINTAINERS up-to-date with
>
> F: pattern
>
> It'll be useful as git-set-maintainer seeds at least.

Yes. Seeing as how it's already been useful in updating the information it
would be a shame to throw what you already did away. Don't underestimate how
fast git-wranglers can implement stuff if they agree though... :-)

>> sticking headers on every source file isn't nice anyway --
>> it's meta-information and the SCM can maintain it.
>
> It's like looking at $CVS$ keywords. Unsightly.

Again agree.

Rene.


2007-08-14 18:43:17

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl



On Tue, 14 Aug 2007, Joe Perches wrote:

> On Tue, 2007-08-14 at 20:03 +0200, Rene Herman wrote:
> > "git info --maintainer drivers/ide/ide-cd.c" or some such would say "Alan
> > Cox <alan@...>".
>
> Perhaps maintainer(s), approver(s), listener(s)?
>
> I think something like this should be a git-goal.
> What do the git-wranglers think?

The thing is, if you have git, you can basically already do this.

Do a script like this:

#!/bin/sh
git log --since=6.months.ago -- "$@" |
grep -i '^ [-a-z]*by:.*@' |
sort | uniq -c |
sort -r -n | head

and it gives you a rather good picture of who is involved with a
particular subdirectory or file.

A much *better* picture than some manually maintained thing, in fact,
because it tells you who really does the work, and which way patches go...

(Maybe you want to add a

grep -v '\(Linus Torvalds\)\|\(Andrew Morton\)'

to avoid seeing the normal chain too much, but hey, we probably want to
know too. Anyway - the script can certainly be tweaked, the point is
really just that the git tree _already_ contains the relevant
information).

Linus

2007-08-14 18:55:30

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, 2007-08-14 at 11:40 -0700, Linus Torvalds wrote:
> Anyway - the script can certainly be tweaked, the point is
> really just that the git tree _already_ contains the relevant
> information).

I believe it's not specific enough.
Things like email lists would never show up.

2007-08-14 19:34:19

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, Aug 14, 2007 at 11:40:09AM -0700, Linus Torvalds wrote:

> A much *better* picture than some manually maintained thing, in fact,
> because it tells you who really does the work, and which way patches go...
>
> (Maybe you want to add a
>
> grep -v '\(Linus Torvalds\)\|\(Andrew Morton\)'
>
> to avoid seeing the normal chain too much, but hey, we probably want to
> know too. Anyway - the script can certainly be tweaked, the point is
> really just that the git tree _already_ contains the relevant
> information).

FWIW, I suspect that we are looking at that from the wrong POV. If
that's about "who ought to be Cc'd on the issues dealing with <list
of pathnames>", why does it have to be tied to "who is maintainer for
<pathname>"?

I'm not suggesting something like [email protected] with something
like majordomo allowing to add yourself to those, but something less
extreme in that direction might be worth thinking about... Hell,
even simple
$ finger fs/minix/[email protected]
with majordomo-like interface for adding yourself to such lists
might solve most of those problems...

2007-08-14 19:57:51

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, 2007-08-14 at 20:33 +0100, Al Viro wrote:
> FWIW, I suspect that we are looking at that from the wrong POV. If
> that's about "who ought to be Cc'd on the issues dealing with <list
> of pathnames>", why does it have to be tied to "who is maintainer for
> <pathname>"?

Right, it doesn't have to.
I think a notification list would be just fine.

> I'm not suggesting something like [email protected] with something
> like majordomo allowing to add yourself to those, but something less
> extreme in that direction might be worth thinking about...
> Hell, even simple
> $ finger fs/minix/[email protected]
> with majordomo-like interface for adding yourself to such lists
> might solve most of those problems...

Might solve all of my wants for this problem.

cheers, Joe

2007-08-15 01:32:27

by Junio C Hamano

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Joe Perches <[email protected]> writes:

> On Tue, 2007-08-14 at 17:53 +0200, Rene Herman wrote:
>> It isn't about MODULE_FOO() tags, it is about tagging /source/ files
>> to help with putting CCs on patch submissals.
>> If we want to link source file foo.c and the
>> MAINTAINERS information, we have 3 options:
>> 1. MAINTAINERS --> foo.c
>> 2. foo.c --> MAINTAINERS
>> 3. foo.c <--> some 3rd file <--> MAINTAINERS
>
> I added [email protected] and Junio Hamano
>
> Another possibility is improving git to allow
> some sort of "declaration of interest" in bits
> of projects.
>
> That would allow options like:
>
> o git-format-patch to include CCs
> o git-commit and git-branch to notify or
> take some other action
>
> etc...

There are things git can help, and other things git does not
have any business with.

1. Finding out who the potentially interested parties are.

Linus already gave a script to grep *-by: lines from commit
messages. I find this is probably be the best option, as it
follows "yesterday's weather". People who had dealt with the
area are the ones who are likely to be interested.

git records who did the work (author) and who did the
integration to git-based patch flow (committer). It does not
structurally track intermediate people who touched the patch
on e-mail, but Signed-off-by: and Acked-by: (and sometimes I
see Cc: as well in the commit messages) are accepted social
convention in the kernel community, and taking advantage of
that is a good idea.


2. Making it easier to send your patches to these people.

There are three possible places to add Signed-off-by: and
friends in the commit messages you would mail out:

- When you create your own commit, or commit a patch that
came to you via e-mail. The commit object in your tree
will carry them --- you can send format-patch output as-is
to Linus or Andrew and you are done.

- When you run format-patch; your commit will not have extra
Cc: or "interested parties" information, you will use the
result of 1. and insert it near your own Signed-off-by: to
the format-patch output.

- When you send format-patch output, via git-send-email
perhaps.

To make the result useful for "yesterday's weather" approach,
I think it would be the best to do the first. After all,
your commit may propagate via "git pull" not over e-mail, and
no postprocessing approach would work in such a case.

The second one is my least favorite. format-patch output is
designed to record author/committer (i.e. origin) and not to
record recipient at all. "Who's interested in this" does not
simply belong there.

On the other hand, git-send-email _is_ all about sending it
out, and it needs to know who your patch should reach. I
think it makes sense to have one script that, given a set of
paths that are affected, gives a list of potentially
interested people (that is "Finding" part -- and I see there
are 600+ patches to implement this on the list), and a new
option to git-send-email to (1) inspect the patch to see what
paths are affected, and (2) call that "Find" script to figure
out whom to send it to, and probably asking for confirmation.




2007-08-15 01:39:08

by Richard Knutsson

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Linus Torvalds wrote:
> On Tue, 14 Aug 2007, Joe Perches wrote:
>
>
>> On Tue, 2007-08-14 at 20:03 +0200, Rene Herman wrote:
>>
>>> "git info --maintainer drivers/ide/ide-cd.c" or some such would say "Alan
>>> Cox <alan@...>".
>>>
>> Perhaps maintainer(s), approver(s), listener(s)?
>>
>> I think something like this should be a git-goal.
>> What do the git-wranglers think?
>>
>
> The thing is, if you have git, you can basically already do this.
>
> Do a script like this:
>
> #!/bin/sh
> git log --since=6.months.ago -- "$@" |
> grep -i '^ [-a-z]*by:.*@' |
>
sed -r "s/^.*by: \"?([^\"]+)\"?/\1/" |
> sort | uniq -c |
> sort -r -n | head
>
> and it gives you a rather good picture of who is involved with a
> particular subdirectory or file.
>
>
Like the script! Especially since it reveled --since=6.month.ago and
uniq to me.
Just wondering, why order them in the acked, signed and tested? Other
then removing those, the added 'sed' also fix the <name> vs
"<name>"-"problem". + adding '-i' to uniq should help the result too, right?

Now a simple "diffstat -p1 -l <patch> | xargs <preferred script-name>"
makes the day. Too bad, as Joe pointed out, it does not include relevant ML.

cheers
Richard Knutsson

2007-08-15 01:46:22

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/14/2007 09:33 PM, Al Viro wrote:

> FWIW, I suspect that we are looking at that from the wrong POV. If
> that's about "who ought to be Cc'd on the issues dealing with <list
> of pathnames>", why does it have to be tied to "who is maintainer for
> <pathname>"?
>
> I'm not suggesting something like [email protected] with something
> like majordomo allowing to add yourself to those, but something less
> extreme in that direction might be worth thinking about... Hell,
> even simple
> $ finger fs/minix/[email protected]
> with majordomo-like interface for adding yourself to such lists
> might solve most of those problems...

It mostly is just about that it seems. However, this would not also allow
the other information currently in the MAINTAINERS file to be queried in
similar ways.

Git could grow a generic file meta data implementation through the use of
tags, sort of like tags on multimedia files although while with multimedia
files the tags are in fact stored as a file header, here you'd keep them
just in git. Any project using git would be free to define its own set of
info tags and you'd supply them to git simply as a list of

<tag>=<value>

pairs:

$ git info --add drivers/ide/ide-cd.c <<EOF
CC="Alan Cox <[email protected]>", [email protected]
EOF

Or as a more expansive example, with the tags set on a directory (and the
output shown this time):

$ git info drivers/infiniband/
CC="Roland Dreier <[email protected]>"
CC="Sean Hefty <[email protected]>"
CC="Hal Rosenstock <[email protected]>"
[email protected]
W=http://www.openib.org/
T=git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git

$ git info --type="W" drivers/infiniband/
http://www.openib.org/

The project can link the actual tags such as CC, W and T to --options for
the "info" command in the git configuration file for the tree (and/or just
define a few upfront I guess) making it look nicer:

$ git info --cc drivers/infiniband/
"Roland Dreier <[email protected]>"
"Sean Hefty <[email protected]>"
"Hal Rosenstock <[email protected]>"
[email protected]

$ git info --website drivers/infiniband/
http://www.openib.org/

$ git info --tree drivers/infiniband/
git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git

Extra: when you have such an implementation, you can use it for other
purposes as well such as the summary Documentation/ files want for the
00-INDEX files:

$ git info --summary Documentation/BUG-HUNTING
brute force method of doing binary search of patches to find bug.

And importantly -- when queuried for a file that itself doesn't have the
requested info tag:

$ git info --cc drivers/infiniband/core/addr.c

git looks for the tag on the drivers/infiniband/core/ directory next, and
then on drivers/infiniband/, where it finds it. [email protected]
would be the final fallback, being set on the project root.

I'd really like something like this. As long as projects are both free to
use and not use them and free to define their own set of tags I believe this
would work very nicely.

Once you have these tags, you can basically use them for anything.

Rene.

2007-08-15 02:12:33

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, 2007-08-14 at 18:31 -0700, Junio C Hamano wrote:
> On the other hand, git-send-email _is_ all about sending it
> out, and it needs to know who your patch should reach. I
> think it makes sense to have one script that, given a set of
> paths that are affected, gives a list of potentially
> interested people (that is "Finding" part -- and I see there
> are 600+ patches to implement this on the list), and a new
> option to git-send-email to (1) inspect the patch to see what
> paths are affected, and (2) call that "Find" script to figure
> out whom to send it to, and probably asking for confirmation.

Yes please.

The LK MAINTAINERS file is ugly.

Might there be a git portable way to "find"?

Rene Herman had an idea about using some git
metadata that might be useful. The completely
external data approach suggested by Al Viro
might be OK too in that it wouldn't tie listeners
to git requiring more content in git metadata.

Perhaps both via something like:

--external-find "cmd @filelist"

Thanks, Joe


2007-08-15 05:26:09

by Junio C Hamano

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Joe Perches <[email protected]> writes:

> Yes please.

Huh?

> Rene Herman had an idea about using some git
> metadata that might be useful. The completely
> external data approach suggested by Al Viro
> might be OK too in that it wouldn't tie listeners
> to git requiring more content in git metadata.

The reason I found Linus's suggestion desirable is because it
fundamentally does not require git to track any metadata. If
the commits are in git, then his script would let you gather the
data, but otherwise you should be able to do the same by
grepping patches. Obviously you would need to filter by paths,
looking at the diffstat, but the approach does _not_ tie users
to git.


2007-08-15 05:46:22

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/15/2007 07:25 AM, Junio C Hamano wrote:

> Joe Perches <[email protected]> writes:

>> Rene Herman had an idea about using some git
>> metadata that might be useful. The completely
>> external data approach suggested by Al Viro
>> might be OK too in that it wouldn't tie listeners
>> to git requiring more content in git metadata.
>
> The reason I found Linus's suggestion desirable is because it
> fundamentally does not require git to track any metadata. If
> the commits are in git, then his script would let you gather the
> data, but otherwise you should be able to do the same by
> grepping patches. Obviously you would need to filter by paths,
> looking at the diffstat, but the approach does _not_ tie users
> to git.

I believe that wouldn't be much of a problem really. Users in this context
are people submitting patches and most people who do will, could and maybe
even should be running git these days -- git is very good, GPLd and the
Linux source code managament system.

But for occasional contributors that don't, a MAINTAINERS file much like the
current could also be generated into releases; it's just that the source
would live as file/directory metadata inside git.

Still like the notion of a generic file/directory metadata implementation
inside git, through that "<tag>=<value>" system that I suggested. Wouldn't
be intrinsically tied to Linux or anything, with any project being free to
invent their own tags and has heaps of possible uses, from the current
MAINTAINERS info, through summary information, author/licese information,
anything goes...

Rene.

2007-08-15 09:29:20

by Stefan Richter

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Linus Torvalds wrote:
> #!/bin/sh
> git log --since=6.months.ago -- "$@" |
> grep -i '^ [-a-z]*by:.*@' |
> sort | uniq -c |
> sort -r -n | head
>
> and it gives you a rather good picture of who is involved with a
> particular subdirectory or file.

No, it doesn't. The subscribers of <[email protected]> are
not listed in patch logs.
--
Stefan Richter
-=====-=-=== =--- -====
http://arcgraph.de/sr/

2007-08-15 09:39:53

by Stefan Richter

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Joe Perches wrote:
> On Tue, 2007-08-14 at 18:31 -0700, Junio C Hamano wrote:
>> On the other hand, git-send-email _is_ all about sending it
>> out, and it needs to know who your patch should reach. I
>> think it makes sense to have one script that,
[...]

> Yes please.
>
> The LK MAINTAINERS file is ugly.
>
> Might there be a git portable way to "find"?

Note, maintainer contacts
- should be available to patch submitters and
- must be available to *problem reporters*
without having to have git and a .git repo.
--
Stefan Richter
-=====-=-=== =--- -====
http://arcgraph.de/sr/

2007-08-15 11:49:04

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/15/2007 11:39 AM, Stefan Richter wrote:

> Note, maintainer contacts
> - should be available to patch submitters and
> - must be available to *problem reporters*
> without having to have git and a .git repo.

That "must" seems rather strong. But those few non-developer users that
could care are served by a MAINTAINERS file generated into releases.

Rene.

2007-08-15 13:21:37

by Satyam Sharma

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Hi Rene,


On Wed, 15 Aug 2007, Rene Herman wrote:

> It mostly is just about that it seems. However, this would not also allow the
> other information currently in the MAINTAINERS file to be queried in similar
> ways.
>
> Git could grow a generic file meta data implementation through the use of
> tags, sort of like tags on multimedia files although while with multimedia
> files the tags are in fact stored as a file header, here you'd keep them just
> in git. Any project using git would be free to define its own set of info tags
> and you'd supply them to git simply as a list of
>
> <tag>=<value>
>
> pairs:
>
> $ git info --add drivers/ide/ide-cd.c <<EOF
> CC="Alan Cox <[email protected]>", [email protected]
> EOF
>
> Or as a more expansive example, with the tags set on a directory (and the
> output shown this time):
>
> $ git info drivers/infiniband/
> CC="Roland Dreier <[email protected]>"
> CC="Sean Hefty <[email protected]>"
> CC="Hal Rosenstock <[email protected]>"
> [email protected]

Considering some people may want to differentiate between "those who want
to be Cc'ed for patches on subsystem X" and "those who are maintainer(s)
of subsystem X", I think another "P=" kind of tag might also be useful
here.

> W=http://www.openib.org/
> T=git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
>
> $ git info --type="W" drivers/infiniband/
> http://www.openib.org/
>
> The project can link the actual tags such as CC, W and T to --options for the
> "info" command in the git configuration file for the tree (and/or just define
> a few upfront I guess) making it look nicer:
>
> $ git info --cc drivers/infiniband/
> "Roland Dreier <[email protected]>"
> "Sean Hefty <[email protected]>"
> "Hal Rosenstock <[email protected]>"
> [email protected]
>
> $ git info --website drivers/infiniband/
> http://www.openib.org/
>
> $ git info --tree drivers/infiniband/
> git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
>
> Extra: when you have such an implementation, you can use it for other purposes
> as well such as the summary Documentation/ files want for the 00-INDEX files:
>
> $ git info --summary Documentation/BUG-HUNTING
> brute force method of doing binary search of patches to find bug.
>
> And importantly -- when queuried for a file that itself doesn't have the
> requested info tag:
>
> $ git info --cc drivers/infiniband/core/addr.c
>
> git looks for the tag on the drivers/infiniband/core/ directory next, and then
> on drivers/infiniband/, where it finds it. [email protected] would
> be the final fallback, being set on the project root.
>
> I'd really like something like this. As long as projects are both free to use
> and not use them and free to define their own set of tags I believe this would
> work very nicely.
>
> Once you have these tags, you can basically use them for anything.

I'd really _love_ a tool that does all that what you've proposed above!

But why does it have to be "git-info" or anything in the git(7) suite for
that matter? This sounds like a job for a different specialised tool,
along with ".metatags" kind of files dispersed in the source tree.


Satyam

2007-08-15 13:44:33

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/15/2007 03:33 PM, Satyam Sharma wrote:

[ git info --maintainer ]

> I'd really _love_ a tool that does all that what you've proposed above!
>
> But why does it have to be "git-info" or anything in the git(7) suite for
> that matter? This sounds like a job for a different specialised tool,
> along with ".metatags" kind of files dispersed in the source tree.

To automatically move (and delete) the meta-data alongside the files
themselves is a reason.

More generally -- shouldn't it? This is about source management (well, maybe
more about project management, but...) and the source code management tool
looks to be the right place for that. The different parts of git are
somewhat/fairly stand-alone as is, no?

Rene.

2007-08-15 13:53:51

by Kyle Moffett

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Aug 15, 2007, at 09:39:44, Rene Herman wrote:
> On 08/15/2007 03:33 PM, Satyam Sharma wrote:
>
> [ git info --maintainer ]
>
>> I'd really _love_ a tool that does all that what you've proposed
>> above! But why does it have to be "git-info" or anything in the
>> git(7) suite for that matter? This sounds like a job for a
>> different specialised tool, long with ".metatags" kind of files
>> dispersed in the source tree.
>
> To automatically move (and delete) the meta-data alongside the
> files themselves is a reason.
>
> More generally -- shouldn't it? This is about source management
> (well, maybe more about project management, but...) and the source
> code management tool looks to be the right place for that. The
> different parts of git are somewhat/fairly stand-alone as is, no?

If you were going to do that I'd just suggest making git aware of the
"user.*" extended attributes and having it save those into the git
repo along with the permission data.

Cheers,
Kyle Moffett

2007-08-15 15:31:44

by Ray Lee

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 8/15/07, Stefan Richter <[email protected]> wrote:
> Linus Torvalds wrote:
> > #!/bin/sh
> > git log --since=6.months.ago -- "$@" |
> > grep -i '^ [-a-z]*by:.*@' |
> > sort | uniq -c |
> > sort -r -n | head
> >
> > and it gives you a rather good picture of who is involved with a
> > particular subdirectory or file.
>
> No, it doesn't. The subscribers of <[email protected]> are
> not listed in patch logs.

Then maybe they should be added into the patch logs. A CC: line isn't
that big of a deal, and also shows who got notified.

2007-08-15 17:27:34

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Wed, 2007-08-15 at 13:44 +0200, Rene Herman wrote:
> On 08/15/2007 11:39 AM, Stefan Richter wrote:
> > Note, maintainer contacts
> > - should be available to patch submitters and
> > - must be available to *problem reporters*
> > without having to have git and a .git repo.
> That "must" seems rather strong. But those few non-developer users that
> could care are served by a MAINTAINERS file generated into releases.

Good idea for scripts to help kernel bug reporters.
REPORTING-BUGS is underutilized as a guide.

I think Bug reporting is a separate issue from patch CC'ing.
I'd rather have MAINTAINERS disappear altogether.


2007-08-15 19:38:12

by Krzysztof Halasa

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Al Viro <[email protected]> writes:

> I'm not suggesting something like [email protected] with something
> like majordomo allowing to add yourself to those,

Why not

> but something less
> extreme in that direction might be worth thinking about... Hell,
> even simple
> $ finger fs/minix/[email protected]
> with majordomo-like interface for adding yourself to such lists
> might solve most of those problems...

I think so.

And you would be able to add yourself even if you're merely
interested in something, not a maintainer.

However I think the mailing lists could do better. Duplicate
suppression, among other things.

And they could eventually supersede the subsystem mailing lists
we use today. Just use [email protected] or [email protected].
--
Krzysztof Halasa

2007-08-15 23:20:30

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Wed, Aug 15, 2007 at 09:37:45PM +0200, Krzysztof Halasa wrote:
> > I'm not suggesting something like [email protected] with something
> > like majordomo allowing to add yourself to those,
>
> Why not

You'd need to implement serious anti-spam measures for that. Besides,
cross-postings between random sets of lists would become a nightmare
pretty soon.

2007-08-16 11:02:48

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/15/2007 03:52 PM, Kyle Moffett wrote:

> On Aug 15, 2007, at 09:39:44, Rene Herman wrote:
>> On 08/15/2007 03:33 PM, Satyam Sharma wrote:
>>
>> [ git info --maintainer ]
>>
>>> I'd really _love_ a tool that does all that what you've proposed
>>> above! But why does it have to be "git-info" or anything in the
>>> git(7) suite for that matter? This sounds like a job for a different
>>> specialised tool, long with ".metatags" kind of files dispersed in
>>> the source tree.
>>
>> To automatically move (and delete) the meta-data alongside the files
>> themselves is a reason.
>>
>> More generally -- shouldn't it? This is about source management (well,
>> maybe more about project management, but...) and the source code
>> management tool looks to be the right place for that. The different
>> parts of git are somewhat/fairly stand-alone as is, no?
>
> If you were going to do that I'd just suggest making git aware of the
> "user.*" extended attributes and having it save those into the git repo
> along with the permission data.

Am looking at it but am not so sure that's a very good idea. I guess it'd be
largely okay-ish to require the repo to be on a filesystem that supports EAs
for this feature to work, but keeping the attributes intact over file system
operations seems not all that easy (yet). Having not used EAs before I may
be missing something but my version of "cp" for example (GNU coreutils 6.9)
appears to not copy them. Nor do they seem to survive a trip through GNU tar
1.16.1. EAs appear to not be very useful unless every single tool supports
them -- a repo should be resistant against simple operations like that.

Googling around, I see subversion already has this and calls the meta-data
"properties" (svn propset/get and friends). It uses a few properties itself,
such as the svn:executable property (which I saw is also the only permission
bit git keeps) and svn:ignore, which serves the same role as the .gitignore
files for git. Both those would fit into this scheme nicely for git as well,
if git were to do something similar and reserve for example the "git.*"
namespace for internal use.

Junio (and others), do you have an opinion on this? If these properties are
versioned themselves such as in svn I believe it's a decidedly non-trivial
addition (and I'm a complete git newbie) but to me, they look incredibly
useful, both for the original "maintainers" properties (and anyone else one
would want to come up with such as summary properties and author/license
stuff) and even for git internal reasons such as sketched above.

The git-blame thing as sketched before by Linus would never be able to point
out mailing lists, or general lists of "interested parties" for example, but
these properties can do anything...

Rene.

2007-08-16 11:12:31

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/16/2007 12:58 PM, Rene Herman wrote:

> On 08/15/2007 03:52 PM, Kyle Moffett wrote:

>> If you were going to do that I'd just suggest making git aware of the
>> "user.*" extended attributes and having it save those into the git
>> repo along with the permission data.
>
> Am looking at it but am not so sure that's a very good idea. I guess
> it'd be largely okay-ish to require the repo to be on a filesystem that
> supports EAs for this feature to work, but keeping the attributes intact
> over file system operations seems not all that easy (yet). Having not
> used EAs before I may be missing something but my version of "cp" for
> example (GNU coreutils 6.9) appears to not copy them. Nor do they seem
> to survive a trip through GNU tar 1.16.1. EAs appear to not be very
> useful unless every single tool supports them -- a repo should be
> resistant against simple operations like that.
>
> Googling around, I see subversion already has this and calls the
> meta-data "properties" (svn propset/get and friends). It uses a few
> properties itself, such as the svn:executable property (which I saw is
> also the only permission bit git keeps) and svn:ignore, which serves the
> same role as the .gitignore files for git. Both those would fit into
> this scheme nicely for git as well, if git were to do something similar
> and reserve for example the "git.*" namespace for internal use.
>
> Junio (and others), do you have an opinion on this? If these properties
> are versioned themselves such as in svn I believe it's a decidedly
> non-trivial addition (and I'm a complete git newbie) but to me, they
> look incredibly useful, both for the original "maintainers" properties
> (and anyone else one would want to come up with such as summary
> properties and author/license stuff) and even for git internal reasons
> such as sketched above.
>
> The git-blame thing as sketched before by Linus would never be able to
> point out mailing lists, or general lists of "interested parties" for
> example, but these properties can do anything...

The svn implemention is that a single property is free-form text. As such, I
guess a property would be just another file, although one that only lives in
the index and is linked from the file/directory it is a property of.

Perhaps that immediately suggests an implementation to someone already
familiar with git internals?

Rene.

2007-08-16 11:45:20

by Salikh Zakirov

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Rene Herman wrote:
> Perhaps that immediately suggests an implementation to someone already
> familiar with git internals?

perhaps http://www.kernel.org/pub/software/scm/git/docs/gitattributes.html
and http://www.kernel.org/pub/software/scm/git/docs/git-check-attr.html
can help you?

2007-08-16 12:01:41

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/16/2007 01:26 PM, Salikh Zakirov wrote:

Please don't drop CCs.

> Rene Herman wrote:
>> Perhaps that immediately suggests an implementation to someone already
>> familiar with git internals?
>
> perhaps http://www.kernel.org/pub/software/scm/git/docs/gitattributes.html
> and http://www.kernel.org/pub/software/scm/git/docs/git-check-attr.html
> can help you?

No, thanks, saw them, but .gitattributes is in fact in the same category as
.gitignore, which would _be_ a property.

If you do this stuff in files scattered around the tree, updating and moving
stuff becomes a pain -- the tool would need to go edit files.

Rene.



2007-08-16 15:41:25

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Thu, Aug 16, 2007 at 12:58:19PM +0200, Rene Herman wrote:

> Googling around, I see subversion already has this and calls the meta-data
> "properties" (svn propset/get and friends). It uses a few properties
> itself, such as the svn:executable property (which I saw is also the only
> permission bit git keeps) and svn:ignore, which serves the same role as the
> .gitignore files for git. Both those would fit into this scheme nicely for
> git as well, if git were to do something similar and reserve for example
> the "git.*" namespace for internal use.

"svn does it" is usually an indication of a bad idea, but anyway - it's
fundamentally wrong in this case, simply because "$FOO is interested
in $BAR" is a property of $FOO, not of $BAR.

> The git-blame thing as sketched before by Linus would never be able to
> point out mailing lists, or general lists of "interested parties" for
> example, but these properties can do anything...

No, they can not. "I'm interested in drivers/foo/bar.c fixes" is not
an earth-shattering event and it sure as hell does not create a new revision
of the tree.

2007-08-16 15:57:36

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/16/2007 05:40 PM, Al Viro wrote:

> On Thu, Aug 16, 2007 at 12:58:19PM +0200, Rene Herman wrote:

>> The git-blame thing as sketched before by Linus would never be able to
>> point out mailing lists, or general lists of "interested parties" for
>> example, but these properties can do anything...
>
> No, they can not. "I'm interested in drivers/foo/bar.c fixes" is not
> an earth-shattering event and it sure as hell does not create a new revision
> of the tree.

That's true. Okay, it can't do those general lists of interested parties.

Rene.

2007-08-16 19:00:59

by Junio C Hamano

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

Rene Herman <[email protected]> writes:

> Am looking at it but am not so sure that's a very good idea. I guess
> it'd be largely okay-ish to require the repo to be on a filesystem
> that supports EAs for this feature to work, but keeping the attributes
> intact over file system operations seems not all that easy
> (yet). Having not used EAs before I may be missing something but my
> version of "cp" for example (GNU coreutils 6.9) appears to not copy
> them. Nor do they seem to survive a trip through GNU tar 1.16.1. EAs
> appear to not be very useful unless every single tool supports them --
> a repo should be resistant against simple operations like that.
>
> Googling around, I see subversion already has this and calls the
> meta-data "properties" (svn propset/get and friends). It uses a few
> properties itself, such as the svn:executable property (which I saw is
> also the only permission bit git keeps) and svn:ignore, which serves
> the same role as the .gitignore files for git. Both those would fit
> into this scheme nicely for git as well, if git were to do something
> similar and reserve for example the "git.*" namespace for internal use.
>
> Junio (and others), do you have an opinion on this?

Please step back a bit and imagine a world in which there was no
git. IOW, you kernel folks switched to tarballs and patches 20
months ago. It is a far superiour solution compared to CVS and
SVN, so it ought to work, right ;-)?

Now, would you implement the "whom would I send my patches to"
with EAs?

I would hope not.

Git or no git, I think a file that can be viewed with less,
edited with regular editor and processed with sed/perl/grep
tools is the way to go. I do not think adding 600+ patches to
the single MAINTAINERS list is workable in the longer term, as
it would become the single file many subsystem people need to
update and is asking for merge conflicts, but I think a file
with known name (say, "CcMe.txt") sprinkled in relevant
subdirectories, perhaps with the same format originally
suggested for MAINTAINERS, would make a lot more sense.

That would give people who work with tarballs and patches, or a
subsystem managed with something other than git (one of the most
important one is quilt), the equal access to the necessary data.

Even with git, it is my understanding that kernel community
works largely on patches exchanged over e-mails, between people
who do use git and people who do not. You would want to have
something you can easily transfer over e-mail in the patch
form.

We _could_ invent a new "patches to properties" git diff output
format that "git apply" can understand to propagate that
information, but that approach is making it less interoperable
with others, and you need to demonstrate the benefit far
outweighs that. I do not see it for this particular
application.

There may be places for "properties" that would be useful to
git, but I do not think the "find whom to send patches to" is
one of them.

2007-08-16 20:37:28

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, 2007-08-14 at 11:40 -0700, Linus Torvalds wrote:
> Do a script like this:
>
> #!/bin/sh
> git log --since=6.months.ago -- "$@" |
> grep -i '^ [-a-z]*by:.*@' |
> sort | uniq -c |
> sort -r -n | head
> (Maybe you want to add a
> grep -v '\(Linus Torvalds\)\|\(Andrew Morton\)'
> to avoid seeing the normal chain too much, but hey, we probably want to
> know too. Anyway - the script can certainly be tweaked, the point is
> really just that the git tree _already_ contains the relevant
> information).

So, here's the same get_maintainer.pl with the git
addition. Seems to work well in combination with MAINTAINERS.

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
new file mode 100755
index 0000000..eb3f023
--- /dev/null
+++ b/scripts/get_maintainer.pl
@@ -0,0 +1,351 @@
+#!/usr/bin/perl -w
+# (c) 2007, Joe Perches <[email protected]>
+# created from checkpatch.pl
+#
+# Print the contact information for the maintainers
+# of the files modified in a patch
+#
+# usage: perl scripts/get_maintainers.pl <patch>
+#
+# Licensed under the terms of the GNU GPL License version 2
+
+use strict;
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+my $V = '0.06';
+
+use Getopt::Long qw(:config no_auto_abbrev);
+
+my $tree = "./";
+my $email_maintainer = 1;
+my $email_usename = 1;
+my $email_list = 1;
+my $email_subscriber_list = 0;
+my $email_separator = ", ";
+my $email_git = 1;
+my $email_git_chief_penguins = 0;
+my $email_multiline = 0;
+my %saw;
+
+my $chief_penguins = "(Linus Torvalds|Andrew Morton)";
+
+GetOptions(
+ 'tree=s' => \$tree,
+ 'git!' => $email_git,
+ 'git-chief-penguins' => \$email_git_chief_penguins,
+ 'm!' => \$email_maintainer,
+ 'n!' => \$email_usename,
+ 'l!' => \$email_list,
+ 's!' => \$email_subscriber_list,
+ 'multiline!' => \$email_multiline,
+ 'separator=s' => \$email_separator,
+ ) or exit;
+
+my $exit = 0;
+
+if ($#ARGV < 0 ||
+ ($email_maintainer == 0
+ && $email_list == 0
+ && $email_subscriber_list == 0
+ && $email_git == 0)) {
+ print "usage: $P [options] patchfile\n";
+ print "version: $V\n";
+ print " --tree [path] => linux kernel source path\n";
+ print " --git => include recent git \*-by: signers\n";
+ print " --git_chief_penguins => include ${chief_penguins}\n";
+ print " --m => include maintainer(s) if any\n";
+ print " --n => include name 'Full Name <addr\@domain.tld>'\n";
+ print " --l => include list(s) if any\n";
+ print " --s => include subscriber only list(s) if any\n";
+ print " --separator [, ] => separator for multiple addresses on 1 line\n";
+ print " --multiline => print 1 address per line\n";
+ print "Default: [--g --m --l --separator \", \"]\n";
+ print "Be sure to select something...\n";
+ exit(1);
+}
+
+if ($tree && !top_of_kernel_tree($tree)) {
+ if (${tree} ne "") {
+ print "'${tree}' ";
+ } else {
+ print "The current directory ";
+ }
+ print "doesn't appear to be a linux kernel source tree\n";
+ exit(2);
+}
+
+## Read MAINTAINERS for type/value pairs
+
+my @typevalue = ();
+open(MAINT, "<${tree}MAINTAINERS") || die "$P: Can't open ${tree}MAINTAINERS\n";
+while (<MAINT>) {
+ if (m/^(\C):\s*(.*)/) {
+ my $type = $1;
+ my $value = $2;
+
+ ##Filename pattern matching
+ if ($type eq "F" || $type eq "X") {
+ $value =~ s@\.@\\\.@g; ##Convert . to \.
+ $value =~ s/\*/\.\*/g; ##Convert * to .*
+ }
+ push(@typevalue, "$type:$value");
+ } elsif (!/^(\s)*$/) {
+ push(@typevalue, $_);
+ }
+}
+close(MAINT);
+
+## Find the patched filenames
+
+my @patchedfiles = ();
+open(PATCH, "<$ARGV[0]") or die "Can't open $ARGV[0]\n";
+while (<PATCH>) {
+ if (m/^\+\+\+\s+(\S+)/) {
+ my $file = $1;
+ $file =~ s@^[^/]*/@@;
+ $file =~ s@\n@@;
+ push(@patchedfiles, $file);
+ }
+}
+close(PATCH);
+
+# Sort and uniq patchedfiles
+
+undef %saw;
+@patchedfiles = sort @patchedfiles;
+@patchedfiles = grep(!$saw{$_}++, @patchedfiles);
+
+# Find responsible parties
+
+my @email_to = ();
+foreach (@patchedfiles) {
+ my $patchedfile = $_;
+ my $exclude = 0;
+
+#Git
+
+ recent_git_signoffs($patchedfile);
+
+#Do not match excluded file patterns
+
+ foreach (@typevalue) {
+ if (m/^(\C):(.*)/) {
+ my $type = $1;
+ my $value = $2;
+ if ($type eq 'X') {
+ if (file_match_pattern($patchedfile, $value)) {
+ $exclude = 1;
+ }
+ }
+ }
+ }
+
+ if ($exclude == 0) {
+ my $tvi = 0;
+ foreach (@typevalue) {
+ if (m/^(\C):(.*)/) {
+ my $type = $1;
+ my $value = $2;
+ if ($type eq 'F') {
+ if (file_match_pattern($patchedfile, $value)) {
+ add_emails($tvi);
+ }
+ }
+ }
+ $tvi++;
+ }
+ }
+}
+
+## sort and uniq email_to
+
+@email_to = sort @email_to;
+undef %saw;
+@email_to = grep(!$saw{$_}++, @email_to);
+
+## add lk if no one is interested...
+
+my $address_cnt = @email_to;
+if ($address_cnt == 0 && $email_list > 0) {
+ push(@email_to, "linux-kernel\@vger.kernel.org");
+}
+if ($email_multiline != 0) {
+ foreach (@email_to) {
+ print("$_\n");
+ }
+} else {
+ print(join($email_separator, @email_to));
+ print("\n");
+}
+
+exit($exit);
+
+sub file_match_pattern {
+ my ($file, $pattern) = @_;
+ if (substr($pattern, -1) eq "/") {
+ if ($file =~ m@^$pattern@) {
+ return 1;
+ }
+ } else {
+ if ($file =~ m@^$pattern@) {
+ my $s1 = ($file =~ tr@/@@);
+ my $s2 = ($pattern =~ tr@/@@);
+ if ($s1 == $s2) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+sub top_of_kernel_tree {
+ my ($tree) = @_;
+
+ if ($tree ne "" && substr($tree,length($tree)-1,1) ne "/") {
+ $tree = $tree . "/";
+ }
+ if ( (-f "${tree}COPYING")
+ && (-f "${tree}CREDITS")
+ && (-f "${tree}Kbuild")
+ && (-f "${tree}MAINTAINERS")
+ && (-f "${tree}Makefile")
+ && (-f "${tree}README")
+ && (-d "${tree}Documentation")
+ && (-d "${tree}arch")
+ && (-d "${tree}include")
+ && (-d "${tree}drivers")
+ && (-d "${tree}fs")
+ && (-d "${tree}init")
+ && (-d "${tree}ipc")
+ && (-d "${tree}kernel")
+ && (-d "${tree}lib")
+ && (-d "${tree}scripts")) {
+ return 1;
+ }
+ return 0;
+}
+
+sub format_email {
+ my ($name, $email) = @_;
+ my $formatted_email = $name;
+
+ if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $formatted_email = "\"${name}\"\ \<${email}\>";
+ } else {
+ $formatted_email = "${name} \<${email}\>";
+ }
+ return $formatted_email;
+}
+
+sub add_emails {
+ my ($index) = @_;
+
+ $index = $index - 1;
+ while ($index >= 0) {
+ my $tv = $typevalue[$index];
+ if ($tv =~ m/^(\C):(.*)/) {
+ my $ptype = $1;
+ my $pvalue = $2;
+ if ($ptype eq "L") {
+ my $subscr = $pvalue;
+ if ($subscr =~ m/\s*\(subscribers-only\)/) {
+ if ($email_subscriber_list > 0) {
+ $subscr =~ s/\s*\(subscribers-only\)//g;
+ push(@email_to, $subscr);
+ }
+ } else {
+ if ($email_list > 0) {
+ push(@email_to, $pvalue);
+ }
+ }
+ } elsif ($ptype eq "M") {
+ if ($email_maintainer > 0) {
+ if ($index >= 0) {
+ my $tv = $typevalue[$index - 1];
+ if ($tv =~ m/^(\C):(.*)/) {
+ if ($1 eq "P" && $email_usename > 0) {
+ push(@email_to, format_email($2, $pvalue));
+ } else {
+ push(@email_to, $pvalue);
+ }
+ }
+ } else {
+ push(@email_to, $pvalue);
+ }
+ }
+ }
+ $index--;
+ } else {
+ $index = -1;
+ }
+ }
+}
+
+sub which {
+ my ($bin) = @_;
+
+ my $path;
+
+ foreach $path (split /:/, $ENV{PATH}) {
+ if (-e "$path/$bin") {
+ return "$path/$bin";
+ }
+ }
+
+ return "";
+}
+
+sub recent_git_signoffs {
+ my ($file) = @_;
+
+ my $sign_offs = "";
+ my $cmd = "";
+ my $output = "";
+
+ my @lines = ();
+
+ if (which("git") eq "") {
+ die("Git not found\n");
+ }
+
+# Search the git logs for "by:" lines per file
+# sort in reverse order by occurance
+# add at most 5
+
+ $cmd = "git log --since=6.months.ago -- ${file} ";
+ $cmd = $cmd . " | grep -i '^ [-a-z]*by:.*\\\@' ";
+ if ($email_git_chief_penguins == 0) {
+ $cmd = $cmd . " | grep -E -v '${chief_penguins}'";
+ }
+ $cmd = $cmd . " | sort | uniq -c | sort -r -n | head -n 5";
+ $cmd = $cmd . " | cut -f 2 -d ':' -s ";
+
+ $output = `${cmd}`;
+
+ $output =~ s/^\s*//gm;
+
+ @lines = split("\n", $output);
+ foreach (@lines) {
+ my $line = $_;
+ if ($line =~ m/(.*) <(.*)>/) {
+ my $git_name = $1;
+ my $git_addr = $2;
+ $git_name =~ tr/^\"//;
+ $git_name =~ tr/\"$//;
+ if ($email_usename > 0) {
+ push(@email_to, format_email($git_name, $git_addr));
+ } else {
+ push(@email_to, $git_addr);
+ }
+ } elsif ($line =~ m/<(.*)>/) {
+ my $git_addr = $1;
+ push(@email_to, $git_addr);
+ } else {
+ push(@email_to, $line);
+ }
+ }
+ return $output;
+}


2007-08-17 02:13:27

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Tue, 2007-08-14 at 18:31 -0700, Junio C Hamano wrote:
> On the other hand, git-send-email _is_ all about sending it
> out, and it needs to know who your patch should reach. I
> think it makes sense to have one script that, given a set of
> paths that are affected, gives a list of potentially
> interested people (that is "Finding" part -- and I see there
> are 600+ patches to implement this on the list), and a new
> option to git-send-email to (1) inspect the patch to see what
> paths are affected, and (2) call that "Find" script to figure
> out whom to send it to, and probably asking for confirmation.

Sorry, not a git developer, so the paths are wrong.
This seems to work:

Example:

git-send-email \
--cc-cmd "perl scripts/get_maintainers.pl -non -multiline" foo.diff

--- git-send-email.pl 2007-08-16 19:06:07.000000000 -0700
+++ /usr/local/bin/git-send-email 2007-05-01 11:59:14.000000000 -0700
@@ -47,9 +47,6 @@ Options:
--cc Specify an initial "Cc:" list for the entire series
of emails.

- --cc-cmd Specify a command to execute per file which adds
- per file specific cc address entries
-
--bcc Specify a list of email addresses that should be Bcc:
on all the emails.

@@ -143,7 +140,7 @@ my (@to,@cc,@initial_cc,@bcclist,@xh,

# Behavior modification variables
my ($chain_reply_to, $quiet, $suppress_from, $no_signed_off_cc,
- $dry_run, $cc_cmd) = (1, 0, 0, 0, 0, 0);
+ $dry_run) = (1, 0, 0, 0, 0);
my $smtp_server;
my $envelope_sender;

@@ -176,7 +173,6 @@ my $rc = GetOptions("from=s" => \$from,
"subject=s" => \$initial_subject,
"to=s" => \@to,
"cc=s" => \@initial_cc,
- "cc-cmd=s" => \$cc_cmd,
"bcc=s" => \@bcclist,
"chain-reply-to!" => \$chain_reply_to,
"smtp-server=s" => \$smtp_server,
@@ -611,16 +607,6 @@ foreach my $t (@files) {
}
}
close F;
-
- if (${cc_cmd} ne "") {
- my $output = `${cc_cmd} $t`;
- my @lines = split("\n", $output);
- foreach my $c (@lines) {
- push @cc, $c;
- printf("(sob) Adding cc: %s from cc-cmd: '%s'\n", $c, $t) unless $quiet;
- }
- }
-
if (defined $author_not_sender) {
$author_not_sender = unquote_rfc2047($author_not_sender);
$message = "From: $author_not_sender\n\n$message";


2007-08-17 02:31:12

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On Thu, 2007-08-16 at 19:13 -0700, Joe Perches wrote:
> Sorry, not a git developer, so the paths are wrong.
> This seems to work:

Sorry. Patch reversed too.

--- /usr/local/bin/git-send-email 2007-05-01 11:59:14.000000000 -0700
+++ /home/joe/bin/git-send-email.pl 2007-08-16 19:25:53.000000000 -0700
@@ -47,6 +47,9 @@ Options:
--cc Specify an initial "Cc:" list for the entire series
of emails.

+ --cc-cmd Specify a command to execute per file which adds
+ per file specific cc address entries
+
--bcc Specify a list of email addresses that should be Bcc:
on all the emails.

@@ -140,7 +143,7 @@ my (@to,@cc,@initial_cc,@bcclist,@xh,

# Behavior modification variables
my ($chain_reply_to, $quiet, $suppress_from, $no_signed_off_cc,
- $dry_run) = (1, 0, 0, 0, 0);
+ $dry_run, $cc_cmd) = (1, 0, 0, 0, 0, "");
my $smtp_server;
my $envelope_sender;

@@ -173,6 +176,7 @@ my $rc = GetOptions("from=s" => \$from,
"subject=s" => \$initial_subject,
"to=s" => \@to,
"cc=s" => \@initial_cc,
+ "cc-cmd=s" => \$cc_cmd,
"bcc=s" => \@bcclist,
"chain-reply-to!" => \$chain_reply_to,
"smtp-server=s" => \$smtp_server,
@@ -607,6 +611,16 @@ foreach my $t (@files) {
}
}
close F;
+
+ if (${cc_cmd} ne "") {
+ my $output = `${cc_cmd} $t`;
+ my @lines = split("\n", $output);
+ foreach my $c (@lines) {
+ push @cc, $c;
+ printf("(sob) Adding cc: %s from cc-cmd: '%s'\n", $c, $t) unless $quiet;
+ }
+ }
+
if (defined $author_not_sender) {
$author_not_sender = unquote_rfc2047($author_not_sender);
$message = "From: $author_not_sender\n\n$message";


2007-08-17 04:29:18

by Rene Herman

[permalink] [raw]
Subject: Re: [PATCH] [1/2many] - FInd the maintainer(s) for a patch - scripts/get_maintainer.pl

On 08/16/2007 09:00 PM, Junio C Hamano wrote:

> Git or no git, I think a file that can be viewed with less,
> edited with regular editor and processed with sed/perl/grep
> tools is the way to go. I do not think adding 600+ patches to
> the single MAINTAINERS list is workable in the longer term, as
> it would become the single file many subsystem people need to
> update and is asking for merge conflicts, but I think a file
> with known name (say, "CcMe.txt") sprinkled in relevant
> subdirectories, perhaps with the same format originally
> suggested for MAINTAINERS, would make a lot more sense.
>
> That would give people who work with tarballs and patches, or a
> subsystem managed with something other than git (one of the most
> important one is quilt), the equal access to the necessary data.

That is ofcourse an argument but I believe a bit of a non-argument at the
same time in practice.

There's really not much point in pretending that non-git users are still
first class citizens anyway; Linus' own suggestion of using git-blame would
tie things to git as well, as do for example frequent requests to bisect a
problem. I moreover feel there's absolutely nothing wrong with that, given
that there's nothing wrong with git.

It's the kernel's source code management tool, is included out of the box in
most distributions nowadays and is GPLd meaning that the tool (itself) won't
keep anyone from exporting data from it and importing it into something else
if someone cares to. Also, I never managed to stay un-annoyed at source code
management tools long enough to understand why I wanted to use them but have
been using git for months now so as far as I am concerned, it appears to
even be a good tool.

But, well, anyways, I did look at a git repo a bit but will unfortunately
not be able to follow up the proposal with actual (good) code in a sensible
timeframe, let alone "quickly", which means I was hoping others would agree.
I believe these properties make for an elegant setup with many possible uses
including the maintainers information, but if you disagree I guess I'm going
to shelve it...

> Even with git, it is my understanding that kernel community
> works largely on patches exchanged over e-mails, between people
> who do use git and people who do not. You would want to have
> something you can easily transfer over e-mail in the patch
> form.
>
> We _could_ invent a new "patches to properties" git diff output
> format that "git apply" can understand to propagate that
> information

Yes, not unlike the current git move "meta-diffs" ...

> but that approach is making it less interoperable with others, and you
> need to demonstrate the benefit far outweighs that. I do not see it for
> this particular application.
>
> There may be places for "properties" that would be useful to git, but I
> do not think the "find whom to send patches to" is one of them.

The important reason for wiring this into git directly would be keeping the
meta-data in sync with the data it refers to in an automated fashion. With
manual intervention, there's much more opportunity for things to grow stale.

In practice, it may not be a huge problem. It certainly is with the current
MAINTAINERS file but if one does finer-grained data around the tree, that
will probably help.

It's also not a now or never thing fortunately. If git does ever grow these
properties, the issue can be revisited, perhaps at that time both with the
experience of what the finer-grained in-tree solution did not solve and even
fewer people around that care about not making git even more of an intrinsic
part of development.

Rene.

2007-08-17 17:55:33

by Joe Perches

[permalink] [raw]
Subject: [PATCH] - git-send-email.perl

Here's a path to enable a command line option
that takes a string argument

cc-cmd

This modifies the @cc array to include whatever
output is produced by cc_cmd $patchfile

cccmd can be stored in a config settings file

previous versions of this patch were submitted
against an older version of git-send-email.perl

diff --git a/git-send-email.perl b/git-send-email.perl
index 69559b2..828a77a 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -46,6 +46,9 @@ Options:
--cc Specify an initial "Cc:" list for the entire series
of emails.

+ --cc-cmd Specify a command to execute per file which adds
+ per file specific cc address entries
+
--bcc Specify a list of email addresses that should be Bcc:
on all the emails.

@@ -157,13 +160,14 @@ if ($@) {
my ($quiet, $dry_run) = (0, 0);

# Variables with corresponding config settings
-my ($thread, $chain_reply_to, $suppress_from, $signed_off_cc);
+my ($thread, $chain_reply_to, $suppress_from, $signed_off_cc, $cc_cmd);

my %config_settings = (
"thread" => [\$thread, 1],
"chainreplyto" => [\$chain_reply_to, 1],
"suppressfrom" => [\$suppress_from, 0],
"signedoffcc" => [\$signed_off_cc, 1],
+ "cccmd" => [\$cc_cmd, ""],
);

foreach my $setting (keys %config_settings) {
@@ -189,6 +193,7 @@ my $rc = GetOptions("sender|from=s" => \$sender,
"smtp-server=s" => \$smtp_server,
"compose" => \$compose,
"quiet" => \$quiet,
+ "cc-cmd=s" => \$cc_cmd,
"suppress-from!" => \$suppress_from,
"signed-off-cc|signed-off-by-cc!" => \$signed_off_cc,
"dry-run" => \$dry_run,
@@ -652,11 +657,21 @@ foreach my $t (@files) {
}
}
close F;
+
+ if (${cc_cmd} ne "") {
+ my $output = `${cc_cmd} $t`;
+ my @lines = split("\n", $output);
+ foreach my $c (@lines) {
+ push @cc, $c;
+ printf("(cc-cmd) Adding cc: %s from: '%s'\n", $c, $cc_cmd)
+ unless $quiet;
+ }
+ }
+
if (defined $author) {
$message = "From: $author\n\n$message";
}

-
send_message();

# set up for the next message


2007-08-18 00:32:19

by Junio C Hamano

[permalink] [raw]
Subject: Re: [PATCH] - git-send-email.perl

Joe Perches <[email protected]> writes:

> Here's a path to enable a command line option
> that takes a string argument
>
> cc-cmd
>
> This modifies the @cc array to include whatever
> output is produced by cc_cmd $patchfile
>
> cccmd can be stored in a config settings file
>
> previous versions of this patch were submitted
> against an older version of git-send-email.perl

... Signed-off-by: ...


> diff --git a/git-send-email.perl b/git-send-email.perl
> index 69559b2..828a77a 100755
> --- a/git-send-email.perl
> +++ b/git-send-email.perl
> @@ -46,6 +46,9 @@ Options:
> --cc Specify an initial "Cc:" list for the entire series
> of emails.
>
> + --cc-cmd Specify a command to execute per file which adds
> + per file specific cc address entries
> +
> --bcc Specify a list of email addresses that should be Bcc:
> on all the emails.
>

I do not see a patch to "Documentation/git-send-email.txt" here...

> @@ -652,11 +657,21 @@ foreach my $t (@files) {
> }
> }
> close F;
> +
> + if (${cc_cmd} ne "") {
> + my $output = `${cc_cmd} $t`;
> + my @lines = split("\n", $output);
> + foreach my $c (@lines) {
> + push @cc, $c;
> + printf("(cc-cmd) Adding cc: %s from: '%s'\n", $c, $cc_cmd)
> + unless $quiet;
> + }
> + }
> +

Something like this, with appropriate error checking, perhaps?

open my $cc, "${cc_cmd} $t |";
while (my $c = <$cc>) {
...
}
close $cc;

2007-08-18 01:52:56

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] - git-send-email.perl

On Fri, 2007-08-17 at 16:38 -0700, Junio C Hamano wrote:
> Joe Perches <[email protected]> writes:
> ... Signed-off-by: ...
> I do not see a patch to "Documentation/git-send-email.txt" here...
> Something like this, with appropriate error checking, perhaps?
>
> open my $cc, "${cc_cmd} $t |";
> while (my $c = <$cc>) {
> ...
> }
> close $cc;

Add --cc-cmd, the ability to execute an arbitrary "cmd" to
generate per patch file specific "Cc:"s to git-send-email.perl

Signed-off-by: Joe Perches <[email protected]>

diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt
index d243ed1..9a48847 100644
--- a/Documentation/git-send-email.txt
+++ b/Documentation/git-send-email.txt
@@ -34,6 +34,12 @@ The --bcc option must be repeated for each user you want on the bcc list.
+
The --cc option must be repeated for each user you want on the cc list.

+--cc-cmd::
+ Specify a command to execute once per patch file which
+ should generate patch file specific "Cc:" entries.
+ Output of this command must be single email address per line.
+ Default is the value of 'sendemail.cccmd' configuration value.
+
--chain-reply-to, --no-chain-reply-to::
If this is set, each email will be sent as a reply to the previous
email sent. If disabled with "--no-chain-reply-to", all emails after
@@ -124,6 +130,9 @@ sendemail.aliasfiletype::
Format of the file(s) specified in sendemail.aliasesfile. Must be
one of 'mutt', 'mailrc', 'pine', or 'gnus'.

+sendemail.cccmd::
+ Command to execute to generate per patch file specific "Cc:"s.
+
sendemail.bcc::
Email address (or alias) to always bcc.

diff --git a/git-send-email.perl b/git-send-email.perl
index 69559b2..d49947c 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -46,6 +46,9 @@ Options:
--cc Specify an initial "Cc:" list for the entire series
of emails.

+ --cc-cmd Specify a command to execute per file which adds
+ per file specific cc address entries
+
--bcc Specify a list of email addresses that should be Bcc:
on all the emails.

@@ -157,13 +160,14 @@ if ($@) {
my ($quiet, $dry_run) = (0, 0);

# Variables with corresponding config settings
-my ($thread, $chain_reply_to, $suppress_from, $signed_off_cc);
+my ($thread, $chain_reply_to, $suppress_from, $signed_off_cc, $cc_cmd);

my %config_settings = (
"thread" => [\$thread, 1],
"chainreplyto" => [\$chain_reply_to, 1],
"suppressfrom" => [\$suppress_from, 0],
"signedoffcc" => [\$signed_off_cc, 1],
+ "cccmd" => [\$cc_cmd, ""],
);

foreach my $setting (keys %config_settings) {
@@ -189,6 +193,7 @@ my $rc = GetOptions("sender|from=s" => \$sender,
"smtp-server=s" => \$smtp_server,
"compose" => \$compose,
"quiet" => \$quiet,
+ "cc-cmd=s" => \$cc_cmd,
"suppress-from!" => \$suppress_from,
"signed-off-cc|signed-off-by-cc!" => \$signed_off_cc,
"dry-run" => \$dry_run,
@@ -652,11 +657,25 @@ foreach my $t (@files) {
}
}
close F;
+
+ if (${cc_cmd} ne "") {
+ open(F, "${cc_cmd} $t |")
+ or die "(cc-cmd) Could not execute '${cc_cmd}'\n";
+ while(<F>) {
+ my $c = $_;
+ $c =~ s/^\s*//g;
+ $c =~ s/\n$//g;
+ push @cc, $c;
+ printf("(cc-cmd) Adding cc: %s from: '%s'\n", $c, $cc_cmd)
+ unless $quiet;
+ }
+ close F;
+ }
+
if (defined $author) {
$message = "From: $author\n\n$message";
}

-
send_message();

# set up for the next message