[PATCH v2 0/4] patman: Speed up some operations

This little series updates patman to run the get_maintainer.py script in parallel for each commit. It also does the same with checkpatch.
In some cases this can make a dramatic different to the speed.
Changes in v2: - Fix 'uncorrect' typo in subject - Fix missing 'f' on format string
Simon Glass (4): patman: Drop an incorrect comment about git am patman: Refactor MakeCcFile() into two functions patman: Run get_maintainer.pl in parallel patman: Check patches in parallel
tools/patmanu/checkpatch.py | 46 +++++++++------- tools/patmanu/control.py | 2 +- tools/patmanu/func_test.py | 2 + tools/patmanu/series.py | 107 ++++++++++++++++++++++++++++-------- 4 files changed, 112 insertions(+), 45 deletions(-)

Patman does not do this anymore, so drop the comment.
Signed-off-by: Simon Glass sjg@chromium.org ---
Changes in v2: - Fix 'uncorrect' typo in subject
tools/patmanu/control.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/patmanu/control.py b/tools/patmanu/control.py index b1e23870d9d..6ff94776da9 100644 --- a/tools/patmanu/control.py +++ b/tools/patmanu/control.py @@ -85,7 +85,7 @@ def check_patches(series, patch_files, run_checkpatch, verbose, use_tree): # Do a few checks on the series series.DoChecks()
- # Check the patches, and run them through 'git am' just to be sure + # Check the patches if run_checkpatch: ok = checkpatch.check_patches(verbose, patch_files, use_tree) else:

Hi,
On Sun, Feb 19, 2023 at 3:50 PM Simon Glass sjg@chromium.org wrote:
Patman does not do this anymore, so drop the comment.
Signed-off-by: Simon Glass sjg@chromium.org
Changes in v2:
- Fix 'uncorrect' typo in subject
tools/patmanu/control.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
I guess this is as-of commit 7428dc14b0f2 ("patman: Remove the -a option") ? It would be nice to mention that in the commit message...
Reviewed-by: Douglas Anderson dianders@chromium.org

This function is quite long. Moving the handling of a commit into a separate function. This will make it easier to do the work in parallel.
Update function comments while we are here.
Signed-off-by: Simon Glass sjg@chromium.org ---
Changes in v2: - Fix missing 'f' on format string
tools/patmanu/series.py | 80 ++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 24 deletions(-)
diff --git a/tools/patmanu/series.py b/tools/patmanu/series.py index f2d415609d2..8ead87ef53e 100644 --- a/tools/patmanu/series.py +++ b/tools/patmanu/series.py @@ -234,6 +234,48 @@ class Series(dict): str = 'Change log exists, but no version is set' print(col.build(col.RED, str))
+ def GetCcForCommit(self, commit, process_tags, warn_on_error, + add_maintainers, limit, get_maintainer_script, + all_skips): + """Get the email CCs to use with a particular commit + + Uses subject tags and get_maintainers.pl script to find people to cc + on a patch + + Args: + commit (Commit): Commit to process + process_tags (bool): Process tags as if they were aliases + warn_on_error (bool): True to print a warning when an alias fails to + match, False to ignore it. + add_maintainers (bool or list of str): Either: + True/False to call the get_maintainers to CC maintainers + List of maintainers to include (for testing) + limit (int): Limit the length of the Cc list (None if no limit) + get_maintainer_script (str): The file name of the get_maintainer.pl + script (or compatible). + all_skips (set of str): Set of bouncing email address that were + dropped from the output + + Returns: + list of str: List of email addresses to cc + """ + cc = [] + if process_tags: + cc += gitutil.build_email_list(commit.tags, + warn_on_error=warn_on_error) + cc += gitutil.build_email_list(commit.cc_list, + warn_on_error=warn_on_error) + if type(add_maintainers) == type(cc): + cc += add_maintainers + elif add_maintainers: + cc += get_maintainer.get_maintainer(get_maintainer_script, + commit.patch) + all_skips |= set(cc) & set(settings.bounces) + cc = list(set(cc) - set(settings.bounces)) + if limit is not None: + cc = cc[:limit] + return cc + def MakeCcFile(self, process_tags, cover_fname, warn_on_error, add_maintainers, limit, get_maintainer_script): """Make a cc file for us to use for per-commit Cc automation @@ -241,15 +283,15 @@ class Series(dict): Also stores in self._generated_cc to make ShowActions() faster.
Args: - process_tags: Process tags as if they were aliases - cover_fname: If non-None the name of the cover letter. - warn_on_error: True to print a warning when an alias fails to match, - False to ignore it. - add_maintainers: Either: + process_tags (bool): Process tags as if they were aliases + cover_fname (str): If non-None the name of the cover letter. + warn_on_error (bool): True to print a warning when an alias fails to + match, False to ignore it. + add_maintainers (bool or list of str): Either: True/False to call the get_maintainers to CC maintainers List of maintainers to include (for testing) - limit: Limit the length of the Cc list (None if no limit) - get_maintainer_script: The file name of the get_maintainer.pl + limit (int): Limit the length of the Cc list (None if no limit) + get_maintainer_script (str): The file name of the get_maintainer.pl script (or compatible). Return: Filename of temp file created @@ -259,28 +301,18 @@ class Series(dict): fname = '/tmp/patman.%d' % os.getpid() fd = open(fname, 'w', encoding='utf-8') all_ccs = [] + all_skips = set() for commit in self.commits: - cc = [] - if process_tags: - cc += gitutil.build_email_list(commit.tags, - warn_on_error=warn_on_error) - cc += gitutil.build_email_list(commit.cc_list, - warn_on_error=warn_on_error) - if type(add_maintainers) == type(cc): - cc += add_maintainers - elif add_maintainers: - - cc += get_maintainer.get_maintainer(get_maintainer_script, - commit.patch) - for x in set(cc) & set(settings.bounces): - print(col.build(col.YELLOW, 'Skipping "%s"' % x)) - cc = list(set(cc) - set(settings.bounces)) - if limit is not None: - cc = cc[:limit] + cc = self.GetCcForCommit(commit, process_tags, warn_on_error, + add_maintainers, limit, + get_maintainer_script, all_skips) all_ccs += cc print(commit.patch, '\0'.join(sorted(set(cc))), file=fd) self._generated_cc[commit.patch] = cc
+ for x in sorted(list(all_skips)): + print(col.build(col.YELLOW, f'Skipping "{x}"')) + if cover_fname: cover_cc = gitutil.build_email_list(self.get('cover_cc', '')) cover_cc = list(set(cover_cc + all_ccs))

Hi,
On Sun, Feb 19, 2023 at 3:50 PM Simon Glass sjg@chromium.org wrote:
@@ -234,6 +234,48 @@ class Series(dict): str = 'Change log exists, but no version is set' print(col.build(col.RED, str))
- def GetCcForCommit(self, commit, process_tags, warn_on_error,
add_maintainers, limit, get_maintainer_script,
all_skips):
"""Get the email CCs to use with a particular commit
Uses subject tags and get_maintainers.pl script to find people to cc
on a patch
Args:
commit (Commit): Commit to process
process_tags (bool): Process tags as if they were aliases
warn_on_error (bool): True to print a warning when an alias fails to
match, False to ignore it.
add_maintainers (bool or list of str): Either:
True/False to call the get_maintainers to CC maintainers
List of maintainers to include (for testing)
limit (int): Limit the length of the Cc list (None if no limit)
get_maintainer_script (str): The file name of the get_maintainer.pl
script (or compatible).
all_skips (set of str): Set of bouncing email address that were
dropped from the output
It wouldn't hurt to mention that "all_skips" is essentially a return value from this function (this function updates it to include the email addresses that were skipped).
@@ -259,28 +301,18 @@ class Series(dict): fname = '/tmp/patman.%d' % os.getpid() fd = open(fname, 'w', encoding='utf-8') all_ccs = []
all_skips = set() for commit in self.commits:
cc = []
if process_tags:
cc += gitutil.build_email_list(commit.tags,
warn_on_error=warn_on_error)
cc += gitutil.build_email_list(commit.cc_list,
warn_on_error=warn_on_error)
if type(add_maintainers) == type(cc):
cc += add_maintainers
elif add_maintainers:
cc += get_maintainer.get_maintainer(get_maintainer_script,
commit.patch)
for x in set(cc) & set(settings.bounces):
print(col.build(col.YELLOW, 'Skipping "%s"' % x))
cc = list(set(cc) - set(settings.bounces))
if limit is not None:
cc = cc[:limit]
cc = self.GetCcForCommit(commit, process_tags, warn_on_error,
add_maintainers, limit,
get_maintainer_script, all_skips) all_ccs += cc print(commit.patch, '\0'.join(sorted(set(cc))), file=fd) self._generated_cc[commit.patch] = cc
for x in sorted(list(all_skips)):
Why "sorted(list(all_skips))" and not just "sorted(all_skips)"?
Both of the above are nits, so I'm OK w/:
Reviewed-by: Douglas Anderson dianders@chromium.org

This script can take ages on some series. Try to limit the time by using threads. If a few stubborn patches remain, show progress so the user has some idea what is going on.
Signed-off-by: Simon Glass sjg@chromium.org ---
(no changes since v1)
tools/patmanu/func_test.py | 2 ++ tools/patmanu/series.py | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/tools/patmanu/func_test.py b/tools/patmanu/func_test.py index 238fd5b6100..48109ae5725 100644 --- a/tools/patmanu/func_test.py +++ b/tools/patmanu/func_test.py @@ -240,6 +240,8 @@ class TestFunctional(unittest.TestCase): self.assertEqual('Change log missing for v3', next(lines)) self.assertEqual('Change log for unknown version v4', next(lines)) self.assertEqual("Alias 'pci' not found", next(lines)) + while next(lines) != 'Cc processing complete': + pass self.assertIn('Dry run', next(lines)) self.assertEqual('', next(lines)) self.assertIn('Send a total of %d patches' % count, next(lines)) diff --git a/tools/patmanu/series.py b/tools/patmanu/series.py index 8ead87ef53e..e7a5f91da87 100644 --- a/tools/patmanu/series.py +++ b/tools/patmanu/series.py @@ -5,8 +5,11 @@ from __future__ import print_function
import collections +import concurrent.futures import itertools import os +import sys +import time
from patmanu import get_maintainer from patmanu import gitutil @@ -302,10 +305,34 @@ class Series(dict): fd = open(fname, 'w', encoding='utf-8') all_ccs = [] all_skips = set() + with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor: + for i, commit in enumerate(self.commits): + commit.seq = i + commit.future = executor.submit( + self.GetCcForCommit, commit, process_tags, warn_on_error, + add_maintainers, limit, get_maintainer_script, all_skips) + + # Show progress any commits that are taking forever + lastlen = 0 + while True: + left = [commit for commit in self.commits + if not commit.future.done()] + if not left: + break + names = ', '.join(f'{c.seq + 1}:{c.subject}' + for c in left[:2]) + out = f'\r{len(left)} remaining: {names}'[:79] + spaces = ' ' * (lastlen - len(out)) + if lastlen: # Don't print anything the first time + print(out, spaces, end='') + sys.stdout.flush() + lastlen = len(out) + time.sleep(.25) + print(f'\rdone{" " * lastlen}\r', end='') + print('Cc processing complete') + for commit in self.commits: - cc = self.GetCcForCommit(commit, process_tags, warn_on_error, - add_maintainers, limit, - get_maintainer_script, all_skips) + cc = commit.future.result() all_ccs += cc print(commit.patch, '\0'.join(sorted(set(cc))), file=fd) self._generated_cc[commit.patch] = cc

Hi,
On Sun, Feb 19, 2023 at 3:50 PM Simon Glass sjg@chromium.org wrote:
This script can take ages on some series. Try to limit the time by using threads. If a few stubborn patches remain, show progress so the user has some idea what is going on.
Signed-off-by: Simon Glass sjg@chromium.org
(no changes since v1)
tools/patmanu/func_test.py | 2 ++ tools/patmanu/series.py | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/tools/patmanu/func_test.py b/tools/patmanu/func_test.py index 238fd5b6100..48109ae5725 100644 --- a/tools/patmanu/func_test.py +++ b/tools/patmanu/func_test.py @@ -240,6 +240,8 @@ class TestFunctional(unittest.TestCase): self.assertEqual('Change log missing for v3', next(lines)) self.assertEqual('Change log for unknown version v4', next(lines)) self.assertEqual("Alias 'pci' not found", next(lines))
while next(lines) != 'Cc processing complete':
pass self.assertIn('Dry run', next(lines)) self.assertEqual('', next(lines)) self.assertIn('Send a total of %d patches' % count, next(lines))
diff --git a/tools/patmanu/series.py b/tools/patmanu/series.py index 8ead87ef53e..e7a5f91da87 100644 --- a/tools/patmanu/series.py +++ b/tools/patmanu/series.py @@ -5,8 +5,11 @@ from __future__ import print_function
import collections +import concurrent.futures import itertools import os +import sys +import time
from patmanu import get_maintainer from patmanu import gitutil @@ -302,10 +305,34 @@ class Series(dict): fd = open(fname, 'w', encoding='utf-8') all_ccs = [] all_skips = set()
with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
for i, commit in enumerate(self.commits):
commit.seq = i
commit.future = executor.submit(
self.GetCcForCommit, commit, process_tags, warn_on_error,
add_maintainers, limit, get_maintainer_script, all_skips)
# Show progress any commits that are taking forever
lastlen = 0
while True:
left = [commit for commit in self.commits
if not commit.future.done()]
if not left:
break
names = ', '.join(f'{c.seq + 1}:{c.subject}'
for c in left[:2])
out = f'\r{len(left)} remaining: {names}'[:79]
spaces = ' ' * (lastlen - len(out))
if lastlen: # Don't print anything the first time
print(out, spaces, end='')
sys.stdout.flush()
lastlen = len(out)
time.sleep(.25)
print(f'\rdone{" " * lastlen}\r', end='')
print('Cc processing complete')
for commit in self.commits:
cc = self.GetCcForCommit(commit, process_tags, warn_on_error,
add_maintainers, limit,
get_maintainer_script, all_skips)
cc = commit.future.result()
I've never used "concurrent.futures" before, but looks reasonable to me.
Reviewed-by: Douglas Anderson dianders@chromium.org

For large series this can take a while. Run checkpatch in parallel to try to reduce the time. The checkpatch information is still reported in sequential order, so a very slow patch at the start can still slow things down. But overall this gives good results.
Signed-off-by: Simon Glass sjg@chromium.org ---
(no changes since v1)
tools/patmanu/checkpatch.py | 46 +++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-)
diff --git a/tools/patmanu/checkpatch.py b/tools/patmanu/checkpatch.py index 55d962f536f..1317f50b464 100644 --- a/tools/patmanu/checkpatch.py +++ b/tools/patmanu/checkpatch.py @@ -3,6 +3,7 @@ #
import collections +import concurrent.futures import os import re import sys @@ -244,26 +245,31 @@ def check_patches(verbose, args, use_tree): error_count, warning_count, check_count = 0, 0, 0 col = terminal.Color()
- for fname in args: - result = check_patch(fname, verbose, use_tree=use_tree) - if not result.ok: - error_count += result.errors - warning_count += result.warnings - check_count += result.checks - print('%d errors, %d warnings, %d checks for %s:' % (result.errors, - result.warnings, result.checks, col.build(col.BLUE, fname))) - if (len(result.problems) != result.errors + result.warnings + - result.checks): - print("Internal error: some problems lost") - # Python seems to get confused by this - # pylint: disable=E1133 - for item in result.problems: - sys.stderr.write( - get_warning_msg(col, item.get('type', '<unknown>'), - item.get('file', '<unknown>'), - item.get('line', 0), item.get('msg', 'message'))) - print - #print(stdout) + with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor: + futures = [] + for fname in args: + f = executor.submit(check_patch, fname, verbose, use_tree=use_tree) + futures.append(f) + + for fname, f in zip(args, futures): + result = f.result() + if not result.ok: + error_count += result.errors + warning_count += result.warnings + check_count += result.checks + print('%d errors, %d warnings, %d checks for %s:' % (result.errors, + result.warnings, result.checks, col.build(col.BLUE, fname))) + if (len(result.problems) != result.errors + result.warnings + + result.checks): + print("Internal error: some problems lost") + # Python seems to get confused by this + # pylint: disable=E1133 + for item in result.problems: + sys.stderr.write( + get_warning_msg(col, item.get('type', '<unknown>'), + item.get('file', '<unknown>'), + item.get('line', 0), item.get('msg', 'message'))) + print if error_count or warning_count or check_count: str = 'checkpatch.pl found %d error(s), %d warning(s), %d checks(s)' color = col.GREEN

Hi,
On Sun, Feb 19, 2023 at 3:50 PM Simon Glass sjg@chromium.org wrote:
For large series this can take a while. Run checkpatch in parallel to try to reduce the time. The checkpatch information is still reported in sequential order, so a very slow patch at the start can still slow things down. But overall this gives good results.
Signed-off-by: Simon Glass sjg@chromium.org
(no changes since v1)
tools/patmanu/checkpatch.py | 46 +++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-)
Reviewed-by: Douglas Anderson dianders@chromium.org
participants (2)
-
Doug Anderson
-
Simon Glass