# Mantid Repository : https://github.com/mantidproject/mantid
#
# Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
#     NScD Oak Ridge National Laboratory, European Spallation Source,
#     Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
# SPDX - License - Identifier: GPL - 3.0 +
#pylint: disable=invalid-name
"""Build a per-commit CSV report from the output of ``git log --shortstat``.

The script dumps the git log of the current working directory to a temporary
file, splits it into one text entry per commit, extracts author, date and
change statistics from every non-merge commit with a regular expression and
writes one row per commit to ``commits-report.csv``.
"""

import csv
import datetime
import os
import re

# Scratch file that receives the raw ``git log`` output.
temp_filename = 'all-commits.stdout'

# Groups: 1 = author ("Name <email>"), 2 = date text, 3 = first word of the
# commit message, 5 = files changed, 7 = insertions, 9 = deletions.
# The counters are plain ``(\d+)``: a nested ``(\d+)+`` matches and captures
# exactly the same text but backtracks exponentially on long digit runs.
regex_git_log_entry = re.compile(
    r"Author:\s+(.+?)\s+Date:\s+(.+?)\B\s+(\S+).*?"
    r"((\d+)\sfile.+?)?((\d+)\sinsertion.+?)?((\d+)\sdeletion.+?)?"
    r"(commit\s[0-9a-f]{40}|$)",
    re.DOTALL)
# Matches the "commit <sha1>" line that starts every log entry.
regex_git_log_splitter = re.compile(
    r"commit\s[0-9a-f]{40}")
# Splits "Name <email>" into name (group 1) and email (group 2).
regex_name_email_address = re.compile(r"(.*?)<(\S+)>")

organisations = ['STFC', 'ORNL', 'ESS', 'ILL', 'PSI', 'ANSTO', 'KITWARE', 'JUELICH', 'OTHERS', 'CSNS']

# Maps an email fragment (domain or full address) to a facility.  Lookup is a
# substring test against the commit email; see get_user_facility().
domains = {'stfc.ac.uk': 'STFC',
           'clrc.ac.uk': 'STFC',
           'tessella.com': 'STFC',
           'ornl.gov': 'ORNL',
           'sns.gov': 'ORNL',
           'esss.se': 'ESS',
           'ill.fr': 'ILL',
           'ill.eu': 'ILL',
           'psi.ch': 'PSI',
           'ansto.gov.au': 'ANSTO',
           'ansto': 'ANSTO',
           'mantidproject.org': 'OTHERS',
           'MichaelWedel@users.noreply.github.com': 'PSI',
           'stuart.i.campbell@gmail.com': 'ORNL',
           'uwstout.edu': 'ORNL',
           'kitware.com': 'KITWARE',
           'juelich.de': 'JUELICH',
           'ian.bush@tessella.com': 'STFC',
           'dan@dan-nixon.com': 'STFC',
           'peterfpeterson@gmail.com': 'ORNL',
           'stuart@stuartcampbell.me': 'ORNL',
           'harry@exec64.co.uk': 'STFC',
           'martyn.gigg@gmail.com': 'STFC',
           'raquelalvarezbanos@users.noreply.github.com': 'STFC',
           'torben.nielsen@nbi.dk': 'ESS',
           'borreguero@gmail.com': 'ORNL',
           'raquel.alvarez.banos@gmail.com': 'STFC',
           'anton.piccardo-selg@tessella.com': 'STFC',
           'rosswhitfield@users.noreply.github.com': 'ORNL',
           'mareuternh@gmail.com': 'ORNL',
           'quantumsteve@gmail.com': 'ORNL',
           'ricleal@gmail.com': 'ORNL',
           'jawrainey@gmail.com': 'STFC',
           'xingxingyao@gmail.com': 'ORNL',
           'owen@laptop-ubuntu': 'STFC',
           'picatess@users.noreply.github.com': 'STFC',
           'Janik@Janik': 'ORNL',
           'debdepba@dasganma.tk': 'OTHERS',
           'matd10@yahoo.com': 'OTHERS',
           'diegomon93@gmail.com': 'OTHERS',
           'mgt110@ic.ac.uk': 'OTHERS',
           'granrothge@users.noreply.github.com': 'ORNL',
           'tom.g.r.brooks@gmail.com': 'STFC',
           'ross.whitfield@gmail.com': 'ORNL',
           'samueljackson@outlook.com': 'STFC',
           'AntonPiccardoSelg@users.noreply.github.com': 'STFC',
           'antibones@users.noreply.github.com': 'ILL',
           'MikeHart85@users.noreply.github.com': 'STFC',
           'dbt@aber.ac.uk': 'STFC',
           'DavidFair@users.noreply.github.com': 'STFC',
           'reimundILL@users.noreply.github.com': 'ILL',
           'jan@c53.be': 'JUELICH',
           'reimund@il.eu': 'ILL',
           'davidfair@users.noreply.github.com': 'STFC',
           'louisemccann@users.noreply.github.com': 'STFC',
           'DimitarTasev@users.noreply.github.com': 'STFC',
           'dimtasev@gmail.com': 'STFC',
           'fedepou@gmail.com': 'STFC',
           'cip.pruteanu@gmail.com': 'OTHERS',
           'kdymkowski84@gmail.com': 'OTHERS',
           'mayer.ali@t-online.de': 'OTHERS',
           'gagikvar@gmail.com': 'ILL',
           'bartomeu.llopis.vidal@gmail.com': 'STFC',
           'anton.piccardo-selg@tessella.ac.uk': 'STFC',
           'jamesphysics@users.noreply.github.com': 'STFC',
           'michaeljturner@live.com': 'STFC',
           'rprospero@gmail.com': 'STFC',
           'roman.tolchenov@gmail.com': 'STFC',
           'jiao.lin@gmail.com': 'ORNL',
           'erkn@fysik.dtu.dk': 'ESS',
           'daniel@pajerowski.com': 'ORNL',
           'ElliotAOram@users.noreply.github.com': 'STFC',
           '37333817+thomueller@users.noreply.github.com': 'ESS',
           't.w.jubb@gmail.com': 'STFC',
           'edward.brown.96@live.co.uk': 'STFC',
           'bhuvan_777@outlook.com': 'STFC',
           'joachimcoenen@icloud.com': 'JUELICH',
           'anton.piccardo.selg@gmail.com': 'STFC',
           '29330338+JoachimCoenen@users.noreply.github.com': 'JUELICH',
           'samjones714@gmail.com': 'STFC',
           '5237234+ewancook@users.noreply.github.com': 'STFC',
           '40766142+SamJenkins1@users.noreply.github.com': 'STFC',
           'aybamidele@gmail.com': 'STFC',
           '40830825+robertapplin@users.noreply.github.com': 'STFC',
           'robertgjapplin@gmail.com': 'STFC',
           'luzpaz@users.noreply.github.com': 'OTHERS',
           't.j.titcombe@gmail.com': 'STFC',
           '32938439+TTitcombe@users.noreply.github.com': 'STFC',
           '35809089+EdwardsLT@users.noreply.github.com': 'STFC',
           'EdwardsLT@cardiff.ac.uk': 'STFC',
           '39047984+nvaytet@users.noreply.github.com': 'ESS',
           'igudich@gmail.com': 'ESS',
           '46603316+alicerussell1@users.noreply.github.com': 'STFC',
           '31194136+aybamidele@users.noreply.github.com': 'STFC',
           '49688535+Harrietbrown@users.noreply.github.com': 'STFC',
           'takudzwamilli@gmail.com': 'STFC',
           'lorenzobasso@unseen.is': 'STFC',
           'a.j.jackson@physics.org': 'STFC',
           '32895149+LolloB@users.noreply.github.com': 'STFC',
           'philipc99@hotmail.co.uk': 'STFC',
           'conor.m.finn.99@gmail.com': 'STFC',
           '52415735+PhilColebrooke@users.noreply.github.com': 'STFC',
           'giodisiena@gmail.com': 'STFC',
           'matthew-d-jones@users.noreply.github.com': 'STFC',
           '32419974+TakudzwaMakoni@users.noreply.github.com': 'STFC',
           'hankwu@Hanks-MacBook-Air.local': 'STFC',
           '55147936+hankwustfc@users.noreply.github.com': 'STFC',
           '55979119+RichardWaiteSTFC@users.noreply.github.com': 'STFC',
           'Waite': 'STFC',
           '47181718+ConorMFinn@users.noreply.github.com': 'STFC',
           '31892119+Fahima-Islam@users.noreply.github.com': 'ORNL',
           '56431339+StephenSmith25@users.noreply.github.com': 'STFC',
           'williamfgc@yahoo.com': 'ORNL'}

# Maps an author name as it appears in the log to the name used in the report.
# Keys are compared against the whitespace-stripped author name, so neither
# keys nor values may carry leading/trailing spaces.
aliases = {'Anthony': 'Anthony Lim',
           'AnthonyLim23': 'Anthony Lim',
           'abuts': 'Alex Buts',
           'Ayomide Bamidele': 'Andre Bamidele',
           'DanielMurphy22': 'Daniel Murphy',
           'Harrietbrown': 'Harriet Brown',
           'PhilColebrooke': 'Phil Colebrooke',
           'Phil': 'Phil Colebrooke',
           'Richard': 'Richard Waite',
           'RichardWaiteSTFC': 'Richard Waite',
           'Stephen': 'Stephen Smith',
           'StephenSmith25': 'Stephen Smith',
           'StephenSmith': 'Stephen Smith',
           'Anders-Markvardsen': 'Anders Markvardsen',
           'AndreiSavici': 'Andrei Savici',
           'Antti Soininnen': 'Antti Soininen',
           'Bilheux': 'Jean Bilheux',
           'brandonhewer': 'Brandon Hewer',
           'celinedurniak': 'Celine Durniak',
           'DavidFair': 'David Fairbrother',
           'DiegoMonserrat': 'Diego Monserrat',
           'Dimitar Borislavov Tasev': 'Dimitar Tasev',
           'Tasev': 'Dimitar Tasev',
           'giovannidisiena': 'Giovanni Di Siena',
           'hankwustfc': 'Hank Wu',
           'igudich': 'Igor Gudich',
           'josephframsay': 'Joseph Ramsay',
           'LamarMoore': 'Lamar Moore',
           'Moore': 'Lamar Moore',
           'LolloB': 'Lorenzo Basso',
           'NickDraper': 'Nick Draper',
           'Pete Peterson': 'Peter Peterson',
           'Parker, Peter G': 'Peter Parker',
           'Raquel Alvarez': 'Raquel Alvarez Banos',
           'reimundILL': 'Verena Reimund',
           'Ricardo Leal': 'Ricardo Ferraz Leal',
           'Ricardo M. Ferraz Leal': 'Ricardo Ferraz Leal',
           'Rob': 'Robert Applin',
           'Rob Applin': 'Robert Applin',
           'robertapplin': 'Robert Applin',
           'Sam': 'Sam Jenkins',
           'SamJenkins1': 'Sam Jenkins',
           'simonfernandes': 'Simon Fernandes',
           'MichaelWedel': 'Michael Wedel',
           'Steven E. Hahn': 'Steven Hahn',
           'VickieLynch': 'Vickie Lynch',
           }

# Log entries that crash the regex engine; they are skipped outright.
_BLACKLISTED_COMMIT_PREFIXES = (
    "commit 4a6c0077a1dff965d767dc45a1517c7411a69070",
    "commit 16a4f16c99e3dc3b59d214067781c932f5a9eb8a",
)


def generate_commit_data():
    """Dump ``git log --shortstat`` of the current directory into temp_filename."""
    print('Generating git commit data...')
    status = os.system("git --no-pager log --shortstat > " + temp_filename)
    if status != 0:
        # Keep going (best effort) but make the failure visible.
        print("Warning: git log exited with status", status)


def parse_commit_data():
    """Split the dumped git log into commits and write ``commits-report.csv``.

    Reads temp_filename, groups its lines into one text entry per commit (an
    entry starts at each "commit <sha1>" line) and passes every entry to
    parse_log_entry() together with a CSV writer for the output file.
    """
    print("Reading the file")
    commit_entries = []
    entry_lines = []
    # errors="replace": a stray non-UTF-8 byte in an old commit message must
    # not abort the whole report.
    with open(temp_filename, "r", encoding="utf-8", errors="replace") as file:
        for log_line in file:
            if regex_git_log_splitter.match(log_line) and entry_lines:
                commit_entries.append("".join(entry_lines))
                entry_lines = []
            entry_lines.append(log_line)
    # The last entry has no following "commit" line to flush it.
    if entry_lines:
        commit_entries.append("".join(entry_lines))

    #find the matches
    print("searching for regex matches")
    with open('commits-report.csv', mode='w', newline='') as output_file:
        commit_writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        commit_writer.writerow(["Author", "Email", "Facility", "Date_time", "Year", "Quarter", "Month", "Week",
                                "Commits", "Files Changed", "Insertions", "Deletions", "Net Lines Changed"])
        for commit_text in commit_entries:
            parse_log_entry(commit_text, commit_writer)


def parse_log_entry(commit_text, commit_writer):
    """Parse one git log entry and write its statistics as a CSV row.

    :param commit_text: text of a single log entry, starting at its
        "commit <sha1>" line
    :param commit_writer: object with a ``writerow(list)`` method, e.g. a
        ``csv.writer``

    Empty entries, blacklisted commits, merge commits (message starting with
    "Merge") and entries whose text or date cannot be parsed produce no row;
    parse failures are reported on stdout.
    """
    if not commit_text:
        return
    if commit_text.startswith(_BLACKLISTED_COMMIT_PREFIXES):
        return

    try:
        match = regex_git_log_entry.search(commit_text)
    except RuntimeError as e:
        print("Match failed", e)
        print(commit_text)
        return
    if match is None:
        print("Match failed")
        print(commit_text)
        return

    #skip merges
    if match.group(3) == "Merge":
        return

    author = match.group(1)
    # Authors without a usable "<email>" part are kept under their raw name
    # with an empty email instead of being dropped.
    name, email = extract_name_email_from_author(author) or (author.strip(), "")

    date_time_str = match.group(2).strip()
    try:
        date_time = datetime.datetime.strptime(date_time_str, '%a %b %d %H:%M:%S %Y %z')
    except ValueError as e:
        print ("Date Parsing failed")
        print(date_time_str, e)
        print(commit_text)
        return

    # A statistic that is absent from the shortstat line counts as zero.
    files, insertions, deletions = (int(match.group(index) or 0) for index in (5, 7, 9))
    facility = get_user_facility(email, date_time)
    commit_writer.writerow([name, email, facility, date_time.strftime("%Y-%m-%d %H:%M"),
                            date_time.strftime("%Y"), (date_time.month - 1) // 3 + 1, date_time.strftime("%m"),
                            date_time.isocalendar()[1], 1,
                            files, insertions, deletions, insertions - deletions])


def extract_name_email_from_author(author):
    """Split a git author string ("Name <email>") into name and email.

    :param author: author text taken from the "Author:" line of a log entry
    :return: ``(name, email)`` with the name mapped through ``aliases``, or
        ``None`` if the text contains no non-empty "<email>" part
    """
    match = regex_name_email_address.search(author)
    if match is None:
        return None
    original_name = match.group(1).strip()
    return aliases.get(original_name, original_name), match.group(2)


def get_user_facility(email, datetime):  # pylint: disable=redefined-outer-name
    """Return the facility associated with an email address.

    :param email: email address of the commit author
    :param datetime: commit date; only its ``year`` attribute is read
    :return: facility name from ``domains``, or "UNKNOWN" if no key of
        ``domains`` occurs in the email

    Every key of ``domains`` is tested as a substring of the email; if several
    match, the one defined last in ``domains`` wins.
    """
    facility = "UNKNOWN"
    for domain, domain_facility in domains.items():
        if domain in email:
            # ORNL didn't join until 2009
            if domain_facility == 'ORNL' and datetime.year < 2009:
                domain_facility = domains['stfc.ac.uk']
            facility = domain_facility
    if facility == "UNKNOWN":
        print("Unmatached email", email)
    return facility


def main():
    """Generate the git log dump, build the CSV report and clean up."""
    print("Generating github commit metrics...\n")

    generate_commit_data()
    try:
        parse_commit_data()
    finally:
        # Do not leave the scratch file behind when parsing fails.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)

    print("\n\nAll done!\n")


if __name__ == '__main__':
    main()