#! /usr/bin/env python3
# -*- coding: utf-8 -*-
""" licenser.py: Main executable of py-licenser"""

__author__ = "Guillaume Anciaux and Nicolas Richart"
__credits__ = [
    "Guillaume Anciaux ",
    "Nicolas Richart ",
]
__copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \
    " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \
    " en Mécanique des Solides)"
__license__ = "GPLv3"
__version__ = "2.0"

import argparse
import contextlib
import datetime as dt
import multiprocessing as mp
import sys

# NOTE: `tqdm` and `pylicenser` are imported inside the functions that use
# them, so that the helpers below (parser, status computation) can be imported
# by worker processes and by tools without pulling those packages in.

# Per-process state filled by init_worker(); every worker process owns its
# own database handle instead of sharing the one opened by the parent.
_worker_state = {}


def mkdate(datestring):
    """Convert a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Raises ValueError when the string does not follow that format; used as
    an argparse ``type=`` callable, this becomes a regular usage error.
    """
    return dt.datetime.strptime(datestring, '%Y-%m-%d').date()


def build_parser():
    """Build the command line parser of the ``licenser`` executable.

    Short and long spellings are declared as separate option strings (the
    previous ``"-i,--input"`` form registered one literal flag, so the long
    spelling never worked). ``-i`` and ``-f`` are mutually exclusive and one
    of them is required.
    """
    parser = argparse.ArgumentParser(prog='licenser', add_help=True)

    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument(
        "-i", "--input", help="Filename to check",
        dest="filename", default=None)
    source.add_argument(
        "-f", "--file_list", help="File containing a list of files",
        dest="file_list", default=None)

    parser.add_argument(
        "--repo", help="Repository to consider",
        dest="repo", default=None)
    parser.add_argument(
        "-p", "--path", help="Folder where to find the files",
        dest="path", default="")
    parser.add_argument(
        "-s", "--skip-first",
        help="Skip the first files when using the -f option",
        dest="skip_first", type=int, default=0)
    parser.add_argument(
        "-v", "--versioning-backend", dest="vc_backend",
        help="Backend used as versioning system (svn, git, none)")
    parser.add_argument(
        "-r", "--release-date",
        help="Date at which the release is prepared",
        dest='release_date', type=mkdate,
        default=dt.datetime.now().date())
    parser.add_argument(
        "-a", "--no-author-check", help="Do not check the author list",
        dest="no_author_check", action='store_true', default=False)
    parser.add_argument(
        "-b", "--no-brief-check", help="Do not check the brief",
        dest="no_brief_check", action='store_true', default=False)
    parser.add_argument(
        "--ignore-threshold",
        help="Limit of number of line to consider an author from the VC system",
        dest="ignore_threshold", type=int, default=0)
    parser.add_argument(
        "--ignore-filled-briefs",
        help="Do not check the brief if they are not empty",
        dest="ignore_filled_briefs", action='store_true', default=False)
    parser.add_argument(
        "--dry-run", help="Do nothing for real",
        dest='dry_run', action='store_true', default=False)
    parser.add_argument(
        "-l", "--force-license", help="Force a give license",
        dest="force_license", default=None)
    parser.add_argument(
        "--force",
        help="Force to update the header even it is considered up-to-date",
        dest="force", action='store_true', default=False)
    parser.add_argument(
        "--yes", help="Answers yes to keep author and brief questions",
        dest="yes", action='store_true', default=False)
    parser.add_argument(
        "-j", "--jobs",
        help="Number of worker processes (1 runs everything in-process)",
        dest="jobs", type=int, default=10)
    parser.add_argument(
        "configuration_file",
        help="File containing the configuration, .csv or .db (sqlite)")
    return parser


def file_status(index, skip_first, force):
    """Return the tag shown for the file at 1-based position `index`.

    "skipped" for the first `skip_first` files, "forced" when `force` is
    set, the empty string otherwise.
    """
    if index <= skip_first:
        return "skipped"
    return "forced" if force else ""


def collect_files(filename, file_list):
    """Return the list of files to process.

    `filename` wins when given; otherwise `file_list` is read with one
    entry per line, blank lines being ignored.
    """
    if filename is not None:
        return [filename]
    with open(file_list, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [name for name in stripped if name]


def init_worker(configuration_file, options):
    """Prepare the per-process state used by process_file().

    Opens the licenser database in the current process and stores the
    run options next to it.
    """
    import pylicenser as pylic

    _worker_state["db"] = pylic.LicenserDB(configuration_file)
    _worker_state["options"] = options


def process_file(task):
    """Process one ``(index, filename)`` task.

    Returns ``(filename, status, error)`` where `status` comes from
    file_status() and `error` is None on success or a one-line description
    of the exception. Errors are handed back to the parent process instead
    of being swallowed, so a failing file does not stop the others.
    """
    import pylicenser as pylic

    index, name = task
    options = _worker_state["options"]
    status = file_status(index, options["skip_first"], options["force"])
    if status == "skipped":
        return name, status, None

    try:
        transformer = pylic.FileTransformer(
            options["prefix"] + name, _worker_state["db"],
            **options["transformer_kwargs"])
        transformer.replace_file(options["dry_run"])
    except Exception as error:  # worker boundary: report, do not crash
        return name, status, f"{type(error).__name__}: {error}"
    return name, status, None


def main(argv=None):
    """Run the licenser; return the process exit status.

    0 when every file was processed, 1 when at least one file failed.
    """
    import tqdm
    import pylicenser as pylic

    args = build_parser().parse_args(argv)
    file_list = collect_files(args.filename, args.file_list)

    # Work on a copy: vars() exposes the namespace's own dictionary.
    transformer_kwargs = dict(vars(args))
    transformer_kwargs.pop("filename", None)
    jobs = max(1, transformer_kwargs.pop("jobs"))

    options = {
        "skip_first": args.skip_first,
        "force": args.force,
        "dry_run": args.dry_run,
        "prefix": (args.path.rstrip("/") + "/") if args.path else "",
        "transformer_kwargs": transformer_kwargs,
    }
    worker_args = (args.configuration_file, options)

    # Opening the configuration here makes an unusable configuration fail
    # immediately in the parent rather than inside every pool worker; it
    # also provides the state for the in-process (jobs == 1) mode.
    init_worker(*worker_args)

    tasks = enumerate(file_list, start=1)
    failures = []

    with contextlib.ExitStack() as stack:
        if jobs > 1:
            pool = stack.enter_context(
                mp.Pool(jobs, initializer=init_worker, initargs=worker_args))
            results = pool.imap(process_file, tasks)
        else:
            results = map(process_file, tasks)

        # All bars live in the parent, workers only return results.
        file_bar = stack.enter_context(
            tqdm.tqdm(total=0, position=0, bar_format='{desc}'))
        error_bar = stack.enter_context(
            tqdm.tqdm(total=0, position=2, bar_format='{desc}'))
        progress = tqdm.tqdm(
            results, total=len(file_list), position=1, desc='Files')

        for name, status, error in progress:
            label = pylic.print_colored(name, attrs=['bold'])
            if status:
                tag = pylic.print_colored(status, "red", attrs=['bold'])
                label = f"[{tag}] {label}"
            file_bar.set_description_str(f"Current : {label}")

            if error is not None:
                failures.append((name, error))
                error_bar.set_description_str(
                    f"Last error on file: {name} - {error}")

    if failures:
        print(f"{len(failures)} file(s) could not be processed:",
              file=sys.stderr)
        for name, error in failures:
            print(f"  {name}: {error}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
import copy
import os
import re

import jinja2

from . import export

UNKNOWN_TYPE = 0
CPP_SOURCES = 1
CMAKE_FILES = 2
SWIG_FILES = 3
PYTHON_SCRIPT = 4

CREATION_MODIFICATION = 0
LAST_MODIFICATION = 1
CREATION = 2

# Templates are shipped next to this module; resolving them from here keeps
# header generation independent of the current working directory.
_TEMPLATE_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'header_templates')


@export
class FileInfo:
    """Split a source file into header and body according to its type.

    The file type is deduced from the extension (a trailing ``.in`` is
    ignored). Unsupported extensions raise NotImplementedError from the
    constructor.
    """

    __date_style_conv = {'creation_modification': CREATION_MODIFICATION,
                         'last_modification': LAST_MODIFICATION,
                         'creation': CREATION}

    __supported_types = {
        CPP_SOURCES: [".cc", ".c", ".hh", ".h"],
        CMAKE_FILES: [".txt", ".cmake"],
        SWIG_FILES: [".i"],
        PYTHON_SCRIPT: [".py"]}

    # Flat list of every supported extension, computed once for the class
    # (it used to be extended again on each instantiation).
    __supported_ext = [
        ext for exts in __supported_types.values() for ext in exts]

    __templates = {
        CPP_SOURCES: 'cpp.j2',
        SWIG_FILES: 'cpp.j2',
        CMAKE_FILES: 'cmake.j2',
        PYTHON_SCRIPT: 'python.j2'}

    # Class-level defaults; the containers are replaced per instance in
    # __init__ so that instances never share them.
    _file_type = UNKNOWN_TYPE
    _file_content = ""
    _warn_list = list()
    _brief = ""
    _sections = list()
    _authors = set()
    _header = ""

    def __init__(self, filename):
        """Read `filename` and analyse its header.

        Raises NotImplementedError when the extension is not supported.
        """
        self._warn_list = list()
        self._sections = list()
        self._authors = set()

        self._filename = os.path.expanduser(filename)
        stem, self._ext = os.path.splitext(os.path.basename(filename))
        if self._ext == '.in':
            # "foo.cc.in" is handled like "foo.cc"
            self._ext = os.path.splitext(stem)[1]

        if self._ext not in self.__supported_ext:
            raise NotImplementedError(
                "File {0} has been skipped based on its extension".format(
                    self._filename))

        for key, value in self.__supported_types.items():
            if self._ext in value:
                self._file_type = key

        self.__analyse_content()

    def __analyse_content(self):
        """Load the file, split it and collect authors, brief and sections."""
        # Headers contain non-ASCII characters, do not depend on the locale.
        with open(self._filename, encoding="utf-8") as f:
            self._file_content = f.read()

        try:
            self.__split_header()
        except Exception as error:
            raise Exception(
                "While dealing with file {0}:\n{1}".format(self._filename,
                                                           error)) from error
        self.__find_authors()
        self.__find_brief()
        self.__save_sections()

        if self._warn_list:
            print("\nWARNING!:")
            print(" While dealing with file {0}".format(self._filename))
            print(" the following shit happened:")
            for warning in self._warn_list:
                print(" {0}".format(warning))
            print("{0}:1:".format(self._filename))

    def __save_sections(self):
        """Keep every ``@section`` of the header except the license one."""
        section_re = re.compile('^.*@section', re.MULTILINE)
        license_re = re.compile('LICEN.E')

        first = section_re.search(self._header)
        prefix = first.group(0) if first is not None else ""

        for section in section_re.split(self._header)[1:]:
            if not license_re.search(section.split('\n')[0]):
                self._sections.append(prefix + section)

    def __split_header(self):
        """Fill ``_header`` and ``_body`` from ``_file_content``."""
        self.__pre_header = ""
        if not re.search("(section LICENSE|[Cc]opyright)",
                         self._file_content):
            self._header = ""
            self._body = self._file_content
            print("This file has no header or a really badly formatted one!!")
            return

        if self._file_type in [CPP_SOURCES, SWIG_FILES]:
            # the header is everything up to the end of the first comment
            self._header, self._body = re.split(
                r"\*/", self._file_content, maxsplit=1)
            self._header = '\n'.join(
                line for line in self._header.split('\n') if line.strip())

        elif self._file_type == CMAKE_FILES:
            lines = self._file_content.split('\n')
            nb_sep = 0
            nb_whitelines = 0
            header_lines = list()
            # the header is delimited by two '#====' separator lines
            while nb_sep < 2:
                if not lines:
                    raise ValueError(
                        "the two '#====' lines delimiting the header "
                        "could not be found")
                if lines[0].strip().startswith('#===='):
                    nb_sep += 1
                elif not lines[0].strip():
                    nb_whitelines += 1
                header_lines.append(lines.pop(0))

            if nb_whitelines > 0:
                self._warn_list.append(
                    "The header has a blank line or is ill-formed")
            self._header = "\n".join(header_lines)
            self._body = "\n".join(lines)

        elif self._file_type == PYTHON_SCRIPT:
            # leading comment lines (shebang, coding cookie, ...)
            _pre_header_re = re.compile(r"^(#.*\n)*", re.MULTILINE)
            matches = _pre_header_re.match(self._file_content)
            if matches:
                self.__pre_header = matches.group().strip()

            self._body = copy.copy(self._file_content)
            self._body = self._body.replace(self.__pre_header, '').strip()
            self._header = ''

            # NOTE(review): group names other than 'brace' are not referenced
            # by the code and were not readable in the reviewed text; the
            # names used here are placeholders - confirm against the
            # repository.
            reg_doc_string = re.compile(
                r"""(?P<doc_string>["']{3}""" +
                r"""(?:\s*(?P<file>.*?):\s*)""" +
                r"""(?P<brief>(?:.|\n)*?)["']{3})?\n*""",
                re.MULTILINE)
            match = reg_doc_string.search(self._body)
            if match:
                self._header = self._header + match.group(0)

            reg_key_value = re.compile(
                r"""(?P<key_value>(?P<key>""" +
                r"""__(?:license|author|credits|copyright)__)""" +  # NOQA: E501, keywords
                r"""\s*=(?P<value>(?:(?P<brace>\s*(\[|\()\n?)?""" +  # NOQA: E501, opening braces
                r"""(?:\s*".*"\s*(?:\\|,)?\n?)+""" +  # NOQA: E501, value, multiline or comma separated
                r"""(?(brace)\s*(\]|\))\n))?))\n*""",  # NOQA: E501, closing braces if opening present
                re.MULTILINE)
            for match in re.finditer(reg_key_value, self._body):
                self._header = self._header + match.group(0)

            self._body = self._body.replace(self._header, '')

    def __find_authors(self):
        """Build the expressions describing the author declarations.

        NOTE(review): as written, nothing is stored in ``_authors``; the
        expressions are compiled but their matches are never consumed.
        Behaviour kept as is - confirm what was intended.
        """
        # NOTE(review): only the group name 'names' is referenced by the
        # code; the other names are placeholders.
        if self._file_type == PYTHON_SCRIPT:
            reg = re.compile(
                r"__credits__ = \[(?P<names>(?:\s*\"(.*?)\",?\n?)*)\]")  # NOQA(E501)
            matches = reg.search(self._header)
            if matches:
                names = matches.group('names').strip()  # noqa: F841
            reg = re.compile(
                r"\"(?P<firstname>\w+)\s*(?P<lastname>\w+?)(?:\s*<(?P<email>.*?)>)?\"")  # NOQA(E501)
        else:
            reg = re.compile(
                r'@author\s+(?P<firstname>\w+)(?:\w|\s|\d|\.)*\s+(?P<lastname>\w+)(?:\s+<(?P<email>.+@.+\..+)>)?',  # NOQA(E501)
                re.IGNORECASE)

    def __find_brief(self):
        """Extract the brief description of the file into ``_brief``."""
        if self._file_type == PYTHON_SCRIPT:
            # NOTE(review): only the group name 'brief' is referenced; the
            # two other names are placeholders.
            _brief_re = re.compile(
                r"(?P<doc_string>[\"']{3}(?:\s*(?P<file>.*?):\s*)(?P<brief>.*?)[\"']{3})",  # NOQA(E501)
                re.MULTILINE | re.DOTALL)
            matches = _brief_re.match(self._header)
            if matches:
                self._brief = ' '.join(
                    line.strip()
                    for line in matches.group('brief').split('\n'))
            else:
                self._brief = ""
            return

        if self._file_type in [CPP_SOURCES, SWIG_FILES]:
            prefix = " *"
        elif self._file_type == CMAKE_FILES:
            prefix = "#"
        else:
            raise IOError("file type unknown")

        reg = re.compile(
            r'@brief(.*?)^ ?\S\s+(@|Copyright)', re.MULTILINE | re.DOTALL)
        matches = list(reg.finditer(self._header))

        if not matches:
            self._brief = ""
            return

        if len(matches) > 1:
            self._warn_list.append("There's multiple @brief")

        # the result used to be assigned to a local and silently dropped
        self._brief = "\n".join(
            line.replace(prefix, "").strip()
            for line in matches[0].group(1).split('\n')
            if len(line.strip()) > 1)

    def generate_header(self, **kwargs):
        """Render the header template matching the file type.

        `kwargs` are passed to the template, completed with ``filename``,
        ``pre_header`` and ``sections``. Returns the rendered text.
        """
        args = copy.copy(kwargs)
        args['filename'] = os.path.basename(self._filename)
        args['pre_header'] = self.__pre_header
        args['sections'] = self._sections

        template_file = self.__templates[self._file_type]

        env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(_TEMPLATE_DIR),
            trim_blocks=True,
            undefined=jinja2.DebugUndefined
        )

        template = env.get_template(template_file)
        return template.render(args)

    def replace_file(self, new_header=None):
        """Rewrite the file with `new_header` followed by the body.

        The current header is kept when `new_header` is None. Leading blank
        lines of the body are dropped.
        """
        if new_header is None:
            new_header = self._header

        body = self._body.split("\n")
        while body and not body[0].strip():
            body.pop(0)

        new_file = (new_header + '\n\n' + '\n'.join(body))
        if self._file_type == PYTHON_SCRIPT:
            new_file = new_file + '\n'  # ensure new line at the end W292

        with open(self._filename, "w", encoding="utf-8") as fh:
            fh.write(new_file)
from datetime import datetime as dt
from . import licenser_ask_question
from . import author_db as adb
from . import copyright_db as cdb
from . import export
from . import file_info as fi
from . import print_colored
from . import version_info as vc
import os
from pygments import highlight
from pygments.lexers.diff import DiffLexer
from pygments.formatters.terminal256 import Terminal256Formatter
from pygments.formatters.terminal import TerminalFormatter


@export
class FileTransformer(object):
    """ Class that reformat the headers """

    __ignore = False
    _new_header = None

    def __init__(self, filename, db, release_date=None, force=False,
                 repo=None, **kwargs):
        """Prepare the new header of `filename`.

        `db` gives access to the configuration and the copyrights.
        `release_date` defaults to the current date, evaluated at call time.
        `force` regenerates the header even when the file was updated on or
        after the release date. `repo` overrides the configured repository.
        From `kwargs` only ``vc_backend`` and ``force_license`` are read.

        Raises RuntimeError when a versioning backend is used without a
        repository, and ValueError when the license is not defined.
        """
        if release_date is None:
            release_date = dt.now().date()

        self.__filename = filename
        self.__release_date = release_date
        self.__db = db
        self.__repo = repo

        vc_back = kwargs.get("vc_backend")
        if vc_back is None:
            vc_back = db.versioning_backend

        if vc_back != "none":
            self._date_style = self.__db.get_config('date_style')
            if not self.__repo:
                self.__repo = self.__db.get_config('repo')
            if not self.__repo:
                raise RuntimeError(
                    "You should specify a repo either in the config or on the arguments")

            self._vc_info = vc.VersionInfo(
                self.__repo, self.__filename,
                backend=vc_back, rev_to=release_date)
            self.__name = self._vc_info.name
            self.__filename = self.__repo + '/' + self.__name
            self._creation_date = self._vc_info.creation_date
        else:
            self._creation_date = None
            self._date_style = None
            self._vc_info = None

        # Without a versioning backend there is no version info to name the
        # file in messages, fall back on the path that was given.
        if self._vc_info is not None:
            shown_name = self._vc_info.name
        else:
            shown_name = self.__filename

        try:
            self._file = fi.FileInfo(self.__filename)
        except NotImplementedError:
            print("File {0} ignored due to {1}".format(
                shown_name,
                print_colored('unknown type', 'red', attrs=['bold'])))
            self.__ignore = True
            return

        rev_from = None
        license_id = None

        self.__oldest_name = ""
        # NOTE(review): nesting reconstructed from the reviewed text; the
        # database lookup is assumed to belong to the versioned case.
        if vc_back != "none":
            self.__oldest_name = self._vc_info.oldest_name
            file_maj = self.__db.find_file(self.__oldest_name)
            if file_maj is not None:
                rev_from, license_id = file_maj

                if not force and rev_from.date() >= release_date:
                    print(("File {0} ignored due to recent " +
                           "modifications ({1})").format(
                               shown_name,
                               print_colored(
                                   rev_from.strftime("%Y-%m-%d"),
                                   'red', attrs=['bold'])))
                    self.__ignore = True
                    return

        if kwargs.get('force_license') is not None:
            license_id = kwargs["force_license"]

        # Getting the license content
        self.__copyright_base = cdb.CopyrightDB(db)
        if not license_id:
            license_id = self.__db.get_config('default_copyright_id')

        self._lic = self.__copyright_base.find_by_id(license_id)
        if self._lic is None:
            raise ValueError(
                "The license with the id {0} is not defined".format(
                    license_id))

        # Generates the new header file
        end_year = self.__release_date.year
        # no creation date is known without a versioning backend
        if self._creation_date is not None:
            start_year = self._creation_date.year
        else:
            start_year = end_year

        lic_text = None
        if self._lic.text:
            if start_year == end_year:
                years = f"{start_year}"
            else:
                years = f"{start_year}-{end_year}"
            lic_text = self._lic.text.format(date=years)

        _header_params = {
            'copyright_txt': lic_text,
            'creation_date': self._creation_date,
        }

        self._new_header = self._file.generate_header(
            license_header=self._lic.header,
            need_header=self._lic.need_header,
            license=self._lic.license,
            **_header_params
        )

    def replace_file(self, dry_run=True):
        """Write the new header, or only print it when `dry_run` is set.

        Does nothing for files that were flagged as ignored.
        """
        if self.__ignore:
            return

        if dry_run:
            print(self._new_header)
        else:
            self._file.replace_file(self._new_header)
+1,11 @@ {% if pre_header %} {{ pre_header }} {% endif %} {% if copyright_txt %} -__copyright__ = "{{ copyright_txt | wordwrap(59) | replace('\n', '" \\\n " ') }}" +__copyright__ = ( +{{ ' "' + copyright_txt | wordwrap(79, wrapstring='"\n "') }}" +) {% endif -%} {% if license %} __license__ = "{{license}}" {% endif -%}