#!/usr/bin/env python3
# coding=utf-8

# amalgamate.py - Amalgamate C source and header files.
# Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
# 
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
#  * Redistributions of source code must retain the above copyright notice,
#  this list of conditions and the following disclaimer.
# 
#  * Redistributions in binary form must reproduce the above copyright notice,
#  this list of conditions and the following disclaimer in the documentation
#  and/or other materials provided with the distribution.
# 
#  * Neither the name of Erik Edlund, nor the names of its contributors may
#  be used to endorse or promote products derived from this software without
#  specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import datetime
import json
import os
import re


class Amalgamation(object):
    """Driver that concatenates the configured sources into one target file.

    Every key of the JSON config file becomes an attribute of the instance.
    The rest of this class reads ``self.target``, ``self.sources`` and
    ``self.include_paths``, so the config is expected to provide them.
    """

    def actual_path(self, file_path):
        """Return file_path, prefixed with self.source_path unless absolute."""
        if not os.path.isabs(file_path):
            file_path = os.path.join(self.source_path, file_path)
        return file_path

    def find_included_file(self, file_path, source_dir):
        """Locate an included file and return its path, or None.

        source_dir (when truthy) is searched first, followed by every entry
        of self.include_paths. The returned path is the joined search path,
        i.e. it is still relative to self.source_path unless the search
        directory was absolute.
        """
        search_dirs = self.include_paths[:]
        if source_dir:
            search_dirs.insert(0, source_dir)

        for search_dir in search_dirs:
            search_path = os.path.join(search_dir, file_path)
            if os.path.isfile(self.actual_path(search_path)):
                return search_path
        return None

    def __init__(self, args):
        """Load the JSON config named by args.config and apply the CLI args.

        args must provide config, verbose, prologue and source_path
        attributes (see main()). Values taken from args are assigned after
        the config keys, so they win over same-named config keys.
        """
        with open(args.config, 'r') as f:
            config = json.load(f)

        for key in config:
            setattr(self, key, config[key])

        self.verbose = args.verbose == "yes"
        self.prologue = args.prologue
        self.source_path = args.source_path
        # Paths of every file turned into a TranslationUnit so far; used to
        # expand each header at most once.
        self.included_files = []

    def generate(self):
        """Generate the amalgamation and write it to self.target."""
        # Collect the pieces and join once instead of growing a string.
        parts = []

        if self.prologue:
            with open(self.prologue, 'r') as f:
                # The prologue text is used as a strftime format string, so
                # date directives in it are expanded to the current time.
                parts.append(datetime.datetime.now().strftime(f.read()))

        if self.verbose:
            print("Config:")
            print(" target        = {0}".format(self.target))
            print(" working_dir   = {0}".format(os.getcwd()))
            print(" include_paths = {0}".format(self.include_paths))
        print("Creating amalgamation:")
        for file_path in self.sources:
            # Do not check the include paths while processing the source
            # list, all given source paths must be correct.
            print(" - processing \"{0}\"".format(file_path))
            t = TranslationUnit(file_path, self, True)
            parts.append(t.content)

        with open(self.target, 'w') as f:
            f.write("".join(parts))

        print("...done!\n")
        if self.verbose:
            print("Files processed: {0}".format(self.sources))
            print("Files included: {0}".format(self.included_files))
        print("")


def _is_within(match, matches):
    """Return True if match lies strictly inside any of the given matches."""
    for m in matches:
        if match.start() > m.start() and \
                match.end() < m.end():
            return True
    return False


class TranslationUnit(object):
    """One source or header file with its trivial #include directives expanded.

    Constructing an instance reads the file and processes it immediately;
    the result is available as self.content.
    """

    # // C++ comment.
    cpp_comment_pattern = re.compile(r"//.*?\n")

    # /* C comment. */
    c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)

    # "complex \"stri\\\ng\" value".
    string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)

    # Handle simple include directives. Support for advanced
    # directives where macros and defines needs to expanded is
    # not a concern right now.
    include_pattern = re.compile(
        r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)

    # #pragma once
    pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)

    def _search_content(self, index, pattern, contexts):
        """Try to match pattern in self.content exactly at index.

        On success the match is appended to contexts and the index just past
        it is returned; otherwise index + 2 is returned, which is the
        character following the one the caller is currently looking at.
        """
        # The match is anchored at index on purpose: an unanchored search
        # could pick up an unrelated context much further ahead (e.g. after
        # the char literal '"') and make the caller jump over everything in
        # between, including real comments.
        match = pattern.match(self.content, index)
        if match:
            contexts.append(match)
            return match.end()
        return index + 2

    def _find_skippable_contexts(self):
        """Return the matches for all comments and strings in self.content.

        Directives found inside these contexts must not be processed.
        """
        skippable_contexts = []

        # Walk through the content char by char, and try to grab
        # skippable contexts using regular expressions when found.
        i = 1
        content_len = len(self.content)
        while i < content_len:
            j = i - 1
            current = self.content[i]
            previous = self.content[j]

            if current == '"':
                # String value.
                i = self._search_content(j, self.string_pattern,
                                         skippable_contexts)
            elif current == '*' and previous == '/':
                # C style comment.
                i = self._search_content(j, self.c_comment_pattern,
                                         skippable_contexts)
            elif current == '/' and previous == '/':
                # C++ style comment.
                i = self._search_content(j, self.cpp_comment_pattern,
                                         skippable_contexts)
            else:
                # Skip to the next char.
                i += 1

        return skippable_contexts

    def _matches_outside(self, pattern, contexts):
        """Return all matches of pattern that are not inside any context."""
        return [m for m in pattern.finditer(self.content)
                if not _is_within(m, contexts)]

    def _process_pragma_once(self):
        """Remove "#pragma once" directives from self.content.

        Directives inside comments or strings are left alone. Returns the
        number of directives removed.
        """
        if len(self.content) < len("#include <x>"):
            return 0

        skippable_contexts = self._find_skippable_contexts()
        pragmas = self._matches_outside(self.pragma_once_pattern,
                                        skippable_contexts)

        # Keep everything between the collected directives.
        prev_end = 0
        parts = []
        for pragma_match in pragmas:
            parts.append(self.content[prev_end:pragma_match.start()])
            prev_end = pragma_match.end()
        parts.append(self.content[prev_end:])
        self.content = "".join(parts)

        return len(pragmas)

    def _process_includes(self):
        """Expand all trivial #include directives in self.content.

        Every directive whose file can be found is replaced by a "// ..."
        comment holding the original directive, followed by the processed
        content of the file unless that file was already included.
        Directives inside comments or strings, and directives whose file is
        not found, are left untouched. Returns the number of directives
        replaced.
        """
        if len(self.content) < len("#include <x>"):
            return 0

        skippable_contexts = self._find_skippable_contexts()

        # Collect the include directives which can be resolved to a file.
        includes = []
        for include_match in self._matches_outside(self.include_pattern,
                                                   skippable_contexts):
            include_path = include_match.group("path")
            # Only the quoted form also searches next to the including file.
            search_same_dir = include_match.group(1) == '"'
            found_included_path = self.amalgamation.find_included_file(
                include_path, self.file_dir if search_same_dir else None)
            if found_included_path:
                includes.append((include_match, found_included_path))

        # Handle all collected include directives.
        prev_end = 0
        parts = []
        for include_match, found_included_path in includes:
            parts.append(self.content[prev_end:include_match.start()])
            parts.append("// {0}\n".format(include_match.group(0)))
            if found_included_path not in self.amalgamation.included_files:
                t = TranslationUnit(found_included_path, self.amalgamation,
                                    False)
                parts.append(t.content)
            prev_end = include_match.end()
        parts.append(self.content[prev_end:])
        self.content = "".join(parts)

        return len(includes)

    def _process(self):
        """Run all content processing steps on self.content."""
        # "#pragma once" is only stripped from included files, not from the
        # files listed as sources.
        if not self.is_root:
            self._process_pragma_once()
        self._process_includes()

    def __init__(self, file_path, amalgamation, is_root):
        """Read and process file_path.

        file_path is resolved through amalgamation.actual_path() and is
        recorded in amalgamation.included_files. is_root is True for files
        taken from the source list and False for included files.

        Raises IOError if the resolved path is not an existing file.
        """
        self.file_path = file_path
        self.file_dir = os.path.dirname(file_path)
        self.amalgamation = amalgamation
        self.is_root = is_root

        # Registered before processing so that a file including itself
        # (directly or indirectly) is not expanded again.
        self.amalgamation.included_files.append(self.file_path)

        actual_path = self.amalgamation.actual_path(file_path)
        if not os.path.isfile(actual_path):
            raise IOError("File not found: \"{0}\"".format(file_path))
        with open(actual_path, 'r') as f:
            self.content = f.read()
        # Processing recurses into included files; do it after the file is
        # closed so nested includes do not pile up open file handles.
        self._process()


def main():
    """Parse the command line and generate the amalgamation."""
    description = "Amalgamate C source and header files."
    usage = " ".join([
        "amalgamate.py",
        "[-v]",
        "-c path/to/config.json",
        "-s path/to/source/dir",
        "[-p path/to/prologue.(c|h)]"
    ])
    argsparser = argparse.ArgumentParser(
        description=description, usage=usage)

    argsparser.add_argument("-v", "--verbose", dest="verbose",
                            choices=["yes", "no"], metavar="", help="be verbose")

    argsparser.add_argument("-c", "--config", dest="config",
                            required=True, metavar="", help="path to a JSON config file")

    argsparser.add_argument("-s", "--source", dest="source_path",
                            required=True, metavar="", help="source code path")

    argsparser.add_argument("-p", "--prologue", dest="prologue",
                            required=False, metavar="", help="path to a C prologue file")

    amalgamation = Amalgamation(argsparser.parse_args())
    amalgamation.generate()


if __name__ == "__main__":
    main()