Refactor memap for speed

2017-09-15 16:32:30 -05:00 · 2017-09-15 16:32:30 -05:00 · 802cfcfbca
parent 6e087488e1
commit 802cfcfbca
1 changed files with 109 additions and 206 deletions
--- a/tools/memap.py
+++ b/tools/memap.py
@ -20,6 +20,19 @@ RE_IAR = re.compile(
    r'^\s+(.+)\s+(zero|const|ro code|inited|uninit)\s'
    r'+0x(\w{8})\s+0x(\w+)\s+(.+)\s.+$')
 # Path of a .map file that has a "TESTS/" component after at least one '/';
 # group 1 is everything before that '/'.
 RE_IS_TEST = re.compile(r'^(.+)\/.*TESTS\/.+\.map$')
 # Line that starts with a name ending in ".a" followed by ':' and more text;
 # group 1 is that ".a" name.
 RE_LIBRARY_IAR = re.compile(r'^(.+\.a)\:.+$')
 # Indented line whose captured part ends in ".o" and is followed by
 # whitespace; group 1 is that ".o" name.
 RE_OBJECT_LIBRARY_IAR = re.compile(r'^\s+(.+\.o)\s.*')
 # Whole line is a path that contains a '/' and ends in ".o"; group 1 is the
 # complete path.
 RE_OBJECT_FILE_GCC = re.compile(r'^(.+\/.+\.o)$')
 # "<dir>/lib<name>.a(<object>.o)"; group 1 is the archive file name and
 # group 2 the object named inside the parentheses.
 RE_LIBRARY_OBJECT_GCC = re.compile(r'^.+\/(lib.+\.a)\((.+\.o)\)$')
 # Indented line carrying two 0x-prefixed values followed by trailing text;
 # group 1 is the first value (8 to 16 word characters), group 2 the second
 # value and group 3 the trailing text.
 # NOTE(review): presumably address / size / object path - confirm.
 RE_STD_SECTION_GCC = re.compile(r'^\s+.*0x(\w{8,16})\s+0x(\w+)\s(.+)$')
 # "*fill*" line (leading whitespace optional) carrying two 0x-prefixed
 # values; group 1 is the first value and group 2 the second.
 RE_FILL_SECTION_GCC = re.compile(r'^\s*\*fill\*\s+0x(\w{8,16})\s+0x(\w+).*$')
 # "<name>.l(<object>.o)" or "<name>.ar(<object>.o)"; group 1 is the name
 # including its extension, group 2 the extension alone and group 3 the
 # object named inside the parentheses.
 RE_OBJECT_ARMCC = re.compile(r'(.+\.(l|ar))\((.+\.o)\)')
 class MemapParser(object):
    """An object that represents parsed results, parses the memory map files,
    and writes out different file types of memory results
@ -60,31 +73,6 @@ class MemapParser(object):
        self.misc_flash_mem = 0
    def remove_unused_modules(self):
        """ Removes modules/objects that were compiled but are not used

        A module is considered unused when its sizes over every section
        listed in self.print_sections add up to zero. Matching entries are
        deleted from self.modules in place; nothing is returned.
        """
        # Walk a snapshot of the keys: entries are deleted inside the loop,
        # which is only safe when the dictionary itself is not the object
        # being iterated (dict.keys() is a live view on Python 3).
        for name in list(self.modules):
            sizes = self.modules[name]
            if sum(sizes[section] for section in self.print_sections) == 0:
                del self.modules[name]
    def module_init(self, object_name):
        """ Registers a module with a zero size for every known section

        A module that is already present in self.modules is left untouched.

        Positional arguments:
        object_name - name of the entry to add
        """
        if object_name in self.modules:
            return
        # One zeroed counter per entry of self.all_sections
        self.modules[object_name] = dict.fromkeys(self.all_sections, 0)
    def module_add(self, object_name, size, section):
        """ Adds a module / section to the list
@ -94,28 +82,27 @@ class MemapParser(object):
        section - the section the module contributes to
        """
-        # Check if object is a sub-string of key
+        if not object_name or not size or not section:
-        for module_path in self.modules:
+            return
-            # this is required to differenciate: main.o vs xxxmain.o
+        if object_name in self.modules:
-            module_split = os.path.basename(module_path)
+            self.modules[object_name].setdefault(section, 0)
-            obj_split = os.path.basename(object_name)
+            self.modules[object_name][section] += size
            return
-            if module_split == obj_split:
+        obj_split = os.sep + os.path.basename(object_name)
-                self.modules[module_path][section] += size
+        for module_path, contents in self.modules.items():
            if module_path.endswith(obj_split) or module_path == object_name:
                contents.setdefault(section, 0)
                contents[section] += size
                return
-        new_module = dict()
+        new_module = {section: size}
        for section_idx in self.all_sections:
            new_module[section_idx] = 0
        new_module[section] = size
        self.modules[object_name] = new_module
    def module_replace(self, old_object, new_object):
        """ Replaces an object name with a new one

        Positional arguments:
        old_object - key currently used in self.modules
        new_object - key the same entry should be stored under instead

        Nothing happens when old_object is not a key of self.modules. An
        entry already stored under new_object is overwritten.
        """
        # Exact key lookup (not a sub-string search)
        if old_object in self.modules:
            # Remove first, then store: with the opposite order a rename of
            # a module to its own name would delete the entry altogether.
            self.modules[new_object] = self.modules.pop(old_object)
@ -139,7 +126,7 @@ class MemapParser(object):
            return False         # everything else, means no change in section
-    def parse_object_name_gcc(self, line):
+    def parse_object_name_gcc(self, line, prefixes):
        """ Parse a path to object file
        Positional arguments:
@ -147,8 +134,7 @@ class MemapParser(object):
        """
        line = line.replace('\\', '/')
-        RE_OBJECT_FILE = r'^.+\/(.+\.o)$'
+        test_re_mbed_os_name = re.match(RE_OBJECT_FILE_GCC, line)
        test_re_mbed_os_name = re.match(RE_OBJECT_FILE, line)
        if test_re_mbed_os_name:
@ -156,14 +142,17 @@ class MemapParser(object):
            # corner case: certain objects are provided by the GCC toolchain
            if 'arm-none-eabi' in line:
-                object_name = '[lib]/misc/' + object_name
+                return '[lib]/misc/' + object_name
            for prefix in prefixes:
                if object_name.startswith(prefix):
                    return os.path.relpath(object_name, prefix)
            return object_name
        else:
-            RE_LIBRARY_OBJECT_FILE = r'^.+\/(lib.+\.a)\((.+\.o)\)$'
+            test_re_obj_name = re.match(RE_LIBRARY_OBJECT_GCC, line)
            test_re_obj_name = re.match(RE_LIBRARY_OBJECT_FILE, line)
            if test_re_obj_name:
                object_name = test_re_obj_name.group(1) + '/' + \
@ -175,7 +164,7 @@ class MemapParser(object):
                print "Malformed input found when parsing GCC map: %s" % line
                return '[misc]'
-    def parse_section_gcc(self, line):
+    def parse_section_gcc(self, line, prefixes):
        """ Parse data from a section of gcc map file
        examples:
@ -186,40 +175,19 @@ class MemapParser(object):
        line - the line to parse a section from
        """
-        RE_STD_SECTION_GCC = re.compile(
+        is_fill = re.match(RE_FILL_SECTION_GCC, line)
-            r'^\s+.*0x(\w{8,16})\s+0x(\w+)\s(.+)$')
+        if is_fill:
            o_name = '[fill]'
            o_size = int(is_fill.group(2), 16)
            return [o_name, o_size]
-        test_address_len_name = re.match(RE_STD_SECTION_GCC, line)
+        is_section = re.match(RE_STD_SECTION_GCC, line)
-
+        if is_section:
-        if test_address_len_name:
+            o_name = self.parse_object_name_gcc(is_section.group(3), prefixes)
-
+            o_size = int(is_section.group(2), 16)
-            if int(test_address_len_name.group(2), 16) == 0: # size == 0
+            return [o_name, o_size]
                return ["", 0] # no valid entry
            else:
                o_name = self.parse_object_name_gcc(\
                    test_address_len_name.group(3))
                o_size = int(test_address_len_name.group(2), 16)
                return [o_name, o_size]
        else: # special corner case for *fill* sections
            #  example
            # *fill*         0x0000abe4        0x4
            RE_FILL_SECTION_GCC = r'^\s+\*fill\*\s+0x(\w{8,16})\s+0x(\w+).*$'
            test_address_len = re.match(RE_FILL_SECTION_GCC, line)
            if test_address_len:
                if int(test_address_len.group(2), 16) == 0: # size == 0
                    return ["", 0] # no valid entry
                else:
                    o_name = '[fill]'
                    o_size = int(test_address_len.group(2), 16)
                    return [o_name, o_size]
            else:
                return ["", 0] # no valid entry
        return ["", 0]
    def parse_map_file_gcc(self, file_desc):
        """ Main logic to decode gcc map files
@ -230,31 +198,29 @@ class MemapParser(object):
        current_section = 'unknown'
-        with file_desc as infile:
+        prefixes = [os.path.abspath(os.path.dirname(file_desc.name))]
        is_test = re.match(RE_IS_TEST, file_desc.name)
        if is_test:
            prefixes.append(os.path.abspath(is_test.group(1)))
-            # Search area to parse
+        with file_desc as infile:
            for line in infile:
                if line.startswith('Linker script and memory map'):
                    current_section = "unknown"
                    break
            # Start decoding the map file
            for line in infile:
                next_section = self.check_new_section_gcc(line)
-                change_section = self.check_new_section_gcc(line)
+                if next_section == "OUTPUT":
                    return
                elif next_section:
                    current_section = next_section
-                if change_section == "OUTPUT": # finish parsing file: exit
+                object_name, object_size = self.parse_section_gcc(line, prefixes)
                    break
                elif change_section != False:
                    current_section = change_section
-                [object_name, object_size] = self.parse_section_gcc(line)
+                self.module_add(object_name, object_size, current_section)
                if object_size == 0 or object_name == "":
                    pass
                else:
                    self.module_add(object_name, object_size,\
                                        current_section)
    def parse_object_name_armcc(self, line):
        """ Parse object file
@ -269,7 +235,6 @@ class MemapParser(object):
        else:
            RE_OBJECT_ARMCC = r'(.+\.(l|ar))\((.+\.o)\)'
            test_re_obj_name = re.match(RE_OBJECT_ARMCC, line)
            if test_re_obj_name:
@ -329,7 +294,7 @@ class MemapParser(object):
        """
        # simple object (not library)
-        if line[-2] == '.' and line[-1] == 'o':
+        if line.endswith(".o"):
            object_name = line
            return object_name
@ -361,11 +326,11 @@ class MemapParser(object):
            size = int(test_re_iar.group(4), 16)
-            if test_re_iar.group(2) == 'const' or \
+            if (test_re_iar.group(2) == 'const' or
-               test_re_iar.group(2) == 'ro code':
+                test_re_iar.group(2) == 'ro code'):
                section = '.text'
-            elif test_re_iar.group(2) == 'zero' or \
+            elif (test_re_iar.group(2) == 'zero' or
-            test_re_iar.group(2) == 'uninit':
+                  test_re_iar.group(2) == 'uninit'):
                if test_re_iar.group(1)[0:4] == 'HEAP':
                    section = '.heap'
                elif test_re_iar.group(1)[0:6] == 'CSTACK':
@ -403,15 +368,12 @@ class MemapParser(object):
            # Start decoding the map file
            for line in infile:
-
+                object_name, object_size, section = self.parse_section_armcc(line)
-                [object_name, object_size, section] = \
+                if object_name is not "" and section is not "":
                                self.parse_section_armcc(line)
                if object_size == 0 or object_name == "" or section == "":
                    pass
                else:
                    self.module_add(object_name, object_size, section)
            self.rename_modules_from_fs(infile.name)
    def check_new_library_iar(self, line):
        """
@ -420,7 +382,6 @@ class MemapParser(object):
        """
        RE_LIBRARY_IAR = re.compile(r'^(.+\.a)\:.+$')
        test_address_line = re.match(RE_LIBRARY_IAR, line)
@ -441,8 +402,6 @@ class MemapParser(object):
        """
        RE_OBJECT_LIBRARY_IAR = re.compile(r'^\s+(.+\.o)\s.*')
        test_address_line = re.match(RE_OBJECT_LIBRARY_IAR, line)
        if test_address_line:
@ -483,80 +442,54 @@ class MemapParser(object):
                library = self.check_new_library_iar(line)
-                if library != "":
+                if library:
                    current_library = library
                object_name = self.check_new_object_lib_iar(line)
-                if object_name != "" and current_library != "":
+                if object_name and current_library:
                    temp = '[lib]' + '/'+ current_library + '/'+ object_name
                    self.module_replace(object_name, temp)
            self.rename_modules_from_fs(infile.name)
    def _rename_from_path(self, path, to_find, to_update, skip):
        """ Re-keys modules by the relative path at which their file is found

        Walks the directory tree below path. Every file name that is also in
        to_find is removed from to_find, and the matching self.modules entry
        is stored in to_update under the file's path relative to path.

        Positional arguments:
        path - directory to walk
        to_find - set of module names still to locate; modified in place
        to_update - dict receiving the re-keyed entries; modified in place
        skip - directory base names that are neither scanned nor descended
               into
        """
        for current_dir, child_dirs, file_names in os.walk(path):
            if os.path.basename(current_dir) in skip:
                # Emptying the list in place stops os.walk from descending
                del child_dirs[:]
                continue
            relative_dir = os.path.relpath(current_dir, path)
            located = to_find.intersection(file_names)
            for obj_name in located:
                to_find.remove(obj_name)
                new_key = os.path.join(relative_dir, obj_name)
                to_update[new_key] = self.modules[obj_name]
-    export_formats = ["json", "csv-ci", "table"]
+    def rename_modules_from_fs(self, path):
-
+        """ Converts the current module list to use the path to a module instead
-    def list_dir_obj(self, path):
+        of the name in the map file
        """ Searches all objects in BUILD directory and creates list
        Positional arguments:
        path - the path to a map file
        """
-
+        new_modules = dict()
-        path = path.replace('\\', '/')
+        to_match = set(k for k in self.modules.keys() if not k.startswith("[lib]"))
-
+        for module, v in self.modules.items():
-        # check location of map file
+            if module.startswith("[lib]"):
-        RE_PATH_MAP_FILE = r'^(.+)\/.+\.map$'
+                new_modules[module] = v
-        test_re = re.match(RE_PATH_MAP_FILE, path)
+        is_test = re.match(RE_IS_TEST, path)
-
+        if is_test:
-        if test_re:
+            self._rename_from_path(is_test.group(1), to_match, new_modules, set(["TESTS"]))
-            search_path = test_re.group(1)
+            self._rename_from_path(os.path.dirname(path), to_match, new_modules, set())
        else:
-            print "Warning: this doesn't look like an mbed project"
+            self._rename_from_path(os.path.dirname(path), to_match, new_modules, [])
            return
-        # create empty disctionary
+        for module in to_match:
-        self.modules = dict()
+            new_modules[module] = self.modules[module]
-        # search for object files
+        self.modules = new_modules
        for root, _, obj_files in os.walk(search_path):
            for obj_file in obj_files:
                if obj_file.endswith(".o"):
                    txt = os.path.join(root, obj_file)
                    txt = txt.replace('\\', '/')
                    # add relative path + object to list
                    self.module_init(txt[len(search_path)+1:])
        # The code below is a special case for TESTS.
        # mbed-os lives in a separate location and we need to explicitly search
        # their object files skipping the TESTS folder (already scanned above)
        # check location of mbed-os
        RE_PATH_MAP_FILE = r'^(.+)\/mbed-os\/.*TESTS\/.+\.map$'
        test_re = re.match(RE_PATH_MAP_FILE, path)
        if test_re == None:
            return
        search_path = test_re.group(1)
        # search for object files
        for root, _, obj_files in os.walk(search_path):
            for obj_file in obj_files:
                if 'TESTS' not in root and obj_file.endswith(".o"):
                    txt = os.path.join(root, obj_file)
                    txt = txt.replace('\\', '/')
                    # add relative path + object to list
                    self.module_init(txt[len(search_path)+1:])
    def reduce_depth(self, depth):
        """
-        prints list of directories and objects. Examples:
+        populates the short_modules attribute with a truncated module list
        (1) depth = 1:
        main.o
@ -568,43 +501,19 @@ class MemapParser(object):
        mbed-os/drivers
        """
        # depth 0 or None shows all entries
        if depth == 0 or depth == None:
            self.short_modules = deepcopy(self.modules)
-            return
+        else:
-
+            self.short_modules = dict()
-        self.short_modules = dict()
+            for module_name, v in self.modules.items():
-
+                split_name = module_name.split('/')
-        # create reduced list
+                if split_name[0] == '':
-        for line in self.modules:
+                    split_name = split_name[1:]
-
+                new_name = "/".join(split_name[:depth])
-            data = line.split('/')
+                self.short_modules.setdefault(new_name, {})
-            ndir = len(data)
+                for section_idx, value in v.items():
-
+                    self.short_modules[new_name].setdefault(section_idx, 0)
-            temp = ''
+                    self.short_modules[new_name][section_idx] += self.modules[module_name][section_idx]
            count = 0
            # iterate until the max depth level
            max_level = min(depth, ndir)
            # rebuild the path based on depth level
            while count < max_level:
                if count > 0:    # ignore '/' from first entry
                    temp = temp + '/'
                temp = temp + data[count]
                count += 1
            if temp not in self.short_modules:
                temp_dic = dict()
                for section_idx in self.all_sections:
                    temp_dic[section_idx] = 0
                self.short_modules[temp] = temp_dic
            for section_idx in self.all_sections:
                self.short_modules[temp][section_idx] += \
                    self.modules[line][section_idx]
    export_formats = ["json", "csv-ci", "table"]
@ -728,12 +637,12 @@ class MemapParser(object):
    def compute_report(self):
        """ Generates summary of memory usage for main areas
        """
        for k in self.sections:
            self.subtotal[k] = 0
-        for i in sorted(self.short_modules):
+        for i in self.short_modules:
            for k in self.sections:
                self.short_modules[i].setdefault(k, 0)
                self.subtotal[k] += self.short_modules[i][k]
        self.mem_summary = {
@ -746,7 +655,7 @@ class MemapParser(object):
            self.mem_report.append({
                "module":i,
                "size":{
-                    k:self.short_modules[i][k] for k in self.print_sections
+                    k: self.short_modules[i][k] for k in self.print_sections
                }
            })
@ -765,10 +674,6 @@ class MemapParser(object):
        result = True
        try:
            with open(mapfile, 'r') as file_input:
                # Common to all toolchains: first search for objects in BUILD
                self.list_dir_obj(os.path.abspath(mapfile))
                if toolchain in ("ARM", "ARM_STD", "ARM_MICRO", "ARMC6"):
                    self.parse_map_file_armcc(file_input)
                elif toolchain == "GCC_ARM" or toolchain == "GCC_CR":
@ -778,8 +683,6 @@ class MemapParser(object):
                else:
                    result = False
                self.remove_unused_modules()
        except IOError as error:
            print "I/O error({0}): {1}".format(error.errno, error.strerror)
            result = False