Refactor memap for speed

pull/5125/head
Jimmy Brisson 2017-09-15 16:32:30 -05:00
parent a0d55036f9
commit 2114ccd5a1
1 changed files with 109 additions and 206 deletions

View File

@ -20,6 +20,19 @@ RE_IAR = re.compile(
r'^\s+(.+)\s+(zero|const|ro code|inited|uninit)\s'
r'+0x(\w{8})\s+0x(\w+)\s+(.+)\s.+$')
RE_IS_TEST = re.compile(r'^(.+)\/.*TESTS\/.+\.map$')
RE_LIBRARY_IAR = re.compile(r'^(.+\.a)\:.+$')
RE_OBJECT_LIBRARY_IAR = re.compile(r'^\s+(.+\.o)\s.*')
RE_OBJECT_FILE_GCC = re.compile(r'^(.+\/.+\.o)$')
RE_LIBRARY_OBJECT_GCC = re.compile(r'^.+\/(lib.+\.a)\((.+\.o)\)$')
RE_STD_SECTION_GCC = re.compile(r'^\s+.*0x(\w{8,16})\s+0x(\w+)\s(.+)$')
RE_FILL_SECTION_GCC = re.compile(r'^\s*\*fill\*\s+0x(\w{8,16})\s+0x(\w+).*$')
RE_OBJECT_ARMCC = re.compile(r'(.+\.(l|ar))\((.+\.o)\)')
class MemapParser(object):
"""An object that represents parsed results, parses the memory map files,
and writes out different file types of memory results
@ -60,31 +73,6 @@ class MemapParser(object):
self.misc_flash_mem = 0
def remove_unused_modules(self):
""" Removes modules/objects that were compiled but are not used
"""
# Using keys to be able to remove entry
for i in self.modules.keys():
size = 0
for k in self.print_sections:
size += self.modules[i][k]
if size == 0:
del self.modules[i]
def module_init(self, object_name):
""" Initialize a module. Just adds the name of the module
Positional arguments:
object_name - name of the entry to add
"""
if object_name not in self.modules:
temp_dic = dict()
for section_idx in self.all_sections:
temp_dic[section_idx] = 0
self.modules[object_name] = temp_dic
def module_add(self, object_name, size, section):
""" Adds a module / section to the list
@ -94,28 +82,27 @@ class MemapParser(object):
section - the section the module contributes to
"""
# Check if object is a sub-string of key
for module_path in self.modules:
if not object_name or not size or not section:
return
# this is required to differenciate: main.o vs xxxmain.o
module_split = os.path.basename(module_path)
obj_split = os.path.basename(object_name)
if object_name in self.modules:
self.modules[object_name].setdefault(section, 0)
self.modules[object_name][section] += size
return
if module_split == obj_split:
self.modules[module_path][section] += size
obj_split = os.sep + os.path.basename(object_name)
for module_path, contents in self.modules.items():
if module_path.endswith(obj_split) or module_path == object_name:
contents.setdefault(section, 0)
contents[section] += size
return
new_module = dict()
for section_idx in self.all_sections:
new_module[section_idx] = 0
new_module[section] = size
new_module = {section: size}
self.modules[object_name] = new_module
def module_replace(self, old_object, new_object):
""" Replaces an object name with a new one
"""
# Check if object is a sub-string of key
if old_object in self.modules:
self.modules[new_object] = self.modules[old_object]
del self.modules[old_object]
@ -139,7 +126,7 @@ class MemapParser(object):
return False # everything else, means no change in section
def parse_object_name_gcc(self, line):
def parse_object_name_gcc(self, line, prefixes):
""" Parse a path to object file
Positional arguments:
@ -147,8 +134,7 @@ class MemapParser(object):
"""
line = line.replace('\\', '/')
RE_OBJECT_FILE = r'^.+\/(.+\.o)$'
test_re_mbed_os_name = re.match(RE_OBJECT_FILE, line)
test_re_mbed_os_name = re.match(RE_OBJECT_FILE_GCC, line)
if test_re_mbed_os_name:
@ -156,14 +142,17 @@ class MemapParser(object):
# corner case: certain objects are provided by the GCC toolchain
if 'arm-none-eabi' in line:
object_name = '[lib]/misc/' + object_name
return '[lib]/misc/' + object_name
for prefix in prefixes:
if object_name.startswith(prefix):
return os.path.relpath(object_name, prefix)
return object_name
else:
RE_LIBRARY_OBJECT_FILE = r'^.+\/(lib.+\.a)\((.+\.o)\)$'
test_re_obj_name = re.match(RE_LIBRARY_OBJECT_FILE, line)
test_re_obj_name = re.match(RE_LIBRARY_OBJECT_GCC, line)
if test_re_obj_name:
object_name = test_re_obj_name.group(1) + '/' + \
@ -175,7 +164,7 @@ class MemapParser(object):
print "Malformed input found when parsing GCC map: %s" % line
return '[misc]'
def parse_section_gcc(self, line):
def parse_section_gcc(self, line, prefixes):
""" Parse data from a section of gcc map file
examples:
@ -186,40 +175,19 @@ class MemapParser(object):
line - the line to parse a section from
"""
RE_STD_SECTION_GCC = re.compile(
r'^\s+.*0x(\w{8,16})\s+0x(\w+)\s(.+)$')
is_fill = re.match(RE_FILL_SECTION_GCC, line)
if is_fill:
o_name = '[fill]'
o_size = int(is_fill.group(2), 16)
return [o_name, o_size]
test_address_len_name = re.match(RE_STD_SECTION_GCC, line)
if test_address_len_name:
if int(test_address_len_name.group(2), 16) == 0: # size == 0
return ["", 0] # no valid entry
else:
o_name = self.parse_object_name_gcc(\
test_address_len_name.group(3))
o_size = int(test_address_len_name.group(2), 16)
return [o_name, o_size]
else: # special corner case for *fill* sections
# example
# *fill* 0x0000abe4 0x4
RE_FILL_SECTION_GCC = r'^\s+\*fill\*\s+0x(\w{8,16})\s+0x(\w+).*$'
test_address_len = re.match(RE_FILL_SECTION_GCC, line)
if test_address_len:
if int(test_address_len.group(2), 16) == 0: # size == 0
return ["", 0] # no valid entry
else:
o_name = '[fill]'
o_size = int(test_address_len.group(2), 16)
return [o_name, o_size]
else:
return ["", 0] # no valid entry
is_section = re.match(RE_STD_SECTION_GCC, line)
if is_section:
o_name = self.parse_object_name_gcc(is_section.group(3), prefixes)
o_size = int(is_section.group(2), 16)
return [o_name, o_size]
return ["", 0]
def parse_map_file_gcc(self, file_desc):
""" Main logic to decode gcc map files
@ -230,31 +198,29 @@ class MemapParser(object):
current_section = 'unknown'
with file_desc as infile:
prefixes = [os.path.abspath(os.path.dirname(file_desc.name))]
is_test = re.match(RE_IS_TEST, file_desc.name)
if is_test:
prefixes.append(os.path.abspath(is_test.group(1)))
# Search area to parse
with file_desc as infile:
for line in infile:
if line.startswith('Linker script and memory map'):
current_section = "unknown"
break
# Start decoding the map file
for line in infile:
next_section = self.check_new_section_gcc(line)
change_section = self.check_new_section_gcc(line)
if next_section == "OUTPUT":
return
elif next_section:
current_section = next_section
if change_section == "OUTPUT": # finish parsing file: exit
break
elif change_section != False:
current_section = change_section
object_name, object_size = self.parse_section_gcc(line, prefixes)
[object_name, object_size] = self.parse_section_gcc(line)
self.module_add(object_name, object_size, current_section)
if object_size == 0 or object_name == "":
pass
else:
self.module_add(object_name, object_size,\
current_section)
def parse_object_name_armcc(self, line):
""" Parse object file
@ -269,7 +235,6 @@ class MemapParser(object):
else:
RE_OBJECT_ARMCC = r'(.+\.(l|ar))\((.+\.o)\)'
test_re_obj_name = re.match(RE_OBJECT_ARMCC, line)
if test_re_obj_name:
@ -329,7 +294,7 @@ class MemapParser(object):
"""
# simple object (not library)
if line[-2] == '.' and line[-1] == 'o':
if line.endswith(".o"):
object_name = line
return object_name
@ -361,11 +326,11 @@ class MemapParser(object):
size = int(test_re_iar.group(4), 16)
if test_re_iar.group(2) == 'const' or \
test_re_iar.group(2) == 'ro code':
if (test_re_iar.group(2) == 'const' or
test_re_iar.group(2) == 'ro code'):
section = '.text'
elif test_re_iar.group(2) == 'zero' or \
test_re_iar.group(2) == 'uninit':
elif (test_re_iar.group(2) == 'zero' or
test_re_iar.group(2) == 'uninit'):
if test_re_iar.group(1)[0:4] == 'HEAP':
section = '.heap'
elif test_re_iar.group(1)[0:6] == 'CSTACK':
@ -403,15 +368,12 @@ class MemapParser(object):
# Start decoding the map file
for line in infile:
[object_name, object_size, section] = \
self.parse_section_armcc(line)
if object_size == 0 or object_name == "" or section == "":
pass
else:
object_name, object_size, section = self.parse_section_armcc(line)
if object_name is not "" and section is not "":
self.module_add(object_name, object_size, section)
self.rename_modules_from_fs(infile.name)
def check_new_library_iar(self, line):
"""
@ -420,7 +382,6 @@ class MemapParser(object):
"""
RE_LIBRARY_IAR = re.compile(r'^(.+\.a)\:.+$')
test_address_line = re.match(RE_LIBRARY_IAR, line)
@ -441,8 +402,6 @@ class MemapParser(object):
"""
RE_OBJECT_LIBRARY_IAR = re.compile(r'^\s+(.+\.o)\s.*')
test_address_line = re.match(RE_OBJECT_LIBRARY_IAR, line)
if test_address_line:
@ -483,80 +442,54 @@ class MemapParser(object):
library = self.check_new_library_iar(line)
if library != "":
if library:
current_library = library
object_name = self.check_new_object_lib_iar(line)
if object_name != "" and current_library != "":
if object_name and current_library:
temp = '[lib]' + '/'+ current_library + '/'+ object_name
self.module_replace(object_name, temp)
self.rename_modules_from_fs(infile.name)
def _rename_from_path(self, path, to_find, to_update, skip):
for root, subdirs, obj_files in os.walk(path):
if os.path.basename(root) in skip:
subdirs[:] = []
continue
basename = os.path.relpath(root, path)
for filename in (to_find.intersection(set(obj_files))):
to_find.remove(filename)
to_update[os.path.join(basename, filename)] = self.modules[filename]
export_formats = ["json", "csv-ci", "table"]
def list_dir_obj(self, path):
""" Searches all objects in BUILD directory and creates list
def rename_modules_from_fs(self, path):
""" Converts the current module list to use the path to a module instead
of the name in the map file
Positional arguments:
path - the path to a map file
"""
path = path.replace('\\', '/')
# check location of map file
RE_PATH_MAP_FILE = r'^(.+)\/.+\.map$'
test_re = re.match(RE_PATH_MAP_FILE, path)
if test_re:
search_path = test_re.group(1)
new_modules = dict()
to_match = set(k for k in self.modules.keys() if not k.startswith("[lib]"))
for module, v in self.modules.items():
if module.startswith("[lib]"):
new_modules[module] = v
is_test = re.match(RE_IS_TEST, path)
if is_test:
self._rename_from_path(is_test.group(1), to_match, new_modules, set(["TESTS"]))
self._rename_from_path(os.path.dirname(path), to_match, new_modules, set())
else:
print "Warning: this doesn't look like an mbed project"
return
self._rename_from_path(os.path.dirname(path), to_match, new_modules, [])
# create empty disctionary
self.modules = dict()
for module in to_match:
new_modules[module] = self.modules[module]
# search for object files
for root, _, obj_files in os.walk(search_path):
for obj_file in obj_files:
if obj_file.endswith(".o"):
txt = os.path.join(root, obj_file)
txt = txt.replace('\\', '/')
# add relative path + object to list
self.module_init(txt[len(search_path)+1:])
# The code below is a special case for TESTS.
# mbed-os lives in a separate location and we need to explicitly search
# their object files skiping the TESTS folder (already scanned above)
# check location of mbed-os
RE_PATH_MAP_FILE = r'^(.+)\/mbed-os\/.*TESTS\/.+\.map$'
test_re = re.match(RE_PATH_MAP_FILE, path)
if test_re == None:
return
search_path = test_re.group(1)
# search for object files
for root, _, obj_files in os.walk(search_path):
for obj_file in obj_files:
if 'TESTS' not in root and obj_file.endswith(".o"):
txt = os.path.join(root, obj_file)
txt = txt.replace('\\', '/')
# add relative path + object to list
self.module_init(txt[len(search_path)+1:])
self.modules = new_modules
def reduce_depth(self, depth):
"""
prints list of directories and objects. Examples:
populates the short_modules attribute with a truncated module list
(1) depth = 1:
main.o
@ -568,43 +501,19 @@ class MemapParser(object):
mbed-os/drivers
"""
# depth 0 or None shows all entries
if depth == 0 or depth == None:
self.short_modules = deepcopy(self.modules)
return
self.short_modules = dict()
# create reduced list
for line in self.modules:
data = line.split('/')
ndir = len(data)
temp = ''
count = 0
# iterate until the max depth level
max_level = min(depth, ndir)
# rebuild the path based on depth level
while count < max_level:
if count > 0: # ignore '/' from first entry
temp = temp + '/'
temp = temp + data[count]
count += 1
if temp not in self.short_modules:
temp_dic = dict()
for section_idx in self.all_sections:
temp_dic[section_idx] = 0
self.short_modules[temp] = temp_dic
for section_idx in self.all_sections:
self.short_modules[temp][section_idx] += \
self.modules[line][section_idx]
else:
self.short_modules = dict()
for module_name, v in self.modules.items():
split_name = module_name.split('/')
if split_name[0] == '':
split_name = split_name[1:]
new_name = "/".join(split_name[:depth])
self.short_modules.setdefault(new_name, {})
for section_idx, value in v.items():
self.short_modules[new_name].setdefault(section_idx, 0)
self.short_modules[new_name][section_idx] += self.modules[module_name][section_idx]
export_formats = ["json", "csv-ci", "table"]
@ -728,12 +637,12 @@ class MemapParser(object):
def compute_report(self):
""" Generates summary of memory usage for main areas
"""
for k in self.sections:
self.subtotal[k] = 0
for i in sorted(self.short_modules):
for i in self.short_modules:
for k in self.sections:
self.short_modules[i].setdefault(k, 0)
self.subtotal[k] += self.short_modules[i][k]
self.mem_summary = {
@ -746,7 +655,7 @@ class MemapParser(object):
self.mem_report.append({
"module":i,
"size":{
k:self.short_modules[i][k] for k in self.print_sections
k: self.short_modules[i][k] for k in self.print_sections
}
})
@ -765,10 +674,6 @@ class MemapParser(object):
result = True
try:
with open(mapfile, 'r') as file_input:
# Common to all toolchains: first search for objects in BUILD
self.list_dir_obj(os.path.abspath(mapfile))
if toolchain in ("ARM", "ARM_STD", "ARM_MICRO", "ARMC6"):
self.parse_map_file_armcc(file_input)
elif toolchain == "GCC_ARM" or toolchain == "GCC_CR":
@ -778,8 +683,6 @@ class MemapParser(object):
else:
result = False
self.remove_unused_modules()
except IOError as error:
print "I/O error({0}): {1}".format(error.errno, error.strerror)
result = False