Rework parsing dispatch

Parsing dispatch now uses a metaclass as a Trait and a series of classes
that implement the trait for dispatching parsing. This structure gives
each parser it's own namespace, avoiding tacking on suffixes to each
attribute indicating which parser uses it.
pull/5775/head
Jimmy Brisson 2018-01-02 11:19:25 -06:00
parent fc9bd60d4c
commit 527c0a2554
2 changed files with 193 additions and 153 deletions

View File

@ -2,6 +2,7 @@
"""Memory Map File Analyser for ARM mbed"""
import abc
import sys
import os
import re
@ -14,67 +15,19 @@ from prettytable import PrettyTable
from utils import argparse_filestring_type, \
argparse_lowercase_hyphen_type, argparse_uppercase_type
RE_ARMCC = re.compile(
r'^\s+0x(\w{8})\s+0x(\w{8})\s+(\w+)\s+(\w+)\s+(\d+)\s+[*]?.+\s+(.+)$')
RE_IAR = re.compile(
r'^\s+(.+)\s+(zero|const|ro code|inited|uninit)\s'
r'+0x(\w{8})\s+0x(\w+)\s+(.+)\s.+$')
RE_CMDLINE_FILE_IAR = re.compile(r'^#\s+(.+\.o)')
RE_LIBRARY_IAR = re.compile(r'^(.+\.a)\:.+$')
RE_OBJECT_LIBRARY_IAR = re.compile(r'^\s+(.+\.o)\s.*')
RE_OBJECT_FILE_GCC = re.compile(r'^(.+\/.+\.o)$')
RE_LIBRARY_OBJECT_GCC = re.compile(r'^.+\/lib(.+\.a)\((.+\.o)\)$')
RE_STD_SECTION_GCC = re.compile(r'^\s+.*0x(\w{8,16})\s+0x(\w+)\s(.+)$')
RE_FILL_SECTION_GCC = re.compile(r'^\s*\*fill\*\s+0x(\w{8,16})\s+0x(\w+).*$')
RE_OBJECT_ARMCC = re.compile(r'(.+\.(l|ar))\((.+\.o)\)')
class MemapParser(object):
"""An object that represents parsed results, parses the memory map files,
and writes out different file types of memory results
"""
print_sections = ('.text', '.data', '.bss')
misc_flash_sections = ('.interrupts', '.flash_config')
other_sections = ('.interrupts_ram', '.init', '.ARM.extab',
class _Parser(object):
"""Internal interface for parsing"""
__metaclass__ = abc.ABCMeta
SECTIONS = ('.text', '.data', '.bss', '.heap', '.stack')
MISC_FLASH_SECTIONS = ('.interrupts', '.flash_config')
OTHER_SECTIONS = ('.interrupts_ram', '.init', '.ARM.extab',
'.ARM.exidx', '.ARM.attributes', '.eh_frame',
'.init_array', '.fini_array', '.jcr', '.stab',
'.stabstr', '.ARM.exidx', '.ARM')
# sections to print info (generic for all toolchains)
sections = ('.text', '.data', '.bss', '.heap', '.stack')
def __init__(self):
# list of all modules and their sections
# full list - doesn't change with depth
self.modules = dict()
# short version with specific depth
self.short_modules = dict()
# sections must be defined in this order to take irrelevant out
self.all_sections = self.sections + self.other_sections + \
self.misc_flash_sections + ('unknown', 'OUTPUT')
# Memory report (sections + summary)
self.mem_report = []
# Memory summary
self.mem_summary = dict()
# Totals of ".text", ".data" and ".bss"
self.subtotal = dict()
# Flash no associated with a module
self.misc_flash_mem = 0
# Modules passed to the linker on the command line
# this is a dict because modules are looked up by their basename
self.cmd_modules = {}
def module_add(self, object_name, size, section):
""" Adds a module or section to the list
@ -109,14 +62,38 @@ class MemapParser(object):
self.modules[new_object] = self.modules[old_object]
del self.modules[old_object]
def check_new_section_gcc(self, line):
""" Check whether a new section in a map file has been detected (only
applies to gcc)
@abc.abstractmethod
def parse_mapfile(self, mapfile):
"""Parse a given file object pointing to a map file
Positional arguments:
mapfile - an open file object that reads a map file
return value - a dict mapping from object names to section dicts,
where a section dict maps from sections to sizes
"""
raise NotImplemented
class _GccParser(_Parser):
RE_OBJECT_FILE = re.compile(r'^(.+\/.+\.o)$')
RE_LIBRARY_OBJECT = re.compile(r'^.+\/lib(.+\.a)\((.+\.o)\)$')
RE_STD_SECTION = re.compile(r'^\s+.*0x(\w{8,16})\s+0x(\w+)\s(.+)$')
RE_FILL_SECTION = re.compile(r'^\s*\*fill\*\s+0x(\w{8,16})\s+0x(\w+).*$')
ALL_SECTIONS = _Parser.SECTIONS + _Parser.OTHER_SECTIONS + \
_Parser.MISC_FLASH_SECTIONS + ('unknown', 'OUTPUT')
def check_new_section(self, line):
""" Check whether a new section in a map file has been detected
Positional arguments:
line - the line to check for a new section
return value - A section name, if a new section was found, False
otherwise
"""
for i in self.all_sections:
for i in self.ALL_SECTIONS:
if line.startswith(i):
# should name of the section (assuming it's a known one)
return i
@ -127,13 +104,15 @@ class MemapParser(object):
return False # everything else, means no change in section
def parse_object_name_gcc(self, line):
def parse_object_name(self, line):
""" Parse a path to object file
Positional arguments:
txt - the path to parse the object and module name from
line - the path to parse the object and module name from
return value - an object file name
"""
test_re_mbed_os_name = re.match(RE_OBJECT_FILE_GCC, line)
test_re_mbed_os_name = re.match(self.RE_OBJECT_FILE, line)
if test_re_mbed_os_name:
object_name = test_re_mbed_os_name.group(1)
@ -144,7 +123,7 @@ class MemapParser(object):
return object_name
else:
test_re_obj_name = re.match(RE_LIBRARY_OBJECT_GCC, line)
test_re_obj_name = re.match(self.RE_LIBRARY_OBJECT, line)
if test_re_obj_name:
object_name = os.path.join(test_re_obj_name.group(1),
@ -154,32 +133,32 @@ class MemapParser(object):
print "Unknown object name found in GCC map file: %s" % line
return '[misc]'
def parse_section_gcc(self, line):
def parse_section(self, line):
""" Parse data from a section of gcc map file
examples:
0x00004308 0x7c ./BUILD/K64F/GCC_ARM/mbed-os/hal/targets/hal/TARGET_Freescale/TARGET_KPSDK_MCUS/spi_api.o
.text 0x00000608 0x198 ./BUILD/K64F/GCC_ARM/mbed-os/core/mbed-rtos/rtx/TARGET_CORTEX_M/TARGET_RTOS_M4_M7/TOOLCHAIN_GCC/HAL_CM4.o
.text 0x00000608 0x198 ./BUILD/K64F/GCC_ARM/mbed-os/core/mbed-rtos/rtx/TARGET_CORTEX_M/TARGET_RTOS_M4_M7/TOOLCHAIN/HAL_CM4.o
Positional arguments:
line - the line to parse a section from
"""
is_fill = re.match(RE_FILL_SECTION_GCC, line)
is_fill = re.match(self.RE_FILL_SECTION, line)
if is_fill:
o_name = '[fill]'
o_size = int(is_fill.group(2), 16)
return [o_name, o_size]
is_section = re.match(RE_STD_SECTION_GCC, line)
is_section = re.match(self.RE_STD_SECTION, line)
if is_section:
o_size = int(is_section.group(2), 16)
if o_size:
o_name = self.parse_object_name_gcc(is_section.group(3))
o_name = self.parse_object_name(is_section.group(3))
return [o_name, o_size]
return ["", 0]
def parse_map_file_gcc(self, file_desc):
def parse_mapfile(self, file_desc):
""" Main logic to decode gcc map files
Positional arguments:
@ -194,14 +173,14 @@ class MemapParser(object):
break
for line in infile:
next_section = self.check_new_section_gcc(line)
next_section = self.check_new_section(line)
if next_section == "OUTPUT":
break
elif next_section:
current_section = next_section
object_name, object_size = self.parse_section_gcc(line)
object_name, object_size = self.parse_section(line)
self.module_add(object_name, object_size, current_section)
common_prefix = os.path.dirname(os.path.commonprefix([
@ -214,9 +193,15 @@ class MemapParser(object):
new_modules[os.path.relpath(name, common_prefix)] = stats
else:
new_modules[name] = stats
self.modules = new_modules
return new_modules
def parse_object_name_armcc(self, line):
class _ArmccParser(_Parser):
RE = re.compile(
r'^\s+0x(\w{8})\s+0x(\w{8})\s+(\w+)\s+(\w+)\s+(\d+)\s+[*]?.+\s+(.+)$')
RE_OBJECT = re.compile(r'(.+\.(l|ar))\((.+\.o)\)')
def parse_object_name(self, line):
""" Parse object file
Positional arguments:
@ -226,7 +211,7 @@ class MemapParser(object):
return line
else:
is_obj = re.match(RE_OBJECT_ARMCC, line)
is_obj = re.match(self.RE_OBJECT, line)
if is_obj:
object_name = os.path.join(os.path.basename(is_obj.group(1)),
is_obj.group(3))
@ -235,47 +220,91 @@ class MemapParser(object):
print "Malformed input found when parsing ARMCC map: %s" % line
return '[misc]'
def parse_section_armcc(self, line):
def parse_section(self, line):
""" Parse data from an armcc map file
Examples of armcc map file:
Base_Addr Size Type Attr Idx E Section Name Object
0x00000000 0x00000400 Data RO 11222 RESET startup_MK64F12.o
0x00000000 0x00000400 Data RO 11222 self.RESET startup_MK64F12.o
0x00000410 0x00000008 Code RO 49364 * !!!main c_w.l(__main.o)
Positional arguments:
line - the line to parse the section data from
"""
test_re_armcc = re.match(RE_ARMCC, line)
test_re = re.match(self.RE, line)
if test_re_armcc:
size = int(test_re_armcc.group(2), 16)
if test_re:
size = int(test_re.group(2), 16)
if test_re_armcc.group(4) == 'RO':
if test_re.group(4) == 'RO':
section = '.text'
else:
if test_re_armcc.group(3) == 'Data':
if test_re.group(3) == 'Data':
section = '.data'
elif test_re_armcc.group(3) == 'Zero':
elif test_re.group(3) == 'Zero':
section = '.bss'
elif test_re_armcc.group(3) == 'Code':
elif test_re.group(3) == 'Code':
section = '.text'
else:
print "Malformed input found when parsing armcc map: %s, %r" %\
(line, test_re_armcc.groups())
(line, test_re.groups())
return ["", 0, ""]
# check name of object or library
object_name = self.parse_object_name_armcc(
test_re_armcc.group(6))
object_name = self.parse_object_name(
test_re.group(6))
return [object_name, size, section]
else:
return ["", 0, ""]
def parse_object_name_iar(self, object_name):
def parse_mapfile(self, file_desc):
""" Main logic to decode armc5 map files
Positional arguments:
file_desc - a file like object to parse as an armc5 map file
"""
with file_desc as infile:
# Search area to parse
for line in infile:
if line.startswith(' Base Addr Size'):
break
# Start decoding the map file
for line in infile:
self.module_add(*self.parse_section(line))
common_prefix = os.path.dirname(os.path.commonprefix([
o for o in self.modules.keys() if (o.endswith(".o") and o != "anon$$obj.o" and not o.startswith("[lib]"))]))
new_modules = {}
for name, stats in self.modules.items():
if name == "anon$$obj.o" or name.startswith("[lib]"):
new_modules[name] = stats
elif name.endswith(".o"):
new_modules[os.path.relpath(name, common_prefix)] = stats
else:
new_modules[name] = stats
return new_modules
class _IarParser(_Parser):
RE = re.compile(
r'^\s+(.+)\s+(zero|const|ro code|inited|uninit)\s'
r'+0x(\w{8})\s+0x(\w+)\s+(.+)\s.+$')
RE_CMDLINE_FILE = re.compile(r'^#\s+(.+\.o)')
RE_LIBRARY = re.compile(r'^(.+\.a)\:.+$')
RE_OBJECT_LIBRARY = re.compile(r'^\s+(.+\.o)\s.*')
def __init__(self):
_Parser.__init__(self)
# Modules passed to the linker on the command line
# this is a dict because modules are looked up by their basename
self.cmd_modules = {}
def parse_object_name(self, object_name):
""" Parse object file
Positional arguments:
@ -289,7 +318,7 @@ class MemapParser(object):
else:
return '[misc]'
def parse_section_iar(self, line):
def parse_section(self, line):
""" Parse data from an IAR map file
Examples of IAR map file:
@ -306,76 +335,48 @@ class MemapParser(object):
Positional_arguments:
line - the line to parse section data from
"""
test_re_iar = re.match(RE_IAR, line)
if test_re_iar:
if (test_re_iar.group(2) == 'const' or
test_re_iar.group(2) == 'ro code'):
test_re = re.match(self.RE, line)
if test_re:
if (test_re.group(2) == 'const' or
test_re.group(2) == 'ro code'):
section = '.text'
elif (test_re_iar.group(2) == 'zero' or
test_re_iar.group(2) == 'uninit'):
if test_re_iar.group(1)[0:4] == 'HEAP':
elif (test_re.group(2) == 'zero' or
test_re.group(2) == 'uninit'):
if test_re.group(1)[0:4] == 'HEAP':
section = '.heap'
elif test_re_iar.group(1)[0:6] == 'CSTACK':
elif test_re.group(1)[0:6] == 'CSTACK':
section = '.stack'
else:
section = '.bss' # default section
elif test_re_iar.group(2) == 'inited':
elif test_re.group(2) == 'inited':
section = '.data'
else:
print "Malformed input found when parsing IAR map: %s" % line
return ["", 0, ""]
# lookup object in dictionary and return module name
object_name = self.parse_object_name_iar(test_re_iar.group(5))
object_name = self.parse_object_name(test_re.group(5))
size = int(test_re_iar.group(4), 16)
size = int(test_re.group(4), 16)
return [object_name, size, section]
else:
return ["", 0, ""]
def parse_map_file_armcc(self, file_desc):
""" Main logic to decode armc5 map files
Positional arguments:
file_desc - a file like object to parse as an armc5 map file
"""
with file_desc as infile:
# Search area to parse
for line in infile:
if line.startswith(' Base Addr Size'):
break
# Start decoding the map file
for line in infile:
self.module_add(*self.parse_section_armcc(line))
common_prefix = os.path.dirname(os.path.commonprefix([
o for o in self.modules.keys() if (o.endswith(".o") and o != "anon$$obj.o" and not o.startswith("[lib]"))]))
new_modules = {}
for name, stats in self.modules.items():
if name == "anon$$obj.o" or name.startswith("[lib]"):
new_modules[name] = stats
elif name.endswith(".o"):
new_modules[os.path.relpath(name, common_prefix)] = stats
else:
new_modules[name] = stats
self.modules = new_modules
def check_new_library_iar(self, line):
def check_new_library(self, line):
"""
Searches for libraries and returns name. Example:
m7M_tls.a: [43]
"""
test_address_line = re.match(RE_LIBRARY_IAR, line)
test_address_line = re.match(self.RE_LIBRARY, line)
if test_address_line:
return test_address_line.group(1)
else:
return ""
def check_new_object_lib_iar(self, line):
def check_new_object_lib(self, line):
"""
Searches for objects within a library section and returns name. Example:
rt7M_tl.a: [44]
@ -386,13 +387,13 @@ class MemapParser(object):
I64DivZer.o 2
"""
test_address_line = re.match(RE_OBJECT_LIBRARY_IAR, line)
test_address_line = re.match(self.RE_OBJECT_LIBRARY, line)
if test_address_line:
return test_address_line.group(1)
else:
return ""
def parse_iar_command_line(self, lines):
def parse_command_line(self, lines):
"""Parse the files passed on the command line to the iar linker
Positional arguments:
@ -410,37 +411,72 @@ class MemapParser(object):
self.cmd_modules = {s: os.path.relpath(f, common_prefix)
for s, f in self.cmd_modules.items()}
def parse_map_file_iar(self, file_desc):
def parse_mapfile(self, file_desc):
""" Main logic to decode IAR map files
Positional arguments:
file_desc - a file like object to parse as an IAR map file
"""
with file_desc as infile:
self.parse_iar_command_line(infile)
self.parse_command_line(infile)
for line in infile:
if line.startswith(' Section '):
break
for line in infile:
self.module_add(*self.parse_section_iar(line))
self.module_add(*self.parse_section(line))
if line.startswith('*** MODULE SUMMARY'): # finish section
break
current_library = ""
for line in infile:
library = self.check_new_library_iar(line)
library = self.check_new_library(line)
if library:
current_library = library
object_name = self.check_new_object_lib_iar(line)
object_name = self.check_new_object_lib(line)
if object_name and current_library:
temp = os.path.join('[lib]', current_library, object_name)
self.module_replace(object_name, temp)
return self.modules
class MemapParser(object):
"""An object that represents parsed results, parses the memory map files,
and writes out different file types of memory results
"""
print_sections = ('.text', '.data', '.bss')
# sections to print info (generic for all toolchains)
sections = _Parser.SECTIONS
misc_flash_sections = _Parser.MISC_FLASH_SECTIONS
other_sections = _Parser.OTHER_SECTIONS
def __init__(self):
# list of all modules and their sections
# full list - doesn't change with depth
self.modules = dict()
# short version with specific depth
self.short_modules = dict()
# Memory report (sections + summary)
self.mem_report = []
# Memory summary
self.mem_summary = dict()
# Totals of ".text", ".data" and ".bss"
self.subtotal = dict()
# Flash no associated with a module
self.misc_flash_mem = 0
def reduce_depth(self, depth):
"""
@ -620,22 +656,22 @@ class MemapParser(object):
mapfile - the file name of the memory map file
toolchain - the toolchain used to create the file
"""
result = True
if toolchain in ("ARM", "ARM_STD", "ARM_MICRO", "ARMC6"):
parser = _ArmccParser()
elif toolchain == "GCC_ARM" or toolchain == "GCC_CR":
parser = _GccParser()
elif toolchain == "IAR":
parser = _IarParser()
else:
return False
try:
with open(mapfile, 'r') as file_input:
if toolchain in ("ARM", "ARM_STD", "ARM_MICRO", "ARMC6"):
self.parse_map_file_armcc(file_input)
elif toolchain == "GCC_ARM" or toolchain == "GCC_CR":
self.parse_map_file_gcc(file_input)
elif toolchain == "IAR":
self.parse_map_file_iar(file_input)
else:
result = False
self.modules = parser.parse_mapfile(file_input)
return True
except IOError as error:
print "I/O error({0}): {1}".format(error.errno, error.strerror)
result = False
return result
return False
def main():
"""Entry Point"""

View File

@ -5,7 +5,7 @@ import json
import pytest
from tools.memap import MemapParser
from tools.memap import MemapParser, _ArmccParser
from copy import deepcopy
@ -19,7 +19,9 @@ PARSED_ARM_DATA = {
def test_parse_armcc():
memap = MemapParser()
memap.parse_map_file_armcc(open(join(dirname(__file__), "arm.map")))
memap.parse(join(dirname(__file__), "arm.map"), "ARM")
assert memap.modules == PARSED_ARM_DATA
memap.parse(join(dirname(__file__), "arm.map"), "UARM")
assert memap.modules == PARSED_ARM_DATA
PARSED_IAR_GCC_DATA = {
@ -32,17 +34,19 @@ PARSED_IAR_GCC_DATA = {
def test_parse_iar():
memap = MemapParser()
memap.parse_map_file_iar(open(join(dirname(__file__), "iar.map")))
memap.parse(join(dirname(__file__), "iar.map"), "IAR")
assert memap.modules == PARSED_IAR_GCC_DATA
def test_parse_gcc():
memap = MemapParser()
memap.parse_map_file_gcc(open(join(dirname(__file__), "gcc.map")))
memap.parse(join(dirname(__file__), "gcc.map"), "GCC_ARM")
assert memap.modules == PARSED_IAR_GCC_DATA
memap.parse(join(dirname(__file__), "gcc.map"), "GCC_CR")
assert memap.modules == PARSED_IAR_GCC_DATA
def test_add_empty_module():
memap = MemapParser()
memap = _ArmccParser()
old_modules = deepcopy(memap.modules)
memap.module_add("", 8, ".data")
assert(old_modules == memap.modules)
@ -52,7 +56,7 @@ def test_add_empty_module():
assert(old_modules == memap.modules)
def test_add_full_module():
memap = MemapParser()
memap = _ArmccParser()
old_modules = deepcopy(memap.modules)
memap.module_add("main.o", 8, ".data")
assert(old_modules != memap.modules)