~laumann/hadb

8b3599d83d58588e38f5d02be0edcf44db6c442c — Thomas Bracht Laumann Jespersen 1 year, 1 month ago 10db5a6
Add types for parsing .debug_{info,abbrev}

We should probably start by parsing .debug_abbrev as it holds all the
information on how to decode .debug_info entries.
2 files changed, 247 insertions(+), 1 deletions(-)

A format/dwarf/debuginfo.ha
M format/dwarf/types.ha
A format/dwarf/debuginfo.ha => format/dwarf/debuginfo.ha +190 -0
@@ 0,0 1,190 @@
// Here we should be able to parse out compilation units (CUs) from
// .debug_info, with the aid of the abbreviations section
// (.debug_abbrev)
//
// Each entry in .debug_info corresponds to a compilation unit

// A single entry in .debug_info, i.e. the header values of one compilation
// unit (CU). In the sample readelf dump later in this file they appear as
// "Version", "Abbrev Offset" and "Pointer Size".
//
// NOTE(review): the dump also lists a "Length" value for the CU which this
// struct does not record yet -- confirm whether it should be added.
type debuginfo = struct {
	// DWARF version of the unit (the sample dump shows 2)
	dwarf_version: u16,
	// "Abbrev Offset" in the dump; presumably an offset into .debug_abbrev
	// locating the abbreviations used by this unit -- TODO confirm
	abbrev_offset: u32,
	ptr_size: u8, // size of addresses of target (in bytes)

};

// A Debugging Information Entry (DIE): one node of the tree that readelf
// prints for .debug_info, where every entry carries an "Abbrev Number" and
// may have further entries nested beneath it.
//
// NOTE(review): this comment used to describe a DIE as an entry in
// .debug_abbrev. In DWARF the DIEs are stored in .debug_info, while
// .debug_abbrev holds the abbreviation declarations they refer to by code --
// confirm that this is the intended meaning of the type.
type die = struct {
	code: uint, // abbreviation number; uleb128-encoded
	// NOTE(review): presumably a DW_TAG_* value. This is a plain uint,
	// whereas the abbrev struct types its tag field as the tag enum.
	tag: uint,
	// entries nested under this one
	children: []die
};

// One abbreviation declaration from .debug_abbrev, matching a row of the
// "Number TAG" table in the sample readelf dump later in this file.
//
// NOTE(review): only the number and tag are captured so far. The dump also
// shows a [has children]/[no children] marker and a list of
// DW_AT_*/DW_FORM_* pairs per abbreviation, which have no fields here yet.
type abbrev = struct {
	code: uint, // uleb128
	// DW_TAG_* value of the entries that use this abbreviation
	tag: tag
};

// FIXME: Convert the following hex to a proper unit test
// readelf --debug-dump=abbrev --hex-dump=.debug_abbrev simple
//
// Hex dump of section '.debug_abbrev':
//   0x00000000 01110125 0e130b03 0e1b0e11 01120110 ...%............
//   0x00000010 06000002 2e013f0c 030e3a0b 3b0b390b ......?...:.;.9.
//   0x00000020 49131101 12014006 96420c01 13000003 I.....@..B......
//   0x00000030 34000308 3a0b3b0b 390b4913 020a0000 4...:.;.9.I.....
//   0x00000040 0424000b 0b3e0b03 08000005 24000b0b .$...>......$...
//   0x00000050 3e0b030e 0000062e 013f0c03 0e3a0b3b >........?...:.;
//   0x00000060 0b390b49 13110112 01400697 420c0000 .9.I.....@..B...
//   0x00000070 00                                  .

// Contents of the .debug_abbrev section:

//   Number TAG (0x0)
//    1      DW_TAG_compile_unit    [has children]
//     DW_AT_producer     DW_FORM_strp
//     DW_AT_language     DW_FORM_data1
//     DW_AT_name         DW_FORM_strp
//     DW_AT_comp_dir     DW_FORM_strp
//     DW_AT_low_pc       DW_FORM_addr
//     DW_AT_high_pc      DW_FORM_addr
//     DW_AT_stmt_list    DW_FORM_data4
//     DW_AT value: 0     DW_FORM value: 0
//    2      DW_TAG_subprogram    [has children]
//     DW_AT_external     DW_FORM_flag
//     DW_AT_name         DW_FORM_strp
//     DW_AT_decl_file    DW_FORM_data1
//     DW_AT_decl_line    DW_FORM_data1
//     DW_AT_decl_column  DW_FORM_data1
//     DW_AT_type         DW_FORM_ref4
//     DW_AT_low_pc       DW_FORM_addr
//     DW_AT_high_pc      DW_FORM_addr
//     DW_AT_frame_base   DW_FORM_data4
//     DW_AT_GNU_all_tail_call_sites DW_FORM_flag
//     DW_AT_sibling      DW_FORM_ref4
//     DW_AT value: 0     DW_FORM value: 0
//    3      DW_TAG_variable    [no children]
//     DW_AT_name         DW_FORM_string
//     DW_AT_decl_file    DW_FORM_data1
//     DW_AT_decl_line    DW_FORM_data1
//     DW_AT_decl_column  DW_FORM_data1
//     DW_AT_type         DW_FORM_ref4
//     DW_AT_location     DW_FORM_block1
//     DW_AT value: 0     DW_FORM value: 0
//    4      DW_TAG_base_type    [no children]
//     DW_AT_byte_size    DW_FORM_data1
//     DW_AT_encoding     DW_FORM_data1
//     DW_AT_name         DW_FORM_string
//     DW_AT value: 0     DW_FORM value: 0
//    5      DW_TAG_base_type    [no children]
//     DW_AT_byte_size    DW_FORM_data1
//     DW_AT_encoding     DW_FORM_data1
//     DW_AT_name         DW_FORM_strp
//     DW_AT value: 0     DW_FORM value: 0
//    6      DW_TAG_subprogram    [has children]
//     DW_AT_external     DW_FORM_flag
//     DW_AT_name         DW_FORM_strp
//     DW_AT_decl_file    DW_FORM_data1
//     DW_AT_decl_line    DW_FORM_data1
//     DW_AT_decl_column  DW_FORM_data1
//     DW_AT_type         DW_FORM_ref4
//     DW_AT_low_pc       DW_FORM_addr
//     DW_AT_high_pc      DW_FORM_addr
//     DW_AT_frame_base   DW_FORM_data4
//     DW_AT_GNU_all_call_sites DW_FORM_flag
//     DW_AT value: 0     DW_FORM value: 0

// $ readelf --debug-dump=info -x.debug_info simple

// Hex dump of section '.debug_info':
//   0x00000000 be000000 02000000 00000801 35000000 ............5...
//   0x00000010 0c050000 000e0000 00251100 00000000 .........%......
//   0x00000020 00681100 00000000 00000000 00020130 .h.............0
//   0x00000030 00000001 08056300 00004b11 00000000 ......c...K.....
//   0x00000040 00006811 00000000 00000000 00000163 ..h............c
//   0x00000050 00000003 666f6f00 010a076a 00000002 ....foo....j....
//   0x00000060 91680004 0405696e 74000508 05270000 .h....int....'..
//   0x00000070 00060100 00000001 01066a00 00002511 ..........j...%.
//   0x00000080 00000000 00004b11 00000000 00006000 ......K.......`.
//   0x00000090 00000103 666f6f00 0103076a 00000002 ....foo....j....
//   0x000000a0 91580362 61720001 04076a00 00000291 .X.bar....j.....
//   0x000000b0 60036261 7a000105 076a0000 00029168 `.baz....j.....h
//   0x000000c0 0000                                ..

// Contents of the .debug_info section:

//   Compilation Unit @ offset 0x0:
//    Length:        0xbe (32-bit) (= 190 decimal)
//    Version:       2
//    Abbrev Offset: 0x0
//    Pointer Size:  8
//  <0><b>: Abbrev Number: 1 (DW_TAG_compile_unit)
//     <c>   DW_AT_producer    : (indirect string, offset: 0x35): GNU C17 12.1.1 20220521 -mtune=generic -march=x86-64 -gdwarf-2
//     <10>   DW_AT_language    : 12	(ANSI C99)
//     <11>   DW_AT_name        : (indirect string, offset: 0x5): simple.c
//     <15>   DW_AT_comp_dir    : (indirect string, offset: 0xe): /home/t/sources/debugger
//     <19>   DW_AT_low_pc      : 0x1125
//     <21>   DW_AT_high_pc     : 0x1168
//     <29>   DW_AT_stmt_list   : 0x0
//  <1><2d>: Abbrev Number: 2 (DW_TAG_subprogram)
//     <2e>   DW_AT_external    : 1
//     <2f>   DW_AT_name        : (indirect string, offset: 0x30): main
//     <33>   DW_AT_decl_file   : 1
//     <34>   DW_AT_decl_line   : 8
//     <35>   DW_AT_decl_column : 5
//     <36>   DW_AT_type        : <0x63>
//     <3a>   DW_AT_low_pc      : 0x114b
//     <42>   DW_AT_high_pc     : 0x1168
//     <4a>   DW_AT_frame_base  : 0x0 (location list)
//     <4e>   DW_AT_GNU_all_tail_call_sites: 1
//     <4f>   DW_AT_sibling     : <0x63>
//  <2><53>: Abbrev Number: 3 (DW_TAG_variable)
//     <54>   DW_AT_name        : foo
//     <58>   DW_AT_decl_file   : 1
//     <59>   DW_AT_decl_line   : 10
//     <5a>   DW_AT_decl_column : 7
//     <5b>   DW_AT_type        : <0x6a>
//     <5f>   DW_AT_location    : 2 byte block: 91 68 	(DW_OP_fbreg: -24)
//  <2><62>: Abbrev Number: 0
//  <1><63>: Abbrev Number: 4 (DW_TAG_base_type)
//     <64>   DW_AT_byte_size   : 4
//     <65>   DW_AT_encoding    : 5	(signed)
//     <66>   DW_AT_name        : int
//  <1><6a>: Abbrev Number: 5 (DW_TAG_base_type)
//     <6b>   DW_AT_byte_size   : 8
//     <6c>   DW_AT_encoding    : 5	(signed)
//     <6d>   DW_AT_name        : (indirect string, offset: 0x27): long int
//  <1><71>: Abbrev Number: 6 (DW_TAG_subprogram)
//     <72>   DW_AT_external    : 1
//     <73>   DW_AT_name        : (indirect string, offset: 0x0): quux
//     <77>   DW_AT_decl_file   : 1
//     <78>   DW_AT_decl_line   : 1
//     <79>   DW_AT_decl_column : 6
//     <7a>   DW_AT_type        : <0x6a>
//     <7e>   DW_AT_low_pc      : 0x1125
//     <86>   DW_AT_high_pc     : 0x114b
//     <8e>   DW_AT_frame_base  : 0x60 (location list)
//     <92>   DW_AT_GNU_all_call_sites: 1
//  <2><93>: Abbrev Number: 3 (DW_TAG_variable)
//     <94>   DW_AT_name        : foo
//     <98>   DW_AT_decl_file   : 1
//     <99>   DW_AT_decl_line   : 3
//     <9a>   DW_AT_decl_column : 7
//     <9b>   DW_AT_type        : <0x6a>
//     <9f>   DW_AT_location    : 2 byte block: 91 58 	(DW_OP_fbreg: -40)
//  <2><a2>: Abbrev Number: 3 (DW_TAG_variable)
//     <a3>   DW_AT_name        : bar
//     <a7>   DW_AT_decl_file   : 1
//     <a8>   DW_AT_decl_line   : 4
//     <a9>   DW_AT_decl_column : 7
//     <aa>   DW_AT_type        : <0x6a>
//     <ae>   DW_AT_location    : 2 byte block: 91 60 	(DW_OP_fbreg: -32)
//  <2><b1>: Abbrev Number: 3 (DW_TAG_variable)
//     <b2>   DW_AT_name        : baz
//     <b6>   DW_AT_decl_file   : 1
//     <b7>   DW_AT_decl_line   : 5
//     <b8>   DW_AT_decl_column : 7
//     <b9>   DW_AT_type        : <0x6a>
//     <bd>   DW_AT_location    : 2 byte block: 91 68 	(DW_OP_fbreg: -24)
//  <2><c0>: Abbrev Number: 0
//  <1><c1>: Abbrev Number: 0

M format/dwarf/types.ha => format/dwarf/types.ha +57 -1
@@ 9,7 9,7 @@ export type unexpectedeof = !void;
export type error = !(unexpectedeof | io::error);

// registers for the Statement Program.
// 
//
// The goal of the Statement Program is to build a matrix representing one
// compilation unit.
export type registers = struct {


@@ 129,3 129,59 @@ export type lne = enum u8 {
	SET_ADDRESS = 2,
	DEFINE_FILE = 3,
};

// DWARF 2.0.0
// Numeric values for DW_TAG_* definitions.
//
// Each member mirrors the DW_TAG_* constant of the same name. Values that
// are skipped in the sequence (0x06, 0x07, 0x09, ...) have no tag listed
// here.
export type tag = enum {
	ARRAY_TYPE = 0x01,
	CLASS_TYPE = 0x02,
	ENTRY_POINT = 0x03,
	ENUMERATION_TYPE = 0x04,
	FORMAL_PARAMETER = 0x05,
	IMPORTED_DECLARATION = 0x08,
	LABEL = 0x0a,
	LEXICAL_BLOCK = 0x0b,
	MEMBER = 0x0d,
	POINTER_TYPE = 0x0f,
	REFERENCE_TYPE = 0x10,
	COMPILE_UNIT = 0x11,
	STRING_TYPE = 0x12,
	STRUCTURE_TYPE = 0x13,
	SUBROUTINE_TYPE = 0x15,
	TYPEDEF = 0x16,
	UNION_TYPE = 0x17,
	UNSPECIFIED_PARAMETERS = 0x18,
	VARIANT = 0x19,
	COMMON_BLOCK = 0x1a,
	COMMON_INCLUSION = 0x1b,
	INHERITANCE = 0x1c,
	INLINED_SUBROUTINE = 0x1d,
	MODULE = 0x1e,
	PTR_TO_MEMBER_TYPE = 0x1f,
	SET_TYPE = 0x20,
	SUBRANGE_TYPE = 0x21,
	WITH_STMT = 0x22,
	ACCESS_DECLARATION = 0x23,
	BASE_TYPE = 0x24,
	CATCH_BLOCK = 0x25,
	CONST_TYPE = 0x26,
	CONSTANT = 0x27,
	ENUMERATOR = 0x28,
	FILE_TYPE = 0x29,
	FRIEND = 0x2a,
	NAMELIST = 0x2b,
	NAMELIST_ITEM = 0x2c,
	PACKED_TYPE = 0x2d,
	SUBPROGRAM = 0x2e,
	TEMPLATE_TYPE_PARAM = 0x2f,
	// 0x30, not 0x20: 0x20 is SET_TYPE, and a shared value would make the
	// two tags indistinguishable when decoding.
	TEMPLATE_VALUE_PARAM = 0x30,
	THROWN_TYPE = 0x31,
	TRY_BLOCK = 0x32,
	VARIANT_PART = 0x33,
	VARIABLE = 0x34,
	VOLATILE_TYPE = 0x35,
	// LO_USER..HI_USER bound the range set aside for vendor-defined tags.
	LO_USER = 0x4080,
	HI_USER = 0xffff,
};

// Also need something like "attr" and "form"