~taiite/protodump

3d4a61d4b8f507fecc453533e7616830dcaccf22 — Hubert Hirtz 2 months ago 941d9db
Better type handling

- Parse types into proper structs
- Parse type definitions
- Handle anonymous functions
- Handle subroutine parameters
3 files changed, 784 insertions(+), 214 deletions(-)

A src/dwarf.rs
M src/main.rs
A src/types.rs
A src/dwarf.rs => src/dwarf.rs +251 -0
@@ 0,0 1,251 @@
use crate::types;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::hash::Hash;

/// A formal parameter of a subprogram.
#[derive(Clone)]
pub struct Parameter<R> {
    /// Name of the parameter, when the debug entry provides one.
    pub name: Option<R>,
    /// Resolved type of the parameter.
    pub type_: types::InstancePtr<R>,
}

/// Two parameters are equal when their types are equal; the `name` field does
/// not take part in the comparison.
impl<R> PartialEq for Parameter<R>
where
    R: PartialEq,
{
    fn eq(&self, other: &Parameter<R>) -> bool {
        let (lhs, rhs) = (&self.type_, &other.type_);
        lhs == rhs
    }
}

/// Signature of a function definition: its return type and its parameters.
#[derive(Clone, PartialEq)]
pub struct Subprogram<R> {
    /// Resolved return type of the function.
    pub return_type: types::InstancePtr<R>,
    /// Formal parameters, in the order their entries were encountered.
    pub parameters: Vec<Parameter<R>>,
}

/// Walker over the direct children of a debugging information entry.
///
/// Created by `entry_children` and advanced with `EntryChildren::next`.
pub struct EntryChildren<'e, R>
where
    R: gimli::Reader,
{
    // Cursor used to visit the children; `None` when the parent entry reported
    // no children at creation time.
    entries: Option<gimli::EntriesCursor<'e, 'e, R>>,
    // Becomes `true` once `next` has moved the cursor onto the first child;
    // from then on the cursor is advanced sibling by sibling.
    init: bool,
}

/// Creates an [`EntryChildren`] walker for the children of `entry`.
///
/// When `entry` reports no children the walker is built without a cursor.
/// Otherwise a cursor is opened at the offset of `entry` inside `unit` and
/// stepped once, which lands on `entry` itself; the children are only reached
/// by the subsequent calls made from `EntryChildren::next`.
///
/// # Errors
///
/// Forwards any error raised by gimli while opening or stepping the cursor.
///
/// # Panics
///
/// Panics if the first step of the cursor yields nothing.
pub fn entry_children<'e, R>(
    unit: &'e gimli::Unit<R>,
    entry: &'e gimli::DebuggingInformationEntry<R>,
) -> gimli::Result<EntryChildren<'e, R>>
where
    R: gimli::Reader,
{
    if !entry.has_children() {
        return Ok(EntryChildren {
            entries: None,
            init: false,
        });
    }

    let mut cursor = unit.entries_at_offset(entry.offset())?;
    // The first step of a fresh cursor yields the entry it was opened at, not
    // one of its children.
    let (depth_delta, first) = cursor
        .next_dfs()?
        .expect("first call to EntriesCursor::next_dfs should always succeed");
    debug_assert_eq!(
        depth_delta, 0,
        "first call to EntriesCursor::next_dfs should always return delta==0",
    );
    debug_assert_eq!(
        entry.tag(),
        first.tag(),
        "first call to EntriesCursor::next_dfs should return the original entry",
    );

    Ok(EntryChildren {
        entries: Some(cursor),
        init: false,
    })
}

impl<'e, R> EntryChildren<'e, R>
where
    R: gimli::Reader,
{
    /// Returns the next direct child of the parent entry, or `Ok(None)` once
    /// every child has been returned.
    ///
    /// The first call steps the cursor from the parent onto its first child;
    /// later calls move from sibling to sibling.
    ///
    /// An entry may be flagged as having children while its child list only
    /// holds the terminating null entry. In that case the depth-first step
    /// does not descend (the reported depth delta is not `1`) and lands on an
    /// entry that is not a child, or on nothing at all. This is reported as
    /// "no children" instead of panicking or handing out an unrelated entry.
    ///
    /// # Errors
    ///
    /// Forwards any error raised by gimli while advancing the cursor. If the
    /// very first step fails, the walker yields nothing afterwards.
    pub fn next(&mut self) -> gimli::Result<Option<&gimli::DebuggingInformationEntry<'e, 'e, R>>> {
        if self.init {
            return match &mut self.entries {
                Some(entries) => entries.next_sibling(),
                None => Ok(None),
            };
        }
        self.init = true;

        // Take the cursor out while probing for the first child: when there is
        // none, `self.entries` simply stays `None` and every later call
        // returns `Ok(None)`.
        let mut cursor = match self.entries.take() {
            Some(cursor) => cursor,
            None => return Ok(None),
        };
        let first_child_delta = cursor.next_dfs()?.map(|(delta, _)| delta);
        if first_child_delta != Some(1) {
            return Ok(None);
        }

        // The cursor now rests on the first child: store it back and hand out
        // the entry it points at.
        self.entries = Some(cursor);
        match &self.entries {
            Some(cursor) => Ok(cursor.current()),
            None => Ok(None),
        }
    }
}

pub fn entry_int_attr<R>(
    entry: &gimli::DebuggingInformationEntry<R>,
    attr: gimli::DwAt,
) -> gimli::Result<Option<u64>>
where
    R: gimli::Reader,
{
    let av = match entry.attr_value(attr)? {
        Some(av) => av,
        None => return Ok(None),
    };
    Ok(Some(match av {
        // TODO replace "as" with TryInto
        gimli::AttributeValue::Data1(n) => n as u64,
        gimli::AttributeValue::Data2(n) => n as u64,
        gimli::AttributeValue::Data4(n) => n as u64,
        gimli::AttributeValue::Data8(n) => n,
        gimli::AttributeValue::Sdata(n) => n as u64,
        gimli::AttributeValue::Udata(n) => n,
        av => panic!("unexpected attribute value for {}: {:?}", attr, av),
    }))
}

/// Reads the `DW_AT_byte_size` attribute of `entry` through `entry_int_attr`.
///
/// Returns `Ok(None)` when the attribute is absent; errors and panics are
/// those of `entry_int_attr`.
pub fn entry_byte_size<R>(entry: &gimli::DebuggingInformationEntry<R>) -> gimli::Result<Option<u64>>
where
    R: gimli::Reader,
{
    entry_int_attr(entry, gimli::DW_AT_byte_size)
}

pub fn entry_encoding<R>(
    entry: &gimli::DebuggingInformationEntry<R>,
) -> gimli::Result<Option<crate::types::Encoding>>
where
    R: gimli::Reader,
{
    let encoding_attr = match entry.attr_value(gimli::DW_AT_encoding)? {
        Some(av) => av,
        None => return Ok(None),
    };
    Ok(Some(match encoding_attr {
        gimli::AttributeValue::Encoding(encoding) => crate::types::Encoding::try_from(encoding)
            .map_err(|()| gimli::Error::Io /* TODO */)?,
        av => panic!("unexpected attribute value for DW_AT_encoding: {:?}", av),
    }))
}

/// Reads the `DW_AT_name` attribute of `entry` as a string reader.
///
/// Returns `Ok(None)` when the attribute is missing, and also when the
/// attribute value cannot be turned into a string: that failure is discarded
/// rather than propagated.
///
/// # Errors
///
/// Only a failure to read the attribute itself is forwarded.
pub fn entry_name<R>(
    dwarf: &gimli::Dwarf<R>,
    unit: &gimli::Unit<R>,
    entry: &gimli::DebuggingInformationEntry<R>,
) -> gimli::Result<Option<R>>
where
    R: gimli::Reader,
{
    let name = match entry.attr_value(gimli::DW_AT_name)? {
        Some(attr) => dwarf.attr_string(unit, attr).ok(),
        None => None,
    };
    Ok(name)
}

/// Resolves the type referenced by the `DW_AT_type` attribute of `entry`.
///
/// Returns `Ok(None)` when the entry has no `DW_AT_type` attribute. Otherwise
/// the referenced offset is handed to `cache.resolve`.
///
/// # Errors
///
/// Forwards errors from reading the attribute and from the cache.
///
/// # Panics
///
/// Panics when the attribute is not a unit-relative reference.
pub fn entry_type<R>(
    cache: &mut types::Cache<R>,
    unit: &gimli::Unit<R>,
    entry: &gimli::DebuggingInformationEntry<R>,
) -> gimli::Result<Option<types::InstancePtr<R>>>
where
    R: gimli::Reader,
{
    let offset = match entry.attr_value(gimli::DW_AT_type)? {
        None => return Ok(None),
        Some(gimli::AttributeValue::UnitRef(offset)) => offset,
        Some(av) => panic!("unexpected attribute value for DW_AT_type: {:?}", av),
    };
    Ok(Some(cache.resolve(unit, offset)?))
}

/// Function signatures extracted from DWARF data, together with the type
/// cache that was filled while resolving them.
pub struct Sources<'a, R>
where
    R: gimli::Reader,
{
    /// Function definitions, keyed by function name.
    pub subprograms: HashMap<R, Subprogram<R>>,
    /// Cache of the types resolved while reading the subprograms.
    pub type_cache: types::Cache<'a, R>,
}

impl<'a, R> Sources<'a, R>
where
    R: gimli::Reader + Eq + Hash,
{
    /// Collects the signature of every function defined in `dwarf`.
    ///
    /// Each unit is walked depth-first. A `DW_TAG_subprogram` entry is kept
    /// when it is not a mere declaration and has both a name and a return
    /// type; its direct `DW_TAG_formal_parameter` children that have a type
    /// become its parameters. The rest of a kept subprogram's subtree is
    /// skipped. When two kept subprograms share a name, the one met last wins.
    ///
    /// # Errors
    ///
    /// Forwards any error raised by gimli or by the type cache.
    pub fn parse(dwarf: &'a gimli::Dwarf<R>) -> gimli::Result<Sources<'a, R>> {
        let mut subprograms = HashMap::new();
        let mut type_cache = types::Cache::new(dwarf);

        let mut it = dwarf.units();
        while let Some(header) = it.next()? {
            let unit = dwarf.unit(header)?;

            let mut entries = unit.entries();
            // Set when the cursor already rests on an entry that has not been
            // examined yet, so the next iteration must not step past it.
            let mut already_advanced = false;
            loop {
                if already_advanced {
                    already_advanced = false;
                } else if entries.next_dfs()?.is_none() {
                    break;
                }
                let entry = match entries.current() {
                    Some(entry) => entry,
                    None => break,
                };

                if entry.tag() != gimli::DW_TAG_subprogram {
                    // This debug entry is not a function.
                    continue;
                }
                if let Some(gimli::AttributeValue::Flag(true)) =
                    entry.attr_value(gimli::DW_AT_declaration)?
                {
                    // This debug entry does not define the function.
                    continue;
                }

                let name = entry_name(dwarf, &unit, entry)?;
                let typ = entry_type(&mut type_cache, &unit, entry)?;
                let (name, typ) = match (name, typ) {
                    (Some(name), Some(typ)) => (name, typ),
                    _ => continue,
                };
                let mut parameters = Vec::new();

                if entry.has_children() {
                    // Step towards the first child. The depth delta tells
                    // whether the cursor really went one level down: an entry
                    // can be flagged as having children while its child list
                    // is empty.
                    let first_child_delta = entries.next_dfs()?.map(|(delta, _)| delta);
                    match first_child_delta {
                        Some(1) => {
                            while let Some(child) = entries.current() {
                                if child.tag() == gimli::DW_TAG_formal_parameter {
                                    let name = entry_name(dwarf, &unit, child)?;
                                    let type_ = entry_type(&mut type_cache, &unit, child)?;
                                    if let Some(type_) = type_ {
                                        parameters.push(Parameter { name, type_ });
                                    }
                                }
                                entries.next_sibling()?;
                            }
                        }
                        // No child after all: the cursor sits on the entry
                        // that follows the subprogram, which still has to be
                        // examined by the outer loop.
                        Some(_) => already_advanced = true,
                        // End of the unit; the outer loop stops on its next
                        // step.
                        None => {}
                    }
                }

                subprograms.insert(
                    name,
                    Subprogram {
                        return_type: typ,
                        parameters,
                    },
                );
            }
        }

        Ok(Sources {
            subprograms,
            type_cache,
        })
    }
}

M src/main.rs => src/main.rs +71 -214
@@ 1,197 1,31 @@
#![feature(arc_new_cyclic)]

use anyhow::Context;
use anyhow::Result;
use object::read::Object as _;
use object::read::ObjectSection as _;
use object::read::ObjectSymbol as _;
use std::borrow;
use std::collections::HashMap;
use std::borrow::Cow;
use std::collections::HashSet;
use std::env;
use std::fs;
use std::process;

// key=symbol name
type Sources = HashMap<String, Prototype>;
type SortedSources = Vec<(String, Prototype)>;

/// Textual description of a function signature.
#[derive(Clone, PartialEq, Eq)]
struct Prototype {
    /// Return type, already rendered as text.
    return_type: String,
    /// Parameters of the function.
    parameters: Vec<Parameter>,
}

/// Textual description of a function parameter.
#[derive(Clone, Eq)]
struct Parameter {
    /// Name of the parameter.
    name: String,
    /// Type of the parameter, rendered as text.
    typ: String,
}

/// Parameters compare equal when their types match; names are not compared.
impl PartialEq for Parameter {
    fn eq(&self, other: &Parameter) -> bool {
        let (lhs, rhs) = (&self.typ, &other.typ);
        lhs == rhs
    }
}

/// Renders the type referenced by the `DW_AT_type` attribute of `entry` as
/// text, or returns `Ok(None)` when the entry has no such attribute.
fn entry_type<R>(
    dwarf: &gimli::Dwarf<R>,
    unit: &gimli::Unit<R>,
    entry: &gimli::DebuggingInformationEntry<R>,
) -> gimli::Result<Option<String>>
where
    R: gimli::Reader,
{
    match entry.attr(gimli::DW_AT_type)? {
        Some(attr) => resolve_type(dwarf, unit, attr).map(Some),
        None => Ok(None),
    }
}

/// Reads the `DW_AT_name` attribute of `entry` as an owned string.
///
/// Returns `Ok(None)` when the attribute is missing or cannot be read as a
/// string.
///
/// # Panics
///
/// Panics when the lossy string conversion itself fails.
fn entry_name<R>(
    dwarf: &gimli::Dwarf<R>,
    unit: &gimli::Unit<R>,
    entry: &gimli::DebuggingInformationEntry<R>,
) -> gimli::Result<Option<String>>
where
    R: gimli::Reader,
{
    let raw = match entry.attr_value(gimli::DW_AT_name)? {
        Some(attr) => dwarf.attr_string(unit, attr).ok(),
        None => None,
    };
    Ok(raw.map(|r| String::from(r.to_string_lossy().unwrap())))
}

/// Renders the type referenced by `attr` as text.
///
/// The chain of `DW_AT_type` references is followed from `attr`, remembering
/// the tag of every entry crossed, until an entry without a `DW_AT_type`
/// attribute is reached. That last entry provides the base of the text (its
/// name, or `void` when it has none); the remembered tags are then appended
/// as qualifiers, innermost first.
///
/// # Errors
///
/// Forwards any error raised by gimli while reading entries or strings.
///
/// # Panics
///
/// Panics when a type reference is not unit-relative, or when a crossed entry
/// carries a tag that is not handled below.
fn resolve_type<R>(
    dwarf: &gimli::Dwarf<R>,
    unit: &gimli::Unit<R>,
    mut attr: gimli::Attribute<R>,
) -> gimli::Result<String>
where
    R: gimli::Reader,
{
    let mut buf = String::new();
    // Tags of the entries crossed so far, outermost first.
    let mut tags = Vec::new();
    loop {
        let entry = match attr.value() {
            gimli::AttributeValue::UnitRef(offset) => unit.entry(offset)?,
            av => panic!("unexpected attribute value: {:?}", av),
        };
        // The entry refers to another type: remember its tag and keep going.
        if let Some(next_attr) = entry.attr(gimli::DW_AT_type)? {
            attr = next_attr;
            tags.push(entry.tag());
            continue;
        }
        // End of the chain with a name: this is the base of the rendering.
        if let Some(name_attr) = entry.attr(gimli::DW_AT_name)? {
            if entry.tag() == gimli::DW_TAG_structure_type {
                buf.push_str("struct ");
            }
            buf.push_str(
                &dwarf
                    .attr_string(unit, name_attr.value())?
                    .to_string_lossy()?,
            );
            buf.push(' ');
            break;
        }
        // End of the chain with neither type nor name: the entry qualifies
        // `void`, so its own tag is recorded as well.
        tags.push(entry.tag());
        buf.push_str("void ");
        break;
    }

    // Append the qualifiers starting with the one closest to the base type.
    for tag in tags.into_iter().rev() {
        match tag {
            gimli::DW_TAG_const_type => buf.push_str("const "),
            gimli::DW_TAG_pointer_type => buf.push('*'),
            gimli::DW_TAG_reference_type => buf.push('&'),
            gimli::DW_TAG_enumeration_type => buf.push_str("enum "),
            gimli::DW_TAG_subroutine_type => buf.push_str("fn "),
            gimli::DW_TAG_restrict_type => buf.push_str("restrict "),
            gimli::DW_TAG_array_type => buf.push_str("[]"),
            // These contribute nothing to the rendering.
            gimli::DW_TAG_structure_type => {}
            gimli::DW_TAG_typedef => {}
            tag => panic!("unexpected tag: {}", tag),
        }
    }

    Ok(buf)
}
mod dwarf;
mod types;

fn read_prototypes(obj: &object::File<'_>) -> Result<Sources> {
    let endian = if obj.is_little_endian() {
        gimli::RunTimeEndian::Little
    } else {
        gimli::RunTimeEndian::Big
    };
type Reader<'a> = gimli::EndianSlice<'a, gimli::RunTimeEndian>;
type Sources<'a> = dwarf::Sources<'a, Reader<'a>>;

    let load_section = |section_id: gimli::SectionId| -> gimli::Result<borrow::Cow<[u8]>> {
fn read_dwarf<'a>(obj: &object::File<'a>) -> gimli::Result<gimli::Dwarf<Cow<'a, [u8]>>> {
    let load_section = |section_id: gimli::SectionId| -> gimli::Result<Cow<[u8]>> {
        let section_data = obj
            .section_by_name(section_id.name())
            .and_then(|section| section.uncompressed_data().ok())
            .unwrap_or(borrow::Cow::Borrowed(&[]));
            .unwrap_or(Cow::Borrowed(&[]));
        Ok(section_data)
    };
    let dwarf_cow = gimli::Dwarf::load(&load_section)?;
    let dwarf = dwarf_cow.borrow(|section| gimli::EndianSlice::new(section, endian));

    let mut prototypes = Sources::new();

    let mut it = dwarf.units();
    while let Some(header) = it.next()? {
        let unit = dwarf.unit(header)?;

        let mut entries = unit.entries();
        while let Some((_, entry)) = entries.next_dfs()? {
            if entry.tag() != gimli::DW_TAG_subprogram {
                // This debug entry is not a function.
                continue;
            }
            if let Some(gimli::AttributeValue::Flag(true)) =
                entry.attr_value(gimli::DW_AT_declaration)?
            {
                // This debug entry does not define the function.
                continue;
            }

            let name = entry_name(&dwarf, &unit, entry)?;
            let typ = entry_type(&dwarf, &unit, entry)?;
            if name.is_none() || typ.is_none() {
                continue;
            }
            let name = name.unwrap();
            let typ = typ.unwrap();
            let mut parameters = Vec::new();

            if entry.has_children() {
                entries.next_dfs()?.unwrap();
                loop {
                    let entry = match entries.current() {
                        Some(entry) => entry,
                        None => break,
                    };
                    if entry.tag() != gimli::DW_TAG_formal_parameter {
                        entries.next_sibling()?;
                        continue;
                    }
                    let name = entry_name(&dwarf, &unit, entry)?.unwrap_or_else(|| String::new());
                    let typ = entry_type(&dwarf, &unit, entry)?;
                    if let Some(typ) = typ {
                        parameters.push(Parameter { name, typ });
                    }

                    entries.next_sibling()?;
                }
            }

            prototypes.insert(
                name,
                Prototype {
                    return_type: typ,
                    parameters,
                },
            );
        }
    }

    Ok(prototypes)
    gimli::Dwarf::load(&load_section)
}

#[derive(Debug, PartialEq, Eq)]


@@ 257,17 91,21 @@ fn show_object_info_diff(prev: &ObjectInfo<'_>, next: &ObjectInfo<'_>) {
    // TODO show symbol whitelist diff?
}

enum Diff {
enum Diff<'a> {
    Added,
    Removed,
    Changed(Prototype),
    Changed(dwarf::Subprogram<Reader<'a>>),
}

fn diff_sources(prev: Sources, mut next: Sources) -> Vec<(String, Prototype, Diff)> {
fn diff_sources<'a>(
    prev: Sources<'a>,
    mut next: Sources<'a>,
) -> Vec<(Reader<'a>, dwarf::Subprogram<Reader<'a>>, Diff<'a>)> {
    let mut res: Vec<_> = prev
        .subprograms
        .into_iter()
        .filter_map(|(prev_func, prev_prototype)| {
            let next_prototype = if let Some(p) = next.remove(&prev_func) {
            let next_prototype = if let Some(p) = next.subprograms.remove(&prev_func) {
                p
            } else {
                return Some((prev_func, prev_prototype, Diff::Removed));


@@ 281,32 119,33 @@ fn diff_sources(prev: Sources, mut next: Sources) -> Vec<(String, Prototype, Dif
        .collect();

    let added = next
        .subprograms
        .into_iter()
        .map(|(func, prototype)| (func, prototype, Diff::Added));
    res.extend(added);

    res.sort_unstable_by(|(func1, _, _), (func2, _, _)| String::cmp(func1, func2));
    res.sort_unstable_by(|(func1, _, _), (func2, _, _)| func1.slice().cmp(func2.slice()));

    res
}

fn show_source_diff(d: &[(String, Prototype, Diff)]) {
fn show_source_diff(d: &[(Reader<'_>, dwarf::Subprogram<Reader<'_>>, Diff<'_>)]) {
    let color = atty::is(atty::Stream::Stdout);
    for (func, prototype, diff) in d {
        match diff {
            Diff::Added => {
                print!("add ");
                show_prototype(func, prototype, color);
                show_prototype(*func, prototype, color);
            }
            Diff::Removed => {
                print!("del ");
                show_prototype(func, prototype, color);
                show_prototype(*func, prototype, color);
            }
            Diff::Changed(new_prototype) => {
                print!("prv ");
                show_prototype(func, prototype, color);
                show_prototype(*func, prototype, color);
                print!("nxt ");
                show_prototype(func, new_prototype, color);
                show_prototype(*func, new_prototype, color);
            }
        }
    }


@@ 372,18 211,16 @@ fn read_object_info<'a>(obj: &object::File<'a>, filter: SymbolFilter) -> Result<
    })
}

fn sort_prototypes(prototypes: Sources) -> SortedSources {
    let mut sorted: Vec<_> = prototypes.into_iter().collect();
    sorted.sort_unstable_by(|(name1, _), (name2, _)| String::cmp(name1, name2));
    sorted
}

fn show_prototype(name: &str, prototype: &Prototype, color: bool) {
    print!("{}", prototype.return_type);
fn show_prototype(name: Reader<'_>, prototype: &dwarf::Subprogram<Reader<'_>>, color: bool) {
    let return_type = prototype.return_type.borrow();
    print!("{}{}", return_type, return_type.display_padding());
    if color {
        print!("{}(", ansi_term::Style::new().bold().paint(name));
        print!(
            "{}(",
            ansi_term::Style::new().bold().paint(name.to_string_lossy())
        );
    } else {
        print!("{}(", name);
        print!("{}(", name.to_string_lossy());
    }
    let mut first = true;
    for param in &prototype.parameters {


@@ 392,20 229,22 @@ fn show_prototype(name: &str, prototype: &Prototype, color: bool) {
        } else {
            print!(", ");
        }
        let typ = if param.name.is_empty() {
            param.typ.trim_end()
        } else {
            &param.typ
        };
        print!("{}{}", typ, param.name);
        let type_ = param.type_.borrow();
        print!("{}", type_);
        if let Some(name) = param.name {
            print!("{}{}", type_.display_padding(), name.to_string_lossy());
        }
    }
    println!(");");
}

fn show_prototypes(prototypes: SortedSources) {
fn show_prototypes(sources: Sources<'_>) {
    let color = atty::is(atty::Stream::Stdout);
    // TODO show types
    let mut prototypes: Vec<_> = sources.subprograms.into_iter().collect();
    prototypes.sort_unstable_by(|(func1, _), (func2, _)| func1.slice().cmp(func2.slice()));
    for (name, p) in prototypes {
        show_prototype(&name, &p, color);
        show_prototype(name, &p, color);
    }
}



@@ 440,10 279,21 @@ fn main() -> Result<()> {
    let file_contents = unsafe { memmap::Mmap::map(&file) }.context("failed to mmap file")?;
    let obj = object::File::parse(file_contents.as_ref()).context("failed to parse object")?;

    let mut sources = read_prototypes(&obj).context("failed to extract DWARF information")?;
    let dwarf = read_dwarf(&obj).context("failed to extract DWARF information")?;
    let endian = if obj.is_little_endian() {
        gimli::RunTimeEndian::Little
    } else {
        gimli::RunTimeEndian::Big
    };
    let dwarf = dwarf.borrow(|section| gimli::EndianSlice::new(section, endian));
    let mut sources = Sources::parse(&dwarf)?;
    let object_info = read_object_info(&obj, symbol_filter)?;

    sources.retain(|func, _| object_info.symbol_whitelist.contains(func.as_str()));
    sources.subprograms.retain(|func, _| {
        object_info
            .symbol_whitelist
            .contains(func.to_string_lossy().as_ref())
    });

    if let Some(baseline_path) = matches.opt_str("d") {
        let baseline = fs::File::open(baseline_path).context("failed to open diff baseline")?;


@@ 452,14 302,22 @@ fn main() -> Result<()> {
        let baseline_obj = object::File::parse(baseline_contents.as_ref())
            .context("failed to parse diff baseline as an object")?;

        let mut baseline_sources = read_prototypes(&baseline_obj)
            .context("failed to extract DWARF information from diff baseline")?;
        let baseline_dwarf =
            read_dwarf(&baseline_obj).context("failed to extract DWARF information")?;
        let baseline_endian = if baseline_obj.is_little_endian() {
            gimli::RunTimeEndian::Little
        } else {
            gimli::RunTimeEndian::Big
        };
        let baseline_dwarf =
            baseline_dwarf.borrow(|section| gimli::EndianSlice::new(section, baseline_endian));
        let mut baseline_sources = Sources::parse(&baseline_dwarf)?;
        let baseline_object_info = read_object_info(&baseline_obj, symbol_filter)?;

        baseline_sources.retain(|func, _| {
        baseline_sources.subprograms.retain(|func, _| {
            baseline_object_info
                .symbol_whitelist
                .contains(func.as_str())
                .contains(func.to_string_lossy().as_ref())
        });

        let mut is_match = true;


@@ 486,10 344,9 @@ fn main() -> Result<()> {
        } else {
            println!("Endianness: big");
        }
        if !sources.is_empty() {
        if !sources.subprograms.is_empty() {
            println!();
            let sorted_sources = sort_prototypes(sources);
            show_prototypes(sorted_sources);
            show_prototypes(sources);
        }
    }


A src/types.rs => src/types.rs +462 -0
@@ 0,0 1,462 @@
use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::hash_map;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fmt;
use std::rc::Rc;
use std::rc::Weak;

/// How the bits of a type are to be interpreted, converted from a
/// `gimli::DwAte` value.
#[derive(PartialEq)]
pub enum Encoding {
    Address,
    Boolean,
    ComplexFloat,
    Float,
    Signed,
    SignedChar,
    Unsigned,
    UnsignedChar,
    ImaginaryFloat,
    PackedDecimal,
    NumericString,
    Edited,
    SignedFixed,
    UnsignedFixed,
    DecimalFloat,
    Utf,
    /// A value from the user-defined range, kept in its raw form.
    User(gimli::DwAte),
}

impl TryFrom<gimli::DwAte> for Encoding {
    type Error = ();

    /// Maps a raw encoding value onto an [`Encoding`].
    ///
    /// Values from `0x80` to `0xff` are kept as [`Encoding::User`]; any other
    /// value that is not listed below is rejected with `Err(())`.
    fn try_from(ate: gimli::DwAte) -> Result<Encoding, ()> {
        let code = ate.0;
        if (0x80..=0xff).contains(&code) {
            return Ok(Encoding::User(ate));
        }

        let standard = match code {
            0x01 => Encoding::Address,
            0x02 => Encoding::Boolean,
            0x03 => Encoding::ComplexFloat,
            0x04 => Encoding::Float,
            0x05 => Encoding::Signed,
            0x06 => Encoding::SignedChar,
            0x07 => Encoding::Unsigned,
            0x08 => Encoding::UnsignedChar,
            0x09 => Encoding::ImaginaryFloat,
            0x0a => Encoding::PackedDecimal,
            0x0b => Encoding::NumericString,
            0x0c => Encoding::Edited,
            0x0d => Encoding::SignedFixed,
            0x0e => Encoding::UnsignedFixed,
            0x0f => Encoding::DecimalFloat,
            0x10 => Encoding::Utf,
            _ => return Err(()),
        };
        Ok(standard)
    }
}

/// A member of a structure or of a union.
#[derive(PartialEq)]
pub struct Member<R> {
    // Name of the member, when the debug entry provides one.
    name: Option<R>,
    // Value of the member's data location attribute; always 0 for the members
    // of a union.
    location: u64,
    // Resolved type of the member.
    type_: InstancePtr<R>,
}

/// A named constant of an enumeration type.
#[derive(PartialEq)]
pub struct Enumerator<R> {
    // Name of the constant.
    name: R,
    // Value of the constant, as read from its constant-value attribute.
    value: u64,
}

/// The shape of a type, without any modifier applied to it.
#[derive(PartialEq)]
pub enum Definition<R> {
    /// A base type, described by its size and encoding.
    Base {
        byte_size: u64,
        encoding: Encoding,
    },
    /// An enumeration, with its size, encoding and named constants.
    Enumeration {
        byte_size: u64,
        encoding: Encoding,
        enumerators: Vec<Enumerator<R>>,
    },
    /// A function type; `return_type` is `None` when the entry has no type
    /// attribute, which is displayed as `void`.
    Subroutine {
        return_type: Option<InstancePtr<R>>,
        parameters: Vec<InstancePtr<R>>,
    },
    /// A structure and its members.
    Structure {
        members: Vec<Member<R>>,
    },
    /// A union and its members.
    Union {
        members: Vec<Member<R>>,
    },
}

impl<R> Definition<R> {
    /// Returns a throw-away definition (a zero-sized base type) meant to be
    /// overwritten once the real definition is known.
    fn dummy() -> Definition<R> {
        let encoding = Encoding::Address;
        Self::Base {
            byte_size: 0,
            encoding,
        }
    }
}

/// A type definition together with its optional name.
pub struct Type<R> {
    // Name of the type; `None` is displayed as a placeholder name.
    name: Option<R>,
    // What the type is made of.
    definition: Definition<R>,
}

pub type TypePtr<R> = Rc<RefCell<Type<R>>>;

// TODO add fields like pointer size, array count, etc.
/// A qualifier or derivation applied on top of an underlying type.
#[derive(Copy, Clone, PartialEq)]
enum Modifier {
    Array,
    Const,
    Pointer,
    Reference,
    Restrict,
    Volatile,
}

impl Modifier {
    /// Tells whether a space must follow this modifier when it is displayed:
    /// `true` for the keyword modifiers, `false` for the symbolic ones.
    pub fn needs_padding(self) -> bool {
        match self {
            Modifier::Const | Modifier::Restrict | Modifier::Volatile => true,
            Modifier::Array | Modifier::Pointer | Modifier::Reference => false,
        }
    }
}

/// A use of a type: an optional underlying definition plus the modifiers
/// applied to it.
pub struct Instance<R> {
    // Modifiers applied to the underlying type; they are displayed in reverse
    // order, after the underlying type.
    modifiers: Vec<Modifier>,
    // The modified type; `None` is displayed as `void`.
    underlying: Option<TypePtr<R>>,
    // NOTE(review): presumably a weak handle to the `Rc` that owns this
    // instance; it is set where instances are built, which is outside this
    // part of the file.
    self_weak: Weak<RefCell<Instance<R>>>,
}

pub type InstancePtr<R> = Rc<RefCell<Instance<R>>>;

impl<R> PartialEq for Instance<R>
where
    R: PartialEq,
{
    fn eq(&self, other: &Instance<R>) -> bool {
        use std::collections::hash_map::Entry;

        thread_local! {
            static CACHE: RefCell<HashMap<(usize, usize), bool>> = RefCell::new(HashMap::new());
        }

        CACHE.with(|cache| {
            let self_addr = Weak::as_ptr(&self.self_weak) as usize;
            let other_addr = Weak::as_ptr(&other.self_weak) as usize;

            match cache.borrow_mut().entry((self_addr, other_addr)) {
                Entry::Vacant(entry) => entry.insert(true),
                Entry::Occupied(entry) => return *entry.get(),
            };

            let modifiers_eq = self.modifiers == other.modifiers;
            let underlying_eq = match (&self.underlying, &other.underlying) {
                (None, None) => true,
                (Some(_), None) | (None, Some(_)) => false,
                (Some(s), Some(o)) => s.borrow().name == o.borrow().name,
            };
            let is_eq = modifiers_eq && underlying_eq;

            cache.borrow_mut().insert((self_addr, other_addr), is_eq);
            is_eq
        })
    }
}

/// Renders the instance as text: the underlying type first (`void` when there
/// is none), then every modifier, last one first.
impl<R> fmt::Display for Instance<R>
where
    R: gimli::Reader,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(type_ptr) = &self.underlying {
            let type_ = type_ptr.borrow();
            // The name is decoded up front, whatever the kind of definition.
            let name = match &type_.name {
                Some(raw_name) => raw_name.to_string_lossy().map_err(|_| fmt::Error)?,
                None => Cow::Borrowed("_UNAMED_"),
            };
            match &type_.definition {
                Definition::Base { .. } => write!(f, "{}", name)?,
                Definition::Enumeration { .. } => write!(f, "enum {}", name)?,
                Definition::Structure { .. } => write!(f, "struct {}", name)?,
                Definition::Union { .. } => write!(f, "union {}", name)?,
                Definition::Subroutine {
                    return_type,
                    parameters,
                } => {
                    match return_type {
                        Some(ret) => {
                            let ret = ret.borrow();
                            write!(f, "{}{}fn(", ret, ret.display_padding())?;
                        }
                        None => write!(f, "void fn(")?,
                    }
                    for (index, param) in parameters.iter().enumerate() {
                        if index > 0 {
                            f.write_str(", ")?;
                        }
                        write!(f, "{}", param.borrow())?;
                    }
                    f.write_str(")")?;
                }
            }
        } else {
            f.write_str("void")?;
        }

        // TODO don't put space on both end of pointers and refs
        let mut pad = true;
        for modifier in self.modifiers.iter().rev() {
            if pad {
                f.write_str(" ")?;
            }
            let text = match modifier {
                Modifier::Array => "[]",
                Modifier::Const => "const",
                Modifier::Pointer => "*",
                Modifier::Reference => "&",
                Modifier::Restrict => "restrict",
                Modifier::Volatile => "volatile",
            };
            f.write_str(text)?;
            pad = modifier.needs_padding();
        }
        Ok(())
    }
}

impl<R> Instance<R> {
    /// Returns the separator to write between the displayed instance and
    /// whatever follows it: an empty string when the first modifier needs no
    /// padding, a single space otherwise (including when there is no modifier
    /// at all).
    pub fn display_padding(&self) -> &'static str {
        match self.modifiers.first() {
            Some(modifier) if !modifier.needs_padding() => "",
            _ => " ",
        }
    }
}

/// Memoizes the types resolved from a DWARF data set.
pub struct Cache<'a, R>
where
    R: gimli::Reader,
{
    // DWARF data the types are read from.
    dwarf: &'a gimli::Dwarf<R>,
    // Type instances keyed by an offset into the debug information.
    instances: HashMap<gimli::DebugInfoOffset<R::Offset>, InstancePtr<R>>,
    // Type definitions keyed by the offset of their defining entry in the
    // debug information.
    definitions: HashMap<gimli::DebugInfoOffset<R::Offset>, TypePtr<R>>,
}

impl<'a, R> Cache<'a, R>
where
    R: gimli::Reader,
{
    /// Creates an empty cache reading its types from `dwarf`.
    pub fn new(dwarf: &'a gimli::Dwarf<R>) -> Cache<'a, R> {
        let instances = HashMap::new();
        let definitions = HashMap::new();
        Cache {
            dwarf,
            instances,
            definitions,
        }
    }

    /// Builds the definition of a base type from the byte size and encoding
    /// of `entry`.
    ///
    /// # Panics
    ///
    /// Panics when `entry` lacks a byte size or an encoding.
    fn resolve_base_definition(
        &mut self,
        entry: &gimli::DebuggingInformationEntry<R>,
    ) -> gimli::Result<Definition<R>> {
        let byte_size = crate::dwarf::entry_byte_size(entry)?.unwrap();
        let encoding = crate::dwarf::entry_encoding(entry)?.unwrap();
        Ok(Definition::Base {
            byte_size,
            encoding,
        })
    }

    /// Builds the definition of an enumeration from `entry` and from its
    /// `DW_TAG_enumerator` children; other children are ignored.
    ///
    /// # Panics
    ///
    /// Panics when `entry` lacks a byte size or an encoding, or when an
    /// enumerator lacks a name or a constant value.
    fn resolve_enumeration_definition(
        &mut self,
        unit: &gimli::Unit<R>,
        entry: &gimli::DebuggingInformationEntry<R>,
    ) -> gimli::Result<Definition<R>> {
        let byte_size = crate::dwarf::entry_byte_size(entry)?.unwrap();
        let encoding = crate::dwarf::entry_encoding(entry)?.unwrap();

        // TODO also get underlying type?

        let mut enumerators = Vec::new();
        let mut children = crate::dwarf::entry_children(unit, entry)?;
        while let Some(child) = children.next()? {
            if child.tag() == gimli::DW_TAG_enumerator {
                let name = crate::dwarf::entry_name(self.dwarf, unit, child)?.unwrap();
                let value =
                    crate::dwarf::entry_int_attr(child, gimli::DW_AT_const_value)?.unwrap();
                enumerators.push(Enumerator { name, value });
            }
        }

        let definition = Definition::Enumeration {
            byte_size,
            encoding,
            enumerators,
        };
        Ok(definition)
    }

    /// Builds the definition of a structure from the `DW_TAG_member` children
    /// of `entry`; other children are ignored.
    ///
    /// # Panics
    ///
    /// Panics when a member lacks a data member location or a type.
    fn resolve_structure_definition(
        &mut self,
        unit: &gimli::Unit<R>,
        entry: &gimli::DebuggingInformationEntry<R>,
    ) -> gimli::Result<Definition<R>> {
        let mut members = Vec::new();
        let mut children = crate::dwarf::entry_children(unit, entry)?;
        while let Some(child) = children.next()? {
            if child.tag() == gimli::DW_TAG_member {
                let name = crate::dwarf::entry_name(self.dwarf, unit, child)?;
                let location =
                    crate::dwarf::entry_int_attr(child, gimli::DW_AT_data_member_location)?
                        .unwrap();
                let type_ = crate::dwarf::entry_type(self, unit, child)?.unwrap();
                let member = Member {
                    name,
                    location,
                    type_,
                };
                members.push(member);
            }
        }
        Ok(Definition::Structure { members })
    }

    /// Builds the definition of a function type: the return type comes from
    /// `entry` itself (and may be absent), the parameter types from its
    /// `DW_TAG_formal_parameter` children.
    ///
    /// # Panics
    ///
    /// Panics when a formal parameter has no type.
    fn resolve_subroutine_definition(
        &mut self,
        unit: &gimli::Unit<R>,
        entry: &gimli::DebuggingInformationEntry<R>,
    ) -> gimli::Result<Definition<R>> {
        let return_type = crate::dwarf::entry_type(self, unit, entry)?;

        let mut parameters = Vec::new();
        let mut children = crate::dwarf::entry_children(unit, entry)?;
        while let Some(child) = children.next()? {
            if child.tag() == gimli::DW_TAG_formal_parameter {
                let param_type = crate::dwarf::entry_type(self, unit, child)?.unwrap();
                parameters.push(param_type);
            }
        }

        let definition = Definition::Subroutine {
            return_type,
            parameters,
        };
        Ok(definition)
    }

    /// Builds the definition of a union from the `DW_TAG_member` children of
    /// `entry`; other children are ignored. Every member is recorded at
    /// location 0.
    ///
    /// # Panics
    ///
    /// Panics when a member has no type.
    fn resolve_union_definition(
        &mut self,
        unit: &gimli::Unit<R>,
        entry: &gimli::DebuggingInformationEntry<R>,
    ) -> gimli::Result<Definition<R>> {
        let mut members = Vec::new();
        let mut children = crate::dwarf::entry_children(unit, entry)?;
        while let Some(child) = children.next()? {
            if child.tag() == gimli::DW_TAG_member {
                let name = crate::dwarf::entry_name(self.dwarf, unit, child)?;
                let type_ = crate::dwarf::entry_type(self, unit, child)?.unwrap();
                let member = Member {
                    name,
                    location: 0,
                    type_,
                };
                members.push(member);
            }
        }
        Ok(Definition::Union { members })
    }

    /// Returns the shared `Type` for the definition entry `entry`, building
    /// it the first time its offset is seen.
    ///
    /// Definitions are stored in `self.definitions`, keyed by the offset that
    /// `to_debug_info_offset` computes for the entry. On a cache hit the
    /// stored pointer is cloned and returned. On a miss, a dummy `Type` is
    /// stored first, the real name and definition are resolved, and the dummy
    /// is overwritten in place, so every clone handed out meanwhile sees the
    /// final value.
    ///
    /// # Errors
    ///
    /// Forwards any `gimli` error from resolving the name or the definition.
    /// In that case the dummy `Type` inserted beforehand stays in
    /// `self.definitions`.
    ///
    /// # Panics
    ///
    /// Panics if `to_debug_info_offset` returns `None`, or if the tag of
    /// `entry` is not one of the base, enumeration, structure, subroutine or
    /// union type tags.
    fn resolve_definition(
        &mut self,
        unit: &gimli::Unit<R>,
        entry: &gimli::DebuggingInformationEntry<R>,
    ) -> gimli::Result<TypePtr<R>> {
        let key = entry.offset().to_debug_info_offset(&unit.header).unwrap();

        // Register a placeholder before descending into the entry: a
        // recursive type will find it in the map instead of resolving the
        // same entry again.
        let slot = match self.definitions.entry(key) {
            hash_map::Entry::Occupied(known) => return Ok(Rc::clone(known.get())),
            hash_map::Entry::Vacant(vacant) => {
                let placeholder = Rc::new(RefCell::new(Type {
                    name: None,
                    definition: Definition::dummy(),
                }));
                vacant.insert(Rc::clone(&placeholder));
                placeholder
            }
        };

        let name = crate::dwarf::entry_name(self.dwarf, unit, entry)?;
        let resolved = Type {
            name,
            definition: match entry.tag() {
                gimli::DW_TAG_base_type => self.resolve_base_definition(entry)?,
                gimli::DW_TAG_enumeration_type => {
                    self.resolve_enumeration_definition(unit, entry)?
                }
                gimli::DW_TAG_structure_type => self.resolve_structure_definition(unit, entry)?,
                gimli::DW_TAG_subroutine_type => {
                    self.resolve_subroutine_definition(unit, entry)?
                }
                gimli::DW_TAG_union_type => self.resolve_union_definition(unit, entry)?,
                _ => unreachable!(),
            },
        };

        // Overwrite the placeholder in place so existing clones observe the
        // finished type.
        *slot.borrow_mut() = resolved;

        Ok(slot)
    }

    pub fn resolve(
        &mut self,
        unit: &gimli::Unit<R>,
        mut entry_offset: gimli::UnitOffset<R::Offset>,
    ) -> gimli::Result<InstancePtr<R>> {
        let absolute_offset = entry_offset.to_debug_info_offset(&unit.header).unwrap();
        if let Some(instance) = self.instances.get(&absolute_offset) {
            return Ok(Rc::clone(instance));
        }

        let mut underlying = None;
        let mut modifiers = Vec::new();
        loop {
            let entry = unit.entry(entry_offset)?;
            match entry.tag() {
                gimli::DW_TAG_base_type
                | gimli::DW_TAG_enumeration_type
                | gimli::DW_TAG_structure_type
                | gimli::DW_TAG_subroutine_type
                | gimli::DW_TAG_union_type => {
                    underlying = Some(self.resolve_definition(unit, &entry)?);
                    break;
                }
                gimli::DW_TAG_array_type => modifiers.push(Modifier::Array),
                gimli::DW_TAG_const_type => modifiers.push(Modifier::Const),
                gimli::DW_TAG_pointer_type => modifiers.push(Modifier::Pointer),
                gimli::DW_TAG_reference_type => modifiers.push(Modifier::Reference),
                gimli::DW_TAG_restrict_type => modifiers.push(Modifier::Restrict),
                gimli::DW_TAG_volatile_type => modifiers.push(Modifier::Volatile),
                gimli::DW_TAG_typedef => {}
                tag => panic!("unexpected tag: {}", tag),
            }

            if let Some(next_attr) = entry.attr_value(gimli::DW_AT_type)? {
                entry_offset = match next_attr {
                    gimli::AttributeValue::UnitRef(offset) => offset,
                    av => panic!("unexpected attribute value: {:?}", av),
                };
                continue;
            }

            break;
        }

        let instance = Rc::new_cyclic(move |self_weak| {
            RefCell::new(Instance {
                modifiers,
                underlying,
                self_weak: self_weak.clone(),
            })
        });
        self.instances.insert(absolute_offset, Rc::clone(&instance));

        Ok(instance)
    }
}