~chiefnoah/bare_proc

5ba9e129f3c4369146ad55a3a57051b425a22c80 — Noah Pederson 6 months ago 13c8c81
Refactor, argless union variants

Heavily refactors the generator to perform stateful modifications of the
two primary shared data structures: the top-level TokenStream and the
user-defined type registry.

This refactor was largely motivated by the desire to generate unit
variants when a BARE union variant is a void alias.
1 files changed, 210 insertions(+), 239 deletions(-)

M src/lib.rs
M src/lib.rs => src/lib.rs +210 -239
@@ 106,8 106,6 @@ use proc_macro2::{Ident, Span, TokenStream};
use quote::quote;
use syn::{parse_macro_input, ExprLit};

type DefRegistry = Vec<TokenStream>;

/// Builds an [`Ident`] named `s`, spanned at the macro call site.
///
/// Accepts `&str` so callers may pass string slices as well as `&String` (which
/// deref-coerces to `&str`).
///
/// # Panics
///
/// `Ident::new` panics if `s` is not a valid Rust identifier (see the `proc_macro2`
/// documentation).
fn ident_from_string(s: &str) -> Ident {
    Ident::new(s, Span::call_site())
}


@@ 118,7 116,6 @@ fn ident_from_string(s: &String) -> Ident {
/// path is treated as relative to the file location of the macro's use.
/// For details on how the BARE data model maps to the Rust data model, see the [`Serialize`
/// derive macro's documentation.](https://docs.rs/serde_bare/latest/serde_bare/)
// TODO: add a link to that documentation
#[proc_macro]
pub fn bare_schema(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
    let input = parse_macro_input!(item as ExprLit);


@@ 127,140 124,126 @@ pub fn bare_schema(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
        _ => panic!("Unexpected literal type, expected string"),
    };
    let file = read_to_string(path.value()).unwrap();
    let user_type_registry: BTreeMap<String, AnyType> = parse_string(&file);
    let mut user_type_syntax = DefRegistry::new();
    for (name, user_type) in user_type_registry {
        user_type_syntax = gen_user_type(user_type_syntax, &name, &user_type);
    let mut schema_generator = SchemaGenerator {
        global_output: Default::default(),
        user_type_registry: parse_string(&file),
    };

    for (name, user_type) in &schema_generator.user_type_registry.clone() {
        schema_generator.gen_user_type(&name, &user_type);
    }

    quote! {
        use std::io::{Error as IOError, Read, Write};
        use serde::{Serialize, Deserialize};
        use std::collections::HashMap;
        use serde_bare::{UInt, Int};
    schema_generator.complete().into()
}

        #(#user_type_syntax)*
    }
    .into()
/// Mutable state shared by every `gen_*` method while a schema is turned into Rust
/// definitions.
struct SchemaGenerator {
    /// Top-level definitions produced so far. `gen_user_type` and `gen_anonymous` push onto
    /// this, and `complete` splices the accumulated streams into the final output.
    global_output: Vec<TokenStream>,
    /// User-defined types keyed by name, as returned by `parse_string`. `gen_union` looks
    /// type references up here to detect aliases of `void`.
    user_type_registry: BTreeMap<String, AnyType>,
}

/// `gen_user_type` is responsible for generating the token streams of a single user type at a top
/// level. Rust does not support anonymous structs/enums/etc., so we must recursively parse any
/// anonymous definitions and generate top-level definitions. As such, this function may generate
/// multiple types.
fn gen_user_type(mut registry: DefRegistry, name: &String, t: &AnyType) -> DefRegistry {
    #[allow(unused_assignments)]
    let mut def = TokenStream::new();
    use AnyType::*;
    (registry, def) = match t {
        Primative(p) => {
            def = gen_primative_type_def(p);
            let ident = ident_from_string(name);
            (
                registry,
impl SchemaGenerator {
    /// Completes a generation cycle by consuming the `SchemaGenerator` and yielding a
    /// `TokenStream`.
    fn complete(self) -> TokenStream {
        let user_type_syntax = self.global_output;
        quote! {
            use serde::{Serialize, Deserialize};
            use std::collections::HashMap;
            use serde_bare::{Uint, Int};

            #(#user_type_syntax)*
        }
    }

    /// `gen_user_type` is responsible for generating the token streams of a single user type at a top
    /// level. Rust does not support anonymous structs/enums/etc., so we must recursively parse any
    /// anonymous definitions and generate top-level definitions. As such, this function may generate
    /// multiple types.
    fn gen_user_type(&mut self, name: &String, t: &AnyType) {
        #[allow(unused_assignments)]
        use AnyType::*;
        let def = match t {
            Primative(p) => {
                let def = gen_primative_type_def(p);
                let ident = ident_from_string(name);
                quote! {
                    type #ident = #def;
                },
            )
        }
        List { inner, length } => {
            (registry, def) = gen_list(registry, name, inner.as_ref(), length);
            let ident = ident_from_string(name);
            (
                registry,
                }
            }
            List { inner, length } => {
                let def = self.gen_list(name, inner.as_ref(), length);
                let ident = ident_from_string(name);
                quote! {
                    type #ident = #def;
                },
            )
        }
        Struct(fields) => {
            (registry, _) = gen_struct(registry, name, fields);
            // `gen_struct` only has side-effects on the registry, so we return nothing
            (registry, TokenStream::new())
        }
        Map { key, value } => {
            let (registry, map_def) = gen_map(registry, name, key.as_ref(), value.as_ref());
            let ident = ident_from_string(name);
            (
                registry,
                }
            }
            Struct(fields) => {
                self.gen_struct(name, fields);
                // `gen_struct` only has side-effects on the registry, so we return nothing
                TokenStream::new()
            }
            Map { key, value } => {
                let map_def = self.gen_map(name, key.as_ref(), value.as_ref());
                let ident = ident_from_string(name);
                quote! {
                    type #ident = #map_def;
                },
            )
        }
        Optional(inner) => {
            let (registry, inner_def) = dispatch_type(registry, name, inner);
            let ident = ident_from_string(name);
            (
                registry,
                }
            }
            Optional(inner) => {
                let inner_def = self.dispatch_type(name, inner);
                let ident = ident_from_string(name);
                quote! {
                    type #ident = #inner_def;
                },
            )
        }
        TypeReference(reference) => {
            panic!("Type reference is not valid as a top level definition: {reference}")
        }
        Enum(members) => {
            (registry, _) = gen_enum(registry, name, members);
            // `gen_enum` only has side-effects on the registry, so we return nothing
            (registry, TokenStream::new())
        }
        Union(members) => {
            (registry, _) = gen_union(registry, name, members);
            // `gen_union` only has side-effects on the registry, so we return nothing
            (registry, TokenStream::new())
        }
    };
    registry.push(def);
    registry
}
                }
            }
            TypeReference(reference) => {
                panic!("Type reference is not valid as a top level definition: {reference}")
            }
            Enum(members) => {
                self.gen_enum(name, members);
                // `gen_enum` only has side-effects on the registry, so we return nothing
                TokenStream::new()
            }
            Union(members) => {
                self.gen_union(name, members);
                // `gen_union` only has side-effects on the registry, so we return nothing
                TokenStream::new()
            }
        };
        self.global_output.push(def);
    }

fn dispatch_type(
    registry: DefRegistry,
    name: &String,
    any_type: &AnyType,
) -> (DefRegistry, TokenStream) {
    match any_type {
        AnyType::Primative(p) => (registry, gen_primative_type_def(p)),
        AnyType::List { inner, length } => gen_list(registry, name, inner.as_ref(), length),
        AnyType::Struct(fields) => gen_struct(registry, name, fields),
        AnyType::Enum(members) => gen_enum(registry, name, members),
        AnyType::Map { key, value } => gen_map(registry, name, key.as_ref(), value.as_ref()),
        AnyType::Union(members) => gen_union(registry, name, members),
        AnyType::Optional(inner) => gen_option(registry, name, inner),
        AnyType::TypeReference(i) => {
            let ident = ident_from_string(i);
            (registry, quote! { #ident })
    fn dispatch_type(&mut self, name: &String, any_type: &AnyType) -> TokenStream {
        match any_type {
            AnyType::Primative(p) => gen_primative_type_def(p),
            AnyType::List { inner, length } => self.gen_list(name, inner.as_ref(), length),
            AnyType::Struct(fields) => self.gen_struct(name, fields),
            AnyType::Enum(members) => self.gen_enum(name, members),
            AnyType::Map { key, value } => self.gen_map(name, key.as_ref(), value.as_ref()),
            AnyType::Union(members) => self.gen_union(name, members),
            AnyType::Optional(inner) => self.gen_option(name, inner),
            AnyType::TypeReference(i) => {
                let ident = ident_from_string(i);
                quote! { #ident }
            }
        }
    }
}

fn gen_map(
    registry: DefRegistry,
    name: &String,
    key: &AnyType,
    value: &AnyType,
) -> (DefRegistry, TokenStream) {
    let (registry, key_def) = dispatch_type(registry, name, key);
    let (registry, val_def) = dispatch_type(registry, name, value);
    (
        registry,
    fn gen_map(&mut self, name: &String, key: &AnyType, value: &AnyType) -> TokenStream {
        let key_def = self.dispatch_type(name, key);
        let val_def = self.dispatch_type(name, value);
        quote! {
            HashMap<#key_def, #val_def>
        },
    )
}
        }
    }

fn gen_list(
    registry: DefRegistry,
    name: &String,
    inner_type: &AnyType,
    size: &Option<usize>,
) -> (DefRegistry, TokenStream) {
    let (registry, inner_def) = dispatch_type(registry, name, inner_type);
    (
        registry,
    fn gen_list(
        &mut self,
        name: &String,
        inner_type: &AnyType,
        size: &Option<usize>,
    ) -> TokenStream {
        let inner_def = self.dispatch_type(name, inner_type);
        match *size {
            Some(size) if size <= 32 => quote! {
                [#inner_def; #size]


@@ 268,157 251,145 @@ fn gen_list(
            _ => quote! {
                Vec<#inner_def>
            },
        },
    )
}
        }
    }

fn gen_struct(
    registry: DefRegistry,
    name: &String,
    fields: &Vec<StructField>,
) -> (DefRegistry, TokenStream) {
    // clone so we can safely drain this
    let fields_clone = fields.clone();
    let (registry, fields_gen) = gen_struct_field(registry, name, fields_clone);
    gen_anonymous(registry, name, |ident| {
        quote! {
            #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
            struct #ident {
                #(#fields_gen),*
    fn gen_struct(&mut self, name: &String, fields: &Vec<StructField>) -> TokenStream {
        // clone so we can safely drain this
        let fields_clone = fields.clone();
        let fields_gen = self.gen_struct_field(name, fields_clone);
        self.gen_anonymous(name, |ident| {
            quote! {
                #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
                struct #ident {
                    #(#fields_gen),*
                }
            }
        }
    })
}
        })
    }

fn gen_union(
    mut registry: DefRegistry,
    name: &String,
    members: &Vec<AnyType>,
) -> (DefRegistry, TokenStream) {
    let mut members_def: Vec<TokenStream> = Vec::with_capacity(members.len());
    for (i, member) in members.iter().enumerate() {
        // This is to allow the `registry` binding to not shadow the function arg, but instead
        // rebind it as it's used in the subsequent `gen_anonymous` call. We'll get move errors if
        // we don't do it this way.
        #[allow(unused_assignments)]
        let mut member_def = TokenStream::new();
        (registry, member_def) = match member {
            AnyType::Struct(fields) => {
                let (registry, fields_defs) = gen_struct_field(registry, name, fields.clone());
                (
                    registry,
    fn gen_union(&mut self, name: &String, members: &Vec<AnyType>) -> TokenStream {
        let mut members_def: Vec<TokenStream> = Vec::with_capacity(members.len());
        for (i, member) in members.iter().enumerate() {
            // If this member is a user type alias for void, we'll not generate an inner type later
            let is_void_type = match member {
                AnyType::TypeReference(i) if self.user_type_registry.get(i).is_some() => {
                    let reference = self.user_type_registry.get(i).unwrap();
                    matches!(reference, AnyType::Primative(PrimativeType::Void))
                }
                _ => false,
            };

            // This is to allow the `registry` binding to not shadow the function arg, but instead
            // rebind it as it's used in the subsequent `gen_anonymous` call. We'll get move errors if
            // we don't do it this way.
            #[allow(unused_assignments)]
            let mut member_def = TokenStream::new();
            member_def = match member {
                AnyType::Struct(fields) => {
                    let fields_defs = self.gen_struct_field(name, fields.clone());
                    quote! {
                        {
                            #(#fields_defs),*
                        }
                    },
                )
            }
            _ => {
                #[allow(unused_assignments)]
                let mut inner_def = TokenStream::new();
                (registry, inner_def) =
                    dispatch_type(registry, &format!("{name}Member{i}"), member);
                (
                    registry,
                    }
                }
                AnyType::TypeReference(i) if is_void_type => {
                    let inner_def = ident_from_string(i);
                    // The `inner_def` is always a top-level type here
                    quote! {
                        #inner_def
                    }
                }
                _ => {
                    let inner_def = self.dispatch_type(&format!("{name}Member{i}"), member);
                    // The `inner_def` is always a top-level type here
                    quote! {
                        #inner_def(#inner_def)
                    },
                )
                    }
                }
            };
            members_def.push(member_def);
        }
        self.gen_anonymous(name, |ident| {
            quote! {
                #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
                enum #ident {
                    #(#members_def),*
                }
            }
        };
        members_def.push(member_def);
        })
    }
    gen_anonymous(registry, name, |ident| {
        quote! {
            #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
            enum #ident {
                #(#members_def),*
            }
        }
    })
}

fn gen_option(registry: DefRegistry, name: &String, inner: &AnyType) -> (DefRegistry, TokenStream) {
    let (registry, inner_def) = dispatch_type(registry, name, inner);
    (
        registry,
    fn gen_option(&mut self, name: &String, inner: &AnyType) -> TokenStream {
        let inner_def = self.dispatch_type(name, inner);
        quote! {
           Option<#inner_def>
        },
    )
}
        }
    }

fn gen_struct_field(
    mut registry: DefRegistry,
    struct_name: &String,
    fields: Vec<StructField>,
) -> (DefRegistry, Vec<TokenStream>) {
    let mut fields_gen: Vec<TokenStream> = Vec::with_capacity(fields.len());
    for StructField { name, type_r } in fields {
        #[allow(unused_assignments)]
        let mut field_gen = TokenStream::new();
        (registry, field_gen) = dispatch_type(registry, &format!("{struct_name}{name}"), &type_r);
        let ident = ident_from_string(&name);
        fields_gen.push(quote! {
            #ident: #field_gen
        })
    /// Generates one `name: Type` token stream per entry of `fields`, in field order.
    ///
    /// Each field's type is resolved through `dispatch_type` under the name
    /// `{struct_name}{field name}`, so the name passed down to any nested generator is
    /// derived from both the struct and the field.
    fn gen_struct_field(
        &mut self,
        struct_name: &String,
        fields: Vec<StructField>,
    ) -> Vec<TokenStream> {
        fields
            .into_iter()
            .map(|StructField { name, type_r }| {
                let field_type = self.dispatch_type(&format!("{struct_name}{name}"), &type_r);
                let ident = ident_from_string(&name);
                quote! {
                    #ident: #field_type
                }
            })
            .collect()
    }
    (registry, fields_gen)
}

fn gen_enum(
    registry: DefRegistry,
    name: &String,
    members: &Vec<(String, Option<usize>)>,
) -> (DefRegistry, TokenStream) {
    let member_defs = members.iter().map(|(name, val)| {
        let ident = ident_from_string(name);
        if let Some(val) = val {
            quote! {
                #ident = #val
    fn gen_enum(&mut self, name: &String, members: &Vec<(String, Option<usize>)>) -> TokenStream {
        let member_defs = members.iter().map(|(name, val)| {
            let ident = ident_from_string(name);
            if let Some(val) = val {
                quote! {
                    #ident = #val
                }
            } else {
                quote! {
                    #ident
                }
            }
        } else {
        });
        self.gen_anonymous(name, |ident| {
            quote! {
                #ident
                #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
                #[repr(usize)]
                enum #ident {
                    #(#member_defs),*
                }
            }
        }
    });
    gen_anonymous(registry, name, |ident| {
        quote! {
            #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)]
            #[repr(usize)]
            enum #ident {
                #(#member_defs),*
            }
        }
    })
}
        })
    }

/// `gen_anonymous` generates an identifier from the provided `name`, passes it to `inner`, pushes
/// the result of `inner` to the `registry`, and yields a quoted version of the generated
/// identifier. This is a common operation when generating types that are anonymous in a BARE
/// schema but are not allowed to be defined anonymously in Rust.
fn gen_anonymous(
    mut registry: Vec<TokenStream>,
    name: &String,
    inner: impl FnOnce(Ident) -> TokenStream,
) -> (Vec<TokenStream>, TokenStream) {
    let ident = ident_from_string(name);
    registry.push(inner(ident.clone()));
    (
        registry,
    /// `gen_anonymous` generates an identifier from the provided `name`, passes it to `inner`, pushes
    /// the result of `inner` to `global_output`, and yields a quoted version of the generated
    /// identifier. This is a common operation when generating types that are anonymous in a BARE
    /// schema but are not allowed to be defined anonymously in Rust.
    fn gen_anonymous(
        &mut self,
        name: &String,
        inner: impl FnOnce(Ident) -> TokenStream,
    ) -> TokenStream {
        let ident = ident_from_string(name);
        self.global_output.push(inner(ident.clone()));
        quote! {
            #ident
        },
    )
        }
    }
}

fn gen_primative_type_def(p: &PrimativeType) -> TokenStream {
    use PrimativeType::*;
    match p {
        UInt => quote! { UInt },
        UInt => quote! { Uint },
        U64 => quote! { u64 },
        U32 => quote! { u32 },
        U16 => quote! { u16 },