~athorp96/workflow-linter

514162bb9ef29790edfea0e2e97be21d97d4c602 — Andrew Thorp 3 years ago 5840351
Handle one or many triggers
5 files changed, 388 insertions(+), 257 deletions(-)

M Cargo.lock
M Cargo.toml
A src/custom_types.rs
M src/main.rs
A src/workflow.rs
M Cargo.lock => Cargo.lock +14 -0
@@ 1,6 1,12 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "anyhow"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68803225a7b13e47191bab76f2687382b60d259e8cf37f6e1893658b84bb9479"

[[package]]
name = "dtoa"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"


@@ 80,11 86,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"

[[package]]
name = "void"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"

[[package]]
name = "workflow-linter"
version = "0.1.0"
dependencies = [
 "anyhow",
 "serde",
 "serde_yaml",
 "void",
]

[[package]]

M Cargo.toml => Cargo.toml +2 -1
@@ 7,6 7,7 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0"
serde = { version = "1.0.118", features = ["derive"] }

serde_yaml = "0.8.14"
void = "1.0.2"

A src/custom_types.rs => src/custom_types.rs +67 -0
@@ 0,0 1,67 @@
use std::fmt;
use std::marker::PhantomData;
use std::str::FromStr;

use serde::de::{self, MapAccess, Visitor};
use serde::{Deserialize, Deserializer, Serialize};
use void::Void;

/// A value that may appear in YAML either as a single item (`on: push`)
/// or as a sequence (`on: [push, pull_request]`).
///
/// `untagged` makes serde try each variant's *shape* in order instead of
/// expecting a `One:`/`Many:` variant tag in the input.
// NOTE(review): `rename_all` has no effect on an untagged enum — variant
// names never appear in the serialized form; consider dropping it.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged, rename_all = "kebab-case")]
pub enum OneOrMany<T> {
    One(T),
    Many(Vec<T>),
}

impl<T> OneOrMany<T> {
    pub fn into_vec(self) -> Vec<T> {
        match self {
            OneOrMany::One(i) => vec![i],
            OneOrMany::Many(l) => l,
        }
    }
}

/// Deserialize a `T` from either a bare string (routed through `T::from_str`,
/// which must be infallible — `FromStr::Err = Void`) or a map (routed through
/// `T`'s own `Deserialize` impl).
///
/// This is the "string or struct" pattern from the serde documentation.
// NOTE(review): not `pub` and not referenced anywhere in this module —
// presumably wired up by a follow-up change; confirm before removing.
fn string_or_struct<'de, T, D>(deserializer: D) -> Result<T, D::Error>
where
    T: Deserialize<'de> + FromStr<Err = Void>,
    D: Deserializer<'de>,
{
    // This is a Visitor that forwards string types to T's `FromStr` impl and
    // forwards map types to T's `Deserialize` impl. The `PhantomData` is to
    // keep the compiler from complaining about T being an unused generic type
    // parameter. We need T in order to know the Value type for the Visitor
    // impl.
    struct StringOrStruct<T>(PhantomData<fn() -> T>);

    impl<'de, T> Visitor<'de> for StringOrStruct<T>
    where
        T: Deserialize<'de> + FromStr<Err = Void>,
    {
        type Value = T;

        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            formatter.write_str("string or map")
        }

        fn visit_str<E>(self, value: &str) -> Result<T, E>
        where
            E: de::Error,
        {
            // `FromStr::Err = Void` is uninhabited, so `from_str` can never
            // actually return `Err`; this unwrap is infallible.
            Ok(FromStr::from_str(value).unwrap())
        }

        fn visit_map<M>(self, map: M) -> Result<T, M::Error>
        where
            M: MapAccess<'de>,
        {
            // `MapAccessDeserializer` is a wrapper that turns a `MapAccess`
            // into a `Deserializer`, allowing it to be used as the input to T's
            // `Deserialize` implementation. T then deserializes itself using
            // the entries from the map visitor.
            Deserialize::deserialize(de::value::MapAccessDeserializer::new(map))
        }
    }

    deserializer.deserialize_any(StringOrStruct(PhantomData))
}

M src/main.rs => src/main.rs +3 -256
@@ 1,266 1,13 @@
use std::collections::HashMap;
use std::io::Read;

use serde::{Deserialize, Serialize};
use serde_yaml::Value;

/// A value given either as a single item or as a list of items.
///
/// `untagged` is required: workflow YAML writes the value directly
/// (`on: push` or `on: [push, pull_request]`), never wrapped in a
/// `One:`/`Many:` variant tag. Without it serde demands the
/// externally-tagged form and rejects both real shapes. This matches the
/// copy of this type in `custom_types::OneOrMany`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged, rename_all = "kebab-case")]
enum OneOrMany<T> {
    One(T),
    Many(Vec<T>),
}

impl<T> OneOrMany<T> {
    fn into_vec(self) -> Vec<T> {
        match self {
            OneOrMany::One(i) => vec![i],
            OneOrMany::Many(l) => l,
        }
    }
}

/// You can schedule a workflow to run at specific UTC times using POSIX cron
/// syntax. Scheduled workflows run on the latest commit on the default or base
/// branch. The shortest interval you can run scheduled workflows is once every 5
/// minutes.
type Schedule = Vec<CronSchedule>;

/// A single `schedule:` entry holding one cron expression.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct CronSchedule {
    // Raw cron expression; not validated here.
    // TODO: validate cron string
    cron: String,
}

/// Trigger types for a workflow.
///
/// Each event's configuration is kept as an opaque `serde_yaml::Value`
/// rather than a typed struct.
// NOTE(review): this enum is externally tagged (no `untagged`), so it only
// matches the single-key map form `on: {push: ...}` — a bare string
// (`on: push`) or list (`on: [push, pull_request]`) cannot deserialize into
// it. The `workflow` module's `Trigger` in this same commit addresses that.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
enum Trigger {
    Push(Value),
    PullRequest(Value),
    WorkflowDispatch(Value),
    RepositoryDispatch(Value),
    CheckRun(Value),
    CheckSuite(Value),
    Create(Value),
    Delete(Value),
    Deployment(Value),
    DeploymentStatus(Value),
    Fork(Value),
    Gollum(Value),
    IssueComment(Value),
    Issues(Value),
    Label(Value),
    Milestone(Value),
    PageBuild(Value),
    Project(Value),
    ProjectCard(Value),
    ProjectColumn(Value),
    Public(Value),
    PullRequestReview(Value),
    PullRequestReviewComment(Value),
    PullRequestTarget(Value),
    RegistryPackage(Value),
    Release(Value),
    Status(Value),
    Watch(Value),
    WorkflowRun(Value),
    Schedule(Schedule),
}

/// The per-`run`-step settings that can be defaulted: shell and working
/// directory. Both are optional; an absent field leaves the runner default.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct DefaultSettings {
    shell: Option<String>,
    working_directory: Option<String>,
}

/// Provide default shell and working-directory to all run steps in the job.
/// Context and expression are not allowed in this section.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Defaults {
    run: DefaultSettings,
}

/// The environment that the job references. All environment protection rules
/// must pass before a job referencing the environment is sent to a runner.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Environment {
    // Required: the environment's name.
    name: String,
    // Optional URL displayed for the deployment.
    url: Option<String>,
}

// Placeholder: the matrix is kept as raw YAML until it gets a typed schema.
// TODO
type Matrix = Value;

/// A build-matrix strategy for a job: the matrix itself plus the
/// fail-fast and max-parallel knobs, all optional.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Strategy {
    matrix: Option<Matrix>,
    fail_fast: Option<bool>,
    max_parallel: Option<i32>,
}

// Placeholder: no fields yet, so any step map deserializes successfully
// (serde ignores unknown fields by default).
// TODO
/// Steps can run commands, run setup tasks, or run an action in your
/// repository, a public repository, or an action published in a Docker registry.
/// Not all steps run actions, but all actions run as a step. Each step runs in its
/// own process in the runner environment and has access to the workspace and
/// filesystem.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Step {}

/// A container to run any steps in a job that don't already specify a container.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", default)]
struct ContainerSpec {
    // Docker image name.
    name: String,
    // Registry credentials for pulling private images.
    credentials: Option<HashMap<String, String>>,
    env: Option<Env>,
    ports: Vec<i32>,
    // NOTE(review): Docker volume mounts are `source:destination` strings —
    // `Vec<i32>` presumably cannot represent a real `volumes:` entry; confirm
    // the intended type. (The `workflow` module's copy has the same issue.)
    volumes: Vec<i32>,
    options: Vec<String>,
}

/// A job container given either as a bare image-name string or as a full
/// spec map. (Externally tagged here; see `Container::spec` for flattening.)
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
enum Container {
    ImageName(String),
    DetailedSpec(ContainerSpec),
}

impl Container {
    fn spec(self) -> ContainerSpec {
        match self {
            Container::ImageName(n) => ContainerSpec {
                name: n,
                ..Default::default()
            },
            Container::DetailedSpec(s) => s,
        }
    }
}

// TODO
/// Used to host service containers for a job in a workflow. Service containers
/// are useful for creating databases or cache services like Redis.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Service {
    // Service container image name.
    name: String,
    // Port mappings, e.g. "8080:80".
    ports: Vec<String>,
}

/// A single job within a workflow: where it runs, what it needs, and the
/// steps it executes.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Job {
    /// The name of the job displayed on GitHub.
    name: Option<String>,

    /// Identifies any jobs that must complete successfully before this job
    /// will run. It can be a string or array of strings. If a job fails, all jobs that
    /// need it are skipped unless the jobs use a conditional expression that causes
    /// the job to continue.
    // NOTE(review): the doc above says "string or array", but `Vec<String>`
    // only accepts the array form; confirm single-string `needs` is handled.
    #[serde(default)]
    needs: Vec<String>,

    /// The type of machine to run the job on. The machine can be either a GitHub-hosted
    /// runner or a self-hosted runner.
    runs_on: String,

    /// The environment that the job references. All environment protection rules must
    /// pass before a job referencing the environment is sent to a runner.
    environment: Option<Environment>,

    /// A map of outputs for a job. Job outputs are available to all downstream jobs
    /// that depend on this job.
    outputs: Option<HashMap<String, Output>>,

    /// A map of environment variables that are available to all steps in the job. You
    /// can also set environment variables for the entire workflow or an individual step.
    env: Option<Env>,

    /// A map of default settings that will apply to all steps in the job. You can also
    /// set default settings for the entire workflow.
    defaults: Option<Defaults>,

    /// You can use the if conditional to prevent a job from running unless a condition
    /// is met. You can use any supported context and expression to create a conditional.
    #[serde(rename = "if")]
    run_if: Option<String>,

    /// A job contains a sequence of tasks called steps. Because steps run in
    /// their own process, changes to environment variables are not preserved
    /// between steps. GitHub provides built-in steps to set up and complete a job.
    #[serde(default)]
    steps: Vec<Step>,

    /// The maximum number of minutes to run the step before killing the process.
    timeout_minutes: Option<i32>,

    /// A strategy creates a build matrix for your jobs. You can define different
    /// variations to run each job in.
    strategy: Option<Strategy>,

    /// Prevents a job from failing when a step fails. Set to true to allow a job to
    /// pass when this step fails.
    // NOTE(review): typed as a string here; verify whether a plain boolean
    // should also be accepted.
    continue_on_error: Option<String>,

    /// If you have steps that use both script and container actions, the container
    /// actions will run as sibling containers on the same network with the same volume mounts.
    container: Option<Container>,

    /// The runner automatically creates a Docker network and manages the life
    /// cycle of the service containers.
    #[serde(default)]
    services: Vec<Service>,
}

// Environment-variable map shared by workflows, jobs, and steps.
type Env = HashMap<String, String>;

// Jobs keyed by their YAML job id.
type JobMap = HashMap<String, Job>;

// TODO: determine if outputs _need_ to be an expression and validate
type Output = String;

/// Top-level schema for a GitHub Actions workflow file (legacy copy; the
/// `workflow` module defines the successor used by `main`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Workflow {
    /// The name of your workflow. GitHub displays the names of your workflows on your
    /// repository's actions page. If you omit name, GitHub sets it to the workflow
    /// file path relative to the root of the repository.
    name: Option<String>,

    /// The name of the GitHub event that triggers the workflow. You can provide a
    /// single event string, array of events, array of event types, or an event
    /// configuration map that schedules a workflow or restricts the execution of a
    /// workflow to specific files, tags, or branch changes.
    on: Trigger,

    /// A map of environment variables that are available to all jobs and steps
    /// in the workflow. You can also set environment variables that are only
    /// available to a job or step.
    env: Option<Env>,

    /// A map of default settings that will apply to all jobs in the workflow. You can
    /// also set default settings that are only available to a job.
    defaults: Option<Defaults>,

    /// A workflow run is made up of one or more jobs. Jobs run in parallel by
    /// default. To run jobs sequentially, you can define dependencies on other jobs
    /// using the jobs.<job_id>.needs keyword.
    jobs: JobMap,
}
mod custom_types;
mod workflow;

/// Entry point: read the example workflow file, parse it with both the
/// legacy schema in this file and the new `workflow::Workflow` schema,
/// then print the latter.
fn main() {
    // Typo fixed: "two write" -> "to write".
    println!("Imagine having to write this with marshmallow.py");
    // `fs::read_to_string` replaces the manual File::open + read_to_string
    // dance; `expect` gives a useful message instead of a bare unwrap panic.
    let contents = std::fs::read_to_string("./test_input/example_issue.yaml")
        .expect("failed to read ./test_input/example_issue.yaml");
    // Parse with the legacy in-file schema first as a sanity check, then
    // shadow it with the new `workflow` module's schema, which is printed.
    let workflow: Workflow = serde_yaml::from_str(&contents)
        .expect("example workflow does not match the legacy schema");
    let workflow: workflow::Workflow = serde_yaml::from_str(&contents)
        .expect("example workflow does not match the workflow::Workflow schema");
    print!("{:?}", workflow);
}

A src/workflow.rs => src/workflow.rs +302 -0
@@ 0,0 1,302 @@
use std::collections::HashMap;
use std::str::FromStr;

use serde::{Deserialize, Serialize};
use serde_yaml::{Error, Value};
use void::Void;

use crate::custom_types::OneOrMany;

/// You can schedule a workflow to run at specific UTC times using POSIX cron
/// syntax. Scheduled workflows run on the latest commit on the default or base
/// branch. The shortest interval you can run scheduled workflows is once every 5
/// minutes.
type Schedule = Vec<CronSchedule>;

/// A single `schedule:` entry holding one cron expression.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct CronSchedule {
    // Raw cron expression; not validated here.
    // TODO: validate cron string
    cron: String,
}

// TODO: enumerate these, starting with the common ones
/// Event types
///
/// Each variant carries the event's configuration as raw YAML.
// NOTE(review): with `untagged`, serde tries variants in declaration order
// by *shape*, and every variant here is a bare `(Value)` that matches any
// input — so everything deserializes as `Push` and the event name is never
// actually checked. Variant names only matter for serialization. Confirm
// whether this is intentional scaffolding for the TODO above.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged, rename_all = "kebab-case")]
enum Event {
    Push(Value),
    PullRequest(Value),
    WorkflowDispatch(Value),
    RepositoryDispatch(Value),
    CheckRun(Value),
    CheckSuite(Value),
    Create(Value),
    Delete(Value),
    Deployment(Value),
    DeploymentStatus(Value),
    Fork(Value),
    Gollum(Value),
    IssueComment(Value),
    Issues(Value),
    Label(Value),
    Milestone(Value),
    PageBuild(Value),
    Project(Value),
    ProjectCard(Value),
    ProjectColumn(Value),
    Public(Value),
    PullRequestReview(Value),
    PullRequestReviewComment(Value),
    PullRequestTarget(Value),
    RegistryPackage(Value),
    Release(Value),
    Status(Value),
    Watch(Value),
    WorkflowRun(Value),
}

/// Trigger types for a workflow.
///
/// Untagged: the `on:` value is matched by shape — either one-or-many
/// events, or a cron schedule list.
// NOTE(review): variants are tried in order, and `Event` (also untagged,
// all variants `(Value)`) matches any input — so `Events` will likely win
// even for schedule-shaped input; verify with an `on: schedule:` fixture.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
#[serde(untagged)]
enum Trigger {
    Events(OneOrMany<Event>),
    Schedule(Schedule),
}

/// The per-`run`-step settings that can be defaulted: shell and working
/// directory. Both optional; an absent field leaves the runner default.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct DefaultSettings {
    shell: Option<String>,
    working_directory: Option<String>,
}

/// Provide default shell and working-directory to all run steps in the job.
/// Context and expression are not allowed in this section.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Defaults {
    run: DefaultSettings,
}

/// The environment that the job references. All environment protection rules
/// must pass before a job referencing the environment is sent to a runner.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Environment {
    // Required: the environment's name.
    name: String,
    // Optional URL displayed for the deployment.
    url: Option<String>,
}

// Placeholder: the matrix is kept as raw YAML until it gets a typed schema.
// TODO
type Matrix = Value;

/// A build-matrix strategy for a job: the matrix itself plus the
/// fail-fast and max-parallel knobs, all optional.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Strategy {
    matrix: Option<Matrix>,
    fail_fast: Option<bool>,
    max_parallel: Option<i32>,
}

/// Runs command-line programs using the operating system's shell. If you do not
/// provide a name, the step name will default to the text specified in the run
/// command.
type ShellCommand = String;

/// Steps can run commands, run setup tasks, or run an action in your
/// repository, a public repository, or an action published in a Docker registry.
/// Not all steps run actions, but all actions run as a step. Each step runs in its
/// own process in the runner environment and has access to the workspace and
/// filesystem.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Step {
    /// A name for your step to display on GitHub.
    name: Option<String>,

    /// A unique identifier for the step. You can use the id to reference the
    /// step in contexts.
    id: Option<String>,

    /// You can use the if conditional to prevent a step from running unless a
    /// condition is met. You can use any supported context and expression to
    /// create a conditional.
    #[serde(rename = "if")]
    run_if: Option<String>,

    /// Selects an action to run as part of a step in your job. An action is a reusable
    /// unit of code. You can use an action defined in the same repository as the
    /// workflow, a public repository, or in a published Docker container image.
    ///
    /// Optional: a step supplies either `uses` (an action) or `run` (a shell
    /// command); a required `uses` would reject every run-only step.
    uses: Option<String>,

    /// The shell command to run; used instead of `uses` for script steps.
    run: Option<ShellCommand>,

    /// A map of the input parameters defined by the action. Each input parameter is a
    /// key/value pair. Input parameters are set as environment variables. The variable
    /// is prefixed with INPUT_ and converted to upper case.
    ///
    /// Both `entrypoint` and `args` are supported and override a docker image's default
    /// values for those variables.
    #[serde(default)]
    with: HashMap<String, String>,

    /// Sets environment variables for steps to use in the runner environment. You can
    /// also set environment variables for the entire workflow or a job.
    #[serde(default)]
    env: Env,

    /// Prevents a job from failing when a step fails. Set to true to allow a job to
    /// pass when this step fails.
    continue_on_error: Option<bool>,

    /// The maximum number of minutes to run the step before killing the process.
    timeout_minutes: Option<i32>,
}

/// A container to run any steps in a job that don't already specify a container.
///
/// Also deserializable from a bare image-name string via its `FromStr` impl.
#[derive(Debug, Default, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case", default)]
pub struct Container {
    /// Docker image name.
    name: String,
    /// Registry credentials for pulling private images.
    credentials: Option<HashMap<String, String>>,
    env: Option<Env>,
    #[serde(default)]
    ports: Vec<i32>,
    /// Volume mounts are `source:destination` path strings (like `options`),
    /// so `Vec<String>` — the previous `Vec<i32>` could not deserialize any
    /// real `volumes:` entry.
    #[serde(default)]
    volumes: Vec<String>,
    #[serde(default)]
    options: Vec<String>,
}
impl FromStr for Container {
    // This implementation of `from_str` can never fail, so use the impossible
    // `Void` type as the error type.
    type Err = Void;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Container {
            name: s.to_string(),
            ..Default::default()
        })
    }
}

// TODO
/// Used to host service containers for a job in a workflow. Service containers
/// are useful for creating databases or cache services like Redis.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Service {
    // Service container image name.
    name: String,
    // Port mappings, e.g. "8080:80".
    #[serde(default)]
    ports: Vec<String>,
}

/// A single job within a workflow: where it runs, what it needs, and the
/// steps it executes.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Job {
    /// The name of the job displayed on GitHub.
    name: Option<String>,

    /// Identifies any jobs that must complete successfully before this job
    /// will run. It can be a string or array of strings. If a job fails, all jobs that
    /// need it are skipped unless the jobs use a conditional expression that causes
    /// the job to continue.
    // NOTE(review): the doc above says "string or array", but `Vec<String>`
    // only accepts the array form; `OneOrMany<String>` (as used for
    // `runs_on`) would cover both — confirm.
    #[serde(default)]
    needs: Vec<String>,

    /// The type of machine to run the job on. The machine can be either a GitHub-hosted
    /// runner or a self-hosted runner.
    runs_on: OneOrMany<String>,

    /// The environment that the job references. All environment protection rules must
    /// pass before a job referencing the environment is sent to a runner.
    environment: Option<Environment>,

    /// A map of outputs for a job. Job outputs are available to all downstream jobs
    /// that depend on this job.
    outputs: Option<HashMap<String, Output>>,

    /// A map of environment variables that are available to all steps in the job. You
    /// can also set environment variables for the entire workflow or an individual step.
    #[serde(default)]
    env: Env,

    /// A map of default settings that will apply to all steps in the job. You can also
    /// set default settings for the entire workflow.
    defaults: Option<Defaults>,

    /// You can use the if conditional to prevent a job from running unless a condition
    /// is met. You can use any supported context and expression to create a conditional.
    #[serde(rename = "if")]
    run_if: Option<String>,

    /// A job contains a sequence of tasks called steps. Because steps run in
    /// their own process, changes to environment variables are not preserved
    /// between steps. GitHub provides built-in steps to set up and complete a job.
    #[serde(default)]
    steps: Vec<Step>,

    /// The maximum number of minutes to run the step before killing the process.
    timeout_minutes: Option<i32>,

    /// A strategy creates a build matrix for your jobs. You can define different
    /// variations to run each job in.
    strategy: Option<Strategy>,

    /// Prevents a job from failing when a step fails. Set to true to allow a job to
    /// pass when this step fails.
    // NOTE(review): `Option<String>` here but `Option<bool>` on `Step` —
    // confirm which type is intended and make them consistent.
    continue_on_error: Option<String>,

    /// If you have steps that use both script and container actions, the container
    /// actions will run as sibling containers on the same network with the same volume mounts.
    container: Option<Container>,

    /// The runner automatically creates a Docker network and manages the life
    /// cycle of the service containers.
    #[serde(default)]
    services: Vec<Service>,
}

// Environment-variable map shared by workflows, jobs, and steps.
type Env = HashMap<String, String>;

// Jobs keyed by their YAML job id.
type JobMap = HashMap<String, Job>;

// TODO: determine if outputs _need_ to be an expression and validate
type Output = String;

/// Top-level schema for a GitHub Actions workflow file.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Workflow {
    /// The name of your workflow. GitHub displays the names of your workflows on your
    /// repository's actions page. If you omit name, GitHub sets it to the workflow
    /// file path relative to the root of the repository.
    name: Option<String>,

    /// The name of the GitHub event that triggers the workflow. You can provide a
    /// single event string, array of events, array of event types, or an event
    /// configuration map that schedules a workflow or restricts the execution of a
    /// workflow to specific files, tags, or branch changes.
    on: Trigger,

    /// A map of environment variables that are available to all jobs and steps
    /// in the workflow. You can also set environment variables that are only
    /// available to a job or step.
    env: Option<Env>,

    /// A map of default settings that will apply to all jobs in the workflow. You can
    /// also set default settings that are only available to a job.
    defaults: Option<Defaults>,

    /// A workflow run is made up of one or more jobs. Jobs run in parallel by
    /// default. To run jobs sequentially, you can define dependencies on other jobs
    /// using the jobs.<job_id>.needs keyword.
    jobs: JobMap,
}

impl Workflow {
    /// Parse a workflow definition from a YAML string.
    ///
    /// # Errors
    ///
    /// Returns a `serde_yaml::Error` when `input` is not valid YAML or does
    /// not match the workflow schema.
    pub fn parse_str(input: &str) -> Result<Self, Error> {
        // `input` is already a `&str` — re-borrowing it (`&input`) was a
        // clippy `needless_borrow` lint.
        serde_yaml::from_str(input)
    }
}