~ciriarte/apizotl

b6df6f8984a411c2649b8b369502e56848321278 — Carlos Iriarte 2 years ago 943bcc8
feat: re-use legislature code
3 files changed, 42 insertions(+), 59 deletions(-)

M src/congress/deputies.rs
M src/congress/senators.rs
M src/opt.rs
M src/congress/deputies.rs => src/congress/deputies.rs +5 -7
@@ 5,7 5,7 @@ use failure::Error;

use kuchiki::{ElementData, traits::*};

use crate::{log_error, opt::Opt};
use crate::{log_error};

use reqwest::Url;



@@ 26,27 26,25 @@ enum CongressMember {
    }
}

pub fn run(opts: Opt) {
pub fn run(legislature: String) {
    task::block_on(async {
        if let Err(e) = process(&opts).await {
        if let Err(e) = process(&legislature).await {
            log_error(&e);
            process::exit(1);
        };
    });
}

async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
async fn process(legislature: &str) -> std::result::Result<(), Error> {
    let legislatures = super::legislatures::fetch().await?;

    let deputies: Vec<CongressMember> = legislatures.iter()
        .filter(|l| l.name.to_owned().contains("LXIV Legislatura"))
        .filter(|l| l.name.to_owned().contains(legislature))
        .map(extract)
        .filter_map(|v| v.ok())
        .flat_map(|v| v)
        .collect();

    println!("{:?}", deputies);

    let j = serde_json::to_string(&deputies)?;

    println!("{}", j);

M src/congress/senators.rs => src/congress/senators.rs +18 -44
@@ 5,10 5,12 @@ use failure::Error;

use kuchiki::{ElementData, traits::*};

use crate::{log_error, opt::Opt};
use crate::{log_error};

use reqwest::Url;

use super::legislatures::Legislature;

#[derive(Serialize, Deserialize, Debug)]
enum CongressMember {
    ByState {


@@ 25,63 27,35 @@ enum CongressMember {
    }
}

pub fn run(opts: Opt) {
pub fn run(legislature: String) {
    task::block_on(async {
        if let Err(e) = process(&opts).await {
        if let Err(e) = process(&legislature).await {
            log_error(&e);
            process::exit(1);
        };
    });
}

async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
    let url = 
        Url::parse(
            "https://es.wikipedia.org/wiki/Anexo:Congresos_y_Legislaturas_del_Congreso_de_la_Uni%C3%B3n_de_M%C3%A9xico"
        )?;
async fn process(legislature: &str) -> std::result::Result<(), Error> {
    let legislatures = super::legislatures::fetch().await?;

    let mut res = reqwest::blocking::get(url.to_owned())?;
    let deputies: Vec<CongressMember> = legislatures.iter()
        .filter(|l| l.name.to_owned().contains(legislature))
        .map(extract)
        .filter_map(|v| v.ok())
        .flat_map(|v| v)
        .collect();

    let mut buf: Vec<u8> = vec![];
    res.copy_to(&mut buf)?;
    let s: String = String::from_utf8(buf)?;
    let j = serde_json::to_string(&deputies)?;

    let table_selector = ".wikitable";
    let document = kuchiki::parse_html().one(s);

    for table_match in document.select(table_selector).unwrap() {
        let node = table_match.as_node();
        for a in node.select("td:first-child a").unwrap() {
            let href = a
                .attributes
                .borrow()
                .get("href")
                .unwrap_or_default()
                .to_string();

            let link = if href.starts_with("/wiki") {
                let link_str =
                    format!("{}://{}{}", url.scheme(), url.host_str().unwrap(), href).to_string();
                Some(link_str)
            } else {
                None
            };

            if let Some(l) = link {
                let senators = extract(l)?;
                let j = serde_json::to_string(&senators)?;

                println!("{}", j);
            }
        }
    }
    println!("{}", j);

    Ok(())
}

fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
    println!("{:?}", link);
    let url = Url::parse(&link)?;
fn extract(l: &Legislature) -> std::result::Result<Vec<CongressMember>, Error> {
    println!("{:?}", l.link);
    let url = Url::parse(&l.link.as_ref().unwrap())?;

    let mut res = reqwest::blocking::get(url)?;


M src/opt.rs => src/opt.rs +19 -8
@@ 17,18 17,29 @@ pub struct Opt {

// Top-level subcommand of the CLI; `parse_opts` matches on this value to pick
// the `OutputType` to produce.
// NOTE(review): StructOpt normally turns the `///` comments on variants into
// the user-facing help text, so treat them as runtime strings when rewording.
#[derive(StructOpt, Debug, PartialEq, Clone)]
pub enum Command {
    /// Lists mexican presidents
    Presidents,
    /// Lists congress people and legislatures
    Congress {
        // Nested subcommand (see `CongressCommand`) choosing between the
        // legislatures, senators and deputies listings.
        #[structopt(subcommand)]
        command: CongressCommand,
    },
}

#[derive(StructOpt, Debug, PartialEq, Clone, Copy)]
#[derive(StructOpt, Debug, PartialEq, Clone)]
pub enum CongressCommand {
    /// Lists legislatures
    Legislatures,
    Senators,
    Deputies
    /// Lists senators
    Senators { 
        #[structopt(short, long, parse(from_str))]
        legislature: String
    },
    /// Lists deputies
    Deputies { 
        #[structopt(short, long, parse(from_str))]
        legislature: String
    },
}

pub fn parse_opts() -> OutputType {


@@ 36,10 47,10 @@ pub fn parse_opts() -> OutputType {

    match opts.command {
        Command::Presidents { .. } => OutputType::Presidents(opts),
        Command::Congress { command } => match command {
        Command::Congress { ref command } => match command {
            CongressCommand::Legislatures { .. } => OutputType::Legislatures(opts),
            CongressCommand::Senators { .. } => OutputType::Senators(opts),
            CongressCommand::Deputies { .. } => OutputType::Deputies(opts),
            CongressCommand::Senators { legislature } => OutputType::Senators(legislature.to_owned()),
            CongressCommand::Deputies { legislature } => OutputType::Deputies(legislature.to_owned()),
        }
    }
}


@@ 47,6 58,6 @@ pub fn parse_opts() -> OutputType {
pub enum OutputType {
    Presidents(Opt),
    Legislatures(Opt),
    Senators(Opt),
    Deputies(Opt),
    Senators(String),
    Deputies(String),
}
\ No newline at end of file