~ciriarte/apizotl

943bcc8ec518bda1726ed8d2783516d41b577f53 — Carlos Iriarte 2 years ago 0d4dbf2
feat: untag deputy serialization
2 files changed, 44 insertions(+), 47 deletions(-)

M src/congress/deputies.rs
M src/congress/legislatures.rs
M src/congress/deputies.rs => src/congress/deputies.rs +17 -42
@@ 9,7 9,10 @@ use crate::{log_error, opt::Opt};

use reqwest::Url;

use super::legislatures::Legislature;

#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
enum CongressMember {
    Uninominal {
        name: String,


@@ 33,53 36,27 @@ pub fn run(opts: Opt) {
}

/// Gathers congress members and prints them, both with `{:?}` and as JSON.
///
/// NOTE(review): this span looks like a diff rendering whose +/- markers were
/// lost — statements from the pre-change and post-change versions appear
/// interleaved (`j` is bound twice, and `extract` is used both as a `map`
/// callback over legislatures and called with a `String` link). Confirm
/// against the repository before reading it as one coherent function body.
///
/// # Errors
/// Every `?` below returns early with the underlying failure: URL parsing,
/// `legislatures::fetch`, the blocking HTTP GET, copying the body, UTF-8
/// decoding, `extract`, and JSON serialization.
///
/// # Panics
/// The `unwrap()` calls on `select(..)` and `url.host_str()` panic if the
/// selector is rejected or the URL has no host.
async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
    // Index page on es.wikipedia.org (an "Anexo" listing congresses and legislatures).
    let url = 
        Url::parse(
            "https://es.wikipedia.org/wiki/Anexo:Congresos_y_Legislaturas_del_Congreso_de_la_Uni%C3%B3n_de_M%C3%A9xico"
        )?;
    // Legislature list produced by the sibling `legislatures` module.
    let legislatures = super::legislatures::fetch().await?;

    // Blocking GET of the index page; `to_owned` keeps `url` usable afterwards.
    let mut res = reqwest::blocking::get(url.to_owned())?;
    // Keep only legislatures whose name contains "LXIV Legislatura", run
    // `extract` on each, silently drop the ones that failed, and flatten the
    // per-legislature vectors into a single list.
    let deputies: Vec<CongressMember> = legislatures.iter()
        .filter(|l| l.name.to_owned().contains("LXIV Legislatura"))
        .map(extract)
        .filter_map(|v| v.ok())
        .flat_map(|v| v)
        .collect();

    // Read the whole response body into memory and decode it as UTF-8.
    let mut buf: Vec<u8> = vec![];
    res.copy_to(&mut buf)?;
    let s: String = String::from_utf8(buf)?;
    // Debug dump of the collected members.
    println!("{:?}", deputies);

    let table_selector = ".wikitable";
    let document = kuchiki::parse_html().one(s);
    // JSON form of the collected members; printed after the loop below.
    let j = serde_json::to_string(&deputies)?;

    // Walk every element matching `.wikitable` in the parsed page.
    for table_match in document.select(table_selector).unwrap() {
        let node = table_match.as_node();
        // Anchors located inside a first-child `td`.
        for a in node.select("td:first-child a").unwrap() {
            // Missing `href` attributes become the empty string.
            let href = a
                .attributes
                .borrow()
                .get("href")
                .unwrap_or_default()
                .to_string();

            // Only site-relative "/wiki..." links are followed; they are made
            // absolute by reusing the scheme and host of the index URL.
            let link = if href.starts_with("/wiki") {
                let link_str =
                    format!("{}://{}{}", url.scheme(), url.host_str().unwrap(), href).to_string();
                Some(link_str)
            } else {
                None
            };

            if let Some(l) = link {
                // These bindings shadow the outer `deputies` / `j` for this
                // iteration only; a failure here aborts the whole function.
                let deputies = extract(l)?;
                let j = serde_json::to_string(&deputies)?;

                println!("{}", j);
            }
        }
    }
    // Prints the outer `j` (serialization of the filtered member list).
    println!("{}", j);

    Ok(())
}

fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
    println!("{:?}", link);
    let url = Url::parse(&link)?;
fn extract(l: &Legislature) -> std::result::Result<Vec<CongressMember>, Error> {
    println!("{:?}", l.link);
    let url = Url::parse(l.link.as_ref().unwrap())?;

    let mut res = reqwest::blocking::get(url)?;



@@ 115,7 92,6 @@ fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
                };
    
                deputies.push(person);
                //println!("{:?}", person);                
            }
        }
    }


@@ 147,8 123,7 @@ fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
                    circumscription: col[0].text_contents(),
                };
    
                deputies.push(person);
                //println!("{:?}", person);                
                deputies.push(person);  
            }
        }
    }

M src/congress/legislatures.rs => src/congress/legislatures.rs +27 -5
@@ 1,3 1,4 @@
use serde::{Deserialize, Serialize};
use async_std::{process, task};
use failure::Error;



@@ 8,9 9,10 @@ use kuchiki::{ElementData, traits::*};
use crate::{log_error, opt::Opt};

use reqwest::Url;
// One legislature record with its name, optional link, and start/end strings.
//
// NOTE(review): two declarations are interleaved here — a private,
// `Debug`-only struct header followed by a public, serde-derived one. This
// looks like the before/after of a diff whose +/- markers were stripped;
// confirm which lines are current before relying on this text.
#[derive(Debug)]
struct Legislature {
    name: String,
// Second header: adds `Serialize`/`Deserialize` and makes the type public.
#[derive(Serialize, Deserialize, Debug)]
pub struct Legislature {
    // Public: readable from outside this module.
    pub name: String,
    // Optional link; the construction site visible in this file fills it with
    // an absolute URL when the href starts with "/wiki", otherwise `None`.
    pub link: Option<String>,
    // Kept private; presumably the period boundaries as text — TODO confirm
    // format against the code that populates them.
    start: String,
    end: String
}


@@ 24,7 26,17 @@ pub fn run(opts: Opt) {
    });
}

/// Fetches the legislature list, serializes it to JSON and prints it.
///
/// NOTE(review): two signature lines appear back to back below, differing
/// only in return type (`Vec<Legislature>` vs `()`). The body ends in
/// `Ok(())`, which matches the second one; the first is presumably the
/// pre-change line of a diff whose +/- markers were lost — confirm.
///
/// # Errors
/// Returns early via `?` if `fetch()` or `serde_json::to_string` fails.
async fn process(_opts: &Opt) -> std::result::Result<Vec<Legislature>, Error> {
async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
    // All retrieval work is delegated to `fetch`.
    let legislatures = fetch().await?;

    // Serialize the whole list as a single JSON string.
    let j = serde_json::to_string(&legislatures)?;

    println!("{}", j);

    Ok(())
}

pub async fn fetch() -> std::result::Result<Vec<Legislature>, Error> {
    let mut legislatures: Vec<Legislature> = vec![];

    let url = 


@@ 81,10 93,20 @@ async fn process(_opts: &Opt) -> std::result::Result<Vec<Legislature>, Error> {
            Some(m) => m.get(0).unwrap().as_str().to_owned(),
            None => "".to_owned()
        };

        let link = if href.starts_with("/wiki") {
            let link_str =
                format!("{}://{}{}", url.scheme(), url.host_str().unwrap(), href).to_string();
            Some(link_str)
        } else {
            None
        };

        let l = Legislature {
            name,
            start,
            end
            end,
            link
        };

        legislatures.push(l);