~ciriarte/apizotl

6f846ed37d69860360368b83f4d81a73852fc897 — Carlos Iriarte 2 years ago ff48811
wip: emit as json
1 file changed, 61 insertions(+), 44 deletions(-)

M src/deputies.rs
M src/deputies.rs => src/deputies.rs +61 -44
@@ 10,10 10,17 @@ use crate::{log_error, opt::Opt};
use reqwest::Url;

#[derive(Serialize, Deserialize, Debug)]
struct CongressMember {
    name: String,
    state: String,
    district: u8
/// A deputy record collected by `extract`; the resulting list is serialized
/// to JSON with `serde_json::to_string` in `process`.
///
/// No serde tagging attribute is set on this enum, so serde's default enum
/// representation is what ends up in the JSON output.
enum CongressMember {
    /// Built from table rows under the "Diputados por distrito" heading.
    Uninominal {
        /// Text of the `<a>` found in cell index 2 of the member's column
        /// group; an empty string when no link is present.
        name: String,
        /// Text of cell index 0 of the member's column group.
        state: String,
        /// Cell index 1 parsed as `u8`. `None` when the text does not parse,
        /// in which case the field is left out of the serialized output.
        #[serde(skip_serializing_if = "Option::is_none")]
        district: Option<u8>,
    },
    /// Built from table rows under headings that `with_title` matches
    /// against "Diputados por representaci".
    Plurinominal {
        /// Text of the `<a>` found in cell index 1 of the member's column
        /// group; an empty string when no link is present.
        name: String,
        /// Text of cell index 0 of the member's column group.
        circumscription: String
    }
}

pub fn run(opts: Opt) {


@@ 59,7 66,10 @@ async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
            };

            if let Some(l) = link {
                extract(l)?
                let deputies = extract(l)?;
                let j = serde_json::to_string(&deputies)?;

                println!("{}", j);
            }
        }
    }


@@ 67,7 77,7 @@ async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
    Ok(())
}

fn extract(link: String) -> std::result::Result<(), Error> {
fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
    println!("{:?}", link);
    let url = Url::parse(&link)?;



@@ 77,38 87,12 @@ fn extract(link: String) -> std::result::Result<(), Error> {
    res.copy_to(&mut buf)?;
    let s: String = String::from_utf8(buf)?;

    let document = kuchiki::parse_html().one(s);
    // let h3_list = document.select("h3").unwrap();

    // for h3 in h3_list.filter_map(|h| with_title(h, "Diputados por distrito".to_string())) {
    //     let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
    //     println!("  {:?}", h3.as_node().text_contents());
    //     for tr in table.select("tr:not(:first-child)").unwrap() {
    //         let cols: Vec<kuchiki::NodeDataRef<ElementData>> = tr.as_node()
    //                     .select("td")
    //                     .unwrap()
    //                     .collect();

    //         let count = cols.len();
    //         let (first, second) = cols.split_at(count / 2);

    //         for col in &[first, second] {
    //             let person = CongressMember {
    //                 district: col[1].text_contents().parse::<u8>().unwrap_or_default(),
    //                 state: col[0].text_contents(),
    //                 name: col[2]
    //                     .as_node()
    //                     .select_first("a")
    //                     .map_or_else(|_| "".to_owned(), |v| v.text_contents())
    //             };
    
    //             println!("{:?}", person);                
    //         }
    //     }
    // }
    let mut deputies: Vec<CongressMember> = vec![];

    let document = kuchiki::parse_html().one(s);
    let h3_list = document.select("h3").unwrap();
    for h3 in h3_list.filter_map(|h| with_title(h, "Diputados por representaci".to_string())) {

    for h3 in h3_list.filter_map(|h| with_title(h, "Diputados por distrito".to_string())) {
        let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
        println!("  {:?}", h3.as_node().text_contents());
        for tr in table.select("tr:not(:first-child)").unwrap() {


@@ 119,24 103,57 @@ fn extract(link: String) -> std::result::Result<(), Error> {

            let count = cols.len();
            let (first, second) = cols.split_at(count / 2);
            let sub_cols = &[first, second];

            for col in sub_cols {
                let person = CongressMember {
                    district: 0,
                    state: "".to_owned(),
                    name: col[1]
            for col in &[first, second] {
                let person = CongressMember::Uninominal {
                    district: col[1].text_contents().parse::<u8>().ok(),
                    state: col[0].text_contents(),
                    name: col[2]
                        .as_node()
                        .select_first("a")
                        .map_or_else(|_| "".to_owned(), |v| v.text_contents())
                };
    
                println!("{:?}", person);                
                deputies.push(person);
                //println!("{:?}", person);                
            }
        }
    }

    Ok(())
    let h3_list = document.select("h3").unwrap();
    for h3 in h3_list.filter_map(|h| with_title(h, "Diputados por representaci".to_string())) {
        let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
        println!("  {:?}", h3.as_node().text_contents());
        for tr in table.select("tr:not(:first-child)").unwrap() {
            let td: Vec<kuchiki::NodeDataRef<ElementData>> = tr.as_node()
                        .select("td")
                        .unwrap()
                        .collect();

            let count = td.len();
            let cols = if td.len() > 3 {
                let (first, second) = td.split_at(count / 2);
                [first, second].to_vec()
            } else {
                [td.as_slice()].to_vec()
            };

            for col in cols {
                let person = CongressMember::Plurinominal {
                    name: col[1]
                        .as_node()
                        .select_first("a")
                        .map_or_else(|_| "".to_owned(), |v| v.text_contents()),
                    circumscription: col[0].text_contents(),
                };
    
                deputies.push(person);
                //println!("{:?}", person);                
            }
        }
    }

    Ok(deputies)
}

fn with_title(