~ciriarte/apizotl

0d4dbf23d7702882daaf05a9601615a4b975864f — Carlos Iriarte 3 years ago 29b6054
fix: add senators
3 files changed, 56 insertions(+), 15 deletions(-)

M src/congress/legislatures.rs
M src/congress/mod.rs
M src/congress/senators.rs
M src/congress/legislatures.rs => src/congress/legislatures.rs +1 -1
@@ -62,7 +62,7 @@ async fn process(_opts: &Opt) -> std::result::Result<Vec<Legislature>, Error> {
             .to_string();
         let name = a.text_contents();

-        let link = if href.starts_with("/wiki") {
+        let _link = if href.starts_with("/wiki") {
             let link_str =
                 format!("{}://{}{}", url.scheme(), url.host_str().unwrap(), href).to_string();
             Some(link_str)

M src/congress/mod.rs => src/congress/mod.rs +1 -1
@@ -6,7 +6,7 @@ fn with_title(
     h: kuchiki::NodeDataRef<kuchiki::ElementData>,
     title: String,
 ) -> Option<kuchiki::NodeDataRef<kuchiki::ElementData>> {
-    if h.text_contents().starts_with(&title) {
+    if h.text_contents().contains(&title) {
         Some(h)
     } else {
         None

M src/congress/senators.rs => src/congress/senators.rs +54 -13
@@ -10,10 +10,19 @@ use crate::{log_error, opt::Opt};
 use reqwest::Url;

 #[derive(Serialize, Deserialize, Debug)]
-struct CongressMember {
-    name: String,
-    state: String,
-    district: u8
+enum CongressMember {
+    ByState {
+        name: String,
+        state: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        party: Option<String>
+    },
+
+    ByList {
+        name: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        party: Option<String>
+    }
 }

 pub fn run(opts: Opt) {


@@ -59,7 +68,10 @@ async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
             };

             if let Some(l) = link {
-                extract(l)?
+                let senators = extract(l)?;
+                let j = serde_json::to_string(&senators)?;
+
+                println!("{}", j);
             }
         }
     }


@@ -67,7 +79,7 @@ async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
     Ok(())
 }

-fn extract(link: String) -> std::result::Result<(), Error> {
+fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
     println!("{:?}", link);
     let url = Url::parse(&link)?;



@@ -80,7 +92,9 @@ fn extract(link: String) -> std::result::Result<(), Error> {
     let document = kuchiki::parse_html().one(s);
     let h3_list = document.select("h3").unwrap();

-    for h3 in h3_list.filter_map(|h| super::with_title(h, "Diputados por distrito".to_string())) {
+    let mut senators = vec![];
+
+    for h3 in h3_list.filter_map(|h| super::with_title(h, "entidad federativa".to_string())) {
         let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
         println!("  {:?}", h3.as_node().text_contents());
         for tr in table.select("tr:not(:first-child)").unwrap() {


@@ -93,19 +107,46 @@ fn extract(link: String) -> std::result::Result<(), Error> {
             let (first, second) = cols.split_at(count / 2);

             for col in &[first, second] {
-                let person = CongressMember {
-                    district: col[1].text_contents().parse::<u8>().unwrap_or_default(),
+                let person = CongressMember::ByState {
                     state: col[0].text_contents(),
-                    name: col[2]
+                    name: col[1]
                         .as_node()
                         .select_first("a")
-                        .map_or_else(|_| "".to_owned(), |v| v.text_contents())
+                        .map_or_else(|_| "".to_owned(), |v| v.text_contents()),
+                    party: None
                 };
-                
+                senators.push(person);
             }
         }
     }

+    let h3_list = document.select("h3").unwrap();
+    for h3 in h3_list.filter_map(|h| super::with_title(h, "lista nacional".to_string())) {
+        let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
+        println!("  {:?}", h3.as_node().text_contents());
+        for tr in table.select("tr:not(:first-child)").unwrap() {
+            let cols: Vec<kuchiki::NodeDataRef<ElementData>> = tr.as_node()
+                        .select("td")
+                        .unwrap()
+                        .collect();
+
+            let (first, rest) = cols.split_at(2);
+            let (second, third) = rest.split_at(2);
+
+            for col in &[first, second, third] {
+                let person = CongressMember::ByList {
+                    name: col[0]
+                        .as_node()
+                        .select_first("a")
+                        .map_or_else(|_| "".to_owned(), |v| v.text_contents()),
+                    party: None
+                };
+    
+                println!("{:?}", person);                
+                senators.push(person);
+            }
+        }
+    }

-    Ok(())
+    Ok(senators)
 }