@@ 62,7 62,7 @@ async fn process(_opts: &Opt) -> std::result::Result<Vec<Legislature>, Error> {
.to_string();
let name = a.text_contents();
- let link = if href.starts_with("/wiki") {
+ let _link = if href.starts_with("/wiki") {
let link_str =
format!("{}://{}{}", url.scheme(), url.host_str().unwrap(), href).to_string();
Some(link_str)
@@ 6,7 6,7 @@ fn with_title(
h: kuchiki::NodeDataRef<kuchiki::ElementData>,
title: String,
) -> Option<kuchiki::NodeDataRef<kuchiki::ElementData>> {
- if h.text_contents().starts_with(&title) {
+ if h.text_contents().contains(&title) {
Some(h)
} else {
None
@@ 10,10 10,19 @@ use crate::{log_error, opt::Opt};
use reqwest::Url;
#[derive(Serialize, Deserialize, Debug)]
-struct CongressMember {
- name: String,
- state: String,
- district: u8
+enum CongressMember { // One scraped member row; the variant records which kind of table it came from.
+ ByState { // Built by `extract` from tables under h3 headings containing "entidad federativa".
+ name: String, // Text of the first `<a>` in the name cell; empty string if the cell has no link.
+ state: String, // Full text content of the row's state cell.
+ #[serde(skip_serializing_if = "Option::is_none")] // Leave `party` out of the output while it is `None`.
+ party: Option<String> // `extract` currently always stores `None` here.
+ },
+
+ ByList { // Built by `extract` from tables under h3 headings containing "lista nacional".
+ name: String, // Text of the first `<a>` in the name cell; empty string if the cell has no link.
+ #[serde(skip_serializing_if = "Option::is_none")] // Leave `party` out of the output while it is `None`.
+ party: Option<String> // `extract` currently always stores `None` here.
+ }
}
pub fn run(opts: Opt) {
@@ 59,7 68,10 @@ async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
};
if let Some(l) = link {
- extract(l)?
+ let senators = extract(l)?;
+ let j = serde_json::to_string(&senators)?;
+
+ println!("{}", j);
}
}
}
@@ 67,7 79,7 @@ async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
Ok(())
}
-fn extract(link: String) -> std::result::Result<(), Error> {
+fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
println!("{:?}", link);
let url = Url::parse(&link)?;
@@ 80,7 92,9 @@ fn extract(link: String) -> std::result::Result<(), Error> {
let document = kuchiki::parse_html().one(s);
let h3_list = document.select("h3").unwrap();
- for h3 in h3_list.filter_map(|h| super::with_title(h, "Diputados por distrito".to_string())) {
+ let mut senators = vec![];
+
+ for h3 in h3_list.filter_map(|h| super::with_title(h, "entidad federativa".to_string())) {
let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
println!(" {:?}", h3.as_node().text_contents());
for tr in table.select("tr:not(:first-child)").unwrap() {
@@ 93,19 107,46 @@ fn extract(link: String) -> std::result::Result<(), Error> {
let (first, second) = cols.split_at(count / 2);
for col in &[first, second] {
- let person = CongressMember {
- district: col[1].text_contents().parse::<u8>().unwrap_or_default(),
+ let person = CongressMember::ByState {
state: col[0].text_contents(),
- name: col[2]
+ name: col[1]
.as_node()
.select_first("a")
- .map_or_else(|_| "".to_owned(), |v| v.text_contents())
+ .map_or_else(|_| "".to_owned(), |v| v.text_contents()),
+ party: None
+ };
+
+ senators.push(person);
+ }
+ }
+ }
+
+ let h3_list = document.select("h3").unwrap();
+ for h3 in h3_list.filter_map(|h| super::with_title(h, "lista nacional".to_string())) {
+ let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
+ println!(" {:?}", h3.as_node().text_contents());
+ for tr in table.select("tr:not(:first-child)").unwrap() {
+ let cols: Vec<kuchiki::NodeDataRef<ElementData>> = tr.as_node()
+ .select("td")
+ .unwrap()
+ .collect();
+
+ let (first, rest) = cols.split_at(2);
+ let (second, third) = rest.split_at(2);
+
+ for col in &[first, second, third] {
+ let person = CongressMember::ByList {
+ name: col[0]
+ .as_node()
+ .select_first("a")
+ .map_or_else(|_| "".to_owned(), |v| v.text_contents()),
+ party: None
};
- println!("{:?}", person);
+ senators.push(person);
}
}
}
- Ok(())
+ Ok(senators)
}