M src/congress/deputies.rs => src/congress/deputies.rs +17 -42
@@ 9,7 9,10 @@ use crate::{log_error, opt::Opt};
use reqwest::Url;
+use super::legislatures::Legislature;
+
#[derive(Serialize, Deserialize, Debug)]
+#[serde(untagged)]
enum CongressMember {
Uninominal {
name: String,
@@ 33,53 36,27 @@ pub fn run(opts: Opt) {
}
+/// Fetches the list of legislatures, scrapes the deputies of every entry whose
+/// name contains "LXIV Legislatura", and prints them to stdout: first as a
+/// `Debug` dump, then as a single JSON array.
+///
+/// Matching entries that carry no link are skipped, and an entry whose page
+/// cannot be scraped is reported on stderr and skipped, so one bad page does
+/// not discard the results of the others.
+///
+/// NOTE: `extract` issues a blocking HTTP request, so this future blocks the
+/// executor thread while scraping.
+///
+/// # Errors
+///
+/// Returns an error when the legislature list cannot be fetched or when the
+/// collected deputies cannot be serialized to JSON.
async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
- let url =
- Url::parse(
- "https://es.wikipedia.org/wiki/Anexo:Congresos_y_Legislaturas_del_Congreso_de_la_Uni%C3%B3n_de_M%C3%A9xico"
- )?;
+    let legislatures = super::legislatures::fetch().await?;
- let mut res = reqwest::blocking::get(url.to_owned())?;
+    let deputies: Vec<CongressMember> = legislatures
+        .iter()
+        // `extract` unwraps `link`, so entries without one must be dropped
+        // here to avoid a panic.
+        .filter(|l| l.link.is_some() && l.name.contains("LXIV Legislatura"))
+        .filter_map(|l| match extract(l) {
+            Ok(members) => Some(members),
+            Err(e) => {
+                // Best-effort: keep going, but do not hide the failure.
+                eprintln!("skipping {}: {}", l.name, e);
+                None
+            }
+        })
+        .flatten()
+        .collect();
- let mut buf: Vec<u8> = vec![];
- res.copy_to(&mut buf)?;
- let s: String = String::from_utf8(buf)?;
+    println!("{:?}", deputies);
- let table_selector = ".wikitable";
- let document = kuchiki::parse_html().one(s);
+    let j = serde_json::to_string(&deputies)?;
- for table_match in document.select(table_selector).unwrap() {
- let node = table_match.as_node();
- for a in node.select("td:first-child a").unwrap() {
- let href = a
- .attributes
- .borrow()
- .get("href")
- .unwrap_or_default()
- .to_string();
-
- let link = if href.starts_with("/wiki") {
- let link_str =
- format!("{}://{}{}", url.scheme(), url.host_str().unwrap(), href).to_string();
- Some(link_str)
- } else {
- None
- };
-
- if let Some(l) = link {
- let deputies = extract(l)?;
- let j = serde_json::to_string(&deputies)?;
-
- println!("{}", j);
- }
- }
- }
+    println!("{}", j);
Ok(())
}
-fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
- println!("{:?}", link);
- let url = Url::parse(&link)?;
+fn extract(l: &Legislature) -> std::result::Result<Vec<CongressMember>, Error> {
+ println!("{:?}", l.link);
+ let url = Url::parse(l.link.as_ref().unwrap())?;
let mut res = reqwest::blocking::get(url)?;
@@ 115,7 92,6 @@ fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
};
deputies.push(person);
- //println!("{:?}", person);
}
}
}
@@ 147,8 123,7 @@ fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
circumscription: col[0].text_contents(),
};
- deputies.push(person);
- //println!("{:?}", person);
+ deputies.push(person);
}
}
}
M src/congress/legislatures.rs => src/congress/legislatures.rs +27 -5
@@ 1,3 1,4 @@
+use serde::{Deserialize, Serialize};
use async_std::{process, task};
use failure::Error;
@@ 8,9 9,10 @@ use kuchiki::{ElementData, traits::*};
use crate::{log_error, opt::Opt};
use reqwest::Url;
-#[derive(Debug)]
-struct Legislature {
- name: String,
+/// One legislature entry, as assembled and returned by `fetch` in this module.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct Legislature {
+ /// Name of the legislature.
+ pub name: String,
+ /// Absolute URL built from the entry's href when that href starts with
+ /// `/wiki`; `None` otherwise.
+ pub link: Option<String>,
+ // NOTE(review): presumably the start of the legislature's term as scraped text; format not visible here, confirm.
start: String,
+ // NOTE(review): presumably the end of the legislature's term as scraped text; format not visible here, confirm.
end: String
}
@@ 24,7 26,17 @@ pub fn run(opts: Opt) {
});
}
-async fn process(_opts: &Opt) -> std::result::Result<Vec<Legislature>, Error> {
+/// Fetches every legislature via `fetch` and prints the whole list to stdout
+/// as one JSON document.
+///
+/// # Errors
+///
+/// Propagates any failure from `fetch` or from JSON serialization.
+async fn process(_opts: &Opt) -> std::result::Result<(), Error> {
+    let all = fetch().await?;
+    println!("{}", serde_json::to_string(&all)?);
+    Ok(())
+}
+
+pub async fn fetch() -> std::result::Result<Vec<Legislature>, Error> {
let mut legislatures: Vec<Legislature> = vec![];
let url =
@@ 81,10 93,20 @@ async fn process(_opts: &Opt) -> std::result::Result<Vec<Legislature>, Error> {
Some(m) => m.get(0).unwrap().as_str().to_owned(),
None => "".to_owned()
};
+
+ let link = if href.starts_with("/wiki") {
+ let link_str =
+ format!("{}://{}{}", url.scheme(), url.host_str().unwrap(), href).to_string();
+ Some(link_str)
+ } else {
+ None
+ };
+
let l = Legislature {
name,
start,
- end
+ end,
+ link
};
legislatures.push(l);