A .vscode/launch.json => .vscode/launch.json +45 -0
@@ 0,0 1,45 @@
+{
+ // Use IntelliSense to learn about possible attributes.
+ // Hover to view descriptions of existing attributes.
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+ "version": "0.2.0",
+ "configurations": [
+        {
+            // Debugs the `apizotl` binary. `senators` is a subcommand of
+            // `congress` in the CLI, so both words have to be passed.
+            "type": "lldb",
+            "request": "launch",
+            "name": "Debug executable 'apizotl'",
+            "cargo": {
+                "args": [
+                    "build",
+                    "--bin=apizotl",
+                    "--package=apizotl"
+                ],
+                "filter": {
+                    "name": "apizotl",
+                    "kind": "bin"
+                }
+            },
+            "args": ["congress", "senators"],
+            "cwd": "${workspaceFolder}"
+        },
+ // Builds the `apizotl` test binary with `cargo test --no-run` and launches
+ // it under the `lldb` debugger type with no extra program arguments.
+ // NOTE(review): `filter` presumably selects which cargo artifact to debug — confirm against the debugger extension's docs.
+ {
+ "type": "lldb",
+ "request": "launch",
+ "name": "Debug unit tests in executable 'apizotl'",
+ "cargo": {
+ "args": [
+ "test",
+ "--no-run",
+ "--bin=apizotl",
+ "--package=apizotl"
+ ],
+ "filter": {
+ "name": "apizotl",
+ "kind": "bin"
+ }
+ },
+ "args": [],
+ "cwd": "${workspaceFolder}"
+ }
+ ]
+}<
\ No newline at end of file
A .vscode/settings.json => .vscode/settings.json +4 -0
@@ 0,0 1,4 @@
+{
+ "rust-analyzer.cargo.allFeatures": true,
+ "rust-analyzer.checkOnSave.command": "clippy"
+}<
\ No newline at end of file
R src/deputies.rs => src/congress/deputies.rs +3 -14
@@ 81,7 81,7 @@ fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
println!("{:?}", link);
let url = Url::parse(&link)?;
- let mut res = reqwest::blocking::get(url.to_owned())?;
+ let mut res = reqwest::blocking::get(url)?;
let mut buf: Vec<u8> = vec![];
res.copy_to(&mut buf)?;
@@ 92,7 92,7 @@ fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
let document = kuchiki::parse_html().one(s);
let h3_list = document.select("h3").unwrap();
- for h3 in h3_list.filter_map(|h| with_title(h, "Diputados por distrito".to_string())) {
+ for h3 in h3_list.filter_map(|h| super::with_title(h, "Diputados por distrito".to_string())) {
let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
println!(" {:?}", h3.as_node().text_contents());
for tr in table.select("tr:not(:first-child)").unwrap() {
@@ 121,7 121,7 @@ fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
}
let h3_list = document.select("h3").unwrap();
- for h3 in h3_list.filter_map(|h| with_title(h, "Diputados por representaci".to_string())) {
+ for h3 in h3_list.filter_map(|h| super::with_title(h, "Diputados por representaci".to_string())) {
let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
println!(" {:?}", h3.as_node().text_contents());
for tr in table.select("tr:not(:first-child)").unwrap() {
@@ 155,14 155,3 @@ fn extract(link: String) -> std::result::Result<Vec<CongressMember>, Error> {
Ok(deputies)
}
-
-fn with_title(
- h: kuchiki::NodeDataRef<kuchiki::ElementData>,
- title: String,
-) -> Option<kuchiki::NodeDataRef<kuchiki::ElementData>> {
- if h.text_contents().starts_with(&title) {
- Some(h)
- } else {
- None
- }
-}>
\ No newline at end of file
A src/congress/legislatures.rs => src/congress/legislatures.rs +94 -0
@@ 0,0 1,94 @@
+use async_std::{process, task};
+use failure::Error;
+
+use regex::Regex;
+
+use kuchiki::{ElementData, traits::*};
+
+use crate::{log_error, opt::Opt};
+
+use reqwest::Url;
+/// One data row of the legislatures table scraped by this module.
+#[derive(Debug)]
+struct Legislature {
+ /// Text of the first link found in the row's first cell.
+ name: String,
+ /// First date-like match found in the text of the row's second cell.
+ start: String,
+ /// First date-like match in the row's third cell; empty when none matches.
+ end: String
+}
+
+/// Entry point for the `legislatures` subcommand.
+///
+/// Drives [`process`] to completion on the current thread. On failure the
+/// error is handed to `log_error` and the program exits with status 1; on
+/// success the scraped list is dropped without further use.
+pub fn run(opts: Opt) {
+    let outcome = task::block_on(process(&opts));
+
+    if let Err(err) = outcome {
+        log_error(&err);
+        process::exit(1);
+    }
+}
+
+/// Scrapes the Spanish-Wikipedia annex that lists the legislatures of the
+/// Mexican Congress and returns one [`Legislature`] per data row.
+///
+/// For every `.wikitable tbody tr` row that contains `td` cells:
+/// * `name` is the text of the first link in the first cell,
+/// * `start` is the first date-like match in the second cell,
+/// * `end` is the first date-like match in the third cell, or an empty
+///   string when that cell holds no date.
+///
+/// `_opts` is currently unused.
+///
+/// # Errors
+///
+/// Returns an error when the URL cannot be parsed, the HTTP request or body
+/// download fails, the body is not valid UTF-8, or a data row is malformed
+/// (fewer than three cells, no link in the first cell, or no start date).
+///
+/// # Panics
+///
+/// Only if one of the hard-coded CSS selectors or the date regex is invalid,
+/// which would be a programming error.
+async fn process(_opts: &Opt) -> std::result::Result<Vec<Legislature>, Error> {
+    let url = Url::parse(
+        "https://es.wikipedia.org/wiki/Anexo:Congresos_y_Legislaturas_del_Congreso_de_la_Uni%C3%B3n_de_M%C3%A9xico",
+    )?;
+
+    // NOTE(review): `reqwest::blocking` is called from an `async fn`, so the
+    // calling thread is blocked for the whole request — confirm that is
+    // acceptable for this command.
+    let mut res = reqwest::blocking::get(url)?;
+
+    let mut buf: Vec<u8> = vec![];
+    res.copy_to(&mut buf)?;
+    let html = String::from_utf8(buf)?;
+    let document = kuchiki::parse_html().one(html);
+
+    // Compiled once up front; building a regex per table row is wasted work.
+    let date_pattern = Regex::new(r"(?:\d+\s+de\s)?\w+\sde\s\d+")
+        .expect("hard-coded date pattern is a valid regex");
+
+    let rows = document
+        .select(".wikitable tbody tr")
+        .expect("hard-coded row selector is valid CSS");
+
+    let mut legislatures: Vec<Legislature> = vec![];
+    for tr in rows {
+        let cells: Vec<kuchiki::NodeDataRef<ElementData>> = tr
+            .as_node()
+            .select("td")
+            .expect("`td` is valid CSS")
+            .collect();
+
+        // Header rows are matched by the selector too but carry no `td`.
+        if cells.is_empty() {
+            continue;
+        }
+
+        // Report short rows as an error instead of panicking on an index.
+        if cells.len() < 3 {
+            return Err(failure::format_err!(
+                "legislature row has {} cell(s), expected at least 3: {:?}",
+                cells.len(),
+                tr.text_contents()
+            ));
+        }
+
+        let name = cells[0]
+            .as_node()
+            .select_first("a")
+            .map_err(|_| {
+                failure::format_err!(
+                    "legislature row has no link in its first cell: {:?}",
+                    tr.text_contents()
+                )
+            })?
+            .text_contents();
+
+        // The start date is mandatory: surface its absence to the caller.
+        let start_text = cells[1].text_contents();
+        let start = date_pattern
+            .find(&start_text)
+            .map(|m| m.as_str().to_owned())
+            .ok_or_else(|| {
+                failure::format_err!("no start date found for legislature {:?}", name)
+            })?;
+
+        // The end date is optional; a missing one is stored as "".
+        let end_text = cells[2].text_contents();
+        let end = date_pattern
+            .find(&end_text)
+            .map(|m| m.as_str().to_owned())
+            .unwrap_or_default();
+
+        legislatures.push(Legislature { name, start, end });
+    }
+
+    Ok(legislatures)
+}
\ No newline at end of file
A src/congress/mod.rs => src/congress/mod.rs +14 -0
@@ 0,0 1,14 @@
+pub mod legislatures;
+pub mod deputies;
+pub mod senators;
+
+/// Keeps the heading `h` only when its text content begins with `title`.
+///
+/// Returns `Some(h)` on a prefix match and `None` otherwise, which makes it
+/// directly usable as the closure body of an `Iterator::filter_map` call.
+fn with_title(
+    h: kuchiki::NodeDataRef<kuchiki::ElementData>,
+    title: String,
+) -> Option<kuchiki::NodeDataRef<kuchiki::ElementData>> {
+    Some(h).filter(|heading| heading.text_contents().starts_with(title.as_str()))
+}
\ No newline at end of file
R src/senators.rs => src/congress/senators.rs +2 -13
@@ 71,7 71,7 @@ fn extract(link: String) -> std::result::Result<(), Error> {
println!("{:?}", link);
let url = Url::parse(&link)?;
- let mut res = reqwest::blocking::get(url.to_owned())?;
+ let mut res = reqwest::blocking::get(url)?;
let mut buf: Vec<u8> = vec![];
res.copy_to(&mut buf)?;
@@ 80,7 80,7 @@ fn extract(link: String) -> std::result::Result<(), Error> {
let document = kuchiki::parse_html().one(s);
let h3_list = document.select("h3").unwrap();
- for h3 in h3_list.filter_map(|h| with_title(h, "Diputados por distrito".to_string())) {
+ for h3 in h3_list.filter_map(|h| super::with_title(h, "Diputados por distrito".to_string())) {
let table = h3.as_node().next_sibling().unwrap().next_sibling().unwrap();
println!(" {:?}", h3.as_node().text_contents());
for tr in table.select("tr:not(:first-child)").unwrap() {
@@ 109,14 109,3 @@ fn extract(link: String) -> std::result::Result<(), Error> {
Ok(())
}
-
-fn with_title(
- h: kuchiki::NodeDataRef<kuchiki::ElementData>,
- title: String,
-) -> Option<kuchiki::NodeDataRef<kuchiki::ElementData>> {
- if h.text_contents().starts_with(&title) {
- Some(h)
- } else {
- None
- }
-}
M src/main.rs => src/main.rs +4 -4
@@ 3,8 3,7 @@ use failure::Error;
mod opt;
mod presidents;
-mod senators;
-mod deputies;
+mod congress;
use crate::opt::OutputType;
@@ 15,8 14,9 @@ fn main() {
match output_type {
OutputType::Presidents(opts) => crate::presidents::run(opts),
- OutputType::Senators(opts) => crate::senators::run(opts),
- OutputType::Deputies(opts) => crate::deputies::run(opts),
+ OutputType::Legislatures(opts) => crate::congress::legislatures::run(opts),
+ OutputType::Senators(opts) => crate::congress::senators::run(opts),
+ OutputType::Deputies(opts) => crate::congress::deputies::run(opts),
}
}
M src/opt.rs => src/opt.rs +18 -5
@@ 5,7 5,7 @@ use crate::VERSION;
#[derive(StructOpt, Debug, Clone)]
#[structopt(
name = "apizotl",
- about = "Means \"glutton\" in Nahuatl, the Aztec's language. Eats raw data and organizes as an API",
+ about = "Means \"glutton\" in Nahuatl, the Aztecs' language. Eats raw data and organizes as an API",
version = VERSION,
author = "ciriarte <me@ciriarte.dev>",
setting = DeriveDisplayOrder,
@@ 18,8 18,17 @@ pub struct Opt {
#[derive(StructOpt, Debug, PartialEq, Clone)]
pub enum Command {
Presidents,
- Deputies,
+ Congress {
+ #[structopt(subcommand)]
+ command: CongressCommand,
+ },
+}
+
+#[derive(StructOpt, Debug, PartialEq, Clone, Copy)]
+pub enum CongressCommand {
+ Legislatures,
Senators,
+ Deputies
}
pub fn parse_opts() -> OutputType {
@@ 27,13 36,17 @@ pub fn parse_opts() -> OutputType {
match opts.command {
Command::Presidents { .. } => OutputType::Presidents(opts),
- Command::Deputies { .. } => OutputType::Deputies(opts),
- Command::Senators { .. } => OutputType::Senators(opts),
+ Command::Congress { command } => match command {
+ CongressCommand::Legislatures { .. } => OutputType::Legislatures(opts),
+ CongressCommand::Senators { .. } => OutputType::Senators(opts),
+ CongressCommand::Deputies { .. } => OutputType::Deputies(opts),
+ }
}
}
pub enum OutputType {
Presidents(Opt),
- Deputies(Opt),
+ Legislatures(Opt),
Senators(Opt),
+ Deputies(Opt),
}=
\ No newline at end of file
A src/supreme-court.rs => src/supreme-court.rs +0 -0