~vikanezrimaya/kittybox

ad3cf73bdeca9b55f6f57a0bbbc82cf1bb1172e8 — Vika 4 months ago b140de9 pyo3
Allow the PyO3 example to use the `microformats` crate instead
2 files changed, 59 insertions(+), 16 deletions(-)

M kittybox-rs/Cargo.toml
M kittybox-rs/examples/pyo3-mf2.rs
M kittybox-rs/Cargo.toml => kittybox-rs/Cargo.toml +1 -1
@@ 38,7 38,7 @@ required-features = ["cli"]
[[example]]
name = "pyo3-mf2"
path = "examples/pyo3-mf2.rs"
required-features = ["cli", "pyo3"]
required-features = ["cli"]

[workspace]
members = [".", "./util", "./templates", "./indieauth"]

M kittybox-rs/examples/pyo3-mf2.rs => kittybox-rs/examples/pyo3-mf2.rs +58 -15
@@ 1,15 1,21 @@
use clap::Parser;
use pyo3::prelude::*;
use pyo3::types::IntoPyDict;

/// Every failure this example can report, from fetching the page to
/// serializing the parsed microformats.
///
/// Each wrapping variant derives a `From` conversion via `#[from]`, so the
/// underlying error types can be propagated with `?`.
#[derive(Debug, thiserror::Error)]
enum Error {
    /// Wraps a `reqwest::Error`.
    #[error("http request error: {0}")]
    Http(#[from] reqwest::Error),

    /// Wraps a `url::ParseError`.
    #[error("url parse error: {0}")]
    UrlParse(#[from] url::ParseError),

    /// Wraps a `serde_json::Error`.
    #[error("json error: {0}")]
    Json(#[from] serde_json::Error),

    /// Wraps an error reported by the `microformats` crate.
    #[error("microformats error: {0}")]
    Microformats(#[from] microformats::Error),

    /// Wraps a `pyo3::PyErr`; only exists when the `pyo3` feature is enabled.
    #[cfg(feature = "pyo3")]
    #[error("python error: {0}")]
    Python(#[from] pyo3::PyErr),

    /// Stand-in used when the `pyo3` feature is disabled, so that a request
    /// for the Python parser can still be answered with an error.
    #[cfg(not(feature = "pyo3"))]
    #[error("python is not enabled")]
    PythonUnavailable,
}

#[derive(Parser, Debug)]


@@ 17,11 23,55 @@ enum Error {
    name = "pyo3-mf2",
    author = "Vika <vika@fireburn.ru>",
    version = env!("CARGO_PKG_VERSION"),
    about = "Fetch HTML and turn it into MF2-JSON using mf2py"
    about = "Fetch HTML and turn it into MF2-JSON"
)]
struct Args {
    #[clap(value_parser)]
    url: url::Url,
    #[clap(long)]
    python: bool
}

/// Parses `text` as HTML with the Python `mf2py` library and returns the
/// result as a JSON value.
///
/// The Python call is `mf2py.parse(doc=text, url=base_url, img_with_alt=True)`.
/// The resulting Python object is serialized with Python's `json.dumps` and
/// then re-parsed with `serde_json`, so no manual Python-to-Rust conversion
/// of the nested structure is needed.
///
/// Only compiled when the `pyo3` feature is enabled.
///
/// # Errors
///
/// * [`Error::Python`] if importing `mf2py` or `json` fails, if either
///   Python call raises, or if the output of `json.dumps` cannot be read as
///   a Python string.
/// * [`Error::Json`] if the string produced by `json.dumps` cannot be
///   deserialized by `serde_json`.
#[cfg(feature = "pyo3")]
fn parse_mf2_with_mf2py(text: &str, base_url: &url::Url) -> Result<serde_json::Value, Error> {
    use pyo3::prelude::*;
    use pyo3::types::{IntoPyDict, PyBool, PyString};

    Python::with_gil(|py| -> Result<serde_json::Value, Error> {
        let mf2py = PyModule::import(py, "mf2py")?;
        // All arguments are passed as keyword arguments; positional args stay empty.
        let mf2_dict = mf2py.getattr("parse")?
            .call((), Some([
                ("doc", PyString::new(py, text).as_ref()),
                ("url", PyString::new(py, base_url.as_str()).as_ref()),
                ("img_with_alt", PyBool::new(py, true).as_ref()),
            ].into_py_dict(py)))?;

        // Let Python serialize its own object to a JSON string.
        let json = PyModule::import(py, "json")?;
        let mf2_json_str = json.getattr("dumps")?
            .call1((mf2_dict,))?
            .downcast::<PyString>()
            .map_err(pyo3::PyErr::from)?;

        Ok(serde_json::from_str(mf2_json_str.to_str()?)?)
    })
}

/// Parses `text` as HTML with the `microformats` crate and returns the
/// parsed document converted to a JSON value.
///
/// Compiled regardless of whether the `pyo3` feature is enabled.
///
/// # Errors
///
/// * [`Error::Microformats`] if `microformats::from_html` fails.
/// * [`Error::Json`] if the parsed document cannot be converted to a
///   `serde_json::Value`.
fn parse_mf2_with_rust(text: &str, base_url: &url::Url) -> Result<serde_json::Value, Error> {
    // The parser is handed its own copy of the base URL.
    let document = microformats::from_html(text, base_url.clone())?;
    let value = serde_json::to_value(document)?;
    Ok(value)
}

fn parse_mf2(text: &str, base_url: &url::Url, use_py: bool) -> Result<serde_json::Value, Error> {
    if use_py {
        eprintln!("Using python");
        #[cfg(feature = "pyo3")]
        return parse_mf2_with_mf2py(text, base_url);
        #[cfg(not(feature = "pyo3"))]
        return Err(Error::PythonUnavailable);
    } else {
        parse_mf2_with_rust(text, base_url)
    }
}

#[tokio::main]


@@ 41,17 91,10 @@ async fn main() -> Result<(), Error> {
    let response = http.get(args.url.clone()).send().await?;
    let text = response.text().await?;
    
    tokio::task::spawn_blocking(move || Python::with_gil(|py| {
        let mf2py = PyModule::import(py, "mf2py")?;
        let mf2_dict = mf2py.getattr("parse")?
            .call((), Some([
                ("doc", text),
                ("url", args.url.as_str().to_owned())
            ].into_py_dict(py)))?;
    let data = tokio::task::spawn_blocking(
        move || parse_mf2(&text, &args.url, args.python)
    ).await.unwrap()?;
    println!("{:#}", data);

        let json = PyModule::import(py, "json")?;
        let mf2_json_str = json.getattr("dumps")?.call1((mf2_dict,))?.extract::<String>()?;
        println!("{}", mf2_json_str);
        Ok(())
    })).await.unwrap()
    Ok(())
}