~chrisppy/hare-rss

251fdfe0c78b3ff342d98e4636892f568c6f80b6 — Chris Palmer 3 months ago 0380d25
Cleanup and add extensions & attributes to channel and item

Signed-off-by: Chris Palmer <chris@red-oxide.org>
4 files changed, 300 insertions(+), 248 deletions(-)

M format/rss/+test.ha
D format/rss/finish.ha
M format/rss/scan.ha
M format/rss/types.ha
M format/rss/+test.ha => format/rss/+test.ha +3 -2
@@ 1,5 1,6 @@
// License: MPL-2.0
// (c) 2022-2023 Chris Palmer <chris@red-oxide.org>
// SPDX-License-Identifier: MPL-2.0
// (c) Chris Palmer <chris@red-oxide.org>

use encoding::utf8;
use fmt;
use io;

D format/rss/finish.ha => format/rss/finish.ha +0 -89
@@ 1,89 0,0 @@
// License: MPL-2.0
// (c) 2022-2023 Chris Palmer <chris@red-oxide.org>

// Frees resources associated with a [[rss]], including all of its items and
// its image.
export fn finish(r: *rss) void = {
	// r is a non-nullable pointer, so no null guard is required here.

	// Each category is a pair of strings; both halves are released.
	for (let i = 0z; i < len(r.categories); i += 1) {
		free(r.categories[i].0);
		free(r.categories[i].1);
	};
	for (let i = 0z; i < len(r.skip_hours); i += 1) {
		free(r.skip_hours[i]);
	};
	for (let i = 0z; i < len(r.skip_days); i += 1) {
		free(r.skip_days[i]);
	};
	for (let i = 0z; i < len(r.items); i += 1) {
		item_finish(&r.items[i]);
	};

	image_finish(&r.image);

	// The version string is duplicated from the "version" attribute while
	// scanning, so it is owned by the rss value and must be released too.
	free(r.version);
	free(r.title);
	free(r.link);
	free(r.description);
	free(r.pub_date);
	free(r.last_build_date);
	free(r.language);
	free(r.copyright);
	free(r.managing_editor);
	free(r.web_master);
	free(r.generator);
	free(r.docs);
	free(r.ttl);

	// Finally release the slices themselves, now that their elements are
	// gone.
	free(r.categories);
	free(r.skip_hours);
	free(r.skip_days);
	free(r.items);
};

// Frees the strings held by an [[image]].
//
// NOTE(review): scan_image initialises width and height from the string
// literals "88" and "31"; confirm that freeing those defaults is safe when the
// feed does not override them.
fn image_finish(e: *image) void = {
	// e is a non-nullable pointer, so no null guard is required here.
	free(e.url);
	free(e.title);
	free(e.link);
	free(e.width);
	free(e.height);
};

// Frees the strings, categories and enclosures held by an [[item]].
//
// NOTE(review): scan_guid starts the second guid field as the string literal
// "true" and only replaces it when an isPermaLink attribute is present;
// confirm that freeing guid.1 is safe in the default case.
fn item_finish(e: *item) void = {
	// e is a non-nullable pointer, so no null guard is required here.

	// Each category is a pair of strings; both halves are released.
	for (let i = 0z; i < len(e.categories); i += 1) {
		free(e.categories[i].0);
		free(e.categories[i].1);
	};
	for (let i = 0z; i < len(e.enclosures); i += 1) {
		enclosure_finish(&e.enclosures[i]);
	};

	free(e.title);
	free(e.link);
	free(e.description);
	free(e.pub_date);
	free(e.author);
	free(e.comments);
	free(e.guid.0);
	free(e.guid.1);

	// Release the slices themselves once their elements are gone.
	free(e.categories);
	free(e.enclosures);
};

// Frees the url, length and mime_type strings held by an [[enclosure]].
fn enclosure_finish(e: *enclosure) void = {
	// e is a non-nullable pointer, so no null guard is required here.
	free(e.url);
	free(e.length);
	free(e.mime_type);
};

M format/rss/scan.ha => format/rss/scan.ha +279 -138
@@ 1,10 1,29 @@
// License: MPL-2.0
// (c) 2022-2023 Chris Palmer <chris@red-oxide.org>
// SPDX-License-Identifier: MPL-2.0
// (c) Chris Palmer <chris@red-oxide.org>

use format::xml;
use io;
use memio;
use strings;

// Returned when scanning an XML file which does not meet the expected schema.
export type formaterr = !void;

// Any error which can occur during RSS parsing: a schema violation
// ([[formaterr]]), an error from the XML parser, or an I/O error.
export type error = !(formaterr | xml::error | io::error);

// Produces a human-readable message describing an [[error]]. Schema
// violations get a fixed message; XML and I/O errors are delegated to the
// strerror function of their own module.
export fn strerror(err: error) const str = {
	return match (err) {
	case formaterr =>
		yield "XML does not fulfill required schema";
	case let xerr: xml::error =>
		yield xml::strerror(xerr);
	case let ioerr: io::error =>
		yield io::strerror(ioerr);
	};
};

// Scans a RSS feed XML definition.
export fn scan(in: io::handle) (rss | error) = {
	const parser = xml::parse(in)?;


@@ 12,144 31,111 @@ export fn scan(in: io::handle) (rss | error) = {
	return scan_rss(parser);
};

fn scan_rss(p: *xml::parser) (rss | error) = {
fn scan_rss(par: *xml::parser) (rss | error) = {
	let r = rss{ ... };

	for (true) {
		match(want(p)?) {
		case let at: xml::attribute =>
			switch (at.0) {
			case "version" =>
				r.version = strings::dup(at.1);
			case =>
				continue;
			};
		case let e: xml::elementstart =>
			switch (e) {
		match(want(par)?) {
		case let el: xml::elementstart =>
			switch (el) {
			case "rss" =>
				continue;
			case "channel" =>
				continue;
			case "category" =>
				append(r.categories, scan_category(p)?);
				append(r.categories, scan_category(par)?);
			case "description" =>
				const txt = scan_text(p)?;
				r.description = txt;
				r.description = scan_text(par)?;
			case "docs" =>
				const txt = scan_text(p)?;
				r.docs = txt;
				r.docs = scan_text(par)?;
			case "generator" =>
				const txt = scan_text(p)?;
				r.generator = txt;
				r.generator = scan_text(par)?;
			case "image" =>
				r.image = scan_image(p)?;
				r.image = scan_image(par)?;
			case "item" =>
				append(r.items, scan_item(p)?);
				append(r.items, scan_item(par)?);
			case "lastBuildDate" =>
				const txt = scan_text(p)?;
				r.last_build_date = txt;
				r.last_build_date = scan_text(par)?;
			case "language" =>
				const txt = scan_text(p)?;
				r.language = txt;
				r.language = scan_text(par)?;
			case "link" =>
				const txt = scan_text(p)?;
				r.link = txt;
				r.link = scan_text(par)?;
			case "managingEditor" =>
				const txt = scan_text(p)?;
				r.managing_editor = txt;
				r.managing_editor = scan_text(par)?;
			case "pubDate" =>
				const txt = scan_text(p)?;
				r.pub_date = txt;
				r.pub_date = scan_text(par)?;
			case "skipDays" =>
				const txt = scan_text(p)?;
				append(r.skip_days, txt);
				append(r.skip_days, scan_text(par)?);
			case "skipHours" =>
				const txt = scan_text(p)?;
				append(r.skip_hours, txt);
				append(r.skip_hours, scan_text(par)?);
			case "title" =>
				const txt = scan_text(p)?;
				r.title = txt;
				r.title = scan_text(par)?;
			case "ttl" =>
				const txt = scan_text(p)?;
				r.ttl = txt;
				r.ttl = scan_text(par)?;
			case "webMaster" =>
				const txt = scan_text(p)?;
				r.web_master = txt;
				r.web_master = scan_text(par)?;
			case =>
				append(r.extensions, scan_extension(par)?);
				continue;
			};
		case let e: xml::elementend =>
			switch (e) {
			case "channel" =>
				continue;
			case "rss" =>
				break;
		case xml::elementend =>
			until(par, "rss")?;
			break;
		case let at: xml::attribute =>
			switch (at.0) {
			case "version" =>
				r.version = strings::dup(at.1);
			case =>
				continue;
				append(r.attributes, scan_attribute(at)?);
			};
		case =>
			continue;
		case xml::text =>
			return formaterr;
		};
	};

	return r;
};

fn scan_item(p: *xml::parser) (item | error) = {
fn scan_item(par: *xml::parser) (item | error) = {
	let it = item{ ... };

	for (true) {
		match(want(p)?) {
		case let e: xml::elementstart =>
			switch (e) {
		match(want(par)?) {
		case let el: xml::elementstart =>
			switch (el) {
			case "author" =>
				const txt = scan_text(p)?;
				it.author = txt;
				it.author = scan_text(par)?;
			case "category" =>
				append(it.categories, scan_category(p)?);
				append(it.categories, scan_category(par)?);
			case "comments" =>
				const txt = scan_text(p)?;
				it.comments = txt;
				it.comments = scan_text(par)?;
			case "description" =>
				const txt = scan_text(p)?;
				it.description = txt;
				it.description = scan_text(par)?;
			case "enclosure" =>
				append(it.enclosures, scan_enclosure(p)?);
				append(it.enclosures, scan_enclosure(par)?);
			case "guid" =>
				const txt = scan_guid(p)?;
				it.guid = txt;
				it.guid = scan_guid(par)?;
			case "link" =>
				const txt = scan_text(p)?;
				it.link = txt;
				it.link = scan_text(par)?;
			case "pubDate" =>
				const txt = scan_text(p)?;
				it.pub_date = txt;
				it.pub_date = scan_text(par)?;
			case "title" =>
				const txt = scan_text(p)?;
				it.title = txt;
				it.title = scan_text(par)?;
			case =>
				continue;
				append(it.extensions, scan_extension(par)?);
			};
		case let e: xml::elementend =>
			switch (e) {
			case "item" =>
				break;
			case =>
				continue;
			};
		case =>
			continue;
		case xml::elementend =>
			break;
		case let at: xml::attribute =>
			append(it.attributes, scan_attribute(at)?);
		case xml::text =>
			return formaterr;
		};
	};

	return it;
};

fn scan_enclosure(p: *xml::parser) (enclosure | error) = {
fn scan_enclosure(par: *xml::parser) (enclosure | error) = {
	let en = enclosure{ ... };

	for (true) {
		match(want(p)?) {
		match(want(par)?) {
		case let at: xml::attribute =>
			switch (at.0) {
			case "length" =>


@@ 159,68 145,55 @@ fn scan_enclosure(p: *xml::parser) (enclosure | error) = {
			case "url" =>
				en.url = strings::dup(at.1);
			case =>
				continue;
				return formaterr;
			};
		case xml::elementend =>
			break;
		case =>
			continue;
			return formaterr;
		};
	};

	return en;
};

fn scan_image(p: *xml::parser) (image | error) = {
fn scan_image(par: *xml::parser) (image | error) = {
	let im = image{
		width = "88",
		height = "31",
		...
	};

	for (true) {
		match(want(p)?) {
		case let e: xml::elementstart =>
			switch (e) {
		match(want(par)?) {
		case let el: xml::elementstart =>
			switch (el) {
			case "height" =>
				const txt = scan_text(p)?;
				im.height = txt;
				im.height = scan_text(par)?;
			case "link" =>
				const txt = scan_text(p)?;
				im.link = txt;
				im.link = scan_text(par)?;
			case "title" =>
				const txt = scan_text(p)?;
				im.title = txt;
				im.title = scan_text(par)?;
			case "url" =>
				const txt = scan_text(p)?;
				im.url = txt;
				im.url = scan_text(par)?;
			case "width" =>
				const txt = scan_text(p)?;
				im.width = txt;
				im.width = scan_text(par)?;
			case =>
				continue;
			};
		case let e: xml::elementend =>
			switch (e) {
			case "image" =>
				break;
			case =>
				continue;
				return formaterr;
			};
		case xml::elementend =>
			break;
		case =>
			continue;
			return formaterr;
		};
	};

	return im;
};

fn scan_guid(p: *xml::parser) (guid | error) = {
fn scan_guid(par: *xml::parser) (guid | error) = {
	let val = memio::dynamic(), pl = "true";
	for (true) {
		match (xml::scan(p)?) {
		match (xml::scan(par)?) {
		case void =>
			return format;
			return formaterr;
		case let tok: xml::token =>
			match (tok) {
			case let at: xml::attribute =>


@@ 228,14 201,14 @@ fn scan_guid(p: *xml::parser) (guid | error) = {
				case "isPermaLink" =>
					pl = strings::dup(at.1);
				case =>
					continue;
					return formaterr;
				};
			case let txt: xml::text =>
				memio::concat(&val, txt)?;
			case let e: xml::elementend =>
			case xml::elementend =>
				break;
			case =>
				continue;
				return formaterr;
			};
		};
	};


@@ 243,12 216,12 @@ fn scan_guid(p: *xml::parser) (guid | error) = {
	return (memio::string(&val)?, pl);
};

fn scan_category(p: *xml::parser) (category | error) = {
fn scan_category(par: *xml::parser) (category | error) = {
	let val = memio::dynamic(), d = "";
	for (true) {
		match (xml::scan(p)?) {
		match (xml::scan(par)?) {
		case void =>
			return format;
			return formaterr;
		case let tok: xml::token =>
			match (tok) {
			case let at: xml::attribute =>


@@ 256,14 229,14 @@ fn scan_category(p: *xml::parser) (category | error) = {
				case "domain" =>
					d = strings::dup(at.1);
				case =>
					continue;
					return formaterr;
				};
			case let txt: xml::text =>
				memio::concat(&val, txt)?;
			case let e: xml::elementend =>
			case xml::elementend =>
				break;
			case =>
				continue;
				return formaterr;
			};
		};
	};


@@ 271,20 244,20 @@ fn scan_category(p: *xml::parser) (category | error) = {
	return (memio::string(&val)?, d);
};

fn scan_text(p: *xml::parser) (str | error) = {
fn scan_text(par: *xml::parser) (str | error) = {
	let val = memio::dynamic();
	for (true) {
		match (xml::scan(p)?) {
		match (xml::scan(par)?) {
		case void =>
			return format;
			return formaterr;
		case let tok: xml::token =>
			match (tok) {
			case let txt: xml::text =>
				memio::concat(&val, txt)?;
			case let e: xml::elementend =>
			case xml::elementend =>
				break;
			case =>
				continue;
				return formaterr;
			};
		};
	};


@@ 292,22 265,190 @@ fn scan_text(p: *xml::parser) (str | error) = {
	return memio::string(&val)?;
};

fn want(p: *xml::parser) (xml::token | error) = {
	let tok = match (xml::scan(p)?) {
// Builds an [[attribute]] from a parsed XML attribute. Both the name and the
// value are duplicated, so the result does not borrow from the token.
fn scan_attribute(at: xml::attribute) (attribute | error) = {
	const name = strings::dup(at.0);
	const value = strings::dup(at.1);
	return attribute {
		name = name,
		value = value,
	};
};

// Scans an element that is not part of the core RSS schema, starting just
// after its start tag has been consumed. Attributes are collected, nested
// elements are scanned recursively, and character data is accumulated and
// trimmed. The element name is taken from the end tag which terminates the
// scan. Returns [[formaterr]] if the input ends before the end tag is seen.
//
// The name and text of the returned [[extension]] are heap-allocated copies.
fn scan_extension(par: *xml::parser) (extension | error) = {
	let ex = extension { ... };
	let txt = memio::dynamic();
	// The buffer is scratch space only: ex.text receives its own copy
	// below, so the buffer can be released on every exit path.
	defer io::close(&txt)!;

	for (true) {
		match (xml::scan(par)?) {
		case void =>
			return formaterr;
		case let t: xml::token =>
			match (t) {
			case xml::elementstart =>
				append(ex.extensions, scan_extension(par)?);
			case let el: xml::elementend =>
				ex.name = strings::dup(el);
				break;
			case let at: xml::attribute =>
				append(ex.attributes, scan_attribute(at)?);
			case let tx: xml::text =>
				memio::concat(&txt, tx)!;
			};
		};
	};

	// strings::trim returns a view into the buffer; duplicate it so the
	// extension owns a string that starts at its own allocation.
	ex.text = strings::dup(strings::trim(memio::string(&txt)?));
	return ex;
};

fn want(par: *xml::parser) (xml::token | error) = {
	let tok = match (xml::scan(par)?) {
	case let tok: xml::token =>
		yield tok;
	case void =>
		return format;
		return formaterr;
	};

	for (tok is xml::text) {
		tok = match (xml::scan(p)?) {
		tok = match (xml::scan(par)?) {
		case let tok: xml::token =>
			yield tok;
		case void =>
			return format;
			return formaterr;
		};
	};

	return tok;
};

// Discards tokens from the parser until an end tag whose name equals "end" has
// been consumed. Returns [[formaterr]] if the input ends before that tag is
// found, and propagates any error reported by the XML scanner.
fn until(par: *xml::parser, end: str) (void | error) = {
	for (true) {
		match (xml::scan(par)?) {
		case void =>
			// A truncated document is a schema problem in the
			// input, not a programming error, so report it
			// instead of aborting the program.
			return formaterr;
		case let tok: xml::token =>
			match (tok) {
			case let el: xml::elementend =>
				if (el == end) {
					break;
				};
			case => void;
			};
		};
	};
};

// Frees resources associated with a [[rss]], including all of its items,
// extensions, attributes and its image.
export fn finish(r: *rss) void = {
	// r is a non-nullable pointer, so no null guard is required here.

	// Each category is a pair of strings; both halves are released.
	for (let i = 0z; i < len(r.categories); i += 1) {
		free(r.categories[i].0);
		free(r.categories[i].1);
	};
	for (let i = 0z; i < len(r.skip_hours); i += 1) {
		free(r.skip_hours[i]);
	};
	for (let i = 0z; i < len(r.skip_days); i += 1) {
		free(r.skip_days[i]);
	};
	for (let i = 0z; i < len(r.items); i += 1) {
		item_finish(&r.items[i]);
	};
	for (let i = 0z; i < len(r.extensions); i += 1) {
		extension_finish(&r.extensions[i]);
	};
	for (let i = 0z; i < len(r.attributes); i += 1) {
		attribute_finish(&r.attributes[i]);
	};

	image_finish(&r.image);

	// The version string is duplicated from the "version" attribute while
	// scanning, so it is owned by the rss value and must be released too.
	free(r.version);
	free(r.title);
	free(r.link);
	free(r.description);
	free(r.pub_date);
	free(r.last_build_date);
	free(r.language);
	free(r.copyright);
	free(r.managing_editor);
	free(r.web_master);
	free(r.generator);
	free(r.docs);
	free(r.ttl);

	// Finally release the slices themselves, now that their elements are
	// gone.
	free(r.categories);
	free(r.skip_hours);
	free(r.skip_days);
	free(r.items);
	free(r.extensions);
	free(r.attributes);
};

// Frees the strings held by an [[image]].
//
// NOTE(review): scan_image initialises width and height from the string
// literals "88" and "31"; confirm that freeing those defaults is safe when the
// feed does not override them.
fn image_finish(e: *image) void = {
	// e is a non-nullable pointer, so no null guard is required here.
	free(e.url);
	free(e.title);
	free(e.link);
	free(e.width);
	free(e.height);
};

// Frees the strings, categories, enclosures, extensions and attributes held
// by an [[item]].
//
// NOTE(review): scan_guid starts the second guid field as the string literal
// "true" and only replaces it when an isPermaLink attribute is present;
// confirm that freeing guid.1 is safe in the default case.
fn item_finish(e: *item) void = {
	// e is a non-nullable pointer, so no null guard is required here.

	// Each category is a pair of strings; both halves are released.
	for (let i = 0z; i < len(e.categories); i += 1) {
		free(e.categories[i].0);
		free(e.categories[i].1);
	};
	for (let i = 0z; i < len(e.enclosures); i += 1) {
		enclosure_finish(&e.enclosures[i]);
	};
	for (let i = 0z; i < len(e.extensions); i += 1) {
		extension_finish(&e.extensions[i]);
	};
	for (let i = 0z; i < len(e.attributes); i += 1) {
		attribute_finish(&e.attributes[i]);
	};

	free(e.title);
	free(e.link);
	free(e.description);
	free(e.pub_date);
	free(e.author);
	free(e.comments);
	free(e.guid.0);
	free(e.guid.1);

	// Release the slices themselves once their elements are gone.
	free(e.categories);
	free(e.enclosures);
	free(e.extensions);
	free(e.attributes);
};

// Releases the name and value strings held by an [[attribute]].
fn attribute_finish(a: *attribute) void = {
	free(a.value);
	free(a.name);
};

// Recursively releases an [[extension]]: its nested extensions, its
// attributes, the two slices holding them, and its name. The text field is
// not released by this function.
fn extension_finish(e: *extension) void = {
	// Children first, then the slice which held them.
	for (let n = 0z; n < len(e.extensions); n += 1) {
		extension_finish(&e.extensions[n]);
	};
	free(e.extensions);

	for (let n = 0z; n < len(e.attributes); n += 1) {
		attribute_finish(&e.attributes[n]);
	};
	free(e.attributes);

	free(e.name);
};

// Frees the url, length and mime_type strings held by an [[enclosure]].
fn enclosure_finish(e: *enclosure) void = {
	// e is a non-nullable pointer, so no null guard is required here.
	free(e.url);
	free(e.length);
	free(e.mime_type);
};

M format/rss/types.ha => format/rss/types.ha +18 -19
@@ 1,7 1,5 @@
// License: MPL-2.0
// (c) 2022-2023 Chris Palmer <chris@red-oxide.org>
use format::xml;
use io;
// SPDX-License-Identifier: MPL-2.0
// (c) Chris Palmer <chris@red-oxide.org>

// A RSS parent element. Channel has been collapsed into this structure.
export type rss = struct {


@@ 23,6 21,8 @@ export type rss = struct {
	skip_days: []str,
	categories: []category,
	items: []item,
	attributes: []attribute,
	extensions: []extension,
};

// A RSS guid element.


@@ 42,6 42,8 @@ export type item = struct {
	guid: guid,
	categories: []category,
	enclosures: []enclosure,
	attributes: []attribute,
	extensions: []extension,
};

// A RSS enclosure element.


@@ 60,20 62,17 @@ export type image = struct {
	height: str,
};

// Returned when scanning a XML file which does not meet the expected schema.
export type format = !void;

// Any error which can occur during RSS parsing.
export type error = !(format | xml::error | io::error);
// An XML attribute stored as a name/value pair. The scanner uses this type to
// keep attributes it has no dedicated field for, on the rss element, on items
// and on extension elements.
export type attribute = struct {
	name: str,
	value: str,
};

// Converts an [[error]] to an user-friendly string representation.
export fn strerror(err: error) const str = {
	match(err) {
	case format =>
		return "XML does not fulfill required schema";
	case let err: xml::error =>
		return xml::strerror(err);
	case let err: io::error =>
		return io::strerror(err);
	};
// An element which is not part of the core RSS schema, kept in generic form.
// "name" is the element name, "text" is its character data with surrounding
// whitespace trimmed, "attributes" are the attributes of the element and
// "extensions" are its nested child elements.
export type extension = struct {
	name: str,
	text: str,
	attributes: []attribute,
	extensions: []extension,
};