From 3d92360310d8e35394109058ff723da57af5b380 Mon Sep 17 00:00:00 2001 From: Florent Le Coz Date: Sun, 10 Nov 2013 02:26:07 +0100 Subject: [PATCH] Use the Expat library directly instead of relying on expatpp And now we handle namespaces, yay. And a nice little test. --- CMakeLists.txt | 6 +++- src/test.cpp | 20 ++++++++++++- src/xmpp/xmpp_component.cpp | 31 ++++++++++++------- src/xmpp/xmpp_parser.cpp | 60 ++++++++++++++++++++++++++----------- src/xmpp/xmpp_parser.hpp | 46 ++++++++++++++-------------- src/xmpp/xmpp_stanza.cpp | 15 ++++++++++ src/xmpp/xmpp_stanza.hpp | 20 ++++++++++--- 7 files changed, 140 insertions(+), 58 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bd8ca76..2ec9dd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,11 +13,14 @@ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -fsanitize=address") # set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") find_package(Cryptopp REQUIRED) +include(FindEXPAT) +find_package(EXPAT REQUIRED) include_directories("src/") # the SYSTEM flag tells the compiler that we don't care about warnings # coming from these headers. 
include_directories(SYSTEM ${CRYPTO++_INCLUDE_DIR}) +include_directories(SYSTEM ${EXPAT_INCLUDE_DIRS}) # ## utils @@ -47,7 +50,8 @@ target_link_libraries(irc network utils) file(GLOB source_xmpp src/xmpp/*.[hc]pp) add_library(xmpp STATIC ${source_xmpp}) -target_link_libraries(xmpp bridge network utils ${CRYPTO++_LIBRARIES} expatpp) +target_link_libraries(xmpp bridge network utils + ${CRYPTO++_LIBRARIES} ${EXPAT_LIBRARIES}) # ## bridge diff --git a/src/test.cpp b/src/test.cpp index d110868..674be98 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include int main() { @@ -44,5 +44,23 @@ int main() std::string coucou("\u0002\u0002COUCOU\u0003"); remove_irc_colors(coucou); assert(coucou == "COUCOU"); + + /** + * XML parsing + */ + XmppParser xml; + const std::string doc = "innertail"; + xml.add_stanza_callback([](const Stanza& stanza) + { + assert(stanza.get_name() == "stream_ns:stanza"); + assert(stanza["b"] == "c"); + assert(stanza.get_inner() == "inner"); + assert(stanza.get_tail() == ""); + assert(stanza.get_child("stream_ns:child1") != nullptr); + assert(stanza.get_child("stream_ns:child2") == nullptr); + assert(stanza.get_child("child2_ns:child2") != nullptr); + assert(stanza.get_child("child2_ns:child2")->get_tail() == "tail"); + }); + xml.feed(doc.data(), doc.size(), true); return 0; } diff --git a/src/xmpp/xmpp_component.cpp b/src/xmpp/xmpp_component.cpp index 83091ba..4d981fa 100644 --- a/src/xmpp/xmpp_component.cpp +++ b/src/xmpp/xmpp_component.cpp @@ -10,6 +10,15 @@ #include #include +#define STREAM_NS "http://etherx.jabber.org/streams" +#define COMPONENT_NS "jabber:component:accept" +#define MUC_NS "http://jabber.org/protocol/muc" +#define MUC_USER_NS MUC_NS"#user" +#define DISCO_NS "http://jabber.org/protocol/disco" +#define DISCO_ITEMS_NS DISCO_NS"#items" +#define DISCO_INFO_NS DISCO_NS"#info" + + XmppComponent::XmppComponent(const std::string& hostname, const std::string& secret): served_hostname(hostname), 
secret(secret), @@ -21,11 +30,11 @@ XmppComponent::XmppComponent(const std::string& hostname, const std::string& sec std::placeholders::_1)); this->parser.add_stream_close_callback(std::bind(&XmppComponent::on_remote_stream_close, this, std::placeholders::_1)); - this->stanza_handlers.emplace("handshake", + this->stanza_handlers.emplace(COMPONENT_NS":handshake", std::bind(&XmppComponent::handle_handshake, this,std::placeholders::_1)); - this->stanza_handlers.emplace("presence", + this->stanza_handlers.emplace(COMPONENT_NS":presence", std::bind(&XmppComponent::handle_presence, this,std::placeholders::_1)); - this->stanza_handlers.emplace("message", + this->stanza_handlers.emplace(COMPONENT_NS":message", std::bind(&XmppComponent::handle_message, this,std::placeholders::_1)); } @@ -49,8 +58,8 @@ void XmppComponent::on_connected() { std::cout << "connected to XMPP server" << std::endl; XmlNode node("stream:stream", nullptr); - node["xmlns"] = "jabber:component:accept"; - node["xmlns:stream"] = "http://etherx.jabber.org/streams"; + node["xmlns"] = COMPONENT_NS; + node["xmlns:stream"] = STREAM_NS; node["to"] = "irc.abricot"; this->send_stanza(node); } @@ -62,7 +71,7 @@ void XmppComponent::on_connection_close() void XmppComponent::parse_in_buffer() { - this->parser.XML_Parse(this->in_buf.data(), this->in_buf.size(), false); + this->parser.feed(this->in_buf.data(), this->in_buf.size(), false); this->in_buf.clear(); } @@ -122,7 +131,7 @@ void XmppComponent::send_stream_error(const std::string& name, const std::string { XmlNode node("stream:error", nullptr); XmlNode error(name, nullptr); - error["xmlns"] = "urn:ietf:params:xml:ns:xmpp-streams"; + error["xmlns"] = "urn:ietf:params:xml:ns:xmpp-streams"; if (!explanation.empty()) error.set_inner(explanation); error.close(); @@ -161,7 +170,7 @@ void XmppComponent::handle_presence(const Stanza& stanza) bridge->join_irc_channel(iid, to.resource); else if (type == "unavailable") { - XmlNode* status = stanza.get_child("status"); + XmlNode* status = 
stanza.get_child(COMPONENT_NS":status"); bridge->leave_irc_channel(std::move(iid), status ? std::move(status->get_inner()) : ""); } } @@ -172,7 +181,7 @@ void XmppComponent::handle_message(const Stanza& stanza) Bridge* bridge = this->get_user_bridge(stanza["from"]); Jid to(stanza["to"]); Iid iid(to.local); - XmlNode* body = stanza.get_child("body"); + XmlNode* body = stanza.get_child(COMPONENT_NS":body"); if (stanza["type"] == "groupchat") { if (to.resource.empty()) @@ -214,7 +223,7 @@ void XmppComponent::send_user_join(const std::string& from, const std::string& n node["from"] = from + "@" + this->served_hostname + "/" + nick; XmlNode x("x"); - x["xmlns"] = "http://jabber.org/protocol/muc#user"; + x["xmlns"] = MUC_USER_NS; // TODO: put real values here XmlNode item("item"); @@ -235,7 +244,7 @@ void XmppComponent::send_self_join(const std::string& from, const std::string& n node["from"] = from + "@" + this->served_hostname + "/" + nick; XmlNode x("x"); - x["xmlns"] = "http://jabber.org/protocol/muc#user"; + x["xmlns"] = MUC_USER_NS; // TODO: put real values here XmlNode item("item"); diff --git a/src/xmpp/xmpp_parser.cpp b/src/xmpp/xmpp_parser.cpp index 00714b3..6e4809d 100644 --- a/src/xmpp/xmpp_parser.cpp +++ b/src/xmpp/xmpp_parser.cpp @@ -1,21 +1,56 @@ #include #include -#include +/** + * Expat handlers. Called by the Expat library, never by ourselves. + * They just forward the call to the corresponding XmppParser methods. 
+ */ + +static void start_element_handler(void* user_data, const XML_Char* name, const XML_Char** atts) +{ + static_cast(user_data)->start_element(name, atts); +} + +static void end_element_handler(void* user_data, const XML_Char* name) +{ + static_cast(user_data)->end_element(name); +} + +static void character_data_handler(void *user_data, const XML_Char *s, int len) +{ + static_cast(user_data)->char_data(s, len); +} + +/** + * XmppParser class + */ XmppParser::XmppParser(): level(0), current_node(nullptr) { + // Create the expat parser + this->parser = XML_ParserCreateNS("UTF-8", ':'); + XML_SetUserData(this->parser, static_cast(this)); + + // Install Expat handlers + XML_SetElementHandler(this->parser, &start_element_handler, &end_element_handler); + XML_SetCharacterDataHandler(this->parser, &character_data_handler); } XmppParser::~XmppParser() { if (this->current_node) delete this->current_node; + XML_ParserFree(this->parser); } -void XmppParser::startElement(const XML_Char* name, const XML_Char** attribute) +void XmppParser::feed(const char* data, const int len, const bool is_final) +{ + XML_Parse(this->parser, data, len, is_final); +} + +void XmppParser::start_element(const XML_Char* name, const XML_Char** attribute) { level++; @@ -29,9 +64,9 @@ void XmppParser::startElement(const XML_Char* name, const XML_Char** attribute) this->stream_open_event(*this->current_node); } -void XmppParser::endElement(const XML_Char* name) +void XmppParser::end_element(const XML_Char* name) { - assert(name == this->current_node->get_name()); + (void)name; level--; this->current_node->close(); if (level == 1) @@ -50,18 +85,12 @@ void XmppParser::endElement(const XML_Char* name) this->current_node->delete_all_children(); } -void XmppParser::charData(const XML_Char* data, int len) +void XmppParser::char_data(const XML_Char* data, int len) { if (this->current_node->has_children()) - this->current_node->get_last_child()->set_tail(std::string(data, len)); + 
this->current_node->get_last_child()->add_to_tail(std::string(data, len)); else - this->current_node->set_inner(std::string(data, len)); -} - -void XmppParser::startNamespace(const XML_Char* prefix, const XML_Char* uri) -{ - std::cout << "startNamespace: " << prefix << ":" << uri << std::endl; - this->namespaces.emplace(std::make_pair(prefix, uri)); + this->current_node->add_to_inner(std::string(data, len)); } void XmppParser::stanza_event(const Stanza& stanza) const @@ -82,11 +111,6 @@ void XmppParser::stream_close_event(const XmlNode& node) const callback(node); } -void XmppParser::endNamespace(const XML_Char* coucou) -{ - std::cout << "endNamespace: " << coucou << std::endl; -} - void XmppParser::add_stanza_callback(std::function&& callback) { this->stanza_callbacks.emplace_back(std::move(callback)); diff --git a/src/xmpp/xmpp_parser.hpp b/src/xmpp/xmpp_parser.hpp index 2e83bc3..afdfdfa 100644 --- a/src/xmpp/xmpp_parser.hpp +++ b/src/xmpp/xmpp_parser.hpp @@ -1,35 +1,43 @@ #ifndef XMPP_PARSER_INCLUDED # define XMPP_PARSER_INCLUDED -#include -#include - #include -#include +#include + +#include /** * A SAX XML parser that builds XML nodes and spawns events when a complete * stanza is received (an element of level 2), or when the document is - * opened (an element of level 1) + * opened/closed (an element of level 1) * * After a stanza_event has been spawned, we delete the whole stanza. This * means that even with a very long document (in XMPP the document is - * potentially infinite), the memory then is never exhausted as long as each + * potentially infinite), the memory is never exhausted as long as each * stanza is reasonnably short. * + * The element names generated by expat contain the namespace of the + * element, a colon (':') and then the actual name of the element. 
To get + * an element "x" with a namespace of "http://jabber.org/protocol/muc", you + * just look for an XmlNode named "http://jabber.org/protocol/muc:x" + * * TODO: enforce the size-limit for the stanza (limit the number of childs * it can contain). For example forbid the parser going further than level * 20 (arbitrary number here), and each XML node to have more than 15 childs * (arbitrary number again). */ -class XmppParser: public expatpp +class XmppParser { public: explicit XmppParser(); ~XmppParser(); public: + /** + * Feed the parser with some XML data + */ + void feed(const char* data, const int len, const bool is_final); /** * Add one callback for the various events that this parser can spawn. */ @@ -37,7 +45,6 @@ public: void add_stream_open_callback(std::function&& callback); void add_stream_close_callback(std::function&& callback); -private: /** * Called when a new XML element has been opened. We instanciate a new * XmlNode and set it as our current node. The parent of this new node is @@ -46,7 +53,7 @@ private: * * We spawn a stream_event with this node if this is a level-1 element. */ - void startElement(const XML_Char* name, const XML_Char** attribute); + void start_element(const XML_Char* name, const XML_Char** attribute); /** * Called when an XML element has been closed. We close the current_node, * set our current_node as the parent of the current_node, and if that was @@ -55,19 +62,11 @@ private: * And we then delete the stanza (and everything under it, its children, * attribute, etc). */ - void endElement(const XML_Char* name); + void end_element(const XML_Char* name); /** * Some inner or tail data has been parsed */ - void charData(const XML_Char* data, int len); - /** - * TODO use that. - */ - void startNamespace(const XML_Char* prefix, const XML_Char* uri); - /** - * TODO and that. - */ - void endNamespace(const XML_Char* prefix); + void char_data(const XML_Char* data, int len); /** * Calls all the stanza_callbacks one by one. 
*/ @@ -82,6 +81,11 @@ private: */ void stream_close_event(const XmlNode& node) const; +private: + /** + * Expat structure. + */ + XML_Parser parser; /** * The current depth in the XML document */ @@ -98,10 +102,6 @@ private: std::vector> stanza_callbacks; std::vector> stream_open_callbacks; std::vector> stream_close_callbacks; - /** - * TODO: also use that. - */ - std::stack> namespaces; }; #endif // XMPP_PARSER_INCLUDED diff --git a/src/xmpp/xmpp_stanza.cpp b/src/xmpp/xmpp_stanza.cpp index ab26304..a1a04ba 100644 --- a/src/xmpp/xmpp_stanza.cpp +++ b/src/xmpp/xmpp_stanza.cpp @@ -72,16 +72,31 @@ void XmlNode::set_tail(const std::string& data) this->tail = data; } +void XmlNode::add_to_tail(const std::string& data) +{ + this->tail += data; +} + void XmlNode::set_inner(const std::string& data) { this->inner = xml_escape(data); } +void XmlNode::add_to_inner(const std::string& data) +{ + this->inner += xml_escape(data); +} + std::string XmlNode::get_inner() const { return this->inner; } +std::string XmlNode::get_tail() const +{ + return this->tail; +} + XmlNode* XmlNode::get_child(const std::string& name) const { for (auto& child: this->children) diff --git a/src/xmpp/xmpp_stanza.hpp b/src/xmpp/xmpp_stanza.hpp index 62f152d..d2fe8c8 100644 --- a/src/xmpp/xmpp_stanza.hpp +++ b/src/xmpp/xmpp_stanza.hpp @@ -5,8 +5,6 @@ #include #include -#include - std::string xml_escape(const std::string& data); /** @@ -52,15 +50,29 @@ public: */ void set_tail(const std::string& data); /** - * Set the content of the inner, that is the text inside this node - * TODO: escape it here. + * Append the given data to the content of the tail. This exists because + * the expat library may provide the complete text of an element in more + * than one call + */ + void add_to_tail(const std::string& data); + /** + * Set the content of the inner, that is the text inside this node. */ void set_inner(const std::string& data); + /** + * Append the given data to the content of the inner. 
For the reason + * described in add_to_tail comment. + */ + void add_to_inner(const std::string& data); /** * Get the content of inner * TODO: unescape it here. */ std::string get_inner() const; + /** + * Get the content of the tail + */ + std::string get_tail() const; /** * Get a pointer to the first child element with that name */ -- 2.45.2