~singpolyma/biboumi

e6f20d3c0fd4ba8696a4410a366741c9b9f3562d — Florent Le Coz 10 years ago 31e18e4
Implement IRC format to xhtml-im conversion

The generated XML is very verbose because each IRC formatting tag makes us
close a <span/> element and reopen it with the new style applied.  However,
this works quite well and is easy to implement.
M src/bridge/bridge.cpp => src/bridge/bridge.cpp +11 -15
@@ 21,14 21,14 @@ Bridge::~Bridge()
{
}

std::string Bridge::sanitize_for_xmpp(const std::string& str)
Xmpp::body Bridge::make_xmpp_body(const std::string& str)
{
  std::string res;
  if (utils::is_valid_utf8(str.c_str()))
    res = str;
  else
    res = utils::convert_to_utf8(str, "ISO-8859-1");
  return res;
  return irc_format_to_xhtmlim(res);
}

IrcClient* Bridge::get_irc_client(const std::string& hostname, const std::string& username)


@@ 102,7 102,8 @@ void Bridge::send_channel_message(const Iid& iid, const std::string& body)
    irc->send_channel_message(iid.chan, *it);
  // We do not need to convert body to utf-8: it comes from our XMPP server,
  // so it's ok to send it back
  this->xmpp->send_muc_message(iid.chan + "%" + iid.server, irc->get_own_nick(), body, this->user_jid);
  this->xmpp->send_muc_message(iid.chan + "%" + iid.server, irc->get_own_nick(),
                               this->make_xmpp_body(body), this->user_jid);
}

void Bridge::send_private_message(const Iid& iid, const std::string& body)


@@ 137,22 138,17 @@ void Bridge::send_irc_kick(const Iid& iid, const std::string& target, const std:

void Bridge::send_message(const Iid& iid, const std::string& nick, const std::string& body, const bool muc)
{
  std::string utf8_body = this->sanitize_for_xmpp(body);
  if (utf8_body.substr(0, action_prefix_len) == action_prefix)
    { // Special case for ACTION (/me) messages:
      // "\01ACTION goes out\01" == "/me goes out"
      utf8_body = std::string("/me ") +
        utf8_body.substr(action_prefix_len, utf8_body.size() - action_prefix_len - 1);
    }
  if (muc)
    this->xmpp->send_muc_message(iid.chan + "%" + iid.server, nick, utf8_body, this->user_jid);
    this->xmpp->send_muc_message(iid.chan + "%" + iid.server, nick,
                                 this->make_xmpp_body(body), this->user_jid);
  else
    this->xmpp->send_message(iid.chan + "%" + iid.server, utf8_body, this->user_jid);
    this->xmpp->send_message(iid.chan + "%" + iid.server,
                             this->make_xmpp_body(body), this->user_jid);
}

void Bridge::send_muc_leave(Iid&& iid, std::string&& nick, const std::string& message, const bool self)
{
  this->xmpp->send_muc_leave(std::move(iid.chan) + "%" + std::move(iid.server), std::move(nick), this->sanitize_for_xmpp(message), this->user_jid, self);
  this->xmpp->send_muc_leave(std::move(iid.chan) + "%" + std::move(iid.server), std::move(nick), this->make_xmpp_body(message), this->user_jid, self);
}

void Bridge::send_nick_change(Iid&& iid, const std::string& old_nick, const std::string& new_nick, const bool self)


@@ 168,7 164,7 @@ void Bridge::send_xmpp_message(const std::string& from, const std::string& autho
    body = std::string("[") + author + std::string("] ") + msg;
  else
    body = msg;
  this->xmpp->send_message(from, this->sanitize_for_xmpp(body), this->user_jid);
  this->xmpp->send_message(from, this->make_xmpp_body(body), this->user_jid);
}

void Bridge::send_user_join(const std::string& hostname, const std::string& chan_name, const std::string nick)


@@ 183,7 179,7 @@ void Bridge::send_self_join(const std::string& hostname, const std::string& chan

void Bridge::send_topic(const std::string& hostname, const std::string& chan_name, const std::string topic)
{
  this->xmpp->send_topic(chan_name + "%" + hostname, this->sanitize_for_xmpp(topic), this->user_jid);
  this->xmpp->send_topic(chan_name + "%" + hostname, this->make_xmpp_body(topic), this->user_jid);
}

std::string Bridge::get_own_nick(const Iid& iid)

M src/bridge/bridge.hpp => src/bridge/bridge.hpp +2 -1
@@ 2,6 2,7 @@
# define BRIDGE_INCLUDED

#include <irc/irc_client.hpp>
#include <bridge/colors.hpp>
#include <irc/iid.hpp>

#include <unordered_map>


@@ 23,7 24,7 @@ public:
  explicit Bridge(const std::string& user_jid, XmppComponent* xmpp, Poller* poller);
  ~Bridge();

  static std::string sanitize_for_xmpp(const std::string& str);
  static Xmpp::body make_xmpp_body(const std::string& str);
  /***
   **
   ** From XMPP to IRC.

M src/bridge/colors.cpp => src/bridge/colors.cpp +162 -0
@@ 1,7 1,169 @@
#include <bridge/colors.hpp>
#include <xmpp/xmpp_stanza.hpp>
#include <utils/make_unique.hpp>

#include <algorithm>

#include <iostream>

#include <string.h>

// Number of colors in the IRC palette handled by the table below.
static const int IRC_NUM_COLORS = 16;

// CSS color keyword used for each IRC color number (the IRC color number
// is the index in the table).  Every entry must be a valid CSS color
// keyword, otherwise the generated style declaration is invalid.
static const char* const irc_colors_to_css[IRC_NUM_COLORS] = {
  "white",
  "black",
  "blue",
  "green",
  "indianred",
  "red",
  "magenta",
  "brown",
  "yellow",
  "lightgreen",
  "cyan",
  "lightcyan",
  "lightblue",
  // "lightmagenta" is not a CSS color keyword; "violet" is the closest
  // valid one.
  "violet",
  "gray",
  "white",
};

#define XHTML_NS "http://www.w3.org/1999/xhtml"

// The set of IRC styles in effect at a given position of the message being
// converted.  This is a plain aggregate: it is brace-initialised by
// position in irc_format_to_xhtmlim(), so the order of the members matters.
struct styles_t
{
  // Toggled by IRC_FORMAT_BOLD_CHAR.
  bool strong;
  // Toggled by IRC_FORMAT_UNDERLINE_CHAR.
  bool underline;
  // Toggled by IRC_FORMAT_ITALIC_CHAR.
  bool italic;
  // Foreground IRC color number, or -1 when no foreground color is set.
  int fg;
  // Background IRC color number, or -1 when no background color is set.
  int bg;
};

/** We keep the currently-applied CSS styles in a structure. Each time a tag
 * is found, update this style list, then close the current span XML element
 * (if it is open), then reopen it with all the new styles in it.  This is
 * done this way because IRC formatting does not map well with XML
 * (hierarchical tags), it’s a lot easier and cleaner to remove all styles
 * and reapply them for each tag, instead of trying to keep a consistent
 * hierarchy of span, strong, em etc tags.  The generated XML is one-level
 * deep only.
*/
Xmpp::body irc_format_to_xhtmlim(const std::string& s)
{
  if (s.find_first_of(irc_format_char) == std::string::npos)
    // there is no special formatting at all
    return std::make_tuple(s, nullptr);

  std::string cleaned;

  styles_t styles = {false, false, false, -1, -1};

  std::unique_ptr<XmlNode> result = std::make_unique<XmlNode>("body");
  (*result)["xmlns"] = XHTML_NS;

  XmlNode* current_node = result.get();
  std::string::size_type pos_start = 0;
  std::string::size_type pos_end;

  while ((pos_end = s.find_first_of(irc_format_char, pos_start)) != std::string::npos)
    {
      const std::string txt = s.substr(pos_start, pos_end-pos_start);
      cleaned += txt;
      if (current_node->has_children())
        current_node->get_last_child()->set_tail(txt);
      else
        current_node->set_inner(txt);

      if (s[pos_end] == IRC_FORMAT_BOLD_CHAR)
        styles.strong = !styles.strong;
      else if (s[pos_end] == IRC_FORMAT_UNDERLINE_CHAR)
        styles.underline = !styles.underline;
      else if (s[pos_end] == IRC_FORMAT_ITALIC_CHAR)
        styles.italic = !styles.italic;
      else if (s[pos_end] == IRC_FORMAT_RESET_CHAR)
        styles = {false, false, false, -1, -1};
      else if (s[pos_end] == IRC_FORMAT_REVERSE_CHAR)
        { }                      // TODO
      else if (s[pos_end] == IRC_FORMAT_REVERSE2_CHAR)
        { }                      // TODO
      else if (s[pos_end] == IRC_FORMAT_FIXED_CHAR)
        { }                      // TODO
      else if (s[pos_end] == IRC_FORMAT_COLOR_CHAR)
        {
          size_t pos = pos_end + 1;
          styles.fg = -1;
          styles.bg = -1;
          // get the first number following the format char
          if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9')
            {                   // first digit
              styles.fg = s[pos++] - '0';
              if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9')
                // second digit
                styles.fg = styles.fg * 10 + s[pos++] - '0';
            }
          if (pos < s.size() && s[pos] == ',')
            {                   // get bg color after the comma
              pos++;
              if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9')
                {               // first digit
                  styles.bg = s[pos++] - '0';
                  if (pos < s.size() && s[pos] >= '0' && s[pos] <= '9')
                    // second digit
                    styles.bg = styles.bg * 10 + s[pos++] - '0';
                }
            }
          pos_end = pos - 1;
        }

      // close opened span, if any
      if (current_node != result.get())
        {
          current_node->close();
          result->add_child(current_node);
          current_node = result.get();
        }
      // Take all currently-applied style and create a new span with it
      std::string styles_str;
      if (styles.strong)
        styles_str += "font-weight:bold;";
      if (styles.underline)
        styles_str += "text-decoration:underline;";
      if (styles.italic)
        styles_str += "font-style:italic;";
      if (styles.fg != -1)
        styles_str += std::string("color:") +
          irc_colors_to_css[styles.fg % IRC_NUM_COLORS] + ";";
      if (styles.bg != -1)
        styles_str += std::string("background-color:") +
          irc_colors_to_css[styles.bg % IRC_NUM_COLORS] + ";";
      if (!styles_str.empty())
        {
          current_node = new XmlNode("span");
          (*current_node)["style"] = styles_str;
        }

      pos_start = pos_end + 1;
    }

  // If some text remains, without any format char, just append that text at
  // the end of the current node
  const std::string txt = s.substr(pos_start, pos_end-pos_start);
  cleaned += txt;
  if (current_node->has_children())
    current_node->get_last_child()->set_tail(txt);
  else
    current_node->set_inner(txt);

  if (current_node != result.get())
    {
      current_node->close();
      result->add_child(current_node);
      current_node = result.get();
    }


  result->close();
  Xmpp::body body_res = std::make_tuple(cleaned, std::move(result));
  return body_res;
}

M src/bridge/colors.hpp => src/bridge/colors.hpp +45 -11
@@ 1,20 1,54 @@
#ifndef COLORS_INCLUDED
# define COLORS_INCLUDED

#include <string>

/**
 * A module handling the conversion between IRC colors and XHTML-IM, and vice versa.
 * A module handling the conversion between IRC colors and XHTML-IM, and
 * vice versa.
 */

#define IRC_COLOR_BOLD_CHAR       '\x02'
#define IRC_COLOR_COLOR_CHAR      '\x03'
#define IRC_COLOR_RESET_CHAR      '\x0F'
#define IRC_COLOR_FIXED_CHAR      '\x11'
#define IRC_COLOR_REVERSE_CHAR    '\x12'
#define IRC_COLOR_REVERSE2_CHAR   '\x16'
#define IRC_COLOR_ITALIC_CHAR     '\x1D'
#define IRC_COLOR_UNDERLINE_CHAR  '\x1F'
#include <string>
#include <memory>
#include <tuple>

// Only pointers to XmlNode are needed here, a forward declaration is enough.
class XmlNode;

namespace Xmpp
{
  // A message body, as a pair of:
  // - the body as plain, XMPP-valid UTF-8 text
  // - the same body as an XHTML-IM XML node, or a null pointer when there
  //   is none
  using body = std::tuple<const std::string, std::unique_ptr<XmlNode>>;
}

// Control characters recognized as IRC formatting.  The trailing comment
// says what irc_format_to_xhtmlim() does when it meets the character.
#define IRC_FORMAT_BOLD_CHAR       '\x02' // toggles bold
#define IRC_FORMAT_COLOR_CHAR      '\x03' // sets or clears fg/bg colors
#define IRC_FORMAT_RESET_CHAR      '\x0F' // clears all styles
#define IRC_FORMAT_FIXED_CHAR      '\x11' // removed, not rendered (TODO)
#define IRC_FORMAT_REVERSE_CHAR    '\x12' // removed, not rendered (TODO)
#define IRC_FORMAT_REVERSE2_CHAR   '\x16' // removed, not rendered (TODO)
#define IRC_FORMAT_ITALIC_CHAR     '\x1D' // toggles italic
#define IRC_FORMAT_UNDERLINE_CHAR  '\x1F' // toggles underline

// All the formatting characters above, followed by a terminating '\x00' so
// that the array can be used as a C string: it is passed as the set of
// characters to look for to std::string::find_first_of().
static const char irc_format_char[] = {
  IRC_FORMAT_BOLD_CHAR,
  IRC_FORMAT_COLOR_CHAR,
  IRC_FORMAT_RESET_CHAR,
  IRC_FORMAT_FIXED_CHAR,
  IRC_FORMAT_REVERSE_CHAR,
  IRC_FORMAT_REVERSE2_CHAR,
  IRC_FORMAT_ITALIC_CHAR,
  IRC_FORMAT_UNDERLINE_CHAR,
  '\x00'
};

/**
 * Convert the passed string into an XML tree representing the XHTML version
 * of the message, converting the IRC formatting characters (see
 * irc_format_char) into xhtml-im formatting.
 *
 * Returns a Xmpp::body containing:
 * - the body cleaned from any IRC formatting (and without any xhtml)
 * - the body as XHTML-IM, or a null pointer if str does not contain any
 *   IRC formatting character
 */
Xmpp::body irc_format_to_xhtmlim(const std::string& str);

#endif // COLORS_INCLUDED

M src/test.cpp => src/test.cpp +25 -0
@@ 85,6 85,31 @@ int main()
  assert(xml_unescape(xml_escape(unescaped)) == unescaped);

  /**
   * Colors conversion
   */
  std::unique_ptr<XmlNode> xhtml;
  std::string cleaned_up;

  std::tie(cleaned_up, xhtml) =
    irc_format_to_xhtmlim("normalboldunder-and-boldbold normal"
                          "5red,5default-on-red10,2cyan-on-blue");
  assert(xhtml);
  assert(xhtml->to_string() == "<body xmlns='http://www.w3.org/1999/xhtml'>normal<span style='font-weight:bold;'>bold</span><span style='font-weight:bold;text-decoration:underline;'>under-and-bold</span><span style='font-weight:bold;'>bold</span> normal<span style='color:red;'>red</span><span style='background-color:red;'>default-on-red</span><span style='color:cyan;background-color:blue;'>cyan-on-blue</span></body>");
  assert(cleaned_up == "normalboldunder-and-boldbold normalreddefault-on-redcyan-on-blue");

  std::tie(cleaned_up, xhtml) = irc_format_to_xhtmlim("normal");
  assert(!xhtml && cleaned_up == "normal");

  std::tie(cleaned_up, xhtml) = irc_format_to_xhtmlim("");
  assert(xhtml && !xhtml->has_children() && cleaned_up.empty());

  std::tie(cleaned_up, xhtml) = irc_format_to_xhtmlim(",a");
  assert(xhtml && !xhtml->has_children() && cleaned_up == "a");

  std::tie(cleaned_up, xhtml) = irc_format_to_xhtmlim(",");
  assert(xhtml && !xhtml->has_children() && cleaned_up.empty());

  /**
   * JID parsing
   */
  // Full JID

M src/xmpp/xmpp_component.cpp => src/xmpp/xmpp_component.cpp +21 -11
@@ 18,7 18,7 @@
#define DISCO_NS         "http://jabber.org/protocol/disco"
#define DISCO_ITEMS_NS   DISCO_NS"#items"
#define DISCO_INFO_NS    DISCO_NS"#info"

#define XHTMLIM_NS       "http://jabber.org/protocol/xhtml-im"

XmppComponent::XmppComponent(const std::string& hostname, const std::string& secret):
  served_hostname(hostname),


@@ 257,13 257,13 @@ Bridge* XmppComponent::get_user_bridge(const std::string& user_jid)
    }
}

void XmppComponent::send_message(const std::string& from, const std::string& body, const std::string& to)
void XmppComponent::send_message(const std::string& from, Xmpp::body&& body, const std::string& to)
{
  XmlNode node("message");
  node["to"] = to;
  node["from"] = from + "@" + this->served_hostname;
  XmlNode body_node("body");
  body_node.set_inner(body);
  body_node.set_inner(std::get<0>(body));
  body_node.close();
  node.add_child(std::move(body_node));
  node.close();


@@ 319,21 319,21 @@ void XmppComponent::send_self_join(const std::string& from, const std::string& n
  this->send_stanza(node);
}

void XmppComponent::send_topic(const std::string& from, const std::string& topic, const std::string& to)
void XmppComponent::send_topic(const std::string& from, Xmpp::body&& topic, const std::string& to)
{
  XmlNode message("message");
  message["to"] = to;
  message["from"] = from + "@" + this->served_hostname;
  message["type"] = "groupchat";
  XmlNode subject("subject");
  subject.set_inner(topic);
  subject.set_inner(std::get<0>(topic));
  subject.close();
  message.add_child(std::move(subject));
  message.close();
  this->send_stanza(message);
}

void XmppComponent::send_muc_message(const std::string& muc_name, const std::string& nick, const std::string body_str, const std::string& jid_to)
void XmppComponent::send_muc_message(const std::string& muc_name, const std::string& nick, Xmpp::body&& xmpp_body, const std::string& jid_to)
{
  Stanza message("message");
  message["to"] = jid_to;


@@ 343,24 343,34 @@ void XmppComponent::send_muc_message(const std::string& muc_name, const std::str
    message["from"] = muc_name + "@" + this->served_hostname;
  message["type"] = "groupchat";
  XmlNode body("body");
  body.set_inner(body_str);
  body.set_inner(std::get<0>(xmpp_body));
  body.close();
  message.add_child(std::move(body));
  if (std::get<1>(xmpp_body))
    {
      XmlNode html("html");
      html["xmlns"] = XHTMLIM_NS;
      // Pass the ownership of the pointer to this xmlnode
      html.add_child(std::get<1>(xmpp_body).release());
      html.close();
      message.add_child(std::move(html));
    }
  message.close();
  this->send_stanza(message);
}

void XmppComponent::send_muc_leave(std::string&& muc_name, std::string&& nick, std::string&& message, const std::string& jid_to, const bool self)
void XmppComponent::send_muc_leave(std::string&& muc_name, std::string&& nick, Xmpp::body&& message, const std::string& jid_to, const bool self)
{
  Stanza presence("presence");
  presence["to"] = jid_to;
  presence["from"] = muc_name + "@" + this->served_hostname + "/" + nick;
  presence["type"] = "unavailable";
  if (!message.empty() || self)
  const std::string message_str = std::get<0>(message);
  if (message_str.empty() || self)
    {
      XmlNode status("status");
      if (!message.empty())
        status.set_inner(std::move(message));
      if (!message_str.empty())
        status.set_inner(message_str);
      if (self)
        status["code"] = "110";
      status.close();

M src/xmpp/xmpp_component.hpp => src/xmpp/xmpp_component.hpp +4 -4
@@ 58,7 58,7 @@ public:
  /**
   * Send a message from from@served_hostname, with the given body
   */
  void send_message(const std::string& from, const std::string& body, const std::string& to);
  void send_message(const std::string& from, Xmpp::body&& body, const std::string& to);
  /**
   * Send a join from a new participant
   */


@@ 70,15 70,15 @@ public:
  /**
   * Send the MUC topic to the user
   */
  void send_topic(const std::string& from, const std::string& topic, const std::string& to);
  void send_topic(const std::string& from, Xmpp::body&& xmpp_topic, const std::string& to);
  /**
   * Send a (non-private) message to the MUC
   */
  void send_muc_message(const std::string& muc_name, const std::string& nick, const std::string body_str, const std::string& jid_to);
  void send_muc_message(const std::string& muc_name, const std::string& nick, Xmpp::body&& body, const std::string& jid_to);
  /**
   * Send an unavailable presence for this nick
   */
  void send_muc_leave(std::string&& muc_name, std::string&& nick, std::string&& message, const std::string& jid_to, const bool self);
  void send_muc_leave(std::string&& muc_name, std::string&& nick, Xmpp::body&& message, const std::string& jid_to, const bool self);
  /**
   * Indicate that a participant changed his nick
   */

M src/xmpp/xmpp_stanza.hpp => src/xmpp/xmpp_stanza.hpp +5 -5
@@ 21,9 21,9 @@ class AttributeNotFound: public std::exception
     nullptr)
 * - zero, one or more children XML nodes
 * - A name
 * - attributes
 * - inner data (inside the node)
 * - tail data (just after the node)
 * - A map of attributes
 * - inner data (text inside the node)
 * - tail data (text just after the node)
 */
class XmlNode
{


@@ 32,8 32,8 @@ public:
  explicit XmlNode(const std::string& name);
  XmlNode(XmlNode&& node):
    name(std::move(node.name)),
    parent(std::move(node.parent)),
    closed(std::move(node.closed)),
    parent(node.parent),
    closed(node.closed),
    attributes(std::move(node.attributes)),
    children(std::move(node.children)),
    inner(std::move(node.inner)),