~emersion/python-emailthreads

0cfaa6d87d62012954b3edac48907081c72ebd54 — emersion 1 year, 9 months ago 2d31509 v0.1.0
Make parse take a list of messages
3 files changed, 39 insertions(+), 15 deletions(-)

M emailthreads/quotes.py
M emailthreads/threads.py
M test/test_parse.py
M emailthreads/quotes.py => emailthreads/quotes.py +1 -1
@@ 2,7 2,7 @@ import re
import sys
from email.message import EmailMessage

from util import *
from .util import *

def trim_empty_lines(block):
	start = 0

M emailthreads/threads.py => emailthreads/threads.py +36 -12
@@ 2,10 2,12 @@ import re
import sys
from email.message import EmailMessage

from util import *
from quotes import *
from .util import *
from .quotes import *

def get_message_by_id(msgs, msg_id):
	if msg_id is None or msg_id == "":
		return None
	# TODO: handle weird brackets stuff
	for msg in msgs:
		if msg["message-id"] == msg_id:


@@ 119,15 121,27 @@ class Thread:

		return "\n".join(repr_lines)

def parse(msg, refs=[]):
	# For some reason Python strips "Re:" prefixes
	subject = flatten_header_field(msg["subject"])
def build_message_tree(messages):
	heads = []
	replies = []

	for msg in messages:
		in_reply_to = get_message_by_id(messages, msg['in-reply-to'])
		if in_reply_to is None:
			heads.append(msg)
		else:
			replies.append((msg, in_reply_to))

	if len(heads) != 1:
		raise Exception("expected exactly one head message, got " + str(len(heads)))
	head = heads[0]

	in_reply_to = get_message_by_id(refs, msg['in-reply-to'])
	if in_reply_to is None or flatten_header_field(in_reply_to["subject"]) != subject:
		text = get_text(msg)
		text_lines = text.splitlines()
		return Thread(text_lines, msg, (0, len(text_lines)))
	replies = sorted(replies, key=lambda reply: reply[0]['date'])

	return (head, replies)

def parse_reply(msg, in_reply_to, thread):
	subject = flatten_header_field(msg["subject"])

	blocks = parse_blocks(msg)
	blocks = trim_quotes_footer(blocks)


@@ 136,8 150,6 @@ def parse(msg, refs=[]):
	# print("\n".join([str(block) for block in blocks]))
	blocks = merge_blocks(blocks)

	thread = parse(in_reply_to, refs)

	last_quote = None
	for block in blocks:
		if isinstance(block, Text):


@@ 162,3 174,15 @@ def parse(msg, refs=[]):
			last_quote = block

	return thread

def parse(messages):
	"""Build a single Thread from a list of messages.

	The list is split by build_message_tree() into one head message and
	a list of (reply, parent) pairs. The thread starts out as the head's
	text, and every reply is then handed to parse_reply() together with
	that thread, in the order build_message_tree() returned them.

	Any exception raised by build_message_tree() propagates to the
	caller unchanged.

	Returns the Thread created from the head message.
	"""
	root, ordered_replies = build_message_tree(messages)

	# The initial thread spans every line of the head message's text.
	body_lines = get_text(root).splitlines()
	whole_range = (0, len(body_lines))
	thread = Thread(body_lines, root, whole_range)

	# parse_reply() is given the thread object itself; its return value is
	# not used here (presumably it updates the thread in place -- confirm).
	for reply, parent in ordered_replies:
		parse_reply(reply, parent, thread)

	return thread

M test/test_parse.py => test/test_parse.py +2 -2
@@ 31,7 31,7 @@ class ParseTestCase(unittest.TestCase):
		patch = self._load_msg_from_file("scissor/patch.eml")
		reply = self._load_msg_from_file("scissor/reply.eml")

		thread = emailthreads.parse(reply, [patch])
		thread = emailthreads.parse([patch, reply])

		got = self._normalize(str(thread))
		want = self._read_file("scissor/output.txt")


@@ 44,7 44,7 @@ class ParseTestCase(unittest.TestCase):
		reply2 = self._load_msg_from_file("multiple-replies/reply2.eml")
		reply3 = self._load_msg_from_file("multiple-replies/reply3.eml")

		thread = emailthreads.parse(reply3, [patch, reply1, reply2])
		thread = emailthreads.parse([patch, reply1, reply2, reply3])

		got = self._normalize(str(thread))
		want = self._read_file("multiple-replies/output3.txt")