~emersion/python-emailthreads

0cfaa6d87d62012954b3edac48907081c72ebd54 — emersion 9 months ago 2d31509 v0.1.0
Make parse take a list of messages
3 files changed, 39 insertions(+), 15 deletions(-)

M emailthreads/quotes.py
M emailthreads/threads.py
M test/test_parse.py
M emailthreads/quotes.py => emailthreads/quotes.py +1 -1
@@ 2,7 2,7 @@ import sys
  from email.message import EmailMessage
  
- from util import *
+ from .util import *
  
  def trim_empty_lines(block):
  	start = 0

M emailthreads/threads.py => emailthreads/threads.py +36 -12
@@ 2,10 2,12 @@ import sys
  from email.message import EmailMessage
  
- from util import *
- from quotes import *
+ from .util import *
+ from .quotes import *
  
  def get_message_by_id(msgs, msg_id):
+ 	if msg_id is None or msg_id == "":
+ 		return None
  	# TODO: handle weird brackets stuff
  	for msg in msgs:
  		if msg["message-id"] == msg_id:


@@ 119,15 121,27 @@   		return "\n".join(repr_lines)
  
- def parse(msg, refs=[]):
- 	# For some reason Python strips "Re:" prefixes
- 	subject = flatten_header_field(msg["subject"])
+ def build_message_tree(messages):
+ 	heads = []
+ 	replies = []
+ 
+ 	for msg in messages:
+ 		in_reply_to = get_message_by_id(messages, msg['in-reply-to'])
+ 		if in_reply_to is None:
+ 			heads.append(msg)
+ 		else:
+ 			replies.append((msg, in_reply_to))
+ 
+ 	if len(heads) != 1:
+ 		raise Exception("expected exactly one head message, got " + str(len(heads)))
+ 	head = heads[0]
  
- 	in_reply_to = get_message_by_id(refs, msg['in-reply-to'])
- 	if in_reply_to is None or flatten_header_field(in_reply_to["subject"]) != subject:
- 		text = get_text(msg)
- 		text_lines = text.splitlines()
- 		return Thread(text_lines, msg, (0, len(text_lines)))
+ 	replies = sorted(replies, key=lambda reply: reply[0]['date'])
+ 
+ 	return (head, replies)
+ 
+ def parse_reply(msg, in_reply_to, thread):
+ 	subject = flatten_header_field(msg["subject"])
  
  	blocks = parse_blocks(msg)
  	blocks = trim_quotes_footer(blocks)


@@ 136,8 150,6 @@ # print("\n".join([str(block) for block in blocks]))
  	blocks = merge_blocks(blocks)
  
- 	thread = parse(in_reply_to, refs)
- 
  	last_quote = None
  	for block in blocks:
  		if isinstance(block, Text):


@@ 162,3 174,15 @@ last_quote = block
  
  	return thread
+ 
+ def parse(messages):
+ 	(head, replies) = build_message_tree(messages)
+ 
+ 	text = get_text(head)
+ 	text_lines = text.splitlines()
+ 	thread = Thread(text_lines, head, (0, len(text_lines)))
+ 
+ 	for (msg, in_reply_to) in replies:
+ 		parse_reply(msg, in_reply_to, thread)
+ 
+ 	return thread

M test/test_parse.py => test/test_parse.py +2 -2
@@ 31,7 31,7 @@ patch = self._load_msg_from_file("scissor/patch.eml")
  		reply = self._load_msg_from_file("scissor/reply.eml")
  
- 		thread = emailthreads.parse(reply, [patch])
+ 		thread = emailthreads.parse([patch, reply])
  
  		got = self._normalize(str(thread))
  		want = self._read_file("scissor/output.txt")


@@ 44,7 44,7 @@ reply2 = self._load_msg_from_file("multiple-replies/reply2.eml")
  		reply3 = self._load_msg_from_file("multiple-replies/reply3.eml")
  
- 		thread = emailthreads.parse(reply3, [patch, reply1, reply2])
+ 		thread = emailthreads.parse([patch, reply1, reply2, reply3])
  
  		got = self._normalize(str(thread))
  		want = self._read_file("multiple-replies/output3.txt")