from arc.arcpreferences import PreferenceManager as PM from arctool import ARCTool from Default.emailfilter import EmailFilterTable, EmailFilterLogicValidator from PyQt5.QtCore import QUrl, Qt from PyQt5.QtGui import * from PyQt5.QtWidgets import * import arc.arcclasses as arcclasses import arc.arcgui as arcgui import quopri import re import imaplib import email class Plugin(arcclasses.Plugin): headers = '|'.join( ['From','Date','Sent','To','Cc','Bcc','Subject','Importance'] ) def __init__(self,package): super(Plugin,self).__init__(None,package) self.__name__ = "emaildump" # Required self.name = "Email Dump" self.authors = ["unonu"] self.version = (0,0,1) self.description="Finds a group of emails and dumps their information." self.contexts = ['Date'] self.emails = [] self.emailIds = {} self.fragments = {} self.fetched = False self.contextFilters = [] self.igHeader = False self.addHeader = False self.igPles = False self.igReplies= False self.igDup = False self.igFirstDup = False self.igImages = False self.igFormat = False self.igSpace = False self.delim = '' #needed def setupUi(self): #Filter self.widget.emailFilterTable = EmailFilterTable() self.widget.emailFilterTable.tableChanged.connect( lambda x: self.widget.fetchButton.setProperty('enabled', (x > 0 or self.widget.contextCheck.isChecked() or self.widget.selectEdit.text() != '')) ) self.widget.emailFilterTable.tableChanged.connect( ARCTool.signalProfileChanged ) self.widget.tableLayout.addWidget(self.widget.emailFilterTable) self.widget.fetchButton.clicked.connect(self.makeRequest) self.widget.logicBox.setValidator(EmailFilterLogicValidator()) self.widget.selectEdit.textChanged.connect( lambda x: self.widget.fetchButton.setEnabled( x != '' or len(self.widget.emailFilterTable) > 0 ) ) #Options self.widget.contextCheck.stateChanged.connect( lambda x : self.widget.fetchButton.setEnabled(x > 0) ) self.widget.infoCheck.stateChanged.connect( lambda x: self.setAddHeader(x > 0) ) 
self.widget.headerCheck.stateChanged.connect( lambda x: self.setIgHeader(x > 0) ) self.widget.plesCheck.stateChanged.connect( lambda x: self.setIgPles(x > 0) ) self.widget.quoteCheck.stateChanged.connect( lambda x: self.setIgReplies(x > 0) ) self.widget.dupCheck.stateChanged.connect( lambda x: self.setIgDup(x > 0) ) self.widget.firstCheck.stateChanged.connect( lambda x: self.setIgFirstDup(x > 0) ) self.widget.imageCheck.stateChanged.connect( lambda x: self.setIgImages(x > 0) ) self.widget.formatCheck.stateChanged.connect( lambda x: self.setIgFormat(x > 0) ) self.widget.spaceCheck.stateChanged.connect( lambda x: self.setIgSpace(x > 0) ) self.widget.delimeterEdit.textChanged.connect( lambda: self.updateDelim() ) self.widget.dupCheck.stateChanged.connect( lambda x: self.widget.dupWidget.setEnabled(x > 0) ) self.widget.dupStrength.valueChanged.connect( lambda x: self.widget.dupLabel.setText('%d Words' %(x)) ) #call super def update(self): super(Plugin,self).update() #updated from extras if 'filters' in self.extras: self.widget.emailFilterTable.fromSerial(self.extras['filters']) #needed def storeOptions(self): self.options['logicBox'] =\ (self.widget.logicBox.text(),'text') self.options['selectEdit'] =\ (self.widget.selectEdit.text(),'text') self.options['delimeterEdit'] =\ (self.widget.delimeterEdit.text(), 'text') self.options['contextCheck'] =\ (self.widget.contextCheck.isChecked(), 'checked') self.options['infoCheck'] =\ (self.widget.infoCheck.isChecked(), 'checked') self.options['headerCheck'] =\ (self.widget.headerCheck.isChecked(), 'checked') self.options['plesCheck'] =\ (self.widget.plesCheck.isChecked(), 'checked') self.options['quoteCheck'] =\ (self.widget.quoteCheck.isChecked(), 'checked') self.options['dupCheck'] =\ (self.widget.dupCheck.isChecked(), 'checked') self.options['firstCheck'] =\ (self.widget.dupCheck.isChecked(), 'checked') self.options['imageCheck'] =\ (self.widget.imageCheck.isChecked(), 'checked') self.options['formatCheck'] =\ 
(self.widget.formatCheck.isChecked(), 'checked') self.options['spaceCheck'] =\ (self.widget.spaceCheck.isChecked(), 'checked') self.options['dupStrength'] =\ (self.widget.dupStrength.value(), 'value') self.extras['filters'] = self.widget.emailFilterTable.serialize() #override def generate(self): if not self.fetched: r = self.makeRequest() if r < 0: return None doc = QTextDocument() cursor = QTextCursor(doc) # For add headers headers = [] _c = _b = None for message in self.emails: _c = cursor.charFormat() _b = cursor.blockFormat() text = '' for part in message.walk(): typ = part.get_content_type() dis = part.get('Content-Disposition') if dis != 'attachment': if typ == 'text/plain': try: text = ('
'
+ re.sub(r'(?<=\r)\n',r'
',
part.get_payload(decode=True).decode('utf-8'))
+ '
' + chr(29) + '
') headers.append(self.getHeader(message)) # print('custom header added') cursor.insertHtml(text) if self.delim != '': cursor.insertHtml('' + chr(26) + '
)\n?'\
%(self.headers),'',text)
return text
def stripPleasantries(self,text):
    """Return *text* unchanged; pleasantry stripping is not implemented yet.

    The method exists so callers can invoke it unconditionally.
    """
    # TODO: two heuristics were drafted here and then disabled:
    #   1. Split the body into blocks, strip markup and non-breaking
    #      spaces from each block, and drop every block for which
    #      re.split(r'\b', block) yields fewer than 4 pieces.
    #   2. Compare the lengths of neighbouring blocks and cut the text down
    #      to the span between the first and last large length change.
    #      Noted as naive: it does not check whether other "islands" exist.
    return text
def stripImages(self,text):
    """Remove every ``<img>`` element from the HTML in *text*.

    Handles ``<img ...>``, self-closing ``<img ... />`` and an explicit
    ``<img ...></img>`` pair, in any letter case. Text without images is
    returned unchanged.
    """
    # Match only up to the tag's own '>' so an unclosed <img ...> cannot
    # swallow the markup that follows it; a directly following </img> is
    # removed together with its opening tag.
    return re.sub(r'(?is)<img\b[^>]*>(?:\s*</img>)?', '', text)
def collapseSpace(self,text):
# Collapse Spaces
text = re.sub('\xa0',' ',text)
text = re.sub(' +',' ',text)
# Collapse Breaks
text = re.sub(r'(?s)
\s*(?:)?','
',text)
text = re.sub(r'(?s)(?<=>)\s*
\s*(?=<)', '',text)
text = re.sub(r'(?s)\s*]+)?>\s*\s*','',text)
text = re.sub(r'(?s)\s*
]+)?>\s*
\s*','',text) return text def stripReplies(self,message,text): ids = message.get("References") if ids: ids = re.split(',| ',ids) ids = [i.strip() for i in ids if i != ''] # print(ids) for id in ids: if id in self.emailIds: # Remove this previous emails content from me ref = '' for part in self.emailIds[id].walk(): typ = part.get_content_type() dis = part.get('Content-Disposition') if dis != 'attachment': if typ == 'text/plain' or typ == 'text/html': ref = part.get_payload(decode=True)\ .decode('utf-8') # if self.igHeader: # ref = self.stripHeaders(self.emailIds[id[-2]],ref) bodyText = re.search('(?s)]*?>(.+)',text) bodyRef = re.search('(?s)]*?>(.+)',ref) if bodyText and bodyRef: bodyText = bodyText.group(1).strip() container = text.split(bodyText) bodyRef = bodyRef.group(1).strip() bodyText = re.sub('\xa0',' ',bodyText) bodyRef = re.sub('\xa0',' ',bodyRef) bodyTags = re.findall(r'\s*<.+?>\s*',bodyText) bodyText = re.sub(r'\s*<.+?>\s*',chr(24),bodyText) bodyRef = [re.escape(p) for p in \ re.split(r'\s*<.+?>\s*',bodyRef) if p != ''] bodyRef = '(?s)' + (chr(24)+'+').join(bodyRef) bodyRef = re.sub(r'(\\\s)+',r'\\s+',bodyRef) # Search for the original message within the reply partial = re.search(bodyRef,bodyText) if partial: excise = partial.group(0).count(chr(24)) offset =\ bodyText[:bodyText.find(partial.group(0))]\ .count(chr(24)) bodyText = bodyText.replace(partial.group(0),'') bodyTags = (bodyTags[:offset] + bodyTags[offset+excise:]) # Replace the tags for t in bodyTags: bodyText = bodyText.replace(chr(24),t,1) text = container[0] + bodyText + container[1] # We messed up? Impossible... # else: # print(bodyText) # print('-------vvvvvv-------') # print(bodyRef) return text def getHeader(self, message): text = 'From ' # print(message.keys()) text += message.get('From') text += ' on ' text += message.get('Date') text += ':