Skip to content

Commit 23fd936

Browse files
author
epriestley
committed
Add some basic signature stripping
Summary: See discussion in T789. Covered the obvious cases, at least. We can refine this as we get a larger sample size.
Test Plan: Unit test coverage.
Reviewers: btrahan, vrana, jungejason
Reviewed By: btrahan
CC: aran
Maniphest Tasks: T789
Differential Revision: https://secure.phabricator.com/D2154
1 parent 056fd75 commit 23fd936

File tree

3 files changed

+42
-10
lines changed

3 files changed

+42
-10
lines changed

src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@
1818

1919
final class PhabricatorMetaMTAEmailBodyParser {
2020

21-
public function __construct($corpus) {
22-
$this->corpus = $corpus;
21+
public function stripTextBody($body) {
22+
return $this->stripSignature($this->stripQuotedText($body));
2323
}
2424

25-
public function stripQuotedText() {
26-
$body = $this->corpus;
27-
25+
private function stripQuotedText($body) {
2826
$body = preg_replace(
2927
'/^\s*On\b.*\bwrote:.*?/msU',
3028
'',
@@ -42,9 +40,26 @@ public function stripQuotedText() {
4240
'',
4341
$body);
4442

43+
return rtrim($body);
44+
}
45+
46+
private function stripSignature($body) {
47+
// Quasi-"standard" delimiter, for lols see:
48+
// https://bugzilla.mozilla.org/show_bug.cgi?id=58406
49+
$body = preg_replace(
50+
'/^-- +$.*/sm',
51+
'',
52+
$body);
53+
4554
// HTC Mail application (mobile)
4655
$body = preg_replace(
47-
'/^\s*Sent from my HTC smartphone.*?/msU',
56+
'/^\s*^Sent from my HTC smartphone.*/sm',
57+
'',
58+
$body);
59+
60+
// Apple iPhone
61+
$body = preg_replace(
62+
'/^\s*^Sent from my iPhone\s*$.*/sm',
4863
'',
4964
$body);
5065

src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,15 @@ final class PhabricatorMetaMTAEmailBodyParserTestCase
2222
public function testQuotedTextStripping() {
2323
$bodies = $this->getEmailBodies();
2424
foreach ($bodies as $body) {
25-
$parser = new PhabricatorMetaMTAEmailBodyParser($body);
26-
$stripped = $parser->stripQuotedText();
25+
$parser = new PhabricatorMetaMTAEmailBodyParser();
26+
$stripped = $parser->stripTextBody($body);
2727
$this->assertEqual("OKAY", $stripped);
2828
}
2929
}
3030

3131
private function getEmailBodies() {
32+
$trailing_space = ' ';
33+
3234
return array(
3335
<<<EOEMAIL
3436
OKAY
@@ -86,6 +88,21 @@ private function getEmailBodies() {
8688
Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm
8789
> ...
8890
EOEMAIL
91+
,
92+
<<<EOEMAIL
93+
OKAY
94+
95+
--{$trailing_space}
96+
Abraham Lincoln
97+
Supreme Galactic Emperor
98+
EOEMAIL
99+
,
100+
<<<EOEMAIL
101+
OKAY
102+
103+
Sent from my iPhone
104+
EOEMAIL
105+
,
89106
);
90107
}
91108

src/applications/metamta/storage/receivedmail/PhabricatorMetaMTAReceivedMail.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ public function processReceivedMail() {
205205
public function getCleanTextBody() {
206206
$body = idx($this->bodies, 'text');
207207

208-
$parser = new PhabricatorMetaMTAEmailBodyParser($body);
209-
return $parser->stripQuotedText();
208+
$parser = new PhabricatorMetaMTAEmailBodyParser();
209+
return $parser->stripTextBody($body);
210210
}
211211

212212
public static function loadReceiverObject($receiver_name) {

0 commit comments

Comments
 (0)