diff options
author | Dries Buytaert <dries@buytaert.net> | 2001-11-07 18:05:02 +0000 |
---|---|---|
committer | Dries Buytaert <dries@buytaert.net> | 2001-11-07 18:05:02 +0000 |
commit | 74a1fe5718ff4cec0f04c32a8cb0cd356aa8d6d7 (patch) | |
tree | cb672d1c664f5544e399210d928f8b1c58874490 /scripts | |
parent | db47bac35ec035388cfdab0a9fb27e97f8099742 (diff) | |
download | brdo-74a1fe5718ff4cec0f04c32a8cb0cd356aa8d6d7.tar.gz brdo-74a1fe5718ff4cec0f04c32a8cb0cd356aa8d6d7.tar.bz2 |
mail-to-sql.php:
- rewrote the script in PHP.
- added support for mailing lists.
- made the parser better.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/mail-to-sql.php | 86 | ||||
-rw-r--r-- | scripts/mail-to-sql.pl | 53 |
2 files changed, 86 insertions, 53 deletions
#!/usr/local/bin/php -q
<?php

/*
** scripts/mail-to-sql.php
**
** Reads a single mail message from stdin, separates the headers from
** the body, tries to determine which mailing list (if any) the mail
** was sent through, and inserts the result into the "mail" table.
**
** db_query() and check_query() are not defined in this script; they
** are expected to be provided by includes/common.inc.
*/

ini_set("include_path", ".:/home/dries/personal/cvs/web/pear:/home/dries/personal/cvs/web/drupal.org/x.x.x");
include_once "includes/common.inc";

/*
** Read the mail from stdin:
*/

$stdin = file("php://stdin");
$mail = $stdin ? implode("", $stdin) : "";

/*
** Separate the mail headers from the mail body.  The first empty line
** ends the headers; both LF and CRLF line endings are accepted.  A
** mail without a body yields an empty body rather than an undefined
** variable.
*/

$parts = preg_split("/\r?\n\r?\n/", $mail, 2);
$headers = $parts[0];
$body = isset($parts[1]) ? $parts[1] : "";

/*
** Strip whitespaces, newlines and returns from the beginning and the
** end of the body.
*/

$body = trim($body);

/*
** The field-body can be split into a multiple-line representation,
** which is called "folding".  According to RFC 822, the rule is that
** wherever there may be linear whitespace (not simply LWSP-chars),
** a CRLF immediately followed by at least one LWSP-char may instead
** be inserted.  Merge multi-line headers.  Only space and tab count
** as LWSP-chars here.
*/

$data = preg_replace("/\r?\n[ \t]+/", " ", $headers);

/*
** Parse and load the headers into an associative array, keyed by the
** lowercased field name.  Lines that are not of the form "Name: value"
** are skipped.  When a field occurs more than once, the last
** occurrence wins.
*/

$header = array();

foreach (preg_split("/\r?\n/", $data) as $line) {
  if (preg_match('/^([^:\s]+):[ \t]*(.*)$/', $line, $field)) {
    $header[strtolower($field[1])] = $field[2];
  }
}

/*
** The headers below are stored in their own columns; make sure each
** of them is defined even when the mail does not carry it.
*/

foreach (array("subject", "from", "to", "cc", "reply-to") as $name) {
  if (!isset($header[$name])) {
    $header[$name] = "";
  }
}

/*
** Try to determine whether the mail comes from a mailing list and if
** so, which mailing list: we filter the mail based on parsing all
** the possible mailing list headers.  The candidates are tried in
** order and the first header that matches its pattern decides.
*/

$candidates = array(
  array("x-mailing-list-name", "/([^@]+)/"),               // Perl 6
  array("sender",              "/owner-([^@]+)/"),         // Majordomo
  array("x-beenthere",         "/([^@]+)/"),
  array("delivered-to",        "/mailing list ([^@]+)/"),
  array("x-mailing-list",      "/<([^@]+)/"),
  array("x-loop",              "/([^@]+)/"),
  array("x-list-id",           "/([^@\.]+)/"),             // Mailman
  array("x-list",              "/([^@\.]+)/")
);

$list = "";

foreach ($candidates as $candidate) {
  list($name, $pattern) = $candidate;
  if (isset($header[$name]) && preg_match($pattern, $header[$name], $match)) {
    $list = $match[1];
    break;
  }
}

/*
** Insert the mail into the database.  Every value is passed through
** check_query() before it is embedded in the statement.
*/

db_query("INSERT INTO mail (data, subject, header_from, header_to, header_cc, header_reply_to, body, list, timestamp) VALUES ('". check_query($mail) ."', '". check_query($header["subject"]) ."', '". check_query($header["from"]) ."', '". check_query($header["to"]) ."', '". check_query($header["cc"]) ."', '". check_query($header["reply-to"]) ."', '". check_query($body) ."', '". check_query($list) ."', '". check_query(time()) ."')");

?>
#!/usr/bin/perl

# scripts/mail-to-sql.pl
#
# Reads one mail message from stdin, parses its headers into a hash and
# stores the message in the "mail" table through DBI.

use strict;
use warnings;

use DBI;

# Database settings:
my $db_name = 'drop';
my $db_user = 'drop';
my $db_pass = 'drop';

# Read data from stdin:
my $mail = do { local $/; <STDIN> };
$mail = '' unless defined $mail;

# Everything up to the first empty line is the header block, the rest is
# the body.  Limiting the split to two parts keeps the empty lines inside
# the body exactly as they were received.
my ($raw_header, $body) = split /\n\n/, $mail, 2;
$raw_header = '' unless defined $raw_header;
$body       = '' unless defined $body;

# Parse the header into an associative array:
my %header;
my $key;    # lowercased name of the field the current line belongs to

foreach my $line (split /\n/, $raw_header) {
  $line =~ s/\r\z//;

  # The field-body can be split into a multiple-line representation,
  # which is called "folding".  According to RFC 822, the rule is that
  # wherever there may be linear-white-space (not simply LWSP-chars),
  # a CRLF immediately followed by at least one LWSP-char may instead
  # be inserted.  A line that starts with whitespace therefore continues
  # the previous field.

  if ($line =~ /\A[ \t]+(.*)\z/) {
    $header{$key} .= " $1" if defined $key && length $1;
  }
  elsif ($line =~ /\A([^:\s]+):[ \t]*(.*)\z/) {
    $key = lc $1;
    # A repeated field is appended to the value collected so far.
    $header{$key} .= $2 if length $2;
  }
  # Any other line is not a header field and is ignored.
}

# Debug output:
  # foreach my $name (sort keys %header) {
  #   print "$name: $header{$name}\n--------\n";
  # }

# Store the complete header into a field:
$header{header} = $raw_header;

my $db = DBI->connect("DBI:mysql:$db_name", $db_user, $db_pass)
  or die "Couldn't connect recepient database: " . DBI->errstr;

# Bind the values through placeholders; a header that was not present is
# undef and is stored as NULL.
$db->do(
  "INSERT INTO mail (subject, header_from, header_to, header_cc, header_reply_to, header, body, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
  undef,
  @header{qw(subject from to cc reply-to header)}, $body, time()
) or die "Couldn't execute query: " . $db->errstr;

$db->disconnect();