summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
author Dries Buytaert <dries@buytaert.net> 2001-11-07 18:05:02 +0000
committer Dries Buytaert <dries@buytaert.net> 2001-11-07 18:05:02 +0000
commit 74a1fe5718ff4cec0f04c32a8cb0cd356aa8d6d7 (patch)
tree cb672d1c664f5544e399210d928f8b1c58874490 /scripts
parent db47bac35ec035388cfdab0a9fb27e97f8099742 (diff)
download brdo-74a1fe5718ff4cec0f04c32a8cb0cd356aa8d6d7.tar.gz
brdo-74a1fe5718ff4cec0f04c32a8cb0cd356aa8d6d7.tar.bz2
mail-to-sql.php:
- rewrote the script in PHP. - added support for mailing lists. - made the parser better.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/mail-to-sql.php 86
-rw-r--r-- scripts/mail-to-sql.pl 53
2 files changed, 86 insertions, 53 deletions
diff --git a/scripts/mail-to-sql.php b/scripts/mail-to-sql.php
new file mode 100644
index 000000000..68cdbf8e6
--- /dev/null
+++ b/scripts/mail-to-sql.php
@@ -0,0 +1,86 @@
+#!/usr/local/bin/php -q
+<?php
+
+ini_set("include_path", ".:/home/dries/personal/cvs/web/pear:/home/dries/personal/cvs/web/drupal.org/x.x.x");
+include_once "includes/common.inc";
+
+/*
+** Read the complete mail (headers and body) from stdin:
+*/
+
+$stdin = file("php://stdin");
+$mail = implode("", $stdin);
+
+/*
+** Separate the headers from the body at the first empty line. The line
+** ends may be LF or CRLF, and a mail without a body yields "":
+*/
+
+list($headers, $body) = array_pad(preg_split("/\r?\n\r?\n/", $mail, 2), 2, "");
+
+/*
+** Strip leading and trailing whitespace (newlines included) from the body:
+*/
+
+$body = trim($body);
+
+/*
+** RFC 822 allows a field-body to be "folded" over several lines: a line
+** break followed by at least one LWSP-char (a space or a tab) continues
+** the previous header line. Merge such multi-line headers into one line.
+** The character class holds only space and tab, since a "|" inside [...]
+** is a literal bar, and the CR of a CRLF line end is consumed as well:
+*/
+
+$data = preg_replace("/\r?\n[ \t]+/", " ", $headers);
+
+/*
+** Load the headers into an associative array keyed by lower-cased name:
+*/
+
+foreach (explode("\n", $data) as $line) {
+  if (!preg_match('/^([^:\s]+):[ \t]*(.*)$/', $line, $field)) continue; // not a "name: value" line
+  $header[strtolower($field[1])] = rtrim($field[2]); // a repeated header keeps its last value
+}
+
+/*
+** Work out which mailing list, if any, this mail was sent through: the
+** headers below are probed in order and the first pattern that matches
+** supplies the list name through its first captured group.
+*/
+
+$probes = array(
+  "x-mailing-list-name" => "/([^@]+)/", // Perl 6
+  "sender" => "/owner-([^@]+)/", // Majordomo
+  "x-beenthere" => "/([^@]+)/",
+  "delivered-to" => "/mailing list ([^@]+)/",
+  "x-mailing-list" => "/<([^@]+)/",
+  "x-loop" => "/([^@]+)/",
+  "x-list-id" => "/([^@\.]+)/", // Mailman
+  "x-list" => "/([^@\.]+)/"
+);
+
+/*
+** A mail that matches none of the probes keeps the empty list name:
+*/
+
+$list = "";
+
+/*
+** Stop at the first header whose pattern matches:
+*/
+
+foreach ($probes as $field => $pattern) {
+  if (preg_match($pattern, $header[$field], $match)) {
+    $list = $match[1];
+    break;
+  }
+}
+
+/*
+** Store the mail in the database; every value goes through check_query():
+*/
+
+db_query("INSERT INTO mail (data, subject, header_from, header_to, header_cc, header_reply_to, body, list, timestamp) VALUES ('". implode("', '", array_map("check_query", array($mail, $header["subject"], $header["from"], $header["to"], $header["cc"], $header["reply-to"], $body, $list, time()))) ."')");
+
+?> \ No newline at end of file
diff --git a/scripts/mail-to-sql.pl b/scripts/mail-to-sql.pl
deleted file mode 100644
index 6fcb89631..000000000
--- a/scripts/mail-to-sql.pl
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/perl -w
-
-# Read one mail message from stdin and insert it into the "mail" table.
-
-use strict;
-use DBI;
-
-# Database settings:
-my $db_name = 'drop';
-my $db_user = 'drop';
-my $db_pass = 'drop';
-
-# Read data from stdin:
-my $data = join '', <STDIN>;
-
-# The first empty line separates the header from the body. Splitting only
-# once keeps the body exactly as it was sent, blank lines included.
-my ($head, $body) = split(/\n\n/, $data, 2);
-$head = '' unless defined $head;
-$body = '' unless defined $body;
-
-# Parse the header into an associative array, keyed by lower-cased name.
-# The field-body can be split into a multiple-line representation,
-# which is called "folding". According to RFC 822, the rule is that
-# wherever there may be linear-white-space (not simply LWSP-chars),
-# a CRLF immediately followed by at least one LWSP-char may instead
-# be inserted. A continuation line is appended to the field before it.
-my %header;
-my $key;
-foreach my $line (split(/\n/, $head)) {
-  if ($line =~ /^\s+(.*)/) {
-    # Continuation line: only meaningful after a field has been seen.
-    $header{$key} .= " $1" if defined $key;
-  }
-  elsif ($line =~ /^([^:\s]+):\s*(.*)/) {
-    # New field. Empty values are not stored; a repeated field name has
-    # its values concatenated.
-    $key = lc($1);
-    $header{$key} .= $2 if length $2;
-  }
-  else {
-    # Neither a field nor a continuation: forget the current field so
-    # that nothing gets attached to it by mistake.
-    undef $key;
-  }
-}
-
-# Store the complete header into a field:
-$header{header} = $head;
-
-my $db = DBI->connect("DBI:mysql:$db_name", "$db_user", "$db_pass") or die "Couldn't connect recepient database: " . DBI->errstr;
-$db->do("INSERT INTO mail (subject, header_from, header_to, header_cc, header_reply_to, header, body, timestamp) VALUES (". $db->quote($header{"subject"}) .", ". $db->quote($header{"from"}) .", ". $db->quote($header{"to"}) .", ". $db->quote($header{"cc"}) .", ". $db->quote($header{"reply-to"}) .", ". $db->quote($header{"header"}) .", ". $db->quote($body) .", ". $db->quote(time()) .")") or die "Couldn't execute query: " . $db->errstr;
-$db->disconnect();