#!/usr/bin/perl -w ############################################################################### # Name: FetchYahoo # Purpose: retrieves messages from Yahoo! Mail, saving them to a local spool # Description: FetchYahoo is a Perl script that downloads mail from a Yahoo! # webmail account to a local mail spool. It is meant to replace # fetchmail for people using Yahoo! mail since Yahoo!'s POP # service is no longer free. It downloads messages to a local # mail spool, including all parts and attachments . It then # deletes messages unless requested not to. It can also forward # messages to another e-mail address # Author: Ravi Ramkissoon # Author's E-mail: ravi_ramkissoon@yahoo.com # License: Gnu Public License # Created: 04.12.02 # Modified: 05.02.02 my $version = ".1.4"; # # Installation instructions are in the INSTALL file # ############################################################################### # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ############################################################################### use strict; use Getopt::Long (); use HTML::Entities (); use HTML::HeadParser (); use HTML::TokeParser (); use HTTP::Request::Common qw(GET POST); use HTTP::Cookies (); use LWP::UserAgent (); use LWP::Simple (); use MIME::Head (); sub GetRedirectUrl($); sub ParseConfigFile(); sub Localize($); sub EmptyTrash(); sub Logout(); # MUST configure these my $username = 'yahoo-user-name'; my $password = 'yahoo-password'; # mail spool, mbox file and procmail configs my $useSpool = 1; # set this to 0 to disable outputting to a file my $spoolName = '/var/spool/mail/local-user-name'; my $spoolMode = 'append'; # either 'append', 'pipe' or 'overwrite' # use 'pipe' for procmail or other filter # proxy configs my $useProxy = 0; # set this to 1 to enable use of a web proxy my $proxyHost = 'proxy.hostname.com'; my $proxyPort = 80; my $useHTTPS = 0; # set this to 1 to use https secure connections # this may require Crypt::SSLeay # mail forwarding configs my $useForward = 0; # set this to 1 to enable mail forwarding my $mailHost = 'outgoing.mail.com'; # set this to yr smtp outgoing mail server my $sendToAddress = 'me@myhost.com'; # the e-mail address to be forwarded to my $sendFromAddress = 'me@myhost.com'; # the e-mail address to use as the from address. This should probably be at the # same ISP as the outgoing smtp mailhost specified above # daemon mode config. If this is 0, the program runs only once and terminates. # Otherwise this is the number of minutes between successive mail checks. my $repeatInterval = 0; # the below defaults can be overridden from the commandline my $newOnly = 0; # download all (0) or just new (1) messages my $noDelete = 0; # to not delete messages set this to 1 my $quiet = 0; # to suppress regular (non-error) output set this to 1 my $noDownload = 0; # to delete msgs and not download them, set this to 1 my $emptyTrashAfter = 0; # to empty trash after downloading msgs set this to 1 my $emptyTrashBefore = 0; # to empty trash b4 downloading msgs, set this to 1 my $logout = 0; # to have fetchyahoo logout at the end, set this to 1 # use LWP::Debug qw(+); # turn this on for tons of debugging messages # I may need to edit these in future my $userAgent= 'FetchYahoo/' . $version; my $loginURL = 'http://login.yahoo.com/config/login'; my $HTTPSloginURL = 'https://login.yahoo.com/config/login'; my $homesuff = '/ym/ShowFolder?box=Inbox'; my $msgsuff = '/ym/ShowLetter?box=Inbox&PRINT=1&Nhead=f&toc=1&MsgId='; my $bodyPartUrlTemplate = "/ym/ShowLetter?box=Inbox&MsgId=%s&bodyPart=%s"; my $versionString = "FetchYahoo Version " . $version . "\n"; # other variables used my $spool; my $proxyURL; my $smtp; my $altConfigFile; # flag for help and version my $helpFlag = 0; my $versionFlag = 0; my %map = (); # hash for extension->MIMEtype mappings my $help = < \$altConfigFile); # config file options take precedence over hardcoded (within-file) options ParseConfigFile(); # get other command-line input options. These take precedence over all others Getopt::Long::Configure('no_pass_through'); Getopt::Long::GetOptions ( 'newonly' => \$newOnly, 'help' => \$helpFlag, 'version' => \$versionFlag, 'noDelete' => \$noDelete, 'username=s' => \$username, 'password=s' => \$password, 'spoolfile=s' => \$spoolName, 'quiet!' => \$quiet, 'nodownload' => \$noDownload, 'emptytrashafter' => \$emptyTrashAfter, 'emptytrashbefore' => \$emptyTrashBefore, 'logout!' => \$logout, 'repeatinterval=i' => \$repeatInterval, 'noempty' => sub {$emptyTrashAfter=0;$emptyTrashBefore=0;}, 'download' => sub { $noDownload=0; }, 'allmsgs' => sub { $newOnly=0; }, 'delete' => sub { $noDelete=0; } ); # set some required variables if ($spoolMode eq 'append') { $spool = '>>' . $spoolName ; } elsif ($spoolMode eq 'pipe') { $spool = '|' . $spoolName ; } elsif ($spoolMode eq 'overwrite') { $spool = '>' . $spoolName ; } else { $spool = '>>' . $spoolName ; } # the default is to append $proxyURL = 'http://' . $proxyHost . ':' . $proxyPort; if ($useHTTPS) { $proxyURL = $proxyHost . ':' . $proxyPort; } if ($useHTTPS) { $loginURL = $HTTPSloginURL; } # unbuffer STDOUT select((select(STDOUT), $| = 1)[0]); # check if help or version was requested if ($helpFlag) { print $versionString . "\n" . $help; exit; } if ($versionFlag) { print $versionString; exit; } # check for common errors (forgot to edit variables) if ($username eq 'yahoo-user-name') { print "You MUST specify a username " . "before using this program.\n\n"; print $versionString . "\n" . $help; exit; } if ($username eq 'yahoo-password') { print "You MUST specify a password" . "before using this program.\n\n"; print $versionString . "\n" . $help; exit; } if ( $useSpool && $spoolName eq "/var/spool/mail/local-user-name") { print "If you are sending the messages to a file (use-spool=1), you must" . " specify the file or spool or program .\n\n"; print $versionString . "\n" . $help; exit; } if ( $useForward && $sendToAddress eq 'me@myhost.com') { print "If you are forwarding the messages (use-forward=1), you must " . "specify the e-mail address to forward to.\n\n"; print $versionString . "\n" . $help; exit; } if ( $useForward && $mailHost eq 'outgoing.mail.com') { print "If you are forwarding the messages (use-forward=1), you must " . "specify an smtp server" . " (localhost if you have one installed locally).\n\n"; print $versionString . "\n" . $help; exit; } if ( $useProxy && $proxyHost eq "proxy.hostname.com") { print "If you are using a web proxy (use-proxy=1), you must " . "specify the proxy hostname.\n\n"; print $versionString . "\n" . $help; exit; } if (!$quiet) { if ($useHTTPS) { print "Logging in securely via SSL as $username.\n" } else { print "Logging in insecurely via plaintext as $username.\n" } if ($useProxy) { print "Using $proxyURL as a webproxy.\n" } if ($repeatInterval>0) { print "Running in daemon mode. Will check every $repeatInterval" . " minutes.\n"; } } startfetch: if ($useForward) { use Net::SMTP; $smtp = Net::SMTP->new($mailHost); die "Unable to connect to server $mailHost to forward mail. Terminating!\n" unless $smtp; } # grab login cookies my $ua = LWP::UserAgent->new; $ua->agent($userAgent); if ($useProxy) { if ($useHTTPS) { $ENV{HTTPS_PROXY} = $proxyURL; } else { $ua->proxy('http', $proxyURL); } } my $cookie_jar = HTTP::Cookies->new(); $ua->cookie_jar($cookie_jar); my $request = POST 'http://login.yahoo.com/config/login', [ '.tries' => '1', #'.done' => 'URL to go to later', '.src' => 'ym', '.intl' => 'us', 'login' => $username, 'passwd' => $password, ]; $request->content_type('application/x-www-form-urlencoded'); $request->header('Accept' => '*/*'); $request->header('Allowed' => 'GET HEAD PUT'); my $response = $ua->simple_request($request); my $url; while ( $response->is_redirect ) { $cookie_jar->extract_cookies($response); $url = GetRedirectUrl($response); $request = GET $url; $response = $ua->simple_request($request); } if ( !$response->is_success ) { print "Failed!\n"; die "Couldn't log in\n"; } if ( ($response->content) =~ /Invalid Password/ ) { print "Failed!\n"; die "Wrong password entered for $username\n"; } if ( ($response->content) =~ /ID does not exist/ ) { print "Failed!\n"; die "Yahoo user $username does not exist\n"; } # Detect country code from url (first 2 chars of url, # eg http://us.f116.mail.yahoo.com) $url=~/http:\/\/(.*?)\./; if (!$quiet) { print "Successfully logged in as $username.\n"; print "Country code : $1\n"; } # set localization strings my %strings = Localize($1); # setup URLs $url =~ /(http:\/\/.*?)\// ; my $baseurl = $1; my $homeurl = $baseurl . $homesuff ; my $msgurl = $baseurl . $msgsuff ; my $delurl = $homeurl . "\&DEL=Delete"; my $logouturl = $baseurl . "/ym/Logout"; my $emptyurl; # get all message IDs my $msgcount = 0; my $pagecount = 0; my $numMsgs ; my $startMsg ; my $endMsg ; my @msgids ; my $crumb; if ( $newOnly) { print "Only retrieving new messages\n" unless $quiet; $homeurl = $homeurl . "\&Nview=u"; } # empty trash before downloading messages, if requested # parsing the empty trash URL from a parsed inbox summary page does NOT work # because it changes message IDs so deleting messages would fail if ($emptyTrashBefore) { my $tmpurl = $baseurl . "/ym/Folders" ; $request = GET $tmpurl ; $response = $ua->simple_request($request); while ( $response->is_redirect ) { $cookie_jar->extract_cookies($response); # manually extract cookies my $url = GetRedirectUrl($response); # get new page $request = GET $url; # go to the new page $response = $ua->simple_request($request); } if ( !$response->is_success ) { if (!$quiet) { print "Failed!\n"; } die "Couldn't get Inbox listing.\n"; } $response->content =~ /href=\"((.*?)\?ET=1(.*?))\"/; if (defined $1) { $emptyurl = $baseurl . $1; } else { die "Couldn't get empty trash URL\n"; } EmptyTrash(); } my $mainPage; # loop over all inbox summary pages do { # get summary page my $tmpurl = $homeurl . "\&Npos=$pagecount" ; $request = GET $tmpurl ; $response = $ua->simple_request($request); while ( $response->is_redirect ) { $cookie_jar->extract_cookies($response); # manually extract cookies my $url = GetRedirectUrl($response); # get new page $request = GET $url; # go to the new page $response = $ua->simple_request($request); } if ( !$response->is_success ) { if (!$quiet) { print "Failed!\n"; } die "Couldn't get Inbox listing.\n"; } $mainPage = $response->content; #parse for number of messages if ($mainPage =~ /$strings{'msg_range'}/) { $startMsg = $1 ; $endMsg = $2 ; $numMsgs = $3; } elsif ($mainPage =~ /$strings{'no_msgs'}/) { if (!$quiet) { print "There are no messages to retrieve.\n"; } if ($emptyTrashAfter) { EmptyTrash(); } # if requested, empty trash if ($logout) { Logout; } # if requested, logout # if repeat interval is non-zero, loop after repeatInterval minutes if ($repeatInterval > 0) { sleep (60*$repeatInterval); goto startfetch ; } exit; } else { if (!$quiet) { print "Failed!\n"; } die "Can't retrieve number of messages.\n"; } if (!$quiet) { print "Getting Message IDs for messages $startMsg -" . " $endMsg.\n"; } # parse summary page for message IDs foreach my $word (split ' ', $mainPage) { if ($word =~ /ShowLetter\?MsgId=([0-9_]+)/ ) { $msgcount = $msgcount + 1; $msgids[$msgcount-1] = $1 ; } } $pagecount = $pagecount+1 ; # next summary page } until $numMsgs == $endMsg ; $mainPage =~ /name=\".crumb\" value=\"(.*?)\"/ ; if (defined($1)) { $crumb = $1; } elsif (!$noDelete) { die "Can't get crumb.\n"; } $mainPage =~ /\"((.*?)\?ET=1(.*?))\"/; if (defined $1) { $emptyurl = $baseurl . $1; } elsif (!$emptyTrashAfter) { die "Couldn't get empty trash URL.\n"; } if (!$quiet) { print "Got $msgcount Message IDs\n"; } my $delCount = 0; my $downloadCount = 0; if ($noDownload) { if (!$quiet) { print "Not downloading messages\n"; } foreach my $msgid (@msgids) { $delurl = $delurl . "\&Mid=$msgid"; # add message to deletion list $delCount = $delCount+1; } goto startDelete; } @msgids = reverse(@msgids); # download msg IDs in correct order # loop over all Message IDs foreach my $msgid (@msgids) { my $tmpurl = $baseurl . sprintf($bodyPartUrlTemplate, $msgid, "HEADER") ; my $header; $request = GET $tmpurl; if ( ! ($response = $ua->simple_request($request))) { print "\nFailed to get header of message $msgid. It will be " . "skipped and not deleted.\n"; next ; } $header = $response->content; my @foo = split /\n/, $header; my $fromLine = shift @foo; # save From_ line for later # Yahoo!'s From_ line is broken, fix it $fromLine =~ /From (.*?)\s*((Mon|Tue|Wed|Thu|Fri|Sat|Sun).*?)$/ ; my $fromName; my $fromRest; if ( ! defined ($1) ) { # can't parse From_ line, make a new one $fromName = '-'; $fromRest = scalar localtime ; } else { $fromName = $1 ; $fromRest = $2 ; } $fromName =~ s/ /_/g; $fromLine = "From " . $fromName . " " . $fromRest . "\n" ; my $mimeHead = new MIME::Head(\@foo); # if we can't parse at least To or From or Date assume this has failed unless ($mimeHead->get('From') || $mimeHead->get('To') || $mimeHead->get('Date')) { print "\nCan't find message $msgid. It will be skipped and not" . " deleted.\n"; next; } $mimeHead->delete("Content-Length"); # Remove Yahoo! Mail's internal headers $mimeHead->delete("X-RocketMail"); $mimeHead->delete("X-RocketUID"); $mimeHead->delete("X-RocketMIF"); $mimeHead->delete("X-RocketRCL"); my $message = $mimeHead->as_string."\n"; # message we are constructing $request = GET $baseurl . sprintf($bodyPartUrlTemplate, $msgid, "TEXT"); if ( ! ($response = $ua->simple_request($request) ) ) { print " \nFailed to get body part TEXT. " . "Skipping body part, message will not be deleted.\n" ; next ; } my $rawPart = $response->content ; # If Yahoo 404's we skip this message if ($rawPart =~ /^Yahoo! -\n404/ ) { print " \nFailed to get body of message $msgid." . "Message will be skipped and not deleted.\n" ; next ; } $message .= $rawPart . "\n\n"; $message =~ s/\n>From /\n>>From /g ; # slightly extended RFC 822 $message =~ s/\nFrom /\n>From /g ; $delurl = $delurl . "\&Mid=$msgid"; # add message to deletion list $delCount = $delCount+1; if ($useSpool) { # send From_line and message to the specified spool/file open SPOOL, "$spool" or die "Can't open output: $spool"; if ( $spoolMode ne 'pipe' ) { print SPOOL $fromLine ; } print SPOOL "$message"; close SPOOL; } # mail fowarding stuff goes here if ($useForward) { $smtp->mail($sendFromAddress); $smtp->to($sendToAddress); $smtp->data(); $smtp->datasend($message); $smtp->dataend(); } $downloadCount = $downloadCount +1 ; # Progress indicator if (!$quiet) { if ($downloadCount%5) { print "."; } elsif ($downloadCount%10) { print "I"; } else { printf("[%d]", $downloadCount); } } } if ($useForward) { $smtp->quit; } if (!$quiet) { print "\nFinished downloading $downloadCount messages.\n"; } startDelete: if ( ! $noDelete) { $delurl = $delurl . "\&.crumb=$crumb"; $request = GET $delurl ; $response = $ua->simple_request($request) || die "Failed to delete messages.\n"; # if we fail try again once if (($response->content) =~ /<head><title>Yahoo\!\s*-\s*404 Not Found<\/title>/s) { $response = $ua->simple_request($request) || die "Failed to delete messages.\n"; if (($response->content) =~ /<head><title>Yahoo\!\s*-\s*404 Not Found<\/title>/s ) { die "Failed to delete messages.\n"; } } if (!$quiet) { print $delCount . " message(s) have been deleted.\n"; } } else { if (!$quiet) { print "Messages have not been deleted.\n"; } } if ($emptyTrashAfter) { EmptyTrash(); } if ($logout) { Logout(); } # if repeat interval is non-zero, loop after repeatInterval minutes if ($repeatInterval > 0) { sleep (60*$repeatInterval); goto startfetch ; } ############################################################################### # Subroutines ############################################################################### # return the URL we're redirected to sub GetRedirectUrl($) { my $response = $_[0]; my $url = $response->header('Location') || return undef; # the Location URL is sometimes non-absolute which is not allowed, fix it local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; my $base = $response->base; $url = $HTTP::URI_CLASS->new($url, $base)->abs($base); return $url; } sub EmptyTrash() { $request = GET $emptyurl ; $response = $ua->simple_request($request); while ( $response->is_redirect ) { $cookie_jar->extract_cookies($response); # manually extract cookies my $url = GetRedirectUrl($response); # get new page $request = GET $url; # go to the new page $response = $ua->simple_request($request); } if ( !$response->is_success ) { if (!$quiet) { print "Failed!\n"; } die "Couldn't empty trash.\n"; } elsif (!$quiet) { print "Trash emptied.\n";} } sub Logout() { $request = GET $logouturl ; $response = $ua->simple_request($request); while ( $response->is_redirect ) { $cookie_jar->extract_cookies($response); # manually extract cookies my $url = GetRedirectUrl($response); # get new page $request = GET $url; # go to the new page $response = $ua->simple_request($request); } if ( !$response->is_success ) { if (!$quiet) { print "Failed!\n"; } die "Couldn't logout.\n"; } elsif (!$quiet) { print "Logged out.\n";} } sub ParseConfigFile() { if ($altConfigFile) { open(CONFIGFILE,$altConfigFile) || die "Can't open config file $altConfigFile\n" ; } else { open(CONFIGFILE, $ENV{"HOME"} . "/.fetchyahoorc") || open(CONFIGFILE,"/etc/fetchyahoorc") || return; } while (<CONFIGFILE>) { next if (/^\s*\#/); # ignore lines with starting with a # if (/username\s*=\s*(.*?)\s*$/i) { $username = $1; } elsif (/password\s*=\s*(.*?)\s*$/i) { $password = $1; } elsif (/use-spool\s*=\s*(.*?)\s*$/i) { $useSpool = $1; } elsif (/spool\s*=\s*(.*?)\s*$/i) { $spoolName = $1; } elsif (/spool-mode\s*=\s*(.*?)\s*$/i) { $spoolMode = $1; } elsif (/use-proxy\s*=\s*(.*?)\s*$/i) { $useProxy = $1; } elsif (/proxy-host\s*=\s*(.*?)\s*$/i) { $proxyHost = $1; } elsif (/proxy-port\s*=\s*(.*?)\s*$/i) { $proxyPort = $1; } elsif (/use-forward\s*=\s*(.*?)\s*$/i) { $useForward = $1; } elsif (/mail-host\s*=\s*(.*?)\s*$/i) { $mailHost = $1; } elsif (/send-to\s*=\s*(.*?)\s*$/i) { $sendToAddress = $1; } elsif (/send-from\s*=\s*(.*?)\s*$/i) { $sendFromAddress = $1; } elsif (/new-messages-only\s*=\s*(.*?)\s*$/i) { $newOnly = $1; } elsif (/no-delete\s*=\s*(.*?)\s*$/i) { $noDelete = $1; } elsif (/use-https\s*=\s*(.*?)\s*$/i) { $useHTTPS = $1; } elsif (/quiet\s*=\s*(.*?)\s*$/i) { $quiet = $1; } elsif (/empty-trash-after\s*=\s*(.*?)\s*$/i) { $emptyTrashAfter = $1; } elsif (/empty-trash-before\s*=\s*(.*?)\s*$/i) { $emptyTrashBefore = $1; } elsif (/logout\s*=\s*(.*?)\s*$/i) { $logout = $1; } elsif (/repeat-interval\s*=\s*(.*?)\s*$/i) { $repeatInterval = $1; } } close(CONFIGFILE); } sub Localize($) { my ($cc) = @_; my $strings; my %localized_strings = ('us' => {'msg_range' => 'showing (\d+)-(\d+) of (\d+)', 'no_msgs' => 'Folder\s*Inbox\s*has\s*no\s+', 'p_view' => 'Printable\ View', 'syntax_err'=> '###################### TBD #######################"' }, 'fr' => {'msg_range' => '(\d+)-(\d+) sur (\d+)', 'no_msgs' => 'Dossier\s*Boîte\s*de\s*réception\s*sans\s*messages', 'p_view' => 'Version\ imprimable', 'syntax_err'=> 'Erreur de syntaxe dans la demande' }, 'es' => {'msg_range' => 'Mostrando (\d+)-(\d+) de (\d+)', 'no_msgs' => 'La\s*carpeta\s*Bandeja\s*de\s*entrada\s*está\s*vacía', 'p_view' => 'Vista para imprimir', 'syntax_err'=> '###################### TBD #######################"' }, 'de' => {'msg_range' => 'werden (\d+)-(\d+) von (\d+)', 'no_msgs' => 'Der\s*Ordner\s*Posteingang\s*hat\s*keine\s*Nachrichten', 'p_view' => 'Druckansicht', 'syntax_err'=> '###################### TBD #######################"' }, 'it' => {'msg_range' => 'mostra (\d+)-(\d+) di (\d+)', 'no_msgs' => 'La\s*cartella\s*In\s*arrivo\s*non\s*contiene\s*messaggi', 'p_view' => 'Anteprima di stampa', 'syntax_err'=> '###################### TBD #######################"' }, 'br' => {'msg_range' => 'exibindo (\d+)-(\d+) de (\d+)', 'no_msgs' => 'A\s*pasta\s*Caixa\s*de\s*entrada\s*não', 'p_view' => 'Visualizar impressão', 'syntax_err'=> '###################### TBD #######################"' } ); if ($strings = $localized_strings{$cc}) { return %$strings; } else { return 0; } }