# HG changeset patch # User indvdum # Date 1308687983 -14400 # Node ID d81f7add59f6a68ab9aceba0cf6e7144a0e9ed38 # Parent 37eab168f50c6d3beb4bd403b854d32ebb350c1f 0.2.0 release diff -r 37eab168f50c -r d81f7add59f6 email-filter.pl --- a/email-filter.pl Tue Jun 21 18:54:22 2011 +0400 +++ b/email-filter.pl Wed Jun 22 00:26:23 2011 +0400 @@ -1,4 +1,10 @@ #!/usr/bin/perl + +# E-mail filter tool for Aptech +# version 0.2.0 +# Copyright 2011, David Veliev (gotoindvdum@gmail.com). +# This program may be used under Apache License 2.0. + use strict; # Глобальные переменные @@ -6,6 +12,7 @@ my $isRemoveDuplicates = 0; my $isSplitByDomens = 0; my $excludeDomen; +my $outputFile; parseCommandLine(); processFile(); @@ -28,11 +35,16 @@ $isRemoveDuplicates = 1; } elsif ($arg eq '--split-by-domens'){ $isSplitByDomens = 1; - } elsif ($arg =~ /--exclude-domen[=]{0,1}(\w*)/){ + } elsif ($arg =~ /--exclude-domen[=]{0,1}([\.\w]*)/){ $excludeDomen = $1; - illegalUse() if $excludeDomen ne '' && $arg !~ /--exclude-domen=\w*/; + illegalUse() if $excludeDomen ne '' && $arg !~ /--exclude-domen=[\.\w]*/; $excludeDomen = shift(@ARGV) if $excludeDomen eq ''; illegalUse() if $excludeDomen eq ''; + } elsif ($arg =~ /--output-file[=]{0,1}([\.\w]*)/){ + $outputFile = $1; + illegalUse() if $outputFile ne '' && $arg !~ /--output-file=[\.\w]*/; + $outputFile = shift(@ARGV) if $outputFile eq ''; + illegalUse() if $outputFile eq ''; } else { illegalUse(); } @@ -52,7 +64,7 @@ sub about { my $about = q { E-mail filter tool for Aptech -version 0.1 +version 0.2.0 Copyright 2011, David Veliev (gotoindvdum@gmail.com). @@ -69,11 +81,12 @@ Parse for e-mails file FILENAME with arguments KEYS and print result to standart output stream. Arguments: - --help print this help - --version print version and information about this script - --remove-duplicates remove e-mail duplicates - --split-by-domens split e-mails by domens - --exclude-domen[=]DOMEN exclude e-mails with domen DOMEN + --help print this help + --version print version and information about this script + --remove-duplicates remove e-mail duplicates + --split-by-domens split e-mails by domens + --exclude-domen[=]DOMEN exclude e-mails with domen DOMEN + --output-file[=]OUTFILENAME redirect standart output to this file }; print $help; } @@ -90,8 +103,36 @@ open FILE, $fileName or die "Failed to open $fileName: $!\n"; my @emails; while(){ - my (@test) = $_ =~ /.*(\w@\w\.\w)*.*/; - print @test; + push @emails, $1 while s/([\.\w]+@[\.\w]+)//; } close FILE; + if($isRemoveDuplicates){ + my %tmp; + @emails = grep {! $tmp{$_}++} @emails; + } + if($excludeDomen){ + @emails = grep { + $1 ne $excludeDomen if $_ =~ /[\.\w]+@([\.\w]+)/; + } @emails; + } + if($isSplitByDomens){ + @emails = sort { + my $a_domen = $1 if $a =~ /[\.\w]+@([\.\w]+)/; + my $b_domen = $1 if $b =~ /[\.\w]+@([\.\w]+)/; + $a_domen cmp $b_domen; + } @emails; + my @tmp; + my %tmp; + while($#emails >= 0){ + my $email = shift @emails; + my $domen = $1 if $email =~ /[\.\w]+@([\.\w]+)/; + push @tmp, '' if !$tmp{$domen}++; + push @tmp, $email; + } + @emails = @tmp; + } + if($outputFile){ + open STDOUT, ">$outputFile" or die "Failed to open $outputFile: $!\n"; + } + print "$_\n" for (@emails); } \ No newline at end of file