1.1 --- a/email-filter.pl Tue Jun 21 18:54:22 2011 +0400
1.2 +++ b/email-filter.pl Wed Jun 22 00:26:23 2011 +0400
1.3 @@ -1,4 +1,10 @@
1.4 #!/usr/bin/perl
1.5 +
1.6 +# E-mail filter tool for Aptech
1.7 +# version 0.2.0
1.8 +# Copyright 2011, David Veliev (gotoindvdum@gmail.com).
1.9 +# This program may be used under Apache License 2.0.
1.10 +
1.11 use strict;
1.12
1.13 # Глобальные переменные
1.14 @@ -6,6 +12,7 @@
1.15 my $isRemoveDuplicates = 0;
1.16 my $isSplitByDomens = 0;
1.17 my $excludeDomen;
1.18 +my $outputFile;
1.19
1.20 parseCommandLine();
1.21 processFile();
1.22 @@ -28,11 +35,16 @@
1.23 $isRemoveDuplicates = 1;
1.24 } elsif ($arg eq '--split-by-domens'){
1.25 $isSplitByDomens = 1;
1.26 - } elsif ($arg =~ /--exclude-domen[=]{0,1}(\w*)/){
1.27 + } elsif ($arg =~ /--exclude-domen[=]{0,1}([\.\w]*)/){
1.28 $excludeDomen = $1;
1.29 - illegalUse() if $excludeDomen ne '' && $arg !~ /--exclude-domen=\w*/;
1.30 + illegalUse() if $excludeDomen ne '' && $arg !~ /--exclude-domen=[\.\w]*/;
1.31 $excludeDomen = shift(@ARGV) if $excludeDomen eq '';
1.32 illegalUse() if $excludeDomen eq '';
1.33 + } elsif ($arg =~ /--output-file[=]{0,1}([\.\w]*)/){
1.34 + $outputFile = $1;
1.35 + illegalUse() if $outputFile ne '' && $arg !~ /--output-file=[\.\w]*/;
1.36 + $outputFile = shift(@ARGV) if $outputFile eq '';
1.37 + illegalUse() if $outputFile eq '';
1.38 } else {
1.39 illegalUse();
1.40 }
1.41 @@ -52,7 +64,7 @@
1.42 sub about {
1.43 my $about = q {
1.44 E-mail filter tool for Aptech
1.45 -version 0.1
1.46 +version 0.2.0
1.47
1.48 Copyright 2011, David Veliev (gotoindvdum@gmail.com).
1.49
1.50 @@ -69,11 +81,12 @@
1.51 Parse for e-mails file FILENAME with arguments KEYS and print result to standart output stream.
1.52
1.53 Arguments:
1.54 - --help print this help
1.55 - --version print version and information about this script
1.56 - --remove-duplicates remove e-mail duplicates
1.57 - --split-by-domens split e-mails by domens
1.58 - --exclude-domen[=]DOMEN exclude e-mails with domen DOMEN
1.59 + --help print this help
1.60 + --version print version and information about this script
1.61 + --remove-duplicates remove e-mail duplicates
1.62 + --split-by-domens split e-mails by domens
1.63 + --exclude-domen[=]DOMEN exclude e-mails with domen DOMEN
1.64 + --output-file[=]OUTFILENAME redirect standart output to this file
1.65 };
1.66 print $help;
1.67 }
1.68 @@ -90,8 +103,36 @@
1.69 open FILE, $fileName or die "Failed to open $fileName: $!\n";
1.70 my @emails;
1.71 while(<FILE>){
1.72 - my (@test) = $_ =~ /.*(\w@\w\.\w)*.*/;
1.73 - print @test;
1.74 + push @emails, $1 while s/([\.\w]+@[\.\w]+)//;
1.75 }
1.76 close FILE;
1.77 + if($isRemoveDuplicates){
1.78 + my %tmp;
1.79 + @emails = grep {! $tmp{$_}++} @emails;
1.80 + }
1.81 + if($excludeDomen){
1.82 + @emails = grep {
1.83 + $1 ne $excludeDomen if $_ =~ /[\.\w]+@([\.\w]+)/;
1.84 + } @emails;
1.85 + }
1.86 + if($isSplitByDomens){
1.87 + @emails = sort {
1.88 + my $a_domen = $1 if $a =~ /[\.\w]+@([\.\w]+)/;
1.89 + my $b_domen = $1 if $b =~ /[\.\w]+@([\.\w]+)/;
1.90 + $a_domen cmp $b_domen;
1.91 + } @emails;
1.92 + my @tmp;
1.93 + my %tmp;
1.94 + while($#emails >= 0){
1.95 + my $email = shift @emails;
1.96 + my $domen = $1 if $email =~ /[\.\w]+@([\.\w]+)/;
1.97 + push @tmp, '' if !$tmp{$domen}++;
1.98 + push @tmp, $email;
1.99 + }
1.100 + @emails = @tmp;
1.101 + }
1.102 + if($outputFile){
1.103 + open STDOUT, ">$outputFile" or die "Failed to open $outputFile: $!\n";
1.104 + }
1.105 + print "$_\n" for (@emails);
1.106 }
1.107 \ No newline at end of file