email-filter.pl
changeset 7 d81f7add59f6
parent 6 37eab168f50c
equal deleted inserted replaced
6:37eab168f50c 7:d81f7add59f6
     1 #!/usr/bin/perl
     1 #!/usr/bin/perl
       
     2 
       
     3 # E-mail filter tool for Aptech
       
     4 # version 0.2.0
       
     5 # Copyright 2011, David Veliev (gotoindvdum@gmail.com).
       
     6 # This program may be used under Apache License 2.0.
       
     7 
     2 use strict;
     8 use strict;
     3 
     9 
     4 # Глобальные переменные
    10 # Глобальные переменные
     5 my $fileName;
    11 my $fileName;
     6 my $isRemoveDuplicates = 0;
    12 my $isRemoveDuplicates = 0;
     7 my $isSplitByDomens = 0;
    13 my $isSplitByDomens = 0;
     8 my $excludeDomen;
    14 my $excludeDomen;
       
    15 my $outputFile;
     9 
    16 
    10 parseCommandLine();
    17 parseCommandLine();
    11 processFile();
    18 processFile();
    12 exit 0;
    19 exit 0;
    13 
    20 
    26 				exit 0;
    33 				exit 0;
    27 			} elsif ($arg eq '--remove-duplicates'){
    34 			} elsif ($arg eq '--remove-duplicates'){
    28 				$isRemoveDuplicates = 1;
    35 				$isRemoveDuplicates = 1;
    29 			} elsif ($arg eq '--split-by-domens'){
    36 			} elsif ($arg eq '--split-by-domens'){
    30 				$isSplitByDomens = 1;
    37 				$isSplitByDomens = 1;
    31 			} elsif ($arg =~ /--exclude-domen[=]{0,1}(\w*)/){
    38 			} elsif ($arg =~ /--exclude-domen[=]{0,1}([\.\w]*)/){
    32 				$excludeDomen = $1;
    39 				$excludeDomen = $1;
    33 				illegalUse() if $excludeDomen ne '' && $arg !~ /--exclude-domen=\w*/;
    40 				illegalUse() if $excludeDomen ne '' && $arg !~ /--exclude-domen=[\.\w]*/;
    34 				$excludeDomen = shift(@ARGV) if $excludeDomen eq '';
    41 				$excludeDomen = shift(@ARGV) if $excludeDomen eq '';
    35 				illegalUse() if $excludeDomen eq '';
    42 				illegalUse() if $excludeDomen eq '';
       
    43 			} elsif ($arg =~ /--output-file[=]{0,1}([\.\w]*)/){
       
    44 				$outputFile = $1;
       
    45 				illegalUse() if $outputFile ne '' && $arg !~ /--output-file=[\.\w]*/;
       
    46 				$outputFile = shift(@ARGV) if $outputFile eq '';
       
    47 				illegalUse() if $outputFile eq '';
    36 			} else {
    48 			} else {
    37 				illegalUse();
    49 				illegalUse();
    38 			}
    50 			}
    39 		} else {
    51 		} else {
    40 			illegalUse() if $fileName ne '';
    52 			illegalUse() if $fileName ne '';
    50 
    62 
    51 # Вывод информации о программе
    63 # Вывод информации о программе
    52 sub about {
    64 sub about {
    53 	my $about = q {
    65 	my $about = q {
    54 E-mail filter tool for Aptech
    66 E-mail filter tool for Aptech
    55 version 0.1
    67 version 0.2.0
    56 
    68 
    57 Copyright 2011, David Veliev (gotoindvdum@gmail.com).
    69 Copyright 2011, David Veliev (gotoindvdum@gmail.com).
    58 
    70 
    59 This program may be used under Apache License 2.0.
    71 This program may be used under Apache License 2.0.
    60 };
    72 };
    67 	my $help = q {usage: }.$0. q { [KEYS] FILENAME [KEYS]
    79 	my $help = q {usage: }.$0. q { [KEYS] FILENAME [KEYS]
    68 
    80 
    69 Parse for e-mails file FILENAME with arguments KEYS and print result to standart output stream.
    81 Parse for e-mails file FILENAME with arguments KEYS and print result to standart output stream.
    70 
    82 
    71 Arguments:
    83 Arguments:
    72     --help                      print this help
    84     --help                          print this help
    73     --version                   print version and information about this script
    85     --version                       print version and information about this script
    74     --remove-duplicates         remove e-mail duplicates
    86     --remove-duplicates             remove e-mail duplicates
    75     --split-by-domens           split e-mails by domens
    87     --split-by-domens               split e-mails by domens
    76     --exclude-domen[=]DOMEN     exclude e-mails with domen DOMEN
    88     --exclude-domen[=]DOMEN         exclude e-mails with domen DOMEN
       
    89     --output-file[=]OUTFILENAME     redirect standart output to this file
    77 };
    90 };
    78 	print $help;
    91 	print $help;
    79 }
    92 }
    80 
    93 
    81 # Неправильный формат параметров командной строки
    94 # Неправильный формат параметров командной строки
    88 # Обработка файла
   101 # Обработка файла
    89 sub processFile {
   102 sub processFile {
    90 	open FILE, $fileName or die "Failed to open $fileName: $!\n";
   103 	open FILE, $fileName or die "Failed to open $fileName: $!\n";
    91 	my @emails;
   104 	my @emails;
    92 	while(<FILE>){
   105 	while(<FILE>){
    93 		my (@test) = $_ =~ /.*(\w@\w\.\w)*.*/;
   106 		push @emails, $1 while s/([\.\w]+@[\.\w]+)//;
    94 		print @test; 
       
    95 	}
   107 	}
    96 	close FILE;
   108 	close FILE;
       
   109 	if($isRemoveDuplicates){
       
   110 		my %tmp;
       
   111 		@emails = grep {! $tmp{$_}++} @emails;
       
   112 	}
       
   113 	if($excludeDomen){
       
   114 		@emails = grep {
       
   115 			$1 ne $excludeDomen if $_ =~ /[\.\w]+@([\.\w]+)/;
       
   116 		} @emails;
       
   117 	}
       
   118 	if($isSplitByDomens){
       
   119 		@emails = sort {
       
   120 			my $a_domen = $1 if $a =~ /[\.\w]+@([\.\w]+)/;
       
   121 			my $b_domen = $1 if $b =~ /[\.\w]+@([\.\w]+)/;
       
   122 			$a_domen cmp $b_domen;
       
   123 		} @emails;
       
   124 		my @tmp;
       
   125 		my %tmp;
       
   126 		while($#emails >= 0){
       
   127 			my $email = shift @emails;
       
   128 			my $domen = $1 if $email =~ /[\.\w]+@([\.\w]+)/;
       
   129 			push @tmp, '' if !$tmp{$domen}++;
       
   130 			push @tmp, $email;
       
   131 		}
       
   132 		@emails = @tmp;
       
   133 	}
       
   134 	if($outputFile){
       
   135 		open STDOUT, ">$outputFile" or die "Failed to open $outputFile: $!\n";
       
   136 	}
       
   137 	print "$_\n" for (@emails);
    97 }
   138 }