email-filter.pl
author indvdum
Wed, 22 Jun 2011 00:36:09 +0400
changeset 9 0f0716105406
parent 7 d81f7add59f6
permissions -rwxr-xr-x
* recoding test files
     1 #!/usr/bin/perl
     2 
     3 # E-mail filter tool for Aptech
     4 # version 0.2.0
     5 # Copyright 2011, David Veliev (gotoindvdum@gmail.com).
     6 # This program may be used under Apache License 2.0.
     7 
     8 use strict;
     9 
    10 # Глобальные переменные
    11 my $fileName;
    12 my $isRemoveDuplicates = 0;
    13 my $isSplitByDomens = 0;
    14 my $excludeDomen;
    15 my $outputFile;
    16 
    17 parseCommandLine();
    18 processFile();
    19 exit 0;
    20 
    21 # Парсер параметров командной строки
    22 sub parseCommandLine {
    23 	my $arg = shift(@ARGV);
    24 	my $isHasArgs = 0;
    25 	until ($arg eq '') {
    26 		$isHasArgs = 1;
    27 		if($arg =~ /^-{1,2}.+$/){
    28 			if ($arg =~ /^-{1,2}version$/){
    29 				about();
    30 				exit 0;
    31 			} elsif ($arg =~ /^-{1,2}help$/){
    32 				help();
    33 				exit 0;
    34 			} elsif ($arg eq '--remove-duplicates'){
    35 				$isRemoveDuplicates = 1;
    36 			} elsif ($arg eq '--split-by-domens'){
    37 				$isSplitByDomens = 1;
    38 			} elsif ($arg =~ /--exclude-domen[=]{0,1}([\.\w]*)/){
    39 				$excludeDomen = $1;
    40 				illegalUse() if $excludeDomen ne '' && $arg !~ /--exclude-domen=[\.\w]*/;
    41 				$excludeDomen = shift(@ARGV) if $excludeDomen eq '';
    42 				illegalUse() if $excludeDomen eq '';
    43 			} elsif ($arg =~ /--output-file[=]{0,1}([\.\w]*)/){
    44 				$outputFile = $1;
    45 				illegalUse() if $outputFile ne '' && $arg !~ /--output-file=[\.\w]*/;
    46 				$outputFile = shift(@ARGV) if $outputFile eq '';
    47 				illegalUse() if $outputFile eq '';
    48 			} else {
    49 				illegalUse();
    50 			}
    51 		} else {
    52 			illegalUse() if $fileName ne '';
    53 			$fileName = $arg;
    54 		}
    55 		$arg = shift(@ARGV);
    56 	};
    57 	if (!$isHasArgs) {
    58 		about();
    59 		exit 0;
    60 	}
    61 }
    62 
    63 # Вывод информации о программе
    64 sub about {
    65 	my $about = q {
    66 E-mail filter tool for Aptech
    67 version 0.2.0
    68 
    69 Copyright 2011, David Veliev (gotoindvdum@gmail.com).
    70 
    71 This program may be used under Apache License 2.0.
    72 };
    73 	
    74 	print $about;
    75 }
    76 
    77 # Вывод доступных параметров командной строки
    78 sub help {
    79 	my $help = q {usage: }.$0. q { [KEYS] FILENAME [KEYS]
    80 
    81 Parse for e-mails file FILENAME with arguments KEYS and print result to standart output stream.
    82 
    83 Arguments:
    84     --help                          print this help
    85     --version                       print version and information about this script
    86     --remove-duplicates             remove e-mail duplicates
    87     --split-by-domens               split e-mails by domens
    88     --exclude-domen[=]DOMEN         exclude e-mails with domen DOMEN
    89     --output-file[=]OUTFILENAME     redirect standart output to this file
    90 };
    91 	print $help;
    92 }
    93 
    94 # Неправильный формат параметров командной строки
    95 sub illegalUse {
    96 	print "Illegal use!\n\n";
    97 	help();
    98 	exit 1;	
    99 }
   100 
   101 # Обработка файла
   102 sub processFile {
   103 	open FILE, $fileName or die "Failed to open $fileName: $!\n";
   104 	my @emails;
   105 	while(<FILE>){
   106 		push @emails, $1 while s/([\.\w]+@[\.\w]+)//;
   107 	}
   108 	close FILE;
   109 	if($isRemoveDuplicates){
   110 		my %tmp;
   111 		@emails = grep {! $tmp{$_}++} @emails;
   112 	}
   113 	if($excludeDomen){
   114 		@emails = grep {
   115 			$1 ne $excludeDomen if $_ =~ /[\.\w]+@([\.\w]+)/;
   116 		} @emails;
   117 	}
   118 	if($isSplitByDomens){
   119 		@emails = sort {
   120 			my $a_domen = $1 if $a =~ /[\.\w]+@([\.\w]+)/;
   121 			my $b_domen = $1 if $b =~ /[\.\w]+@([\.\w]+)/;
   122 			$a_domen cmp $b_domen;
   123 		} @emails;
   124 		my @tmp;
   125 		my %tmp;
   126 		while($#emails >= 0){
   127 			my $email = shift @emails;
   128 			my $domen = $1 if $email =~ /[\.\w]+@([\.\w]+)/;
   129 			push @tmp, '' if !$tmp{$domen}++;
   130 			push @tmp, $email;
   131 		}
   132 		@emails = @tmp;
   133 	}
   134 	if($outputFile){
   135 		open STDOUT, ">$outputFile" or die "Failed to open $outputFile: $!\n";
   136 	}
   137 	print "$_\n" for (@emails);
   138 }