email-filter.pl
author indvdum
Wed, 22 Jun 2011 13:11:04 +0400
changeset 11 e96fa2e16510
parent 7 d81f7add59f6
permissions -rwxr-xr-x
* explanatory
indvdum@2
     1
#!/usr/bin/perl
indvdum@7
     2
indvdum@7
     3
# E-mail filter tool for Aptech
indvdum@7
     4
# version 0.2.0
indvdum@7
     5
# Copyright 2011, David Veliev (gotoindvdum@gmail.com).
indvdum@7
     6
# This program may be used under Apache License 2.0.
indvdum@7
     7
indvdum@3
     8
use strict;
indvdum@2
     9
indvdum@3
    10
# File-level state: filled in by parseCommandLine(), read by processFile().
my ($fileName, $excludeDomen, $outputFile);           # unset until parsed
my ($isRemoveDuplicates, $isSplitByDomens) = (0, 0);  # boolean switches, off by default

# Entry point: interpret the command line, then filter the file and print.
parseCommandLine();
processFile();
exit(0);
indvdum@5
    20
indvdum@5
    21
# Parse @ARGV into the file-level option variables.
#
# Keys:
#   -help, --help              print usage and exit 0
#   -version, --version        print program information and exit 0
#   --remove-duplicates        set $isRemoveDuplicates
#   --split-by-domens          set $isSplitByDomens
#   --exclude-domen[=]DOMEN    set $excludeDomen
#   --output-file[=]FILE       set $outputFile
# For the last two keys the value is the text after '=' or, when that is
# absent or empty, the following argument.
# Exactly one non-key argument is accepted; it is stored in $fileName.
#
# With no arguments at all the program banner is printed and the script
# exits 0. Any malformed command line ends in illegalUse(), which exits 1.
sub parseCommandLine {
	if (!@ARGV) {
		about();
		exit 0;
	}

	# Resolve the value of a "--key[=]VALUE" option: prefer the inline text,
	# otherwise consume the next argument; a missing value is illegal use.
	my $optionValue = sub {
		my ($inline) = @_;
		my $value = (defined $inline && $inline ne '') ? $inline : shift @ARGV;
		illegalUse() if !defined $value || $value eq '';
		return $value;
	};

	# Test with defined() rather than comparing against '' so that an empty
	# argument cannot silently end parsing and drop everything after it.
	while (defined(my $arg = shift @ARGV)) {
		if ($arg =~ /^-{1,2}.+$/) {
			if ($arg =~ /^-{1,2}version$/) {
				about();
				exit 0;
			} elsif ($arg =~ /^-{1,2}help$/) {
				help();
				exit 0;
			} elsif ($arg eq '--remove-duplicates') {
				$isRemoveDuplicates = 1;
			} elsif ($arg eq '--split-by-domens') {
				$isSplitByDomens = 1;
			} elsif ($arg =~ /\A--exclude-domen(?:=(.*))?\z/s) {
				# Anchored, and the whole text after '=' is taken, so values
				# containing characters such as '-' are not truncated.
				my $inline = $1;
				$excludeDomen = $optionValue->($inline);
			} elsif ($arg =~ /\A--output-file(?:=(.*))?\z/s) {
				# Same rule: paths with '/', '-' etc. are kept intact.
				my $inline = $1;
				$outputFile = $optionValue->($inline);
			} else {
				illegalUse();
			}
		} else {
			# Only one input file is accepted, and it must not be empty.
			illegalUse() if $arg eq '' || (defined $fileName && $fileName ne '');
			$fileName = $arg;
		}
	}

	# FILENAME is mandatory once any argument has been given.
	illegalUse() if !defined $fileName || $fileName eq '';
}
indvdum@3
    62
indvdum@3
    63
# Print the program banner (name, version, copyright, licence) to standard output.
sub about {
	# The banner starts with an empty line and ends with a newline.
	print <<'END_ABOUT';

E-mail filter tool for Aptech
version 0.2.0

Copyright 2011, David Veliev (gotoindvdum@gmail.com).

This program may be used under Apache License 2.0.
END_ABOUT
}
indvdum@2
    76
indvdum@3
    77
# Print the usage summary and the list of supported command-line keys
# to standard output.
sub help {
	# $0 is the script name as it was invoked.
	my $usageLine = 'usage: ' . $0 . ' [KEYS] FILENAME [KEYS]' . "\n";

	print $usageLine, <<'END_HELP';

Parse for e-mails file FILENAME with arguments KEYS and print result to standart output stream.

Arguments:
    --help                          print this help
    --version                       print version and information about this script
    --remove-duplicates             remove e-mail duplicates
    --split-by-domens               split e-mails by domens
    --exclude-domen[=]DOMEN         exclude e-mails with domen DOMEN
    --output-file[=]OUTFILENAME     redirect standart output to this file
END_HELP
}
indvdum@3
    93
indvdum@3
    94
# Report a malformed command line: print a notice followed by the usage
# text, then terminate the script with exit status 1.
sub illegalUse {
	print 'Illegal use!', "\n\n";
	help();
	exit(1);
}
indvdum@3
   100
indvdum@3
   101
# Extract e-mail addresses from the input file, apply the selected filters
# and print the result, one address per line.
#
# Reads the file-level settings:
#   $fileName            input path; a lone '-' means standard input
#   $isRemoveDuplicates  keep only the first occurrence of each address
#   $excludeDomen        drop addresses whose domain equals this string
#   $isSplitByDomens     sort by domain and put an empty line before each
#                        domain group (including the first one)
#   $outputFile          write here instead of standard output
#
# An address is a run of word characters and dots, an '@', and another such
# run; other characters (for example '-') end the address.
# Dies if the input or output file cannot be opened or the output cannot be
# written completely.
sub processFile {
	die "No input file given\n" if !defined $fileName || $fileName eq '';

	# Three-argument open: the path is never interpreted as a mode or a
	# pipe. '-' is handled explicitly so it still selects standard input.
	my $in;
	if ($fileName eq '-') {
		$in = \*STDIN;
	} else {
		open $in, '<', $fileName or die "Failed to open $fileName: $!\n";
	}

	my @emails;
	while (defined(my $line = <$in>)) {
		# /g in list context returns every address on the line in order.
		push @emails, $line =~ /([\.\w]+\@[\.\w]+)/g;
	}
	close $in;

	if ($isRemoveDuplicates) {
		my %seen;
		@emails = grep { !$seen{$_}++ } @emails;
	}

	# Domain of every distinct address, computed once and reused by the
	# exclude, sort and grouping steps below.
	my %domainOf;
	for my $email (@emails) {
		next if exists $domainOf{$email};
		$domainOf{$email} = $email =~ /[\.\w]+\@([\.\w]+)/ ? $1 : '';
	}

	# Compare against undef/'' rather than truthiness so a value of "0"
	# is still honoured.
	if (defined $excludeDomen && $excludeDomen ne '') {
		@emails = grep { $domainOf{$_} ne $excludeDomen } @emails;
	}

	if ($isSplitByDomens) {
		@emails = sort { $domainOf{$a} cmp $domainOf{$b} } @emails;

		my @grouped;
		my %seenDomain;
		for my $email (@emails) {
			# An empty entry becomes the blank separator line of a new group.
			push @grouped, '' if !$seenDomain{ $domainOf{$email} }++;
			push @grouped, $email;
		}
		@emails = @grouped;
	}

	my $toFile = defined $outputFile && $outputFile ne '';
	my $out;
	if ($toFile) {
		open $out, '>', $outputFile or die "Failed to open $outputFile: $!\n";
	} else {
		$out = \*STDOUT;
	}

	print {$out} "$_\n" for @emails;

	# Buffered write errors only surface at close time.
	if ($toFile) {
		close $out or die "Failed to write $outputFile: $!\n";
	}
}