indvdum@2
|
1 |
#!/usr/bin/perl
|
indvdum@7
|
2 |
|
indvdum@7
|
3 |
# E-mail filter tool for Aptech
|
indvdum@7
|
4 |
# version 0.2.0
|
indvdum@7
|
5 |
# Copyright 2011, David Veliev (gotoindvdum@gmail.com).
|
indvdum@7
|
6 |
# This program may be used under Apache License 2.0.
|
indvdum@7
|
7 |
|
indvdum@3
|
8 |
use strict;
|
indvdum@2
|
9 |
|
indvdum@3
|
10 |
# Global settings: filled in by parseCommandLine(), read by processFile().
my ($fileName, $excludeDomen, $outputFile);
my ($isRemoveDuplicates, $isSplitByDomens) = (0, 0);

# Main sequence: read the command line, then filter the file.
parseCommandLine();
processFile();
exit 0;
|
indvdum@5
|
20 |
|
indvdum@5
|
21 |
# Parse the command-line arguments in @ARGV into the file-level settings.
#
# Fills $fileName, $isRemoveDuplicates, $isSplitByDomens, $excludeDomen and
# $outputFile.
#
# Exits with status 0 after printing the program information when called
# without arguments or with -version/--version, and after printing the usage
# text for -help/--help.
#
# An unknown option, a malformed option, an option without its value, a second
# file name, an empty argument or a missing file name ends the program through
# illegalUse().
sub parseCommandLine {
    # No arguments at all: just introduce the program.
    if (!@ARGV) {
        about();
        exit 0;
    }

    # Loop on definedness, not on a comparison with '', so that an empty
    # argument cannot silently end the parsing half-way through.
    while (defined(my $arg = shift @ARGV)) {
        if ($arg !~ /^-{1,2}.+$/) {
            # Positional argument: the input file. Exactly one is accepted.
            illegalUse() if $arg eq '';
            illegalUse() if defined $fileName && $fileName ne '';
            $fileName = $arg;
            next;
        }

        if ($arg =~ /^-{1,2}version$/) {
            about();
            exit 0;
        }
        elsif ($arg =~ /^-{1,2}help$/) {
            help();
            exit 0;
        }
        elsif ($arg eq '--remove-duplicates') {
            $isRemoveDuplicates = 1;
        }
        elsif ($arg eq '--split-by-domens') {
            $isSplitByDomens = 1;
        }
        # The value patterns are anchored and accept any characters after
        # '=', so values such as "my-site.com" or "/tmp/out.txt" are taken
        # whole instead of being truncated at the first unexpected character.
        elsif ($arg =~ /\A--exclude-domen(?:=(.*))?\z/s) {
            $excludeDomen = _takeOptionValue($1);
        }
        elsif ($arg =~ /\A--output-file(?:=(.*))?\z/s) {
            $outputFile = _takeOptionValue($1);
        }
        else {
            illegalUse();
        }
    }

    # The usage line requires FILENAME; without it there is nothing to parse.
    illegalUse() if !defined $fileName;
}

# Return the value of an option: the inline "--key=VALUE" part when it is
# non-empty, otherwise the next argument taken from @ARGV. Ends the program
# through illegalUse() when neither yields a non-empty value.
sub _takeOptionValue {
    my ($inlineValue) = @_;

    my $value = $inlineValue;
    if (!defined $value || $value eq '') {
        $value = shift @ARGV;
    }
    illegalUse() if !defined $value || $value eq '';

    return $value;
}
|
indvdum@3
|
62 |
|
indvdum@3
|
63 |
# Print the program name, version, copyright and license notice.
sub about {
    # Single-quoted heredoc: nothing is interpolated, so the '@' in the
    # e-mail address is printed literally.
    print <<'END_ABOUT';

E-mail filter tool for Aptech
version 0.2.0

Copyright 2011, David Veliev (gotoindvdum@gmail.com).

This program may be used under Apache License 2.0.
END_ABOUT
}
|
indvdum@2
|
76 |
|
indvdum@3
|
77 |
# Print the usage text: the invocation synopsis (built from the script name
# in $0) followed by the list of supported command-line arguments.
sub help {
    my @usageLines = (
        'usage: ' . $0 . ' [KEYS] FILENAME [KEYS]',
        '',
        'Parse for e-mails file FILENAME with arguments KEYS and print result to standart output stream.',
        '',
        'Arguments:',
        '--help print this help',
        '--version print version and information about this script',
        '--remove-duplicates remove e-mail duplicates',
        '--split-by-domens split e-mails by domens',
        '--exclude-domen[=]DOMEN exclude e-mails with domen DOMEN',
        '--output-file[=]OUTFILENAME redirect standart output to this file',
    );

    # Every line, including the last one, is terminated by a newline.
    print join("\n", @usageLines) . "\n";
}
|
indvdum@3
|
93 |
|
indvdum@3
|
94 |
# Report a malformed command line: print the error notice followed by the
# usage text, then terminate the program with exit status 1.
sub illegalUse {
    my $notice = "Illegal use!\n\n";
    print $notice;
    help();
    exit(1);
}
|
indvdum@3
|
100 |
|
indvdum@3
|
101 |
# Extract e-mail addresses from the input file, apply the filters selected on
# the command line and print the result, one address per line.
#
# Reads the file-level settings:
#   $fileName            input file to scan
#   $isRemoveDuplicates  keep only the first occurrence of each address
#   $excludeDomen        drop addresses whose domain equals this value
#   $isSplitByDomens     group addresses by domain (domains in string order),
#                        with an empty line in front of every group
#   $outputFile          when set, STDOUT is reopened onto this file
#
# Dies when no input file is set or when the input or output file cannot be
# opened.
sub processFile {
    die "Failed to open input file: no file name given\n"
        if !defined $fileName || $fileName eq '';

    # Local part: word characters plus '.', '+' and '-'.
    # Domain: dot-separated labels that may contain '-'.
    # This keeps "john-doe@my-site.com" whole and leaves a sentence-ending
    # '.' after an address out of the match.
    my $emailRe = qr/[\w.+-]+@[\w-]+(?:\.[\w-]+)*/;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode or as a command to pipe from.
    open my $in, '<', $fileName or die "Failed to open $fileName: $!\n";
    my @emails;
    while (my $line = <$in>) {
        # The pattern has no capture groups, so /g in list context yields
        # every complete address found on the line.
        push @emails, $line =~ /$emailRe/g;
    }
    close $in;

    if ($isRemoveDuplicates) {
        my %seen;
        @emails = grep { !$seen{$_}++ } @emails;
    }

    if (defined $excludeDomen && $excludeDomen ne '') {
        @emails = grep { _emailDomain($_) ne $excludeDomen } @emails;
    }

    if ($isSplitByDomens) {
        # Bucket by domain in input order, then emit the buckets in domain
        # order. Order inside a group does not depend on sort stability.
        my %emailsOf;
        push @{ $emailsOf{ _emailDomain($_) } }, $_ for @emails;

        # The empty string becomes a blank separator line before each group,
        # including the first one.
        @emails = map { ('', @{ $emailsOf{$_} }) } sort keys %emailsOf;
    }

    # Compare with '' instead of testing truthiness so a file named "0" works.
    if (defined $outputFile && $outputFile ne '') {
        open STDOUT, '>', $outputFile
            or die "Failed to open $outputFile: $!\n";
    }
    print "$_\n" for @emails;
}

# Return the domain part (everything after the '@') of an e-mail address.
sub _emailDomain {
    my ($email) = @_;
    return (split /@/, $email, 2)[1];
}