Regexp-F2B/lib/Regexp/F2B.pm

431 рядки
11 KiB
Perl

package Regexp::F2B;
use 5.006;
use strict;
use warnings;
use File::Slurp;
use Data::Dumper;
use File::Spec;
use Regexp::IPv6 qw($IPv6_re);
use Regexp::IPv4 qw($IPv4_re);
$Data::Dumper::Sortkeys = 1;
=head1 NAME
Regexp::F2B - The great new Regexp::F2B!
=head1 VERSION
Version 0.0.1
=cut
our $VERSION = '0.0.1';
=head1 SYNOPSIS
use Regexp::F2B;
use Data::Dumper;
my $f2b = Regexp::F2B->new(
regexp=\@to_attempt_to_match,
lines=>1
);
my $found=$f2b->proc_line($some_log_line);
print Dumper($found);
=head1 METHODS
=head2 new
Args
- lines :: Number of match lines to use for mathcing. Defaults to 1.
- pre_regexp ::
- start_chomp :: Remove date at the start.
- Default :: 1
- start_pattern :: Removes this from the start of log lines.
- Default :: (?<b_time>[a-zA-Z]+)\ +(?<d_time>\d+)\ +(?<T_time>(?<R_time>(?<H_time>\d+)\:(?<M_time>\d+))\:(?<S_time>\d+))\ +
=cut
sub new {
my ( $blank, %opts ) = @_;
# make sure we have something sane for lines
if ( !defined $opts{lines} ) {
$opts{lines} = 1;
} else {
if ( $opts{lines} !~ /^[0-9]+$/ ) {
die( 'lines is set to "' . $opts{lines} . '" which is not numeric' );
}
if ( $opts{lines} < 1 ) {
die( 'lines is set to "' . $opts{lines} . '" which is less than 1' );
}
}
if ( !defined( $opts{regexp} ) ) {
die('regexp is undefined');
} else {
if ( ref( $opts{regexp} ) ne 'ARRAY' ) {
die( 'regexp is a ' . ref( $opts{regexp} ) . ' and not a array' );
}
}
my $int = 0;
while ( defined( $opts{regexp}[$int] ) ) {
if ( ref( \$opts{regexp}[$int] ) ne 'SCALAR' ) {
die( 'regexp[' . $int . '] is a ' . ref( \$opts{regexp}[$int] ) . ' and not a scalar' );
} elsif ( ref( \$opts{regexp}[$int] ) ne 'SCALAR'
&& ref( $opts{regexp}[$int] ) ne 'SCALAR' )
{
die( 'regexp[' . $int . '] is a ' . ref( $opts{regexp}[$int] ) . ' and not a scalar' );
}
if ( $opts{regexp}[$int] =~ /\(\?\{/ ) {
die( 'regexp[' . $int . '], "' . $opts{regexp}[$int] . '", contains "(?{"' );
}
$int++;
} ## end while ( defined( $opts{regexp}[$int] ) )
if ( !defined( $opts{pre_regexp} ) ) {
$opts{pre_regexp} = [];
} else {
if ( ref( $opts{pre_regexp} ) ne 'ARRAY' ) {
die( 'regexp is a ' . ref( $opts{pre_regexp} ) . ' and not a array' );
}
}
if ( !defined( $opts{start_chomp} ) ) {
$opts{start_chomp} = 1;
}
if ( !defined( $opts{start_pattern} ) ) {
$opts{start_pattern}
= '(?<b_time>[a-zA-Z]+)\ +(?<d_time>\d+)\ +(?<T_time>(?<R_time>(?<H_time>\d+)\:(?<M_time>\d+))\:(?<S_time>\d+))\ +';
}
my $self = {
lines => $opts{lines},
log_lines => [],
pre_regexp => $opts{pre_regexp},
regexp => $opts{regexp},
start_pattern => $opts{start_pattern},
start_chomp => $opts{start_chomp},
};
bless $self;
my $to_drop = { regexp => [], pre_regexp => [] };
#
#
# process each pre_regexp
#
#
$int = 0;
my @pre_regexp_tmp;
while ( defined( $self->{pre_regexp}[$int] ) ) {
my $regexp = 'pre_regexp';
my $value = $self->{$regexp}[$int];
# pre_regexp should not match any hosts etc... only for checking if it is a line we want and
# maybe grabbing the bits we want to check via regexp
if ( $value =~ /\<HOST\>/
|| $value =~ /\<CIDR\>/
|| $value =~ /\<SUBNET\>/
|| $value =~ /\<IP4\>/
|| $value =~ /\<IP6\>/
|| $value =~ /\<ADDR\>/
|| $value =~ /\<DNS\>/ )
{
die( "HOST, CIDR, SUBNET, IP4, IP6, and DNS may only be used in regexp... " . $value );
} ## end if ( $value =~ /\<HOST\>/ || $value =~ /\<CIDR\>/...)
$value =~ s/\<F\-MLFID\>/(?<FMLFID>/;
$value =~ s/\<\/F\-MLFID\>/)/;
$value =~ s/\<F-CONTENT\>/(?<FCONTENT>/;
$value =~ s/\<\/F-CONTENT\>/)/;
if ( $value ne '' ) {
push( @pre_regexp_tmp, $value );
}
$int++;
} ## end while ( defined( $self->{pre_regexp}[$int] ) )
delete( $self->{pre_regexp} );
$self->{pre_regexp} = \@pre_regexp_tmp;
#
#
# process each regexp
#
#
$int = 0;
while ( defined( $self->{regexp}[$int] ) ) {
my $has_finder = 0;
# we should only have F-CONTENT in pre_regexp
if ( $self->{regexp}[$int] =~ /\<F\-CONTENT\>/
|| $self->{regexp}[$int] =~ /\<\/F\-CONTENT\>/ )
{
die( "F-CONTENT tags can only be used in pre_regexp and not regexp... '" . $self->{regexp}[$int] . "'" );
}
# process any /F-[A-Za-z0-9\_\-]+/ items
if ( $self->{regexp}[$int] =~ /\<F\-[A-Za-z0-9\_]+\>/ ) {
$self->{regexp}[$int] =~ s/\<F\-([A-Za-z0-9\_]+)\>/(?<F$1>/g;
$has_finder = 1;
}
if ( $self->{regexp}[$int] =~ /\<\/F\-[A-Za-z0-9\_]+\>/ ) {
$self->{regexp}[$int] =~ s/\<\/F\-[A-Za-z0-9\_]+\>/)/g;
$has_finder = 1;
}
# add ^ and $ bits as needed
if ( $self->{regexp}[$int] !~ /\$$/ ) {
$self->{regexp}[$int] = $self->{regexp}[$int] . '.*$';
}
if ( $self->{regexp}[$int] !~ /^\^/ ) {
$self->{regexp}[$int] = '^.*' . $self->{regexp}[$int];
}
# replace various tags with regexps for matching
if ( $self->{regexp}[$int] =~ /\<HOST\>/ ) {
$self->{regexp}[$int] =~ s/\<HOST\>/(?<HOST>$IPv4_re|$IPv6_re|[a-zA-Z][a-zA-Z\-0-9\.]*[a-zA-Z\-0-9]+)/;
$has_finder = 1;
} elsif ( $self->{regexp}[$int] =~ /\<ADDR\>/ ) {
$self->{regexp}[$int] =~ s/\<ADDR\>/(?<ADDR>$IPv4_re|$IPv6_re)/;
$has_finder = 1;
} elsif ( $self->{regexp}[$int] =~ /\<CIDR\>/ ) {
$self->{regexp}[$int]
=~ s/\<CIDR\>/(?<CIDR>$IPv4_re\/\\b([1-9]|[12][0-9]|3[0-2])\\b|$IPv6_re\/\\b([1-9]|[1-9][0-9]|1[01][0-9]|12[0-8])\\b)/;
$has_finder = 1;
} elsif ( $self->{regexp}[$int] =~ /\<SUBNET\>/ ) {
$self->{regexp}[$int]
=~ s/\<SUBNET\>/(?<SUBNET>$IPv4_re|$IPv6_re|$IPv4_re\/\\b([1-9]|[12][0-9]|3[0-2])\\b|$IPv6_re\/\\b([1-9]|[1-9][0-9]|1[01][0-9]|12[0-8])\\b)/;
$has_finder = 1;
} elsif ( $self->{regexp}[$int] =~ /\<IP4\>/ ) {
$self->{regexp}[$int] =~ s/\<IP4\>/(?<IP4>$IPv4_re)/;
$has_finder = 1;
} elsif ( $self->{regexp}[$int] =~ /\<IP6\>/ ) {
$self->{regexp}[$int] =~ s/\<IP6\>/(?<IP6>$IPv6_re)/;
$has_finder = 1;
} elsif ( $self->{regexp}[$int] =~ /\<DNS\>/ ) {
$self->{regexp}[$int] =~ s/\<DNS\>/(?<DNS>[a-zA-Z][a-zA-Z\-0-9\.]*[a-zA-Z\-0-9]+)/;
$has_finder = 1;
}
if ( $self->{regexp}[$int] =~ /\<SRC\>/ ) {
$self->{regexp}[$int] =~ s/\<SRC\>/(?<SRC>$IPv4_re|$IPv6_re)/;
$has_finder = 1;
}
if ( $self->{regexp}[$int] =~ /\<DEST\>/ ) {
$self->{regexp}[$int] =~ s/\<DEST\>/(?<DEST>$IPv4_re|$IPv6_re)/;
$has_finder = 1;
}
# if it actually does not match anything, remove it
if ( !$has_finder ) {
push( @{ $to_drop->{regexp} }, $int );
}
# if this is meant to be able to include multiple lines, insert .* to allow it to do that
if ( $self->{regexp}[$int] =~ /\<SKIPLINES\>/ ) {
$self->{regexp}[$int] =~ s/\<SKIPLINES\>/.*/g;
}
# while pyhton allows conditionals for items like <foo> to be checked
# via (?(foo), perl needs (?(<foo>)
$self->{regexp}[$int] =~ s/\(\?\(([a-zA-Z_0-0][a-zA-Z_0-0]+)\)/(?(<$1>)/g;
$int++;
} ## end while ( defined( $self->{regexp}[$int] ) )
# remove any blank items
my @items = ( 'pre_regexp', 'regexp' );
foreach my $regexp (@items) {
$int = 0;
my @new_array;
while ( defined( $self->{$regexp}[$int] ) ) {
if ( $self->{$regexp}[$int] ne '' ) {
push( @new_array, $self->{$regexp}[$int] );
}
$int++;
}
delete( $self->{$regexp} );
$self->{$regexp} = \@new_array;
} ## end foreach my $regexp (@items)
# make sure we have atleast one item we can use
foreach my $regexp (@items) {
if ( !defined( $self->{regexp}[0] ) ) {
die('Post processing there are no regexp defined');
}
}
return $self;
} ## end sub new
=head2 proc_lines
=cut
sub proc_line {
my ( $self, $line ) = @_;
my $orig = $line;
if ( !defined($line) ) {
die('No line passed');
}
my $found = { found => 0, new_line => $orig, data => {} };
if ( $self->{start_chomp} ) {
my $regex = $self->{start_pattern};
$line =~ s/^$regex//;
my %found_items = %+;
foreach my $key ( keys(%found_items) ) {
$found->{data}{$key} = $found_items{$key};
}
}
chomp($line);
push( @{ $self->{log_lines} }, $line );
if ( defined( $self->{log_lines}[ $self->{lines} ] ) ) {
shift( @{ $self->{log_lines} } );
}
my $joined = '';
foreach my $join_line ( @{ $self->{log_lines} } ) {
$joined = $joined . $join_line . "\n";
}
chomp($joined);
$found->{joined} = $joined;
#
# if we have a pre_regexp, search and see if we find anything
#
my $int = 0;
my $not_found = 1;
while ( defined( $self->{pre_regexp}[$int] ) && $not_found ) {
my $regexp = $self->{pre_regexp}[$int];
if ( $joined =~ /$regexp/ ) {
my %found_items = %+;
if ( defined( $found_items{'FCONTENT'} ) ) {
$not_found = 0;
$joined = $found_items{'FCONTENT'};
$found->{data}{'F-CONTENT'} = $found_items{'FCONTENT'};
if ( defined( $found_items{'FMLFID'} ) ) {
$found->{data}{'F-MLFID'} = $found_items{'FMLFID'};
}
}
} ## end if ( $joined =~ /$regexp/ )
$int++;
} ## end while ( defined( $self->{pre_regexp}[$int] ) ...)
# we did not any matching lines, so just return
if ( defined( $self->{pre_regexp}[$int] ) && $not_found ) {
return $found;
}
#
# now search through each regexp for possible matches
#
$int = 0;
$not_found = 1;
while ( defined( $self->{regexp}[$int] ) && $not_found ) {
# copy this here so the key test for F keys does not cause an issue
my $regexp = $self->{regexp}[$int];
if ( $joined =~ /$regexp/ ) {
my %found_items = %+;
foreach my $key ( keys(%found_items) ) {
$not_found = 0;
if ( $key =~ /^F/ ) {
my $new_key = $key;
$new_key =~ s/^F/F-/;
$found->{data}{$new_key} = $found_items{$key};
} else {
$found->{data}{$key} = $found_items{$key};
}
} ## end foreach my $key ( keys(%found_items) )
$not_found = 0;
$found->{found} = 1;
} ## end if ( $joined =~ /$regexp/ )
$int++;
} ## end while ( defined( $self->{regexp}[$int] ) && $not_found)
return $found;
} ## end sub proc_line
=head1 AUTHOR
Zane C. Bowers-Hadley, C<< <vvelox at vvelox.net> >>
=head1 BUGS
Please report any bugs or feature requests to C<bug-regexp-f2b at rt.cpan.org>, or through
the web interface at L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Regexp-F2B>. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc Regexp::F2B
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker (report bugs here)
L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Regexp-F2B>
=item * CPAN Ratings
L<https://cpanratings.perl.org/d/Regexp-F2B>
=item * Search CPAN
L<https://metacpan.org/release/Regexp-F2B>
=back
=head1 ACKNOWLEDGEMENTS
=head1 LICENSE AND COPYRIGHT
This software is Copyright (c) 2023 by Zane C. Bowers-Hadley.
This is free software, licensed under:
The GNU General Public License, Version 2, June 1991
=cut
1; # End of Regexp::F2B