Changeset 105

Show
Ignore:
Timestamp:
12/11/06 23:27:43 (2 years ago)
Author:
kindlund
Message:

Updated parsing logic. Ran into an issue where including the Util::Config library slowed down parsing by a factor of 10x.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • honeyclient/branches/bug/42/lib/HoneyClient/Agent/Integrity/Registry/Parser.pm

    r99 r105  
    102102use Carp (); 
    103103 
    104 # Include Global Configuration Processing Library 
    105 use HoneyClient::Util::Config qw(getVar); 
    106  
    107104# Include Logging Library 
    108105use Log::Log4perl qw(:easy); 
     106# Temporarily Initialize Logging Subsystem 
     107# XXX: We hard code the logging format here, since it appears that 
     108#      calling 'use HoneyClient::Util::Config qw(getVar);' slows down 
     109#      the parser by a factor of 10x (not sure why). 
     110Log::Log4perl->init_once({ 
     111    "log4perl.rootLogger"                               => "INFO, Screen", 
     112    "log4perl.appender.Screen"                          => "Log::Log4perl::Appender::Screen", 
     113    "log4perl.appender.Screen.stderr"                   => 0, 
     114    "log4perl.appender.Screen.Threshold"                => "INFO", 
     115    "log4perl.appender.Screen.layout"                   => "Log::Log4perl::Layout::PatternLayout", 
     116    "log4perl.appender.Screen.layout.ConversionPattern" => "%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n", 
     117}); 
     118 
    109119 
    110120# Use Dumper Library. 
     
    116126# Use Seek Library. 
    117127use Fcntl qw(:seek); 
     128 
     129# Use Binary Search Library. 
     130use Search::Binary; 
     131 
     132# Use Progress Bar Library. 
     133use Term::ProgressBar; 
    118134 
    119135####################################################################### 
     
    208224require_ok('Fcntl'); 
    209225use Fcntl qw(:seek); 
     226 
     227# Make sure Search::Binary loads 
     228BEGIN { use_ok('Search::Binary') 
     229        or diag("Can't load Search::Binary package. Check to make sure the package library is correctly listed within the path."); } 
     230require_ok('Search::Binary'); 
     231can_ok('Search::Binary', 'binary_search'); 
     232use Search::Binary; 
     233 
     234# Make sure Term::ProgressBar loads 
     235BEGIN { use_ok('Term::ProgressBar') 
     236        or diag("Can't load Term::ProgressBar package. Check to make sure the package library is correctly listed within the path."); } 
     237require_ok('Term::ProgressBar'); 
     238use Term::ProgressBar; 
    210239 
    211240# Make sure HoneyClient::Agent::Integrity::Registry::Parser loads 
     
    354383         'registry', 0, 
    355384sub 
    356 #line 228 "Parser.yp" 
     385#line 257 "Parser.yp" 
    357386{ 
    358387            $LOG->debug("Reached end of input stream."); 
     
    364393         'registry', 1, 
    365394sub 
    366 #line 233 "Parser.yp" 
     395#line 262 "Parser.yp" 
    367396{ 
    368397            $LOG->debug("Reached end of input stream."); 
     
    374403         'registry', 2, 
    375404sub 
    376 #line 238 "Parser.yp" 
     405#line 267 "Parser.yp" 
    377406{ 
    378407            $LOG->debug("Reached end of input stream."); 
     
    396425         'group', 2, 
    397426sub 
    398 #line 255 "Parser.yp" 
     427#line 284 "Parser.yp" 
    399428{ 
    400429            my $ret = { }; 
     
    415444         'group', 1, 
    416445sub 
    417 #line 269 "Parser.yp" 
     446#line 298 "Parser.yp" 
    418447{ 
    419448            my $ret = { }; 
     
    440469         'entry', 2, 
    441470sub 
    442 #line 293 "Parser.yp" 
     471#line 322 "Parser.yp" 
    443472{ 
    444473            my $entry = { 
     
    455484} 
    456485 
    457 #line 303 "Parser.yp" 
     486#line 332 "Parser.yp" 
    458487 
    459488 
     
    467496# Outputs: (token_id, data) pair  
    468497sub _lexer { 
    469  
    470498    # Identify NEWLINE token. 
    471499    if ($_[0]->YYData->{DATA} =~ m/\G\n/cg) { 
     
    481509        $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} ? 
    482510                                       $_[0]->YYData->{'input_pos'} : 0; 
     511        $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} + 
     512                                       $_[0]->YYData->{'abs_offset'}; 
     513 
     514        # Update progress bar, if defined. 
     515        if (defined($_[0]->YYData->{'progress'}) && 
     516            ($_[0]->YYData->{'input_pos'} > $_[0]->YYData->{'progress_next_update'})) { 
     517            $_[0]->YYData->{'progress_next_update'} = 
     518                $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'input_pos'}); 
     519        } 
    483520 
    484521        # Identify DIR_NAME token. 
     
    535572    if ($_[0]->YYData->{DATA} =~ m/\G(.*\n)/cg) { 
    536573        $_[0]->YYData->{'input_pos'} = pos($_[0]->YYData->{DATA}); 
    537         $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")\n"); 
    538         Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")\n"); 
     574        $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")"); 
     575        Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")"); 
    539576    } 
    540577    return ('', undef); 
     
    547584# Outputs: None 
    548585sub _error { 
    549     $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").\n"); 
    550     Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").\n"); 
     586 
     587    $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ")."); 
     588    Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ")."); 
     589
     590 
     591# Helper function, designed to reset the parser's file stream back to the 
     592# beginning, allowing the parser to reparse from the beginning.  Or, if 
     593# specified, the function will seek the parser to the specified offset. 
     594
     595# Inputs: parser, absolute offset (optional) 
     596# Outputs: none 
     597sub _reset { 
     598    # Extract arguments. 
     599    my ($self, $offset) = @_; 
     600 
     601    $LOG->debug("Resetting parser."); 
     602 
     603    my $fh = $self->YYData->{'file_handle'}; 
     604 
     605    # Check the offset. 
     606    if (!defined($offset)) { 
     607        $offset = 0; 
     608    } 
     609    seek($fh, $offset, SEEK_SET); 
     610 
     611    undef $/; 
     612    $self->YYData->{DATA} = <$fh>; 
     613 
     614    # Strip all CRs. 
     615    $self->YYData->{DATA} =~ s/\r//g; 
     616 
     617    # Total size of input file. 
     618    $self->YYData->{'file_size'} = (stat($fh))[7]; 
     619 
     620    # Reinitialize helper variables. 
     621    # Hashtable, to represent the latest, extracted group chunk. 
     622    $self->YYData->{'latest_group'} = { }; 
     623 
     624    # Boolean, to indicate when we're parsing inside a group chunk. 
     625    $self->YYData->{'in_group'} = 0; 
     626 
     627    # Boolean, to indicate when we're parsing inside a value segment. 
     628    $self->YYData->{'in_value'} = 0; 
     629     
     630    # Regexp offset, used to record where the parser is within 
     631    # the file (relative position). 
     632    $self->YYData->{'input_pos'} = 0; 
     633 
     634    # Absolute offset, recording where the parser initially sought to. 
     635    $self->YYData->{'abs_offset'} = $offset; 
     636 
     637    # Initialize statistics. 
     638    # Total number of directories parsed. 
     639    $self->YYData->{'dir_count'} = 0; 
     640 
     641    # Total number of key/value pairs parsed. 
     642    $self->YYData->{'entry_count'} = 0; 
     643 
     644    # Progress bar information. 
     645    if ($self->YYData->{'show_progress'}) { 
     646        $self->YYData->{'progress'} = Term::ProgressBar->new({ name  => 'Progress', 
     647                                                               count => $self->YYData->{'file_size'}, 
     648                                                               ETA   => 'linear', }); 
     649        $self->YYData->{'progress'}->minor(0); 
     650        $self->YYData->{'progress'}->max_update_rate(1); 
     651        $self->YYData->{'progress_next_update'} = $self->YYData->{'progress'}->update($offset); 
     652    } else { 
     653        $self->YYData->{'progress'} = undef; 
     654    } 
    551655} 
    552656 
     
    570674    } 
    571675 
    572     my $fh = $self->YYData->{'file_handle'}; 
    573     seek($fh, 0, SEEK_SET); 
    574  
    575     undef $/; 
    576     $self->YYData->{DATA} = <$fh>; 
    577  
    578     # Strip all CRs. 
    579     $self->YYData->{DATA} =~ s/\r//g; 
    580      
    581 $self->YYData->{DATA} =~ m/\G.{73}/scg; 
    582     
    583  
    584     # Reinitialize helper variables. 
    585     # Hashtable, to represent the latest, extracted group chunk. 
    586     $self->YYData->{'latest_group'} = { }; 
    587  
    588     # Boolean, to indicate when we're parsing inside a group chunk. 
    589     $self->YYData->{'in_group'} = 0; 
    590  
    591     # Boolean, to indicate when we're parsing inside a value segment. 
    592     $self->YYData->{'in_value'} = 0; 
    593      
    594     # Regexp offset, used to record where the parser is within 
    595     # the file. 
    596     $self->YYData->{'input_pos'} = 0; 
    597  
    598     # Initialize statistics. 
    599     # Total number of directories parsed. 
    600     $self->YYData->{'dir_count'} = 0; 
    601  
    602     # Total number of key/value pairs parsed. 
    603     $self->YYData->{'entry_count'} = 0; 
     676    # Reset the parser. 
     677    $self->_reset(); 
    604678 
    605679    $LOG->debug("Finished group index process."); 
    606  
    607     print Dumper($self->YYData->{'group_index'}) . "\n"; 
     680
     681 
     682# Helper function, designed to be called from within the 
     683# Search::Binary::binary_search() function, in order to allow 
     684# the binary_search to properly read in group index data from 
     685# the default array reference. 
     686
     687# For more information about how this function operates, please 
     688# see the Search::Binary POD documentation. 
     689
     690# Inputs: parser, value_to_compare, current_array_index 
     691# Outputs: comparison, last_valid_array_index 
     692sub _search { 
     693    # Extract arguments. 
     694    my ($parser, $value_to_compare, $current_array_index) = @_; 
     695 
     696    # Increment the search index, if the current one is undef. 
     697    if (defined($current_array_index)) { 
     698        $parser->YYData->{'last_search_index'} = $current_array_index; 
     699    } else { 
     700        $parser->YYData->{'last_search_index'}++; 
     701    } 
     702 
     703    # Perform a comparison, if the array entry is defined. 
     704    if (defined(@{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}])) { 
     705        return($value_to_compare <=> @{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}], 
     706               $parser->YYData->{'last_search_index'}); 
     707    } 
     708 
     709    # Array entry not found, return undef with this position. 
     710    return (undef, $parser->YYData->{'last_search_index'}); 
    608711} 
    609712 
     
    618721The following functions have been implemented by any Parser object. 
    619722 
    620 =head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename, index_groups => $perform_index) 
     723=head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename, 
     724                                                             index_groups => $perform_index, 
     725                                                             show_progress => $progress) 
    621726 
    622727=over 4 
     
    630735ahead and scan the entire file, indexing the file offsets of where groups start and 
    631736end.  Otherwise, this indexing process is not performed. 
     737 B<$progress> is an optional parameter.  1 specifies that the parser should display 
     738a progress bar, as it scans through a specified file.  Otherwise, a progress bar 
     739is not displayed. 
    632740  
    633741I<Output>: The instantiated Parser B<$object>, fully initialized. 
     
    665773        !exists($args{'input_file'}) || 
    666774        !defined($args{'input_file'})) { 
    667         $LOG->fatal("Error: Unable to create parser - no 'input_file' specified!\n"); 
    668         Carp::croak("Error: Unable to create parser - no 'input_file' specified!\n"); 
     775        $LOG->fatal("Error: Unable to create parser - no 'input_file' specified!"); 
     776        Carp::croak("Error: Unable to create parser - no 'input_file' specified!"); 
    669777    } 
    670778 
     
    672780    my $fh = new IO::File($args{'input_file'}, "r"); 
    673781    if (!defined($fh)) { 
    674         $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!\n"); 
    675         Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!\n"); 
     782        $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!"); 
     783        Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!"); 
     784    } 
     785     
     786    # Check if show progress was specified. 
     787    if ($argsExist &&  
     788        exists($args{'show_progress'}) &&  
     789        defined($args{'show_progress'}) && 
     790        $args{'show_progress'}) { 
     791        $parser->YYData->{'show_progress'} = 1; 
     792    } else { 
     793        $parser->YYData->{'show_progress'} = 0; 
    676794    } 
    677795 
     
    679797    $parser->YYData->{'file_handle'} = $fh; 
    680798 
    681     undef $/; 
    682     $parser->YYData->{DATA} = <$fh>; 
    683  
    684     # Strip all CRs. 
    685     $parser->YYData->{DATA} =~ s/\r//g; 
    686  
    687     # Initialize helper variables. 
    688     # Hashtable, to represent the latest, extracted group chunk. 
    689     $parser->YYData->{'latest_group'} = { }; 
    690  
    691     # Boolean, to indicate when we're parsing inside a group chunk. 
    692     $parser->YYData->{'in_group'} = 0; 
    693  
    694     # Boolean, to indicate when we're parsing inside a value segment. 
    695     $parser->YYData->{'in_value'} = 0; 
    696      
    697     # Regexp offset, used to record where the parser is within 
    698     # the file. 
    699     $parser->YYData->{'input_pos'} = 0; 
    700  
    701     # Array to record where group boundaries occur. 
    702     $parser->YYData->{'group_index'} = [0, ]; 
    703  
    704     # Initialize statistics. 
    705     # Total number of directories parsed. 
    706     $parser->YYData->{'dir_count'} = 0; 
    707  
    708     # Total number of key/value pairs parsed. 
    709     $parser->YYData->{'entry_count'} = 0; 
     799    # Reset the parser. 
     800    $parser->_reset(); 
    710801 
    711802    # Perform group indexing, if specified. 
     
    715806        $args{'index_groups'}) { 
    716807        $parser->_index(); 
     808    } else { 
     809        $parser->YYData->{'group_index'} = [0, ]; 
    717810    } 
    718811 
     
    9101003    } 
    9111004 
     1005    # Update progress bar, if defined. 
     1006    if (defined($_[0]->YYData->{'progress'}) && 
     1007        ($_[0]->YYData->{'file_size'} <= $_[0]->YYData->{'progress_next_update'})) { 
     1008 
     1009        $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'file_size'}); 
     1010    } 
     1011 
    9121012    # Return the next group parsed. 
    9131013    return $self->YYParse(yylex   => \&_lexer, 
     
    10511151} 
    10521152 
     1153=pod 
     1154 
     1155=head2 $object->seekToNearestGroup(absolute_offset => $offset) 
     1156 
     1157=over 4 
     1158 
     1159Given an absolute offset within the file, this function 
     1160will seek the parser to the nearest group found B<before> 
     1161the specified offset. 
     1162 
     1163I<Inputs>: 
     1164 B<$offset> is a required parameter, specifying the absolute offset 
     1165within the file to seek to. 
     1166 
     1167I<Outputs>: None. 
     1168 
     1169=back 
     1170 
     1171=begin testing 
     1172 
     1173my ($nextGroup, $expectedGroup); 
     1174my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file", 
     1175                                                  namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test"); 
     1176 
     1177# Create a generic Parser object, with test state data. 
     1178my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file, index_groups => 1); 
     1179 
     1180$parser->seekToNearestGroup(absolute_offset => 84); 
     1181 
     1182# Verify Test Group #2 
     1183$nextGroup = $parser->nextGroup(); 
     1184$expectedGroup = { 
     1185    key     => 'HKEY_CURRENT_USER\Testing Group 2', 
     1186    entries => [ { 
     1187        name  => '@', 
     1188        value => '\\"Annoying=Value\\"', 
     1189    }, { 
     1190        name  => '\\"Annoying=Key\\"', 
     1191        value => 'Bar', 
     1192    }, { 
     1193        name  => 'Multiline', 
     1194        value => 'This 
     1195value spans 
     1196multiple lines 
     1197', 
     1198    }, { 
     1199        name  => 'Sane_Key', 
     1200        value => '\\"Wierd=\\"Value', 
     1201    }, ], 
     1202}; 
     1203is_deeply($nextGroup, $expectedGroup, "seekToNearestGroup()") or diag("The seekToNearestGroup() call failed."); 
     1204 
     1205=end testing 
     1206 
     1207=cut 
     1208 
     1209sub seekToNearestGroup { 
     1210    # Extract arguments. 
     1211    my ($self, %args) = @_; 
     1212 
     1213    # Sanity check, don't continue, unless absolute_offset 
     1214    # was provided. 
     1215    my $argsExist = scalar(%args); 
     1216    if (!$argsExist || 
     1217        !exists($args{'absolute_offset'}) || 
     1218        !defined($args{'absolute_offset'})) { 
     1219        $LOG->fatal("Error: Unable to seek parser - no 'absolute_offset' specified!"); 
     1220        Carp::croak("Error: Unable to seek parser - no 'absolute_offset' specified!"); 
     1221    } 
     1222 
     1223    # Check to see if the 'group_index' has been initialized. 
     1224    # We assume that if it has [0, ], then this has not been 
     1225    # done. 
     1226    my $numIndices = scalar(@{$self->YYData->{'group_index'}}); 
     1227    if ($numIndices < 2) { 
     1228        $self->_index(); 
     1229    } 
     1230    $numIndices = scalar(@{$self->YYData->{'group_index'}}); 
     1231 
     1232    # Find the nearest index after the offset. 
     1233    my $found_index = binary_search(0, $numIndices - 1, $args{'absolute_offset'}, \&_search, $self); 
     1234 
     1235    # Now, find the nearest index before the offset. 
     1236    if ($found_index > 0) { 
     1237        $found_index--; 
     1238    } 
     1239    my $found_offset = @{$self->YYData->{'group_index'}}[$found_index]; 
     1240 
     1241    # XXX: Change this to debug, eventually. 
     1242    $LOG->info("Seeking parser to nearest earlier group offset (" . $found_offset . ")."); 
     1243 
     1244    # Seek the parser, to the specified offset. 
     1245    $self->_reset($found_offset); 
     1246} 
     1247 
    10531248####################################################################### 
    10541249# Additional Module Documentation                                     # 
  • honeyclient/branches/bug/42/lib/HoneyClient/Agent/Integrity/Registry/Parser.yp

    r99 r105  
    8585use Carp (); 
    8686 
    87 # Include Global Configuration Processing Library 
    88 use HoneyClient::Util::Config qw(getVar); 
    89  
    9087# Include Logging Library 
    9188use Log::Log4perl qw(:easy); 
     89# Temporarily Initialize Logging Subsystem 
     90# XXX: We hard code the logging format here, since it appears that 
     91#      calling 'use HoneyClient::Util::Config qw(getVar);' slows down 
     92#      the parser by a factor of 10x (not sure why). 
     93Log::Log4perl->init_once({ 
     94    "log4perl.rootLogger"                               => "INFO, Screen", 
     95    "log4perl.appender.Screen"                          => "Log::Log4perl::Appender::Screen", 
     96    "log4perl.appender.Screen.stderr"                   => 0, 
     97    "log4perl.appender.Screen.Threshold"                => "INFO", 
     98    "log4perl.appender.Screen.layout"                   => "Log::Log4perl::Layout::PatternLayout", 
     99    "log4perl.appender.Screen.layout.ConversionPattern" => "%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n", 
     100}); 
     101 
    92102 
    93103# Use Dumper Library. 
     
    99109# Use Seek Library. 
    100110use Fcntl qw(:seek); 
     111 
     112# Use Binary Search Library. 
     113use Search::Binary; 
     114 
     115# Use Progress Bar Library. 
     116use Term::ProgressBar; 
    101117 
    102118####################################################################### 
     
    191207require_ok('Fcntl'); 
    192208use Fcntl qw(:seek); 
     209 
     210# Make sure Search::Binary loads 
     211BEGIN { use_ok('Search::Binary') 
     212        or diag("Can't load Search::Binary package. Check to make sure the package library is correctly listed within the path."); } 
     213require_ok('Search::Binary'); 
     214can_ok('Search::Binary', 'binary_search'); 
     215use Search::Binary; 
     216 
     217# Make sure Term::ProgressBar loads 
     218BEGIN { use_ok('Term::ProgressBar') 
     219        or diag("Can't load Term::ProgressBar package. Check to make sure the package library is correctly listed within the path."); } 
     220require_ok('Term::ProgressBar'); 
     221use Term::ProgressBar; 
    193222 
    194223# Make sure HoneyClient::Agent::Integrity::Registry::Parser loads 
     
    312341# Outputs: (token_id, data) pair  
    313342sub _lexer { 
    314  
    315343    # Identify NEWLINE token. 
    316344    if ($_[0]->YYData->{DATA} =~ m/\G\n/cg) { 
     
    326354        $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} ? 
    327355                                       $_[0]->YYData->{'input_pos'} : 0; 
     356        $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} + 
     357                                       $_[0]->YYData->{'abs_offset'}; 
     358 
     359        # Update progress bar, if defined. 
     360        if (defined($_[0]->YYData->{'progress'}) && 
     361            ($_[0]->YYData->{'input_pos'} > $_[0]->YYData->{'progress_next_update'})) { 
     362            $_[0]->YYData->{'progress_next_update'} = 
     363                $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'input_pos'}); 
     364        } 
    328365 
    329366        # Identify DIR_NAME token. 
     
    380417    if ($_[0]->YYData->{DATA} =~ m/\G(.*\n)/cg) { 
    381418        $_[0]->YYData->{'input_pos'} = pos($_[0]->YYData->{DATA}); 
    382         $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")\n"); 
    383         Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")\n"); 
     419        $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")"); 
     420        Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")"); 
    384421    } 
    385422    return ('', undef); 
     
    392429# Outputs: None 
    393430sub _error { 
    394     $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").\n"); 
    395     Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").\n"); 
     431 
     432    $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ")."); 
     433    Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ")."); 
     434
     435 
     436# Helper function, designed to reset the parser's file stream back to the 
     437# beginning, allowing the parser to reparse from the beginning.  Or, if 
     438# specified, the function will seek the parser to the specified offset. 
     439
     440# Inputs: parser, absolute offset (optional) 
     441# Outputs: none 
     442sub _reset { 
     443    # Extract arguments. 
     444    my ($self, $offset) = @_; 
     445 
     446    $LOG->debug("Resetting parser."); 
     447 
     448    my $fh = $self->YYData->{'file_handle'}; 
     449 
     450    # Check the offset. 
     451    if (!defined($offset)) { 
     452        $offset = 0; 
     453    } 
     454    seek($fh, $offset, SEEK_SET); 
     455 
     456    undef $/; 
     457    $self->YYData->{DATA} = <$fh>; 
     458 
     459    # Strip all CRs. 
     460    $self->YYData->{DATA} =~ s/\r//g; 
     461 
     462    # Total size of input file. 
     463    $self->YYData->{'file_size'} = (stat($fh))[7]; 
     464 
     465    # Reinitialize helper variables. 
     466    # Hashtable, to represent the latest, extracted group chunk. 
     467    $self->YYData->{'latest_group'} = { }; 
     468 
     469    # Boolean, to indicate when we're parsing inside a group chunk. 
     470    $self->YYData->{'in_group'} = 0; 
     471 
     472    # Boolean, to indicate when we're parsing inside a value segment. 
     473    $self->YYData->{'in_value'} = 0; 
     474     
     475    # Regexp offset, used to record where the parser is within 
     476    # the file (relative position). 
     477    $self->YYData->{'input_pos'} = 0; 
     478 
     479    # Absolute offset, recording where the parser initially sought to. 
     480    $self->YYData->{'abs_offset'} = $offset; 
     481 
     482    # Initialize statistics. 
     483    # Total number of directories parsed. 
     484    $self->YYData->{'dir_count'} = 0; 
     485 
     486    # Total number of key/value pairs parsed. 
     487    $self->YYData->{'entry_count'} = 0; 
     488 
     489    # Progress bar information. 
     490    if ($self->YYData->{'show_progress'}) { 
     491        $self->YYData->{'progress'} = Term::ProgressBar->new({ name  => 'Progress', 
     492                                                               count => $self->YYData->{'file_size'}, 
     493                                                               ETA   => 'linear', }); 
     494        $self->YYData->{'progress'}->minor(0); 
     495        $self->YYData->{'progress'}->max_update_rate(1); 
     496        $self->YYData->{'progress_next_update'} = $self->YYData->{'progress'}->update($offset); 
     497    } else { 
     498        $self->YYData->{'progress'} = undef; 
     499    } 
    396500} 
    397501 
     
    415519    } 
    416520 
    417     my $fh = $self->YYData->{'file_handle'}; 
    418     seek($fh, 0, SEEK_SET); 
    419  
    420     undef $/; 
    421     $self->YYData->{DATA} = <$fh>; 
    422  
    423     # Strip all CRs. 
    424     $self->YYData->{DATA} =~ s/\r//g; 
    425      
    426 $self->YYData->{DATA} =~ m/\G.{73}/scg; 
    427     
    428  
    429     # Reinitialize helper variables. 
    430     # Hashtable, to represent the latest, extracted group chunk. 
    431     $self->YYData->{'latest_group'} = { }; 
    432  
    433     # Boolean, to indicate when we're parsing inside a group chunk. 
    434     $self->YYData->{'in_group'} = 0; 
    435  
    436     # Boolean, to indicate when we're parsing inside a value segment. 
    437     $self->YYData->{'in_value'} = 0; 
    438      
    439     # Regexp offset, used to record where the parser is within 
    440     # the file. 
    441     $self->YYData->{'input_pos'} = 0; 
    442  
    443     # Initialize statistics. 
    444     # Total number of directories parsed. 
    445     $self->YYData->{'dir_count'} = 0; 
    446  
    447     # Total number of key/value pairs parsed. 
    448     $self->YYData->{'entry_count'} = 0; 
     521    # Reset the parser. 
     522    $self->_reset(); 
    449523 
    450524    $LOG->debug("Finished group index process."); 
    451  
    452     print Dumper($self->YYData->{'group_index'}) . "\n"; 
     525
     526 
     527# Helper function, designed to be called from within the 
     528# Search::Binary::binary_search() function, in order to allow 
     529# the binary_search to properly read in group index data from 
     530# the default array reference. 
     531
     532# For more information about how this function operates, please 
     533# see the Search::Binary POD documentation. 
     534
     535# Inputs: parser, value_to_compare, current_array_index 
     536# Outputs: comparison, last_valid_array_index 
     537sub _search { 
     538    # Extract arguments. 
     539    my ($parser, $value_to_compare, $current_array_index) = @_; 
     540 
     541    # Increment the search index, if the current one is undef. 
     542    if (defined($current_array_index)) { 
     543        $parser->YYData->{'last_search_index'} = $current_array_index; 
     544    } else { 
     545        $parser->YYData->{'last_search_index'}++; 
     546    } 
     547 
     548    # Perform a comparison, if the array entry is defined. 
     549    if (defined(@{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}])) { 
     550        return($value_to_compare <=> @{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}], 
     551               $parser->YYData->{'last_search_index'}); 
     552    } 
     553 
     554    # Array entry not found, return undef with this position. 
     555    return (undef, $parser->YYData->{'last_search_index'}); 
    453556} 
    454557 
     
    463566The following functions have been implemented by any Parser object. 
    464567 
    465 =head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename, index_groups => $perform_index) 
     568=head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename, 
     569                                                             index_groups => $perform_index, 
     570                                                             show_progress => $progress) 
    466571 
    467572=over 4 
     
    475580ahead and scan the entire file, indexing the file offsets of where groups start and 
    476581end.  Otherwise, this indexing process is not performed. 
     582 B<$progress> is an optional parameter.  1 specifies that the parser should display 
     583a progress bar, as it scans through a specified file.  Otherwise, a progress bar 
     584is not displayed. 
    477585  
    478586I<Output>: The instantiated Parser B<$object>, fully initialized. 
     
    510618        !exists($args{'input_file'}) || 
    511619        !defined($args{'input_file'})) { 
    512         $LOG->fatal("Error: Unable to create parser - no 'input_file' specified!\n"); 
    513         Carp::croak("Error: Unable to create parser - no 'input_file' specified!\n"); 
     620        $LOG->fatal("Error: Unable to create parser - no 'input_file' specified!"); 
     621        Carp::croak("Error: Unable to create parser - no 'input_file' specified!"); 
    514622    } 
    515623 
     
    517625    my $fh = new IO::File($args{'input_file'}, "r"); 
    518626    if (!defined($fh)) { 
    519         $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!\n"); 
    520         Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!\n"); 
     627        $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!"); 
     628        Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!"); 
     629    } 
     630     
     631    # Check if show progress was specified. 
     632    if ($argsExist &&  
     633        exists($args{'show_progress'}) &&  
     634        defined($args{'show_progress'}) && 
     635        $args{'show_progress'}) { 
     636        $parser->YYData->{'show_progress'} = 1; 
     637    } else { 
     638        $parser->YYData->{'show_progress'} = 0; 
    521639    } 
    522640 
     
    524642    $parser->YYData->{'file_handle'} = $fh; 
    525643 
    526     undef $/; 
    527     $parser->YYData->{DATA} = <$fh>; 
    528  
    529     # Strip all CRs. 
    530     $parser->YYData->{DATA} =~ s/\r//g; 
    531  
    532     # Initialize helper variables. 
    533     # Hashtable, to represent the latest, extracted group chunk. 
    534     $parser->YYData->{'latest_group'} = { }; 
    535  
    536     # Boolean, to indicate when we're parsing inside a group chunk. 
    537     $parser->YYData->{'in_group'} = 0; 
    538  
    539     # Boolean, to indicate when we're parsing inside a value segment. 
    540     $parser->YYData->{'in_value'} = 0; 
    541      
    542     # Regexp offset, used to record where the parser is within 
    543     # the file. 
    544     $parser->YYData->{'input_pos'} = 0; 
    545  
    546     # Array to record where group boundaries occur. 
    547     $parser->YYData->{'group_index'} = [0, ]; 
    548  
    549     # Initialize statistics. 
    550     # Total number of directories parsed. 
    551     $parser->YYData->{'dir_count'} = 0; 
    552  
    553     # Total number of key/value pairs parsed. 
    554     $parser->YYData->{'entry_count'} = 0; 
     644    # Reset the parser. 
     645    $parser->_reset(); 
    555646 
    556647    # Perform group indexing, if specified. 
     
    560651        $args{'index_groups'}) { 
    561652        $parser->_index(); 
     653    } else { 
     654        $parser->YYData->{'group_index'} = [0, ]; 
    562655    } 
    563656 
     
    755848    } 
    756849 
     850    # Update progress bar, if defined. 
     851    if (defined($_[0]->YYData->{'progress'}) && 
     852        ($_[0]->YYData->{'file_size'} <= $_[0]->YYData->{'progress_next_update'})) { 
     853 
     854        $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'file_size'}); 
     855    } 
     856 
    757857    # Return the next group parsed. 
    758858    return $self->YYParse(yylex   => \&_lexer, 
     
    896996} 
    897997 
     998=pod 
     999 
     1000=head2 $object->seekToNearestGroup(absolute_offset => $offset) 
     1001 
     1002=over 4 
     1003 
     1004Given an absolute offset within the file, this function 
     1005will seek the parser to the nearest group found B<before> 
     1006the specified offset. 
     1007 
     1008I<Inputs>: 
     1009 B<$offset> is a required parameter, specifying the absolute offset 
     1010within the file to seek to. 
     1011 
     1012I<Outputs>: None. 
     1013 
     1014=back 
     1015 
     1016=begin testing 
     1017 
     1018my ($nextGroup, $expectedGroup); 
     1019my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file", 
     1020                                                  namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test"); 
     1021 
     1022# Create a generic Parser object, with test state data. 
     1023my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file, index_groups => 1); 
     1024 
     1025$parser->seekToNearestGroup(absolute_offset => 84); 
     1026 
     1027# Verify Test Group #2 
     1028$nextGroup = $parser->nextGroup(); 
     1029$expectedGroup = { 
     1030    key     => 'HKEY_CURRENT_USER\Testing Group 2', 
     1031    entries => [ { 
     1032        name  => '@', 
     1033        value => '\\"Annoying=Value\\"', 
     1034    }, { 
     1035        name  => '\\"Annoying=Key\\"', 
     1036        value => 'Bar', 
     1037    }, { 
     1038        name  => 'Multiline', 
     1039        value => 'This 
     1040value spans 
     1041multiple lines 
     1042', 
     1043    }, { 
     1044        name  => 'Sane_Key', 
     1045        value => '\\"Wierd=\\"Value', 
     1046    }, ], 
     1047}; 
     1048is_deeply($nextGroup, $expectedGroup, "seekToNearestGroup()") or diag("The seekToNearestGroup() call failed."); 
     1049 
     1050=end testing 
     1051 
     1052=cut 
     1053 
     1054sub seekToNearestGroup { 
     1055    # Extract arguments. 
     1056    my ($self, %args) = @_; 
     1057 
     1058    # Sanity check, don't continue, unless absolute_offset 
     1059    # was provided. 
     1060    my $argsExist = scalar(%args); 
     1061    if (!$argsExist || 
     1062        !exists($args{'absolute_offset'}) || 
     1063        !defined($args{'absolute_offset'})) { 
     1064        $LOG->fatal("Error: Unable to seek parser - no 'absolute_offset' specified!"); 
     1065        Carp::croak("Error: Unable to seek parser - no 'absolute_offset' specified!"); 
     1066    } 
     1067 
     1068    # Check to see if the 'group_index' has been initialized. 
     1069    # We assume that if it has [0, ], then this has not been 
     1070    # done. 
     1071    my $numIndices = scalar(@{$self->YYData->{'group_index'}}); 
     1072    if ($numIndices < 2) { 
     1073        $self->_index(); 
     1074    } 
     1075    $numIndices = scalar(@{$self->YYData->{'group_index'}}); 
     1076 
     1077    # Find the nearest index after the offset. 
     1078    my $found_index = binary_search(0, $numIndices - 1, $args{'absolute_offset'}, \&_search, $self); 
     1079 
     1080    # Now, find the nearest index before the offset. 
     1081    if ($found_index > 0) { 
     1082        $found_index--; 
     1083    } 
     1084    my $found_offset = @{$self->YYData->{'group_index'}}[$found_index]; 
     1085 
     1086    # XXX: Change this to debug, eventually. 
     1087    $LOG->info("Seeking parser to nearest earlier group offset (" . $found_offset . ")."); 
     1088 
     1089    # Seek the parser, to the specified offset. 
     1090    $self->_reset($found_offset); 
     1091} 
     1092 
    8981093####################################################################### 
    8991094# Additional Module Documentation                                     #