root/honeyclient/branches/bug/42/lib/HoneyClient/Agent/Integrity/Registry/Parser.yp

Revision 123, 48.2 kB (checked in by kindlund, 1 year ago)

Completed alpha version of bug fix. Still have to test it out on our test VM network.

  • Property svn:keywords set to Id "$file"
Line 
1 %{
2 #######################################################################
3 # Created on:  Dec 10, 2006
4 # Package:     HoneyClient::Agent::Integrity::Registry::Parser
5 # File:        Parser.pm
6 # Description: Parses static hive dumps of the Windows OS registry.
7 #
8 # CVS: $Id$
9 #
10 # @author kindlund
11 #
12 # Copyright (C) 2006 The MITRE Corporation.  All rights reserved.
13 #
14 # This program is free software; you can redistribute it and/or
15 # modify it under the terms of the GNU General Public License
16 # as published by the Free Software Foundation, using version 2
17 # of the License.
18 #
19 # This program is distributed in the hope that it will be useful,
20 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 # GNU General Public License for more details.
23 #
24 # You should have received a copy of the GNU General Public License
25 # along with this program; if not, write to the Free Software
26 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 # 02110-1301, USA.
28 #
29 #######################################################################
30
31 =pod
32
33 =head1 NAME
34
35 HoneyClient::Agent::Integrity::Registry::Parser - Perl extension to parse
36 static hive dumps of the Windows OS registry.
37
38 =head1 VERSION
39
40 This documentation refers to HoneyClient::Agent::Integrity::Registry::Parser version 0.9.
41
42 =head1 SYNOPSIS
43
44   use HoneyClient::Agent::Integrity::Registry::Parser;
45   use IO::File;
46   use Data::Dumper;
47
48   # Initialize the parser object.
49   my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(
50                    input_file => "dump.reg",
51                );
52
53   # Print each registry group found, until there are no more left.
54   my $registryGroup = $parser->nextGroup();
55   while(scalar(keys(%{$registryGroup}))) {
56       print Dumper($registryGroup);
57       $registryGroup = $parser->nextGroup();
58   }
59
60   # $registryGroup refers to hashtable reference, which has the
61   # following format:
62   #
63   # $registryGroup = {
64   #     # The registry directory name.
65   #     'key' => 'HKEY_LOCAL_MACHINE\Software...',
66   #
67   #     # An array containing the list of entries within the
68   #     # registry directory.
69   #     'entries'  => [ {
70   #         'name' => "\"string\"",  # A (potentially) quoted string;
71   #                                  # "@" for default
72   #         'value' => "data",
73   #     }, ],
74   # };
75
76 =head1 DESCRIPTION
77
78 This library allows the Registry module to easily parse and enumerate
79 each Windows OS registry hive.
80
81 =cut
82
83 use strict;
84 use warnings;
85 use Carp ();
86
87 # Include Global Configuration Processing Library
88 use HoneyClient::Util::Config qw(getVar);
89
90 # Include Logging Library
91 use Log::Log4perl qw(:easy);
92
93 # Use Dumper Library.
94 use Data::Dumper;
95
96 # Use IO File Library.
97 use IO::File;
98
99 # Use Seek Library.
100 use Fcntl qw(:seek);
101
102 # Use Binary Search Library.
103 use Search::Binary;
104
105 # Use Progress Bar Library.
106 use Term::ProgressBar;
107
108 #######################################################################
109 # Module Initialization                                               #
110 #######################################################################
111
BEGIN {
    # Exporter provides the import() machinery inherited through @ISA.
    require Exporter;
    our (@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);

    # Version of this package.
    $VERSION = 0.9;

    @ISA = ('Exporter');

    # Symbols exported into the caller's namespace by default: none.
    # The parser is used purely through its object interface.
    @EXPORT = ();

    # Export tags; ':all' exists for symmetry with the sibling modules
    # but is intentionally empty.
    %EXPORT_TAGS = (
        'all' => [],
    );

    # Symbols exported on request: whatever ':all' lists (nothing).
    @EXPORT_OK = @{ $EXPORT_TAGS{'all'} };

    # Do not exit on broken pipes.
    $SIG{PIPE} = 'IGNORE';
}
our (@EXPORT_OK, $VERSION);
143
144 =pod
145
146 =begin testing
147
148 # Make sure Log::Log4perl loads
149 BEGIN { use_ok('Log::Log4perl', qw(:nowarn))
150         or diag("Can't load Log::Log4perl package. Check to make sure the package library is correctly listed within the path.");
151        
152         # Suppress all logging messages, since we need clean output for unit testing.
153         Log::Log4perl->init({
154             "log4perl.rootLogger"                               => "DEBUG, Buffer",
155             "log4perl.appender.Buffer"                          => "Log::Log4perl::Appender::TestBuffer",
156             "log4perl.appender.Buffer.min_level"                => "fatal",
157             "log4perl.appender.Buffer.layout"                   => "Log::Log4perl::Layout::PatternLayout",
158             "log4perl.appender.Buffer.layout.ConversionPattern" => "%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n",
159         });
160 }
161 require_ok('Log::Log4perl');
162 use Log::Log4perl qw(:easy);
163
164 # Make sure the module loads properly, with the exportable
165 # functions shared.
166 BEGIN { use_ok('HoneyClient::Util::Config', qw(getVar setVar))
167         or diag("Can't load HoneyClient::Util::Config package.  Check to make sure the package library is correctly listed within the path."); }
168 require_ok('HoneyClient::Util::Config');
169 can_ok('HoneyClient::Util::Config', 'getVar');
170 can_ok('HoneyClient::Util::Config', 'setVar');
171 use HoneyClient::Util::Config qw(getVar setVar);
172
173 # Suppress all logging messages, since we need clean output for unit testing.
174 Log::Log4perl->init({
175     "log4perl.rootLogger"                               => "DEBUG, Buffer",
176     "log4perl.appender.Buffer"                          => "Log::Log4perl::Appender::TestBuffer",
177     "log4perl.appender.Buffer.min_level"                => "fatal",
178     "log4perl.appender.Buffer.layout"                   => "Log::Log4perl::Layout::PatternLayout",
179     "log4perl.appender.Buffer.layout.ConversionPattern" => "%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n",
180 });
181
182 # Make sure Data::Dumper loads
183 BEGIN { use_ok('Data::Dumper')
184         or diag("Can't load Data::Dumper package. Check to make sure the package library is correctly listed within the path."); }
185 require_ok('Data::Dumper');
186 use Data::Dumper;
187
188 # Make sure IO::File loads
189 BEGIN { use_ok('IO::File')
190         or diag("Can't load IO::File package. Check to make sure the package library is correctly listed within the path."); }
191 require_ok('IO::File');
192 use IO::File;
193
194 # Make sure Fcntl loads
195 BEGIN { use_ok('Fcntl')
196         or diag("Can't load Fcntl package. Check to make sure the package library is correctly listed within the path."); }
197 require_ok('Fcntl');
198 use Fcntl qw(:seek);
199
200 # Make sure Search::Binary loads
201 BEGIN { use_ok('Search::Binary')
202         or diag("Can't load Search::Binary package. Check to make sure the package library is correctly listed within the path."); }
203 require_ok('Search::Binary');
204 can_ok('Search::Binary', 'binary_search');
205 use Search::Binary;
206
207 # Make sure Term::ProgressBar loads
208 BEGIN { use_ok('Term::ProgressBar')
209         or diag("Can't load Term::ProgressBar package. Check to make sure the package library is correctly listed within the path."); }
210 require_ok('Term::ProgressBar');
211 use Term::ProgressBar;
212
213 # Make sure HoneyClient::Agent::Integrity::Registry::Parser loads
214 BEGIN { use_ok('HoneyClient::Agent::Integrity::Registry::Parser')
215         or diag("Can't load HoneyClient::Agent::Integrity::Registry::Parser package. Check to make sure the package library is correctly listed within the path."); }
216 require_ok('HoneyClient::Agent::Integrity::Registry::Parser');
217 use HoneyClient::Agent::Integrity::Registry::Parser;
218
219 =end testing
220
221 =cut
222
223 #######################################################################
224 # Global Configuration Variables
225 #######################################################################
226
227 # The package-wide logging object, shared by the grammar actions and every sub below.
228 our $LOG = get_logger();
229
230 # Make Dumper format more terse.  NOTE: init() and nextGroup() later set Indent to 0.
231 $Data::Dumper::Terse = 1;
232 $Data::Dumper::Indent = 1;
233
234 %}
235
236 %token DIR_NAME
237 %token KEY_NAME
238 %token KEY_VALUE
239 %token HEADER
240 %token NEWLINE
241
242 %%
243
244 # A registry is either empty, or one or more groups that may be
245 # preceded by a HEADER token.  All three actions return an empty hash ref.
246 registry:
247                {
248             $LOG->debug("Reached end of input stream.");
249             # Nothing left to parse; the empty hash ref is the documented end-of-stream value of nextGroup().
250             return { };
251         }
252     |   groups {
253             $LOG->debug("Reached end of input stream.");
254             # Nothing left to parse; the empty hash ref is the documented end-of-stream value of nextGroup().
255             return { };
256         }
257     |   HEADER groups {
258             $LOG->debug("Reached end of input stream.");
259             # Nothing left to parse; the empty hash ref is the documented end-of-stream value of nextGroup().
260             return { };
261         }
262 ;
263
264 # Define 1 or more groups; NEWLINE tokens may precede a group and may follow the last one.
265 groups:
266         group
267     |   NEWLINE group
268     |   NEWLINE group NEWLINE
269     |   NEWLINE group groups
270 ;
271
272 # A group consists of a DIR_NAME token followed by 0 or more entries.
273 group:
274         DIR_NAME entries {
275             my $ret = { };
276             $_[0]->YYData->{'latest_group'}->{'key'} = $_[1];
277             if (!exists($_[0]->YYData->{'latest_group'}->{'entries'})) {
278                 # Make sure the 'entries' key exists, even if no entry action populated it.
279                 $_[0]->YYData->{'latest_group'}->{'entries'} = [];
280             }
281             $ret = $_[0]->YYData->{'latest_group'};
282             $_[0]->YYData->{'latest_group'} = { };
283             $_[0]->YYData->{'dir_count'}++;
284             $_[0]->YYAccept; # Terminate the parse early, once this single group is complete.
285
286             return $ret;
287         }
288     |   DIR_NAME {
289             my $ret = { };
290             $_[0]->YYData->{'latest_group'}->{'key'} = $_[1];
291             if (!exists($_[0]->YYData->{'latest_group'}->{'entries'})) {
292                 # Make sure the 'entries' key exists; a group without entries yields an empty list.
293                 $_[0]->YYData->{'latest_group'}->{'entries'} = [];
294             }
295             $ret = $_[0]->YYData->{'latest_group'};
296             $_[0]->YYData->{'latest_group'} = { };
297             $_[0]->YYData->{'dir_count'}++;
298             $_[0]->YYAccept; # Terminate the parse early, once this single group is complete.
299
300             return $ret;
301         }
302 ;
303
304 # Define 1 or more entries (right-recursive list of entry).
305 entries:
306         entry
307     |   entry entries
308 ;
309
310 # Define an entry: a KEY_NAME/KEY_VALUE pair, appended to the group currently being built.
311 entry:
312         KEY_NAME KEY_VALUE {
313             my $entry = {
314                 name  => $_[1],
315                 value => $_[2],
316             };
317             push(@{$_[0]->YYData->{'latest_group'}->{entries}}, $entry);
318             $_[0]->YYData->{'entry_count'}++;
319         }
320 ;
321
322 %%
323
324 #######################################################################
325 # Private Methods Implemented                                         #
326 #######################################################################
327
# Helper function, designed to tokenize specific data from the input stream.
#
# The lexer walks the slurped input held in YYData->{DATA}, using the
# string's pos() as its cursor (every match is anchored with \G and uses
# /c so that a failed match leaves the cursor untouched).  Two flags in
# YYData select which tokens are legal at the cursor:
#
#   in_group - set after a DIR_NAME token, cleared by a NEWLINE token
#   in_value - set after a KEY_NAME token, cleared by a KEY_VALUE token
#
# As a side effect the lexer maintains 'line_count',
# 'last_group_line_number' and 'input_pos' in YYData and, when a
# progress bar is configured, advances it.
#
# Every capture group is copied into a lexical immediately after the
# match that produced it, so that later statements (logging, counting
# newlines) can never change the token text that is returned.
#
# Inputs: parser
# Outputs: (token_id, data) pair; ('', undef) once no further input
#          can be matched.  Croaks if a complete line cannot be
#          tokenized.
sub _lexer {
    my ($parser) = @_;
    my $state = $parser->YYData;

    # Identify NEWLINE token.
    if ($state->{DATA} =~ m/\G\n/cg) {
        $state->{'in_group'} = 0;
        $LOG->debug("Found NEWLINE token ending at offset (" . pos($state->{DATA}) . ").");
        $state->{'line_count'}++;
        return ("NEWLINE", "\n");
    }

    # Check to see if we're inside a group block...
    if (!$state->{'in_group'}) {

        # Record where we are: cursor within DATA (0 before the first
        # match) plus the offset that DATA itself starts at.
        $state->{'input_pos'} = (pos($state->{DATA}) || 0) + $state->{'abs_offset'};

        # Update progress bar, if defined.
        if (defined($state->{'progress'}) &&
            ($state->{'input_pos'} > $state->{'progress_next_update'})) {
            $state->{'progress_next_update'} =
                $state->{'progress'}->update($state->{'input_pos'});
        }

        # Identify DIR_NAME token.
        if ($state->{DATA} =~ m/\G\[(.*)\]\n/cg) {
            my $dir_name = $1;
            $state->{'in_group'} = 1;
            $LOG->debug("Found DIR_NAME token ending at offset (" . pos($state->{DATA}) . ").");
            $state->{'last_group_line_number'} = $state->{'line_count'};
            $state->{'line_count'}++;
            return ("DIR_NAME", $dir_name);
        }

        # Identify HEADER token. It's always only at the beginning.
        if ($state->{DATA} =~ m/\GREGEDIT4\n/cg) {
            $LOG->debug("Found HEADER token ending at offset (" . pos($state->{DATA}) . ").");
            $state->{'line_count'}++;
            return ("HEADER", "REGEDIT4\n");
        }

    } elsif (!$state->{'in_value'}) {

        # Identify KEY_NAME token.
        if ($state->{DATA} =~ m/\G\"(|[^\\]|.*(?:\\[^\\]|\\\\|[^\\][^\\]))\"(?==)/cg) {
            my $key_name = $1;
            $state->{'in_value'} = 1;
            $LOG->debug("Found KEY_NAME token ending at offset (" . pos($state->{DATA}) . ").");
            return ("KEY_NAME", $key_name);
        }

        # Identify default KEY_NAME token (@).
        if ($state->{DATA} =~ m/\G\@(?==)/cg) {
            $state->{'in_value'} = 1;
            $LOG->debug("Found KEY_NAME token ending at offset (" . pos($state->{DATA}) . ").");
            return ("KEY_NAME", "@");
        }

    } else {

        # Identify string KEY_VALUE token.
        if ($state->{DATA} =~ m/\G=\"(|[^\\]|.*?(?:\\[^\\]|\\\\|[^\\][^\\]))\"\n/cgs) {
            my $value = $1;
            $state->{'in_value'} = 0;
            $LOG->debug("Found KEY_VALUE token ending at offset (" . pos($state->{DATA}) . ").");
            # One line for the terminating newline, plus one per
            # newline embedded in a multi-line value.
            $state->{'line_count'} += 1 + ($value =~ tr/\n//);
            return ("KEY_VALUE", $value);
        }

        # Identify binary KEY_VALUE token.
        if ($state->{DATA} =~ m/\G=(|.*?[^\\])\n/cgs) {
            my $value = $1;
            $state->{'in_value'} = 0;
            $LOG->debug("Found KEY_VALUE token ending at offset (" . pos($state->{DATA}) . ").");
            # One line for the terminating newline, plus one per
            # backslash-continued line inside the value.
            $state->{'line_count'} += 1 + ($value =~ tr/\n//);
            return ("KEY_VALUE", $value);
        }
    }

    # Croak if encountered a token error.
    if ($state->{DATA} =~ m/\G(.*\n)/cg) {
        my $unknown = $1;
        # NOTE(review): unlike the bookkeeping above, this offset does
        # not include 'abs_offset'; kept as-is to preserve the message.
        $state->{'input_pos'} = pos($state->{DATA});
        my $message = "Error: Unknown token (" . $unknown . ") at offset (" . $state->{'input_pos'} . ")";
        $LOG->fatal($message);
        Carp::croak($message);
    }

    # No complete line is left to match: signal end of input.
    return ('', undef);
}
420
# Helper function, installed as the parser's yyerror callback (see
# nextGroup()).  Logs the failure at fatal level and then croaks with
# the same message, quoting the last input offset recorded by the lexer.
#
# Inputs: parser
# Outputs: None (never returns; always croaks)
sub _error {
    my ($parser) = @_;

    my $message = "Error: Malformed input found at offset ("
                . $parser->YYData->{'input_pos'}
                . ").";

    $LOG->fatal($message);
    Carp::croak($message);
}
431
# Helper function, designed to reset the parser's file stream back to the
# beginning, allowing the parser to reparse from the beginning.  Or, if
# specified, the function will seek the parser to the specified offset.
#
# The file named by YYData->{'filename'} is reopened, everything from
# the requested offset onwards is slurped into YYData->{DATA} (with all
# carriage returns removed), and all lexer flags, position markers and
# statistics counters are reinitialized.  If YYData->{'show_progress'}
# is true, a fresh progress bar sized to the file is created as well.
#
# Inputs: parser, absolute offset (optional, defaults to 0)
# Outputs: none; croaks if the file cannot be opened or positioned.
sub _reset {
    # Extract arguments.
    my ($self, $offset) = @_;
    my $state = $self->YYData;

    $LOG->debug("Resetting parser.");

    # Release any previously held handle before reopening the file.
    $state->{'file_handle'} = undef;

    my $fh = IO::File->new($state->{'filename'}, "r");
    if (!defined($fh)) {
        $LOG->fatal("Error: Unable to read file '" . $state->{'filename'} . "'!");
        Carp::croak("Error: Unable to read file '" . $state->{'filename'} . "'!");
    }

    $state->{'file_handle'} = $fh;

    # Check the offset.
    if (!defined($offset)) {
        $offset = 0;
    }
    if (!seek($fh, $offset, SEEK_SET)) {
        $LOG->fatal("Error: Unable to seek to offset (" . $offset . ") in file '" . $state->{'filename'} . "'!");
        Carp::croak("Error: Unable to seek to offset (" . $offset . ") in file '" . $state->{'filename'} . "'!");
    }

    # Slurp the remainder of the file.  The record separator is only
    # undefined for the duration of this block, so that line-oriented
    # reads elsewhere in the process are not affected.
    {
        local $/;
        $state->{DATA} = <$fh>;
    }
    if (!defined($state->{DATA})) {
        # Nothing could be read (e.g. positioned at end of file).
        $state->{DATA} = '';
    }

    # Strip all CRs.
    $state->{DATA} =~ s/\r//g;

    # Total size of input file.
    $state->{'file_size'} = (stat($fh))[7];

    # Reinitialize helper variables.
    # Hashtable, to represent the latest, extracted group chunk.
    $state->{'latest_group'} = { };

    # Boolean, to indicate when we're parsing inside a group chunk.
    $state->{'in_group'} = 0;

    # Boolean, to indicate when we're parsing inside a value segment.
    $state->{'in_value'} = 0;

    # Regexp offset, used to record where the parser is within
    # the file (relative position).
    $state->{'input_pos'} = 0;

    # Absolute offset, recording where the parser initially seeked to.
    $state->{'abs_offset'} = $offset;

    # Initialize statistics.
    # Total number of directories parsed.
    $state->{'dir_count'} = 0;

    # Total number of key/value pairs parsed.
    $state->{'entry_count'} = 0;

    # Total number of lines parsed.
    $state->{'line_count'} = 0;

    # Last line number that corresponded to a group separation point.
    $state->{'last_group_line_number'} = 0;

    # Progress bar information.
    if ($state->{'show_progress'}) {
        $state->{'progress'} = Term::ProgressBar->new({ name  => 'Progress',
                                                        count => $state->{'file_size'},
                                                        ETA   => 'linear', });
        $state->{'progress'}->minor(0);
        $state->{'progress'}->max_update_rate(1);
        $state->{'progress_next_update'} = $state->{'progress'}->update($offset);
    } else {
        $state->{'progress'} = undef;
    }
}
511
# Helper function, designed to index all groups, based upon beginning file
# offsets.
#
# Walks the whole input once via nextGroup() and, after each group,
# records the lexer's current 'input_pos' and 'last_group_line_number'
# in the 'group_index_offsets' and 'group_index_linenums' lists (both of
# which start with a leading 0).  The parser is reset afterwards, so
# that normal parsing starts again from the top of the file.
#
# Inputs: parser
# Outputs: None
sub _index {
    # Extract arguments.
    my ($self) = @_;
    my $state  = $self->YYData;

    $LOG->debug("Starting group index process.");

    # Install the (initially one-element) index lists up front; the
    # loop below appends to them in place.
    my $offsets  = $state->{'group_index_offsets'}  = [0, ];
    my $linenums = $state->{'group_index_linenums'} = [0, ];

    # An empty hash ref from nextGroup() marks the end of the input.
    for (my $group = $self->nextGroup();
         scalar(keys(%{$group}));
         $group = $self->nextGroup()) {
        push(@{$offsets},  $state->{'input_pos'});
        push(@{$linenums}, $state->{'last_group_line_number'});
    }

    # Reset the parser.
    $self->_reset();

    $LOG->debug("Finished group index process.");
}
538
# Helper function, designed to be called from within the
# Search::Binary::binary_search() function, in order to allow
# the binary_search to properly read in group index data from
# the default parser reference.
#
# The index list consulted is 'group_index_linenums' when
# YYData->{'search_is_linenum'} is true, and 'group_index_offsets'
# otherwise.  The position examined is remembered in
# YYData->{'last_search_index'}: it is set to the supplied index when
# one is given, or advanced by one when the index is undef.
#
# For more information about how this function operates, please
# see the Search::Binary POD documentation.
#
# Inputs: parser, value_to_compare, current_array_index
# Outputs: comparison, last_valid_array_index
#          (comparison is the <=> result of value_to_compare against
#          the indexed entry, or undef if no entry exists there)
sub _search {
    # Extract arguments.
    my ($parser, $value_to_compare, $current_array_index) = @_;
    my $state = $parser->YYData;

    # Increment the search index, if the current one is undef.
    if (defined($current_array_index)) {
        $state->{'last_search_index'} = $current_array_index;
    } else {
        $state->{'last_search_index'}++;
    }
    my $position = $state->{'last_search_index'};

    # Check to see if the search is for line numbers or offsets.
    my $index = $state->{'search_is_linenum'}
              ? $state->{'group_index_linenums'}
              : $state->{'group_index_offsets'};

    # Perform a comparison, if the array entry is defined.
    my $entry = $index->[$position];
    if (defined($entry)) {
        return ($value_to_compare <=> $entry, $position);
    }

    # Array entry not found, return undef with this position.
    return (undef, $position);
}
577
578 #######################################################################
579 # Public Methods Implemented                                          #
580 #######################################################################
581
582 =pod
583
584 =head1 METHODS IMPLEMENTED
585
586 The following functions have been implemented by any Parser object.
587
588 =head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename,
589                                                              index_groups => $perform_index,
590                                                              show_progress => $progress)
591
592 =over 4
593
594 Creates a new Parser object, using the specified input file as its data
595 source.
596
597 I<Inputs>:
598  B<$filename> is a required parameter, specifying the file to open for parsing.
599  B<$perform_index> is an optional parameter.  1 specifies that the parser should go
600 ahead and scan the entire file, indexing the file offsets of where groups start and
601 end.  Otherwise, this indexing process is not performed.
602  B<$progress> is an optional parameter.  1 specifies that the parser should display
603 a progress bar, as it scans through a specified file.  Otherwise, a progress bar
604 is not displayed.
605  
606 I<Output>: The instantiated Parser B<$object>, fully initialized.
607
608 =back
609
610 =begin testing
611
612 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
613                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
614
615 # Create a generic Parser object, with test state data.
616 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
617 isa_ok($parser, 'HoneyClient::Agent::Integrity::Registry::Parser', "init(input_file => $test_registry_file)") or diag("The init() call failed.");
618
619 =end testing
620
621 =cut
622
# Constructor: builds a Parser for the given registry dump.
#
# Inputs (named):
#   input_file    - required; path of the file to parse.
#   index_groups  - optional; when true, the whole file is scanned once
#                   by _index() to record where each group starts.
#   show_progress - optional; when true, a progress bar is displayed
#                   while the file is scanned.
# Outputs: the initialized parser object.  Croaks if 'input_file' is
#          missing or the file cannot be read.
sub init {

    # Extract arguments.
    my ($self, %args) = @_;

    # Log resolved arguments.
    # Make Dumper format more terse.
    $Data::Dumper::Terse = 1;
    $Data::Dumper::Indent = 0;
    $LOG->debug(Dumper(\%args));

    # Sanity check, don't initialize, unless input_file
    # was provided.
    if (!defined($args{'input_file'})) {
        $LOG->fatal("Error: Unable to create parser - no 'input_file' specified!");
        Carp::croak("Error: Unable to create parser - no 'input_file' specified!");
    }

    my $parser = HoneyClient::Agent::Integrity::Registry::Parser->new();
    my $state  = $parser->YYData;

    # Check if show progress was specified.
    $state->{'show_progress'} = $args{'show_progress'} ? 1 : 0;

    # Save the file name.
    $state->{'filename'} = $args{'input_file'};

    # Reset the parser.  This opens the file (croaking with
    # "Unable to read file" if that fails) and loads its contents, so
    # no separate open is needed here.
    $parser->_reset();

    # Perform group indexing, if specified.
    if ($args{'index_groups'}) {
        $parser->_index();
    } else {
        # No index requested: both lists only hold the start of file.
        $state->{'group_index_offsets'} = [0, ];
        $state->{'group_index_linenums'} = [0, ];
    }

    # Return parser object.
    return $parser;
}
684
685 =pod
686
687 =head2 $object->nextGroup()
688
689 =over 4
690
691 Provides the next registry group, in the form of a hashtable reference.
692 This hashtable has the following format:
693
694   {
695       # The registry directory name.
696       'key' => 'HKEY_LOCAL_MACHINE\Software...',
697  
698       # An array containing the list of entries within the
699       # registry directory.
700       'entries'  => [ {
701           'name' => "\"string\"",  # A (potentially) quoted string;
702                                    # "@" for default
703           'value' => "data",
704       }, ],
705   };
706
707 I<Output>: A hashtable reference if the next group was parsed successfully;
708 returns an empty hash ref, if the Parser B<$object> has reached the end of
709 the input stream.
710
711 =back
712
713 =begin testing
714
715 my ($nextGroup, $expectedGroup);
716 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
717                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
718
719 # Create a generic Parser object, with test state data.
720 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file, index_groups => 1);
721
722 # Verify Test Group #1
723 $nextGroup = $parser->nextGroup();
724 $expectedGroup = {
725     key     => 'HKEY_CURRENT_USER\]Testing Group 1[',
726     entries => [ {
727         name  => '@',
728         value => 'Default',
729     }, {
730         name  => 'Foo',
731         value => 'Bar',
732     }, ],
733 };
734 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 1") or diag("The nextGroup() call failed.");
735
736 # Verify Test Group #2
737 $nextGroup = $parser->nextGroup();
738 $expectedGroup = {
739     key     => 'HKEY_CURRENT_USER\Testing Group 2',
740     entries => [ {
741         name  => '@',
742         value => '\\"Annoying=Value\\"',
743     }, {
744         name  => '\\"Annoying=Key\\"',
745         value => 'Bar',
746     }, {
747         name  => 'Multiline',
748         value => 'This
749 value spans
750 multiple lines
751 ',
752     }, {
753         name  => 'Sane_Key',
754         value => '\\"Wierd=\\"Value',
755     }, ],
756 };
757 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 2") or diag("The nextGroup() call failed.");
758
759 # Verify Test Group #3
760 $nextGroup = $parser->nextGroup();
761 $expectedGroup = {
762     key     => 'HKEY_CURRENT_USER\Testing Group 3',
763     entries => [ {
764         name  => 'Test_Bin_1',
765         value => 'hex:f4,ff,ff,ff,00,00,00,00,00,00,00,00,00,00,00,00,bc,02,00,00,00,\
766   00,00,00,00,00,00,00,54,00,61,00,68,00,6f,00,6d,00,61,00,00,00,f0,77,3f,00,\
767   3f,00,3f,00,3f,00,00,00,00,00,00,00,00,00,00,00,00,00,00,00,78,00,1c,10,fc,\
768   7f,22,14,fc,7f,b0,fe,12,00,00,00,00,00,00,00,00,00,98,23,eb,77'
769     }, {
770         name  => 'Test_Bin_2',
771         value => 'hex:f5,ff,ff,ff,00,00,00,00,00,00,00,00,00,00,00,00,90,01,00,00,00,\
772   00,00,00,00,00,00,00,4d,00,69,00,63,00,72,00,6f,00,73,00,6f,00,66,00,74,00,\
773   20,00,53,00,61,00,6e,00,73,00,20,00,53,00,65,00,72,00,69,00,66,00,00,00,f0,\
774   77,00,20,14,00,00,00,00,10,80,05,14,00,f0,1f,14,00,00,00,14,00'
775     }, ],
776 };
777 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 3") or diag("The nextGroup() call failed.");
778
779 # Verify Test Group #4
780 $nextGroup = $parser->nextGroup();
781 $expectedGroup = {
782     key     => 'HKEY_CURRENT_USER\Testing Group 4',
783     entries => [],
784 };
785 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 4") or diag("The nextGroup() call failed.");
786
787 # Verify Test Group #5
788 $nextGroup = $parser->nextGroup();
789 $expectedGroup = {
790     key     => 'HKEY_CURRENT_USER\Testing Group 5',
791     entries => [ {
792         name  => '@',
793         value => '',
794     }, ],
795 };
796 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 5") or diag("The nextGroup() call failed.");
797
798 # Verify Test Group #6
799 $nextGroup = $parser->nextGroup();
800 $expectedGroup = {
801     key     => 'HKEY_CURRENT_USER\Testing Group 6\With\Really\Deep\Nested\Directory\Structure',
802     entries => [ {
803         name  => 'InstallerLocation',
804         value => 'C:\\\\WINDOWS\\\\system32\\\\',
805     }, ],
806 };
807 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 6") or diag("The nextGroup() call failed.");
808
809 # Verify Test Group #7
810 $nextGroup = $parser->nextGroup();
811 $expectedGroup = {
812     key     => 'HKEY_CURRENT_USER\Testing Group 7',
813     entries => [ {
814         name  => 'C:\\\\Program Files\\\\Common Files\\\\Microsoft Shared\\\\Web Folders\\\\',
815         value => '',
816     }, {
817         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{350C97B0-3D7C-4EE8-BAA9-00BCB3D54227}\\\\',
818         value => '',
819     }, {
820         name  => 'C:\\\\Program Files\\\\Support Tools\\\\',
821         value => '',
822     }, {
823         name  => 'C:\\\\Documents and Settings\\\\All Users\\\\Start Menu\\\\Programs\\\\Windows Support Tools\\\\',
824         value => '',
825     }, {
826         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{6855CCDD-BDF9-48E4-B80A-80DFB96FE36C}\\\\',
827         value => '',
828     }, {
829         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{F251B999-08A9-4704-999C-9962F0DFD88E}\\\\',
830         value => '',
831     }, {
832         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{1CB92574-96F2-467B-B793-5CEB35C40C29}\\\\',
833         value => '',
834     }, {
835         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{B37C842A-B624-46B8-A727-654E72F1C91A}\\\\',
836         value => '',
837     }, ],
838 };
839 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 7") or diag("The nextGroup() call failed.");
840
841 # Verify Test Group #8
842 $nextGroup = $parser->nextGroup();
843 $expectedGroup = {
844     key     => 'HKEY_CURRENT_USER\Testing Group 8\{00021492-0000-0000-C000-000000000046}',
845     entries => [ {
846         name  => '000',
847         value => 'String Value',
848     }, ],
849 };
850 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 8") or diag("The nextGroup() call failed.");
851
852 # Verify Test Group #9
853 $nextGroup = $parser->nextGroup();
854 is_deeply($nextGroup, { }, "nextGroup() - 9") or diag("The nextGroup() call failed.");
855
856 =end testing
857
858 =cut
859
# Parses and returns the next registry group from the input stream.
#
# Inputs:  No named arguments are consumed; anything supplied is only
#          written to the debug log.
# Output:  Whatever YYParse() returns for the next group.  The embedded
#          tests expect a hashref with 'key' and 'entries', or an empty
#          hashref once no groups remain.
sub nextGroup {
    # Extract arguments.
    my ($self, %args) = @_;

    # Log resolved arguments.
    # Make Dumper format more terse.  The settings are localized so the
    # caller's Data::Dumper configuration is restored when this call
    # returns; they stay in effect for everything invoked below
    # (including YYParse() and the lexer/error callbacks).
    local $Data::Dumper::Terse  = 1;
    local $Data::Dumper::Indent = 0;
    $LOG->debug(Dumper(\%args));

    # Reopen the file_handle, if it's been closed.
    if (!defined($self->YYData->{'file_handle'})) {
        $self->_reset();
    }

    if ($self->YYData->{'input_pos'} == 0) {
        $LOG->debug("Beginning parse of input stream.");
    }

    # Update progress bar, if defined.
    # NOTE(review): this compares 'file_size' (not 'input_pos') against
    # 'progress_next_update' and reports 'file_size' to the progress
    # object -- behavior kept exactly as found; confirm it is intended.
    if (defined($self->YYData->{'progress'}) &&
        ($self->YYData->{'file_size'} <= $self->YYData->{'progress_next_update'})) {

        $self->YYData->{'progress'}->update($self->YYData->{'file_size'});
    }

    # Return the next group parsed.
    return $self->YYParse(yylex   => \&_lexer,
                          yyerror => \&_error);
}
890
891 =pod
892
893 =head2 $object->dirsParsed()
894
895 =over 4
896
897 Indicates how many registry directories the Parser B<$object> has
898 parsed within the specified file, so far.
899
900 I<Output>: Returns the number of directory groups parsed so far;
901 returns 0, if none parsed yet.
902
903 =back
904
905 =begin testing
906
907 my ($nextGroup);
908 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
909                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
910
911 # Create a generic Parser object, with test state data.
912 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
913
914 $nextGroup = $parser->nextGroup();
915 while(scalar(keys(%{$nextGroup}))) {
916     $nextGroup = $parser->nextGroup();
917 }
918
919 is($parser->dirsParsed(), 8, "dirsParsed()") or diag("The dirsParsed() call failed.");
920
921 =end testing
922
923 =cut
924
# Reports how many registry directory groups have been parsed so far.
#
# Inputs:  No named arguments are consumed; anything supplied is only
#          written to the debug log.
# Output:  The 'dir_count' value held in the parser's YYData store.
sub dirsParsed {
    # Extract arguments.
    my ($self, %args) = @_;

    # Log resolved arguments.
    # Make Dumper format more terse.  Localized, so the caller's
    # Data::Dumper configuration is restored on return.
    local $Data::Dumper::Terse  = 1;
    local $Data::Dumper::Indent = 0;
    $LOG->debug(Dumper(\%args));

    return $self->YYData->{'dir_count'};
}
937
938 =pod
939
940 =head2 $object->entriesParsed()
941
942 =over 4
943
944 Indicates how many registry key/value pairs the Parser B<$object> has
945 parsed within the specified file, so far.
946
947 I<Output>: Returns the number of key/value pairs parsed so far;
948 returns 0, if none parsed yet.
949
950 =back
951
952 =begin testing
953
954 my ($nextGroup);
955 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
956                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
957
958 # Create a generic Parser object, with test state data.
959 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
960
961 $nextGroup = $parser->nextGroup();
962 while(scalar(keys(%{$nextGroup}))) {
963     $nextGroup = $parser->nextGroup();
964 }
965
966 is($parser->entriesParsed(), 19, "entriesParsed()") or diag("The entriesParsed() call failed.");
967
968 =end testing
969
970 =cut
971
# Reports how many registry key/value pairs have been parsed so far.
#
# Inputs:  No named arguments are consumed; anything supplied is only
#          written to the debug log.
# Output:  The 'entry_count' value held in the parser's YYData store.
sub entriesParsed {
    # Extract arguments.
    my ($self, %args) = @_;

    # Log resolved arguments.
    # Make Dumper format more terse.  Localized, so the caller's
    # Data::Dumper configuration is restored on return.
    local $Data::Dumper::Terse  = 1;
    local $Data::Dumper::Indent = 0;
    $LOG->debug(Dumper(\%args));

    return $self->YYData->{'entry_count'};
}
984
985 =pod
986
987 =head2 $object->getFileHandle()
988
989 =over 4
990
991 Returns the file handle associated with the current Parser B<$object>.
992
993 I<Output>: Returns the file handle in use.
994
995 =back
996
997 =begin testing
998
999 my ($handle);
1000 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
1001                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
1002
1003 # Create a generic Parser object, with test state data.
1004 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
1005
1006 $handle = $parser->getFileHandle();
1007
1008 isa_ok($handle, 'IO::File', "getFileHandle()") or diag("The getFileHandle() call failed.");
1009
1010 =end testing
1011
1012 =cut
1013
# Returns the file handle currently associated with this parser.
#
# Inputs:  No named arguments are consumed; anything supplied is only
#          written to the debug log.
# Output:  The 'file_handle' value held in the parser's YYData store
#          (undef after closeFileHandle() has cleared it).
sub getFileHandle {
    # Extract arguments.
    my ($self, %args) = @_;

    # Log resolved arguments.
    # Make Dumper format more terse.  Localized, so the caller's
    # Data::Dumper configuration is restored on return.
    local $Data::Dumper::Terse  = 1;
    local $Data::Dumper::Indent = 0;
    $LOG->debug(Dumper(\%args));

    return $self->YYData->{'file_handle'};
}
1026
1027 =pod
1028
1029 =head2 $object->getFilename()
1030
1031 =over 4
1032
1033 Returns the file name associated with the current Parser B<$object>.
1034
1035 I<Output>: Returns the file name in use.
1036
1037 =back
1038
1039 =begin testing
1040
1041 my ($filename);
1042 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
1043                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
1044
1045 # Create a generic Parser object, with test state data.
1046 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
1047
1048 $filename = $parser->getFilename();
1049
1050 is($filename, $test_registry_file, "getFilename()") or diag("The getFilename() call failed.");
1051
1052 =end testing
1053
1054 =cut
1055
# Returns the name of the file associated with this parser.
#
# Inputs:  No named arguments are consumed; anything supplied is only
#          written to the debug log.
# Output:  The 'filename' value held in the parser's YYData store.
sub getFilename {
    # Extract arguments.
    my ($self, %args) = @_;

    # Log resolved arguments.
    # Make Dumper format more terse.  Localized, so the caller's
    # Data::Dumper configuration is restored on return.
    local $Data::Dumper::Terse  = 1;
    local $Data::Dumper::Indent = 0;
    $LOG->debug(Dumper(\%args));

    return $self->YYData->{'filename'};
}
1068
1069 =pod
1070
1071 =head2 $object->closeFileHandle()
1072
1073 =over 4
1074
1075 Closes the file handle associated with the current Parser B<$object>.
1076
1077 =back
1078
1079 =begin testing
1080
1081 my ($handle);
1082 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
1083                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
1084
1085 # Create a generic Parser object, with test state data.
1086 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
1087 $parser->closeFileHandle();
1088
1089 # Verify Test Group #1
1090 my $nextGroup = $parser->nextGroup();
1091 my $expectedGroup = {
1092     key     => 'HKEY_CURRENT_USER\]Testing Group 1[',
1093     entries => [ {
1094         name  => '@',
1095         value => 'Default',
1096     }, {
1097         name  => 'Foo',
1098         value => 'Bar',
1099     }, ],
1100 };
1101 is_deeply($nextGroup, $expectedGroup, "closeFileHandle()") or diag("The closeFileHandle() call failed.");
1102
1103 =end testing
1104
1105 =cut
1106
# Closes the file handle associated with this parser and forgets it.
#
# Inputs:  No named arguments are consumed; anything supplied is only
#          written to the debug log.
# Output:  None of significance (the result of clearing the stored
#          handle, i.e. undef).
#
# After this call the 'file_handle' slot in YYData is undef; nextGroup()
# checks for that and calls _reset() before parsing again.
sub closeFileHandle {
    # Extract arguments.
    my ($self, %args) = @_;

    # Log resolved arguments.
    # Make Dumper format more terse.  Localized, so the caller's
    # Data::Dumper configuration is restored on return.
    local $Data::Dumper::Terse  = 1;
    local $Data::Dumper::Indent = 0;
    $LOG->debug(Dumper(\%args));

    # Close explicitly rather than relying on the last reference going
    # away: a handle previously handed out by getFileHandle() would
    # otherwise keep the underlying file open.  A failed close (for
    # example, the handle was already closed) is deliberately ignored,
    # since the handle is being discarded either way.
    my $handle = $self->YYData->{'file_handle'};
    if (defined($handle)) {
        close($handle);
    }

    $self->YYData->{'file_handle'} = undef;
}
1119
1120 =pod
1121
1122 =head2 $object->getCurrentLineCount()
1123
1124 =over 4
1125
1126 Returns the number of lines parsed by the Parser B<$object>
1127 within the specified file and resets the counter back to
1128 zero.
1129
1130 I<Output>: Returns the current line count of the parser.
1131
1132 B<Note>: Calling this function will reset the parser's
1133 line count.
1134
1135 =back
1136
1137 =begin testing
1138
1139 my ($handle);
1140 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
1141                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
1142
1143 # Create a generic Parser object, with test state data.
1144 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file, index_groups => 1);
1145
1146 $parser->seekToNearestGroup(absolute_offset => 84);
1147 my $nextGroup = $parser->nextGroup();
1148
1149 is($parser->getCurrentLineCount(), 9, "getCurrentLineCount()") or diag("The getCurrentLineCount() call failed.");
1150
1151 =end testing
1152
1153 =cut
1154
# Returns the parser's current line count and resets the counter.
#
# Inputs:  No named arguments are consumed; anything supplied is only
#          written to the debug log.
# Output:  The 'line_count' value held in YYData at the time of the call.
#
# Side effect: 'line_count' is set back to 0, so consecutive calls report
# only the lines counted since the previous call.
sub getCurrentLineCount {
    # Extract arguments.
    my ($self, %args) = @_;

    # Log resolved arguments.
    # Make Dumper format more terse.  Localized, so the caller's
    # Data::Dumper configuration is restored on return.
    local $Data::Dumper::Terse  = 1;
    local $Data::Dumper::Indent = 0;
    $LOG->debug(Dumper(\%args));

    # Capture the count before zeroing it.
    my $ret = $self->YYData->{'line_count'};
    $self->YYData->{'line_count'} = 0;
    return $ret;
}
1169
1170 =pod
1171
1172 =head2 $object->seekToNearestGroup(absolute_offset => $offset, absolute_linenum => $linenum, adjust_index => $index)
1173
1174 =over 4
1175
1176 Given an absolute offset or line number within the file, this function
1177 will seek the parser to the nearest group found B<before>
1178 the specified offset.
1179
1180 I<Inputs>:
1181  B<$offset> is a required parameter, specifying the absolute offset
1182 within the file to seek to.
1183  B<$linenum> is a required parameter, specifying the absolute line
1184 number wi