root/honeyclient/branches/bug/42/lib/HoneyClient/Agent/Integrity/Registry/Parser.yp

Revision 112, 35.5 kB (checked in by kindlund, 4 years ago)

Identified slowdown culprit: XML::XPath using $` (which is bad). Developed patch file.

  • Property svn:keywords set to Id "$file"
Line 
1 %{
2 #######################################################################
3 # Created on:  Dec 10, 2006
4 # Package:     HoneyClient::Agent::Integrity::Registry::Parser
5 # File:        Parser.pm
6 # Description: Parses static hive dumps of the Windows OS registry.
7 #
8 # CVS: $Id$
9 #
10 # @author kindlund
11 #
12 # Copyright (C) 2006 The MITRE Corporation.  All rights reserved.
13 #
14 # This program is free software; you can redistribute it and/or
15 # modify it under the terms of the GNU General Public License
16 # as published by the Free Software Foundation, using version 2
17 # of the License.
18 #
19 # This program is distributed in the hope that it will be useful,
20 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 # GNU General Public License for more details.
23 #
24 # You should have received a copy of the GNU General Public License
25 # along with this program; if not, write to the Free Software
26 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 # 02110-1301, USA.
28 #
29 #######################################################################
30
31 =pod
32
33 =head1 NAME
34
35 HoneyClient::Agent::Integrity::Registry::Parser - Perl extension to parse
36 static hive dumps of the Windows OS registry.
37
38 =head1 VERSION
39
40 This documentation refers to HoneyClient::Agent::Integrity::Registry::Parser version 1.0.
41
42 =head1 SYNOPSIS
43
44   use HoneyClient::Agent::Integrity::Registry::Parser;
45   use IO::File;
46   use Data::Dumper;
47
48   # Initialize the parser object.
49   my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(
50                    input_file => "dump.reg",
51                );
52
53   # Print each registry group found, until there are no more left.
54   my $registryGroup = $parser->nextGroup();
55   while(scalar(keys(%{$registryGroup}))) {
56       print Dumper($registryGroup);
57       $registryGroup = $parser->nextGroup();
58   }
59
60   # $registryGroup refers to hashtable reference, which has the
61   # following format:
62   #
63   # $registryGroup = {
64   #     # The registry directory name.
65   #     'key' => 'HKEY_LOCAL_MACHINE\Software...',
66   #
67   #     # An array containing the list of entries within the
68   #     # registry directory.
69   #     'entries'  => [ {
70   #         'name' => "\"string\"",  # A (potentially) quoted string;
71   #                                  # "@" for default
72   #         'value' => "data",
73   #     }, ],
74   # };
75
76 =head1 DESCRIPTION
77
78 This library allows the Registry module to easily parse and enumerate
79 each Windows OS registry hive.
80
81 =cut
82
83 use strict;
84 use warnings;
85 use Carp ();
86
87 # Include Global Configuration Processing Library
88 use HoneyClient::Util::Config qw(getVar);
89
90 # Include Logging Library
91 use Log::Log4perl qw(:easy);
92
93 # Use Dumper Library.
94 use Data::Dumper;
95
96 # Use IO File Library.
97 use IO::File;
98
99 # Use Seek Library.
100 use Fcntl qw(:seek);
101
102 # Use Binary Search Library.
103 use Search::Binary;
104
105 # Use Progress Bar Library.
106 use Term::ProgressBar;
107
108 #######################################################################
109 # Module Initialization                                               #
110 #######################################################################
111
112 BEGIN {
113     # Defines which functions can be called externally.
114     require Exporter;
115     our (@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);
116
117     # Set our package version.
118     $VERSION = 0.9;
119
120     @ISA = qw(Exporter);
121
122     # Symbols to export on request
123     @EXPORT = qw( );
124
125     # Items to export into callers namespace by default. Note: do not export
126     # names by default without a very good reason. Use EXPORT_OK instead.
127     # Do not simply export all your public functions/methods/constants.
128
129     # This allows declaration use HoneyClient::Agent::Integrity::Registry ':all';
130     # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
131     # will save memory.
132
133     %EXPORT_TAGS = (
134         'all' => [ qw( ) ],
135     );
136
137     # Symbols to autoexport (:DEFAULT tag)
138     @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
139
140     $SIG{PIPE} = 'IGNORE'; # Do not exit on broken pipes.
141 }
142 our (@EXPORT_OK, $VERSION);
143
144 =pod
145
146 =begin testing
147
148 # Make sure Log::Log4perl loads
149 BEGIN { use_ok('Log::Log4perl', qw(:nowarn))
150         or diag("Can't load Log::Log4perl package. Check to make sure the package library is correctly listed within the path.");
151        
152         # Suppress all logging messages, since we need clean output for unit testing.
153         Log::Log4perl->init({
154             "log4perl.rootLogger"                               => "DEBUG, Buffer",
155             "log4perl.appender.Buffer"                          => "Log::Log4perl::Appender::TestBuffer",
156             "log4perl.appender.Buffer.min_level"                => "fatal",
157             "log4perl.appender.Buffer.layout"                   => "Log::Log4perl::Layout::PatternLayout",
158             "log4perl.appender.Buffer.layout.ConversionPattern" => "%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n",
159         });
160 }
161 require_ok('Log::Log4perl');
162 use Log::Log4perl qw(:easy);
163
164 # Make sure the module loads properly, with the exportable
165 # functions shared.
166 BEGIN { use_ok('HoneyClient::Util::Config', qw(getVar setVar))
167         or diag("Can't load HoneyClient::Util::Config package.  Check to make sure the package library is correctly listed within the path."); }
168 require_ok('HoneyClient::Util::Config');
169 can_ok('HoneyClient::Util::Config', 'getVar');
170 can_ok('HoneyClient::Util::Config', 'setVar');
171 use HoneyClient::Util::Config qw(getVar setVar);
172
173 # Suppress all logging messages, since we need clean output for unit testing.
174 Log::Log4perl->init({
175     "log4perl.rootLogger"                               => "DEBUG, Buffer",
176     "log4perl.appender.Buffer"                          => "Log::Log4perl::Appender::TestBuffer",
177     "log4perl.appender.Buffer.min_level"                => "fatal",
178     "log4perl.appender.Buffer.layout"                   => "Log::Log4perl::Layout::PatternLayout",
179     "log4perl.appender.Buffer.layout.ConversionPattern" => "%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n",
180 });
181
182 # Make sure Data::Dumper loads
183 BEGIN { use_ok('Data::Dumper')
184         or diag("Can't load Data::Dumper package. Check to make sure the package library is correctly listed within the path."); }
185 require_ok('Data::Dumper');
186 use Data::Dumper;
187
188 # Make sure IO::File loads
189 BEGIN { use_ok('IO::File')
190         or diag("Can't load IO::File package. Check to make sure the package library is correctly listed within the path."); }
191 require_ok('IO::File');
192 use IO::File;
193
194 # Make sure Fcntl loads
195 BEGIN { use_ok('Fcntl')
196         or diag("Can't load Fcntl package. Check to make sure the package library is correctly listed within the path."); }
197 require_ok('Fcntl');
198 use Fcntl qw(:seek);
199
200 # Make sure Search::Binary loads
201 BEGIN { use_ok('Search::Binary')
202         or diag("Can't load Search::Binary package. Check to make sure the package library is correctly listed within the path."); }
203 require_ok('Search::Binary');
204 can_ok('Search::Binary', 'binary_search');
205 use Search::Binary;
206
207 # Make sure Term::ProgressBar loads
208 BEGIN { use_ok('Term::ProgressBar')
209         or diag("Can't load Term::ProgressBar package. Check to make sure the package library is correctly listed within the path."); }
210 require_ok('Term::ProgressBar');
211 use Term::ProgressBar;
212
213 # Make sure HoneyClient::Agent::Integrity::Registry::Parser loads
214 BEGIN { use_ok('HoneyClient::Agent::Integrity::Registry::Parser')
215         or diag("Can't load HoneyClient::Agent::Integrity::Registry::Parser package. Check to make sure the package library is correctly listed within the path."); }
216 require_ok('HoneyClient::Agent::Integrity::Registry::Parser');
217 use HoneyClient::Agent::Integrity::Registry::Parser;
218
219 =end testing
220
221 =cut
222
223 #######################################################################
224 # Global Configuration Variables
225 #######################################################################
226
227 # The global logging object.
228 our $LOG = get_logger();
229
230 # Make Dumper format more terse.
231 $Data::Dumper::Terse = 1;
232 $Data::Dumper::Indent = 1;
233
234 %}
235
236 %token DIR_NAME
237 %token KEY_NAME
238 %token KEY_VALUE
239 %token HEADER
240 %token NEWLINE
241
242 %%
243
244 # A registry can be thought of as a header, along with 1 or more
245 # groups.
246 registry:
247                {
248             $LOG->debug("Reached end of input stream.");
249             # Finished parsing the entire file, return empty hash ref.
250             return { };
251         }
252     |   groups {
253             $LOG->debug("Reached end of input stream.");
254             # Finished parsing the entire file, return empty hash ref.
255             return { };
256         }
257     |   HEADER groups {
258             $LOG->debug("Reached end of input stream.");
259             # Finished parsing the entire file, return empty hash ref.
260             return { };
261         }
262 ;
263
264 # Define 1 or more groups.
265 groups:
266         group
267     |   NEWLINE group
268     |   NEWLINE group NEWLINE
269     |   NEWLINE group groups
270 ;
271
272 # A group consists of a group_header and 0 or more entries.
273 group:
274         DIR_NAME entries {
275             my $ret = { };
276             $_[0]->YYData->{'latest_group'}->{'key'} = $_[1];
277             if (!exists($_[0]->YYData->{'latest_group'}->{'entries'})) {
278                 # Make sure the 'entries' key exists.
279                 $_[0]->YYData->{'latest_group'}->{'entries'} = [];
280             }
281             $ret = $_[0]->YYData->{'latest_group'};
282             $_[0]->YYData->{'latest_group'} = { };
283             $_[0]->YYData->{'dir_count'}++;
284             $_[0]->YYAccept; # Terminate the parse, early.
285
286             return $ret;
287         }
288     |   DIR_NAME {
289             my $ret = { };
290             $_[0]->YYData->{'latest_group'}->{'key'} = $_[1];
291             if (!exists($_[0]->YYData->{'latest_group'}->{'entries'})) {
292                 # Make sure the 'entries' key exists.
293                 $_[0]->YYData->{'latest_group'}->{'entries'} = [];
294             }
295             $ret = $_[0]->YYData->{'latest_group'};
296             $_[0]->YYData->{'latest_group'} = { };
297             $_[0]->YYData->{'dir_count'}++;
298             $_[0]->YYAccept; # Terminate the parse, early.
299
300             return $ret;
301         }
302 ;
303
304 # Define 1 or more entries.
305 entries:
306         entry
307     |   entry entries
308 ;
309
310 # Define an entry.
311 entry:
312         KEY_NAME KEY_VALUE {
313             my $entry = {
314                 name  => $_[1],
315                 value => $_[2],
316             };
317             push(@{$_[0]->YYData->{'latest_group'}->{entries}}, $entry);
318             $_[0]->YYData->{'entry_count'}++;
319         }
320 ;
321
322 %%
323
324 #######################################################################
325 # Private Methods Implemented                                         #
326 #######################################################################
327
328 # Helper function, designed to tokenize specific data from the input stream.
329 #
330 # Inputs: parser
331 # Outputs: (token_id, data) pair
332 sub _lexer {
333     # Identify NEWLINE token.
334     if ($_[0]->YYData->{DATA} =~ m/\G\n/cg) {
335         $_[0]->YYData->{'in_group'} = 0;
336         $LOG->debug("Found NEWLINE token ending at offset (" . pos($_[0]->YYData->{DATA}) . ").");
337         return ("NEWLINE", "\n");
338     }
339
340     # Check to see if we're inside a group block...
341     if (!$_[0]->YYData->{'in_group'}) {
342
343         $_[0]->YYData->{'input_pos'} = pos($_[0]->YYData->{DATA});
344         $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} ?
345                                        $_[0]->YYData->{'input_pos'} : 0;
346         $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} +
347                                        $_[0]->YYData->{'abs_offset'};
348
349         # Update progress bar, if defined.
350         if (defined($_[0]->YYData->{'progress'}) &&
351             ($_[0]->YYData->{'input_pos'} > $_[0]->YYData->{'progress_next_update'})) {
352             $_[0]->YYData->{'progress_next_update'} =
353                 $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'input_pos'});
354         }
355
356         # Identify DIR_NAME token.
357         if ($_[0]->YYData->{DATA} =~ m/\G\[(.*)\]\n/cg) {
358             $_[0]->YYData->{'in_group'} = 1;
359             $LOG->debug("Found DIR_NAME token ending at offset (" . pos($_[0]->YYData->{DATA}) . ").");
360             return ("DIR_NAME", $1);
361         }
362
363         # Identify HEADER token. It's always only at the beginning.
364         if ($_[0]->YYData->{DATA} =~ m/\GREGEDIT4\n/cg) {
365             $LOG->debug("Found HEADER token ending at offset (" . pos($_[0]->YYData->{DATA}) . ").");
366             return ("HEADER", "REGEDIT4\n");
367         }
368
369     } else {
370
371         # Check to see if we're in a value segment...
372         if (!$_[0]->YYData->{'in_value'}) {
373
374             # Identify KEY_NAME token.
375             if ($_[0]->YYData->{DATA} =~ m/\G\"(|[^\\]|.*(?:\\[^\\]|\\\\|[^\\][^\\]))\"(?==)/cg) {
376                 $_[0]->YYData->{'in_value'} = 1;
377                 $LOG->debug("Found KEY_NAME token ending at offset (" . pos($_[0]->YYData->{DATA}) . ").");
378                 return ("KEY_NAME", $1);
379             }
380
381             # Identify default KEY_NAME token (@).
382             if ($_[0]->YYData->{DATA} =~ m/\G\@(?==)/cg) {
383                 $_[0]->YYData->{'in_value'} = 1;
384                 $LOG->debug("Found KEY_NAME token ending at offset (" . pos($_[0]->YYData->{DATA}) . ").");
385                 return ("KEY_NAME", "@");
386             }
387
388         } else {
389
390             # Identify string KEY_VALUE token.
391             if ($_[0]->YYData->{DATA} =~ m/\G=\"(|[^\\]|.*?(?:\\[^\\]|\\\\|[^\\][^\\]))\"\n/cgs) {
392                 $_[0]->YYData->{'in_value'} = 0;
393                 $LOG->debug("Found KEY_VALUE token ending at offset (" . pos($_[0]->YYData->{DATA}) . ").");
394                 return ("KEY_VALUE", $1);
395             }
396
397             # Identify binary KEY_VALUE token.
398             if ($_[0]->YYData->{DATA} =~ m/\G=(|.*?[^\\])\n/cgs) {
399                 $_[0]->YYData->{'in_value'} = 0;
400                 $LOG->debug("Found KEY_VALUE token ending at offset (" . pos($_[0]->YYData->{DATA}) . ").");
401                 return ("KEY_VALUE", $1);
402             }
403         }
404     }
405    
406     # Croak if encountered a token error.
407     if ($_[0]->YYData->{DATA} =~ m/\G(.*\n)/cg) {
408         $_[0]->YYData->{'input_pos'} = pos($_[0]->YYData->{DATA});
409         $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")");
410         Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")");
411     }
412     return ('', undef);
413 }
414
415 # Helper function, designed to report when any parsing error
416 # occurs.
417 #
418 # Inputs: parser
419 # Outputs: None
420 sub _error {
421
422     $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").");
423     Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").");
424 }
425
426 # Helper function, designed to reset the parser's file stream back to the
427 # beginning, allowing the parser to reparse from the beginning.  Or, if
428 # specified, the function will seek the parser to the specified offset.
429 #
430 # Inputs: parser, absolute offset (optional)
431 # Outputs: none
432 sub _reset {
433     # Extract arguments.
434     my ($self, $offset) = @_;
435
436     $LOG->debug("Resetting parser.");
437
438     my $fh = $self->YYData->{'file_handle'};
439
440     # Check the offset.
441     if (!defined($offset)) {
442         $offset = 0;
443     }
444     seek($fh, $offset, SEEK_SET);
445
446     undef $/;
447     $self->YYData->{DATA} = <$fh>;
448
449     # Strip all CRs.
450     $self->YYData->{DATA} =~ s/\r//g;
451
452     # Total size of input file.
453     $self->YYData->{'file_size'} = (stat($fh))[7];
454
455     # Reinitialize helper variables.
456     # Hashtable, to represent the latest, extracted group chunk.
457     $self->YYData->{'latest_group'} = { };
458
459     # Boolean, to indicate when we're parsing inside a group chunk.
460     $self->YYData->{'in_group'} = 0;
461
462     # Boolean, to indicate when we're parsing inside a value segment.
463     $self->YYData->{'in_value'} = 0;
464    
465     # Regexp offset, used to record where the parser is within
466     # the file (relative position).
467     $self->YYData->{'input_pos'} = 0;
468
469     # Absolute offset, recording where the parser initially seeked to.
470     $self->YYData->{'abs_offset'} = $offset;
471
472     # Initialize statistics.
473     # Total number of directories parsed.
474     $self->YYData->{'dir_count'} = 0;
475
476     # Total number of key/value pairs parsed.
477     $self->YYData->{'entry_count'} = 0;
478
479     # Progress bar information.
480     if ($self->YYData->{'show_progress'}) {
481         $self->YYData->{'progress'} = Term::ProgressBar->new({ name  => 'Progress',
482                                                                count => $self->YYData->{'file_size'},
483                                                                ETA   => 'linear', });
484         $self->YYData->{'progress'}->minor(0);
485         $self->YYData->{'progress'}->max_update_rate(1);
486         $self->YYData->{'progress_next_update'} = $self->YYData->{'progress'}->update($offset);
487     } else {
488         $self->YYData->{'progress'} = undef;
489     }
490 }
491
492 # Helper function, designed to index all groups, based upon beginning file
493 # offsets.
494 #
495 # Inputs: parser
496 # Outputs: None
497 sub _index {
498     # Extract arguments.
499     my $self = shift;
500
501     $LOG->debug("Starting group index process.");
502
503     $self->YYData->{'group_index'} = [0, ];
504
505     my $registryGroup = $self->nextGroup();
506     while(scalar(keys(%{$registryGroup}))) {
507         $registryGroup = $self->nextGroup();
508         push (@{$self->YYData->{'group_index'}}, $self->YYData->{'input_pos'});
509     }
510
511     # Reset the parser.
512     $self->_reset();
513
514     $LOG->debug("Finished group index process.");
515 }
516
517 # Helper function, designed to be called from within the
518 # Search::Binary::binary_search() function, in order to allow
519 # the binary_search to properly read in group index data from
520 # the default array reference.
521 #
522 # For more information about how this function operates, please
523 # see the Search::Binary POD documentation.
524 #
525 # Inputs: parser, value_to_compare, current_array_index
526 # Outputs: comparison, last_valid_array_index
527 sub _search {
528     # Extract arguments.
529     my ($parser, $value_to_compare, $current_array_index) = @_;
530
531     # Increment the search index, if the current one is undef.
532     if (defined($current_array_index)) {
533         $parser->YYData->{'last_search_index'} = $current_array_index;
534     } else {
535         $parser->YYData->{'last_search_index'}++;
536     }
537
538     # Perform a comparison, if the array entry is defined.
539     if (defined(@{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}])) {
540         return($value_to_compare <=> @{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}],
541                $parser->YYData->{'last_search_index'});
542     }
543
544     # Array entry not found, return undef with this position.
545     return (undef, $parser->YYData->{'last_search_index'});
546 }
547
548 #######################################################################
549 # Public Methods Implemented                                          #
550 #######################################################################
551
552 =pod
553
554 =head1 METHODS IMPLEMENTED
555
556 The following functions have been implemented by any Parser object.
557
558 =head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename,
559                                                              index_groups => $perform_index,
560                                                              show_progress => $progress)
561
562 =over 4
563
564 Creates a new Parser object, using the specified input file as its data
565 source.
566
567 I<Inputs>:
568  B<$filename> is an required parameter, specifying the file to open for parsing.
569  B<$perform_index> is an optional parameter.  1 specifies that the parser should go
570 ahead and scan the entire file, indexing the file offsets of where groups start and
571 end.  Otherwise, this indexing process is not performed.
572  B<$progress> is an optional parameter.  1 specifies that the parser should display
573 a progress bar, as it scans through a specified file.  Otherwise, a progress bar
574 is not displayed.
575  
576 I<Output>: The instantiated Parser B<$object>, fully initialized.
577
578 =back
579
580 =begin testing
581
582 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
583                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
584
585 # Create a generic Parser object, with test state data.
586 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
587 isa_ok($parser, 'HoneyClient::Agent::Integrity::Registry::Parser', "init(input_file => $test_registry_file)") or diag("The init() call failed.");
588
589 =end testing
590
591 =cut
592
593 sub init {
594
595     # Extract arguments.
596     my ($self, %args) = @_;
597
598     # Log resolved arguments.
599     # Make Dumper format more terse.
600     $Data::Dumper::Terse = 1;
601     $Data::Dumper::Indent = 0;
602     $LOG->debug(Dumper(\%args));
603
604     # Sanity check, don't initialize, unless input_file_handle
605     # was provided.
606     my $argsExist = scalar(%args);
607     if (!$argsExist ||
608         !exists($args{'input_file'}) ||
609         !defined($args{'input_file'})) {
610         $LOG->fatal("Error: Unable to create parser - no 'input_file' specified!");
611         Carp::croak("Error: Unable to create parser - no 'input_file' specified!");
612     }
613
614     my $parser = HoneyClient::Agent::Integrity::Registry::Parser->new();
615     my $fh = new IO::File($args{'input_file'}, "r");
616     if (!defined($fh)) {
617         $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!");
618         Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!");
619     }
620    
621     # Check if show progress was specified.
622     if ($argsExist &&
623         exists($args{'show_progress'}) &&
624         defined($args{'show_progress'}) &&
625         $args{'show_progress'}) {
626         $parser->YYData->{'show_progress'} = 1;
627     } else {
628         $parser->YYData->{'show_progress'} = 0;
629     }
630
631     # Save the file handle.
632     $parser->YYData->{'file_handle'} = $fh;
633
634     # Reset the parser.
635     $parser->_reset();
636
637     # Perform group indexing, if specified.
638     if ($argsExist &&
639         exists($args{'index_groups'}) &&
640         defined($args{'index_groups'}) &&
641         $args{'index_groups'}) {
642         $parser->_index();
643     } else {
644         $parser->YYData->{'group_index'} = [0, ];
645     }
646
647     # Return parser object.
648     return $parser;
649 }
650
651 =pod
652
653 =head2 $object->nextGroup()
654
655 =over 4
656
657 Provides the next registry group, in the form of a hashtable reference.
658 This hashtable has the following format:
659
660   {
661       # The registry directory name.
662       'key' => 'HKEY_LOCAL_MACHINE\Software...',
663  
664       # An array containing the list of entries within the
665       # registry directory.
666       'entries'  => [ {
667           'name' => "\"string\"",  # A (potentially) quoted string;
668                                    # "@" for default
669           'value' => "data",
670       }, ],
671   };
672
673 I<Output>: A hashtable reference if the next group was parsed successfully;
674 returns an empty hash ref, if the Parser B<$object> has reached the end of
675 the input stream.
676
677 =back
678
679 =begin testing
680
681 my ($nextGroup, $expectedGroup);
682 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
683                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
684
685 # Create a generic Parser object, with test state data.
686 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file, index_groups => 1);
687
688 # Verify Test Group #1
689 $nextGroup = $parser->nextGroup();
690 $expectedGroup = {
691     key     => 'HKEY_CURRENT_USER\]Testing Group 1[',
692     entries => [ {
693         name  => '@',
694         value => 'Default',
695     }, {
696         name  => 'Foo',
697         value => 'Bar',
698     }, ],
699 };
700 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 1") or diag("The nextGroup() call failed.");
701
702 # Verify Test Group #2
703 $nextGroup = $parser->nextGroup();
704 $expectedGroup = {
705     key     => 'HKEY_CURRENT_USER\Testing Group 2',
706     entries => [ {
707         name  => '@',
708         value => '\\"Annoying=Value\\"',
709     }, {
710         name  => '\\"Annoying=Key\\"',
711         value => 'Bar',
712     }, {
713         name  => 'Multiline',
714         value => 'This
715 value spans
716 multiple lines
717 ',
718     }, {
719         name  => 'Sane_Key',
720         value => '\\"Wierd=\\"Value',
721     }, ],
722 };
723 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 2") or diag("The nextGroup() call failed.");
724
725 # Verify Test Group #3
726 $nextGroup = $parser->nextGroup();
727 $expectedGroup = {
728     key     => 'HKEY_CURRENT_USER\Testing Group 3',
729     entries => [ {
730         name  => 'Test_Bin_1',
731         value => 'hex:f4,ff,ff,ff,00,00,00,00,00,00,00,00,00,00,00,00,bc,02,00,00,00,\
732   00,00,00,00,00,00,00,54,00,61,00,68,00,6f,00,6d,00,61,00,00,00,f0,77,3f,00,\
733   3f,00,3f,00,3f,00,00,00,00,00,00,00,00,00,00,00,00,00,00,00,78,00,1c,10,fc,\
734   7f,22,14,fc,7f,b0,fe,12,00,00,00,00,00,00,00,00,00,98,23,eb,77'
735     }, {
736         name  => 'Test_Bin_2',
737         value => 'hex:f5,ff,ff,ff,00,00,00,00,00,00,00,00,00,00,00,00,90,01,00,00,00,\
738   00,00,00,00,00,00,00,4d,00,69,00,63,00,72,00,6f,00,73,00,6f,00,66,00,74,00,\
739   20,00,53,00,61,00,6e,00,73,00,20,00,53,00,65,00,72,00,69,00,66,00,00,00,f0,\
740   77,00,20,14,00,00,00,00,10,80,05,14,00,f0,1f,14,00,00,00,14,00'
741     }, ],
742 };
743 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 3") or diag("The nextGroup() call failed.");
744
745 # Verify Test Group #4
746 $nextGroup = $parser->nextGroup();
747 $expectedGroup = {
748     key     => 'HKEY_CURRENT_USER\Testing Group 4',
749     entries => [],
750 };
751 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 4") or diag("The nextGroup() call failed.");
752
753 # Verify Test Group #5
754 $nextGroup = $parser->nextGroup();
755 $expectedGroup = {
756     key     => 'HKEY_CURRENT_USER\Testing Group 5',
757     entries => [ {
758         name  => '@',
759         value => '',
760     }, ],
761 };
762 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 5") or diag("The nextGroup() call failed.");
763
764 # Verify Test Group #6
765 $nextGroup = $parser->nextGroup();
766 $expectedGroup = {
767     key     => 'HKEY_CURRENT_USER\Testing Group 6\With\Really\Deep\Nested\Directory\Structure',
768     entries => [ {
769         name  => 'InstallerLocation',
770         value => 'C:\\\\WINDOWS\\\\system32\\\\',
771     }, ],
772 };
773 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 6") or diag("The nextGroup() call failed.");
774
775 # Verify Test Group #7
776 $nextGroup = $parser->nextGroup();
777 $expectedGroup = {
778     key     => 'HKEY_CURRENT_USER\Testing Group 7',
779     entries => [ {
780         name  => 'C:\\\\Program Files\\\\Common Files\\\\Microsoft Shared\\\\Web Folders\\\\',
781         value => '',
782     }, {
783         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{350C97B0-3D7C-4EE8-BAA9-00BCB3D54227}\\\\',
784         value => '',
785     }, {
786         name  => 'C:\\\\Program Files\\\\Support Tools\\\\',
787         value => '',
788     }, {
789         name  => 'C:\\\\Documents and Settings\\\\All Users\\\\Start Menu\\\\Programs\\\\Windows Support Tools\\\\',
790         value => '',
791     }, {
792         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{6855CCDD-BDF9-48E4-B80A-80DFB96FE36C}\\\\',
793         value => '',
794     }, {
795         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{F251B999-08A9-4704-999C-9962F0DFD88E}\\\\',
796         value => '',
797     }, {
798         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{1CB92574-96F2-467B-B793-5CEB35C40C29}\\\\',
799         value => '',
800     }, {
801         name  => 'C:\\\\WINDOWS\\\\Installer\\\\{B37C842A-B624-46B8-A727-654E72F1C91A}\\\\',
802         value => '',
803     }, ],
804 };
805 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 7") or diag("The nextGroup() call failed.");
806
807 # Verify Test Group #8
808 $nextGroup = $parser->nextGroup();
809 $expectedGroup = {
810     key     => 'HKEY_CURRENT_USER\Testing Group 8\{00021492-0000-0000-C000-000000000046}',
811     entries => [ {
812         name  => '000',
813         value => 'String Value',
814     }, ],
815 };
816 is_deeply($nextGroup, $expectedGroup, "nextGroup() - 8") or diag("The nextGroup() call failed.");
817
818 # Verify Test Group #9
819 $nextGroup = $parser->nextGroup();
820 is_deeply($nextGroup, { }, "nextGroup() - 9") or diag("The nextGroup() call failed.");
821
822 =end testing
823
824 =cut
825
826 sub nextGroup {
827     # Extract arguments.
828     my ($self, %args) = @_;
829
830     # Log resolved arguments.
831     # Make Dumper format more terse.
832     $Data::Dumper::Terse = 1;
833     $Data::Dumper::Indent = 0;
834     $LOG->debug(Dumper(\%args));
835
836     if ($self->YYData->{'input_pos'} == 0) {
837         $LOG->debug("Beginning parse of input stream.");
838     }
839
840     # Update progress bar, if defined.
841     if (defined($_[0]->YYData->{'progress'}) &&
842         ($_[0]->YYData->{'file_size'} <= $_[0]->YYData->{'progress_next_update'})) {
843
844         $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'file_size'});
845     }
846
847     # Return the next group parsed.
848     return $self->YYParse(yylex   => \&_lexer,
849                           yyerror => \&_error);
850 }
851
852 =pod
853
854 =head2 $object->dirsParsed()
855
856 =over 4
857
858 Indicates how many registry directories the Parser B<$object> has
859 parsed within the specified file, so far.
860
861 I<Output>: Returns the number of directory groups parsed so far;
862 returns 0, if none parsed yet.
863
864 =back
865
866 =begin testing
867
868 my ($nextGroup);
869 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
870                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
871
872 # Create a generic Parser object, with test state data.
873 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
874
875 $nextGroup = $parser->nextGroup();
876 while(scalar(keys(%{$nextGroup}))) {
877     $nextGroup = $parser->nextGroup();
878 }
879
880 is($parser->dirsParsed(), 8, "dirsParsed()") or diag("The dirsParsed() call failed.");
881
882 =end testing
883
884 =cut
885
886 sub dirsParsed {
887     # Extract arguments.
888     my ($self, %args) = @_;
889    
890     # Log resolved arguments.
891     # Make Dumper format more terse.
892     $Data::Dumper::Terse = 1;
893     $Data::Dumper::Indent = 0;
894     $LOG->debug(Dumper(\%args));
895
896     return $self->YYData->{'dir_count'};
897 }
898
899 =pod
900
901 =head2 $object->entriesParsed()
902
903 =over 4
904
905 Indicates how many registry key/value pairs the Parser B<$object> has
906 parsed within the specified file, so far.
907
908 I<Output>: Returns the number of key/value pairs parsed so far;
909 returns 0, if none parsed yet.
910
911 =back
912
913 =begin testing
914
915 my ($nextGroup);
916 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
917                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
918
919 # Create a generic Parser object, with test state data.
920 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
921
922 $nextGroup = $parser->nextGroup();
923 while(scalar(keys(%{$nextGroup}))) {
924     $nextGroup = $parser->nextGroup();
925 }
926
927 is($parser->entriesParsed(), 19, "entriesParsed()") or diag("The entriesParsed() call failed.");
928
929 =end testing
930
931 =cut
932
933 sub entriesParsed {
934     # Extract arguments.
935     my ($self, %args) = @_;
936
937     # Log resolved arguments.
938     # Make Dumper format more terse.
939     $Data::Dumper::Terse = 1;
940     $Data::Dumper::Indent = 0;
941     $LOG->debug(Dumper(\%args));
942
943     return $self->YYData->{'entry_count'};
944 }
945
946 =pod
947
948 =head2 $object->getFileHandle()
949
950 =over 4
951
952 Returns the file handle associated with the current Parser B<$object>.
953
954 I<Output>: Returns the file handle in use.
955
956 =back
957
958 =begin testing
959
960 my ($handle);
961 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
962                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
963
964 # Create a generic Parser object, with test state data.
965 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file);
966
967 $handle = $parser->getFileHandle();
968
969 isa_ok($handle, 'IO::File', "getFileHandle()") or diag("The getFileHandle() call failed.");
970
971 =end testing
972
973 =cut
974
975 sub getFileHandle {
976     # Extract arguments.
977     my ($self, %args) = @_;
978
979     # Log resolved arguments.
980     # Make Dumper format more terse.
981     $Data::Dumper::Terse = 1;
982     $Data::Dumper::Indent = 0;
983     $LOG->debug(Dumper(\%args));
984
985     return $self->YYData->{'file_handle'};
986 }
987
988 =pod
989
990 =head2 $object->seekToNearestGroup(absolute_offset => $offset)
991
992 =over 4
993
994 Given an absolute offset within the file, this function
995 will seek the parser to the nearest group found B<before>
996 the specified offset.
997
998 I<Inputs>:
999  B<$offset> is an required parameter, specifying the absolute offset
1000 within the file to seek to.
1001
1002 I<Outputs>: None.
1003
1004 =back
1005
1006 =begin testing
1007
1008 my ($nextGroup, $expectedGroup);
1009 my $test_registry_file = $ENV{PWD} . "/" . getVar(name      => "registry_file",
1010                                                   namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test");
1011
1012 # Create a generic Parser object, with test state data.
1013 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file, index_groups => 1);
1014
1015 $parser->seekToNearestGroup(absolute_offset => 84);
1016
1017 # Verify Test Group #2
1018 $nextGroup = $parser->nextGroup();
1019 $expectedGroup = {
1020     key     => 'HKEY_CURRENT_USER\Testing Group 2',
1021     entries => [ {
1022         name  => '@',
1023         value => '\\"Annoying=Value\\"',
1024     }, {
1025         name  => '\\"Annoying=Key\\"',
1026         value => 'Bar',
1027     }, {
1028         name  => 'Multiline',
1029         value => 'This
1030 value spans
1031 multiple lines
1032 ',
1033     }, {
1034         name  => 'Sane_Key',
1035         value => '\\"Wierd=\\"Value',
1036     }, ],
1037 };
1038 is_deeply($nextGroup, $expectedGroup, "seekToNearestGroup()") or diag("The seekToNearestGroup() call failed.");
1039
1040 =end testing
1041
1042 =cut
1043
1044 sub seekToNearestGroup {
1045     # Extract arguments.
1046     my ($self, %args) = @_;
1047
1048     # Sanity check, don't continue, unless absolute_offset
1049     # was provided.
1050     my $argsExist = scalar(%args);
1051     if (!$argsExist ||
1052         !exists($args{'absolute_offset'}) ||
1053         !defined($args{'absolute_offset'})) {
1054         $LOG->fatal("Error: Unable to seek parser - no 'absolute_offset' specified!");
1055         Carp::croak("Error: Unable to seek parser - no 'absolute_offset' specified!");
1056     }
1057
1058     # Check to see if the 'group_index' has been initialized.
1059     # We assume that if it has [0, ], then this has not been
1060     # done.
1061     my $numIndices = scalar(@{$self->YYData->{'group_index'}});
1062     if ($numIndices < 2) {
1063         $self->_index();
1064     }
1065     $numIndices = scalar(@{$self->YYData->{'group_index'}});
1066
1067     # Find the nearest index after the offset.
1068     my $found_index = binary_search(0, $numIndices - 1, $args{'absolute_offset'}, \&_search, $self);
1069
1070     # Now, find the nearest index before the offset.
1071     if ($found_index > 0) {
1072         $found_index--;
1073     }
1074     my $found_offset = @{$self->YYData->{'group_index'}}[$found_index];
1075
1076     # XXX: Change this to debug, eventually.
1077     $LOG->info("Seeking parser to nearest earlier group offset (" . $found_offset . ").");
1078
1079     # Seek the parser, to the specified offset.
1080     $self->_reset($found_offset);
1081 }
1082
1083 #######################################################################
1084 # Additional Module Documentation                                     #
1085 #######################################################################
1086
1087 =head1 BUGS & ASSUMPTIONS
1088
1089 The Parser B<$object> expects to scan the specified file as an input stream.
1090 Subsequent calls to $object->nextGroup() will advance the parser through
1091 the input stream.
1092
1093 Currently, there is no way to reset a Parser's scan through an input stream.
1094 If desired, simply create a new Parser object, to restart the scan process.
1095
1096 =head1 SEE ALSO
1097
1098 L<http://www.honeyclient.org/trac>
1099
1100 =head1 REPORTING BUGS
1101
1102 L<http://www.honeyclient.org/trac/newticket>
1103
1104 =head1 ACKNOWLEDGEMENTS
1105
1106 Francois Desarmenien E<lt>francois@fdesar.netE<gt> for his
1107 work in developing the Parse::Yapp module.
1108
1109 =head1 AUTHORS
1110
1111 Darien Kindlund, E<lt>kindlund@mitre.orgE<gt>
1112
1113 =head1 COPYRIGHT & LICENSE
1114
1115 Copyright (C) 2006 The MITRE Corporation.  All rights reserved.
1116
1117 This program is free software; you can redistribute it and/or
1118 modify it under the terms of the GNU General Public License
1119 as published by the Free Software Foundation, using version 2
1120 of the License.
1121
1122 This program is distributed in the hope that it will be useful,
1123 but WITHOUT ANY WARRANTY; without even the implied warranty of
1124 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1125 GNU General Public License for more details.
1126
1127 You should have received a copy of the GNU General Public License
1128 along with this program; if not, write to the Free Software
1129 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
1130 02110-1301, USA.
1131
1132
1133 =cut
Note: See TracBrowser for help on using the browser.