Changeset 105
- Timestamp:
- 12/11/06 23:27:43 (2 years ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
honeyclient/branches/bug/42/lib/HoneyClient/Agent/Integrity/Registry/Parser.pm
r99 r105 102 102 use Carp (); 103 103 104 # Include Global Configuration Processing Library105 use HoneyClient::Util::Config qw(getVar);106 107 104 # Include Logging Library 108 105 use Log::Log4perl qw(:easy); 106 # Temporarily Initialize Logging Subsystem 107 # XXX: We hard code the logging format here, since it appears that 108 # calling 'use HoneyClient::Util::Config qw(getVar);' slows down 109 # the parser by a factor of 10x (not sure why). 110 Log::Log4perl->init_once({ 111 "log4perl.rootLogger" => "INFO, Screen", 112 "log4perl.appender.Screen" => "Log::Log4perl::Appender::Screen", 113 "log4perl.appender.Screen.stderr" => 0, 114 "log4perl.appender.Screen.Threshold" => "INFO", 115 "log4perl.appender.Screen.layout" => "Log::Log4perl::Layout::PatternLayout", 116 "log4perl.appender.Screen.layout.ConversionPattern" => "%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n", 117 }); 118 109 119 110 120 # Use Dumper Library. … … 116 126 # Use Seek Library. 117 127 use Fcntl qw(:seek); 128 129 # Use Binary Search Library. 130 use Search::Binary; 131 132 # Use Progress Bar Library. 133 use Term::ProgressBar; 118 134 119 135 ####################################################################### … … 208 224 require_ok('Fcntl'); 209 225 use Fcntl qw(:seek); 226 227 # Make sure Search::Binary loads 228 BEGIN { use_ok('Search::Binary') 229 or diag("Can't load Search::Binary package. Check to make sure the package library is correctly listed within the path."); } 230 require_ok('Search::Binary'); 231 can_ok('Search::Binary', 'binary_search'); 232 use Search::Binary; 233 234 # Make sure Term::ProgressBar loads 235 BEGIN { use_ok('Term::ProgressBar') 236 or diag("Can't load Term::ProgressBar package. 
Check to make sure the package library is correctly listed within the path."); } 237 require_ok('Term::ProgressBar'); 238 use Term::ProgressBar; 210 239 211 240 # Make sure HoneyClient::Agent::Integrity::Registry::Parser loads … … 354 383 'registry', 0, 355 384 sub 356 #line 2 28"Parser.yp"385 #line 257 "Parser.yp" 357 386 { 358 387 $LOG->debug("Reached end of input stream."); … … 364 393 'registry', 1, 365 394 sub 366 #line 2 33"Parser.yp"395 #line 262 "Parser.yp" 367 396 { 368 397 $LOG->debug("Reached end of input stream."); … … 374 403 'registry', 2, 375 404 sub 376 #line 2 38"Parser.yp"405 #line 267 "Parser.yp" 377 406 { 378 407 $LOG->debug("Reached end of input stream."); … … 396 425 'group', 2, 397 426 sub 398 #line 2 55"Parser.yp"427 #line 284 "Parser.yp" 399 428 { 400 429 my $ret = { }; … … 415 444 'group', 1, 416 445 sub 417 #line 2 69"Parser.yp"446 #line 298 "Parser.yp" 418 447 { 419 448 my $ret = { }; … … 440 469 'entry', 2, 441 470 sub 442 #line 293"Parser.yp"471 #line 322 "Parser.yp" 443 472 { 444 473 my $entry = { … … 455 484 } 456 485 457 #line 3 03"Parser.yp"486 #line 332 "Parser.yp" 458 487 459 488 … … 467 496 # Outputs: (token_id, data) pair 468 497 sub _lexer { 469 470 498 # Identify NEWLINE token. 471 499 if ($_[0]->YYData->{DATA} =~ m/\G\n/cg) { … … 481 509 $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} ? 482 510 $_[0]->YYData->{'input_pos'} : 0; 511 $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} + 512 $_[0]->YYData->{'abs_offset'}; 513 514 # Update progress bar, if defined. 515 if (defined($_[0]->YYData->{'progress'}) && 516 ($_[0]->YYData->{'input_pos'} > $_[0]->YYData->{'progress_next_update'})) { 517 $_[0]->YYData->{'progress_next_update'} = 518 $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'input_pos'}); 519 } 483 520 484 521 # Identify DIR_NAME token. 
… … 535 572 if ($_[0]->YYData->{DATA} =~ m/\G(.*\n)/cg) { 536 573 $_[0]->YYData->{'input_pos'} = pos($_[0]->YYData->{DATA}); 537 $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .") \n");538 Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .") \n");574 $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")"); 575 Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")"); 539 576 } 540 577 return ('', undef); … … 547 584 # Outputs: None 548 585 sub _error { 549 $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").\n"); 550 Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").\n"); 586 587 $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ")."); 588 Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ")."); 589 } 590 591 # Helper function, designed to reset the parser's file stream back to the 592 # beginning, allowing the parser to reparse from the beginning. Or, if 593 # specified, the function will seek the parser to the specified offset. 594 # 595 # Inputs: parser, absolute offset (optional) 596 # Outputs: none 597 sub _reset { 598 # Extract arguments. 599 my ($self, $offset) = @_; 600 601 $LOG->debug("Resetting parser."); 602 603 my $fh = $self->YYData->{'file_handle'}; 604 605 # Check the offset. 606 if (!defined($offset)) { 607 $offset = 0; 608 } 609 seek($fh, $offset, SEEK_SET); 610 611 undef $/; 612 $self->YYData->{DATA} = <$fh>; 613 614 # Strip all CRs. 615 $self->YYData->{DATA} =~ s/\r//g; 616 617 # Total size of input file. 618 $self->YYData->{'file_size'} = (stat($fh))[7]; 619 620 # Reinitialize helper variables. 621 # Hashtable, to represent the latest, extracted group chunk. 
622 $self->YYData->{'latest_group'} = { }; 623 624 # Boolean, to indicate when we're parsing inside a group chunk. 625 $self->YYData->{'in_group'} = 0; 626 627 # Boolean, to indicate when we're parsing inside a value segment. 628 $self->YYData->{'in_value'} = 0; 629 630 # Regexp offset, used to record where the parser is within 631 # the file (relative position). 632 $self->YYData->{'input_pos'} = 0; 633 634 # Absolute offset, recording where the parser initially seeked to. 635 $self->YYData->{'abs_offset'} = $offset; 636 637 # Initialize statistics. 638 # Total number of directories parsed. 639 $self->YYData->{'dir_count'} = 0; 640 641 # Total number of key/value pairs parsed. 642 $self->YYData->{'entry_count'} = 0; 643 644 # Progress bar information. 645 if ($self->YYData->{'show_progress'}) { 646 $self->YYData->{'progress'} = Term::ProgressBar->new({ name => 'Progress', 647 count => $self->YYData->{'file_size'}, 648 ETA => 'linear', }); 649 $self->YYData->{'progress'}->minor(0); 650 $self->YYData->{'progress'}->max_update_rate(1); 651 $self->YYData->{'progress_next_update'} = $self->YYData->{'progress'}->update($offset); 652 } else { 653 $self->YYData->{'progress'} = undef; 654 } 551 655 } 552 656 … … 570 674 } 571 675 572 my $fh = $self->YYData->{'file_handle'}; 573 seek($fh, 0, SEEK_SET); 574 575 undef $/; 576 $self->YYData->{DATA} = <$fh>; 577 578 # Strip all CRs. 579 $self->YYData->{DATA} =~ s/\r//g; 580 581 $self->YYData->{DATA} =~ m/\G.{73}/scg; 582 583 584 # Reinitialize helper variables. 585 # Hashtable, to represent the latest, extracted group chunk. 586 $self->YYData->{'latest_group'} = { }; 587 588 # Boolean, to indicate when we're parsing inside a group chunk. 589 $self->YYData->{'in_group'} = 0; 590 591 # Boolean, to indicate when we're parsing inside a value segment. 592 $self->YYData->{'in_value'} = 0; 593 594 # Regexp offset, used to record where the parser is within 595 # the file. 
596 $self->YYData->{'input_pos'} = 0; 597 598 # Initialize statistics. 599 # Total number of directories parsed. 600 $self->YYData->{'dir_count'} = 0; 601 602 # Total number of key/value pairs parsed. 603 $self->YYData->{'entry_count'} = 0; 676 # Reset the parser. 677 $self->_reset(); 604 678 605 679 $LOG->debug("Finished group index process."); 606 607 print Dumper($self->YYData->{'group_index'}) . "\n"; 680 } 681 682 # Helper function, designed to be called from within the 683 # Search::Binary::binary_search() function, in order to allow 684 # the binary_search to properly read in group index data from 685 # the default array reference. 686 # 687 # For more information about how this function operates, please 688 # see the Search::Binary POD documentation. 689 # 690 # Inputs: parser, value_to_compare, current_array_index 691 # Outputs: comparison, last_valid_array_index 692 sub _search { 693 # Extract arguments. 694 my ($parser, $value_to_compare, $current_array_index) = @_; 695 696 # Increment the search index, if the current one is undef. 697 if (defined($current_array_index)) { 698 $parser->YYData->{'last_search_index'} = $current_array_index; 699 } else { 700 $parser->YYData->{'last_search_index'}++; 701 } 702 703 # Perform a comparison, if the array entry is defined. 704 if (defined(@{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}])) { 705 return($value_to_compare <=> @{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}], 706 $parser->YYData->{'last_search_index'}); 707 } 708 709 # Array entry not found, return undef with this position. 710 return (undef, $parser->YYData->{'last_search_index'}); 608 711 } 609 712 … … 618 721 The following functions have been implemented by any Parser object. 
619 722 620 =head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename, index_groups => $perform_index) 723 =head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename, 724 index_groups => $perform_index, 725 show_progress => $progress) 621 726 622 727 =over 4 … … 630 735 ahead and scan the entire file, indexing the file offsets of where groups start and 631 736 end. Otherwise, this indexing process is not performed. 737 B<$progress> is an optional parameter. 1 specifies that the parser should display 738 a progress bar, as it scans through a specified file. Otherwise, a progress bar 739 is not displayed. 632 740 633 741 I<Output>: The instantiated Parser B<$object>, fully initialized. … … 665 773 !exists($args{'input_file'}) || 666 774 !defined($args{'input_file'})) { 667 $LOG->fatal("Error: Unable to create parser - no 'input_file' specified! \n");668 Carp::croak("Error: Unable to create parser - no 'input_file' specified! \n");775 $LOG->fatal("Error: Unable to create parser - no 'input_file' specified!"); 776 Carp::croak("Error: Unable to create parser - no 'input_file' specified!"); 669 777 } 670 778 … … 672 780 my $fh = new IO::File($args{'input_file'}, "r"); 673 781 if (!defined($fh)) { 674 $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!\n"); 675 Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!\n"); 782 $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!"); 783 Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!"); 784 } 785 786 # Check if show progress was specified. 
787 if ($argsExist && 788 exists($args{'show_progress'}) && 789 defined($args{'show_progress'}) && 790 $args{'show_progress'}) { 791 $parser->YYData->{'show_progress'} = 1; 792 } else { 793 $parser->YYData->{'show_progress'} = 0; 676 794 } 677 795 … … 679 797 $parser->YYData->{'file_handle'} = $fh; 680 798 681 undef $/; 682 $parser->YYData->{DATA} = <$fh>; 683 684 # Strip all CRs. 685 $parser->YYData->{DATA} =~ s/\r//g; 686 687 # Initialize helper variables. 688 # Hashtable, to represent the latest, extracted group chunk. 689 $parser->YYData->{'latest_group'} = { }; 690 691 # Boolean, to indicate when we're parsing inside a group chunk. 692 $parser->YYData->{'in_group'} = 0; 693 694 # Boolean, to indicate when we're parsing inside a value segment. 695 $parser->YYData->{'in_value'} = 0; 696 697 # Regexp offset, used to record where the parser is within 698 # the file. 699 $parser->YYData->{'input_pos'} = 0; 700 701 # Array to record where group boundaries occur. 702 $parser->YYData->{'group_index'} = [0, ]; 703 704 # Initialize statistics. 705 # Total number of directories parsed. 706 $parser->YYData->{'dir_count'} = 0; 707 708 # Total number of key/value pairs parsed. 709 $parser->YYData->{'entry_count'} = 0; 799 # Reset the parser. 800 $parser->_reset(); 710 801 711 802 # Perform group indexing, if specified. … … 715 806 $args{'index_groups'}) { 716 807 $parser->_index(); 808 } else { 809 $parser->YYData->{'group_index'} = [0, ]; 717 810 } 718 811 … … 910 1003 } 911 1004 1005 # Update progress bar, if defined. 1006 if (defined($_[0]->YYData->{'progress'}) && 1007 ($_[0]->YYData->{'file_size'} <= $_[0]->YYData->{'progress_next_update'})) { 1008 1009 $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'file_size'}); 1010 } 1011 912 1012 # Return the next group parsed. 
913 1013 return $self->YYParse(yylex => \&_lexer, … … 1051 1151 } 1052 1152 1153 =pod 1154 1155 =head2 $object->seekToNearestGroup(absolute_offset => $offset) 1156 1157 =over 4 1158 1159 Given an absolute offset within the file, this function 1160 will seek the parser to the nearest group found B<before> 1161 the specified offset. 1162 1163 I<Inputs>: 1164 B<$offset> is a required parameter, specifying the absolute offset 1165 within the file to seek to. 1166 1167 I<Outputs>: None. 1168 1169 =back 1170 1171 =begin testing 1172 1173 my ($nextGroup, $expectedGroup); 1174 my $test_registry_file = $ENV{PWD} . "/" . getVar(name => "registry_file", 1175 namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test"); 1176 1177 # Create a generic Parser object, with test state data. 1178 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file, index_groups => 1); 1179 1180 $parser->seekToNearestGroup(absolute_offset => 84); 1181 1182 # Verify Test Group #2 1183 $nextGroup = $parser->nextGroup(); 1184 $expectedGroup = { 1185 key => 'HKEY_CURRENT_USER\Testing Group 2', 1186 entries => [ { 1187 name => '@', 1188 value => '\\"Annoying=Value\\"', 1189 }, { 1190 name => '\\"Annoying=Key\\"', 1191 value => 'Bar', 1192 }, { 1193 name => 'Multiline', 1194 value => 'This 1195 value spans 1196 multiple lines 1197 ', 1198 }, { 1199 name => 'Sane_Key', 1200 value => '\\"Wierd=\\"Value', 1201 }, ], 1202 }; 1203 is_deeply($nextGroup, $expectedGroup, "seekToNearestGroup()") or diag("The seekToNearestGroup() call failed."); 1204 1205 =end testing 1206 1207 =cut 1208 1209 sub seekToNearestGroup { 1210 # Extract arguments. 1211 my ($self, %args) = @_; 1212 1213 # Sanity check, don't continue, unless absolute_offset 1214 # was provided. 
1215 my $argsExist = scalar(%args); 1216 if (!$argsExist || 1217 !exists($args{'absolute_offset'}) || 1218 !defined($args{'absolute_offset'})) { 1219 $LOG->fatal("Error: Unable to seek parser - no 'absolute_offset' specified!"); 1220 Carp::croak("Error: Unable to seek parser - no 'absolute_offset' specified!"); 1221 } 1222 1223 # Check to see if the 'group_index' has been initialized. 1224 # We assume that if it has [0, ], then this has not been 1225 # done. 1226 my $numIndices = scalar(@{$self->YYData->{'group_index'}}); 1227 if ($numIndices < 2) { 1228 $self->_index(); 1229 } 1230 $numIndices = scalar(@{$self->YYData->{'group_index'}}); 1231 1232 # Find the nearest index after the offset. 1233 my $found_index = binary_search(0, $numIndices - 1, $args{'absolute_offset'}, \&_search, $self); 1234 1235 # Now, find the nearest index before the offset. 1236 if ($found_index > 0) { 1237 $found_index--; 1238 } 1239 my $found_offset = @{$self->YYData->{'group_index'}}[$found_index]; 1240 1241 # XXX: Change this to debug, eventually. 1242 $LOG->info("Seeking parser to nearest earlier group offset (" . $found_offset . ")."); 1243 1244 # Seek the parser, to the specified offset. 1245 $self->_reset($found_offset); 1246 } 1247 1053 1248 ####################################################################### 1054 1249 # Additional Module Documentation # honeyclient/branches/bug/42/lib/HoneyClient/Agent/Integrity/Registry/Parser.yp
r99 r105 85 85 use Carp (); 86 86 87 # Include Global Configuration Processing Library88 use HoneyClient::Util::Config qw(getVar);89 90 87 # Include Logging Library 91 88 use Log::Log4perl qw(:easy); 89 # Temporarily Initialize Logging Subsystem 90 # XXX: We hard code the logging format here, since it appears that 91 # calling 'use HoneyClient::Util::Config qw(getVar);' slows down 92 # the parser by a factor of 10x (not sure why). 93 Log::Log4perl->init_once({ 94 "log4perl.rootLogger" => "INFO, Screen", 95 "log4perl.appender.Screen" => "Log::Log4perl::Appender::Screen", 96 "log4perl.appender.Screen.stderr" => 0, 97 "log4perl.appender.Screen.Threshold" => "INFO", 98 "log4perl.appender.Screen.layout" => "Log::Log4perl::Layout::PatternLayout", 99 "log4perl.appender.Screen.layout.ConversionPattern" => "%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n", 100 }); 101 92 102 93 103 # Use Dumper Library. … … 99 109 # Use Seek Library. 100 110 use Fcntl qw(:seek); 111 112 # Use Binary Search Library. 113 use Search::Binary; 114 115 # Use Progress Bar Library. 116 use Term::ProgressBar; 101 117 102 118 ####################################################################### … … 191 207 require_ok('Fcntl'); 192 208 use Fcntl qw(:seek); 209 210 # Make sure Search::Binary loads 211 BEGIN { use_ok('Search::Binary') 212 or diag("Can't load Search::Binary package. Check to make sure the package library is correctly listed within the path."); } 213 require_ok('Search::Binary'); 214 can_ok('Search::Binary', 'binary_search'); 215 use Search::Binary; 216 217 # Make sure Term::ProgressBar loads 218 BEGIN { use_ok('Term::ProgressBar') 219 or diag("Can't load Term::ProgressBar package. 
Check to make sure the package library is correctly listed within the path."); } 220 require_ok('Term::ProgressBar'); 221 use Term::ProgressBar; 193 222 194 223 # Make sure HoneyClient::Agent::Integrity::Registry::Parser loads … … 312 341 # Outputs: (token_id, data) pair 313 342 sub _lexer { 314 315 343 # Identify NEWLINE token. 316 344 if ($_[0]->YYData->{DATA} =~ m/\G\n/cg) { … … 326 354 $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} ? 327 355 $_[0]->YYData->{'input_pos'} : 0; 356 $_[0]->YYData->{'input_pos'} = $_[0]->YYData->{'input_pos'} + 357 $_[0]->YYData->{'abs_offset'}; 358 359 # Update progress bar, if defined. 360 if (defined($_[0]->YYData->{'progress'}) && 361 ($_[0]->YYData->{'input_pos'} > $_[0]->YYData->{'progress_next_update'})) { 362 $_[0]->YYData->{'progress_next_update'} = 363 $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'input_pos'}); 364 } 328 365 329 366 # Identify DIR_NAME token. … … 380 417 if ($_[0]->YYData->{DATA} =~ m/\G(.*\n)/cg) { 381 418 $_[0]->YYData->{'input_pos'} = pos($_[0]->YYData->{DATA}); 382 $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .") \n");383 Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .") \n");419 $LOG->fatal("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")"); 420 Carp::croak("Error: Unknown token (" . $1 . ") at offset (". $_[0]->YYData->{'input_pos'} .")"); 384 421 } 385 422 return ('', undef); … … 392 429 # Outputs: None 393 430 sub _error { 394 $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").\n"); 395 Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ").\n"); 431 432 $LOG->fatal("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . ")."); 433 Carp::croak("Error: Malformed input found at offset (" . $_[0]->YYData->{'input_pos'} . 
")."); 434 } 435 436 # Helper function, designed to reset the parser's file stream back to the 437 # beginning, allowing the parser to reparse from the beginning. Or, if 438 # specified, the function will seek the parser to the specified offset. 439 # 440 # Inputs: parser, absolute offset (optional) 441 # Outputs: none 442 sub _reset { 443 # Extract arguments. 444 my ($self, $offset) = @_; 445 446 $LOG->debug("Resetting parser."); 447 448 my $fh = $self->YYData->{'file_handle'}; 449 450 # Check the offset. 451 if (!defined($offset)) { 452 $offset = 0; 453 } 454 seek($fh, $offset, SEEK_SET); 455 456 undef $/; 457 $self->YYData->{DATA} = <$fh>; 458 459 # Strip all CRs. 460 $self->YYData->{DATA} =~ s/\r//g; 461 462 # Total size of input file. 463 $self->YYData->{'file_size'} = (stat($fh))[7]; 464 465 # Reinitialize helper variables. 466 # Hashtable, to represent the latest, extracted group chunk. 467 $self->YYData->{'latest_group'} = { }; 468 469 # Boolean, to indicate when we're parsing inside a group chunk. 470 $self->YYData->{'in_group'} = 0; 471 472 # Boolean, to indicate when we're parsing inside a value segment. 473 $self->YYData->{'in_value'} = 0; 474 475 # Regexp offset, used to record where the parser is within 476 # the file (relative position). 477 $self->YYData->{'input_pos'} = 0; 478 479 # Absolute offset, recording where the parser initially seeked to. 480 $self->YYData->{'abs_offset'} = $offset; 481 482 # Initialize statistics. 483 # Total number of directories parsed. 484 $self->YYData->{'dir_count'} = 0; 485 486 # Total number of key/value pairs parsed. 487 $self->YYData->{'entry_count'} = 0; 488 489 # Progress bar information. 
490 if ($self->YYData->{'show_progress'}) { 491 $self->YYData->{'progress'} = Term::ProgressBar->new({ name => 'Progress', 492 count => $self->YYData->{'file_size'}, 493 ETA => 'linear', }); 494 $self->YYData->{'progress'}->minor(0); 495 $self->YYData->{'progress'}->max_update_rate(1); 496 $self->YYData->{'progress_next_update'} = $self->YYData->{'progress'}->update($offset); 497 } else { 498 $self->YYData->{'progress'} = undef; 499 } 396 500 } 397 501 … … 415 519 } 416 520 417 my $fh = $self->YYData->{'file_handle'}; 418 seek($fh, 0, SEEK_SET); 419 420 undef $/; 421 $self->YYData->{DATA} = <$fh>; 422 423 # Strip all CRs. 424 $self->YYData->{DATA} =~ s/\r//g; 425 426 $self->YYData->{DATA} =~ m/\G.{73}/scg; 427 428 429 # Reinitialize helper variables. 430 # Hashtable, to represent the latest, extracted group chunk. 431 $self->YYData->{'latest_group'} = { }; 432 433 # Boolean, to indicate when we're parsing inside a group chunk. 434 $self->YYData->{'in_group'} = 0; 435 436 # Boolean, to indicate when we're parsing inside a value segment. 437 $self->YYData->{'in_value'} = 0; 438 439 # Regexp offset, used to record where the parser is within 440 # the file. 441 $self->YYData->{'input_pos'} = 0; 442 443 # Initialize statistics. 444 # Total number of directories parsed. 445 $self->YYData->{'dir_count'} = 0; 446 447 # Total number of key/value pairs parsed. 448 $self->YYData->{'entry_count'} = 0; 521 # Reset the parser. 522 $self->_reset(); 449 523 450 524 $LOG->debug("Finished group index process."); 451 452 print Dumper($self->YYData->{'group_index'}) . "\n"; 525 } 526 527 # Helper function, designed to be called from within the 528 # Search::Binary::binary_search() function, in order to allow 529 # the binary_search to properly read in group index data from 530 # the default array reference. 531 # 532 # For more information about how this function operates, please 533 # see the Search::Binary POD documentation. 
534 # 535 # Inputs: parser, value_to_compare, current_array_index 536 # Outputs: comparison, last_valid_array_index 537 sub _search { 538 # Extract arguments. 539 my ($parser, $value_to_compare, $current_array_index) = @_; 540 541 # Increment the search index, if the current one is undef. 542 if (defined($current_array_index)) { 543 $parser->YYData->{'last_search_index'} = $current_array_index; 544 } else { 545 $parser->YYData->{'last_search_index'}++; 546 } 547 548 # Perform a comparison, if the array entry is defined. 549 if (defined(@{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}])) { 550 return($value_to_compare <=> @{$parser->YYData->{'group_index'}}[$parser->YYData->{'last_search_index'}], 551 $parser->YYData->{'last_search_index'}); 552 } 553 554 # Array entry not found, return undef with this position. 555 return (undef, $parser->YYData->{'last_search_index'}); 453 556 } 454 557 … … 463 566 The following functions have been implemented by any Parser object. 464 567 465 =head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename, index_groups => $perform_index) 568 =head2 HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $filename, 569 index_groups => $perform_index, 570 show_progress => $progress) 466 571 467 572 =over 4 … … 475 580 ahead and scan the entire file, indexing the file offsets of where groups start and 476 581 end. Otherwise, this indexing process is not performed. 582 B<$progress> is an optional parameter. 1 specifies that the parser should display 583 a progress bar, as it scans through a specified file. Otherwise, a progress bar 584 is not displayed. 477 585 478 586 I<Output>: The instantiated Parser B<$object>, fully initialized. … … 510 618 !exists($args{'input_file'}) || 511 619 !defined($args{'input_file'})) { 512 $LOG->fatal("Error: Unable to create parser - no 'input_file' specified! \n");513 Carp::croak("Error: Unable to create parser - no 'input_file' specified! 
\n");620 $LOG->fatal("Error: Unable to create parser - no 'input_file' specified!"); 621 Carp::croak("Error: Unable to create parser - no 'input_file' specified!"); 514 622 } 515 623 … … 517 625 my $fh = new IO::File($args{'input_file'}, "r"); 518 626 if (!defined($fh)) { 519 $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!\n"); 520 Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!\n"); 627 $LOG->fatal("Error: Unable to read file '" . $args{'input_file'} . "'!"); 628 Carp::croak("Error: Unable to read file '" . $args{'input_file'} . "'!"); 629 } 630 631 # Check if show progress was specified. 632 if ($argsExist && 633 exists($args{'show_progress'}) && 634 defined($args{'show_progress'}) && 635 $args{'show_progress'}) { 636 $parser->YYData->{'show_progress'} = 1; 637 } else { 638 $parser->YYData->{'show_progress'} = 0; 521 639 } 522 640 … … 524 642 $parser->YYData->{'file_handle'} = $fh; 525 643 526 undef $/; 527 $parser->YYData->{DATA} = <$fh>; 528 529 # Strip all CRs. 530 $parser->YYData->{DATA} =~ s/\r//g; 531 532 # Initialize helper variables. 533 # Hashtable, to represent the latest, extracted group chunk. 534 $parser->YYData->{'latest_group'} = { }; 535 536 # Boolean, to indicate when we're parsing inside a group chunk. 537 $parser->YYData->{'in_group'} = 0; 538 539 # Boolean, to indicate when we're parsing inside a value segment. 540 $parser->YYData->{'in_value'} = 0; 541 542 # Regexp offset, used to record where the parser is within 543 # the file. 544 $parser->YYData->{'input_pos'} = 0; 545 546 # Array to record where group boundaries occur. 547 $parser->YYData->{'group_index'} = [0, ]; 548 549 # Initialize statistics. 550 # Total number of directories parsed. 551 $parser->YYData->{'dir_count'} = 0; 552 553 # Total number of key/value pairs parsed. 554 $parser->YYData->{'entry_count'} = 0; 644 # Reset the parser. 645 $parser->_reset(); 555 646 556 647 # Perform group indexing, if specified. 
… … 560 651 $args{'index_groups'}) { 561 652 $parser->_index(); 653 } else { 654 $parser->YYData->{'group_index'} = [0, ]; 562 655 } 563 656 … … 755 848 } 756 849 850 # Update progress bar, if defined. 851 if (defined($_[0]->YYData->{'progress'}) && 852 ($_[0]->YYData->{'file_size'} <= $_[0]->YYData->{'progress_next_update'})) { 853 854 $_[0]->YYData->{'progress'}->update($_[0]->YYData->{'file_size'}); 855 } 856 757 857 # Return the next group parsed. 758 858 return $self->YYParse(yylex => \&_lexer, … … 896 996 } 897 997 998 =pod 999 1000 =head2 $object->seekToNearestGroup(absolute_offset => $offset) 1001 1002 =over 4 1003 1004 Given an absolute offset within the file, this function 1005 will seek the parser to the nearest group found B<before> 1006 the specified offset. 1007 1008 I<Inputs>: 1009 B<$offset> is a required parameter, specifying the absolute offset 1010 within the file to seek to. 1011 1012 I<Outputs>: None. 1013 1014 =back 1015 1016 =begin testing 1017 1018 my ($nextGroup, $expectedGroup); 1019 my $test_registry_file = $ENV{PWD} . "/" . getVar(name => "registry_file", 1020 namespace => "HoneyClient::Agent::Integrity::Registry::Parser::Test"); 1021 1022 # Create a generic Parser object, with test state data. 
1023 my $parser = HoneyClient::Agent::Integrity::Registry::Parser->init(input_file => $test_registry_file, index_groups => 1); 1024 1025 $parser->seekToNearestGroup(absolute_offset => 84); 1026 1027 # Verify Test Group #2 1028 $nextGroup = $parser->nextGroup(); 1029 $expectedGroup = { 1030 key => 'HKEY_CURRENT_USER\Testing Group 2', 1031 entries => [ { 1032 name => '@', 1033 value => '\\"Annoying=Value\\"', 1034 }, { 1035 name => '\\"Annoying=Key\\"', 1036 value => 'Bar', 1037 }, { 1038 name => 'Multiline', 1039 value => 'This 1040 value spans 1041 multiple lines 1042 ', 1043 }, { 1044 name => 'Sane_Key', 1045 value => '\\"Wierd=\\"Value', 1046 }, ], 1047 }; 1048 is_deeply($nextGroup, $expectedGroup, "seekToNearestGroup()") or diag("The seekToNearestGroup() call failed."); 1049 1050 =end testing 1051 1052 =cut 1053 1054 sub seekToNearestGroup { 1055 # Extract arguments. 1056 my ($self, %args) = @_; 1057 1058 # Sanity check, don't continue, unless absolute_offset 1059 # was provided. 1060 my $argsExist = scalar(%args); 1061 if (!$argsExist || 1062 !exists($args{'absolute_offset'}) || 1063 !defined($args{'absolute_offset'})) { 1064 $LOG->fatal("Error: Unable to seek parser - no 'absolute_offset' specified!"); 1065 Carp::croak("Error: Unable to seek parser - no 'absolute_offset' specified!"); 1066 } 1067 1068 # Check to see if the 'group_index' has been initialized. 1069 # We assume that if it has [0, ], then this has not been 1070 # done. 1071 my $numIndices = scalar(@{$self->YYData->{'group_index'}}); 1072 if ($numIndices < 2) { 1073 $self->_index(); 1074 } 1075 $numIndices = scalar(@{$self->YYData->{'group_index'}}); 1076 1077 # Find the nearest index after the offset. 1078 my $found_index = binary_search(0, $numIndices - 1, $args{'absolute_offset'}, \&_search, $self); 1079 1080 # Now, find the nearest index before the offset. 
1081 if ($found_index > 0) { 1082 $found_index--; 1083 } 1084 my $found_offset = @{$self->YYData->{'group_index'}}[$found_index]; 1085 1086 # XXX: Change this to debug, eventually. 1087 $LOG->info("Seeking parser to nearest earlier group offset (" . $found_offset . ")."); 1088 1089 # Seek the parser, to the specified offset. 1090 $self->_reset($found_offset); 1091 } 1092 898 1093 ####################################################################### 899 1094 # Additional Module Documentation #
