Changeset 205

Show
Ignore:
Timestamp:
03/07/07 10:49:41 (2 years ago)
Author:
kindlund
Message:

sc: merging branch using tags svn+ssh://kindlund@www.honeyclient.org/home/svn/honeyclient/honeyclient/tags/exp/PRE-jpuchalski-active_content and svn+ssh://kindlund@www.honeyclient.org/home/svn/honeyclient/honeyclient/trunk

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • honeyclient/branches/exp/jpuchalski-active_content

    • Property sc:bug-fix-release-branch set to 0.9
  • honeyclient/branches/exp/jpuchalski-active_content/bin/StartAgent.pl

    • Property svn:keywords set to Id "$file"
    r13 r205  
    1 #!perl -w 
     1#!perl -w -Ilib 
     2 
     3# $Id$ 
    24 
    35use strict; 
     
    3537    print "Watchdog fault detected, recovering Agent daemon.\n"; 
    3638    # XXX: Reenable this, eventually. 
    37 #    Carp::carp __PACKAGE__ . "->_watchdogFaultHandler(): Error occurred during processing.\n" . $errMsg; 
     39    #Carp::carp __PACKAGE__ . "->_watchdogFaultHandler(): Error occurred during processing.\n" . $errMsg; 
    3840 
    3941 
  • honeyclient/branches/exp/jpuchalski-active_content/bin/StartManager.pl

    • Property svn:keywords set to Id "$file"
    r13 r205  
    1 #!/usr/bin/perl -w 
     1#!perl -w -Ilib 
     2 
     3# $Id$ 
    24 
    35use strict; 
     
    2022my $agentState = HoneyClient::Manager->run( 
    2123                    driver           => 'IE', # Change to 'IE' or 'FF' 
    22                     master_vm_config => '/vm/Agent.Master-2/winXPPro.cfg', 
     24                    master_vm_config => '/vm/Agent.Master-7/winXPPro.cfg', 
    2325                    agent_state      => encode_base64(nfreeze({ 
    2426                        IE => { # Change to 'IE' or 'FF' 
     
    2628                            # Enable this line, if you want to only go to the 
    2729                            # first 5 links for each domain. 
    28                             #max_relative_links_to_visit => 5, 
     30                            max_relative_links_to_visit => 5, 
    2931                         }, 
    3032                    })),  
  • honeyclient/branches/exp/jpuchalski-active_content/etc/honeyclient.xml

    • Property svn:keywords set to Id "$file"
    r13 r205  
    99For information about the syntax of this file format, please type 
    1010"perldoc XML::Simple". 
     11 
     12CVS: $Id$ 
    1113 
    1214####################################################################### 
     
    4547<HoneyClient> 
    4648    <!-- Global Options --> 
    47     <timeout description="Default timeout used for all communications between each module (in seconds)." default="600"> 
    48         600 
     49    <!-- Note: This timeout should be long enough so that the Agent watchdog code will properly let the integrity checking code finish, before detecting a faulty timeout in processing. --> 
     50    <timeout description="Default timeout used for all communications between each module (in seconds)." default="900"> 
     51        1800 
    4952    </timeout> 
    5053    <log_config description="The global Log4perl configuration file, used throughout all modules.  This setting should not need to be changed." default="etc/honeyclient_log.conf"> 
     
    6669            <!-- TODO: Update this. --> 
    6770            <timeout description="How long the Driver waits during a drive operation, before timing out (in seconds)." default="60"> 
    68                 5  
     71                10  
    6972            </timeout> 
    7073            <Browser> 
    71                    <!-- HoneyClient::Agent::Driver::IE Options --> 
    72                <!-- TODO: Update this. --> 
    73                <ignore_links_timed_out description="If this parameter is 1, then the browser will never attempt to revisit any links that caused the browser to initially time out." default="0"> 
     74                <!-- HoneyClient::Agent::Driver::IE Options --> 
     75                <!-- TODO: Update this. --> 
     76                <ignore_links_timed_out description="If this parameter is 1, then the browser will never attempt to revisit any links that caused the browser to initially time out." default="0"> 
    7477                    1 
    7578                </ignore_links_timed_out> 
    76                <!-- TODO: Update this. --> 
    77                <process_name description="The name of the Internet Explorer application process, as it appears in the Task Manager." default="iexplore.exe"> 
     79                <!-- TODO: Update this. --> 
     80                <process_name description="The name of the Internet Explorer application process, as it appears in the Task Manager." default="iexplore.exe"> 
    7881                    iexplore.exe 
    7982                </process_name> 
    80                <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving onto another website.  If negative, then the browser will exhaust all possible relative links found, before moving on.  This functionality is best effort; it's possible for the browser to visit new links on previously visited websites." default="-1"> 
     83                <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving onto another website.  If negative, then the browser will exhaust all possible relative links found, before moving on.  This functionality is best effort; it's possible for the browser to visit new links on previously visited websites." default="-1"> 
    8184                    -1 
    8285                </max_relative_links_to_visit> 
    83                 <IE> 
    84                     <!-- HoneyClient::Agent::Driver::IE Options --> 
    85                     <!-- TODO: Update this. --> 
    86                     <ignore_links_timed_out description="If this parameter is 1, then the browser will never attempt to revisit any links that caused the browser to initially time out." default="0"> 
     86                <goodwords description="A comma-separated list of good words which will increase the score of links within a webpage." default=""> 
     87                    news,new,big,latest,main,update,sell,free,buy 
     88                </goodwords> 
     89                <badwords description="A comma-separated list of bad words which will decrease the score of links within a webpage." default=""> 
     90                    archive,privacy,legal,disclaim,about,contact,copyright,jobs,careers 
     91                </badwords> 
     92                <IE> 
     93                    <!-- HoneyClient::Agent::Driver::IE Options --> 
     94                    <!-- TODO: Update this. --> 
     95                    <ignore_links_timed_out description="If this parameter is 1, then the browser will never attempt to revisit any links that caused the browser to initially time out." default="0"> 
    8796                        1 
    8897                    </ignore_links_timed_out> 
    89                    <!-- TODO: Update this. --> 
    90                    <process_name description="The name of the Internet Explorer application process, as it appears in the Task Manager." default="iexplore.exe"> 
     98                    <!-- TODO: Update this. --> 
     99                    <process_name description="The name of the Internet Explorer application process, as it appears in the Task Manager." default="iexplore.exe"> 
    91100                        iexplore.exe 
    92101                    </process_name> 
    93                    <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving onto another website.  If negative, then the browser will exhaust all possible relative links found, before moving on.  This functionality is best effort; it's possible for the browser to visit new links on previously visited websites." default="-1"> 
     102                    <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving onto another website.  If negative, then the browser will exhaust all possible relative links found, before moving on.  This functionality is best effort; it's possible for the browser to visit new links on previously visited websites." default="-1"> 
    94103                        -1 
    95104                    </max_relative_links_to_visit> 
    96                    </IE> 
     105                </IE> 
    97106                <FF> 
    98                    <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving onto another website.  If negative, then the browser will exhaust all possible relative links found, before moving on.  This functionality is best effort; it's possible for the browser to visit new links on previously visited websites." default="-1"> 
     107                    <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving onto another website.  If negative, then the browser will exhaust all possible relative links found, before moving on.  This functionality is best effort; it's possible for the browser to visit new links on previously visited websites." default="-1"> 
    99108                        5 
    100109                    </max_relative_links_to_visit> 
    101                    <!-- http://gatekeeper-w.mitre.org:80 --> 
    102                    <http_proxy description="Set to your HTTP Proxy if you have one, otherwise set to 'none'"> 
     110                    <!-- http://gatekeeper-w.mitre.org:80 --> 
     111                    <http_proxy description="Set to your HTTP Proxy if you have one, otherwise set to 'none'"> 
    103112                        none 
    104113                    </http_proxy> 
    105                    <ff_exec description="path to the firefox executable (default install path is C:\Program Files\Mozilla Firefox\firefox.exe)"> 
     114                    <ff_exec description="path to the firefox executable (default install path is C:\Program Files\Mozilla Firefox\firefox.exe)"> 
    106115                        C:\Program Files\Mozilla Firefox\firefox.exe 
    107116                    </ff_exec> 
    108                </FF> 
     117                </FF> 
    109118            </Browser> 
    110119            <EmailClient> 
     
    112121        </Driver> 
    113122        <perform_integrity_checks description="An integer, representing whether the Agent should perform any integrity checks. 1 enables, 0 disables." default="1"> 
    114             0 
     123            1  
    115124        </perform_integrity_checks> 
    116125        <!-- HoneyClient::Agent::Integrity Options --> 
     
    118127            <!-- Files which are read in only. --> 
    119128            <!-- TODO: Update this. --> 
    120             <file_checklist description="The file containing the list of files and directories to check during filesystem checking.">
    121                 none 
     129            <file_checklist description="The file containing the list of files and directories to check during filesystem checking." default="none">
     130                etc/file_checklist.txt 
    122131            </file_checklist> 
    123132            <!-- TODO: Update this. --> 
    124             <file_exclude description="The file containing the list of files or directories to exclude if found in subdirectories during filesystem checking.">
    125                 /tmp/file_exclude.txt 
     133            <file_exclude description="The file containing the list of files or directories to exclude if found in subdirectories during filesystem checking." default="none">
     134                ../../../etc/file_exclude.txt 
    126135            </file_exclude> 
    127             <!-- TODO: Update this. --> 
    128             <reg_list_to_check description="The file containing the list of registry keys to check."> 
    129                 /tmp/reg_list_to_check.txt 
    130             </reg_list_to_check> 
    131136            <!-- Files which are written out only. --> 
    132137            <!-- TODO: Update this. --> 
     
    140145            <!-- Files to read and write. --> 
    141146            <!-- TODO: Update this. --> 
    142             <clean_reg description="Stores baseline for the registry.  Always appended with a number."> 
    143                 clean.reg 
    144             </clean_reg> 
    145             <!-- TODO: Update this. --> 
    146             <current_reg description="Stores the current state of the register to check against the clean state."> 
    147                 current.reg 
    148             </current_reg> 
    149             <!-- TODO: Update this. --> 
    150             <diffs description="The file for the diff command to redirect its output to.  Always appended with a number."> 
    151                 differences.out 
    152             </diffs> 
    153             <!-- TODO: Update this. --> 
    154147            <test_dir description="If you're testing integrity checks, this is the directory that you can specify to minimize testing time. Only the files in this directory will be part of the integrity check."> 
    155148                ../t/testintegrity 
    156149            </test_dir> 
     150            <Registry> 
     151                <!-- HoneyClient::Agent::Integrity::Registry::Test Options --> 
     152                <Test> 
     153                    <!-- 
     154                        Note: you should *never* need to change *any* values 
     155                        within this section of the configuration.  All contents 
     156                        are *only* used for unit testing. 
     157                    --> 
     158                    <before_registry_file description="The relative path to a (before) sample registry dump, that's used during unit testing." default="t/test_registry/before.reg"> 
     159                        t/test_registry/before.reg 
     160                    </before_registry_file> 
     161                    <after_registry_file description="The relative path to an (after) sample registry dump, that's used during unit testing." default="t/test_registry/after.reg"> 
     162                        t/test_registry/after.reg 
     163                    </after_registry_file> 
     164                </Test> 
     165                <Parser> 
     166                    <!-- HoneyClient::Agent::Integrity::Registry::Parser::Test Options --> 
     167                    <Test> 
     168                        <!-- 
     169                            Note: you should *never* need to change *any* values 
     170                            within this section of the configuration.  All contents 
     171                            are *only* used for unit testing. 
     172                        --> 
     173                        <registry_file description="The relative path to a sample registry dump, that's used during unit testing." default="t/test_registry/dump.reg"> 
     174                            t/test_registry/dump.reg 
     175                        </registry_file> 
     176                    </Test> 
     177                </Parser> 
     178            </Registry> 
    157179        </Integrity> 
    158180    </Agent> 
    159181    <Manager> 
     182        <!-- TODO: Update this. --> 
     183        <manager_state description="Upon termination, the Manager will attempt to save a complete copy of its state into this file, if specified." default=""> 
     184            Manager.dump 
     185        </manager_state> 
    160186        <!-- TODO: Update this. --> 
    161187        <address description="The IP or hostname that all Manager modules should use, when accepting SOAP requests." default="localhost"> 
     
    314340                    are *only* used for unit testing. 
    315341                --> 
    316                 <!-- TODO: Update this. --> 
    317342                <test_vm_config description="The relative path to the (empty) test VM, that's used during unit testing." default="t/testVM/winXPPro.vmx"> 
    318343                    t/testVM/winXPPro.vmx 
  • honeyclient/branches/exp/jpuchalski-active_content/etc/honeyclient_log.conf

    • Property svn:keywords set to Id "$file"
    r13 r205  
    77#              modules. 
    88# 
    9 # CVS: $Id: honeylog.conf 1099 2006-06-29 22:38:16Z jdurick $
     9# CVS: $Id$
    1010# 
    1111# @author jdurick, kindlund 
     
    6060 
    6161log4perl.rootLogger=INFO, Screen 
     62#log4perl.logger.HoneyClient.Agent.Integrity.Registry=DEBUG, Screen 
     63# Suppress Parser Debugging Messages 
     64#log4perl.logger.HoneyClient.Agent.Integrity.Registry.Parser=INFO, Screen 
    6265log4perl.appender.Screen=Log::Log4perl::Appender::Screen 
    6366# If you want colorized logging to the screen, enable this line, instead. 
    6467#log4perl.appender.Screen=Log::Log4perl::Appender::ScreenColoredLevels 
    6568log4perl.appender.Screen.stderr=0 
    66 log4perl.appender.Screen.Threshold=INFO 
     69log4perl.appender.Screen.Threshold=DEBUG 
    6770log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout 
    6871log4perl.appender.Screen.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %5p [%M] (%F:%L) - %m%n 
  • honeyclient/branches/exp/jpuchalski-active_content/etc/inline2test.conf

    • Property svn:keywords set to Id "$file"
  • honeyclient/branches/exp/jpuchalski-active_content/lib/HoneyClient/Agent.pm

    • Property svn:executable deleted
    • Property svn:keywords set to Id "$file"
    r13 r205  
    55# Description: Central library used for agent-based operations. 
    66# 
    7 # CVS: $Id: Agent.pm 1049 2006-06-28 16:37:41Z flindiakos $
     7# CVS: $Id$
    88# 
    99# @author knwang, ttruong, kindlund 
     
    3838=head1 VERSION 
    3939 
    40 $Rev: 1626 $ 
     400.94 
    4141 
    4242=head1 SYNOPSIS 
     
    7979 
    8080    # Set our package version. 
    81     $VERSION = 0.9;
     81    $VERSION = 0.94;
    8282 
    8383    @ISA = qw(Exporter); 
     
    210210# TODO: Update unit tests to include 'dclone' 
    211211use Storable qw(nfreeze thaw dclone); 
     212$Storable::Deparse = 1; 
     213$Storable::Eval = 1; 
    212214 
    213215# Include Base64 Libraries 
     
    236238our $PERFORM_INTEGRITY_CHECKS : shared = 
    237239    getVar(name => "perform_integrity_checks"); 
     240 
     241# A globally shared, serialized hashtable, containing the 
     242# initialized integrity state of the VM -- ready to be checked 
     243# against, at any time. 
     244our $integrityState : shared = undef; 
    238245 
    239246# A globally shared, serialized hashtable, containing data per 
     
    359366        $driverUpdateQueues{$driverName} = new Thread::Queue; 
    360367    } 
     368 
     369    # Perform initial integrity baseline check. 
     370    #my $integrity = undef; 
     371    #if ($PERFORM_INTEGRITY_CHECKS) { 
     372    #    print "Initializing Integrity Check...\n"; 
     373    #    # TODO: Initialize Integrity Checks 
     374    #    $integrity = HoneyClient::Agent::Integrity->new(); 
     375    #    $integrity->initAll(); 
     376    #} 
     377    #$integrityState = $integrity->serialize(); 
    361378 
    362379    # Release data lock. 
     
    688705            eval { 
    689706 
    690                 my $integrity = undef; 
    691                 if ($PERFORM_INTEGRITY_CHECKS) { 
    692                     print "Initializing Filesystem Integrity Check...\n"; 
    693                     # TODO: Initialize Integrity Checks 
    694                     $integrity = HoneyClient::Agent::Integrity->new(); 
    695                     $integrity->initAll(); 
    696                 } 
    697   
    698707                ################################### 
    699708                ### Driver Initialization Phase ### 
    700709                ################################### 
     710 
     711                # Initially set local integrity object to undef. 
     712                my $integrity = undef; 
    701713                 
    702714                # Initially set all driver objects to undef.  
     
    705717                # Acquire lock on stored driver state. 
    706718                $data = _lock(); 
     719 
     720                if ($PERFORM_INTEGRITY_CHECKS) { 
     721                    # XXX: WARNING - The $integrityState object data is NOT thread-safe 
     722                    # (since it relies on external data stored on the file system). 
     723                    # As such, do NOT try to call integrity checks on multiple, simultaneous 
     724                    # asynchronous threaded drivers. 
     725                    #$integrity = thaw($integrityState); 
     726                    # Perform initial integrity baseline check. 
     727                    print "Initializing Integrity Check...\n"; 
     728                    # TODO: Initialize Integrity Checks 
     729                    $integrity = HoneyClient::Agent::Integrity->new(); 
     730                    $integrity->initAll(); 
     731 
     732                    # TODO: Delete this. 
     733                    #$Data::Dumper::Indent = 1; 
     734                    #$Data::Dumper::Terse = 1; 
     735                    #print "Integrity: " . Dumper($integrity) . "\n"; 
     736                } 
    707737 
    708738                # Now, initialize each driver object.  
     
    805835                    # For now, we update a scalar called 'is_compromised' within 
    806836                    # the $data->{$driverName}->{'status'} sub-hashtable. 
    807                     print "Performing Filesystem Integrity Check...\n"; 
     837                    print "Performing Integrity Checks...\n"; 
    808838                    if ($integrity->checkAll()) { 
    809839                        print "Integrity Check: FAILED\n"; 
     
    10991129=head1 SEE ALSO 
    11001130 
    1101 XXX: Fill this in. 
    1102  
    1103 XXX: If you have a mailing list, mention it here. 
    1104  
    1105 XXX: If you have a web site set up for your module, mention it here. 
     1131L<http://www.honeyclient.org/trac> 
    11061132 
    11071133=head1 REPORTING BUGS 
    11081134 
    1109 XXX: Mention website/mailing list to use, when reporting bugs. 
     1135L<http://www.honeyclient.org/trac/newticket> 
    11101136 
    11111137=head1 ACKNOWLEDGEMENTS 
  • honeyclient/branches/exp/jpuchalski-active_content/lib/HoneyClient/Agent/Driver.pm

    • Property svn:executable deleted
    • Property svn:keywords set to Id "$file"
    r13 r205  
    66#              HoneyClient VM. 
    77# 
    8 # CVS: $Id: Driver.pm 1412 2006-10-18 20:33:18Z kindlund $
     8# CVS: $Id$
    99# 
    1010# @author knwang, ttruong, kindlund 
     
    3838=head1 VERSION 
    3939 
    40 This documentation refers to HoneyClient::Agent::Driver version 1.0
     40This documentation refers to HoneyClient::Agent::Driver version 0.94
    4141 
    4242=head1 SYNOPSIS 
     
    120120 
    121121    # Set our package version. 
    122     $VERSION = 0.9;
     122    $VERSION = 0.94;
    123123 
    124124    @ISA = qw(Exporter); 
     
    160160# Make sure Log::Log4perl loads 
    161161BEGIN { use_ok('Log::Log4perl', qw(:nowarn)) 
    162         or diag("Can't load Log::Log4perl package. Check to make sure the package library is correctly linsted within the path."); 
     162        or diag("Can't load Log::Log4perl package. Check to make sure the package library is correctly listed within the path."); 
    163163        
    164164        # Suppress all logging messages, since we need clean output for unit testing. 
     
    697697L<perltoot/"Autoloaded Data Methods"> 
    698698 
    699 XXX: If you have a mailing list, mention it here. 
    700  
    701 XXX: If you have a web site set up for your module, mention it here. 
     699L<http://www.honeyclient.org/trac> 
    702700 
    703701=head1 REPORTING BUGS 
    704702 
    705 XXX: Mention website/mailing list to use, when reporting bugs. 
     703L<http://www.honeyclient.org/trac/newticket> 
    706704 
    707705=head1 AUTHORS 
  • honeyclient/branches/exp/jpuchalski-active_content/lib/HoneyClient/Agent/Driver/Browser.pm

    • Property svn:keywords set to Id "$file"
    r13 r205  
    77#              HoneyClient VM. 
    88# 
    9 # CVS: $Id: Browser.pm 1423 2006-11-6 14:21:47Z stephenson $
     9# CVS: $Id$
    1010# 
    1111# @author knwang, kindlund, stephenson 
     
    1717# as published by the Free Software Foundation, using version 2 
    1818# of the License. 
    19 #  
     19# 
    2020# This program is distributed in the hope that it will be useful, 
    2121# but WITHOUT ANY WARRANTY; without even the implied warranty of 
    2222# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
    2323# GNU General Public License for more details. 
    24 #  
     24# 
    2525# You should have received a copy of the GNU General Public License 
    2626# along with this program; if not, write to the Free Software 
     
    4040=head1 VERSION 
    4141 
    42 This documentation refers to HoneyClient::Agent::Driver::Browser version 1.0
     42This documentation refers to HoneyClient::Agent::Driver::Browser version 0.94
    4343 
    4444=head1 SYNOPSIS 
     
    5555          'http://www.google.com'  => 1, 
    5656          'http://www.cnn.com'     => 1, 
    57       },  
     57      }, 
    5858  ); 
    5959 
     
    7676      print "Status:\n"; 
    7777      print Dumper($browser->status()); 
    78        
     78 
    7979  } 
    8080 
     
    8484  $browser->{links_to_visit}->{'http://www.mitre.org'} = 1; 
    8585 
    86   # Now, drive IE for one iteration. 
     86  # Now, drive the browser for one iteration. 
    8787  $browser->drive(); 
    8888 
     
    9090 
    9191This library allows the Agent module to drive an instance of any browser, 
    92 running inside the HoneyClient VM.  The purpose  
    93 of this module is to programmatically navigate the browser to different 
    94 websites, in order to become purposefully infected with new malware. 
    95 The module implements the logic necessary to decide the order in which 
    96 the  
    97  
    98 This module is object-oriented in design, retaining all state information  
     92running inside the HoneyClient VM.  The purpose of this module is to 
     93programmatically navigate the browser to different websites, in order to 
     94become purposefully infected with new malware. 
     95 
     96This module is object-oriented in design, retaining all state information 
    9997within itself for easy access.  A specific browser class must inherit from 
    10098Browser. 
     
    116114external links in a random fashion.  B<However>, this cannot be 
    117115guaranteed, as additional links from the same server may be found 
    118 later, after processing the contents of an external link.  
     116later, after processing the contents of an external link. 
    119117 
    120118As the browser driver navigates the browser to each link, it 
     
    122120visited (see L<links_visited>); when invalid links were found 
    123121(see L<links_ignored>); and when the browser attempted to visit 
    124 a link but the operation timed out (see L<links_timed_out>).  
     122a link but the operation timed out (see L<links_timed_out>). 
    125123By maintaining this internal history, the driver will B<never> 
    126124navigate the browser to the same link twice. 
     
    157155 
    158156    # Set our package version. 
    159     $VERSION = 0.9;
     157    $VERSION = 0.94;
    160158 
    161159    # Define inherited modules. 
     
    175173    # Do not simply export all your public functions/methods/constants. 
    176174 
    177     # This allows declaration use HoneyClient::Agent::Driver::IE ':all'; 
     175    # This allows declaration use HoneyClient::Agent::Driver::Browser ':all'; 
    178176    # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK 
    179177    # will save memory. 
     
    194192    #if ($Config{osname} !~ /^MSWin32$/) { 
    195193    #    Carp::croak "Error: " . __PACKAGE__ . " will only run on Win32 platforms!\n"; 
    196     #}     
     194    #} 
    197195 
    198196    $SIG{PIPE} = 'IGNORE'; # Do not exit on broken pipes. 
     
    213211 
    214212# Use Storable Library 
     213# TODO: Need unit testing. 
    215214use Storable qw(dclone); 
    216215 
     
    223222# TODO: Need unit testing. 
    224223use HoneyClient::Util::SOAP qw(getClientHandle); 
    225      
     224 
    226225# TODO: Need unit testing. 
    227226use LWP::UserAgent; 
     
    240239=head1 DEFAULT PARAMETER LIST 
    241240 
    242 When an IE B<$object> is instantiated using the B<new()> function, 
     241When a Browser B<$object> is instantiated using the B<new()> function, 
    243242the following parameters are supplied default values.  Each value 
    244243can be overridden by specifying the new (key => value) pair into the 
    245244B<new()> function, as arguments. 
    246245 
    247 Furthermore, as each parameter is initialized, each can be individually  
     246Furthermore, as each parameter is initialized, each can be individually 
    248247retrieved and set at any time, using the following syntax: 
    249248 
     
    287286resource (i.e., "javascript:doNetDetect()"). 
    288287 
    289 Specifically, each 'key' corresponds to an absolute URL and the  
     288Specifically, each 'key' corresponds to an absolute URL and the 
    290289'value' is a string representing the date and time of when the link 
    291290was visited. 
     
    302301This parameter is a hashtable of fully qualified URLs, such that each 
    303302URL shares a common B<hostname>.  This is an internal hashtable used 
    304 by the IE driver that should be initially empty.  As the IE driver 
    305 extracts and removes new URLs off the B<links_to_visit> hashtable, 
     303by the Browser driver that should be initially empty.  As the Browser 
     304driver extracts and removes new URLs off the B<links_to_visit> hashtable, 
    306305driving the browser to each URL, any B<relative> links found are 
    307306added into this hashtable; any B<external> links found are added 
    308307back into the B<links_to_visit> hashtable. 
    309308 
    310 When driving to the next link, this hashtable is exhausted prior  
     309When driving to the next link, this hashtable is exhausted prior 
    311310to the main B<links_to_visit> hashtable.  This allows a 
    312311browser to navigate to all links hosted on the same server, prior 
     
    325324It is updated dynamically, any time $object->getNextLink() is called. 
    326325 
    327 When the browser is ready to drive to the next link, B<next_link_to_visit>  
     326When the browser is ready to drive to the next link, B<next_link_to_visit> 
    328327is checked first.  If that value is B<undef>, then the B<relative_links_to_visit> 
    329328hashtable is checked next.  If that hashtable is empty, then finally the 
     
    341340timing out. 
    342341 
    343 Specifically, each 'key' corresponds to an absolute URL and the  
     342Specifically, each 'key' corresponds to an absolute URL and the 
    344343'value' is a string representing the date and time of when access to 
    345 the resource was attempted.  
     344the resource was attempted. 
    346345 
    347346B<Note>: See internal documentation of _getTimestamp() for the 
     
    364363=over 4 
    365364 
    366 A string containing the process name of the Internet Explorer 
    367 browser application, as it appears in the Task Manager.  This is 
    368 usually called "iexplore.exe". 
     365A string containing the process name of the browser application,
     366as it appears in the Task Manager. 
    369367 
    370368=back 
     
    385383=cut 
    386384 
    387 my %PARAMS = (  
     385my %PARAMS = ( 
    388386 
    389387    # This is a hashtable of fully qualified URLs 
     
    396394    # 'key' is the absolute URL and the 'value' is a string 
    397395    # representing the date and time of when the link was visited. 
    398     #  
     396    # 
    399397    # Note: See _getTimestamp() for the corresponding date/time 
    400398    # format. 
     
    411409    # The 'key' is the absolute URL and the 'value' is a string 
    412410    # representing the date and time of when the link was visited. 
    413     #  
     411    # 
    414412    # Note: See _getTimestamp() for the corresponding date/time 
    415413    # format. 
     
    418416    # This is a hashtable of fully qualified URLs 
    419417    # that all share a common *hostname*.  This hashtable should be 
    420     # initially empty.  As the driver extracts and removes new URLs  
    421     # off the 'links_to_visit' hashtable, driving the browser to each URL,  
     418    # initially empty.  As the driver extracts and removes new URLs 
     419    # off the 'links_to_visit' hashtable, driving the browser to each URL, 
    422420    # any *relative* links found are added into this hashtable; any 
    423421    # *external* links found are added back into the 'links_to_visit' 
    424422    # hashtable. 
    425423    # 
    426     # When navigating to the next link, this hashtable is exhausted prior  
     424    # When navigating to the next link, this hashtable is exhausted prior 
    427425    # to the main 'links_to_visit' hashtable.  This allows a 
    428426    # browser to navigate to all links hosted on the same server, prior 
    429427    # to contacting a different server. 
    430     #    
     428    # 
    431429    # Specifically, the 'key' is the absolute URL and the 'value' 
    432430    # is always 1. 
     
    448446    # The 'key' is the absolute URL and the 'value' is a string 
    449447    # representing the date and time of when the link was visited. 
    450     #  
     448    # 
    451449    # Note: See _getTimestamp() for the corresponding date/time 
    452450    # format. 
     
    458456    ignore_links_timed_out  => getVar(name => "ignore_links_timed_out"), 
    459457 
    460     # A string containing the process name of the Internet Explorer 
    461     # browser application, as it appears in the Task Manager.  This is 
    462     # usually called "iexplore.exe". 
     458    # A string containing the process name of the browser application, 
     459    # as it appears in the Task Manager. 
    463460    process_name            => getVar(name => "process_name"), 
    464461 
     
    477474    # websites. 
    478475    max_relative_links_to_visit => getVar(name => "max_relative_links_to_visit"), 
    479      
     476 
     477    # Comma-separated strings listing the good words and the bad words used for link scoring.
     478    goodwords => getVar(name => "goodwords", namespace => "HoneyClient::Agent::Driver::Browser"), 
     479    badwords => getVar(name => "badwords", namespace => "HoneyClient::Agent::Driver::Browser"), 
     480 
    480481); 
    481482 
     
    491492# 
    492493# When getting the next link, 'next_link_to_visit' is checked first. 
    493 # If that value is undef, then the 'relative_links_to_visit' hashtable  
    494 # is checked next.  If that hashtable is empty, then finally the  
     494# If that value is undef, then the 'relative_links_to_visit' hashtable 
     495# is checked next.  If that hashtable is empty, then finally the 
    495496# 'links_to_visit' hashtable is checked. 
    496497# 
    497 # Inputs: HoneyClient::Agent::Driver::IE object 
     498# Inputs: HoneyClient::Agent::Driver::Browser object 
    498499# Outputs: link, or undef if all applicable scalars/hashtables are empty 
    499500sub _getNextLink { 
     
    501502    # Get the object state. 
    502503    my $self = shift; 
    503      
    504     # Set the link to find as undef, initially.  
     504 
     505    # Set the link to find as undef, initially. 
    505506    # We use undef to signify that our URL *_links_to_visit hashtables 
    506507    # are empty.  If we were to use the empty string instead, as our 
     
    540541    } 
    541542 
    542     # Return the next link found.  
     543    # Return the next link found. 
    543544    return $link; 
    544545} 
     
    556557           $dt->hms(':') . "." . 
    557558           $dt->nanosecond(); 
    558 }  
     559} 
    559560 
    560561# Helper function designed to "pop" a key off a given hashtable. 
    561562# When given a hashtable reference, this function will extract a valid key 
    562 # from the hashtable and delete the (key, value) pair from the  
    563 # hashtable. 
    564 
    565 # Note: There is no guaranteed order about how this function picks 
    566 # keys from the hashtable. 
     563# from the hashtable and delete the (key, value) pair from the 
     564# hashtable.  The key with the highest score (value) is returned.
     565
     566
    567567# 
    568568# Inputs: hashref 
     
    573573    my $hash = shift; 
    574574 
    575     # Get a new key
    576     my @keys = keys(%{$hash})
    577     my $key = pop(@keys)
    578      
     575    # Get the key with the highest score
     576    my @array = sort {$$hash{$b} <=> $$hash{$a}} keys %{$hash}
     577    my $topkey = $array[0]
     578 
    579579    # Delete the key from the hashtable. 
    580     if (defined($key)) { 
    581         delete $hash->{$key}; 
     580    if (defined($topkey)) { 
     581        delete $hash->{$topkey}; 
    582582    } 
    583583 
    584584    # Return the key found. 
    585     return $key; 
    586 
    587  
    588 # This is the abstract function which actually fetches the web content using 
    589 # a specific browser implementation.  Must be implemented by each browser class. 
    590  
    591 sub getContent { 
    592  
    593 
    594  
    595 # Helper function which parses the HTTP::Response from LWP::UserAgent 
    596 # and returns an array of the links contained in the response 
    597 
    598 # Inputs: HTTP::Response object 
    599 # Outputs: Array containing all href links within the response 
    600  
    601 sub _getAllLinks { 
    602      
    603     my $response = shift; 
    604     my $hostname = shift; 
    605     my @links = (); 
    606     my $thislink; 
    607      
    608 &nb