Changeset 610

Show
Ignore:
Timestamp:
07/11/07 06:22:15 (1 year ago)
Author:
jpuchalski
Message:

Dumping in a bunch of changes that include some initial attempts at getURL2
call processing (this is not an easy problem). Squashed some bugs as well,
and added more logging output.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • honeyclient/branches/exp/jpuchalski-active_content/lib/HoneyClient/Agent/Driver/ActiveContent/Flash.pm

    r250 r610  
    33# Package:     HoneyClient::Agent::Driver::ActiveContent::Flash 
    44# File:        Flash.pm 
    5 # Description: A driver for extracting URLs from Adobe 
    6 #              Flash movies. 
     5# Description: A module used for extracting URLs from Adobe 
     6#              Flash (SWF) movies.  If the URLs could not be 
     7#              extracted, but the movie appears "interesting," 
     8#              then it is flagged for analyst review. 
    79# 
    810# CVS: $Id:   $ 
     
    3436=head1 NAME 
    3537 
    36 HoneyClient::Agent::Driver::ActiveContent::Flash - Perl extension to  
    37 extract URLs from Adobe Flash (SWF) movies. 
     38HoneyClient::Agent::Driver::ActiveContent::Flash - Perl module that  
     39extracts URLs from Adobe Flash (SWF) movies.  Returns a list of URLs. 
    3840 
    3941=head1 VERSION 
     
    4446=head1 SYNOPSIS 
    4547 
    46 One or two sentence overview here... 
     48  use HoneyClient::Agent::Driver::ActiveContent::Flash; 
     49 
     50 
    4751 
    4852=head1 DESCRIPTION 
    49  
    50 Describe the module here... 
    5153 
    5254=cut 
     
    8789# Include Global Configuration Processing Library 
    8890use HoneyClient::Util::Config qw(getVar); 
    89  
    90 
    91 use URI::URL; 
    92  
    93 
    94 use File::Temp; 
    95  
    96 
     91use Log::Log4perl qw(:easy); 
    9792use Filesys::CygwinPaths qw(:all); 
    9893 
    99 # Include Logging Library 
    100 use Log::Log4perl qw(:easy); 
    101  
    102 # The global logging object. 
     94# Include the Global Configuration Processing Library 
     95use HoneyClient::Util::Config qw(getVar); 
     96 
     97=pod 
     98 
     99=head1 GLOBAL VARIABLES 
     100 
     101=head2 flasm_exec 
     102 
     103=over 4 
     104 
     105Path to the flasm executable (default=./thirdparty/flasm/flasm.exe). 
     106 
     107=back 
     108 
     109=cut 
     110 
     111# Path to the flasm executable. 
     112our $flasm_exec = getVar(name => "flasm_exec"); 
     113 
     114# Our friendly local logger. 
    103115our $LOG = get_logger(); 
    104116 
    105  
    106  
    107 =pod 
    108  
    109 =head1 DEFAULT PARAMETER LIST 
    110  
    111 When a Driver B<$object> is instantiated using the B<new()> function, 
    112 the following parameters are supplied default values.  Each value 
    113 can be overridden by specifying the new (key => value) pair into the 
    114 B<new()> function, as arguments. 
    115  
    116 Furthermore, as each parameter is initialized, each can be individually  
    117 retrieved and set at any time, using the following syntax: 
    118  
    119   my $value = $object->{key}; # Gets key's value. 
    120   $object->{key} = $value;    # Sets key's value. 
    121  
    122 =head2 timeout 
     117# This variable holds the base URL reference for the SWF movie 
     118# we are currently processing. 
     119our $base_url; 
     120 
     121# Put all the relative URLs that were retrieved into a hash as 
     122# keys, but first turn them into full URLs.  Set the value for 
     123# each URL key to 1 (this is its score). 
     124our %urls; 
     125 
     126 
     127####################################################################### 
     128# Private Methods Implemented                                         # 
     129####################################################################### 
     130 
     131=pod 
     132 
     133=head1 PRIVATE METHODS 
     134 
     135=head2 _addURL($url) 
    123136 
    124137=over 4 
    125138 
    126 This parameter indicates how long (in seconds) the Driver should wait  
    127 for an application response, once driven for one iteration.  
    128 The default value is any valid "timeout" setting located within the 
    129 global configuration file that matches any portion of this package's 
    130 namespace.  See L<HoneyClient::Util::Config> for more information. 
     139Adds the specified URL to the hash of URLs to be returned.  Checks 
     140to see if the URL is relative or absolute, and in the former case 
     141prepends the base URL (joined with a "/") to it. 
    131142 
    132143=back 
     
    134145=cut 
    135146 
    136 my %PARAMS = ( 
    137     flasm_exec     => getVar(name => "flasm_exec") 
    138 ); 
    139  
    140 ####################################################################### 
    141 # Private Methods Implemented                                         # 
    142 ####################################################################### 
     147sub _addURL { 
     148  my $url = shift; 
     149 
     150  # URL appears to be absolute, or a different protocol 
     151  if ($url =~ /^mailto/ or 
     152      $url =~ /^javascript/ or 
     153      $url =~ /^http/ 
     154     )  
     155  { 
     156    $urls{"$url"} = 1; 
     157  } 
     158  # URL appears to be relative, so add the base 
     159  else { 
     160    $urls{"$base_url/$url"} = 1; 
     161  } 
     162
    143163 
    144164 
     
    149169=pod 
    150170 
    151 =head2 $object->extract() 
     171=head1 PUBLIC METHODS 
     172 
     173=head2 HoneyClient::Agent::Driver::ActiveContent::Flash->extract() 
    152174 
    153175=over 4 
    154176 
    155 Describe here... 
     177Extracts URLs from an Adobe Flash SWF movie file.  Takes the named 
     178arguments 'file' (an object whose filename() method gives the path 
     179of the movie) and 'base_url' (used to construct full URLs for the 
     180relative links found in the movie).  Returns a hash with the found 
     181URLs as keys and a value of 1 for each, the weight used by the link ranking code. 
    156182 
    157183=back 
     
    170196sub extract { 
    171197  my %args = @_; 
    172  
    173   # Call flasm and capture the output in an array 
    174   my $filename = fullwin32path($args{'file'}->filename); 
    175   # Must encode all backslashes with double-backslashes, since backtick commands don't like 
    176   # single backslashes. 
    177   $filename =~ s/\\/\\\\/g; 
    178   my @bytecode = `./thirdparty/flasm/flasm.exe -d $filename`; 
    179  
    180   # Parse out lines that contain the getURL method 
    181   my @urls = grep(/getURL /, @bytecode); 
     198  my $filename; 
    182199   
    183   foreach (@urls) { 
    184     print "$_\n"; 
    185   } 
    186  
    187   # If there are any getURL2 calls, mark this file 
    188   # for additional analysis 
    189   if (grep(/getURL2/, @bytecode)) { 
    190     print "Detected getURL2, need to process with Flare\n"; 
    191   } 
    192  
    193   # We can certainly do better processing of these getURL2 
    194   # calls.  What we need to do is read up to the preceding 
    195   # lines in the assembly to where the getVariable call is 
    196   # made.  The push call immediately before it has the name 
    197   # of the variable that is used in the getURL2 call.  To 
    198   # get the value of that variable, one needs to look for 
    199   # where the variable is set.  If it is a simple String 
    200   # assignment, then there will be another push call 
    201   # somewhere that has the variable name as the first 
    202   # parameter and the value as the second parameter.  Note 
    203   # that this process could very quickly become complicated, 
    204   # so let's just try to handle the basic cases for now and 
    205   # we can mark the file for human analysis as a fallback. 
     200  # Chop any trailing slash off the base URL 
     201  $base_url = $args{'base_url'}; 
     202  $base_url =~ s/\/$//; 
     203 
     204  # Call flasm in OS-dependent way and capture the output in an array. 
     205  if ($^O =~ m/linux/i) { 
     206    $filename = $args{'file'}->filename; 
     207  }  
     208  # Must encode all backslashes with double backslashes, since  
     209  # backtick commands don't like single backslashes. 
     210  elsif ($^O =~ m/win/i || $ENV{'OS'} =~ m/win/i) { 
     211    my $filename = (fullwin32path($args{'file'}->filename) =~ s/\\/\\\\/g); 
     212  } 
     213 
     214  # Call flasm and store the output bytecode string in an array 
     215  my @bytecode = `$flasm_exec -d $filename`; 
     216 
     217  # Check the return value on the flasm call that just happened. 
     218  # We care if the return code was anything other than 0. 
     219  if ($? >> 8) { 
     220    my $signal = ($? & 127); 
     221    $LOG->fatal("Call to flasm exited on signal $signal"); 
     222    Carp::croak "Error: Call to flasm exited on signal $signal"; 
     223  } 
     224 
     225  # Parse out lines that contain the getURL method (exclude any 
     226  # getURL2 calls from this, as they need to be handled differently) 
     227  my @geturl_calls = grep(/getURL /, @bytecode); 
     228 
     229  # Each getURL line has getURL followed by the URL in single  
     230  # quotes.  Extract the URL, remove the single quotes, and store 
     231  # the URL in a new array. 
     232  my @found_urls; 
     233 
     234  foreach (@geturl_calls) { 
     235    $_ =~ s/^\s+//; 
     236    my ($fun, $url) = split(/\s+/); 
     237    $url =~ s/'//g; 
     238    push @found_urls, $url; 
     239  } 
     240 
     241  foreach (@found_urls) { 
     242    _addURL $_; 
     243  } 
     244 
     245  # Sanity check on the URLs 
     246  foreach (sort keys %urls) { 
     247    $LOG->debug("$_"); 
     248  } 
    206249   
    207   # TODO: Eventually, return something useful. 
    208   my %urls = ( 
    209                 'http://www.google.com' => 1, 
    210              ); 
     250  # We can exit here if there are no getURL2 calls 
     251  unless (grep(/getURL2/, @bytecode)) { 
     252    return %urls; 
     253  } 
     254  
     255  # If we made it here, then at least one getURL2 call was 
     256  # detected.  Proceed with additional processing. 
     257  $LOG->warn("Detected getURL2\n"); 
     258 
     259  # Before we forget, turn off unlink on destroy for our 
     260  # temporary file handle, so the file is kept around for 
     261  # analysts to look at later. 
     262  $args{'file'}->unlink_on_destroy(0); 
     263 
     264  # What we do next is some parsing on the flasm decompilation. 
     265  # We are looking for getURL2 calls, and will then try to piece 
     266  # together the URLs that go into them. 
     267  my $i = 0; 
     268  
     269  foreach (@bytecode) { 
     270    # First, find a line containing a getURL2 call 
     271    if ($_ =~ /getURL2/) { 
     272      $LOG->debug("Got a getURL2 on line " . ($i+1)); 
     273       
     274      my ($fun, $var, $val); 
     275      my $instr; 
     276      my $haveVar = 0; 
     277       
     278      # Next, work backwards from the call.  We start from $i - 2  
     279      # here because the $i - 1 line always contains the URL's target 
     280      # (e.g., '', '_parent', '_blank'). 
     281      for (my $j = $i - 2; $bytecode[$j] !~ /^\s+constants/; $j--) { 
     282        # The first time through, look for a getVariable call, which 
     283        # tells us what variable has the value of the URL. 
     284        if (!$haveVar and $bytecode[$j] =~ /getVariable/) { 
     285          $LOG->debug("Found getVariable on line " . ($j+1)); 
     286          # The line before this one has the name of the 
     287          # variable that contains the URL 
     288          $instr = $bytecode[--$j]; 
     289          $instr =~ s/^\s+//; 
     290          ($fun, $var) = split(/\s+/, $instr); 
     291          $LOG->debug("Name of the URL variable is $var"); 
     292          $haveVar = 1; 
     293        }  
     294        # Once we have the URL variable, we can look back further 
     295        # for the push call that sets its value. 
     296        elsif ($haveVar and $bytecode[$j] =~ /^\s+push $var/) { 
     297          $instr = $bytecode[$j]; 
     298          $instr =~ s/^\s+//; 
     299          ($var, $val) = split(/, /, $instr); 
     300          $LOG->info("Value of the URL is $val"); 
     301          $val =~ s/'//g; 
     302          _addURL $val; 
     303          last; 
     304        } 
     305      } 
     306    } 
     307 
     308    $i++; 
     309  } 
     310 
    211311  return %urls; 
    212312}