Changeset 639

Show
Ignore:
Timestamp:
07/12/07 17:06:48 (1 year ago)
Author:
kindlund
Message:

Restored ActiveContent integration into Browser

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • honeyclient/branches/exp/jpuchalski-active_content/lib/HoneyClient/Agent/Driver/Browser.pm

    r637 r639  
    202202# Include the Global Configuration Processing Library 
    203203use HoneyClient::Util::Config qw(getVar); 
     204 
     205# Include the ActiveContent Processing Library 
     206# TODO: Need unit testing. 
     207use HoneyClient::Agent::Driver::ActiveContent; 
    204208 
    205209# Use ISO 8601 DateTime Libraries 
     
    9991003    # I'm thinking this could be set by IE/FF and passed via $args{'default_headers'} 
    10001004    # as a HTTP::Headers object. 
    1001     $ua->default_header( 'Accept' => 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5' ); 
     1005    $ua->default_header( 'Accept' => 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,application/x-shockwave-flash,*/*;q=0.5' ); 
    10021006 
    10031007    my $response = $ua->request( 
     
    10061010                            HTTP::Headers->new( 
    10071011                                # TODO: Add custom headers here? 
    1008                                 'Accept' => 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 
     1012                                'Accept' => 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,application/x-shockwave-flash,*/*;q=0.5', 
    10091013                            ), 
    10101014                        ) 
     
    10141018    my $base = $response->base; 
    10151019    my $content = $response->content; 
     1020    my $type = $response->header('Content-Type'); 
    10161021 
    10171022    # Get the current time. 
    10181023    my $timestamp = _getTimestamp(); 
    10191024 
    1020     # Score the new links based on their surrounding HTML context 
     1025    # Score the new links based on their surrounding HTML context. 
    10211026    # If %scored_links is empty upon return, there are no links 
    1022     # and we will not perform any of the following code 
     1027    # and we will not perform any of the following code. 
    10231028    my %scored_links; 
    10241029    if ($content) { 
    1025         # Extract the good word and bad word lists into arrays; 
    1026         my %wordlists = ('good' => $self->{'positive_words'}, 
    1027                          'bad'  => $self->{'negative_words'}); 
    1028         # Call the link scoring function 
    1029         %scored_links = _scoreLinks($base, $content, %wordlists); 
     1030 
     1031        # Check to see if the content is Flash-based. 
     1032        if ($type eq "application/x-shockwave-flash") { 
     1033 
     1034            # Save content to a temp file on disk. 
     1035            my $tempFile = new File::Temp(SUFFIX => '.swf'); 
     1036            print $tempFile $content; 
     1037            $tempFile->close(); 
     1038 
     1039            %scored_links = HoneyClient::Agent::Driver::ActiveContent::process( 
     1040                                file => $tempFile, 
     1041                                base_url => $base, 
     1042                            ); 
     1043            # TODO: Check to make sure that temp files are getting deleted 
     1044            # properly (when normal conditions apply). 
     1045 
     1046        # Assume that all other content types are HTML-based. 
     1047        } else { 
     1048 
     1049            # Extract the good word and bad word lists into arrays; 
     1050            my %wordlists = ('good' => $self->{'positive_words'}, 
     1051                             'bad'  => $self->{'negative_words'}); 
     1052            # Call the link scoring function 
     1053            %scored_links = _scoreLinks($base, $content, %wordlists); 
     1054        } 
    10301055    } 
    10311056