Changeset 1080
- Timestamp:
- 12/18/07 08:40:11 (8 months ago)
- Files:
-
- honeyclient/trunk/etc/honeyclient.xml (modified) (3 diffs)
- honeyclient/trunk/lib/HoneyClient/Agent/Driver/Browser.pm (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
honeyclient/trunk/etc/honeyclient.xml
r1068 r1080
 83  83  <ActiveContent>
 84  84  <enable description="Enables active content parsing. 1 enables, 0 disables." default="1">
 85      1
     85  0
 86  86  </enable>
 87  87  <Flash>
  …   …
 97  97  1
 98  98  </ignore_links_timed_out>
 99      <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving onto another website. If negative, then the browser will exhaust all possible relative links found, before moving on. This functionality is best effort; it's possible for the browser to visit new links on previously visited websites." default="-1">
     99  <limit_spidering description="Sometimes you only want to check the URLs in your initial list, and not add any of the relative or absolute links found on the sites you visit. (For instance, when trying to determine whether a specific list of URLs contains malicious sites.) In this case you should set limit_spidering to 1. This option will override max_relative_links_to_visit (essentially setting it to 0)." default="0">
100 100  1
    101  </limit_spidering>
    102  <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving onto another website. If negative, then the browser will exhaust all possible relative links found, before moving on. This functionality is best effort; it's possible for the browser to visit new links on previously visited websites. Note that this value can be overridden if limit_spidering is set to 1 above." default="-1">
    103  5
101 104  </max_relative_links_to_visit>
102 105  <positive_words description="If a link contains any number of these words, then its probability of being visited (its score) will increase.">
  …   …
147 150  </Driver>
148 151  <perform_integrity_checks description="An integer, representing whether the Agent should perform any integrity checks. 1 enables, 0 disables." default="1">
149      1
    152  0
150 153  </perform_integrity_checks>
151 154  <!-- HoneyClient::Agent::Integrity Options -->
honeyclient/trunk/lib/HoneyClient/Agent/Driver/Browser.pm
r1079 r1080
 923  923  max_relative_links_to_visit => getVar(name => "max_relative_links_to_visit"),
 924  924
      925  #Sometimes you only want to check the URLs in your initial list, and
      926  #not add any of the relative or absolute links found on the sites you
      927  #visit. (For instance, when trying to determine whether a specific list
      928  #of URLs contains malicious sites.) In this case you should set
      929  #limit_spidering to 1. This option will override max_relative_links_to_visit
      930  #(essentially setting it to 0).
      931  limit_spidering => getVar(name => "limit_spidering"),
      932
 925  933  # An array of positive words, where a link's probability of being
 926  934  # visited (its score) will increase, if the link contains any of these
   …    …
1092 1100  # Assume that all other content types are HTML-based.
1093 1101  } else {
1094       # Call the link scoring function
1095       %scored_links = $self->_scoreLinks($base, $content);
     1102  #If limit_spidering is set, we don't want to add any new links
     1103  #Hence, by not calling _scoreLinks() the next logic will just drop all
     1104  #found links, because it won't call _processLinks in the next conditional
     1105  #(The only reason for putting this check here rather than there is to avoid
     1106  #the cost of useless link parsing for scoring)
     1107  #NOTE: This technically could go at the level that we don't even use LWP::UserAgent
     1108  #but it has just been put here to be conservative as we may want to use the
     1109  #data from LWP for the future hybrid approach.
     1110  if(!$self->{limit_spidering}){
     1111  # Call the link scoring function
     1112  %scored_links = $self->_scoreLinks($base, $content);
     1113  }
1096 1114  }
1097 1115  }
