| 97 | | <!-- HoneyClient::Agent::Driver::IE Options --> |
|---|
| 98 | | <!-- TODO: Update this. --> |
|---|
| 99 | | <ignore_links_timed_out description="If this parameter is 1, then the browser will never attempt to revisit any links that caused the browser to initially time out." default="0"> |
|---|
| 100 | | 1 |
|---|
| 101 | | </ignore_links_timed_out> |
|---|
| 102 | | <limit_spidering description="When set to 1, the Browser will visit only the initial set of URLs given and will not follow any links derived from the visited pages. This is useful when you want the Browser to only visit a specific set of URLs (perhaps malicious) and no other links that it finds on any of the visited pages. When set to 1, this option will override max_relative_links_to_visit (essentially setting it to 0). If you want the Browser to spider through derived links, set this value to 0." default="1"> |
|---|
| 103 | | 1 |
|---|
| 104 | | </limit_spidering> |
|---|
| 105 | | <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving on to another website. If negative, then the browser will exhaust all possible relative links found, before moving on. This functionality is best effort; it's possible for the browser to visit new links on previously visited websites. Note that this value can be overridden if limit_spidering is set to 1 above." default="-1"> |
|---|
| 106 | | 5 |
|---|
| 107 | | </max_relative_links_to_visit> |
|---|
| 108 | | <positive_words description="If a link contains any number of these words, then its probability of being visited (its score) will increase."> |
|---|
| 109 | | <word>news</word> |
|---|
| 110 | | <word>new</word> |
|---|
| 111 | | <word>big</word> |
|---|
| 112 | | <word>latest</word> |
|---|
| 113 | | <word>main</word> |
|---|
| 114 | | <word>update</word> |
|---|
| 115 | | <word>sell</word> |
|---|
| 116 | | <word>free</word> |
|---|
| 117 | | <word>buy</word> |
|---|
| 118 | | <word>science</word> |
|---|
| 119 | | </positive_words> |
|---|
| 120 | | <negative_words description="If a link contains any number of these words, then its probability of being visited (its score) will decrease."> |
|---|
| 121 | | <word>archive</word> |
|---|
| 122 | | <word>privacy</word> |
|---|
| 123 | | <word>legal</word> |
|---|
| 124 | | <word>disclaim</word> |
|---|
| 125 | | <word>about</word> |
|---|
| 126 | | <word>contact</word> |
|---|
| 127 | | <word>copyright</word> |
|---|
| 128 | | <word>jobs</word> |
|---|
| 129 | | <word>careers</word> |
|---|
| 130 | | <word>term</word> |
|---|
| 131 | | </negative_words> |
|---|
| | 131 | <!-- TODO: Update this. --> |
|---|
| | 132 | <Crawler> |
|---|
| | 133 | <ActiveContent> |
|---|
| | 134 | <enable description="Enables active content parsing. 1 enables, 0 disables." default="1"> |
|---|
| | 135 | 0 |
|---|
| | 136 | </enable> |
|---|
| | 137 | <Flash> |
|---|
| | 138 | <flasm_exec description="Path to the flasm executable." default="thirdparty/flasm/flasm.exe"> |
|---|
| | 139 | thirdparty/flasm/flasm.exe |
|---|
| | 140 | </flasm_exec> |
|---|
| | 141 | </Flash> |
|---|
| | 142 | </ActiveContent> |
|---|
| | 143 | <Browser> |
|---|
| | 144 | <!-- TODO: Update this. --> |
|---|
| | 145 | <ignore_links_timed_out description="If this parameter is 1, then the browser will never attempt to revisit any links that caused the browser to initially time out." default="0"> |
|---|
| | 146 | 1 |
|---|
| | 147 | </ignore_links_timed_out> |
|---|
| | 148 | <limit_spidering description="When set to 1, the Browser will visit only the initial set of URLs given and will not follow any links derived from the visited pages. This is useful when you want the Browser to only visit a specific set of URLs (perhaps malicious) and no other links that it finds on any of the visited pages. When set to 1, this option will override max_relative_links_to_visit (essentially setting it to 0). If you want the Browser to spider through derived links, set this value to 0." default="1"> |
|---|
| | 149 | 1 |
|---|
| | 150 | </limit_spidering> |
|---|
| | 151 | <max_relative_links_to_visit description="An integer, representing the maximum number of relative links that the browser should visit, before moving on to another website. If negative, then the browser will exhaust all possible relative links found, before moving on. This functionality is best effort; it's possible for the browser to visit new links on previously visited websites. Note that this value can be overridden if limit_spidering is set to 1 above." default="-1"> |
|---|
| | 152 | 5 |
|---|
| | 153 | </max_relative_links_to_visit> |
|---|
| | 154 | <positive_words description="If a link contains any number of these words, then its probability of being visited (its score) will increase."> |
|---|
| | 155 | <word>news</word> |
|---|
| | 156 | <word>new</word> |
|---|
| | 157 | <word>big</word> |
|---|
| | 158 | <word>latest</word> |
|---|
| | 159 | <word>main</word> |
|---|
| | 160 | <word>update</word> |
|---|
| | 161 | <word>sell</word> |
|---|
| | 162 | <word>free</word> |
|---|
| | 163 | <word>buy</word> |
|---|
| | 164 | <word>science</word> |
|---|
| | 165 | </positive_words> |
|---|
| | 166 | <negative_words description="If a link contains any number of these words, then its probability of being visited (its score) will decrease."> |
|---|
| | 167 | <word>archive</word> |
|---|
| | 168 | <word>privacy</word> |
|---|
| | 169 | <word>legal</word> |
|---|
| | 170 | <word>disclaim</word> |
|---|
| | 171 | <word>about</word> |
|---|
| | 172 | <word>contact</word> |
|---|
| | 173 | <word>copyright</word> |
|---|
| | 174 | <word>jobs</word> |
|---|
| | 175 | <word>careers</word> |
|---|
| | 176 | <word>term</word> |
|---|
| | 177 | </negative_words> |
|---|
| | 178 | </Browser> |
|---|
| | 179 | </Crawler> |
|---|