Changeset 1550

Show
Ignore:
Timestamp:
04/22/08 10:56:04 (3 weeks ago)
Author:
mbriggs
Message:

Replaced get_queue_urls w/ two functions: get_queue_urls_by_hostname and get_new_queue_urls

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • hive/trunk/data_webapp/app/apis/hc_database_api.rb

    r1529 r1550  
    77  api_method :insert_fingerprint, :expects => [:string], :returns => [:string] 
    88  api_method :insert_client, :expects => [:string], :returns => [:string] 
     9 
    910  api_method :set_client_suspended, :expects => [:string], :returns => [:string] 
    1011  api_method :set_client_running, :expects => [:string], :returns => [:string] 
     
    1516  api_method :set_client_unknown, :expects => [:string], :returns => [:string] 
    1617  api_method :set_client_deleted, :expects => [:string], :returns => [:string] 
    17   #api_method :get_queue_urls, :expects => [:string], :returns => [:string] 
    18   api_method :get_queue_urls, :expects => [:int,:int], :returns => [:string] 
     18 
     19  api_method :get_queue_urls_by_hostname, :expects => [:string], :returns => [:string] 
     20  api_method :get_new_queue_urls, :expects => [:int,:int], :returns => [:string] 
    1921  api_method :get_broken_clients, :expects => [:string], :returns => [:string] 
    2022  api_method :get_not_deleted_clients, :expects => [:string], :returns => [:string] 
  • hive/trunk/data_webapp/app/controllers/hc_database_controller.rb

    r1529 r1550  
    245245    Client.update(cid,{:status => "suspicious",:compromise => string_to_microseconds(c_time)}) 
    246246  end 
     247   
     248  def get_queue_urls_by_hostname(hostname) 
     249    # Get host associated with hostname (assume unique hostnames) 
     250    host = Host.find(:first,:conditions => {:hostname => hostname}) 
     251    # Get urls associated with host_id 
     252    urls = QueueUrl.find(:all,:conditions => {:host_id => host.id}) 
     253 
     254    # Return the urls as a hash table url/priority pairs (e.g. {"http://www.honeyclient.org" => 1}) 
     255    url_hash = Hash[*urls.collect {|u| [u.url,u.priority]}.flatten] 
     256    RbYAML.dump(url_hash) 
     257  end 
    247258 
    248259  # Retrieve num_urls links for client with id client_id 
    249   def get_queue_urls(num_urls,client_id)
    250     # Get object for Client requesting urls 
    251     client = Client.find(client_id)
     260  def get_new_queue_urls(hostname,num_urls)
     261    # Get requesting Host object 
     262    host = Host.find(:first, :conditions => {:hostname => hostname})
    252263 
    253264    # Get any urls currently assigned to this host 
    254     urls = QueueUrl.find(:all,:conditions => {:host_id => client.host_id}); 
    255     if urls.empty? 
    256       # Calculate the time which urls must be older than 
    257       #XXX: Hardcoded url num values and age, but will eventually be configuration variables 
    258       min_age_by_min = 5 
    259       older_than_time = Time.now.to_f - min_age_by_min*60 
    260    
    261       # Get 60% of requested URLs by Highest Priority 
    262       #num_by_priority = (num_urls*0.6).ceil 
    263       num_by_priority = 12 
    264       urls = QueueUrl.find(:all, 
    265         :conditions => {:host_id => 0,:last_visited_at_lt => older_than_time}, 
    266         :order => "priority DESC",:limit => num_by_priority) 
    267    
    268       # If no urls are retrieved in first query, avoid further queries 
    269       if urls.length > 0 
    270         # Get 20% of requested URLs by Age 
    271         #num_by_age = (num_urls > num_by_priority) ? (num_urls*0.4).ceil : 0 
    272         num_by_age = 4 
    273         urls += QueueUrl.find(:all, 
    274           :conditions => {:host_id => 0,:last_visited_at_lt => older_than_time,:id_not_in => urls.map(&:id)}, 
    275           :order => "created_at ASC",:limit => num_by_age) 
    276    
    277         # Get 20% of requested URLs by Popularity 
    278         #num_by_popularity = (r = num_urls - num_by_priority - num_by_age) > 0 ? r : 0 
    279         num_by_popularity = 4 
    280         urls += QueueUrl.find(:all, 
    281           :conditions => {:host_id => 0,:last_visited_at_lt => older_than_time,:id_not_in => urls.map(&:id)}, 
    282           :order => "count DESC",:limit => num_by_popularity) 
    283    
    284         # Lock urls to prevent duplication of work by setting host_id (!0) 
    285         QueueUrl.update_all('host_id='+client.host_id.to_s,'id IN ('+urls.map(&:id).join(',')+')') 
    286       end 
     265    # Calculate the time which urls must be older than 
     266    #XXX: Hardcoded url num values and age, but will eventually be configuration variables 
     267    min_age_by_min = 5 
     268    older_than_time = Time.now.to_f - min_age_by_min*60 
     269 
     270    # Get 60% of requested URLs by Highest Priority 
     271    #num_by_priority = (num_urls*0.6).ceil 
     272    num_by_priority = 12 
     273    urls = QueueUrl.find(:all, 
     274      :conditions => {:host_id => 0,:last_visited_at_lt => older_than_time}, 
     275      :order => "priority DESC",:limit => num_by_priority) 
     276 
     277    # If no urls are retrieved in first query, avoid further queries 
     278    if urls.length > 0 
     279      # Get 20% of requested URLs by Age 
     280      #num_by_age = (num_urls > num_by_priority) ? (num_urls*0.4).ceil : 0 
     281      num_by_age = 4 
     282      urls += QueueUrl.find(:all, 
     283        :conditions => {:host_id => 0,:last_visited_at_lt => older_than_time,:id_not_in => urls.map(&:id)}, 
     284        :order => "created_at ASC",:limit => num_by_age) 
     285 
     286      # Get 20% of requested URLs by Popularity 
     287      #num_by_popularity = (r = num_urls - num_by_priority - num_by_age) > 0 ? r : 0 
     288      num_by_popularity = 4 
     289      urls += QueueUrl.find(:all, 
     290        :conditions => {:host_id => 0,:last_visited_at_lt => older_than_time,:id_not_in => urls.map(&:id)}, 
     291        :order => "count DESC",:limit => num_by_popularity) 
     292 
     293      # Lock urls to prevent duplication of work by setting host_id (!0) 
     294      QueueUrl.update_all('host_id='+host.id.to_s,'id IN ('+urls.map(&:id).join(',')+')') 
    287295    end 
    288296    # Return the urls as a hash table url/priority pairs (e.g. {"http://www.honeyclient.org" => 1})