Watir获得sedo网站上指定后缀域名列表

作为一个“米农”,sedo.com网站上面的信息对我来说很有价值。sedo.com是全球最大的域名交易网站。下面我写的这个脚本用来取得sedo上面指定后缀(如 .asia)的所有域名列表。

#encoding: UTF-8                                          #添加这一行才可以处理中文
require 'rubygems'                                        #gem install xxx --no-ri --no-rdoc
require 'timeout'
require 'watir-webdriver'

#b=Watir::Browser.new(:firefox, {:profile => 'default'})
# Destination file for the scraped domain names; opened for writing as UTF-8.
output = File.open('sedo_asia.txt', 'w:UTF-8')
# Starts Firefox through Watir, runs a Sedo advanced search restricted to one
# domain extension, switches the result list to 200 rows per page and records
# how many result pages there are.
#
# Side effects: stores the browser in the global $b and the text of the
# "MaxPage" link in the global $maxpage; both are read by the calling script.
#
# extension    - value attribute of the extension checkbox to tick. Defaults
#                to "cn", the value this script has always used.
#                NOTE(review): the accompanying write-up and the output file
#                name refer to .asia; pass the matching checkbox value if that
#                is the intent (confirm the value against the live page).
# max_attempts - number of times the search set-up is attempted before the
#                Timeout::Error is re-raised. Previously it retried forever.
#
# Raises Timeout::Error when every attempt exceeds the 60-second limit.
def pre_work(extension = "cn", max_attempts = 5)
  $b = Watir::Browser.new :ff
  # Selenium implicit wait, in seconds.
  $b.driver.manage.timeouts.implicit_wait = 100

  attempts = 0
  begin
    attempts += 1
    Timeout.timeout(60) do
      $b.goto "http://sedo.com/search/searchresult.php4?tracked=&partnerid=&language=cn"
      # Optional login, left disabled:
      #$b.ul(:class,"login").link(:class,"open").click
      #$b.div(:class,"row").text_field(:class,"input").set "your_sedo_username"
      #$b.div(:class,"row").text_field(:type,"password").set "your_sedo_password"
      #$b.div(:class,"col2 right").button(:name,"submit").click

      # Expand "Advanced search".
      $b.span(:id, 'jqs_searchbar_advancedFilter_toggle').wait_until_present
      $b.span(:id, 'jqs_searchbar_advancedFilter_toggle').click
      # Expand the "Extensions" filter group.
      $b.span(:class, 'filterStatus closed left').wait_until_present
      $b.span(:class, 'filterStatus closed left').click
      # Tick the checkbox for the requested extension.
      $b.checkbox(:value, extension).set
      # Press "Apply".
      $b.button(:class, 'btnGnS left jqs_advancedFilter_apply jqs_advancedFilter_apply_button').wait_until_present
      $b.button(:class, 'btnGnS left jqs_advancedFilter_apply jqs_advancedFilter_apply_button').click
    end
  rescue Timeout::Error
    # Give up after a bounded number of tries instead of looping forever.
    raise if attempts >= max_attempts
    sleep(2)
    retry
  end

  sleep(10)
  # Show 200 results per page.
  $b.select_list(:id, "pageLimitSelection").select("200")
  sleep(10)
  # Wait for the pager to appear, then read the total page count from it.
  $b.link(:class, 'MaxPage').wait_until_present
  $maxpage = $b.link(:class, 'MaxPage').text
end

# Main script: set up the search, then walk every result page and append each
# listed domain name to `output`.
pre_work

total_pages = $maxpage.to_i
max_cell_attempts = 3 # tries per table cell before it is skipped

begin
  # Pages are numbered from 1 and the last page is included (the earlier
  # `p < $maxpage.to_i` condition stopped one page short).
  (1..total_pages).each do |page|
    puts "==========Processing Page" + page.to_s + "==============="
    $b.div(:id, 'resultList').table(:id, 'resultListTable').wait_until_present
    # Every 100 pages run `purge` to release inactive memory. `system` is used
    # rather than `exec`, because `exec` would replace this Ruby process.
    system('purge') if page % 100 == 0

    # Visit every domain cell on the current page. The plural finders
    # (tds, trs, links, ...) return collections that can be iterated like this.
    $b.div(:id, 'resultList').tds(:class, 'domainField jqs_tbl_domain').each do |cell|
      attempts = 0
      begin
        attempts += 1
        # Only the browser round-trip is time-limited; the write happens once,
        # afterwards, so a retry can never produce a duplicate line.
        domain = Timeout.timeout(60) { cell.text }
        output.puts domain
      rescue Timeout::Error
        # The cell text is not read again here: that is the call that just hung.
        puts "Got stucked in Processing page element on page " + page.to_s
        if attempts < max_cell_attempts
          sleep(2)
          retry
        end
        puts "Skipping element after " + attempts.to_s + " attempts"
      end
    end

    # There is no next page to move to after the last one.
    unless page == total_pages
      $b.link(:class, 'arrowNextPage').wait_until_present
      $b.link(:class, 'arrowNextPage').click
      # Wait up to 600 seconds (instead of the default) for the page index
      # field to show the next page number.
      $b.wait_until(600) { $b.input(:class, 'pageIndex').value == (page + 1).to_s }
    end
  end
ensure
  # Make sure everything collected so far is flushed to disk.
  output.close unless output.closed?
end

Comments !