作为一个“米农”,sedo.com 网站上的信息很有价值。sedo.com 是全球最大的域名交易网站。下面我写的这个脚本用来取得 sedo 上指定后缀(如 .asia)的所有域名列表。
#encoding: UTF-8 #添加这一行才可以处理中文
require 'rubygems' #gem install xxx --no-ri --no-rdoc
require 'timeout'
require 'watir-webdriver'
#b=Watir::Browser.new(:firefox, {:profile => 'default'})
# Result file: one scraped domain name is written per line, encoded as UTF-8.
# Opened for writing, so an existing file of the same name is truncated.
output = File.open("sedo_asia.txt", 'w:UTF-8')
# Starts Firefox through Watir, opens the sedo.com search page, applies the
# advanced "extension" filter, switches the result list to 200 rows per page
# and records how many result pages exist.
#
# Side effects: stores the browser in the global $b and the text of the
# "MaxPage" link in the global $maxpage; both are read by the scraping loop.
#
# extension    - value attribute of the extension checkbox to tick.
#                Defaults to "cn", the value the script has always used.
#                NOTE(review): the original inline comment said this selects
#                ".asia" and the output file is named sedo_asia.txt, yet the
#                value is "cn" - confirm which extension is intended.
# max_attempts - how many times the navigate-and-filter sequence is tried
#                before giving up (it used to be retried forever).
#
# Raises Timeout::Error when every attempt exceeded the 60 second budget.
def pre_work(extension = "cn", max_attempts = 5)
  $b = Watir::Browser.new :ff
  # Selenium implicit wait for element lookups (the value is in seconds per the
  # selenium-webdriver Ruby API). It is longer than the 60 second Timeout
  # below, so the Timeout is what actually bounds each attempt.
  $b.driver.manage.timeouts.implicit_wait = 100

  toggle_id       = 'jqs_searchbar_advancedFilter_toggle'
  extension_class = 'filterStatus closed left'
  apply_class     = 'btnGnS left jqs_advancedFilter_apply jqs_advancedFilter_apply_button'

  attempts = 0
  begin
    Timeout.timeout(60) do
      $b.goto "http://sedo.com/search/searchresult.php4?tracked=&partnerid=&language=cn"

      # Optional login, left disabled as in the original script:
      # $b.ul(:class,"login").link(:class,"open").click
      # $b.div(:class,"row").text_field(:class,"input").set "your_sedo_username"
      # $b.div(:class,"row").text_field(:type,"password").set "your_sedo_password"
      # $b.div(:class,"col2 right").button(:name,"submit").click

      # Expand the "Advanced search" panel.
      $b.span(:id, toggle_id).wait_until_present
      $b.span(:id, toggle_id).click

      # Expand the "Extensions" group inside the panel.
      $b.span(:class, extension_class).wait_until_present
      $b.span(:class, extension_class).click

      # Tick the requested extension and apply the filter.
      $b.checkbox(:value, extension).set
      $b.button(:class, apply_class).wait_until_present
      $b.button(:class, apply_class).click
    end
  rescue Timeout::Error
    attempts += 1
    # Bounded retry: re-raise instead of looping forever when the site is down.
    raise if attempts >= max_attempts
    sleep(2)
    retry # starts again from the goto, which reloads the page
  end

  sleep(10) # give the filtered result list time to load
  $b.select_list(:id, "pageLimitSelection").select("200") # 200 rows per page
  sleep(10) # give the list time to reload with the new page size

  # Wait for the pager to show up, then remember the total number of pages.
  $b.link(:class, 'MaxPage').wait_until_present
  $maxpage = $b.link(:class, 'MaxPage').text
end
# Main scrape: prepare the filtered result list, then walk every result page,
# writing the text of each domain cell to `output` (one per line).
#
# Changes from the earlier version:
# * the last page is scraped too (the loop used to stop one page early, and
#   scraped nothing at all when there was only a single page);
# * a cell whose text cannot be read is retried a bounded number of times and
#   then skipped with a warning, instead of being retried forever;
# * the output file and the browser are closed even when the scrape aborts.
max_cell_attempts = 3
begin
  pre_work
  last_page = $maxpage.to_i

  (1..last_page).each do |page|
    puts "==========Processing Page#{page}==============="
    $b.div(:id, 'resultList').table(:id, 'resultListTable').wait_until_present

    # Every 100 pages run `purge` to release inactive memory. system is used
    # rather than exec because exec would replace (and so end) this Ruby process.
    # NOTE(review): the original comment said "every 200 pages" but the code
    # has always used 100 - confirm the intended interval.
    system('purge') if page % 100 == 0

    # tds (plural) returns every matching cell on the current page; the same
    # pattern works for trs, links, and so on.
    $b.div(:id, 'resultList').tds(:class, 'domainField jqs_tbl_domain').each do |cell|
      attempts = 0
      begin
        Timeout.timeout(60) { output.puts cell.text }
      rescue Timeout::Error
        attempts += 1
        if attempts < max_cell_attempts
          # The cell text is deliberately not read again here: that read is
          # the call that just hung, and it would run without a timeout.
          puts "Got stucked in Processing page element (attempt #{attempts} of #{max_cell_attempts})"
          sleep(2)
          retry
        end
        warn "Skipping a cell on page #{page}: reading its text timed out #{attempts} times"
      end
    end

    # Do not try to move past the final page.
    break if page == last_page

    $b.link(:class, 'arrowNextPage').wait_until_present
    $b.link(:class, 'arrowNextPage').click # go to the next page
    next_page = page + 1
    # Wait up to 600 seconds (the default is 30) for the pager to show the new
    # page number before scraping again.
    $b.wait_until(600) { $b.input(:class, 'pageIndex').value == next_page.to_s }
  end
ensure
  output.close unless output.closed?
  $b.close if $b
end
Comments !