RubyでPageRankの参照
PageRankをRubyで処理したときに使ったスクリプト。使いまわしだしエラー処理してないけど。
「個人ニュースサイトのPageRank - 鯨飲馬食コード」で使ったやつ。
#!/usr/bin/ruby
# encoding: utf-8
#
# Reads a list of URLs from a text file, looks up the Google PageRank and the
# page title of every URL, and prints the result to stdout as a Hatena-notation
# table sorted by PageRank (highest first).
#
# Usage: ruby SCRIPT URL_LIST_FILE

require 'uri'
require 'net/http'
require 'rexml/document'
require 'kconv'

# Collects PageRank values and page titles for a list of URLs and renders them
# as a table.
class PR
  PAGERANK_HOST = 'www.trynt.com'.freeze
  PAGERANK_PATH = '/google-pagerank-api/v1/'.freeze
  # Rank reported when the API answers with an error or cannot be reached.
  UNKNOWN_RANK = -1
  # A line of the input file is kept when it contains an http(s) URL.
  URL_LINE = %r{https?://}
  # Tolerates attributes on the tag and titles that span several lines.
  TITLE_TAG = /<title[^>]*>(.*?)<\/title>/im

  def initialize
    @data = []
    @rank_table = {}
    @title_table = {}
  end

  # Looks up the PageRank of +target+ through the web API.
  #
  # @param target [String] URL to look up
  # @return [Integer] the reported rank, or -1 when the API reports an error
  #   or the request/parsing fails
  def pagerank(target)
    body = Net::HTTP.get(PAGERANK_HOST, pagerank_request_path(target))
    parse_pagerank(body)
  rescue StandardError => e
    # One failing lookup must not abort the whole batch.
    warn "pagerank lookup failed for #{target}: #{e.message}"
    UNKNOWN_RANK
  end

  # Fetches +target+ and returns the text of its <title> element.
  #
  # @param target [String] URL of the page
  # @return [String] the page title, or +target+ itself when the URL is
  #   invalid, the page cannot be fetched, or it has no usable title
  def title(target)
    # Passing the URI object keeps the port and query string and turns an
    # empty path into "/".
    body = Net::HTTP.get(URI.parse(target))
    extract_title(body) || target
  rescue StandardError
    target
  end

  # Appends every line of +file+ that contains an http(s) URL to the work list
  # (surrounding whitespace removed).
  #
  # @param file [String] path of the URL list
  # @return [nil]
  def read_data(file)
    File.foreach(file) do |line|
      @data.push(line.strip) if line =~ URL_LINE
    end
  end

  # Looks up rank and title for every URL read so far, printing a running
  # counter to stderr.
  def check_pagerank
    @data.each_with_index do |url, index|
      @rank_table[url] = pagerank(url)
      @title_table[url] = title(url)
      $stderr.print "\r#{index + 1}"
    end
    $stderr.puts
  end

  # Prints the collected data as a Hatena-notation table, ordered by
  # descending PageRank and then by URL.
  def write_table
    puts '|*PageRank|*サイト|'
    @rank_table.sort_by { |url, rank| [-rank, url] }.each do |url, rank|
      site = @title_table[url].to_s.toutf8
      puts "|#{rank}|<a href=\"#{url}\">#{site}</a>|"
    end
  end

  private

  # Builds the API request path; the target is escaped so that characters such
  # as "&" or "#" inside it cannot alter the query string.
  def pagerank_request_path(target)
    "#{PAGERANK_PATH}?u=#{URI.encode_www_form_component(target)}"
  end

  # Extracts the rank from an API response body. An <Error-Code> element
  # anywhere in the document means the lookup failed.
  def parse_pagerank(xml)
    doc = REXML::Document.new(xml)
    return UNKNOWN_RANK if REXML::XPath.first(doc, '//Error-Code')

    rank_node = REXML::XPath.match(doc, '//Pagerank').last
    rank_node ? rank_node.text.to_i : UNKNOWN_RANK
  end

  # Returns the whitespace-normalised <title> text of +html+, or nil when
  # there is none.
  def extract_title(html)
    found = TITLE_TAG.match(html)
    return nil unless found

    text = found[1].gsub(/\s+/, ' ').strip
    text.empty? ? nil : text
  end
end

in_file = ARGV.shift
if in_file
  test = PR.new
  test.read_data(in_file)
  test.check_pagerank
  test.write_table
else
  warn "usage: #{File.basename($PROGRAM_NAME)} URL_LIST_FILE"
end
「はてなダイアリーのPageRank - 鯨飲馬食コード」で使ったスクリプト。
#!/usr/bin/ruby
# encoding: utf-8
#
# Reads a list of URLs from a text file and, for every URL, looks up the
# Google PageRank, the page title, the Hatena diary id and the Hatena bookmark
# count. Prints the result to stdout as a Hatena-notation table sorted by
# PageRank and then by bookmark count.
#
# Usage: ruby SCRIPT URL_LIST_FILE

require 'uri'
require 'net/http'
Net::HTTP.version_1_2
require 'rexml/document'
require 'kconv'

# Collects PageRank values, titles, Hatena ids and bookmark counts for a list
# of URLs and renders them as a table.
class PR
  PAGERANK_HOST = 'www.trynt.com'.freeze
  PAGERANK_PATH = '/google-pagerank-api/v1/'.freeze
  HATENA_XMLRPC_URL = 'http://b.hatena.ne.jp/xmlrpc'.freeze
  # Rank reported when the API answers with an error or cannot be reached.
  UNKNOWN_RANK = -1
  # A line of the input file is kept when it contains an http(s) URL.
  URL_LINE = %r{https?://}
  # Tolerates attributes on the tag and titles that span several lines.
  TITLE_TAG = /<title[^>]*>(.*?)<\/title>/im
  # Captures only the first path segment, so dated entry URLs and URLs
  # without a trailing slash still yield the bare id.
  HATENA_DIARY_ID = %r{https?://d\.hatena\.ne\.jp/([^/?#]+)}

  def initialize
    @data = []
    @rank_table = {}
    @title_table = {}
    @id_table = {}
    @bm_table = {}
  end

  # Looks up the PageRank of +target+ through the web API.
  #
  # @param target [String] URL to look up
  # @return [Integer] the reported rank, or -1 when the API reports an error
  #   or the request/parsing fails
  def pagerank(target)
    body = Net::HTTP.get(PAGERANK_HOST, pagerank_request_path(target))
    parse_pagerank(body)
  rescue StandardError => e
    # One failing lookup must not abort the whole batch.
    warn "pagerank lookup failed for #{target}: #{e.message}"
    UNKNOWN_RANK
  end

  # Asks Hatena Bookmark how many bookmarks +target+ has.
  #
  # @param target [String] URL to look up
  # @return [Integer] the bookmark count, or 0 when the lookup fails
  def b_hatena(target)
    hatena_client.call('bookmark.getTotalCount', target).to_i
  rescue StandardError => e
    warn "bookmark lookup failed for #{target}: #{e.message}"
    0
  end

  # Fetches +target+ and returns the text of its <title> element.
  #
  # @param target [String] URL of the page
  # @return [String] the page title, or +target+ itself when the URL is
  #   invalid, the page cannot be fetched, or it has no usable title
  def title(target)
    # Passing the URI object keeps the port and query string and turns an
    # empty path into "/".
    body = Net::HTTP.get(URI.parse(target))
    extract_title(body) || target
  rescue StandardError
    target
  end

  # Appends every line of +file+ that contains an http(s) URL to the work list
  # (surrounding whitespace removed).
  #
  # @param file [String] path of the URL list
  # @return [nil]
  def read_data(file)
    File.foreach(file) do |line|
      @data.push(line.strip) if line =~ URL_LINE
    end
  end

  # Looks up rank, title, Hatena id and bookmark count for every URL read so
  # far, printing a running counter to stderr.
  def check_pagerank
    @data.each_with_index do |url, index|
      @rank_table[url] = pagerank(url)
      @title_table[url] = title(url)
      id = hatena_id(url)
      @id_table[url] = id if id
      @bm_table[url] = b_hatena(url)
      $stderr.print "\r#{index + 1}"
    end
    $stderr.puts
  end

  # Prints the collected data as a Hatena-notation table. Rows are ordered by
  # descending PageRank, then by bookmark rank. The bookmark rank is the
  # 1-based position when ordering all rows by descending bookmark count.
  # URLs that are not Hatena diary URLs get an empty id cell.
  def write_table
    rows = @rank_table.map do |url, rank|
      {
        url: url,
        rank: rank,
        title: @title_table[url].to_s.toutf8,
        id: @id_table[url].to_s.toutf8,
        bm: @bm_table[url].to_i
      }
    end

    # The URL tie-breaker makes the bookmark ranking deterministic.
    rows.sort_by { |row| [-row[:bm], row[:url]] }
        .each_with_index { |row, index| row[:bm_rank] = index + 1 }

    puts '|*PageRank|*id|*サイト名|*はてブ順位|*はてブ数|'
    rows.sort_by { |row| [-row[:rank], row[:bm_rank]] }.each do |row|
      id_cell = row[:id].empty? ? '' : "id:#{row[:id]}"
      link = "<a href=\"#{row[:url]}\">#{row[:title]}</a>"
      puts "|#{row[:rank]}|#{id_cell}|#{link}|#{row[:bm_rank]}位|#{row[:bm]}|"
    end
  end

  private

  # Builds the API request path; the target is escaped so that characters such
  # as "&" or "#" inside it cannot alter the query string.
  def pagerank_request_path(target)
    "#{PAGERANK_PATH}?u=#{URI.encode_www_form_component(target)}"
  end

  # Extracts the rank from an API response body. An <Error-Code> element
  # anywhere in the document means the lookup failed.
  def parse_pagerank(xml)
    doc = REXML::Document.new(xml)
    return UNKNOWN_RANK if REXML::XPath.first(doc, '//Error-Code')

    rank_node = REXML::XPath.match(doc, '//Pagerank').last
    rank_node ? rank_node.text.to_i : UNKNOWN_RANK
  end

  # Returns the whitespace-normalised <title> text of +html+, or nil when
  # there is none.
  def extract_title(html)
    found = TITLE_TAG.match(html)
    return nil unless found

    text = found[1].gsub(/\s+/, ' ').strip
    text.empty? ? nil : text
  end

  # Returns the Hatena diary id contained in +url+, or nil when +url+ is not a
  # Hatena diary URL.
  def hatena_id(url)
    url[HATENA_DIARY_ID, 1]
  end

  # Shared XML-RPC client, created on first use. The library is loaded lazily
  # because xmlrpc is a separately installed gem on newer Rubies; the rest of
  # the class works without it.
  def hatena_client
    @hatena_client ||= begin
      require 'xmlrpc/client'
      XMLRPC::Client.new2(HATENA_XMLRPC_URL)
    end
  end
end

in_file = ARGV.shift
if in_file
  test = PR.new
  test.read_data(in_file)
  test.check_pagerank
  test.write_table
else
  warn "usage: #{File.basename($PROGRAM_NAME)} URL_LIST_FILE"
end