require 'rubygems'
require 'fileutils'
require 'nokogiri'
require 'open-uri'
require 'httparty'

# Bulk downloader for 3D Warehouse search results.
#
# Usage: ruby sketchup_downloader.rb path/to/terms.csv path/to/folder
#
# Reads comma-separated search terms from the first argument and, for each
# term, walks the paginated search results and saves every model that is not
# already on disk to <folder>/<term>/<model id>.skp.

unless ARGV[0] && ARGV[1]
  puts "Usage: ruby sketchup_downloader.rb path/to/terms.csv path/to/folder"
  exit 1
end

# File.read instead of Kernel#open: with open-uri loaded, Kernel#open on a
# user-supplied string can open URLs or spawn processes ("|cmd").
# Terms are stripped so a trailing newline in the CSV does not end up in
# directory names or query strings; blank entries are dropped.
SEARCH_TERMS = File.read(ARGV[0]).split(",").map { |t| t.strip }.reject { |t| t.empty? }
TARGET_FOLDER = ARGV[1]

puts "Search terms: #{SEARCH_TERMS.inspect}"
puts "Target folder: #{TARGET_FOLDER}"

ROOT_URL = "http://sketchup.google.com"

# Step between consecutive values of the "start" query parameter.
PAGE_SIZE = 12

# collect all known file names
@known_models = Dir.glob("#{TARGET_FOLDER}/*/*.skp").collect { |path| File.basename(path) }
puts "Known models: #{@known_models.join(",")}"

# Downloads every model listed in the search results for +term+, beginning at
# result offset +start+ and following the pager until no ".pager_next"
# element is present.
#
# term  - search term; also the name of the sub-folder of TARGET_FOLDER that
#         receives the files (the folder must already exist).
# start - offset of the first result to fetch (default 0).
#
# Models whose "<model number>.skp" is already in @known_models are skipped;
# newly saved file names are appended to @known_models.
def download_all_pages_for(term, start=0)
  # Iterate instead of recursing so a long result list cannot exhaust the stack.
  loop do
    # URI.escape was removed in Ruby 3.0; form-encode the query value instead.
    fetch_url = "#{ROOT_URL}/3dwarehouse/search?q=#{URI.encode_www_form_component(term)}&start=#{start}&scoring=t"
    puts "fetching URL #{fetch_url}"

    # OpenURI::OpenRead#read works on every Ruby version, unlike Kernel#open
    # with a URL string (unsupported since Ruby 3.0).
    page = URI.parse(fetch_url).read
    doc = Nokogiri::HTML(page)

    results = doc.css(".dwnld")
    puts "#{results.length} results (#{start}-#{start + PAGE_SIZE - 1})"

    results.each do |link|
      href = link["href"]
      next if href.nil? || href.empty?

      download_url = ROOT_URL + href

      # parse the model number out of the url
      model_number = download_url.split(/\&|\?|=/)[2]

      # if we already have a file for that model, skip it
      if @known_models.include?("#{model_number}.skp")
        puts "skipping #{model_number}.skp. we already have it"
        next
      end

      puts "downloading #{download_url}"
      result = HTTParty.get(download_url)

      # Do not save an error page under a .skp name.
      unless result.code == 200
        puts "skipping #{download_url}: HTTP #{result.code}"
        next
      end

      # Prefer the id reported by the server; fall back to the id from the URL
      # when the header is absent. File.basename keeps the name inside the
      # term folder even if the value contains path separators.
      model_id = result.headers["x-3dwarehouse-modelid"] || model_number
      if model_id.nil? || model_id.to_s.empty?
        puts "skipping #{download_url}: could not determine a model id"
        next
      end
      filename = File.basename(model_id.to_s) + ".skp"

      # Binary mode and the raw body: the payload is a model file, not text.
      File.open("#{TARGET_FOLDER}/#{term}/#{filename}", "wb") { |f| f << result.body }

      # add this new filename to the list of known models
      @known_models << filename
    end

    break if doc.css(".pager_next").empty?
    start += PAGE_SIZE
  end
end

SEARCH_TERMS.each do |term|
  puts
  puts "Searching for #{term}..."
  puts "========================"
  puts "mkdir #{TARGET_FOLDER}/#{term}"
  # FileUtils instead of a backtick shell call: no shell interpolation of the
  # term, and no failure when the folder already exists.
  FileUtils.mkdir_p(File.join(TARGET_FOLDER, term))
  puts
  download_all_pages_for(term)
end