Skip to content

Instantly share code, notes, and snippets.

@jwilkins
Last active December 28, 2024 15:11
Show Gist options
  • Select an option

  • Save jwilkins/5308913 to your computer and use it in GitHub Desktop.

Select an option

Save jwilkins/5308913 to your computer and use it in GitHub Desktop.

Revisions

  1. jwilkins revised this gist Apr 4, 2013. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions httparallel.rb
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,6 @@
    #!/usr/bin/env ruby
    # Benchmark: `curl -o all.gz.curl http://download.openwall.net/pub/passwords/wordlists/all.gz` took 25s;
    # this script took 11s with 4 parallel requests.
    require 'typhoeus'

    def pfetch(url, splits=4)
  2. jwilkins created this gist Apr 4, 2013.
    63 changes: 63 additions & 0 deletions httparallel.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,63 @@
    require 'typhoeus'

    # Downloads +url+ into the current directory, naming the file after the
    # URL's basename.
    #
    # A HEAD request is issued first. When the response headers report a
    # Content-Length above 1,000,000 bytes together with "Accept-Ranges: bytes",
    # the body is fetched as +splits+ Range requests queued on one
    # Typhoeus::Hydra, each written to "<basename>.part_NNN", and the parts are
    # then concatenated in order. In every other case -- including when any
    # ranged request is not answered with 206 -- the file is fetched with a
    # single GET.
    #
    # @param url [String] resource to download
    # @param splits [Integer] number of range requests to issue (default 4)
    # @return [void]
    def pfetch(url, splits=4)
      basename = File.basename(url)
      headers = Typhoeus.head(url).response_headers.to_s

      # A missing header yields nil; nil.to_i is 0, which simply disables the
      # ranged path instead of raising on `nil > 1_000_000`.
      size = headers[/Content-Length: (\d+)/i, 1].to_i
      accepts_bytes = headers[/Accept-Ranges: (.*)$/i, 1].to_s.strip.downcase == 'bytes'

      if size > 1_000_000 && accepts_bytes && splits > 1
        return if pfetch_parts(url, basename, pfetch_byte_ranges(size, splits))

        puts "ranged download failed, falling back to a single request"
      end

      # File.open rather than Kernel#open: the name is derived from the URL and
      # Kernel#open would treat a leading "|" as a command to spawn.
      File.open(basename, 'wb') { |out| out << Typhoeus.get(url).body }
    end

    # Splits the byte span 0..size-1 into +splits+ contiguous [first, last]
    # pairs (both ends inclusive, as the HTTP Range header expects). The final
    # pair absorbs the remainder when +size+ is not divisible by +splits+.
    def pfetch_byte_ranges(size, splits)
      part_size = size / splits
      Array.new(splits) do |idx|
        first = idx * part_size
        last = idx == splits - 1 ? size - 1 : first + part_size - 1
        [first, last]
      end
    end

    # Fetches every range of +url+ through one Typhoeus::Hydra run and joins the
    # resulting part files into +basename+. Returns true on success. If any
    # request is answered with something other than 206, all part files are
    # removed and false is returned so the caller can fall back.
    def pfetch_parts(url, basename, ranges)
      # format('%s...') keeps a "%" inside the basename (e.g. "a%20b.gz") from
      # being interpreted as a format directive.
      part_names = Array.new(ranges.length) { |idx| format('%s.part_%03d', basename, idx) }
      failed = false

      hydra = Typhoeus::Hydra.new
      ranges.each_with_index do |(first, last), idx|
        req = Typhoeus::Request.new(url, followlocation: true,
                                    headers: { "Range" => "bytes=#{first}-#{last}" })
        req.on_complete do |resp|
          # Only 206 Partial Content shows the Range header was honoured; a 200
          # would carry the whole body and corrupt the concatenated result.
          if resp.code == 206
            puts "#{idx} done, #{resp.body.bytesize} bytes in #{resp.time}"
            File.open(part_names[idx], 'wb') { |part| part << resp.body }
          else
            puts "#{idx} returned #{resp.code}"
            failed = true
          end
        end
        hydra.queue(req)
      end
      hydra.run

      if failed
        part_names.each { |name| File.delete(name) if File.exist?(name) }
        return false
      end

      # combine parts
      File.open(basename, 'wb') do |out|
        part_names.each do |name|
          File.open(name, 'rb') { |part| IO.copy_stream(part, out) }
          File.unlink(name)
        end
      end
      true
    end

    # Download every URL given on the command line, using 4 splits each.
    ARGV.each do |target|
      pfetch(target, 4)
    end