Skip to content

Instantly share code, notes, and snippets.

@assembler
Last active September 20, 2023 15:39
Show Gist options
  • Save assembler/adce5c2b2eaa6a7e9fa88eba78571443 to your computer and use it in GitHub Desktop.
Save assembler/adce5c2b2eaa6a7e9fa88eba78571443 to your computer and use it in GitHub Desktop.

Revisions

  1. assembler renamed this gist Apr 15, 2016. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. assembler created this gist Apr 15, 2016.
    67 changes: 67 additions & 0 deletions Ruby File Streaming - line by line
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,67 @@
    require "uri"
    require "net/http"

    # Streams the body of a remote HTTP(S) resource and yields it one line at a
    # time, without loading the whole response into memory.
    class RemoteFileStream
      # @param url [String] address of the resource; the scheme decides whether TLS is used
      # @param headers [Hash] request header names mapped to values, set on the GET request
      def initialize(url, headers: {})
        @uri = URI(url)
        @headers = headers
      end

      # Issues a GET request and yields each "\n"-separated line of the body.
      #
      # The response status is not inspected here, so the body of an error
      # response is streamed like any other body.
      #
      # @yieldparam line [String] one line of the body, without the trailing "\n"
      # @return [Enumerator] when called without a block
      def each_line(&block)
        return enum_for(:each_line) unless block

        Net::HTTP.start(@uri.host, @uri.port, use_ssl: @uri.scheme == 'https') do |http|
          req = Net::HTTP::Get.new(@uri.request_uri)
          @headers.each { |k, v| req[k] = v }

          http.request(req) do |response|
            remainings = ""
            response.read_body do |chunk|
              chunk = remainings + chunk
              i = chunk.rindex("\n")

              # A chunk may end (or lie entirely) inside a line. Without a
              # newline nothing is complete yet, so keep buffering.
              if i.nil?
                remainings = chunk
                next
              end

              # Everything after the last newline is an unfinished line that
              # is carried over to the next chunk.
              remainings = chunk[(i + 1)..-1]
              each_chunk(chunk[0..i], &block)
            end

            # Flush the final line when the body does not end with a newline.
            each_chunk(remainings, &block)
          end
        end
      end

      private

      # Splits a run of complete lines on "\n" and yields each one.
      def each_chunk(chunk, &block)
        chunk.split("\n").each(&block)
      end
    end


    require "csv"
    # Adapts a line-oriented stream into CSV rows. The first non-empty line is
    # treated as the header row; every later row is yielded as a Hash keyed by
    # the header names (as symbols).
    class RemoteCsvStream
      # @param remote_file_stream [#each_line] any object that yields one String per line
      def initialize(remote_file_stream)
        @remote_file_stream = remote_file_stream
      end

      # Parses the stream line by line and yields each data row.
      #
      # Each line is parsed on its own, so quoted fields that contain embedded
      # newlines are not supported.
      #
      # @yieldparam row [Hash{Symbol => String, nil}] header name mapped to the cell value
      # @return [Enumerator] when called without a block
      def each_row(&block)
        return enum_for(:each_row) unless block

        # Headers are local to one pass so that calling each_row again
        # re-reads the header line instead of yielding it as data.
        headers = nil

        @remote_file_stream.each_line do |line|
          row = CSV.parse_line(line)
          # Blank lines parse to nil (or an empty row); there is nothing to yield.
          next if row.nil? || row.empty?

          if headers.nil?
            # to_s guards against an empty header cell, which parses as nil.
            headers = row.map { |name| name.to_s.to_sym }
            next
          end

          yield(Hash[headers.zip(row)])
        end
      end
    end

    # ---------------------------------

    # Command-line entry point: streams the CSV at the given URL using a bearer
    # token and prints two columns of every row.
    #
    # A missing argument is reported with `abort`, which writes the message to
    # stderr and exits with a non-zero status so callers can detect the failure.
    uri = ARGV.fetch(0) { abort("must provide url as first parameter") }
    token = ARGV.fetch(1) { abort("must provide access token as second parameter") }

    rfs = RemoteFileStream.new(uri, headers: { "Authorization" => "Bearer #{token}" })
    csv = RemoteCsvStream.new(rfs)
    csv.each_row do |row|
      printf("[%s] %f\n", row[:channel_id], row[:estimated_partner_revenue])
    end