#!/usr/bin/ruby
# frozen_string_literal: true

# Imports a CSV file into Neo4j in batches.
#
# Usage: <script> FILENAME LINES [PREFIX]
#
#   FILENAME - CSV file to import; its first line is treated as the header row.
#   LINES    - maximum number of data rows per batch (positive integer).
#   PREFIX   - accepted for backward compatibility; not used by this script.
#
# For every batch the header plus up to LINES data rows are written to
# CHUNK_PATH, the Cypher query below is written to QUERY_PATH with
# %%FILENAME%% replaced by CHUNK_PATH, and neo4j-shell is run on that file.

require 'tempfile'

# Location of the per-batch CSV chunk that the Cypher LOAD CSV clause reads.
CHUNK_PATH = '/home/ubuntu/csvchunk.csv'
# Location of the generated Cypher script handed to neo4j-shell.
QUERY_PATH = 'queryfile'

filename, lines, _prefix = ARGV
abort("missing file or lines") unless filename && lines

lines = lines.to_i
# A zero or non-numeric batch size could never make progress.
abort("lines must be a positive integer") unless lines > 0

# Cypher template; %%FILENAME%% is substituted with the chunk path below.
# The line breaks matter: each `//` comment runs to the end of its line.
neo_query = <<-EOF
// load csv
LOAD CSV WITH HEADERS FROM "file:%%FILENAME%%" AS csvLine
// Find or create a user node with an id value of the userId field from the CSV
MERGE (user:User { id: toInt(csvLine.userId) })
// Find or create an alias node with a display name value (need to ensure this is not merging people with same names without taking email and such in to consideration)
MERGE (alias:Alias { name: csvLine.displayName })
// Create the KNOWS relationship between the user we found or created, and an empty contact niode that we are creating
// We set a merge false field so a subsequent process will know that it needs to be worked on
// Then link the empty contact node to the alias node we found or created
CREATE (user)-[:KNOWS]->(contact:Contact { merge: false })-[:ALIAS]->(alias)
// Loop over each Email address
FOREACH (address IN split(replace(replace(replace(replace(csvLine.emails,'[',''),']',''),'"',''),' ',''),',') |
  // Find or create an email address node
  MERGE (email:Email { address: address })
  // If we find an email address (not create) set merge = true so we know how to deal with it later
  ON MATCH SET contact.merge = true
  // finally create the relationship between the contact we created in the above block, and the email we found or created here.
  CREATE (contact)-[:EMAIL]->(email)
)
// Loop over each phone number
FOREACH (number IN split(replace(replace(replace(replace(csvLine.phoneNumbers,'[',''),']',''),'"',''),' ',''),',') |
  // find or create a phone number node
  MERGE (phone:Phone { number: number })
  // If we find a phopne number (not create), set merge = true so we know how to deal with it late
  ON MATCH SET contact.merge = true
  // finally create the relationship between the contact we created in the above block, and the phone we found or created here.
  CREATE (contact)-[:PHONE]->(phone)
);
// Phase 2
// Delete null Email addresses and Phone numbers ( a result in us using FOREACH on an empty array)
MATCH (email:Email { address: '' })<-[e:EMAIL]-() DELETE e, email;
MATCH (phone:Phone { number: '' })<-[p:PHONE]-() DELETE p, phone;
// End phase 2
EOF

# The substituted query is identical for every batch, so build it once.
query = neo_query.gsub(/%%FILENAME%%/, CHUNK_PATH)

File.open(filename, 'r') do |f|
  total_imported = 0
  headers = f.gets

  # Read until the input is exhausted instead of pre-counting lines with an
  # external `wc`; this avoids interpolating the filename into a shell command
  # and never produces a header-only batch.
  until f.eof?
    puts "making file."
    rows = 0
    File.open(CHUNK_PATH, 'w') do |chunk|
      chunk.puts headers
      # Copy at most `lines` data rows; the last batch may be shorter.
      while rows < lines && (line = f.gets)
        chunk.puts line
        rows += 1
      end
    end

    puts "making query file now."
    File.open(QUERY_PATH, 'w') { |query_file| query_file.puts query }

    start = Time.now
    `/usr/bin/neo4j-shell -file #{QUERY_PATH} /dev/null`
    # Surface a failed run instead of silently reporting it as imported.
    warn "neo4j-shell failed: #{$?}" unless $?.success?
    total_time = Time.now - start

    # Count the rows actually written, not the requested batch size.
    total_imported += rows
    puts "#{total_imported} total, last batch: #{total_time}"
  end
end