Last active
March 4, 2022 09:23
-
-
Save tanema/2c752d3c9725c7ffea94 to your computer and use it in GitHub Desktop.
Revisions
-
tanema revised this gist
Oct 20, 2014 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -69,7 +69,7 @@ syncfile () { #check if file is already on the server file_count=$((0+$(aws s3 ls $_bucket/$1 | wc -l))) if [[ $file_count -gt 0 ]]; then log_ok "$status Already on server" else filename="_migration-$_current_file-$(uuidgen)" @@ -123,7 +123,7 @@ kill_all_workers () { #allows ctrl c to work in the while loop trap "kill_all_workers" SIGINT SIGHUP SIGTERM for ((i=0; i < $thread_count; ++i)); do echo "starting worker $i" #call process on this chunk of files process_lines $i $((lines_per_file * i)) & -
tanema revised this gist
Oct 17, 2014 . 1 changed file with 43 additions and 25 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,31 +1,52 @@ #! /bin/bash ###################### USAGE ###################################### usage() { echo " Usage: mongotos3 [-t n] mongo_host mongo_collection s3_bucket -t : number of parallel processes to use mongo_host : the host of the mongodb server mongo_collection : the collection to collecthe gridfs data from s3_bucket : the name of the bucket you want to cp the files to " } ###################### END USAGE ################################## # how many times to split up the list thread_count=8 # parrallel process pid array _worker_pids=() # incremented variable to see progress _current_file=1 # get options just -t for setting how many threads you want while getopts 't:*:' opt; do case $opt in t) thread_count=$OPTARG;; *) usage exit ;; esac done shift $((OPTIND-1)) # script params if [ "$#" -ne 3 ] then usage fi # mongo host _host="${1:?Mongo Host Required}" # mongo collection to pull grid_fs data from _db="${2:?Mongo Collection required}" # s3 bucket for everything to be synced to _bucket="${3:?AWS Bucket Required}" # all the files _files_list=$(mongofiles -h $_host -db $_db list) # total files to be synced _total_files=$(echo "$_files_list" | wc -l | awk {'print $1'}) # how many lines to send to each thread ((lines_per_file=(_total_files + thread_count - 1) / thread_count)) ###################### LOGGING #################################### RED=$(tput setaf 1) GREEN=$(tput setaf 2) NORMAL=$(tput sgr0) @@ -38,8 +59,10 @@ log_fail() { let COL=$(tput cols)-${#1}+${#RED}+${#NORMAL} printf "%s%${COL}s" "$1" "$RED[FAIL]$NORMAL" } ###################### END LOGGING ################################ ###################### METHOD DEFINITIONS ######################### # param $1: filepath from mongo # param $2: worker identity number syncfile () { status="(worker $2) $_current_file/$lines_per_file $_bucket/$1" ((_current_file++)) @@ -71,7 +94,8 @@ syncfile () { fi } # param: $1 worker identity number # param: $2 starting line number in the file to process process_lines () { while read -r line; do #get filename @@ -83,8 +107,8 @@ process_lines () { done < <(echo "$_files_list" | head -n $(($2 + $lines_per_file)) | tail -n $lines_per_file) } # used for kill signals # calls kill on each pid kill_all_workers () { echo 'killing all workers' for ((i=0; i <= ${#_worker_pids[@]}; ++i)); do @@ -94,17 +118,11 @@ kill_all_workers () { #cleanup any files that were interrupted rm _migration-* > /dev/null 2>&1 } ###################### END METHOD DEFINITIONS ##################### #allows ctrl c to work in the while loop trap "kill_all_workers" SIGINT SIGHUP SIGTERM for ((i=0; i <= $thread_count; ++i)); do echo "starting worker $i" #call process on this chunk of files -
tanema revised this gist
Oct 16, 2014 . 2 changed files with 0 additions and 98 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,35 +0,0 @@ This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,63 +0,0 @@ -
tanema revised this gist
Oct 16, 2014 . 1 changed file with 24 additions and 9 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -25,13 +25,29 @@ _total_files=$(echo "$_files_list" | wc -l | awk {'print $1'}) # how many lines to send to each thread ((lines_per_file = (_total_files + thread_count - 1) / thread_count)) RED=$(tput setaf 1) GREEN=$(tput setaf 2) NORMAL=$(tput sgr0) log_ok() { let COL=$(tput cols)-${#1}+${#GREEN}+${#NORMAL} printf "%s%${COL}s" "$1" "$GREEN[OK]$NORMAL" } log_fail() { let COL=$(tput cols)-${#1}+${#RED}+${#NORMAL} printf "%s%${COL}s" "$1" "$RED[FAIL]$NORMAL" } # param 1: filepath from mongo syncfile () { status="(worker $2) $_current_file/$lines_per_file $_bucket/$1" ((_current_file++)) #check if file is already on the server file_count=$((0+$(aws s3 ls $_bucket/$1 | wc -l))) if [[ $file_count -eq 0 ]]; then log_ok "$status Already on server" else filename="_migration-$_current_file-$(uuidgen)" #get file from gridfs and create a temp file of it @@ -42,19 +58,17 @@ syncfile () { aws s3 cp $filename s3://$_bucket/$1 --dryrun --quiet #send file status and if this file migration succeeded if [ $? -eq 0 ]; then log_ok "$status" else log_fail "$status" fi #rm temp file gotten from gridfs rm $filename else log_fail "$status Get from db failed" fi fi } # $1 is the split file list @@ -77,6 +91,8 @@ kill_all_workers () { kill -6 ${_worker_pids[i]} > /dev/null 2>&1 done echo 'migration aborted' #cleanup any files that were interrupted rm _migration-* > /dev/null 2>&1 } ############################################################################## @@ -105,5 +121,4 @@ done #if no errors say we are complete if [ $? -eq 0 ]; then echo DONE fi -
tanema revised this gist
Oct 15, 2014 . 1 changed file with 109 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,109 @@ #! /bin/bash #how many times to split up the list thread_count=8 while getopts 't:' opt; do case $opt in t) thread_count=$OPTARG;; esac done shift $((OPTIND-1)) # script params # mongo host _host="${1:?Usage: mongodb host}" #mongo collection to pull grid_fs data from _db="${2:?Usage: mongodb collection}" #s3 bucket for everything to be synced to _bucket="${3:?Usage: aws dest bucket}" #incremented variable to see progress _current_file=1 # all the files _files_list=$(mongofiles -h $_host -db $_db list) #total files to be synced _total_files=$(echo "$_files_list" | wc -l | awk {'print $1'}) # how many lines to send to each thread ((lines_per_file = (_total_files + thread_count - 1) / thread_count)) # param 1: filepath from mongo syncfile () { #check if file is already on the server file_count=$((0+$(aws s3 ls $_bucket/$1 | wc -l))) if [[ $file_count -gt 0 ]]; then echo "File is already on server" else filename="_migration-$_current_file-$(uuidgen)" #get file from gridfs and create a temp file of it mongofiles -h $_host -db $_db get --local $filename $1 > /dev/null 2>&1 #get file succeeded if [ $? -eq 0 ]; then #send it to s3 aws s3 cp $filename s3://$_bucket/$1 --dryrun --quiet #send file status and if this file migration succeeded if [ $? -eq 0 ]; then echo "$2 $_current_file / $lines_per_file $_bucket/$1 OK" else echo "$_current_file / $lines_per_file $_bucket/$1 FAIL" fi #rm temp file gotten from gridfs rm $filename else echo "$_current_file / $lines_per_file $_bucket/$1 FAIL get" fi fi ((_current_file++)) } # $1 is the split file list process_lines () { while read -r line; do #get filename file=$(echo "$line" | awk -F'\t' '{ print $1 }') #if connected message then continue [[ $file == 'connected to'* ]] && continue # sync the file with the server syncfile $file $1 done < <(echo "$_files_list" | head -n $(($2 + $lines_per_file)) | tail -n $lines_per_file) } #used for kill signals #calls kill on each pid kill_all_workers () { echo 'killing all workers' for ((i=0; i <= ${#_worker_pids[@]}; ++i)); do kill -6 ${_worker_pids[i]} > /dev/null 2>&1 done echo 'migration aborted' } ############################################################################## # # # MAIN METHOD AREA # # # ############################################################################## #allows ctrl c to work in the while loop trap "kill_all_workers" SIGINT SIGHUP SIGTERM _worker_pids=() for ((i=0; i <= $thread_count; ++i)); do echo "starting worker $i" #call process on this chunk of files process_lines $i $((lines_per_file * i)) & #record the pid for cleanup and waiting _worker_pids+=($!) done #wait for each process to finish for ((i=0; i <= ${#_worker_pids[@]}; ++i)); do wait ${_worker_pids[i]} > /dev/null 2>&1 done #if no errors say we are complete if [ $? -eq 0 ]; then echo DONE fi -
tanema revised this gist
Oct 14, 2014 . 1 changed file with 23 additions and 19 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,41 +1,44 @@ #! /bin/bash # script params # mongo host _host="${1:?Usage: mongodb host}" # mongo collection to pull grid_fs data from _db="${2:?Usage: mongodb collection}" # s3 bucket for everything to be synced to _bucket="${3:?Usage: aws dest bucket}" # incremented variable to see progress _current_file=1 # total files to be synced _total_files=$(wc -l < <(mongofiles -h $_host -db $_db list) | awk {'print $1'}) # param 1: filepath from mongo syncfile () { echo "$_current_file / $_total_files $_bucket/$1" #check if file is already on the server file_count=$((0+$(aws s3 ls $_bucket/$1 | wc -l))) if [[ $file_count -gt 0 ]]; then echo "File is already on server" else #get file from gridfs and create a temp file of it echo "creating $1" mongofiles -h $_host -db $_db get --local _temp $1 > /dev/null 2>&1 #get file succeeded if [ $? -eq 0 ]; then #send it to s3 echo "sending to s3://$_bucket/$1" aws s3 cp _temp s3://$_bucket/$1 > /dev/null 2>&1 #send file status if [ $? -eq 0 ]; then echo OK else echo FAIL fi #rm temp file gotten from gridfs rm _temp fi fi ((_current_file++)) @@ -50,10 +53,11 @@ while read -r line; do file=$(echo "$line" | awk -F'\t' '{ print $1 }') #if connected message then continue [[ $file == 'connected to'* ]] && continue # sync the file with the server syncfile $file done < <(mongofiles -h $_host -db $_db list) #if no errors say we are complete if [ $? -eq 0 ]; then echo DONE fi -
tanema revised this gist
Oct 14, 2014 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,7 @@ _db="${2:?Usage: mongodb collection}" _bucket="${3:?Usage: aws dest bucket}" _current_file=1 _total_files=$(wc -l < <(mongofiles -h $_host -db $_db list) | awk {'print $1'}) syncfile () { echo "$_current_file / $_total_files $_bucket/$1" -
tanema revised this gist
Oct 14, 2014 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ #! /bin/bash # this script will pull out one file at a time and push it to s3 # good if you have low space or big amounts of data, slower because it has to connect to amazon for each file # script params -
tanema revised this gist
Oct 14, 2014 . 2 changed files with 2 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,6 @@ #! /bin/bash # this script will pull all the files out of the database first then sync them # good if you have a lot of space or low data. faster for syncing all the files at once # intial script taken from: # http://blog.vladimirm.com/2011/06/export-files-from-mongodb-gridfs-with-directory-paths/ This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,6 @@ #! /bin/bash #this script will pull out one file at a time and push it to s3 # good if you have low space or big amounts of data, slower because it has to connect to amazon for each file # script params _host="${1:?Usage: mongodb host}" -
tanema revised this gist
Oct 14, 2014 . 2 changed files with 60 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,6 @@ #! /bin/bash # this script will pull all the files out of the database first then sync them # intial script taken from: # http://blog.vladimirm.com/2011/06/export-files-from-mongodb-gridfs-with-directory-paths/ # kudos to Vladimir Momirov This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,58 @@ #! /bin/bash #this script will pull out one file at a time and push it to s3 # script params _host="${1:?Usage: mongodb host}" _db="${2:?Usage: mongodb collection}" _bucket="${3:?Usage: aws dest bucket}" _current_file=1 _total_files=$(wc -l < <(mongofiles -h localhost -db shiftee_development list) | awk {'print $1'}) syncfile () { echo "$_current_file / $_total_files $_bucket/$1" file_count=$((0+$(aws s3 ls $_bucket/$1 | wc -l))) if [[ $file_count -gt 0 ]]; then echo "File is already on server" else #get file from gridfs and create a temp file of it echo "creating $1" mongofiles -h $_host -db $_db get --local _temp $1 > /dev/null 2>&1 if [ $? -eq 0 ]; then echo OK else echo FAIL fi #send it to s3 echo "sending to s3://$_bucket/$1" aws s3 cp _temp s3://$_bucket/$1 > /dev/null 2>&1 if [ $? -eq 0 ]; then echo OK else echo FAIL fi rm _temp fi ((_current_file++)) } #allows ctrl c to work in the while loop trap "break" SIGINT SIGHUP SIGTERM #for each file in gridfs while read -r line; do #get filename file=$(echo "$line" | awk -F'\t' '{ print $1 }') #if connected message then continue [[ $file == 'connected to'* ]] && continue syncfile $file done < <(mongofiles -h $_host -db $_db list) #if no errors say we are complete if [ $? -eq 0 ]; then echo DONE fi -
tanema revised this gist
Oct 6, 2014 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,8 +3,8 @@ # http://blog.vladimirm.com/2011/06/export-files-from-mongodb-gridfs-with-directory-paths/ # kudos to Vladimir Momirov # script params _host="${1:?Usage: mongodb host}" _db="${2:?Usage: mongodb collection}" _bucket="${3:?Usage: aws dest bucket}" #create temp folder to work in mkdir _uploads -
tanema revised this gist
Oct 6, 2014 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,7 @@ #! /bin/bash # intial script taken from: # http://blog.vladimirm.com/2011/06/export-files-from-mongodb-gridfs-with-directory-paths/ # kudos to Vladimir Momirov # script params _host="${1:?Usage: gridfs host db}" _db="${2:?Usage: gridfs host db}" -
tanema created this gist
Oct 6, 2014 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,29 @@ #! /bin/bash # script params _host="${1:?Usage: gridfs host db}" _db="${2:?Usage: gridfs host db}" _bucket="${3:?Usage: aws dest bucket}" #create temp folder to work in mkdir _uploads cd _uploads #for each file in gridfs while read -r line; do #get filename file=$(echo "$line" | awk -F'\t' '{ print $1 }') #if connected message then continue [[ $file == 'connected to'* ]] && continue #get the relative path to the file directory=${file%/*} # make the relative path to where the file should be located mkdir -p ./$directory #get file from gridfs and put it in its path mongofiles -h $_host -db $_db get $file done < <(mongofiles -h $_host -db $_db list) #sync the whole temp folder with the configured bucket echo "Syncing with $_bucket bucket" aws s3 sync ./ s3://$_bucket/ #rm the temp uploads folder echo "Cleaning Up" cd .. rm -rf _uploads echo "Done."