Forked from kaorimatz/scrape-apache-directory-index.sh
Last active
October 7, 2017 23:15
-
-
Save grmpfhmbl/d82d50ea54fcddbd936d to your computer and use it in GitHub Desktop.
Revisions
-
grmpfhmbl revised this gist
Jan 18, 2016 . 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -44,8 +44,8 @@ fetch() done for DIR in $DIRS; do echo "fetch $1/$DIR" fetch $1/$DIR done rm -f $INDEX_FILE -
grmpfhmbl revised this gist
Jan 18, 2016 . 1 changed file with 10 additions and 5 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -16,22 +16,26 @@ fetch() INDEX_URI=$BASE_URI$1 DIR=`basename $1` echo "INDEX_URI=$INDEX_URI" if [ ! -d $DIR ]; then mkdir $DIR fi pushd $DIR echo "INFO: Downloading $INDEX_URI" echo "curl -o $INDEX_FILE -s -L $INDEX_URI" curl -o $INDEX_FILE -s -L $INDEX_URI if [ $? -eq 0 ]; then DIRS=`grep '\[DIR\]' $INDEX_FILE | grep -v 'Parent Directory' | sed -e 's/.*href="\([^"]*\).*/\1/g'` TXTS=`grep '\[TXT\]' $INDEX_FILE | sed -e 's/.*href="\([^"]*\).*/\1/g'` IMGS=`grep '\[IMG\]' $INDEX_FILE | sed -e 's/.*href="\([^"]*\).*/\1/g'` UNKNOWNS=`grep '\[ \]' $INDEX_FILE | sed -e 's/.*href="\([^"]*\).*/\1/g'` for FILE in $TXTS $UNKNOWNS $IMGS; do FILE_URI=$INDEX_URI$FILE echo "INFO: Downloading $FILE_URI" curl -O -s -L -R $FILE_URI if [ $? -ne 0 ]; then @@ -40,7 +44,8 @@ fetch() done for DIR in $DIRS; do echo "fetch $1$DIR" fetch $1$DIR done rm -f $INDEX_FILE -
kaorimatz revised this gist
Jun 17, 2013 . 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -27,10 +27,10 @@ fetch() if [ $? -eq 0 ]; then TXTS=`grep '\[TXT\]' $INDEX_FILE | sed -e 's/.*href="\([^"]*\).*/\1/g'` UNKNOWNS=`grep '\[ \]' $INDEX_FILE | sed -e 's/.*href="\([^"]*\).*/\1/g'` DIRS=`grep '\[DIR\]' $INDEX_FILE | grep -v 'Parent Directory' | sed -e 's/.*href="\([^"]*\).*/\1/g'` for FILE in $TXTS $UNKNOWNS; do FILE_URI=$BASE_URI$FILE echo "INFO: Downloading $FILE_URI" curl -O -s -L -R $FILE_URI -
kaorimatz renamed this gist
Jun 17, 2013 . 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
kaorimatz created this gist
Jun 17, 2013 . There are no files selected for viewing
#!/bin/bash
#
# Recursively download the files listed in an Apache-style "fancy" directory
# index (rows tagged with icon alt texts such as [DIR], [TXT], [IMG], [   ]).
#
# usage: <script> <base_uri> <target_dir_path>
#
#   base_uri         URI prefix. It is concatenated verbatim with
#                    target_dir_path, so it normally ends with "/".
#   target_dir_path  Path below base_uri whose listing is mirrored.
#
# A directory named after the last component of target_dir_path is created in
# the current working directory; the remote tree is mirrored beneath it.
# Progress goes to stdout ("INFO: ..."), failures to stderr ("WARN: ...").

set -e

if [ $# -ne 2 ]; then
  echo "usage: $0 <base_uri> <target_dir_path>"
  exit 1
fi

BASE_URI=$1
TARGET_DIR_PATH=$2
INDEX_FILE='index.html'

# Print, one per line, the href of every row of $INDEX_FILE (in the current
# directory) whose text matches the basic regular expression $1.
extract_hrefs() {
  # A listing with no matching rows is not an error, hence "|| true".
  grep -- "$1" "$INDEX_FILE" | sed -e 's/.*href="\([^"]*\).*/\1/g' || true
}

# Mirror one remote directory and, recursively, its sub-directories.
#
# Globals:   BASE_URI, INDEX_FILE (read)
# Arguments: $1 - directory path relative to BASE_URI (trailing "/" optional)
# Returns:   non-zero only if the argument is empty or the local directory
#            cannot be created/entered; download failures are reported as
#            warnings and do not stop the run.
fetch() {
  # All state is local: fetch recurses, and globals would be clobbered by the
  # nested calls.
  local rel_path=$1
  local index_uri dir files dirs href file_uri

  if [ -z "$rel_path" ]; then
    echo "ERROR: empty directory path" 1>&2
    return 1
  fi

  # Listing hrefs are relative to the directory URI, so the path must end with
  # "/" before hrefs are appended to it.
  case "$rel_path" in
    */) ;;
    *) rel_path=$rel_path/ ;;
  esac
  index_uri=$BASE_URI$rel_path

  dir=$(basename -- "$rel_path")
  mkdir -p -- "$dir"
  pushd "$dir" > /dev/null

  echo "INFO: Downloading $index_uri"
  # curl is tested directly by "if": with "set -e" active, a separate
  # "$?" check would never be reached after a failure.
  # -f turns HTTP errors (404, 403, ...) into a non-zero exit status instead of
  # saving the server's error page.
  if curl -f -o "$INDEX_FILE" -s -L "$index_uri"; then
    # The "unknown type" icon is "[" + spaces + "]"; accept any number of
    # spaces so the match does not depend on exact whitespace.
    files=$(
      extract_hrefs '\[TXT\]'
      extract_hrefs '\[ \{1,\}\]'
      extract_hrefs '\[IMG\]'
    )
    dirs=$(grep -- '\[DIR\]' "$INDEX_FILE" \
      | grep -v 'Parent Directory' \
      | sed -e 's/.*href="\([^"]*\).*/\1/g' || true)

    # Everything needed has been parsed; remove the listing now so that a real
    # remote file called index.html is not deleted after being downloaded.
    rm -f -- "$INDEX_FILE"

    # fd 3 keeps the loop input separate from the stdin of curl and of the
    # recursive calls.
    while IFS= read -r -u 3 href; do
      [ -n "$href" ] || continue
      file_uri=$index_uri$href
      echo "INFO: Downloading $file_uri"
      if ! curl -f -O -s -L -R "$file_uri"; then
        echo "WARN: Failed to download: $file_uri" 1>&2
      fi
    done 3<<< "$files"

    while IFS= read -r -u 3 href; do
      case "$href" in
        # Skip empty, absolute and parent-pointing links: following them would
        # leave the target tree (remotely and locally) or recurse forever.
        '' | /* | *://* | .. | ../* | */.. | */../*) continue ;;
      esac
      fetch "$rel_path$href"
    done 3<<< "$dirs"
  else
    rm -f -- "$INDEX_FILE"
    echo "WARN: Failed to download directory index: $index_uri" 1>&2
  fi

  popd > /dev/null
}

fetch "$TARGET_DIR_PATH"