Last active: March 29, 2016 18:27
Revisions
raydog revised this gist
Mar 29, 2016. No changes.
raydog revised this gist
Oct 10, 2015. 1 changed file with 1 addition and 1 deletion.
The single changed line sets `max_pages` to 10 (the previous revision had left it at `1 // 0`):

```diff
@@ -31,7 +31,7 @@ const (
 	download_fmt string = "https:%s"
 	album_cutoff int    = 8
 	num_workers  int    = 5
-	max_pages    int    = 1 // 0
+	max_pages    int    = 10
 )

 // Used for parsing the AJAX endpoints:
```
raydog revised this gist
Oct 10, 2015. 1 changed file with 126 additions and 19 deletions.
This revision adds the AJAX album endpoint (the JSON structs plus `httpAJAX`/`fetchAJAXUrls`), a shared `httpGET` helper that sets the fake user-agent, an `extractFilename` helper, a fallback for malformed listing pages, and a flag-based `main` with a `-v` verbosity switch. The changed hunks, shown as they read after the revision:

```go
@@ -6,9 +6,12 @@
package main

import (
    "encoding/json"
    "flag"
    "fmt"
    "github.com/PuerkitoBio/goquery"
    "io"
    "io/ioutil"
    "log"
    "net/http"
    "net/url"

@@ -23,16 +26,44 @@ const (
    base_url_fmt   string = "https://imgur.com/r/%s"
    next_url_fmt   string = "https://imgur.com/r/%s/new/page/%d/hit?scrolled"
    detail_url_fmt string = "https://imgur.com%s"
    ajax_url_fmt   string = "https://imgur.com/ajaxalbums/getimages/%s/hit.json?all=true"
    ajax_img_fmt   string = "https://i.imgur.com/%s%s"
    download_fmt   string = "https:%s"
    album_cutoff   int    = 8
    num_workers    int    = 5
    max_pages      int    = 1 // 0
)

// Used for parsing the AJAX endpoints:
type PostDetail struct {
    Hash      string `json:"hash"`
    Title     string `json:"title"`
    Desc      string `json:"description"`
    Width     int    `json:"width"`
    Height    int    `json:"height"`
    Size      int    `json:"size"`
    Ext       string `json:"ext"`
    Anim      bool   `json:"animated"`
    PreferVid bool   `json:"prefer_video"`
    Looping   bool   `json:"looping"`
    Timestamp string `json:"datetime"`
}

type ListData struct {
    Count  int          `json:"count"`
    Images []PostDetail `json:"images"`
}

type AJAXResponse struct {
    Data    ListData `json:"data"`
    Success bool     `json:"success"`
    Status  int      `json:"status"`
}

func (pd PostDetail) GetURL() string {
    if pd.Hash == "" || pd.Ext == "" {
        return ""
    }
    return fmt.Sprintf(ajax_img_fmt, pd.Hash, pd.Ext)
}

@@ -60,42 +91,53 @@ func urlGenerator(seed string) chan string {
    base := fmt.Sprintf(base_url_fmt, seed)
    go (func() {
        out <- base
        for n := 1; n < max_pages; n++ {
            out <- fmt.Sprintf(next_url_fmt, seed, n)
        }
        close(out)
    })()
    return out
}

// Performs an HTTP GET, with the correct fake headers:
func httpGET(url string) (*http.Response, error) {
    request, err := http.NewRequest("GET", url, nil)
    if err != nil {
        return nil, err
    }
    request.Header.Set("User-Agent", user_agent)
    return http.DefaultClient.Do(request)
}

// A stupid hack so we can manipulate our user-agent when fetching pages:
func buildGoQueryDocument(url string) (*goquery.Document, error) {
    resp, err := httpGET(url)
    if err != nil {
        return nil, err
    }
    return goquery.NewDocumentFromResponse(resp)
}

func extractFilename(link string) (string, error) {
    parsed, err := url.Parse(link)
    if err != nil {
        return "", err
    }
    return path.Base(parsed.Path), nil
}

// Download a file... Unless we already have it:
func maybeDownload(link string) {
    fname, err := extractFilename(link)
    if err != nil {
        log.Printf("Cannot download [%s] : Bad link. %v", link, err)
        return
    }

    stat, err := os.Stat(fname)
    if err == nil && stat.Size() > 0 {
        log.Printf("Already have '%s'. Skipping.", fname)

@@ -109,7 +151,7 @@ func maybeDownload(link string) {
    }
    defer destFile.Close()

    httpResp, err := httpGET(link)
    if err != nil {
        log.Printf("Couldn't download '%s': %v", fname, err)
        return

@@ -162,12 +204,69 @@ func fetchUrlList(link string) (pageNum int, urls chan string) {
                urls <- href
            }
        })

        // If page was malformed, and/or had no useable content, just send back page -1
        if !pageNumSent {
            log.Printf("Page [%s] contained no usable data", link)
            pageNumChan <- -1
        }
    })()

    pageNum = <-pageNumChan
    return
}

func httpAJAX(detailLink string) ([]byte, error) {
    albumId, err := extractFilename(detailLink)
    if err != nil {
        return nil, err
    }

    albumUrl := fmt.Sprintf(ajax_url_fmt, albumId)
    resp, err := httpGET(albumUrl)
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()

    if resp.StatusCode > 299 {
        return nil, fmt.Errorf("Bad status code: %d", resp.StatusCode)
    }

    return ioutil.ReadAll(resp.Body)
}

// Will use the AJAX endpoint to pluck all images in an album out:
func fetchAJAXUrls(detailLink string) chan string {
    out := make(chan string)

    data, err := httpAJAX(detailLink)
    if err != nil {
        close(out)
        return out
    }

    go (func() {
        defer close(out)

        parsed := AJAXResponse{}
        err = json.Unmarshal(data, &parsed)
        if err != nil {
            log.Printf("AJAX Parse failed: %v", err)
            return
        }

        for _, img := range parsed.Data.Images {
            if imgUrl := img.GetURL(); imgUrl != "" {
                out <- imgUrl
            }
        }
    })()

    return out
}

// Given the URL to a post detail page, returns the URLs to download:
func fetchDownloadUrls(detailLink string) chan string {
    out := make(chan string)

@@ -192,7 +291,10 @@ func fetchDownloadUrls(detailLink string) chan string {
        // Albums could have TONS of pics, so use AJAX if too many pics:
        if doc.Find("div.post-image").Length() >= album_cutoff {
            log.Printf("Large album: %s", detailLink)
            for linkz := range fetchAJAXUrls(detailLink) {
                out <- linkz
            }
            return
        }

@@ -220,19 +322,23 @@ func imageWorker(urls chan string, workerName string) chan bool {
                maybeDownload(downloadMe)
            }
        }
    })()
    return out
}

// Main func parses args, and sets things up:
func main() {
    verbose := flag.Bool("v", false, "Verbosely log what's happening")
    flag.Parse()
    target := flag.Arg(0)

    if !(*verbose) {
        log.SetOutput(ioutil.Discard)
    }

    imageChan := fetchAllImageLinks(target)

    var workers [num_workers]chan bool
    for i := range workers {

@@ -243,5 +349,6 @@ func main() {
    for _, w := range workers {
        _ = <-w
    }

    log.Printf("Done.")
}
```
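The new `AJAXResponse`/`ListData`/`PostDetail` structs decode the album JSON with nothing more than `encoding/json` struct tags. The stand-alone sketch below illustrates the idea with a fabricated payload shaped like those tags; it is not a captured Imgur response, and only a subset of the fields is shown:

```go
// Illustrative only: how structs like the revision's AJAXResponse / ListData /
// PostDetail decode an album listing. The JSON below is a fabricated sample
// shaped after the struct tags, not a real Imgur response.
package main

import (
    "encoding/json"
    "fmt"
    "log"
)

type PostDetail struct {
    Hash string `json:"hash"`
    Ext  string `json:"ext"`
}

type ListData struct {
    Count  int          `json:"count"`
    Images []PostDetail `json:"images"`
}

type AJAXResponse struct {
    Data    ListData `json:"data"`
    Success bool     `json:"success"`
    Status  int      `json:"status"`
}

func main() {
    sample := []byte(`{
        "data": {"count": 2, "images": [
            {"hash": "aaaaaaa", "ext": ".jpg"},
            {"hash": "bbbbbbb", "ext": ".gif"}
        ]},
        "success": true,
        "status": 200
    }`)

    var parsed AJAXResponse
    if err := json.Unmarshal(sample, &parsed); err != nil {
        log.Fatalf("AJAX parse failed: %v", err)
    }

    // Same URL construction as the gist's PostDetail.GetURL helper:
    for _, img := range parsed.Data.Images {
        fmt.Printf("https://i.imgur.com/%s%s\n", img.Hash, img.Ext)
    }
}
```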
raydog revised this gist
Oct 8, 2015. 1 changed file with 207 additions and 199 deletions.
The rewrite replaces the hand-rolled golang.org/x/net/html parsing with goquery and adds the download and worker plumbing. The file as of this revision:

```go
// rDump -- Dumps images in an Imgur sub-reddit thing
// Dependencies:
//   go get github.com/PuerkitoBio/goquery

package main

import (
    "fmt"
    "github.com/PuerkitoBio/goquery"
    "io"
    "log"
    "net/http"
    "net/url"
    "os"
    "path"
    "strconv"
)

// Magic values go here:
const (
    user_agent     string = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.130 Safari/537.36"
    base_url_fmt   string = "https://imgur.com/r/%s"
    next_url_fmt   string = "https://imgur.com/r/%s/new/page/%d/hit?scrolled"
    detail_url_fmt string = "https://imgur.com%s"
    download_fmt   string = "https:%s"
    album_cutoff   int    = 8
    num_workers    int    = 5
)

// Enums for the image plucking:
const (
    state_normal int = 0
    state_image  int = 1
)

// From a subreddit name, fetches all urls from that subreddit:
func fetchAllImageLinks(subreddit string) chan string {
    // We give this channel a buffer, just so that page changes are less likely to
    // block image workers:
    out := make(chan string, 10)
    go (func() {
        defer close(out)
        for link := range urlGenerator(subreddit) {
            pageNo, linkChannel := fetchUrlList(link)
            log.Printf("Entering Page #%d : %s", pageNo, link)
            for link := range linkChannel {
                out <- link
            }
        }
    })()
    return out
}

// Given a subreddit name, returns a channel of URLs to scrape:
func urlGenerator(seed string) chan string {
    out := make(chan string)
    base := fmt.Sprintf(base_url_fmt, seed)
    go (func() {
        out <- base
        for n := 1; n <= 1; n++ {
            out <- fmt.Sprintf(next_url_fmt, seed, n)
        }
        close(out)
    })()
    return out
}

// A stupid hack so we can manipulate our user-agent when fetching pages:
func buildGoQueryDocument(url string) (*goquery.Document, error) {
    request, err := http.NewRequest("GET", url, nil)
    if err != nil {
        return nil, err
    }
    request.Header.Set("User-Agent", user_agent)
    resp, err := http.DefaultClient.Do(request)
    if err != nil {
        return nil, err
    }
    return goquery.NewDocumentFromResponse(resp)
}

// Download a file... Unless we already have it:
func maybeDownload(link string) {
    parsed, err := url.Parse(link)
    if err != nil {
        log.Printf("Cannot download [%s] : Bad link. %v", link, err)
        return
    }

    fname := path.Base(parsed.Path)
    stat, err := os.Stat(fname)
    if err == nil && stat.Size() > 0 {
        log.Printf("Already have '%s'. Skipping.", fname)
        return
    }

    destFile, err := os.Create(fname)
    if err != nil {
        log.Printf("Failed to create '%s': %v", fname, err)
        return
    }
    defer destFile.Close()

    httpResp, err := http.Get(link)
    if err != nil {
        log.Printf("Couldn't download '%s': %v", fname, err)
        return
    }
    defer httpResp.Body.Close()

    if httpResp.StatusCode > 299 {
        log.Printf("Download failed for '%s': Status code: %d", fname, httpResp.StatusCode)
        return
    }

    n, err := io.Copy(destFile, httpResp.Body)
    if err != nil {
        log.Printf("Download failed for '%s': %v", fname, err)
        return
    }

    log.Printf("Downloaded successful: '%s' (%d bytes)", fname, n)
}

// Parses images and the data-page thing out of the entry lists:
func fetchUrlList(link string) (pageNum int, urls chan string) {
    pageNum, urls = -1, make(chan string)

    doc, err := buildGoQueryDocument(link)
    if err != nil {
        log.Printf("Failed to read URL: %s", link)
        close(urls)
        return
    }

    // We recieve a single value on this, which is the page num:
    pageNumSent, pageNumChan := false, make(chan int)
    defer close(pageNumChan)

    go (func() {
        defer close(urls)
        doc.Find("a.image-list-link").Each(func(_ int, s *goquery.Selection) {
            page, pageExists := s.Attr("data-page")
            href, hrefExists := s.Attr("href")

            if pageExists && !pageNumSent {
                pageNo, _ := strconv.ParseInt(page, 10, 32)
                pageNumSent = true
                pageNumChan <- int(pageNo)
            }

            if hrefExists {
                urls <- href
            }
        })
    })()

    pageNum = <-pageNumChan
    return
}

// Given the URL to a post detail page, returns the URLs to download:
func fetchDownloadUrls(detailLink string) chan string {
    out := make(chan string)

    detailUrl := fmt.Sprintf(detail_url_fmt, detailLink)
    doc, err := buildGoQueryDocument(detailUrl)
    if err != nil {
        log.Printf("Failed to read detail URL: %s", detailUrl)
        close(out)
        return out
    }

    _maybeSend := func(s string, exists bool) {
        if exists && s != "" {
            fullUrl := fmt.Sprintf(download_fmt, s)
            out <- fullUrl
        }
    }

    go (func() {
        defer close(out)

        // Albums could have TONS of pics, so use AJAX if too many pics:
        if doc.Find("div.post-image").Length() >= album_cutoff {
            log.Printf("AJAX album: %s (TODO)", detailLink)
            return
        }

        // Else, emit a single entry:
        doc.Find("div.post-image").Each(func(_ int, s *goquery.Selection) {
            _maybeSend(s.Find("img").Attr("src"))
            _maybeSend(s.Find("source").Attr("src"))
        })
    })()

    return out
}

// Will read from a channel, downloading links until the channel dies:
func imageWorker(urls chan string, workerName string) chan bool {
    out := make(chan bool)
    go (func() {
        defer close(out)
        log.Printf("Starting up worker: %s", workerName)
        for link := range urls {
            log.Printf("%s : Handling %s", workerName, link)
            for downloadMe := range fetchDownloadUrls(link) {
                log.Printf("%s : Found: %s", workerName, downloadMe)
                maybeDownload(downloadMe)
            }
        }
        out <- true
    })()
    return out
}

// Main func parses args, and sets things up:
func main() {
    if len(os.Args) == 1 {
        log.Fatalf("Not enough arguments")
    }

    subreddit := os.Args[1]
    imageChan := fetchAllImageLinks(subreddit)

    var workers [num_workers]chan bool
    for i := range workers {
        name := fmt.Sprintf("Worker[%d]", i+1)
        workers[i] = imageWorker(imageChan, name)
    }

    for _, w := range workers {
        _ = <-w
    }

    log.Printf("Done.")
}
```
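Stripped of the scraping details, the concurrency layout in `imageWorker` and `main` above is a simple fan-out: one channel of links feeds a fixed array of workers, and `main` drains one done-channel per worker before exiting. A minimal, self-contained sketch of that pattern follows; the names and the fake link data are illustrative, not from the gist:

```go
// Minimal sketch of the fan-out/drain pattern used by imageWorker and main:
// a single producer channel, a fixed number of workers, and one "done"
// channel per worker that main drains before exiting. All data is fake.
package main

import (
    "fmt"
    "log"
)

const numWorkers = 5

func worker(links chan string, name string) chan bool {
    done := make(chan bool)
    go func() {
        defer close(done)
        for link := range links {
            log.Printf("%s : Handling %s", name, link)
        }
        done <- true
    }()
    return done
}

func main() {
    links := make(chan string, 10)
    go func() {
        defer close(links)
        for i := 0; i < 20; i++ {
            links <- fmt.Sprintf("/gallery/item-%d", i) // fabricated links
        }
    }()

    var workers [numWorkers]chan bool
    for i := range workers {
        workers[i] = worker(links, fmt.Sprintf("Worker[%d]", i+1))
    }
    for _, w := range workers {
        <-w // block until every worker has finished
    }
    log.Printf("Done.")
}
```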
raydog created this gist
Oct 7, 2015.
The initial upload parses the listing pages by hand with golang.org/x/net/html. Several functions are still incomplete in this draft, which is largely rewritten in the Oct 8 revision:

```go
// rDump -- Dumps images in an Imgur sub-reddit thing

package main

import (
    "os"
    "log"
    "fmt"
    "strings"
    "strconv"
    "golang.org/x/net/html"
    "net/http"
    "io/ioutil"
)

// Magic values go here:
const (
    base_url_fmt     string = "https://imgur.com/r/%s"
    next_url_fmt     string = "%s/new/page/%d/hit?scrolled"
    detail_url_fmt   string = "https://imgur.com%s"
    image_class_name string = "image-list-link"
    post_class_name  string = "post-image"
    num_workers      int    = 5
)

// Enums for the image plucking:
const (
    state_normal  int = 0
    state_picture int = 1
)

// From a subreddit name, fetches all urls from that subreddit:
func fetchAllImageLinks(subreddit string) chan string {
    out := make(chan string)
    go (func() {
        defer close(out)
        for url := range urlGenerator(subreddit) {
            for link := range scrapeURL(url, true) {
                out <- link
            }
        }
    })()
    return out
}

// Given a subreddit name, returns a channel of URLs to scrape:
func urlGenerator(seed string) chan string {
    out := make(chan string)
    base := fmt.Sprintf(base_url_fmt, seed)
    go (func() {
        out <- base
        for n := 1; n <= 2; n++ {
            out <- fmt.Sprintf(next_url_fmt, base, n)
        }
        close(out)
    })()
    return out
}

// Will perform an HTTP request, and return the code + content as a string:
func httpGet(url string) (body string, status int) {
    log.Printf("Fetching %s...", url)
    resp, err := http.Get(url)
    if err != nil {
        log.Printf("HTTP FAIL (%s) : %v", url, err)
        return "", 999
    }
    defer resp.Body.Close()

    bytes, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        log.Printf("HTTP READ FAIL (%s) : %v", url, err)
        return "", 999
    }

    return string(bytes), resp.StatusCode
}

// Given an html node, will try to find a val in the attribute list: (linear)
func getNodeAttr(n *html.Node, name string) string {
    for _, a := range n.Attr {
        if a.Key == name {
            return a.Val
        }
    }
    return ""
}

// Parses images out of the html + the data-page thing:
func parseListHtml(body string) (pageNum int, urls chan string) {
    pageNum, urls = -1, make(chan string)

    tree, err := html.Parse(strings.NewReader(body))
    if err != nil {
        panic(err)
    }

    // We recieve a single value on this, which is the page num:
    pageNumSent, pageNumChan := false, make(chan int)
    defer close(pageNumChan)

    // Recursive func to search for <a> links
    var _findImages func(*html.Node)
    _findImages = func(n *html.Node) {
        if n.Type == html.ElementNode && n.Data == "a" {
            elemHref := getNodeAttr(n, "href")
            elemPage := getNodeAttr(n, "data-page")
            elemClass := getNodeAttr(n, "class")

            // If correct class:
            if elemClass == image_class_name {

                // First, check out the page number:
                pageNo, err := strconv.ParseInt(elemPage, 10, 32)
                if err == nil && !pageNumSent {
                    pageNumSent = true
                    pageNumChan <- int(pageNo)
                }

                // Then emit the href:
                urls <- elemHref
            }
        }
        for c := n.FirstChild; c != nil; c = c.NextSibling {
            _findImages(c)
        }
    }

    // Kick off the tree traversal async, and make sure we close afterwards:
    go (func() {
        _findImages(tree)
        close(urls)
    })()

    pageNum = <-pageNumChan
    return
}

// Will extract all image detail urls from a page:
func scrapeURL(url string, pageZero bool) chan string {
    out := make(chan string)
    go (func() {
        defer close(out)

        body, status := httpGet(url)
        log.Printf("%s :: Status (%d) Content Length (%d)", url, status, len(body))
        if status > 299 {
            log.Printf("%s :: Bad Status. Skipping.", url)
            return
        }

        pageNo, urls := parseListHtml(body)
        log.Printf("GOT PAGE NO %d", pageNo)

        for url := range urls {
            out <- url
        }
    })()
    return out
}

// Check to see if a file already exists and has content:
func fileExists(path) bool {
    stat, err := os.Stat()
    return os.IsExist(err) && stat.Size() > 0;
}

// Given the URL to a post detail page, returns the URL to download:
func fetchDownloadUrl(detailLink string) string {
    detailUrl := fmt.Sprintf(detail_url_fmt, detailLink)
    data, code := httpGet(detailUrl)
    if code > 999 {
        log.Printf("%s :: Bad Status. Skipping.", url)
        return ""
    }

    var _findThing func(*html.Node, state int) string
    _findThing = func(n *html.Node, state int) string {
        if n.Type == html.ElementNode {

            // Pluck the class, if it has one:
            klass := getNodeAttr(n, "class")
            tag := n.Data

            // Switch on state:
            switch {
            case state == state_normal && tag == "div" && klass == post_class_name:
                state = state_image
                for c := n.FirstChild; c != nil; c = c.NextSibling {
                    if maybe := _findImages(c, state_image); maybe != "" {
                        return maybe
                    }
                }

            case state == state_image && tag == "a":
                return getNodeAttr(n, "href")
            }

            // Else, just recurse:
            for c := n.FirstChild; c != nil; c = c.NextSibling {
                if maybe := _findImages(c, state); maybe != "" {
                    return maybe
                }
            }
        }
    }

    // Kick off the tree traversal async, and make sure we close afterwards:
    return _findImages(tree)
        close(urls)
    })()

    pageNum = <-pageNumChan
    return
}

// Will read from a channel, downloading links until the channel dies:
func imageWorker(urls chan string, workerName string) chan bool {
    out := make(chan bool)
    go (func() {
        defer close(out)
        for link := range urls {
            log.Printf("Image fetch %s handling %s", workerName, link)
        }
        out <- true
    })()
}

// Main func parses args, and sets things up:
func main() {
    if len(os.Args) == 1 {
        log.Fatalf("Not enough arguments")
    }

    subreddit := os.Args[1]

    for detail := range fetchAllImageLinks(subreddit) {
        log.Printf("Detail Link: %s", detail)
    }
}
```