Skip to content

Instantly share code, notes, and snippets.

@arispublic
Last active June 4, 2018 03:47
Show Gist options
  • Save arispublic/f13d09e8d672870b146a to your computer and use it in GitHub Desktop.
Save arispublic/f13d09e8d672870b146a to your computer and use it in GitHub Desktop.
Easy web scraping using phpQuery (https://code.google.com/p/phpquery/)
<?php
// Fetches a product listing page with cURL, parses it with phpQuery and
// prints the url/title/logo/price fields extracted from each list item.

// Load the phpQuery library here.
require('phpQuery/phpQuery.php');

// Sample URL.
$url = 'http://www.b*ngg**d.com/Wholesale-Mobile-Phones-c-140.html';

// Load the URL using cURL.
$ch = curl_init();
if ($ch === false) {
    exit('Unable to initialise cURL.');
}
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Bound the request so an unresponsive host cannot hang the script.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
$html = curl_exec($ch);
// Read the error text before the handle is closed.
$curlError = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on failure; without this check the false would be
// concatenated into the wrapper below and yield a silently empty result.
if ($html === false) {
    exit(
        'Failed to fetch '
        . htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
        . ': '
        . htmlspecialchars($curlError, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
    );
}

// Wrap the fetched markup in doctype/html/body tags before parsing.
$html = '<!DOCTYPE html><html><body>'.$html.'</body></html>';

// Create phpQuery document with returned HTML.
$doc = phpQuery::newDocument($html);

$products = array();
// Loop through every item of the product list.
foreach ($doc['.goodlist_1 li'] as $data) {
    // Wrap the DOM node once and reuse it for every lookup on this item.
    $item = pq($data);

    $products[] = array(
        'url'       => $item->find('a:first')->attr('href'),
        'title'     => $item->find('.title a')->html(),
        'logo'      => $item->find('img:first')->attr('data-original'),
        'price'     => $item->find('.price')->attr('oriprice'),
        'price_old' => $item->find('.price_old')->attr('oriprice'),
    );
}

// The scraped values come from a remote page, so escape the dump to keep any
// markup they contain from being rendered by the browser.
print '<pre>';
print htmlspecialchars(print_r($products, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
print '</pre>';
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment