Im仍在为客户编制这一目录,通过PHP和简单的DOM Parser将图像从一个偏远地区上载。
// Code excerpt from http://internetvolk.de/fileadmin/template/res/scrape.php, this is just one case of a select
$subcat = $_GET[ subcat ];
$url = "http://pinesite.com/meubelen/index.php?".$subcat."&lang=de";
$html = file_get_html(html_entity_decode($url));
$iframe = $html->find( iframe ,0);
$url2 = $iframe->src;
$html->clear();
unset($html);
$fullurl = "http://pinesite.com/meubelen/".$url2;
$html2 = file_get_html(html_entity_decode($fullurl));
$pagecount = 1;
$titles = $html2->find( .tekst );
$images = $html2->find( .plaatje );
$output= ;
$i=0;
foreach ($images as $image) {
$item[ title ] = $titles[$i]->find( p ,0)->plaintext;
$imagePath = $image->find( img ,0)->src;
$item[ thumb ] = resize("http://pinesite.com".str_replace( thumb_ , ,$imagePath),array("w"=>225, "h"=>162));
$item[ image ] = http://pinesite.com .str_replace( thumb_ , ,$imagePath);
$fullurl2 = "http://pinesite.com/meubelen/prog/showpic.php?src=".str_replace( thumb_ , ,$imagePath)."&taal=de";
$html3 = file_get_html($fullurl2);
$item[ size ] = str_replace( , ,$html3->find( td ,1)->plaintext);
unset($html3);
$output[] = $item;
$i++;
}
if (count($html2->find( center )) > 1) {
// ok, multi-page here, let s find out how many there are
$pagecount = count($html2->find( center ,0)->find( a ))-1;
for ($i=1;$i<$pagecount; $i++) {
$startID = $i*20;
$newurl = html_entity_decode($fullurl."&beginrec=".$startID);
$html3 = file_get_html($newurl);
$titles = $html3->find( .tekst );
$images = $html3->find( .plaatje );
$a=0;
foreach ($images as $image) {
$item[ title ] = $titles[$a]->find( p ,0)->plaintext;
$item[ image ] = http://pinesite.com .str_replace( thumb_ , ,$image->find( img ,0)->src);
$item[ thumb ] = resize($item[ image ],array("w"=>225, "h"=>150));
$output[] = $item;
$a++;
}
$html3->clear();
unset ($html3);
}
}
echo json_encode($output);
因此,它应当做些什么(并且涉及某些类别): http://pinesite.com。
例如,如果你通过“功能=images&subcat=antiek”,但不通过“功能=images&subcat=stoelen”。 我甚至不认为这是一个与遥远的网页有关的问题,因此,我的法典必须出现错误。