Herzlich willkommen im SEO Forum der ABAKUS Internet Marketing GmbH
registrieren registriertes Mitglied
Versuch: Trennzeichen für Keywords/Variablen in Dateinamen
Es existieren 2 Versionen von Seiten meiner Homepage - woher?
Code: Alles auswählen
/**
 * Fetch a list of URLs with cURL, running at most $nThreads transfers at a time.
 *
 * The URLs are split into batches of $nThreads entries (the original array keys
 * are preserved). Each batch is driven through one curl_multi handle until all
 * of its transfers have finished; only then is the next batch started.
 *
 * Reads the global $lc_delay_value and passes it to usleep() after the initial
 * curl_multi_exec() call(s) of every batch.
 *
 * @param array $urlArray    URLs to fetch; their keys become the keys of the result.
 * @param array $optionArray Options applied to every handle via curl_setopt_array().
 * @param int   $nThreads    Batch size; values below 1 are treated as 1.
 *
 * @return array Value of curl_multi_getcontent() per URL, keyed like $urlArray.
 *               An empty array when $urlArray is empty.
 */
function multi_thread_curl($urlArray, $optionArray, $nThreads) {
    global $lc_delay_value;

    // Always hand back an array, even when there is nothing to fetch.
    $results = array();

    // array_chunk() rejects sizes below 1 and usleep() rejects negative values.
    $batchSize = max(1, (int) $nThreads);
    $delay     = max(0, (int) $lc_delay_value);

    foreach (array_chunk($urlArray, $batchSize, true) as $threads) {
        // Handles live in a per-batch array so they can be dropped once the
        // batch is done instead of accumulating until the function returns.
        $handles = array();
        $mh = curl_multi_init();

        foreach ($threads as $key => $value) {
            $handle = curl_init();
            curl_setopt_array($handle, $optionArray);
            curl_setopt($handle, CURLOPT_URL, $value);
            curl_multi_add_handle($mh, $handle);
            $handles[$key] = $handle;
        }

        // Kick off the transfers of this batch.
        $active = null;
        do {
            $mrc = curl_multi_exec($mh, $active);
            usleep($delay);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);

        // Drive the transfers until none is active any more.
        while ($active && $mrc == CURLM_OK) {
            if (curl_multi_select($mh) == -1) {
                // select() failed: back off briefly, then still call
                // curl_multi_exec() below so the loop keeps making progress
                // instead of spinning without ever advancing the transfers.
                usleep(100);
            }
            do {
                $mrc = curl_multi_exec($mh, $active);
            } while ($mrc == CURLM_CALL_MULTI_PERFORM);
        }

        // Collect the responses and detach every handle from the multi handle.
        foreach ($handles as $key => $handle) {
            $results[$key] = curl_multi_getcontent($handle);
            curl_multi_remove_handle($mh, $handle);
            // Drop our reference so the handle does not outlive its batch.
            unset($handles[$key]);
        }

        curl_multi_close($mh);
    }

    return $results;
}
// cURL options applied to every request handle via curl_setopt_array().
// The order of the entries is kept as-is because options are applied in order.
$optionArray = array(
    CURLOPT_RETURNTRANSFER => 1,            // return the response instead of printing it
    CURLOPT_HEADER         => true,         // include the response headers in the returned string
    CURLOPT_CUSTOMREQUEST  => 'GET',
    CURLOPT_FOLLOWLOCATION => false,        // do not follow redirects
    CURLOPT_ENCODING       => 'gzip',
    CURLOPT_CONNECTTIMEOUT => 10,           // seconds
    CURLOPT_TIMEOUT        => 10,           // seconds
    // NOTE(review): TLS host and peer verification are switched off here.
    // Presumably deliberate for a crawler that must reach misconfigured
    // hosts - confirm before reusing these options for anything sensitive.
    CURLOPT_SSL_VERIFYHOST => 0,
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_NOBODY         => false,        // fetch the body as well
    CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.47 Safari/537.36",
    CURLOPT_HTTPHEADER     => array('Cache-Control: max-age=0,no-store,no-cache'),
    CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_1_1
);

$nThreads = $lc_threads_value;

$crawler_query = "SELECT url FROM lc_url WHERE blacklisted != 1";
$crawler_query_result = $mysqli->query($crawler_query);

// Collect ALL URLs first and hand them to multi_thread_curl() in one call.
// Calling it once per row with a one-element array would fetch the URLs
// strictly one after another and overwrite $results on every iteration.
$urlArray = array();
$results  = array();

if ($crawler_query_result === false) {
    // query() returned false: report it instead of failing on ->num_rows.
    trigger_error('Crawler URL query failed: ' . $mysqli->error, E_USER_WARNING);
} else {
    while ($crawlUrls = $crawler_query_result->fetch_assoc()) {
        $urlArray[] = $crawlUrls['url'];
    }

    // Skip the call entirely when no URL matched the query.
    if (count($urlArray) > 0) {
        $results = multi_thread_curl($urlArray, $optionArray, $nThreads);
    }
}
Code: Alles auswählen
// The same URL list is fetched once per option set. Every call assigns to
// $results, so each call replaces the return value of the previous one.
// NOTE(review): the "....." line appears to stand for the elided calls
// between $optionArray3 and $optionArray55 - it is not valid PHP.
$results = multi_thread_curl($urlArray, $optionArray, $nThreads);
$results = multi_thread_curl($urlArray, $optionArray2, $nThreads);
$results = multi_thread_curl($urlArray, $optionArray3, $nThreads);
.....
$results = multi_thread_curl($urlArray, $optionArray55, $nThreads);
Code: Alles auswählen
// Debug output: report how many URLs are about to be passed on.
printf("Processing %d URLs in parallel\n", count($urlArray));
Code: Alles auswählen
// Gather the "url" column of every result row, then fetch the whole list
// with a single multi_thread_curl() call.
$crawl_urls = array();
while (true) {
    $row = $crawler_query_result->fetch_array();
    if (!$row) {
        // No further row available.
        break;
    }
    array_push($crawl_urls, $row["url"]);
}
$results = multi_thread_curl($crawl_urls, $optionArray, $nThreads);
Ich hab's nochmal bearbeitet.
Code: Alles auswählen
Processing 57396 URLs in parallel