Skip to content

Instantly share code, notes, and snippets.

@sarpavci
Created December 27, 2020 13:21
Show Gist options
  • Select an option

  • Save sarpavci/84247b506757421332274bd8a746b3d8 to your computer and use it in GitHub Desktop.

Select an option

Save sarpavci/84247b506757421332274bd8a746b3d8 to your computer and use it in GitHub Desktop.
Trendyol scrape example
const fs = require('fs');
const _ = require('lodash');
const axios = require('axios');
const async = require('async');
// Captures the body of the first JSON-LD <script> element. The match is lazy so
// it stops at the nearest closing tag instead of running on to a later
// </script> on the same line; [\s\S] lets the payload span several lines, and
// the type attribute may be unquoted or quoted.
const JSON_LD_SCRIPT_PATTERN = /type=["']?application\/ld\+json["']?>([\s\S]*?)<\/script>/;

/**
 * Recursively copies a parsed JSON-LD value, removing "@" from property names
 * (so "@type" becomes "type") and removing the "https://schema.org/" prefix
 * from string values. "@" characters inside values are left untouched.
 *
 * @param {*} node - Any value produced by JSON.parse.
 * @returns {*} A normalized copy of `node`.
 */
const normalizeJsonLd = (node) => {
  if (Array.isArray(node)) {
    return node.map(normalizeJsonLd);
  }
  if (node !== null && typeof node === 'object') {
    // Object.fromEntries defines own properties, so a "__proto__" key coming
    // from the page cannot replace the prototype of the copy.
    return Object.fromEntries(
      Object.entries(node).map(([key, value]) => [key.replace(/@/g, ''), normalizeJsonLd(value)]),
    );
  }
  if (typeof node === 'string') {
    return node.replace(/https:\/\/schema\.org\//g, '');
  }
  return node;
};

/**
 * Fetches a page and returns the data of its first embedded JSON-LD block.
 *
 * The returned object has the "@" removed from its keys, no `context` entry,
 * and `image` turned into an array by splitting the first image entry on ",".
 *
 * This function never rejects: when the request, the extraction or the parsing
 * fails, the promise resolves with the Error object instead of a result, so
 * callers must check `instanceof Error`.
 *
 * @param {string} link - URL of the page to fetch.
 * @returns {Promise<Object|Error>} The normalized JSON-LD data, or the error.
 */
const getData = async (link) => {
  try {
    const response = await axios.get(link);
    const html = response.data;
    const scriptMatch = typeof html === 'string' ? html.match(JSON_LD_SCRIPT_PATTERN) : null;
    if (scriptMatch === null) {
      throw new Error(`No JSON-LD data found at ${link}`);
    }

    const result = normalizeJsonLd(JSON.parse(scriptMatch[1]));
    delete result.context;

    // `image` may be a single string or an array whose first entry holds the
    // comma-separated list.
    const firstImage = Array.isArray(result.image) ? result.image[0] : result.image;
    if (typeof firstImage !== 'string') {
      throw new TypeError(`JSON-LD data at ${link} has no usable image entry`);
    }
    result.image = firstImage.split(',');
    return result;
  } catch (error) {
    return error;
  }
};
// Product pages to scrape.
const urls = [
  'https://www.trendyol.com/cekuonline/iphone-7-plus-kilif-simli-shining-desenli-silikon-kirmizi-stok252-kurabiye-p-44089949',
  'https://www.trendyol.com/onadair/samsung-galaxy-j4-plus-bej-tuyler-desenli-telefon-kilifi-p-67069060',
  'https://www.trendyol.com/tatfast/love-is-love-rainbow-kupa-p-45448346',
  'https://www.trendyol.com/hadron/hr2551-intel-1150-1155-1156-775-amd-fm2-am3-am2-uyumlu-combo-islemci-fani-rgb-ledli-12cm-p-33898805',
  'https://www.trendyol.com/organizasyon-pazari/kirmizi-kar-tanesi-strafor-yilbasi-susu-25-cm-cift-tarafli-p-66939289',
  'https://www.trendyol.com/tatfast/i-have-top-secret-clown-business-kupa-p-45211276',
  'https://www.trendyol.com/bikkembergs/erkek-cocuk-mavi-pantolon-p-6471160',
  'https://www.trendyol.com/peyless/8-mm-arpa-kesim-metal-harf-p-66392906',
  'https://www.trendyol.com/arunas-yayincilik/sey-p-82009',
  'https://www.trendyol.com/elelehome/dekoratif-kanvas-tablo-30x90cm-etb-738422776-p-65842691',
  'https://www.trendyol.com/teknomeg/apple-iphone-8-plus-tek-gozlu-kurukafa-kabartma-kilif-p-32905398',
  'https://www.trendyol.com/lufian/maury-kaz-tuyu-kaban-lacivert-p-2042674',
];

// Upper bound on how many of the URLs above are crawled in one run.
const MAX_URLS = 1000;
// Number of pages fetched at the same time.
const CONCURRENCY = 5;

// One callback-style task per URL for async.parallelLimit. Every task calls
// `cb` exactly once and always reports success to `async`, so a single failing
// page neither aborts the run nor leaves parallelLimit waiting forever.
const parallelStack = _.take(urls, MAX_URLS).map((url, index) => (cb) => {
  // A failed page is stored as { url, error } rather than as the Error itself,
  // which JSON.stringify would write out as an uninformative `{}`.
  const recordFailure = (err) => {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Failed ${index + 1}: ${url}: ${message}`);
    cb(null, { url, error: message });
  };

  getData(url).then((res) => {
    // getData resolves with the Error object when scraping fails.
    if (res instanceof Error) {
      recordFailure(res);
      return;
    }
    console.log('ty-scrape.js:49955', res );
    console.log(`Crawled ${index + 1}: ${url}`);
    cb(null, res);
  }, recordFailure);
});

console.time("Scraping");
async.parallelLimit(parallelStack, CONCURRENCY, (err, results) => {
  if (err) {
    console.error(err);
    process.exit(1);
  }
  // Results are written in the same order as the URLs were queued.
  fs.writeFileSync('./ty-scrape.txt', JSON.stringify(results, null, 3));
  console.timeEnd("Scraping");
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment