Actor

cpatrick/TEST-AMZN-Best-Seller

  • Builds
  • latest 0.0.8 / 2018-05-03
  • Created 2018-03-29
  • Last modified 2018-05-03
  • grade 2

Description

Sadly, this actor has no description.


API

To run the actor, send an HTTP POST request to:

https://api.apify.com/v2/acts/cpatrick~TEST-AMZN-Best-Seller/runs?token=<YOUR_API_TOKEN>

The POST payload will be passed as input for the actor. For more information, read the docs.


Example input

Content type: application/json; charset=utf-8

[
	{
		"productCategory":"Nintendo Switch",
		"baseURL":"https://www.amazon.com/Best-Sellers-Video-Games-Nintendo-Switch/zgbs/videogames/16227133011/"
	},
	
	{
		"productCategory":"PS4",
		"baseURL":"https://www.amazon.com/Best-Sellers-Video-Games-PlayStation/zgbs/videogames/6427831011/"
	},

	{
		"productCategory":"PS3",
		"baseURL":"https://www.amazon.com/Best-Sellers-Video-Games-PlayStation/zgbs/videogames/4924897011/"
	},

	{
		"productCategory":"xBox One",
		"baseURL":"https://www.amazon.com/Best-Sellers-Video-Games-Xbox-One/zgbs/videogames/6469296011/"
	},

	{
		"productCategory":"xBox 360",
		"baseURL":"https://www.amazon.com/Best-Sellers-Video-Games-Xbox-360/zgbs/videogames/4924903011/"
	},

	{
		"productCategory":"Wii U",
		"baseURL":"https://www.amazon.com/Best-Sellers-Video-Games-Wii/zgbs/videogames/4924901011/"
	},

	{
		"productCategory":"PC Games",
		"baseURL":"https://www.amazon.com/Best-Sellers-Video-Games-PC-compatible/zgbs/videogames/4924894011/"
	}
]

Source code

Based on the apify/actor-node-chrome Docker image (see docs).

const Apify = require('apify');
const request = require('request-promise');

// Each Best Seller list has 100 products grouped 20 per page.
const PAGES_PER_CATEGORY = 5;

// CSS selector matching one product box on a Best Seller page.
const ITEM_SELECTOR = '.zg_itemImmersion';

// Extra wait (in milliseconds) after the first product box appears.
// Results 21-100 load after results 1-20, so a buffer is needed before scraping.
const LAZY_LOAD_DELAY_MS = 2750;

/**
 * Runs inside the browser page (passed to `page.evaluate`), so it must not
 * reference anything from the surrounding Node.js scope.
 *
 * @param {string} itemSelector - CSS selector matching one product box.
 * @returns {Array<Object>} One record per product box with the keys `rank`,
 *   `title`, `price`, `release`, `publisher` and `link`; a value is `null`
 *   when the corresponding element is not present in the box.
 */
function extractBestSellerItems(itemSelector) {
  const textOf = (element) => (element !== null ? element.innerText : null);

  return Array.from(document.querySelectorAll(itemSelector)).map((item) => {
    const linkElement = item.querySelector('a');
    const nameElement = item.querySelector('.p13n-sc-truncated');

    // The title attribute is ideal, but not all products use it.
    const titleAttribute = nameElement !== null ? nameElement.getAttribute('title') : null;

    return {
      rank: textOf(item.querySelector('.zg_rankNumber')),
      // Inner text can be truncated or poorly formatted, so it is only the fallback
      // (`||` on purpose: an empty title attribute should also fall back).
      title: titleAttribute || textOf(nameElement),
      price: textOf(item.querySelector('.p13n-sc-price')),
      release: textOf(item.querySelector('.zg_releaseDate')),
      publisher: textOf(item.querySelector('div.a-row.a-size-small')),
      link: linkElement !== null ? linkElement.href : null,
    };
  });
}

/**
 * Opens one page of a Best Seller list, scrapes its product boxes and tags
 * every record with the category name. The page is closed even when
 * navigation or scraping throws.
 *
 * @param {Object} browser - Puppeteer browser returned by `Apify.launchPuppeteer()`.
 * @param {Object} category - Input entry with `productCategory` and `baseURL`.
 * @param {number} pageNumber - 1-based page number, appended to the URL as `#<n>`.
 * @returns {Promise<Array<Object>>} The scraped product records.
 */
async function scrapeBestSellerPage(browser, category, pageNumber) {
  const page = await browser.newPage();
  try {
    // Block images to cut down on traffic.
    await page.setRequestInterception(true);
    page.on('request', (interceptedRequest) => {
      if (interceptedRequest.resourceType() === 'image') {
        interceptedRequest.abort();
      } else {
        interceptedRequest.continue();
      }
    });

    const url = `${category.baseURL}#${pageNumber}`;
    console.log(url);
    await page.goto(url);

    await page.waitForSelector(ITEM_SELECTOR);

    // The ugliest part of the code: a fixed pause so the remaining results can load.
    console.log("Waiting....");
    await page.waitFor(LAZY_LOAD_DELAY_MS);
    console.log("GO");

    const items = await page.evaluate(extractBestSellerItems, ITEM_SELECTOR);
    for (const item of items) {
      item.productCategory = category.productCategory;
    }

    console.log(items);
    return items;
  } finally {
    await page.close();
  }
}

/**
 * Actor entry point: reads the list of categories from the INPUT value,
 * scrapes every page of every category and stores all product records
 * under the OUTPUT key. The browser is closed even when scraping fails.
 */
Apify.main(async () => {
  const input = await Apify.getValue('INPUT');
  if (!Array.isArray(input)) {
    throw new Error('INPUT must be a JSON array of { productCategory, baseURL } objects');
  }

  const browser = await Apify.launchPuppeteer();
  const products = [];

  try {
    // Read all the input categories from the parameter JSON file.
    for (const [index, category] of input.entries()) {
      console.log(`${index}: ${category.productCategory} - ${category.baseURL}`);

      // Iterate over every page of the list.
      for (let pageNumber = 1; pageNumber <= PAGES_PER_CATEGORY; pageNumber++) {
        const items = await scrapeBestSellerPage(browser, category, pageNumber);
        // Append this page's results to the array of all products.
        products.push(...items);
      }
    }

    console.log('Writing Output');
    await Apify.setValue('OUTPUT', products);
  } finally {
    await browser.close();
  }
});