Act

mtrunkat/crawler-to-sitemap

  • Builds
  • latest 0.0.1 / 2017-10-06
  • Created 2017-06-22
  • Last modified 2017-10-25
  • grade 0

Description

This act can be used as a crawler's finish webhook. It transforms the crawler's results into a sitemap XML file and stores it in a key-value store named "sitemaps".


API

To run the act, send an HTTP POST request to:

https://api.apify.com/v2/acts/mtrunkat~crawler-to-sitemap/runs?token=<YOUR_API_TOKEN>

The POST payload will be passed as input for the act. For more information, read the docs.


Example input

Content type: application/json

{
  "_id": "[CRAWLER EXECUTION ID]"
}

Source code

Based on the apify/actor-node-basic Docker image (see docs).

const _ = require('underscore');
const Apify = require('apify');

/**
 * Replaces every occurrence of `search` in this string with `replacement`.
 *
 * A non-RegExp `search` is converted to a string and matched literally:
 * regex metacharacters such as `.`, `+` or `(` are escaped before the
 * pattern is compiled, so they no longer act as wildcards or throw a
 * SyntaxError. A RegExp `search` is recompiled with only the `g` flag,
 * exactly as before.
 *
 * NOTE: this assigns to String.prototype and therefore replaces any
 * built-in `replaceAll` the runtime provides.
 *
 * @param {string|RegExp} search - Text (or pattern) to look for.
 * @param {string|Function} replacement - Passed through to String.prototype.replace,
 *     so `$`-patterns and replacer functions keep their usual meaning.
 * @returns {string} A new string with all matches replaced.
 */
String.prototype.replaceAll = function(search, replacement) {
    const pattern = search instanceof RegExp
        ? new RegExp(search, 'g')
        // Escape regex metacharacters so the search text is matched verbatim.
        : new RegExp(String(search).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    return this.replace(pattern, replacement);
};

// Act entry point: reads the crawler execution ID from the act's INPUT,
// fetches that execution's results, renders the page URLs as a sitemap XML
// document and stores it in the "sitemaps" key-value store under the
// execution ID.
Apify.main(async () => {
    const { crawlers, keyValueStores } = Apify.client;

    // Fail with a clear message instead of an opaque TypeError (missing INPUT)
    // or an API call made with an undefined execution ID (missing "_id").
    const input = await Apify.getValue('INPUT');
    const executionId = input && input._id;
    if (!executionId) {
        throw new Error('Act input must be a JSON object with the crawler execution ID in its "_id" field.');
    }
    console.log({ executionId });

    // NOTE(review): `simplified: 1` presumably requests a flattened result
    // format in which every item exposes `url` — confirm against the API docs.
    const results = await crawlers.getExecutionResults({ executionId, simplified: 1 });

    // '&' is escaped first so the entities produced by the later
    // replacements are not escaped a second time.
    const urlEntries = results
        .items
        .map(page => page.url)
        .map(url => url.replaceAll('&', '&amp;')
                       .replaceAll('\'', '&apos;')
                       .replaceAll('"', '&quot;')
                       .replaceAll('>', '&gt;')
                       .replaceAll('<', '&lt;'))
        .map(url => `  <url>\n    <loc>${url}</loc>\n  </url>\n`)
        .join('');

    // Declared locally: the original assigned to an undeclared `output`,
    // which leaked an implicit global (and throws in strict mode).
    const output = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n${urlEntries}</urlset>`;

    const store = await keyValueStores.getOrCreateStore({
        storeName: 'sitemaps',
    });

    await keyValueStores.putRecord({
        storeId: store.id,
        contentType: 'application/xml',
        key: executionId,
        body: output,
    });

    // Print the URL under which the stored sitemap can be downloaded.
    console.log(`https://api.apifier.com/v2/key-value-stores/${store.id}/records/${executionId}?raw=1`);
});