Actor

mtrunkat/twitter

  • Builds
  • latest 0.0.16 / 2018-06-23
  • Created 2018-06-23
  • Last modified 2018-06-23
  • grade 4

Description

Extracts all tweets for a given hashtag.


API

To run the actor, send an HTTP POST request to:

https://api.apify.com/v2/acts/mtrunkat~twitter/runs?token=<YOUR_API_TOKEN>

The POST payload will be passed as input for the actor. For more information, read the docs.


Example input

Content type: application/json; charset=utf-8

{ "hashtag": "trump" }

Source code

Based on the apify/actor-node-chrome Docker image (see docs).

const Apify = require('apify');

/**
 * Twitter hashtag scraper (legacy Apify SDK).
 *
 * Reads `{ hashtag }` from the actor INPUT, seeds a request queue with the
 * mobile-Twitter search page for that hashtag, and crawls the paginated
 * results, pushing `{ username, tweet }` records to the default dataset.
 */
Apify.main(async () => {
    const { hashtag } = await Apify.getValue('INPUT');

    // Fail fast on missing input instead of crawling a meaningless URL.
    if (!hashtag) throw new Error('Actor INPUT must contain a non-empty "hashtag" field.');

    // Enqueue first URL. The hashtag is user-supplied, so it must be
    // percent-encoded — spaces, '&', '#' or non-ASCII characters would
    // otherwise corrupt the query string.
    const requestQueue = await Apify.openRequestQueue();
    await requestQueue.addRequest(new Apify.Request({
        url: `https://mobile.twitter.com/search?q=%23${encodeURIComponent(hashtag)}`,
    }));

    // Open twitter with JS disabled to be able to switch to new version,
    // save cookies (with switch to old Twitter version) and close the browser.
    const browser = await Apify.launchPuppeteer();
    const page = await browser.newPage();
    await page.setJavaScriptEnabled(false);
    await page.goto('https://mobile.twitter.com/home');
    await Apify.utils.sleep(1000);
    await page.click('button');
    await Apify.utils.sleep(1000);
    const cookies = await page.cookies();
    await browser.close();

    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        // NOTE(review): in the legacy SDK, `useApifyProxy` is documented under
        // `launchPuppeteerOptions`, not as a top-level crawler option — verify
        // against the pinned apify version that this actually enables the proxy.
        useApifyProxy: true,

        // Here we apply our cookies from step 1 (forces the legacy mobile UI).
        gotoFunction: async ({ page, request }) => {
            await page.setCookie(...cookies);

            return page.goto(request.url);
        },

        handlePageFunction: async ({ page, request }) => {
            console.log(`Processing ${request.url}...`);

            // Enqueue next page; the "more" link is absent on the last page,
            // in which case $eval throws and we deliberately stop paginating.
            try {
                const nextHref = await page.$eval('.w-button-more a', el => el.href);
                await requestQueue.addRequest(new Apify.Request({ url: nextHref }));
            } catch (err) {
                console.log(`Url ${request.url} is the last page!`);
            }

            // Extract data. This function runs in the browser context, so it
            // may only use DOM APIs — no Node.js or outer-scope variables.
            const pageFunction = ($tweets) => {
                const data = [];

                $tweets.forEach(($tweet) => {
                    data.push({
                        username: $tweet.querySelector('.username').innerText,
                        tweet: $tweet.querySelector('.tweet-text').innerText,
                    });
                });

                return data;
            };
            const data = await page.$$eval('table.tweet', pageFunction);
            await Apify.pushData(data);
        },

        handleFailedRequestFunction: async ({ request }) => {
            console.log(`Request ${request.url} failed 4 times`);
        },
    });

    await crawler.run();
});