Act

sashadickey/EventBrite

  • Builds
  • latest 0.0.26 / 2018-05-24
  • Created 2018-05-08
  • Last modified 2018-05-24
  • grade 1

Description

Sadly, this act has no description.


API

To run the act, send an HTTP POST request to:

https://api.apify.com/v2/acts/sashadickey~EventBrite/runs?token=<YOUR_API_TOKEN>

The POST payload will be passed as input for the act. For more information, read the docs.


Example input

Content type: application/json; charset=utf-8

{"cities": ["va--richmond", "dc--washington", "va--williamsburg", "va--virginia-beach"]}

Source code

Based on the apify/actor-node-chrome Docker image (see docs).

const Promise = require('bluebird');
const Apify = require('apify');
const querystring = require('querystring');
const { MongoClient } = require('mongodb');
const _ = require('lodash');

/**
 * Stores a scraped event in a MongoDB collection, keyed by `event.id`.
 *
 * If a document with the same `event.id` already exists it is replaced,
 * otherwise the object is inserted. Objects without an id are always inserted,
 * because matching on a null id would hit an unrelated document.
 *
 * Failures are logged and swallowed so that one bad record does not abort the
 * crawl that calls this function.
 *
 * @param {object} collection - MongoDB collection exposing `findOne`, `replaceOne` and `insertOne`.
 * @param {object} object - Event record; the lookup key is read from `object.event.id`.
 * @returns {Promise<void>}
 */
const importObjectToCollection = async (collection, object) => {
    try {
        const eventId = object && object.event ? object.event.id : null;

        // Dot notation matches on the nested field. A nested `{ event: { id } }`
        // filter would require the whole `event` sub-document to equal `{ id }`
        // and therefore never match a stored event.
        const existingObject = eventId == null
            ? null
            : await collection.findOne({ 'event.id': eventId });

        if (existingObject) {
            // The record is a full document, not a set of update operators,
            // so it has to be written with replaceOne rather than updateOne.
            await collection.replaceOne({ _id: existingObject._id }, object);
        } else {
            await collection.insertOne(object);
        }
    } catch (err) {
        console.log(`Cannot import object ${JSON.stringify(object)}: ${err.message}`);
    }
};

/**
 * Actor entry point.
 *
 * Phase 1 crawls Eventbrite listing pages for every city in the input and
 * every enabled category, following `page=N` pagination, and enqueues each
 * event URL found in the listing's JSON-LD.
 * Phase 2 visits every enqueued event page, extracts its details and stores
 * them in MongoDB through `importObjectToCollection`.
 *
 * Expected INPUT: `{ "cities": ["va--richmond", ...] }`, where each entry is
 * the city segment used in the listing URL.
 *
 * The MongoDB connection string is read from the `MONGODB_URL` environment
 * variable when it is set.
 */
Apify.main(async () => {
    const LISTING_JSON_LD = 'script[type="application/ld+json"]';
    const EVENT_JSON_LD = 'script[type="text/javascript"] ~ script[type="application/ld+json"]';
    const PRIMARY_SUGGESTIONS = 'section.listing-related-events a.poster-card__main';
    const FALLBACK_SUGGESTIONS = 'div.related-events a.poster-card__main';

    // Reads the first element matching `selector`, strips non-ASCII characters,
    // collapses whitespace runs and parses the result as JSON.
    const readJsonLd = async (page, selector) => {
        const text = await page.$eval(selector, el => el.innerText.trim().replace(/[^\x00-\x7F]/g, '').replace(/[\s]{2,}/g, ' '));
        return JSON.parse(text);
    };

    // Returns the event id embedded in an event URL, or null when none is found.
    // URLs ending in "es2" carry the id right before "?aff"; otherwise the first
    // run of eight or more digits is used.
    const extractEventId = (url) => {
        const pattern = url.substr(url.length - 3) === 'es2' ? /(\d+)\?aff/ : /(\d{8,})/;
        const match = pattern.exec(url);
        return match && match[1] ? match[1] : null;
    };

    // Returns the listing URL with its `page=N` parameter incremented by one.
    const nextPageUrl = url => url.replace(/page=(\d+)/, (match, pageNumber) => `page=${parseInt(pageNumber, 10) + 1}`);

    // Collects the `data-eid` attribute of every element matching `selector`.
    const readEventIds = (page, selector) => page.$$eval(selector, cards => cards.map(card => card.getAttribute('data-eid')));

    // Builds the empty record that is filled in for each event page.
    const createEmptyEvent = () => ({
        owner: {
            name: null,
            description: null,
            url: null,
        },
        place: {
            name: null,
            location: {
                street: null,
                street2: null,
                city: null,
                country: null,
                region: null,
                postcode: null,
                latitude: null,
                longitude: null,
            },
        },
        event: {
            id: null,
            name: null,
            url: null,
            description: null,
            dates: [{
                start: null,
                end: null,
            }],
            images: [{
                source: null,
                width: null,
                height: null,
            }],
            tickets: {
                url: null,
            },
            timezone: null,
            updated_time: null,
            tags: null,
            suggestions: null,
        },
    });

    const input = await Apify.getValue('INPUT');
    if (!input || !Array.isArray(input.cities)) {
        throw new Error('INPUT must be an object with a "cities" array');
    }

    // FIXME(security): the fallback below embeds database credentials in source.
    // Rotate them and supply the connection string through MONGODB_URL only.
    const mongoUrl = process.env.MONGODB_URL
        || 'mongodb+srv://admin:FvFnYrzeFrq0AxDh@production-kwhlb.mongodb.net/eventbrite';
    const mongodbConnection = await MongoClient.connect(mongoUrl, { useNewUrlParser: true });

    try {
        const collection = mongodbConnection.db('eventbrite').collection('events');

        // One queue for listing pages, one for the event pages they link to.
        const categoryQueue = await Apify.openRequestQueue('EBcategories');
        const eventQueue = await Apify.openRequestQueue('EBevents');

        const rootUrl = 'https://www.eventbrite.com/d';

        // Only the uncommented categories are crawled.
        const categories = [
            'business--events',
            // 'arts--events',
            // 'hobbies--events',
            // 'music--events',
            // 'health--events',
            // 'spirituality--events',
            // 'family-and-education--events',
            // 'other--events',
            // 'film-and-media--events',
            // 'travel-and-outdoor--events',
            // 'food-and-drink--events',
            // 'science-and-tech--events',
            // 'fashion--events',
            // 'home-and-lifestyle--events',
            // 'sports-and-fitness--events',
            // 'charity-and-causes--events',
            // 'community--events',
            // 'government--events'
        ];

        // Enqueue the first listing page of every city/category pair and wait for
        // all of them, so the crawler never starts against an empty queue.
        const startUrls = [];
        for (const city of input.cities) {
            for (const category of categories) {
                startUrls.push(`${rootUrl}/${city}/${category}/?crt=regular&page=1&sort=best`);
            }
        }
        await Promise.all(startUrls.map(url => categoryQueue.addRequest(new Apify.Request({ url }))));

        const categoryCrawler = new Apify.PuppeteerCrawler({
            requestQueue: categoryQueue,
            maxConcurrency: 5,
            maxRequestRetries: 5,
            launchPuppeteerOptions: {
                useChrome: true,
            },

            // Called once per listing page; `page` is the Puppeteer page object.
            // An error thrown here makes the crawler retry the request, up to
            // maxRequestRetries times.
            handlePageFunction: async ({ page, request }) => {
                console.log(`Processing ${request.url}...`);
                // The timeout must be passed in an options object; a bare number is ignored.
                await page.waitForSelector(LISTING_JSON_LD, { timeout: 4000 });
                const pageJson = await readJsonLd(page, LISTING_JSON_LD);
                try {
                    const listedEvents = (Array.isArray(pageJson) ? pageJson : [])
                        .filter(item => item && typeof item.url === 'string')
                        .map((item) => {
                            const eid = extractEventId(item.url);
                            if (eid === null) {
                                console.log(`----      ------             -----           ---- Could Not get eid of URL:  ${item.url}`);
                            }
                            return { url: item.url, eid };
                        });

                    // Only continue to the next page if this one listed at least one event.
                    if (listedEvents.length > 0) {
                        listedEvents.forEach(listed => console.log(listed.url));

                        await Promise.map(listedEvents, listed => eventQueue.addRequest(new Apify.Request({ url: listed.url, userData: listed })));

                        console.log('--- ---- PROMISE MAPPED');

                        console.log('ADDING NEXT URL TO QUEUE');
                        await categoryQueue.addRequest(new Apify.Request({ url: nextPageUrl(request.url) }));
                    }
                } catch (err) {
                    console.log("++++++ ++++++           ERR IS:          " + err);
                }
            },

            // Called once a request has failed and no retries are left.
            handleFailedRequestFunction: async ({ request }) => {
                console.log(`Request ${request.url} failed too many times`);
            },
        });

        await categoryCrawler.run();

        // Nothing in this function writes to this dataset; the call is kept so the
        // named dataset is still opened exactly as before.
        await Apify.openDataset('EBevents');

        const eventCrawler = new Apify.PuppeteerCrawler({
            requestQueue: eventQueue,
            maxConcurrency: 5,
            maxRequestRetries: 0,

            // Called once per event page; `page` is the Puppeteer page object.
            // With maxRequestRetries set to 0 a failed page is not retried.
            handlePageFunction: async ({ page, request }) => {
                console.log(`Processing ${request.url}...`);

                const event = createEmptyEvent();
                event.event.url = request.url;

                try {
                    const jsonEventData = await readJsonLd(page, EVENT_JSON_LD);

                    // Suggested events: try the regular layout first.
                    try {
                        event.event.suggestions = await readEventIds(page, PRIMARY_SUGGESTIONS);
                    } catch (error) {
                        console.log('Suggestions Error:       ' + error);
                    }
                    // Fall back to the irregularly formatted layout only when the regular
                    // one yielded nothing. The wait is inside its own try so that a timeout
                    // does not discard the rest of the extraction.
                    if (!event.event.suggestions || event.event.suggestions[0] == null) {
                        try {
                            await page.waitForSelector(FALLBACK_SUGGESTIONS, { timeout: 10000 });
                            event.event.suggestions = await readEventIds(page, FALLBACK_SUGGESTIONS);
                        } catch (error) {
                            console.log('Suggestions Error:       ' + error);
                        }
                    }
                    console.log('++++++++++++++++++++++++++++' + event.event.suggestions);

                    event.event.id = request.userData.eid;
                    event.event.name = jsonEventData.name;
                    event.event.description = jsonEventData.description;
                    event.event.images[0].source = jsonEventData.image;
                    event.event.dates[0].start = jsonEventData.startDate;
                    event.event.dates[0].end = jsonEventData.endDate;

                    // Each optional section is guarded on its own so that one missing
                    // section does not prevent the following ones from being stored.
                    const offer = Array.isArray(jsonEventData.offers) ? jsonEventData.offers[0] : jsonEventData.offers;
                    if (offer) {
                        Object.assign(event.event.tickets, {
                            url: offer.url,
                            lowPrice: offer.lowPrice,
                            highPrice: offer.highPrice,
                            inventory: offer.inventoryLevel,
                            startDate: offer.availabilityStarts,
                            endDate: offer.availabilityEnds,
                            currency: offer.priceCurrency,
                        });
                    }

                    const organizer = jsonEventData.organizer;
                    if (organizer) {
                        event.owner.url = organizer.url;
                        event.owner.name = organizer.name;
                        event.owner.description = organizer.description;
                    }

                    const location = jsonEventData.location;
                    if (location) {
                        event.place.name = location.name;
                        const address = location.address;
                        if (address) {
                            event.place.location.street = address.streetAddress;
                            event.place.location.city = address.addressLocality;
                            event.place.location.region = address.addressRegion;
                            event.place.location.postcode = address.postalCode;
                            event.place.location.country = address.addressCountry;
                        }
                    }

                    // Best effort: read "lat,lng" from the `sll` parameter of the map link.
                    try {
                        const mapHref = await page.$eval('a.js-view-map-link', a => a.href);
                        // Parse only the query part; otherwise the first parameter name
                        // would be prefixed with the scheme, host and path.
                        const query = mapHref.includes('?') ? mapHref.slice(mapHref.indexOf('?') + 1) : mapHref;
                        const { sll } = querystring.parse(query);
                        if (sll) {
                            const [latitude, longitude] = sll.split(',');
                            event.place.location.latitude = latitude;
                            event.place.location.longitude = longitude;
                        }
                    } catch (error) {
                        console.log(`No map coordinates for ${request.url}: ${error.message}`);
                    }
                } catch (error) {
                    console.log('ERROR IS :                        ' + error);
                }

                // Save whatever was extracted, even if only partially filled.
                await importObjectToCollection(collection, event);
            },

            // Called once a request has failed and no retries are left.
            handleFailedRequestFunction: async ({ request }) => {
                console.log(`Request ${request.url} failed too many times`);
            },
        });

        await eventCrawler.run();
    } finally {
        // Release the database connection whether or not the crawl succeeded.
        await mongodbConnection.close();
    }
});