
//Disclaimer: I do not condone mass web scraping of ESPN websites, and this is just theoretical code that hasn't been used.

//Scrape player stats off of https://www.espn.com/nfl/team/roster/_/name/phi/philadelphia-eagles

//Example player JS object
/*
{
	"shortName": "S. Opeta",
	"name": "Sua Opeta",
	"href": "http://www.espn.com/nfl/player/_/id/3121009/sua-opeta",
	"uid": "s:20~l:28~a:3121009",
	"guid": "dec19157-e984-f981-3724-498281328c97",
	"id": "3121009",
	"height": "6' 4\"",
	"weight": "305 lbs",
	"age": 27,
	"position": "G",
	"jersey": "78",
	"birthDate": "08/15/96",
	"headshot": "https://a.espncdn.com/i/headshots/nfl/players/full/3121009.png",
	"lastName": "Sua Opeta",
	"experience": 4,
	"college": "Weber State"
}
*/
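
//For reference, the cleanup below overwrites/adds fields roughly like this (a sketch based on the code, using the example player above):
/*
{
	...all of the fields above, plus:
	"weight": 305,     //number parsed out of "305 lbs"
	"experience": 4,   //"R" (rookie) would become 0
	"inches": 76,      //6' 4" converted to inches
	"birthMonth": 8,
	"birthDay": 15,
	"birthYear": 96    //two-digit year, exactly as it appears in "08/15/96"
}
*/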

//The Clipboard API only works while the page is focused, so put this code in a bookmarklet and click somewhere on the page before clicking the bookmarklet. Allegedly.
navigator.clipboard.writeText(JSON.stringify(
    window["__espnfitt__"].page.content.roster.groups.flatMap((group)=>{
        return group.athletes.map((athlete)=>{
            //We can assume all football players are above 100 lbs and below 1000 lbs.
            athlete.weight = parseInt(athlete.weight.substring(0,3));
            
            if(athlete.experience == "R") athlete.experience = 0;
            else athlete.experience = parseInt(athlete.experience);

            //We can assume every player is at least 1 foot and under 10 feet tall, so the feet part is always a single digit.
            athlete.inches = 12 * parseInt(athlete.height.substring(0, 1)); //Add feet in inches
            athlete.inches += parseInt(athlete.height.substring(2).replaceAll("\"", "").trim()); //Add remaining inches

            const monthDayYear = athlete.birthDate.split("/");
            athlete.birthMonth = parseInt(monthDayYear[0]);
            athlete.birthDay = parseInt(monthDayYear[1]);
            athlete.birthYear = parseInt(monthDayYear[2]);
            
            //The only really useful stuff we get from this is height, weight, left-handedness, age, position, and birthday.
            return athlete;
        });
    })
, null, "\t")).catch(()=>{alert("That failed.")});
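
//A bookmarklet is just a bookmark whose URL starts with "javascript:". A minimal wrapper for the snippet above would look something like this (an untested sketch, not from the original post):
//javascript:(()=>{ /* paste the clipboard snippet above here */ })();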

//Changes all the team links on this page https://www.espn.com/nfl/stats/team to the team's roster for easier scraping
$$("table > tbody > tr > td > div > div > a").forEach((elm)=>{
	elm.setAttribute("href", elm.getAttribute("href").replace("team/", "team/roster/"));
});
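
//$$ is the DevTools console shorthand for document.querySelectorAll. If you run the line above outside the console (e.g. from a bookmarklet), define a stand-in first (my addition, a sketch):
const $$ = (selector) => Array.from(document.querySelectorAll(selector));
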
//Allegedly never tested on the Twitter website
function scrapeScreen(){
    let articles = Array.from(document.getElementsByTagName("article"));
    let results = [];
    for(let tweet of articles){
        //Promoted tweets are identified by a span whose text is exactly "Ad"
        const isAnAd = Array.from(tweet.querySelectorAll("span")).map((e)=>e.textContent).includes("Ad");
        if(isAnAd){
            //console.log(tweet, "is an ad. Skipping...");
            continue;
        }
        //These selectors lean on Twitter's current data-testid markup and will break whenever that changes
        const userName = tweet.querySelector("[data-testid='User-Name'] > div:nth-child(2) > div > div")?.textContent;
        const tweetContent = tweet.querySelector("[data-testid='tweetText']")?.textContent;
        const timeStamp = tweet.querySelector("time")?.getAttribute("datetime");
        const tweetLink = tweet.querySelector("time")?.parentElement?.getAttribute("href");
        if((!userName) || (!tweetContent)) continue;
        results.push({
            username: userName,
            tweetText: tweetContent,
            timeStamp: timeStamp,
            tweetLink: tweetLink
        });
    }
    return results;
}
let scraped = scrapeScreen();
window.scrapeIntervalId = setInterval(()=>{
    scraped = scraped.concat(
        scrapeScreen().filter((tweet)=>{
            for(let scrapedTweet of scraped){
                if(scrapedTweet.username == tweet.username && scrapedTweet.tweetText == tweet.tweetText) return false;
            }
            return true;
        })
    );
}, 500); //Scrape everything on the screen twice a second
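
//An alternative to the interval above (a sketch, not from the original post): track a Set of keys instead of re-scanning every stored tweet on every tick. Same identity rule, username + tweet text. Use it instead of, not alongside, the snippet above.
const seenKeys = new Set(scraped.map((tweet)=>tweet.username + "\n" + tweet.tweetText));
window.scrapeIntervalId = setInterval(()=>{
    for(let tweet of scrapeScreen()){
        const key = tweet.username + "\n" + tweet.tweetText;
        if(seenKeys.has(key)) continue;
        seenKeys.add(key);
        scraped.push(tweet);
    }
}, 500); //Still twice a second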

window.scrollIntervalId = setInterval(function(){
    window.scrollBy(0, 1000);
}, 500); //Scroll for me
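
//Another option (a sketch, not from the original post): stop scrolling on its own once the page height hasn't grown for about ten seconds, instead of relying only on the timer at the bottom. Use it instead of the scroller above.
let lastHeight = 0;
let stalledTicks = 0;
window.scrollIntervalId = setInterval(function(){
    window.scrollBy(0, 1000);
    const height = document.documentElement.scrollHeight;
    if(height === lastHeight) stalledTicks++;
    else { stalledTicks = 0; lastHeight = height; }
    if(stalledTicks >= 20){ //20 ticks at 500 ms is roughly ten seconds with no new content
        clearInterval(window.scrollIntervalId);
        delete window.scrollIntervalId;
    }
}, 500);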


//http://bgrins.github.io/devtools-snippets/#console-save
(function(console){
    console.save = function(data, filename){
        if(!data){
            console.error('Console.save: No data');
            return;
        }

        if(!filename) filename = 'console.json';

        if(typeof data === "object"){
            data = JSON.stringify(data, undefined, '\t');
        }

        var blob = new Blob([data], {type: 'text/json'}),
            e = document.createEvent('MouseEvents'),
            a = document.createElement('a');

        a.download = filename;
        a.href = window.URL.createObjectURL(blob);
        a.dataset.downloadurl = ['text/json', a.download, a.href].join(':');
        e.initMouseEvent('click', true, false, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
        a.dispatchEvent(e);
    }
})(console)
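
//The snippet above uses the long-deprecated document.createEvent/initMouseEvent API. A modern equivalent would look something like this (my own sketch, not part of the original snippet):
(function(console){
    console.save = function(data, filename){
        if(!data){
            console.error('Console.save: No data');
            return;
        }
        if(!filename) filename = 'console.json';
        if(typeof data === "object") data = JSON.stringify(data, undefined, '\t');

        const blob = new Blob([data], {type: 'application/json'});
        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.download = filename;
        a.href = url;
        document.body.appendChild(a);
        a.click(); //Programmatic click; no synthetic MouseEvent needed
        a.remove();
        setTimeout(()=>URL.revokeObjectURL(url), 1000); //Release the blob URL once the download has started
    };
})(console);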

setTimeout(()=>{
    clearInterval(window.scrollIntervalId);
    delete window.scrollIntervalId;

    clearInterval(window.scrapeIntervalId);
    delete window.scrapeIntervalId;

    console.save(scraped, "TwitterScrape" + Date.now() + ".json");
}, 60 * 1000 * 20); //Twenty minutes
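
//If you want the results before the twenty minutes are up, run the same cleanup by hand from the console:
//clearInterval(window.scrollIntervalId); clearInterval(window.scrapeIntervalId); console.save(scraped, "TwitterScrape" + Date.now() + ".json");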